From ce7875473324729b07661e588ac0c44862efbb0f Mon Sep 17 00:00:00 2001
From: Christy Lee <christylee@fb.com>
Date: Fri, 7 Jan 2022 10:46:04 -0800
Subject: selftests/bpf: Change bpf_prog_attach_xattr() to
 bpf_prog_attach_opts()

bpf_prog_attach_opts() is being deprecated and renamed to
bpf_prog_attach_xattr(). Change all selftests/bpf's uage to the new name.

Signed-off-by: Christy Lee <christylee@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220107184604.3668544-3-christylee@fb.com
---
 tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
index d3e8f729c623..38b3c47293da 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
@@ -194,14 +194,14 @@ void serial_test_cgroup_attach_multi(void)
 
 	attach_opts.flags = BPF_F_ALLOW_OVERRIDE | BPF_F_REPLACE;
 	attach_opts.replace_prog_fd = allow_prog[0];
-	if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+	if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1,
 					 BPF_CGROUP_INET_EGRESS, &attach_opts),
 		  "fail_prog_replace_override", "unexpected success\n"))
 		goto err;
 	CHECK_FAIL(errno != EINVAL);
 
 	attach_opts.flags = BPF_F_REPLACE;
-	if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+	if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1,
 					 BPF_CGROUP_INET_EGRESS, &attach_opts),
 		  "fail_prog_replace_no_multi", "unexpected success\n"))
 		goto err;
@@ -209,7 +209,7 @@ void serial_test_cgroup_attach_multi(void)
 
 	attach_opts.flags = BPF_F_ALLOW_MULTI | BPF_F_REPLACE;
 	attach_opts.replace_prog_fd = -1;
-	if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+	if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1,
 					 BPF_CGROUP_INET_EGRESS, &attach_opts),
 		  "fail_prog_replace_bad_fd", "unexpected success\n"))
 		goto err;
@@ -217,7 +217,7 @@ void serial_test_cgroup_attach_multi(void)
 
 	/* replacing a program that is not attached to cgroup should fail  */
 	attach_opts.replace_prog_fd = allow_prog[3];
-	if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+	if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1,
 					 BPF_CGROUP_INET_EGRESS, &attach_opts),
 		  "fail_prog_replace_no_ent", "unexpected success\n"))
 		goto err;
@@ -225,14 +225,14 @@ void serial_test_cgroup_attach_multi(void)
 
 	/* replace 1st from the top program */
 	attach_opts.replace_prog_fd = allow_prog[0];
-	if (CHECK(bpf_prog_attach_xattr(allow_prog[6], cg1,
+	if (CHECK(bpf_prog_attach_opts(allow_prog[6], cg1,
 					BPF_CGROUP_INET_EGRESS, &attach_opts),
 		  "prog_replace", "errno=%d\n", errno))
 		goto err;
 
 	/* replace program with itself */
 	attach_opts.replace_prog_fd = allow_prog[6];
-	if (CHECK(bpf_prog_attach_xattr(allow_prog[6], cg1,
+	if (CHECK(bpf_prog_attach_opts(allow_prog[6], cg1,
 					BPF_CGROUP_INET_EGRESS, &attach_opts),
 		  "prog_replace", "errno=%d\n", errno))
 		goto err;
-- 
cgit 


From 8d6fabf1654a8c26e4e081d0b934285d6d8868cb Mon Sep 17 00:00:00 2001
From: Christy Lee <christylee@fb.com>
Date: Fri, 7 Jan 2022 16:42:17 -0800
Subject: selftests/bpf: Stop using bpf_map__def() API

libbpf bpf_map__def() API is being deprecated, replace selftests/bpf's
usage with the appropriate getters and setters.

Signed-off-by: Christy Lee <christylee@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220108004218.355761-5-christylee@fb.com
---
 .../selftests/bpf/prog_tests/flow_dissector.c      |  2 +-
 .../testing/selftests/bpf/prog_tests/global_data.c |  2 +-
 .../selftests/bpf/prog_tests/global_data_init.c    |  2 +-
 .../selftests/bpf/prog_tests/sockmap_listen.c      | 12 +++-----
 tools/testing/selftests/bpf/prog_tests/tailcalls.c | 36 +++++++++++-----------
 5 files changed, 26 insertions(+), 28 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index ac54e3f91d42..dfafd62df50b 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -457,7 +457,7 @@ static int init_prog_array(struct bpf_object *obj, struct bpf_map *prog_array)
 	if (map_fd < 0)
 		return -1;
 
-	for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+	for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
 		snprintf(prog_name, sizeof(prog_name), "flow_dissector_%d", i);
 
 		prog = bpf_object__find_program_by_name(obj, prog_name);
diff --git a/tools/testing/selftests/bpf/prog_tests/global_data.c b/tools/testing/selftests/bpf/prog_tests/global_data.c
index 9da131b32e13..917165e04427 100644
--- a/tools/testing/selftests/bpf/prog_tests/global_data.c
+++ b/tools/testing/selftests/bpf/prog_tests/global_data.c
@@ -121,7 +121,7 @@ static void test_global_data_rdonly(struct bpf_object *obj, __u32 duration)
 	if (CHECK_FAIL(map_fd < 0))
 		return;
 
-	buff = malloc(bpf_map__def(map)->value_size);
+	buff = malloc(bpf_map__value_size(map));
 	if (buff)
 		err = bpf_map_update_elem(map_fd, &zero, buff, 0);
 	free(buff);
diff --git a/tools/testing/selftests/bpf/prog_tests/global_data_init.c b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
index 1db86eab101b..57331c606964 100644
--- a/tools/testing/selftests/bpf/prog_tests/global_data_init.c
+++ b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
@@ -20,7 +20,7 @@ void test_global_data_init(void)
 	if (CHECK_FAIL(!map || !bpf_map__is_internal(map)))
 		goto out;
 
-	sz = bpf_map__def(map)->value_size;
+	sz = bpf_map__value_size(map);
 	newval = malloc(sz);
 	if (CHECK_FAIL(!newval))
 		goto out;
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index 7e21bfab6358..2cf0c7a3fe23 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -1413,14 +1413,12 @@ close_srv1:
 
 static void test_ops_cleanup(const struct bpf_map *map)
 {
-	const struct bpf_map_def *def;
 	int err, mapfd;
 	u32 key;
 
-	def = bpf_map__def(map);
 	mapfd = bpf_map__fd(map);
 
-	for (key = 0; key < def->max_entries; key++) {
+	for (key = 0; key < bpf_map__max_entries(map); key++) {
 		err = bpf_map_delete_elem(mapfd, &key);
 		if (err && errno != EINVAL && errno != ENOENT)
 			FAIL_ERRNO("map_delete: expected EINVAL/ENOENT");
@@ -1443,13 +1441,13 @@ static const char *family_str(sa_family_t family)
 
 static const char *map_type_str(const struct bpf_map *map)
 {
-	const struct bpf_map_def *def;
+	int type;
 
-	def = bpf_map__def(map);
-	if (IS_ERR(def))
+	if (!map)
 		return "invalid";
+	type = bpf_map__type(map);
 
-	switch (def->type) {
+	switch (type) {
 	case BPF_MAP_TYPE_SOCKMAP:
 		return "sockmap";
 	case BPF_MAP_TYPE_SOCKHASH:
diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
index 5dc0f425bd11..796f231582f8 100644
--- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c
+++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
@@ -37,7 +37,7 @@ static void test_tailcall_1(void)
 	if (CHECK_FAIL(map_fd < 0))
 		goto out;
 
-	for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+	for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
 		snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
 		prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -53,7 +53,7 @@ static void test_tailcall_1(void)
 			goto out;
 	}
 
-	for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+	for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
 		err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
 					&duration, &retval, NULL);
 		CHECK(err || retval != i, "tailcall",
@@ -69,7 +69,7 @@ static void test_tailcall_1(void)
 	CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n",
 	      err, errno, retval);
 
-	for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+	for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
 		snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
 		prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -90,8 +90,8 @@ static void test_tailcall_1(void)
 	CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n",
 	      err, errno, retval);
 
-	for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
-		j = bpf_map__def(prog_array)->max_entries - 1 - i;
+	for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+		j = bpf_map__max_entries(prog_array) - 1 - i;
 		snprintf(prog_name, sizeof(prog_name), "classifier_%d", j);
 
 		prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -107,8 +107,8 @@ static void test_tailcall_1(void)
 			goto out;
 	}
 
-	for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
-		j = bpf_map__def(prog_array)->max_entries - 1 - i;
+	for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+		j = bpf_map__max_entries(prog_array) - 1 - i;
 
 		err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
 					&duration, &retval, NULL);
@@ -125,7 +125,7 @@ static void test_tailcall_1(void)
 	CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n",
 	      err, errno, retval);
 
-	for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+	for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
 		err = bpf_map_delete_elem(map_fd, &i);
 		if (CHECK_FAIL(err >= 0 || errno != ENOENT))
 			goto out;
@@ -175,7 +175,7 @@ static void test_tailcall_2(void)
 	if (CHECK_FAIL(map_fd < 0))
 		goto out;
 
-	for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+	for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
 		snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
 		prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -353,7 +353,7 @@ static void test_tailcall_4(void)
 	if (CHECK_FAIL(map_fd < 0))
 		return;
 
-	for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+	for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
 		snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
 		prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -369,7 +369,7 @@ static void test_tailcall_4(void)
 			goto out;
 	}
 
-	for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+	for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
 		err = bpf_map_update_elem(data_fd, &zero, &i, BPF_ANY);
 		if (CHECK_FAIL(err))
 			goto out;
@@ -380,7 +380,7 @@ static void test_tailcall_4(void)
 		      "err %d errno %d retval %d\n", err, errno, retval);
 	}
 
-	for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+	for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
 		err = bpf_map_update_elem(data_fd, &zero, &i, BPF_ANY);
 		if (CHECK_FAIL(err))
 			goto out;
@@ -441,7 +441,7 @@ static void test_tailcall_5(void)
 	if (CHECK_FAIL(map_fd < 0))
 		return;
 
-	for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+	for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
 		snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
 		prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -457,7 +457,7 @@ static void test_tailcall_5(void)
 			goto out;
 	}
 
-	for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+	for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
 		err = bpf_map_update_elem(data_fd, &zero, &key[i], BPF_ANY);
 		if (CHECK_FAIL(err))
 			goto out;
@@ -468,7 +468,7 @@ static void test_tailcall_5(void)
 		      "err %d errno %d retval %d\n", err, errno, retval);
 	}
 
-	for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+	for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
 		err = bpf_map_update_elem(data_fd, &zero, &key[i], BPF_ANY);
 		if (CHECK_FAIL(err))
 			goto out;
@@ -520,7 +520,7 @@ static void test_tailcall_bpf2bpf_1(void)
 		goto out;
 
 	/* nop -> jmp */
-	for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+	for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
 		snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
 		prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -681,7 +681,7 @@ static void test_tailcall_bpf2bpf_3(void)
 	if (CHECK_FAIL(map_fd < 0))
 		goto out;
 
-	for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+	for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
 		snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
 		prog = bpf_object__find_program_by_name(obj, prog_name);
@@ -778,7 +778,7 @@ static void test_tailcall_bpf2bpf_4(bool noise)
 	if (CHECK_FAIL(map_fd < 0))
 		goto out;
 
-	for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+	for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
 		snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
 		prog = bpf_object__find_program_by_name(obj, prog_name);
-- 
cgit 


From e80f2a0d194605553315de68284fc41969f81f62 Mon Sep 17 00:00:00 2001
From: Menglong Dong <imagedong@tencent.com>
Date: Thu, 13 Jan 2022 11:16:58 +0800
Subject: test: selftests: Remove unused various in sockmap_verdict_prog.c

'lport' and 'rport' in bpf_prog1() of sockmap_verdict_prog.c is not
used, just remove them.

Signed-off-by: Menglong Dong <imagedong@tencent.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20220113031658.633290-1-imagedong@tencent.com
---
 tools/testing/selftests/bpf/progs/sockmap_parse_prog.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
index 95d5b941bc1f..c9abfe3a11af 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
@@ -7,8 +7,6 @@ int bpf_prog1(struct __sk_buff *skb)
 {
 	void *data_end = (void *)(long) skb->data_end;
 	void *data = (void *)(long) skb->data;
-	__u32 lport = skb->local_port;
-	__u32 rport = skb->remote_port;
 	__u8 *d = data;
 	int err;
 
-- 
cgit 


From b202d84422223b7222cba5031d182f20b37e146e Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Fri, 14 Jan 2022 22:09:46 +0530
Subject: bpf: Remove check_kfunc_call callback and old kfunc BTF ID API

Completely remove the old code for check_kfunc_call to help it work
with modules, and also the callback itself.

The previous commit adds infrastructure to register all sets and put
them in vmlinux or module BTF, and concatenates all related sets
organized by the hook and the type. Once populated, these sets remain
immutable for the lifetime of the struct btf.

Also, since we don't need the 'owner' module anywhere when doing
check_kfunc_call, drop the 'btf_modp' module parameter from
find_kfunc_desc_btf.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-4-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h                                |  8 ----
 include/linux/btf.h                                | 44 ---------------------
 kernel/bpf/btf.c                                   | 46 ----------------------
 kernel/bpf/verifier.c                              | 20 ++++------
 net/bpf/test_run.c                                 | 23 ++++++-----
 net/core/filter.c                                  |  1 -
 net/ipv4/bpf_tcp_ca.c                              | 22 ++++++-----
 net/ipv4/tcp_bbr.c                                 | 18 +++++----
 net/ipv4/tcp_cubic.c                               | 17 ++++----
 net/ipv4/tcp_dctcp.c                               | 18 +++++----
 .../selftests/bpf/bpf_testmod/bpf_testmod.c        | 17 ++++----
 11 files changed, 73 insertions(+), 161 deletions(-)

(limited to 'tools/testing')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 6e947cd91152..6d7346c54d83 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -573,7 +573,6 @@ struct bpf_verifier_ops {
 				 const struct btf_type *t, int off, int size,
 				 enum bpf_access_type atype,
 				 u32 *next_btf_id);
-	bool (*check_kfunc_call)(u32 kfunc_btf_id, struct module *owner);
 };
 
 struct bpf_prog_offload_ops {
@@ -1719,7 +1718,6 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
 int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
 				const union bpf_attr *kattr,
 				union bpf_attr __user *uattr);
-bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner);
 bool btf_ctx_access(int off, int size, enum bpf_access_type type,
 		    const struct bpf_prog *prog,
 		    struct bpf_insn_access_aux *info);
@@ -1971,12 +1969,6 @@ static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
 	return -ENOTSUPP;
 }
 
-static inline bool bpf_prog_test_check_kfunc_call(u32 kfunc_id,
-						  struct module *owner)
-{
-	return false;
-}
-
 static inline void bpf_map_put(struct bpf_map *map)
 {
 }
diff --git a/include/linux/btf.h b/include/linux/btf.h
index c451f8e2612a..b12cfe3b12bb 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -359,48 +359,4 @@ static inline int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
 }
 #endif
 
-struct kfunc_btf_id_set {
-	struct list_head list;
-	struct btf_id_set *set;
-	struct module *owner;
-};
-
-struct kfunc_btf_id_list {
-	struct list_head list;
-	struct mutex mutex;
-};
-
-#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
-void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
-			       struct kfunc_btf_id_set *s);
-void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
-				 struct kfunc_btf_id_set *s);
-bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id,
-			      struct module *owner);
-
-extern struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list;
-extern struct kfunc_btf_id_list prog_test_kfunc_list;
-#else
-static inline void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
-					     struct kfunc_btf_id_set *s)
-{
-}
-static inline void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
-					       struct kfunc_btf_id_set *s)
-{
-}
-static inline bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist,
-					    u32 kfunc_id, struct module *owner)
-{
-	return false;
-}
-
-static struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list __maybe_unused;
-static struct kfunc_btf_id_list prog_test_kfunc_list __maybe_unused;
-#endif
-
-#define DEFINE_KFUNC_BTF_ID_SET(set, name)                                     \
-	struct kfunc_btf_id_set name = { LIST_HEAD_INIT(name.list), (set),     \
-					 THIS_MODULE }
-
 #endif
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 74037bd65d17..4be5cf629ca9 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -6682,52 +6682,6 @@ int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
 }
 EXPORT_SYMBOL_GPL(register_btf_kfunc_id_set);
 
-#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
-
-void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
-			       struct kfunc_btf_id_set *s)
-{
-	mutex_lock(&l->mutex);
-	list_add(&s->list, &l->list);
-	mutex_unlock(&l->mutex);
-}
-EXPORT_SYMBOL_GPL(register_kfunc_btf_id_set);
-
-void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
-				 struct kfunc_btf_id_set *s)
-{
-	mutex_lock(&l->mutex);
-	list_del_init(&s->list);
-	mutex_unlock(&l->mutex);
-}
-EXPORT_SYMBOL_GPL(unregister_kfunc_btf_id_set);
-
-bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id,
-			      struct module *owner)
-{
-	struct kfunc_btf_id_set *s;
-
-	mutex_lock(&klist->mutex);
-	list_for_each_entry(s, &klist->list, list) {
-		if (s->owner == owner && btf_id_set_contains(s->set, kfunc_id)) {
-			mutex_unlock(&klist->mutex);
-			return true;
-		}
-	}
-	mutex_unlock(&klist->mutex);
-	return false;
-}
-
-#define DEFINE_KFUNC_BTF_ID_LIST(name)                                         \
-	struct kfunc_btf_id_list name = { LIST_HEAD_INIT(name.list),           \
-					  __MUTEX_INITIALIZER(name.mutex) };   \
-	EXPORT_SYMBOL_GPL(name)
-
-DEFINE_KFUNC_BTF_ID_LIST(bpf_tcp_ca_kfunc_list);
-DEFINE_KFUNC_BTF_ID_LIST(prog_test_kfunc_list);
-
-#endif
-
 int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
 			      const struct btf *targ_btf, __u32 targ_id)
 {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index bfb45381fb3f..72802c1eb5ac 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1741,7 +1741,7 @@ find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
 }
 
 static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
-					 s16 offset, struct module **btf_modp)
+					 s16 offset)
 {
 	struct bpf_kfunc_btf kf_btf = { .offset = offset };
 	struct bpf_kfunc_btf_tab *tab;
@@ -1795,8 +1795,6 @@ static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
 		sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
 		     kfunc_btf_cmp_by_off, NULL);
 	}
-	if (btf_modp)
-		*btf_modp = b->module;
 	return b->btf;
 }
 
@@ -1813,8 +1811,7 @@ void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
 }
 
 static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env,
-				       u32 func_id, s16 offset,
-				       struct module **btf_modp)
+				       u32 func_id, s16 offset)
 {
 	if (offset) {
 		if (offset < 0) {
@@ -1825,7 +1822,7 @@ static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env,
 			return ERR_PTR(-EINVAL);
 		}
 
-		return __find_kfunc_desc_btf(env, offset, btf_modp);
+		return __find_kfunc_desc_btf(env, offset);
 	}
 	return btf_vmlinux ?: ERR_PTR(-ENOENT);
 }
@@ -1888,7 +1885,7 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
 		prog_aux->kfunc_btf_tab = btf_tab;
 	}
 
-	desc_btf = find_kfunc_desc_btf(env, func_id, offset, NULL);
+	desc_btf = find_kfunc_desc_btf(env, func_id, offset);
 	if (IS_ERR(desc_btf)) {
 		verbose(env, "failed to find BTF for kernel function\n");
 		return PTR_ERR(desc_btf);
@@ -2349,7 +2346,7 @@ static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
 	if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
 		return NULL;
 
-	desc_btf = find_kfunc_desc_btf(data, insn->imm, insn->off, NULL);
+	desc_btf = find_kfunc_desc_btf(data, insn->imm, insn->off);
 	if (IS_ERR(desc_btf))
 		return "<error>";
 
@@ -6820,7 +6817,6 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn)
 	struct bpf_reg_state *regs = cur_regs(env);
 	const char *func_name, *ptr_type_name;
 	u32 i, nargs, func_id, ptr_type_id;
-	struct module *btf_mod = NULL;
 	const struct btf_param *args;
 	struct btf *desc_btf;
 	int err;
@@ -6829,7 +6825,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn)
 	if (!insn->imm)
 		return 0;
 
-	desc_btf = find_kfunc_desc_btf(env, insn->imm, insn->off, &btf_mod);
+	desc_btf = find_kfunc_desc_btf(env, insn->imm, insn->off);
 	if (IS_ERR(desc_btf))
 		return PTR_ERR(desc_btf);
 
@@ -6838,8 +6834,8 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn)
 	func_name = btf_name_by_offset(desc_btf, func->name_off);
 	func_proto = btf_type_by_id(desc_btf, func->type);
 
-	if (!env->ops->check_kfunc_call ||
-	    !env->ops->check_kfunc_call(func_id, btf_mod)) {
+	if (!btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
+				      BTF_KFUNC_TYPE_CHECK, func_id)) {
 		verbose(env, "calling kernel function %s is not allowed\n",
 			func_name);
 		return -EACCES;
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 46dd95755967..7796a8c747a0 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -5,6 +5,7 @@
 #include <linux/btf.h>
 #include <linux/btf_ids.h>
 #include <linux/slab.h>
+#include <linux/init.h>
 #include <linux/vmalloc.h>
 #include <linux/etherdevice.h>
 #include <linux/filter.h>
@@ -236,18 +237,11 @@ __diag_pop();
 
 ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO);
 
-BTF_SET_START(test_sk_kfunc_ids)
+BTF_SET_START(test_sk_check_kfunc_ids)
 BTF_ID(func, bpf_kfunc_call_test1)
 BTF_ID(func, bpf_kfunc_call_test2)
 BTF_ID(func, bpf_kfunc_call_test3)
-BTF_SET_END(test_sk_kfunc_ids)
-
-bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner)
-{
-	if (btf_id_set_contains(&test_sk_kfunc_ids, kfunc_id))
-		return true;
-	return bpf_check_mod_kfunc_call(&prog_test_kfunc_list, kfunc_id, owner);
-}
+BTF_SET_END(test_sk_check_kfunc_ids)
 
 static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
 			   u32 headroom, u32 tailroom)
@@ -1067,3 +1061,14 @@ out:
 	kfree(ctx);
 	return err;
 }
+
+static const struct btf_kfunc_id_set bpf_prog_test_kfunc_set = {
+	.owner     = THIS_MODULE,
+	.check_set = &test_sk_check_kfunc_ids,
+};
+
+static int __init bpf_prog_test_run_init(void)
+{
+	return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_prog_test_kfunc_set);
+}
+late_initcall(bpf_prog_test_run_init);
diff --git a/net/core/filter.c b/net/core/filter.c
index 4603b7cd3cd1..f73a84c75970 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -10062,7 +10062,6 @@ const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
 	.convert_ctx_access	= tc_cls_act_convert_ctx_access,
 	.gen_prologue		= tc_cls_act_prologue,
 	.gen_ld_abs		= bpf_gen_ld_abs,
-	.check_kfunc_call	= bpf_prog_test_check_kfunc_call,
 };
 
 const struct bpf_prog_ops tc_cls_act_prog_ops = {
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index de610cb83694..b60c9fd7147e 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2019 Facebook  */
 
+#include <linux/init.h>
 #include <linux/types.h>
 #include <linux/bpf_verifier.h>
 #include <linux/bpf.h>
@@ -212,26 +213,23 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id,
 	}
 }
 
-BTF_SET_START(bpf_tcp_ca_kfunc_ids)
+BTF_SET_START(bpf_tcp_ca_check_kfunc_ids)
 BTF_ID(func, tcp_reno_ssthresh)
 BTF_ID(func, tcp_reno_cong_avoid)
 BTF_ID(func, tcp_reno_undo_cwnd)
 BTF_ID(func, tcp_slow_start)
 BTF_ID(func, tcp_cong_avoid_ai)
-BTF_SET_END(bpf_tcp_ca_kfunc_ids)
+BTF_SET_END(bpf_tcp_ca_check_kfunc_ids)
 
-static bool bpf_tcp_ca_check_kfunc_call(u32 kfunc_btf_id, struct module *owner)
-{
-	if (btf_id_set_contains(&bpf_tcp_ca_kfunc_ids, kfunc_btf_id))
-		return true;
-	return bpf_check_mod_kfunc_call(&bpf_tcp_ca_kfunc_list, kfunc_btf_id, owner);
-}
+static const struct btf_kfunc_id_set bpf_tcp_ca_kfunc_set = {
+	.owner     = THIS_MODULE,
+	.check_set = &bpf_tcp_ca_check_kfunc_ids,
+};
 
 static const struct bpf_verifier_ops bpf_tcp_ca_verifier_ops = {
 	.get_func_proto		= bpf_tcp_ca_get_func_proto,
 	.is_valid_access	= bpf_tcp_ca_is_valid_access,
 	.btf_struct_access	= bpf_tcp_ca_btf_struct_access,
-	.check_kfunc_call	= bpf_tcp_ca_check_kfunc_call,
 };
 
 static int bpf_tcp_ca_init_member(const struct btf_type *t,
@@ -300,3 +298,9 @@ struct bpf_struct_ops bpf_tcp_congestion_ops = {
 	.init = bpf_tcp_ca_init,
 	.name = "tcp_congestion_ops",
 };
+
+static int __init bpf_tcp_ca_kfunc_init(void)
+{
+	return register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &bpf_tcp_ca_kfunc_set);
+}
+late_initcall(bpf_tcp_ca_kfunc_init);
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index ec5550089b4d..02e8626ccb27 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -1154,7 +1154,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
 	.set_state	= bbr_set_state,
 };
 
-BTF_SET_START(tcp_bbr_kfunc_ids)
+BTF_SET_START(tcp_bbr_check_kfunc_ids)
 #ifdef CONFIG_X86
 #ifdef CONFIG_DYNAMIC_FTRACE
 BTF_ID(func, bbr_init)
@@ -1167,25 +1167,27 @@ BTF_ID(func, bbr_min_tso_segs)
 BTF_ID(func, bbr_set_state)
 #endif
 #endif
-BTF_SET_END(tcp_bbr_kfunc_ids)
+BTF_SET_END(tcp_bbr_check_kfunc_ids)
 
-static DEFINE_KFUNC_BTF_ID_SET(&tcp_bbr_kfunc_ids, tcp_bbr_kfunc_btf_set);
+static const struct btf_kfunc_id_set tcp_bbr_kfunc_set = {
+	.owner     = THIS_MODULE,
+	.check_set = &tcp_bbr_check_kfunc_ids,
+};
 
 static int __init bbr_register(void)
 {
 	int ret;
 
 	BUILD_BUG_ON(sizeof(struct bbr) > ICSK_CA_PRIV_SIZE);
-	ret = tcp_register_congestion_control(&tcp_bbr_cong_ops);
-	if (ret)
+
+	ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_bbr_kfunc_set);
+	if (ret < 0)
 		return ret;
-	register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_bbr_kfunc_btf_set);
-	return 0;
+	return tcp_register_congestion_control(&tcp_bbr_cong_ops);
 }
 
 static void __exit bbr_unregister(void)
 {
-	unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_bbr_kfunc_btf_set);
 	tcp_unregister_congestion_control(&tcp_bbr_cong_ops);
 }
 
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index e07837e23b3f..24d562dd6225 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -485,7 +485,7 @@ static struct tcp_congestion_ops cubictcp __read_mostly = {
 	.name		= "cubic",
 };
 
-BTF_SET_START(tcp_cubic_kfunc_ids)
+BTF_SET_START(tcp_cubic_check_kfunc_ids)
 #ifdef CONFIG_X86
 #ifdef CONFIG_DYNAMIC_FTRACE
 BTF_ID(func, cubictcp_init)
@@ -496,9 +496,12 @@ BTF_ID(func, cubictcp_cwnd_event)
 BTF_ID(func, cubictcp_acked)
 #endif
 #endif
-BTF_SET_END(tcp_cubic_kfunc_ids)
+BTF_SET_END(tcp_cubic_check_kfunc_ids)
 
-static DEFINE_KFUNC_BTF_ID_SET(&tcp_cubic_kfunc_ids, tcp_cubic_kfunc_btf_set);
+static const struct btf_kfunc_id_set tcp_cubic_kfunc_set = {
+	.owner     = THIS_MODULE,
+	.check_set = &tcp_cubic_check_kfunc_ids,
+};
 
 static int __init cubictcp_register(void)
 {
@@ -534,16 +537,14 @@ static int __init cubictcp_register(void)
 	/* divide by bic_scale and by constant Srtt (100ms) */
 	do_div(cube_factor, bic_scale * 10);
 
-	ret = tcp_register_congestion_control(&cubictcp);
-	if (ret)
+	ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_cubic_kfunc_set);
+	if (ret < 0)
 		return ret;
-	register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_cubic_kfunc_btf_set);
-	return 0;
+	return tcp_register_congestion_control(&cubictcp);
 }
 
 static void __exit cubictcp_unregister(void)
 {
-	unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_cubic_kfunc_btf_set);
 	tcp_unregister_congestion_control(&cubictcp);
 }
 
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 0d7ab3cc7b61..1943a6630341 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -238,7 +238,7 @@ static struct tcp_congestion_ops dctcp_reno __read_mostly = {
 	.name		= "dctcp-reno",
 };
 
-BTF_SET_START(tcp_dctcp_kfunc_ids)
+BTF_SET_START(tcp_dctcp_check_kfunc_ids)
 #ifdef CONFIG_X86
 #ifdef CONFIG_DYNAMIC_FTRACE
 BTF_ID(func, dctcp_init)
@@ -249,25 +249,27 @@ BTF_ID(func, dctcp_cwnd_undo)
 BTF_ID(func, dctcp_state)
 #endif
 #endif
-BTF_SET_END(tcp_dctcp_kfunc_ids)
+BTF_SET_END(tcp_dctcp_check_kfunc_ids)
 
-static DEFINE_KFUNC_BTF_ID_SET(&tcp_dctcp_kfunc_ids, tcp_dctcp_kfunc_btf_set);
+static const struct btf_kfunc_id_set tcp_dctcp_kfunc_set = {
+	.owner     = THIS_MODULE,
+	.check_set = &tcp_dctcp_check_kfunc_ids,
+};
 
 static int __init dctcp_register(void)
 {
 	int ret;
 
 	BUILD_BUG_ON(sizeof(struct dctcp) > ICSK_CA_PRIV_SIZE);
-	ret = tcp_register_congestion_control(&dctcp);
-	if (ret)
+
+	ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_dctcp_kfunc_set);
+	if (ret < 0)
 		return ret;
-	register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_dctcp_kfunc_btf_set);
-	return 0;
+	return tcp_register_congestion_control(&dctcp);
 }
 
 static void __exit dctcp_unregister(void)
 {
-	unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_dctcp_kfunc_btf_set);
 	tcp_unregister_congestion_control(&dctcp);
 }
 
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
index df3b292a8ffe..c0805d0d753f 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -109,26 +109,27 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = {
 	.write = bpf_testmod_test_write,
 };
 
-BTF_SET_START(bpf_testmod_kfunc_ids)
+BTF_SET_START(bpf_testmod_check_kfunc_ids)
 BTF_ID(func, bpf_testmod_test_mod_kfunc)
-BTF_SET_END(bpf_testmod_kfunc_ids)
+BTF_SET_END(bpf_testmod_check_kfunc_ids)
 
-static DEFINE_KFUNC_BTF_ID_SET(&bpf_testmod_kfunc_ids, bpf_testmod_kfunc_btf_set);
+static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = {
+	.owner     = THIS_MODULE,
+	.check_set = &bpf_testmod_check_kfunc_ids,
+};
 
 static int bpf_testmod_init(void)
 {
 	int ret;
 
-	ret = sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
-	if (ret)
+	ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_testmod_kfunc_set);
+	if (ret < 0)
 		return ret;
-	register_kfunc_btf_id_set(&prog_test_kfunc_list, &bpf_testmod_kfunc_btf_set);
-	return 0;
+	return sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
 }
 
 static void bpf_testmod_exit(void)
 {
-	unregister_kfunc_btf_id_set(&prog_test_kfunc_list, &bpf_testmod_kfunc_btf_set);
 	return sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
 }
 
-- 
cgit 


From 87091063df5d4845d1db0761a9ed5510c4756a96 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Fri, 14 Jan 2022 22:09:50 +0530
Subject: selftests/bpf: Add test for unstable CT lookup API

This tests that we return errors as documented, and also that the kfunc
calls work from both XDP and TC hooks.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-8-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/config              |   4 +
 tools/testing/selftests/bpf/prog_tests/bpf_nf.c |  48 +++++++++++
 tools/testing/selftests/bpf/progs/test_bpf_nf.c | 109 ++++++++++++++++++++++++
 3 files changed, 161 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/bpf_nf.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_bpf_nf.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index f6287132fa89..32d80e77e910 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -48,3 +48,7 @@ CONFIG_IMA_READ_POLICY=y
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_FUNCTION_TRACER=y
 CONFIG_DYNAMIC_FTRACE=y
+CONFIG_NETFILTER=y
+CONFIG_NF_DEFRAG_IPV4=y
+CONFIG_NF_DEFRAG_IPV6=y
+CONFIG_NF_CONNTRACK=y
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
new file mode 100644
index 000000000000..e3166a81e989
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "test_bpf_nf.skel.h"
+
+enum {
+	TEST_XDP,
+	TEST_TC_BPF,
+};
+
+void test_bpf_nf_ct(int mode)
+{
+	struct test_bpf_nf *skel;
+	int prog_fd, err, retval;
+
+	skel = test_bpf_nf__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "test_bpf_nf__open_and_load"))
+		return;
+
+	if (mode == TEST_XDP)
+		prog_fd = bpf_program__fd(skel->progs.nf_xdp_ct_test);
+	else
+		prog_fd = bpf_program__fd(skel->progs.nf_skb_ct_test);
+
+	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), NULL, NULL,
+				(__u32 *)&retval, NULL);
+	if (!ASSERT_OK(err, "bpf_prog_test_run"))
+		goto end;
+
+	ASSERT_EQ(skel->bss->test_einval_bpf_tuple, -EINVAL, "Test EINVAL for NULL bpf_tuple");
+	ASSERT_EQ(skel->bss->test_einval_reserved, -EINVAL, "Test EINVAL for reserved not set to 0");
+	ASSERT_EQ(skel->bss->test_einval_netns_id, -EINVAL, "Test EINVAL for netns_id < -1");
+	ASSERT_EQ(skel->bss->test_einval_len_opts, -EINVAL, "Test EINVAL for len__opts != NF_BPF_CT_OPTS_SZ");
+	ASSERT_EQ(skel->bss->test_eproto_l4proto, -EPROTO, "Test EPROTO for l4proto != TCP or UDP");
+	ASSERT_EQ(skel->bss->test_enonet_netns_id, -ENONET, "Test ENONET for bad but valid netns_id");
+	ASSERT_EQ(skel->bss->test_enoent_lookup, -ENOENT, "Test ENOENT for failed lookup");
+	ASSERT_EQ(skel->bss->test_eafnosupport, -EAFNOSUPPORT, "Test EAFNOSUPPORT for invalid len__tuple");
+end:
+	test_bpf_nf__destroy(skel);
+}
+
+void test_bpf_nf(void)
+{
+	if (test__start_subtest("xdp-ct"))
+		test_bpf_nf_ct(TEST_XDP);
+	if (test__start_subtest("tc-bpf-ct"))
+		test_bpf_nf_ct(TEST_TC_BPF);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
new file mode 100644
index 000000000000..6f131c993c0b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+#define EAFNOSUPPORT 97
+#define EPROTO 71
+#define ENONET 64
+#define EINVAL 22
+#define ENOENT 2
+
+int test_einval_bpf_tuple = 0;
+int test_einval_reserved = 0;
+int test_einval_netns_id = 0;
+int test_einval_len_opts = 0;
+int test_eproto_l4proto = 0;
+int test_enonet_netns_id = 0;
+int test_enoent_lookup = 0;
+int test_eafnosupport = 0;
+
+struct nf_conn *bpf_xdp_ct_lookup(struct xdp_md *, struct bpf_sock_tuple *, u32,
+				  struct bpf_ct_opts *, u32) __ksym;
+struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *, struct bpf_sock_tuple *, u32,
+				  struct bpf_ct_opts *, u32) __ksym;
+void bpf_ct_release(struct nf_conn *) __ksym;
+
+static __always_inline void
+nf_ct_test(struct nf_conn *(*func)(void *, struct bpf_sock_tuple *, u32,
+				   struct bpf_ct_opts *, u32),
+	   void *ctx)
+{
+	struct bpf_ct_opts opts_def = { .l4proto = IPPROTO_TCP, .netns_id = -1 };
+	struct bpf_sock_tuple bpf_tuple;
+	struct nf_conn *ct;
+
+	__builtin_memset(&bpf_tuple, 0, sizeof(bpf_tuple.ipv4));
+
+	ct = func(ctx, NULL, 0, &opts_def, sizeof(opts_def));
+	if (ct)
+		bpf_ct_release(ct);
+	else
+		test_einval_bpf_tuple = opts_def.error;
+
+	opts_def.reserved[0] = 1;
+	ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
+	opts_def.reserved[0] = 0;
+	opts_def.l4proto = IPPROTO_TCP;
+	if (ct)
+		bpf_ct_release(ct);
+	else
+		test_einval_reserved = opts_def.error;
+
+	opts_def.netns_id = -2;
+	ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
+	opts_def.netns_id = -1;
+	if (ct)
+		bpf_ct_release(ct);
+	else
+		test_einval_netns_id = opts_def.error;
+
+	ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def) - 1);
+	if (ct)
+		bpf_ct_release(ct);
+	else
+		test_einval_len_opts = opts_def.error;
+
+	opts_def.l4proto = IPPROTO_ICMP;
+	ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
+	opts_def.l4proto = IPPROTO_TCP;
+	if (ct)
+		bpf_ct_release(ct);
+	else
+		test_eproto_l4proto = opts_def.error;
+
+	opts_def.netns_id = 0xf00f;
+	ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
+	opts_def.netns_id = -1;
+	if (ct)
+		bpf_ct_release(ct);
+	else
+		test_enonet_netns_id = opts_def.error;
+
+	ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
+	if (ct)
+		bpf_ct_release(ct);
+	else
+		test_enoent_lookup = opts_def.error;
+
+	ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4) - 1, &opts_def, sizeof(opts_def));
+	if (ct)
+		bpf_ct_release(ct);
+	else
+		test_eafnosupport = opts_def.error;
+}
+
+SEC("xdp")
+int nf_xdp_ct_test(struct xdp_md *ctx)
+{
+	nf_ct_test((void *)bpf_xdp_ct_lookup, ctx);
+	return 0;
+}
+
+SEC("tc")
+int nf_skb_ct_test(struct __sk_buff *ctx)
+{
+	nf_ct_test((void *)bpf_skb_ct_lookup, ctx);
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit 


From 0201b80772ac2b712bbbfe783cdb731fdfb4247e Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Fri, 14 Jan 2022 22:09:51 +0530
Subject: selftests/bpf: Add test_verifier support to fixup kfunc call insns

This allows us to add tests (esp. negative tests) where we only want to
ensure the program doesn't pass through the verifier, and also verify
the error. The next commit will add the tests making use of this.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-9-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_verifier.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 76cd903117af..29bbaa58233c 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -31,6 +31,7 @@
 #include <linux/if_ether.h>
 #include <linux/btf.h>
 
+#include <bpf/btf.h>
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
 
@@ -66,6 +67,11 @@ static bool unpriv_disabled = false;
 static int skips;
 static bool verbose = false;
 
+struct kfunc_btf_id_pair {
+	const char *kfunc;
+	int insn_idx;
+};
+
 struct bpf_test {
 	const char *descr;
 	struct bpf_insn	insns[MAX_INSNS];
@@ -92,6 +98,7 @@ struct bpf_test {
 	int fixup_map_reuseport_array[MAX_FIXUPS];
 	int fixup_map_ringbuf[MAX_FIXUPS];
 	int fixup_map_timer[MAX_FIXUPS];
+	struct kfunc_btf_id_pair fixup_kfunc_btf_id[MAX_FIXUPS];
 	/* Expected verifier log output for result REJECT or VERBOSE_ACCEPT.
 	 * Can be a tab-separated sequence of expected strings. An empty string
 	 * means no log verification.
@@ -744,6 +751,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
 	int *fixup_map_reuseport_array = test->fixup_map_reuseport_array;
 	int *fixup_map_ringbuf = test->fixup_map_ringbuf;
 	int *fixup_map_timer = test->fixup_map_timer;
+	struct kfunc_btf_id_pair *fixup_kfunc_btf_id = test->fixup_kfunc_btf_id;
 
 	if (test->fill_helper) {
 		test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn));
@@ -936,6 +944,26 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
 			fixup_map_timer++;
 		} while (*fixup_map_timer);
 	}
+
+	/* Patch in kfunc BTF IDs */
+	if (fixup_kfunc_btf_id->kfunc) {
+		struct btf *btf;
+		int btf_id;
+
+		do {
+			btf_id = 0;
+			btf = btf__load_vmlinux_btf();
+			if (btf) {
+				btf_id = btf__find_by_name_kind(btf,
+								fixup_kfunc_btf_id->kfunc,
+								BTF_KIND_FUNC);
+				btf_id = btf_id < 0 ? 0 : btf_id;
+			}
+			btf__free(btf);
+			prog[fixup_kfunc_btf_id->insn_idx].imm = btf_id;
+			fixup_kfunc_btf_id++;
+		} while (fixup_kfunc_btf_id->kfunc);
+	}
 }
 
 struct libcap {
-- 
cgit 


From c1ff181ffabc292abcd1832a1c83aac2bc499e71 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Fri, 14 Jan 2022 22:09:52 +0530
Subject: selftests/bpf: Extend kfunc selftests

Use the prog_test kfuncs to test the referenced PTR_TO_BTF_ID kfunc
support, and PTR_TO_CTX, PTR_TO_MEM argument passing support. Also
testing the various failure cases for invalid kfunc prototypes.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-10-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 net/bpf/test_run.c                                 | 129 ++++++++++++++++++++-
 .../testing/selftests/bpf/prog_tests/kfunc_call.c  |   6 +
 .../testing/selftests/bpf/progs/kfunc_call_test.c  |  52 ++++++++-
 tools/testing/selftests/bpf/verifier/calls.c       |  75 ++++++++++++
 4 files changed, 258 insertions(+), 4 deletions(-)

(limited to 'tools/testing')

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 7796a8c747a0..93ba56507240 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -233,6 +233,105 @@ struct sock * noinline bpf_kfunc_call_test3(struct sock *sk)
 	return sk;
 }
 
+struct prog_test_ref_kfunc {
+	int a;
+	int b;
+	struct prog_test_ref_kfunc *next;
+};
+
+static struct prog_test_ref_kfunc prog_test_struct = {
+	.a = 42,
+	.b = 108,
+	.next = &prog_test_struct,
+};
+
+noinline struct prog_test_ref_kfunc *
+bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr)
+{
+	/* randomly return NULL */
+	if (get_jiffies_64() % 2)
+		return NULL;
+	return &prog_test_struct;
+}
+
+noinline void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p)
+{
+}
+
+struct prog_test_pass1 {
+	int x0;
+	struct {
+		int x1;
+		struct {
+			int x2;
+			struct {
+				int x3;
+			};
+		};
+	};
+};
+
+struct prog_test_pass2 {
+	int len;
+	short arr1[4];
+	struct {
+		char arr2[4];
+		unsigned long arr3[8];
+	} x;
+};
+
+struct prog_test_fail1 {
+	void *p;
+	int x;
+};
+
+struct prog_test_fail2 {
+	int x8;
+	struct prog_test_pass1 x;
+};
+
+struct prog_test_fail3 {
+	int len;
+	char arr1[2];
+	char arr2[0];
+};
+
+noinline void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb)
+{
+}
+
+noinline void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p)
+{
+}
+
+noinline void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz)
+{
+}
+
+noinline void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len)
+{
+}
+
+noinline void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len)
+{
+}
+
 __diag_pop();
 
 ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO);
@@ -241,8 +340,31 @@ BTF_SET_START(test_sk_check_kfunc_ids)
 BTF_ID(func, bpf_kfunc_call_test1)
 BTF_ID(func, bpf_kfunc_call_test2)
 BTF_ID(func, bpf_kfunc_call_test3)
+BTF_ID(func, bpf_kfunc_call_test_acquire)
+BTF_ID(func, bpf_kfunc_call_test_release)
+BTF_ID(func, bpf_kfunc_call_test_pass_ctx)
+BTF_ID(func, bpf_kfunc_call_test_pass1)
+BTF_ID(func, bpf_kfunc_call_test_pass2)
+BTF_ID(func, bpf_kfunc_call_test_fail1)
+BTF_ID(func, bpf_kfunc_call_test_fail2)
+BTF_ID(func, bpf_kfunc_call_test_fail3)
+BTF_ID(func, bpf_kfunc_call_test_mem_len_pass1)
+BTF_ID(func, bpf_kfunc_call_test_mem_len_fail1)
+BTF_ID(func, bpf_kfunc_call_test_mem_len_fail2)
 BTF_SET_END(test_sk_check_kfunc_ids)
 
+BTF_SET_START(test_sk_acquire_kfunc_ids)
+BTF_ID(func, bpf_kfunc_call_test_acquire)
+BTF_SET_END(test_sk_acquire_kfunc_ids)
+
+BTF_SET_START(test_sk_release_kfunc_ids)
+BTF_ID(func, bpf_kfunc_call_test_release)
+BTF_SET_END(test_sk_release_kfunc_ids)
+
+BTF_SET_START(test_sk_ret_null_kfunc_ids)
+BTF_ID(func, bpf_kfunc_call_test_acquire)
+BTF_SET_END(test_sk_ret_null_kfunc_ids)
+
 static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
 			   u32 headroom, u32 tailroom)
 {
@@ -1063,8 +1185,11 @@ out:
 }
 
 static const struct btf_kfunc_id_set bpf_prog_test_kfunc_set = {
-	.owner     = THIS_MODULE,
-	.check_set = &test_sk_check_kfunc_ids,
+	.owner        = THIS_MODULE,
+	.check_set    = &test_sk_check_kfunc_ids,
+	.acquire_set  = &test_sk_acquire_kfunc_ids,
+	.release_set  = &test_sk_release_kfunc_ids,
+	.ret_null_set = &test_sk_ret_null_kfunc_ids,
 };
 
 static int __init bpf_prog_test_run_init(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
index 7d7445ccc141..b39a4f09aefd 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
@@ -27,6 +27,12 @@ static void test_main(void)
 	ASSERT_OK(err, "bpf_prog_test_run(test2)");
 	ASSERT_EQ(retval, 3, "test2-retval");
 
+	prog_fd = skel->progs.kfunc_call_test_ref_btf_id.prog_fd;
+	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+				NULL, NULL, (__u32 *)&retval, NULL);
+	ASSERT_OK(err, "bpf_prog_test_run(test_ref_btf_id)");
+	ASSERT_EQ(retval, 0, "test_ref_btf_id-retval");
+
 	kfunc_call_test_lskel__destroy(skel);
 }
 
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test.c b/tools/testing/selftests/bpf/progs/kfunc_call_test.c
index 8a8cf59017aa..5aecbb9fdc68 100644
--- a/tools/testing/selftests/bpf/progs/kfunc_call_test.c
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_test.c
@@ -1,13 +1,20 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2021 Facebook */
-#include <linux/bpf.h>
+#include <vmlinux.h>
 #include <bpf/bpf_helpers.h>
-#include "bpf_tcp_helpers.h"
 
 extern int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym;
 extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b,
 				  __u32 c, __u64 d) __ksym;
 
+extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym;
+extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;
+extern void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) __ksym;
+extern void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) __ksym;
+extern void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) __ksym;
+extern void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym;
+extern void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym;
+
 SEC("tc")
 int kfunc_call_test2(struct __sk_buff *skb)
 {
@@ -44,4 +51,45 @@ int kfunc_call_test1(struct __sk_buff *skb)
 	return ret;
 }
 
+SEC("tc")
+int kfunc_call_test_ref_btf_id(struct __sk_buff *skb)
+{
+	struct prog_test_ref_kfunc *pt;
+	unsigned long s = 0;
+	int ret = 0;
+
+	pt = bpf_kfunc_call_test_acquire(&s);
+	if (pt) {
+		if (pt->a != 42 || pt->b != 108)
+			ret = -1;
+		bpf_kfunc_call_test_release(pt);
+	}
+	return ret;
+}
+
+SEC("tc")
+int kfunc_call_test_pass(struct __sk_buff *skb)
+{
+	struct prog_test_pass1 p1 = {};
+	struct prog_test_pass2 p2 = {};
+	short a = 0;
+	__u64 b = 0;
+	long c = 0;
+	char d = 0;
+	int e = 0;
+
+	bpf_kfunc_call_test_pass_ctx(skb);
+	bpf_kfunc_call_test_pass1(&p1);
+	bpf_kfunc_call_test_pass2(&p2);
+
+	bpf_kfunc_call_test_mem_len_pass1(&a, sizeof(a));
+	bpf_kfunc_call_test_mem_len_pass1(&b, sizeof(b));
+	bpf_kfunc_call_test_mem_len_pass1(&c, sizeof(c));
+	bpf_kfunc_call_test_mem_len_pass1(&d, sizeof(d));
+	bpf_kfunc_call_test_mem_len_pass1(&e, sizeof(e));
+	bpf_kfunc_call_test_mem_len_fail2(&b, -1);
+
+	return 0;
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index d7b74eb28333..829be2b9e08e 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -21,6 +21,81 @@
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	.result  = ACCEPT,
 },
+{
+	"calls: invalid kfunc call: ptr_to_mem to struct with non-scalar",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.result = REJECT,
+	.errstr = "arg#0 pointer type STRUCT prog_test_fail1 must point to scalar",
+	.fixup_kfunc_btf_id = {
+		{ "bpf_kfunc_call_test_fail1", 2 },
+	},
+},
+{
+	"calls: invalid kfunc call: ptr_to_mem to struct with nesting depth > 4",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.result = REJECT,
+	.errstr = "max struct nesting depth exceeded\narg#0 pointer type STRUCT prog_test_fail2",
+	.fixup_kfunc_btf_id = {
+		{ "bpf_kfunc_call_test_fail2", 2 },
+	},
+},
+{
+	"calls: invalid kfunc call: ptr_to_mem to struct with FAM",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.result = REJECT,
+	.errstr = "arg#0 pointer type STRUCT prog_test_fail3 must point to scalar",
+	.fixup_kfunc_btf_id = {
+		{ "bpf_kfunc_call_test_fail3", 2 },
+	},
+},
+{
+	"calls: invalid kfunc call: reg->type != PTR_TO_CTX",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.result = REJECT,
+	.errstr = "arg#0 expected pointer to ctx, but got PTR",
+	.fixup_kfunc_btf_id = {
+		{ "bpf_kfunc_call_test_pass_ctx", 2 },
+	},
+},
+{
+	"calls: invalid kfunc call: void * not allowed in func proto without mem size arg",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.result = REJECT,
+	.errstr = "arg#0 pointer type UNKNOWN  must point to scalar",
+	.fixup_kfunc_btf_id = {
+		{ "bpf_kfunc_call_test_mem_len_fail1", 2 },
+	},
+},
 {
 	"calls: basic sanity",
 	.insns = {
-- 
cgit 


From 4656569643409568fa7c162614c17277abdf84de Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Fri, 14 Jan 2022 22:09:53 +0530
Subject: selftests/bpf: Add test for race in btf_try_get_module

This adds a complete test case to ensure we never take references to
modules not in MODULE_STATE_LIVE, which can lead to UAF, and it also
ensures we never access btf->kfunc_set_tab in an inconsistent state.

The test uses userfaultfd to artificially widen the race.

When run on an unpatched kernel, it leads to the following splat:

[root@(none) bpf]# ./test_progs -t bpf_mod_race/ksym
[   55.498171] BUG: unable to handle page fault for address: fffffbfff802548b
[   55.499206] #PF: supervisor read access in kernel mode
[   55.499855] #PF: error_code(0x0000) - not-present page
[   55.500555] PGD a4fa9067 P4D a4fa9067 PUD a4fa5067 PMD 1b44067 PTE 0
[   55.501499] Oops: 0000 [#1] PREEMPT SMP KASAN NOPTI
[   55.502195] CPU: 0 PID: 83 Comm: kworker/0:2 Tainted: G           OE     5.16.0-rc4+ #151
[   55.503388] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ArchLinux 1.15.0-1 04/01/2014
[   55.504777] Workqueue: events bpf_prog_free_deferred
[   55.505563] RIP: 0010:kasan_check_range+0x184/0x1d0
[   55.509140] RSP: 0018:ffff88800560fcf0 EFLAGS: 00010282
[   55.509977] RAX: fffffbfff802548b RBX: fffffbfff802548c RCX: ffffffff9337b6ba
[   55.511096] RDX: fffffbfff802548c RSI: 0000000000000004 RDI: ffffffffc012a458
[   55.512143] RBP: fffffbfff802548b R08: 0000000000000001 R09: ffffffffc012a45b
[   55.513228] R10: fffffbfff802548b R11: 0000000000000001 R12: ffff888001b5f598
[   55.514332] R13: ffff888004f49ac8 R14: 0000000000000000 R15: ffff888092449400
[   55.515418] FS:  0000000000000000(0000) GS:ffff888092400000(0000) knlGS:0000000000000000
[   55.516705] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   55.517560] CR2: fffffbfff802548b CR3: 0000000007c10006 CR4: 0000000000770ef0
[   55.518672] PKRU: 55555554
[   55.519022] Call Trace:
[   55.519483]  <TASK>
[   55.519884]  module_put.part.0+0x2a/0x180
[   55.520642]  bpf_prog_free_deferred+0x129/0x2e0
[   55.521478]  process_one_work+0x4fa/0x9e0
[   55.522122]  ? pwq_dec_nr_in_flight+0x100/0x100
[   55.522878]  ? rwlock_bug.part.0+0x60/0x60
[   55.523551]  worker_thread+0x2eb/0x700
[   55.524176]  ? __kthread_parkme+0xd8/0xf0
[   55.524853]  ? process_one_work+0x9e0/0x9e0
[   55.525544]  kthread+0x23a/0x270
[   55.526088]  ? set_kthread_struct+0x80/0x80
[   55.526798]  ret_from_fork+0x1f/0x30
[   55.527413]  </TASK>
[   55.527813] Modules linked in: bpf_testmod(OE) [last unloaded: bpf_testmod]
[   55.530846] CR2: fffffbfff802548b
[   55.531341] ---[ end trace 1af41803c054ad6d ]---
[   55.532136] RIP: 0010:kasan_check_range+0x184/0x1d0
[   55.535887] RSP: 0018:ffff88800560fcf0 EFLAGS: 00010282
[   55.536711] RAX: fffffbfff802548b RBX: fffffbfff802548c RCX: ffffffff9337b6ba
[   55.537821] RDX: fffffbfff802548c RSI: 0000000000000004 RDI: ffffffffc012a458
[   55.538899] RBP: fffffbfff802548b R08: 0000000000000001 R09: ffffffffc012a45b
[   55.539928] R10: fffffbfff802548b R11: 0000000000000001 R12: ffff888001b5f598
[   55.541021] R13: ffff888004f49ac8 R14: 0000000000000000 R15: ffff888092449400
[   55.542108] FS:  0000000000000000(0000) GS:ffff888092400000(0000) knlGS:0000000000000000
[   55.543260]CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   55.544136] CR2: fffffbfff802548b CR3: 0000000007c10006 CR4: 0000000000770ef0
[   55.545317] PKRU: 55555554
[   55.545671] note: kworker/0:2[83] exited with preempt_count 1

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-11-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 net/bpf/test_run.c                                 |   2 +
 .../selftests/bpf/bpf_testmod/bpf_testmod.c        |   4 +
 tools/testing/selftests/bpf/config                 |   1 +
 .../selftests/bpf/prog_tests/bpf_mod_race.c        | 230 +++++++++++++++++++++
 tools/testing/selftests/bpf/progs/bpf_mod_race.c   | 100 +++++++++
 .../testing/selftests/bpf/progs/kfunc_call_race.c  |  14 ++
 tools/testing/selftests/bpf/progs/ksym_race.c      |  13 ++
 7 files changed, 364 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_mod_race.c
 create mode 100644 tools/testing/selftests/bpf/progs/kfunc_call_race.c
 create mode 100644 tools/testing/selftests/bpf/progs/ksym_race.c

(limited to 'tools/testing')

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 93ba56507240..3a5bf8ad834d 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -172,6 +172,8 @@ int noinline bpf_fentry_test1(int a)
 {
 	return a + 1;
 }
+EXPORT_SYMBOL_GPL(bpf_fentry_test1);
+ALLOW_ERROR_INJECTION(bpf_fentry_test1, ERRNO);
 
 int noinline bpf_fentry_test2(int a, u64 b)
 {
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
index c0805d0d753f..bdbacf5adcd2 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -118,6 +118,8 @@ static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = {
 	.check_set = &bpf_testmod_check_kfunc_ids,
 };
 
+extern int bpf_fentry_test1(int a);
+
 static int bpf_testmod_init(void)
 {
 	int ret;
@@ -125,6 +127,8 @@ static int bpf_testmod_init(void)
 	ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_testmod_kfunc_set);
 	if (ret < 0)
 		return ret;
+	if (bpf_fentry_test1(0) < 0)
+		return -EINVAL;
 	return sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
 }
 
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 32d80e77e910..763db63a3890 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -52,3 +52,4 @@ CONFIG_NETFILTER=y
 CONFIG_NF_DEFRAG_IPV4=y
 CONFIG_NF_DEFRAG_IPV6=y
 CONFIG_NF_CONNTRACK=y
+CONFIG_USERFAULTFD=y
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
new file mode 100644
index 000000000000..d43f548c572c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <unistd.h>
+#include <pthread.h>
+#include <sys/mman.h>
+#include <stdatomic.h>
+#include <test_progs.h>
+#include <sys/syscall.h>
+#include <linux/module.h>
+#include <linux/userfaultfd.h>
+
+#include "ksym_race.skel.h"
+#include "bpf_mod_race.skel.h"
+#include "kfunc_call_race.skel.h"
+
+/* This test crafts a race between btf_try_get_module and do_init_module, and
+ * checks whether btf_try_get_module handles the invocation for a well-formed
+ * but uninitialized module correctly. Unless the module has completed its
+ * initcalls, the verifier should fail the program load and return ENXIO.
+ *
+ * userfaultfd is used to trigger a fault in an fmod_ret program, and make it
+ * sleep, then the BPF program is loaded and the return value from verifier is
+ * inspected. After this, the userfaultfd is closed so that the module loading
+ * thread makes forward progress, and fmod_ret injects an error so that the
+ * module load fails and it is freed.
+ *
+ * If the verifier succeeded in loading the supplied program, it will end up
+ * taking reference to freed module, and trigger a crash when the program fd
+ * is closed later. This is true for both kfuncs and ksyms. In both cases,
+ * the crash is triggered inside bpf_prog_free_deferred, when module reference
+ * is finally released.
+ */
+
+struct test_config {
+	const char *str_open;
+	void *(*bpf_open_and_load)();
+	void (*bpf_destroy)(void *);
+};
+
+enum test_state {
+	_TS_INVALID,
+	TS_MODULE_LOAD,
+	TS_MODULE_LOAD_FAIL,
+};
+
+static _Atomic enum test_state state = _TS_INVALID;
+
+static int sys_finit_module(int fd, const char *param_values, int flags)
+{
+	return syscall(__NR_finit_module, fd, param_values, flags);
+}
+
+static int sys_delete_module(const char *name, unsigned int flags)
+{
+	return syscall(__NR_delete_module, name, flags);
+}
+
+static int load_module(const char *mod)
+{
+	int ret, fd;
+
+	fd = open("bpf_testmod.ko", O_RDONLY);
+	if (fd < 0)
+		return fd;
+
+	ret = sys_finit_module(fd, "", 0);
+	close(fd);
+	if (ret < 0)
+		return ret;
+	return 0;
+}
+
+static void *load_module_thread(void *p)
+{
+
+	if (!ASSERT_NEQ(load_module("bpf_testmod.ko"), 0, "load_module_thread must fail"))
+		atomic_store(&state, TS_MODULE_LOAD);
+	else
+		atomic_store(&state, TS_MODULE_LOAD_FAIL);
+	return p;
+}
+
+static int sys_userfaultfd(int flags)
+{
+	return syscall(__NR_userfaultfd, flags);
+}
+
+static int test_setup_uffd(void *fault_addr)
+{
+	struct uffdio_register uffd_register = {};
+	struct uffdio_api uffd_api = {};
+	int uffd;
+
+	uffd = sys_userfaultfd(O_CLOEXEC);
+	if (uffd < 0)
+		return -errno;
+
+	uffd_api.api = UFFD_API;
+	uffd_api.features = 0;
+	if (ioctl(uffd, UFFDIO_API, &uffd_api)) {
+		close(uffd);
+		return -1;
+	}
+
+	uffd_register.range.start = (unsigned long)fault_addr;
+	uffd_register.range.len = 4096;
+	uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+	if (ioctl(uffd, UFFDIO_REGISTER, &uffd_register)) {
+		close(uffd);
+		return -1;
+	}
+	return uffd;
+}
+
+static void test_bpf_mod_race_config(const struct test_config *config)
+{
+	void *fault_addr, *skel_fail;
+	struct bpf_mod_race *skel;
+	struct uffd_msg uffd_msg;
+	pthread_t load_mod_thrd;
+	_Atomic int *blockingp;
+	int uffd, ret;
+
+	fault_addr = mmap(0, 4096, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (!ASSERT_NEQ(fault_addr, MAP_FAILED, "mmap for uffd registration"))
+		return;
+
+	if (!ASSERT_OK(sys_delete_module("bpf_testmod", 0), "unload bpf_testmod"))
+		goto end_mmap;
+
+	skel = bpf_mod_race__open();
+	if (!ASSERT_OK_PTR(skel, "bpf_mod_kfunc_race__open"))
+		goto end_module;
+
+	skel->rodata->bpf_mod_race_config.tgid = getpid();
+	skel->rodata->bpf_mod_race_config.inject_error = -4242;
+	skel->rodata->bpf_mod_race_config.fault_addr = fault_addr;
+	if (!ASSERT_OK(bpf_mod_race__load(skel), "bpf_mod___load"))
+		goto end_destroy;
+	blockingp = (_Atomic int *)&skel->bss->bpf_blocking;
+
+	if (!ASSERT_OK(bpf_mod_race__attach(skel), "bpf_mod_kfunc_race__attach"))
+		goto end_destroy;
+
+	uffd = test_setup_uffd(fault_addr);
+	if (!ASSERT_GE(uffd, 0, "userfaultfd open + register address"))
+		goto end_destroy;
+
+	if (!ASSERT_OK(pthread_create(&load_mod_thrd, NULL, load_module_thread, NULL),
+		       "load module thread"))
+		goto end_uffd;
+
+	/* Now, we either fail loading module, or block in bpf prog, spin to find out */
+	while (!atomic_load(&state) && !atomic_load(blockingp))
+		;
+	if (!ASSERT_EQ(state, _TS_INVALID, "module load should block"))
+		goto end_join;
+	if (!ASSERT_EQ(*blockingp, 1, "module load blocked")) {
+		pthread_kill(load_mod_thrd, SIGKILL);
+		goto end_uffd;
+	}
+
+	/* We might have set bpf_blocking to 1, but may have not blocked in
+	 * bpf_copy_from_user. Read userfaultfd descriptor to verify that.
+	 */
+	if (!ASSERT_EQ(read(uffd, &uffd_msg, sizeof(uffd_msg)), sizeof(uffd_msg),
+		       "read uffd block event"))
+		goto end_join;
+	if (!ASSERT_EQ(uffd_msg.event, UFFD_EVENT_PAGEFAULT, "read uffd event is pagefault"))
+		goto end_join;
+
+	/* We know that load_mod_thrd is blocked in the fmod_ret program, the
+	 * module state is still MODULE_STATE_COMING because mod->init hasn't
+	 * returned. This is the time we try to load a program calling kfunc and
+	 * check if we get ENXIO from verifier.
+	 */
+	skel_fail = config->bpf_open_and_load();
+	ret = errno;
+	if (!ASSERT_EQ(skel_fail, NULL, config->str_open)) {
+		/* Close uffd to unblock load_mod_thrd */
+		close(uffd);
+		uffd = -1;
+		while (atomic_load(blockingp) != 2)
+			;
+		ASSERT_OK(kern_sync_rcu(), "kern_sync_rcu");
+		config->bpf_destroy(skel_fail);
+		goto end_join;
+
+	}
+	ASSERT_EQ(ret, ENXIO, "verifier returns ENXIO");
+	ASSERT_EQ(skel->data->res_try_get_module, false, "btf_try_get_module == false");
+
+	close(uffd);
+	uffd = -1;
+end_join:
+	pthread_join(load_mod_thrd, NULL);
+	if (uffd < 0)
+		ASSERT_EQ(atomic_load(&state), TS_MODULE_LOAD_FAIL, "load_mod_thrd success");
+end_uffd:
+	if (uffd >= 0)
+		close(uffd);
+end_destroy:
+	bpf_mod_race__destroy(skel);
+	ASSERT_OK(kern_sync_rcu(), "kern_sync_rcu");
+end_module:
+	sys_delete_module("bpf_testmod", 0);
+	ASSERT_OK(load_module("bpf_testmod.ko"), "restore bpf_testmod");
+end_mmap:
+	munmap(fault_addr, 4096);
+	atomic_store(&state, _TS_INVALID);
+}
+
+static const struct test_config ksym_config = {
+	.str_open = "ksym_race__open_and_load",
+	.bpf_open_and_load = (void *)ksym_race__open_and_load,
+	.bpf_destroy = (void *)ksym_race__destroy,
+};
+
+static const struct test_config kfunc_config = {
+	.str_open = "kfunc_call_race__open_and_load",
+	.bpf_open_and_load = (void *)kfunc_call_race__open_and_load,
+	.bpf_destroy = (void *)kfunc_call_race__destroy,
+};
+
+void serial_test_bpf_mod_race(void)
+{
+	if (test__start_subtest("ksym (used_btfs UAF)"))
+		test_bpf_mod_race_config(&ksym_config);
+	if (test__start_subtest("kfunc (kfunc_btf_tab UAF)"))
+		test_bpf_mod_race_config(&kfunc_config);
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_mod_race.c b/tools/testing/selftests/bpf/progs/bpf_mod_race.c
new file mode 100644
index 000000000000..82a5c6c6ba83
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_mod_race.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+const volatile struct {
+	/* thread to activate trace programs for */
+	pid_t tgid;
+	/* return error from __init function */
+	int inject_error;
+	/* uffd monitored range start address */
+	void *fault_addr;
+} bpf_mod_race_config = { -1 };
+
+int bpf_blocking = 0;
+int res_try_get_module = -1;
+
+static __always_inline bool check_thread_id(void)
+{
+	struct task_struct *task = bpf_get_current_task_btf();
+
+	return task->tgid == bpf_mod_race_config.tgid;
+}
+
+/* The trace of execution is something like this:
+ *
+ * finit_module()
+ *   load_module()
+ *     prepare_coming_module()
+ *       notifier_call(MODULE_STATE_COMING)
+ *         btf_parse_module()
+ *         btf_alloc_id()		// Visible to userspace at this point
+ *         list_add(btf_mod->list, &btf_modules)
+ *     do_init_module()
+ *       freeinit = kmalloc()
+ *       ret = mod->init()
+ *         bpf_prog_widen_race()
+ *           bpf_copy_from_user()
+ *             ...<sleep>...
+ *       if (ret < 0)
+ *         ...
+ *         free_module()
+ * return ret
+ *
+ * At this point, module loading thread is blocked, we now load the program:
+ *
+ * bpf_check
+ *   add_kfunc_call/check_pseudo_btf_id
+ *     btf_try_get_module
+ *       try_get_module_live == false
+ *     return -ENXIO
+ *
+ * Without the fix (try_get_module_live in btf_try_get_module):
+ *
+ * bpf_check
+ *   add_kfunc_call/check_pseudo_btf_id
+ *     btf_try_get_module
+ *       try_get_module == true
+ *     <store module reference in btf_kfunc_tab or used_btf array>
+ *   ...
+ * return fd
+ *
+ * Now, if we inject an error in the blocked program, our module will be freed
+ * (going straight from MODULE_STATE_COMING to MODULE_STATE_GOING).
+ * Later, when bpf program is freed, it will try to module_put already freed
+ * module. This is why try_get_module_live returns false if mod->state is not
+ * MODULE_STATE_LIVE.
+ */
+
+SEC("fmod_ret.s/bpf_fentry_test1")
+int BPF_PROG(widen_race, int a, int ret)
+{
+	char dst;
+
+	if (!check_thread_id())
+		return 0;
+	/* Indicate that we will attempt to block */
+	bpf_blocking = 1;
+	bpf_copy_from_user(&dst, 1, bpf_mod_race_config.fault_addr);
+	return bpf_mod_race_config.inject_error;
+}
+
+SEC("fexit/do_init_module")
+int BPF_PROG(fexit_init_module, struct module *mod, int ret)
+{
+	if (!check_thread_id())
+		return 0;
+	/* Indicate that we finished blocking */
+	bpf_blocking = 2;
+	return 0;
+}
+
+SEC("fexit/btf_try_get_module")
+int BPF_PROG(fexit_module_get, const struct btf *btf, struct module *mod)
+{
+	res_try_get_module = !!mod;
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_race.c b/tools/testing/selftests/bpf/progs/kfunc_call_race.c
new file mode 100644
index 000000000000..4e8fed75a4e0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_race.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+extern void bpf_testmod_test_mod_kfunc(int i) __ksym;
+
+SEC("tc")
+int kfunc_call_fail(struct __sk_buff *ctx)
+{
+	bpf_testmod_test_mod_kfunc(0);
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/ksym_race.c b/tools/testing/selftests/bpf/progs/ksym_race.c
new file mode 100644
index 000000000000..def97f2fed90
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/ksym_race.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+extern int bpf_testmod_ksym_percpu __ksym;
+
+SEC("tc")
+int ksym_fail(struct __sk_buff *ctx)
+{
+	return *(int *)bpf_this_cpu_ptr(&bpf_testmod_ksym_percpu);
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit 


From 7ff8985cc1aa462532f4afa2cc880dfd6892dd68 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Date: Thu, 13 Jan 2022 09:28:48 +0900
Subject: selftest/bpf: Test batching and bpf_(get|set)sockopt in bpf unix
 iter.

This patch adds a test for the batching and bpf_(get|set)sockopt in bpf
unix iter.

It does the following.

  1. Creates an abstract UNIX domain socket
  2. Call bpf_setsockopt()
  3. Call bpf_getsockopt() and save the value
  4. Call setsockopt()
  5. Call getsockopt() and save the value
  6. Compare the saved values

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Link: https://lore.kernel.org/r/20220113002849.4384-5-kuniyu@amazon.co.jp
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../bpf/prog_tests/bpf_iter_setsockopt_unix.c      | 100 +++++++++++++++++++++
 .../selftests/bpf/progs/bpf_iter_setsockopt_unix.c |  60 +++++++++++++
 .../testing/selftests/bpf/progs/bpf_tracing_net.h  |   2 +
 3 files changed, 162 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt_unix.c
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt_unix.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt_unix.c
new file mode 100644
index 000000000000..ee725d4d98a5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt_unix.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <test_progs.h>
+#include "bpf_iter_setsockopt_unix.skel.h"
+
+#define NR_CASES 5
+
+static int create_unix_socket(struct bpf_iter_setsockopt_unix *skel)
+{
+	struct sockaddr_un addr = {
+		.sun_family = AF_UNIX,
+		.sun_path = "",
+	};
+	socklen_t len;
+	int fd, err;
+
+	fd = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (!ASSERT_NEQ(fd, -1, "socket"))
+		return -1;
+
+	len = offsetof(struct sockaddr_un, sun_path);
+	err = bind(fd, (struct sockaddr *)&addr, len);
+	if (!ASSERT_OK(err, "bind"))
+		return -1;
+
+	len = sizeof(addr);
+	err = getsockname(fd, (struct sockaddr *)&addr, &len);
+	if (!ASSERT_OK(err, "getsockname"))
+		return -1;
+
+	memcpy(&skel->bss->sun_path, &addr.sun_path,
+	       len - offsetof(struct sockaddr_un, sun_path));
+
+	return fd;
+}
+
+static void test_sndbuf(struct bpf_iter_setsockopt_unix *skel, int fd)
+{
+	socklen_t optlen;
+	int i, err;
+
+	for (i = 0; i < NR_CASES; i++) {
+		if (!ASSERT_NEQ(skel->data->sndbuf_getsockopt[i], -1,
+				"bpf_(get|set)sockopt"))
+			return;
+
+		err = setsockopt(fd, SOL_SOCKET, SO_SNDBUF,
+				 &(skel->data->sndbuf_setsockopt[i]),
+				 sizeof(skel->data->sndbuf_setsockopt[i]));
+		if (!ASSERT_OK(err, "setsockopt"))
+			return;
+
+		optlen = sizeof(skel->bss->sndbuf_getsockopt_expected[i]);
+		err = getsockopt(fd, SOL_SOCKET, SO_SNDBUF,
+				 &(skel->bss->sndbuf_getsockopt_expected[i]),
+				 &optlen);
+		if (!ASSERT_OK(err, "getsockopt"))
+			return;
+
+		if (!ASSERT_EQ(skel->data->sndbuf_getsockopt[i],
+			       skel->bss->sndbuf_getsockopt_expected[i],
+			       "bpf_(get|set)sockopt"))
+			return;
+	}
+}
+
+void test_bpf_iter_setsockopt_unix(void)
+{
+	struct bpf_iter_setsockopt_unix *skel;
+	int err, unix_fd, iter_fd;
+	char buf;
+
+	skel = bpf_iter_setsockopt_unix__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open_and_load"))
+		return;
+
+	unix_fd = create_unix_socket(skel);
+	if (!ASSERT_NEQ(unix_fd, -1, "create_unix_server"))
+		goto destroy;
+
+	skel->links.change_sndbuf = bpf_program__attach_iter(skel->progs.change_sndbuf, NULL);
+	if (!ASSERT_OK_PTR(skel->links.change_sndbuf, "bpf_program__attach_iter"))
+		goto destroy;
+
+	iter_fd = bpf_iter_create(bpf_link__fd(skel->links.change_sndbuf));
+	if (!ASSERT_GE(iter_fd, 0, "bpf_iter_create"))
+		goto destroy;
+
+	while ((err = read(iter_fd, &buf, sizeof(buf))) == -1 &&
+	       errno == EAGAIN)
+		;
+	if (!ASSERT_OK(err, "read iter error"))
+		goto destroy;
+
+	test_sndbuf(skel, unix_fd);
+destroy:
+	bpf_iter_setsockopt_unix__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c
new file mode 100644
index 000000000000..eafc877ea460
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <limits.h>
+
+#define AUTOBIND_LEN 6
+char sun_path[AUTOBIND_LEN];
+
+#define NR_CASES 5
+int sndbuf_setsockopt[NR_CASES] = {-1, 0, 8192, INT_MAX / 2, INT_MAX};
+int sndbuf_getsockopt[NR_CASES] = {-1, -1, -1, -1, -1};
+int sndbuf_getsockopt_expected[NR_CASES];
+
+static inline int cmpname(struct unix_sock *unix_sk)
+{
+	int i;
+
+	for (i = 0; i < AUTOBIND_LEN; i++) {
+		if (unix_sk->addr->name->sun_path[i] != sun_path[i])
+			return -1;
+	}
+
+	return 0;
+}
+
+SEC("iter/unix")
+int change_sndbuf(struct bpf_iter__unix *ctx)
+{
+	struct unix_sock *unix_sk = ctx->unix_sk;
+	int i, err;
+
+	if (!unix_sk || !unix_sk->addr)
+		return 0;
+
+	if (unix_sk->addr->name->sun_path[0])
+		return 0;
+
+	if (cmpname(unix_sk))
+		return 0;
+
+	for (i = 0; i < NR_CASES; i++) {
+		err = bpf_setsockopt(unix_sk, SOL_SOCKET, SO_SNDBUF,
+				     &sndbuf_setsockopt[i],
+				     sizeof(sndbuf_setsockopt[i]));
+		if (err)
+			break;
+
+		err = bpf_getsockopt(unix_sk, SOL_SOCKET, SO_SNDBUF,
+				     &sndbuf_getsockopt[i],
+				     sizeof(sndbuf_getsockopt[i]));
+		if (err)
+			break;
+	}
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
index e0f42601be9b..1c1289ba5fc5 100644
--- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
+++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
@@ -5,6 +5,8 @@
 #define AF_INET			2
 #define AF_INET6		10
 
+#define SOL_SOCKET		1
+#define SO_SNDBUF		7
 #define __SO_ACCEPTCON		(1 << 16)
 
 #define SOL_TCP			6
-- 
cgit 


From a796966b6ea0abe05eebeb2443391b283f89b1e0 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Date: Thu, 13 Jan 2022 09:28:49 +0900
Subject: selftest/bpf: Fix a stale comment.

The commit b8a58aa6fccc ("af_unix: Cut unix_validate_addr() out of
unix_mkname().") moved the bound test part into unix_validate_addr().

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Link: https://lore.kernel.org/r/20220113002849.4384-6-kuniyu@amazon.co.jp
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/progs/bpf_iter_unix.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
index c21e3f545371..e6aefae38894 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
@@ -63,7 +63,7 @@ int dump_unix(struct bpf_iter__unix *ctx)
 			BPF_SEQ_PRINTF(seq, " @");
 
 			for (i = 1; i < len; i++) {
-				/* unix_mkname() tests this upper bound. */
+				/* unix_validate_addr() tests this upper bound. */
 				if (i >= sizeof(struct sockaddr_un))
 					break;
 
-- 
cgit 


From b8bff6f890513e0ae0f711e481b368c1d133c558 Mon Sep 17 00:00:00 2001
From: YiFei Zhu <zhuyifei@google.com>
Date: Thu, 16 Dec 2021 02:04:28 +0000
Subject: selftests/bpf: Test bpf_{get,set}_retval behavior with cgroup/sockopt

The tests checks how different ways of interacting with the helpers
(getting retval, setting EUNATCH, EISCONN, and legacy reject
returning 0 without setting retval), produce different results in
both the setsockopt syscall and the retval returned by the helper.
A few more tests verify the interaction between the retval of the
helper and the retval in getsockopt context.

Signed-off-by: YiFei Zhu <zhuyifei@google.com>
Reviewed-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/r/43ec60d679ae3f4f6fd2460559c28b63cb93cd12.1639619851.git.zhuyifei@google.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../bpf/prog_tests/cgroup_getset_retval.c          | 481 +++++++++++++++++++++
 .../bpf/progs/cgroup_getset_retval_getsockopt.c    |  45 ++
 .../bpf/progs/cgroup_getset_retval_setsockopt.c    |  52 +++
 3 files changed, 578 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c
 create mode 100644 tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c
 create mode 100644 tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c b/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c
new file mode 100644
index 000000000000..0b47c3c000c7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c
@@ -0,0 +1,481 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2021 Google LLC.
+ */
+
+#include <test_progs.h>
+#include <cgroup_helpers.h>
+#include <network_helpers.h>
+
+#include "cgroup_getset_retval_setsockopt.skel.h"
+#include "cgroup_getset_retval_getsockopt.skel.h"
+
+#define SOL_CUSTOM	0xdeadbeef
+
+static int zero;
+
+static void test_setsockopt_set(int cgroup_fd, int sock_fd)
+{
+	struct cgroup_getset_retval_setsockopt *obj;
+	struct bpf_link *link_set_eunatch = NULL;
+
+	obj = cgroup_getset_retval_setsockopt__open_and_load();
+	if (!ASSERT_OK_PTR(obj, "skel-load"))
+		return;
+
+	/* Attach setsockopt that sets EUNATCH, assert that
+	 * we actually get that error when we run setsockopt()
+	 */
+	link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch,
+						      cgroup_fd);
+	if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch"))
+		goto close_bpf_object;
+
+	if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+				   &zero, sizeof(int)), "setsockopt"))
+		goto close_bpf_object;
+	if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno"))
+		goto close_bpf_object;
+
+	if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations"))
+		goto close_bpf_object;
+	if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+		goto close_bpf_object;
+
+close_bpf_object:
+	bpf_link__destroy(link_set_eunatch);
+
+	cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_set_and_get(int cgroup_fd, int sock_fd)
+{
+	struct cgroup_getset_retval_setsockopt *obj;
+	struct bpf_link *link_set_eunatch = NULL, *link_get_retval = NULL;
+
+	obj = cgroup_getset_retval_setsockopt__open_and_load();
+	if (!ASSERT_OK_PTR(obj, "skel-load"))
+		return;
+
+	/* Attach setsockopt that sets EUNATCH, and one that gets the
+	 * previously set errno. Assert that we get the same errno back.
+	 */
+	link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch,
+						      cgroup_fd);
+	if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch"))
+		goto close_bpf_object;
+	link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+						     cgroup_fd);
+	if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+		goto close_bpf_object;
+
+	if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+				   &zero, sizeof(int)), "setsockopt"))
+		goto close_bpf_object;
+	if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno"))
+		goto close_bpf_object;
+
+	if (!ASSERT_EQ(obj->bss->invocations, 2, "invocations"))
+		goto close_bpf_object;
+	if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+		goto close_bpf_object;
+	if (!ASSERT_EQ(obj->bss->retval_value, -EUNATCH, "retval_value"))
+		goto close_bpf_object;
+
+close_bpf_object:
+	bpf_link__destroy(link_set_eunatch);
+	bpf_link__destroy(link_get_retval);
+
+	cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_default_zero(int cgroup_fd, int sock_fd)
+{
+	struct cgroup_getset_retval_setsockopt *obj;
+	struct bpf_link *link_get_retval = NULL;
+
+	obj = cgroup_getset_retval_setsockopt__open_and_load();
+	if (!ASSERT_OK_PTR(obj, "skel-load"))
+		return;
+
+	/* Attach setsockopt that gets the previously set errno.
+	 * Assert that, without anything setting one, we get 0.
+	 */
+	link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+						     cgroup_fd);
+	if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+		goto close_bpf_object;
+
+	if (!ASSERT_OK(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+				  &zero, sizeof(int)), "setsockopt"))
+		goto close_bpf_object;
+
+	if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations"))
+		goto close_bpf_object;
+	if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+		goto close_bpf_object;
+	if (!ASSERT_EQ(obj->bss->retval_value, 0, "retval_value"))
+		goto close_bpf_object;
+
+close_bpf_object:
+	bpf_link__destroy(link_get_retval);
+
+	cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_default_zero_and_set(int cgroup_fd, int sock_fd)
+{
+	struct cgroup_getset_retval_setsockopt *obj;
+	struct bpf_link *link_get_retval = NULL, *link_set_eunatch = NULL;
+
+	obj = cgroup_getset_retval_setsockopt__open_and_load();
+	if (!ASSERT_OK_PTR(obj, "skel-load"))
+		return;
+
+	/* Attach setsockopt that gets the previously set errno, and then
+	 * one that sets the errno to EUNATCH. Assert that the get does not
+	 * see EUNATCH set later, and does not prevent EUNATCH from being set.
+	 */
+	link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+						     cgroup_fd);
+	if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+		goto close_bpf_object;
+	link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch,
+						      cgroup_fd);
+	if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch"))
+		goto close_bpf_object;
+
+	if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+				   &zero, sizeof(int)), "setsockopt"))
+		goto close_bpf_object;
+	if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno"))
+		goto close_bpf_object;
+
+	if (!ASSERT_EQ(obj->bss->invocations, 2, "invocations"))
+		goto close_bpf_object;
+	if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+		goto close_bpf_object;
+	if (!ASSERT_EQ(obj->bss->retval_value, 0, "retval_value"))
+		goto close_bpf_object;
+
+close_bpf_object:
+	bpf_link__destroy(link_get_retval);
+	bpf_link__destroy(link_set_eunatch);
+
+	cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_override(int cgroup_fd, int sock_fd)
+{
+	struct cgroup_getset_retval_setsockopt *obj;
+	struct bpf_link *link_set_eunatch = NULL, *link_set_eisconn = NULL;
+	struct bpf_link *link_get_retval = NULL;
+
+	obj = cgroup_getset_retval_setsockopt__open_and_load();
+	if (!ASSERT_OK_PTR(obj, "skel-load"))
+		return;
+
+	/* Attach setsockopt that sets EUNATCH, then one that sets EISCONN,
+	 * and then one that gets the exported errno. Assert both the syscall
+	 * and the helper sees the last set errno.
+	 */
+	link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch,
+						      cgroup_fd);
+	if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch"))
+		goto close_bpf_object;
+	link_set_eisconn = bpf_program__attach_cgroup(obj->progs.set_eisconn,
+						      cgroup_fd);
+	if (!ASSERT_OK_PTR(link_set_eisconn, "cg-attach-set_eisconn"))
+		goto close_bpf_object;
+	link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+						     cgroup_fd);
+	if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+		goto close_bpf_object;
+
+	if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+				   &zero, sizeof(int)), "setsockopt"))
+		goto close_bpf_object;
+	if (!ASSERT_EQ(errno, EISCONN, "setsockopt-errno"))
+		goto close_bpf_object;
+
+	if (!ASSERT_EQ(obj->bss->invocations, 3, "invocations"))
+		goto close_bpf_object;
+	if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+		goto close_bpf_object;
+	if (!ASSERT_EQ(obj->bss->retval_value, -EISCONN, "retval_value"))
+		goto close_bpf_object;
+
+close_bpf_object:
+	bpf_link__destroy(link_set_eunatch);
+	bpf_link__destroy(link_set_eisconn);
+	bpf_link__destroy(link_get_retval);
+
+	cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_legacy_eperm(int cgroup_fd, int sock_fd)
+{
+	struct cgroup_getset_retval_setsockopt *obj;
+	struct bpf_link *link_legacy_eperm = NULL, *link_get_retval = NULL;
+
+	obj = cgroup_getset_retval_setsockopt__open_and_load();
+	if (!ASSERT_OK_PTR(obj, "skel-load"))
+		return;
+
+	/* Attach setsockopt that return a reject without setting errno
+	 * (legacy reject), and one that gets the errno. Assert that for
+	 * backward compatibility the syscall result in EPERM, and this
+	 * is also visible to the helper.
+	 */
+	link_legacy_eperm = bpf_program__attach_cgroup(obj->progs.legacy_eperm,
+						       cgroup_fd);
+	if (!ASSERT_OK_PTR(link_legacy_eperm, "cg-attach-legacy_eperm"))
+		goto close_bpf_object;
+	link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+						     cgroup_fd);
+	if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+		goto close_bpf_object;
+
+	if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+				   &zero, sizeof(int)), "setsockopt"))
+		goto close_bpf_object;
+	if (!ASSERT_EQ(errno, EPERM, "setsockopt-errno"))
+		goto close_bpf_object;
+
+	if (!ASSERT_EQ(obj->bss->invocations, 2, "invocations"))
+		goto close_bpf_object;
+	if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+		goto close_bpf_object;
+	if (!ASSERT_EQ(obj->bss->retval_value, -EPERM, "retval_value"))
+		goto close_bpf_object;
+
+close_bpf_object:
+	bpf_link__destroy(link_legacy_eperm);
+	bpf_link__destroy(link_get_retval);
+
+	cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_legacy_no_override(int cgroup_fd, int sock_fd)
+{
+	struct cgroup_getset_retval_setsockopt *obj;
+	struct bpf_link *link_set_eunatch = NULL, *link_legacy_eperm = NULL;
+	struct bpf_link *link_get_retval = NULL;
+
+	obj = cgroup_getset_retval_setsockopt__open_and_load();
+	if (!ASSERT_OK_PTR(obj, "skel-load"))
+		return;
+
+	/* Attach setsockopt that sets EUNATCH, then one that return a reject
+	 * without setting errno, and then one that gets the exported errno.
+	 * Assert both the syscall and the helper's errno are unaffected by
+	 * the second prog (i.e. legacy rejects does not override the errno
+	 * to EPERM).
+	 */
+	link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch,
+						      cgroup_fd);
+	if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch"))
+		goto close_bpf_object;
+	link_legacy_eperm = bpf_program__attach_cgroup(obj->progs.legacy_eperm,
+						       cgroup_fd);
+	if (!ASSERT_OK_PTR(link_legacy_eperm, "cg-attach-legacy_eperm"))
+		goto close_bpf_object;
+	link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+						     cgroup_fd);
+	if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+		goto close_bpf_object;
+
+	if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+				   &zero, sizeof(int)), "setsockopt"))
+		goto close_bpf_object;
+	if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno"))
+		goto close_bpf_object;
+
+	if (!ASSERT_EQ(obj->bss->invocations, 3, "invocations"))
+		goto close_bpf_object;
+	if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+		goto close_bpf_object;
+	if (!ASSERT_EQ(obj->bss->retval_value, -EUNATCH, "retval_value"))
+		goto close_bpf_object;
+
+close_bpf_object:
+	bpf_link__destroy(link_set_eunatch);
+	bpf_link__destroy(link_legacy_eperm);
+	bpf_link__destroy(link_get_retval);
+
+	cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_getsockopt_get(int cgroup_fd, int sock_fd)
+{
+	struct cgroup_getset_retval_getsockopt *obj;
+	struct bpf_link *link_get_retval = NULL;
+	int buf;
+	socklen_t optlen = sizeof(buf);
+
+	obj = cgroup_getset_retval_getsockopt__open_and_load();
+	if (!ASSERT_OK_PTR(obj, "skel-load"))
+		return;
+
+	/* Attach getsockopt that gets previously set errno. Assert that the
+	 * error from kernel is in both ctx_retval_value and retval_value.
+	 */
+	link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+						     cgroup_fd);
+	if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+		goto close_bpf_object;
+
+	if (!ASSERT_ERR(getsockopt(sock_fd, SOL_CUSTOM, 0,
+				   &buf, &optlen), "getsockopt"))
+		goto close_bpf_object;
+	if (!ASSERT_EQ(errno, EOPNOTSUPP, "getsockopt-errno"))
+		goto close_bpf_object;
+
+	if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations"))
+		goto close_bpf_object;
+	if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+		goto close_bpf_object;
+	if (!ASSERT_EQ(obj->bss->retval_value, -EOPNOTSUPP, "retval_value"))
+		goto close_bpf_object;
+	if (!ASSERT_EQ(obj->bss->ctx_retval_value, -EOPNOTSUPP, "ctx_retval_value"))
+		goto close_bpf_object;
+
+close_bpf_object:
+	bpf_link__destroy(link_get_retval);
+
+	cgroup_getset_retval_getsockopt__destroy(obj);
+}
+
+static void test_getsockopt_override(int cgroup_fd, int sock_fd)
+{
+	struct cgroup_getset_retval_getsockopt *obj;
+	struct bpf_link *link_set_eisconn = NULL;
+	int buf;
+	socklen_t optlen = sizeof(buf);
+
+	obj = cgroup_getset_retval_getsockopt__open_and_load();
+	if (!ASSERT_OK_PTR(obj, "skel-load"))
+		return;
+
+	/* Attach getsockopt that sets retval to -EISCONN. Assert that this
+	 * overrides the value from kernel.
+	 */
+	link_set_eisconn = bpf_program__attach_cgroup(obj->progs.set_eisconn,
+						      cgroup_fd);
+	if (!ASSERT_OK_PTR(link_set_eisconn, "cg-attach-set_eisconn"))
+		goto close_bpf_object;
+
+	if (!ASSERT_ERR(getsockopt(sock_fd, SOL_CUSTOM, 0,
+				   &buf, &optlen), "getsockopt"))
+		goto close_bpf_object;
+	if (!ASSERT_EQ(errno, EISCONN, "getsockopt-errno"))
+		goto close_bpf_object;
+
+	if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations"))
+		goto close_bpf_object;
+	if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+		goto close_bpf_object;
+
+close_bpf_object:
+	bpf_link__destroy(link_set_eisconn);
+
+	cgroup_getset_retval_getsockopt__destroy(obj);
+}
+
+static void test_getsockopt_retval_sync(int cgroup_fd, int sock_fd)
+{
+	struct cgroup_getset_retval_getsockopt *obj;
+	struct bpf_link *link_set_eisconn = NULL, *link_clear_retval = NULL;
+	struct bpf_link *link_get_retval = NULL;
+	int buf;
+	socklen_t optlen = sizeof(buf);
+
+	obj = cgroup_getset_retval_getsockopt__open_and_load();
+	if (!ASSERT_OK_PTR(obj, "skel-load"))
+		return;
+
+	/* Attach getsockopt that sets retval to -EISCONN, and one that clears
+	 * ctx retval. Assert that the clearing ctx retval is synced to helper
+	 * and clears any errors both from kernel and BPF..
+	 */
+	link_set_eisconn = bpf_program__attach_cgroup(obj->progs.set_eisconn,
+						      cgroup_fd);
+	if (!ASSERT_OK_PTR(link_set_eisconn, "cg-attach-set_eisconn"))
+		goto close_bpf_object;
+	link_clear_retval = bpf_program__attach_cgroup(obj->progs.clear_retval,
+						       cgroup_fd);
+	if (!ASSERT_OK_PTR(link_clear_retval, "cg-attach-clear_retval"))
+		goto close_bpf_object;
+	link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+						     cgroup_fd);
+	if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+		goto close_bpf_object;
+
+	if (!ASSERT_OK(getsockopt(sock_fd, SOL_CUSTOM, 0,
+				  &buf, &optlen), "getsockopt"))
+		goto close_bpf_object;
+
+	if (!ASSERT_EQ(obj->bss->invocations, 3, "invocations"))
+		goto close_bpf_object;
+	if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+		goto close_bpf_object;
+	if (!ASSERT_EQ(obj->bss->retval_value, 0, "retval_value"))
+		goto close_bpf_object;
+	if (!ASSERT_EQ(obj->bss->ctx_retval_value, 0, "ctx_retval_value"))
+		goto close_bpf_object;
+
+close_bpf_object:
+	bpf_link__destroy(link_set_eisconn);
+	bpf_link__destroy(link_clear_retval);
+	bpf_link__destroy(link_get_retval);
+
+	cgroup_getset_retval_getsockopt__destroy(obj);
+}
+
+void test_cgroup_getset_retval(void)
+{
+	int cgroup_fd = -1;
+	int sock_fd = -1;
+
+	cgroup_fd = test__join_cgroup("/cgroup_getset_retval");
+	if (!ASSERT_GE(cgroup_fd, 0, "cg-create"))
+		goto close_fd;
+
+	sock_fd = start_server(AF_INET, SOCK_DGRAM, NULL, 0, 0);
+	if (!ASSERT_GE(sock_fd, 0, "start-server"))
+		goto close_fd;
+
+	if (test__start_subtest("setsockopt-set"))
+		test_setsockopt_set(cgroup_fd, sock_fd);
+
+	if (test__start_subtest("setsockopt-set_and_get"))
+		test_setsockopt_set_and_get(cgroup_fd, sock_fd);
+
+	if (test__start_subtest("setsockopt-default_zero"))
+		test_setsockopt_default_zero(cgroup_fd, sock_fd);
+
+	if (test__start_subtest("setsockopt-default_zero_and_set"))
+		test_setsockopt_default_zero_and_set(cgroup_fd, sock_fd);
+
+	if (test__start_subtest("setsockopt-override"))
+		test_setsockopt_override(cgroup_fd, sock_fd);
+
+	if (test__start_subtest("setsockopt-legacy_eperm"))
+		test_setsockopt_legacy_eperm(cgroup_fd, sock_fd);
+
+	if (test__start_subtest("setsockopt-legacy_no_override"))
+		test_setsockopt_legacy_no_override(cgroup_fd, sock_fd);
+
+	if (test__start_subtest("getsockopt-get"))
+		test_getsockopt_get(cgroup_fd, sock_fd);
+
+	if (test__start_subtest("getsockopt-override"))
+		test_getsockopt_override(cgroup_fd, sock_fd);
+
+	if (test__start_subtest("getsockopt-retval_sync"))
+		test_getsockopt_retval_sync(cgroup_fd, sock_fd);
+
+close_fd:
+	close(cgroup_fd);
+}
diff --git a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c
new file mode 100644
index 000000000000..b2a409e6382a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2021 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__u32 invocations = 0;
+__u32 assertion_error = 0;
+__u32 retval_value = 0;
+__u32 ctx_retval_value = 0;
+
+SEC("cgroup/getsockopt")
+int get_retval(struct bpf_sockopt *ctx)
+{
+	retval_value = bpf_get_retval();
+	ctx_retval_value = ctx->retval;
+	__sync_fetch_and_add(&invocations, 1);
+
+	return 1;
+}
+
+SEC("cgroup/getsockopt")
+int set_eisconn(struct bpf_sockopt *ctx)
+{
+	__sync_fetch_and_add(&invocations, 1);
+
+	if (bpf_set_retval(-EISCONN))
+		assertion_error = 1;
+
+	return 1;
+}
+
+SEC("cgroup/getsockopt")
+int clear_retval(struct bpf_sockopt *ctx)
+{
+	__sync_fetch_and_add(&invocations, 1);
+
+	ctx->retval = 0;
+
+	return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c
new file mode 100644
index 000000000000..d6e5903e06ba
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2021 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__u32 invocations = 0;
+__u32 assertion_error = 0;
+__u32 retval_value = 0;
+
+SEC("cgroup/setsockopt")
+int get_retval(struct bpf_sockopt *ctx)
+{
+	retval_value = bpf_get_retval();
+	__sync_fetch_and_add(&invocations, 1);
+
+	return 1;
+}
+
+SEC("cgroup/setsockopt")
+int set_eunatch(struct bpf_sockopt *ctx)
+{
+	__sync_fetch_and_add(&invocations, 1);
+
+	if (bpf_set_retval(-EUNATCH))
+		assertion_error = 1;
+
+	return 0;
+}
+
+SEC("cgroup/setsockopt")
+int set_eisconn(struct bpf_sockopt *ctx)
+{
+	__sync_fetch_and_add(&invocations, 1);
+
+	if (bpf_set_retval(-EISCONN))
+		assertion_error = 1;
+
+	return 0;
+}
+
+SEC("cgroup/setsockopt")
+int legacy_eperm(struct bpf_sockopt *ctx)
+{
+	__sync_fetch_and_add(&invocations, 1);
+
+	return 0;
+}
-- 
cgit 


From 1080ef5cc0c2c3419dbdd61e441d1e014410824a Mon Sep 17 00:00:00 2001
From: YiFei Zhu <zhuyifei@google.com>
Date: Thu, 16 Dec 2021 02:04:29 +0000
Subject: selftests/bpf: Update sockopt_sk test to the use bpf_set_retval

The tests would break without this patch, because at one point it calls
  getsockopt(fd, SOL_TCP, TCP_ZEROCOPY_RECEIVE, &buf, &optlen)
This getsockopt receives the kernel-set -EINVAL. Prior to this patch
series, the eBPF getsockopt hook's -EPERM would override kernel's
-EINVAL, however, after this patch series, return 0's automatic
-EPERM will not; the eBPF prog has to explicitly bpf_set_retval(-EPERM)
if that is wanted.

I also removed the explicit mentions of EPERM in the comments in the
prog.

Signed-off-by: YiFei Zhu <zhuyifei@google.com>
Reviewed-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/r/4f20b77cb46812dbc2bdcd7e3fa87c7573bde55e.1639619851.git.zhuyifei@google.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../testing/selftests/bpf/prog_tests/sockopt_sk.c  |  4 +--
 tools/testing/selftests/bpf/progs/sockopt_sk.c     | 32 +++++++++++-----------
 2 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
index 4b937e5dbaca..30a99d2ed5c6 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
@@ -173,11 +173,11 @@ static int getsetsockopt(void)
 	}
 
 	memset(&buf, 0, sizeof(buf));
-	buf.zc.address = 12345; /* rejected by BPF */
+	buf.zc.address = 12345; /* Not page aligned. Rejected by tcp_zerocopy_receive() */
 	optlen = sizeof(buf.zc);
 	errno = 0;
 	err = getsockopt(fd, SOL_TCP, TCP_ZEROCOPY_RECEIVE, &buf, &optlen);
-	if (errno != EPERM) {
+	if (errno != EINVAL) {
 		log_err("Unexpected getsockopt(TCP_ZEROCOPY_RECEIVE) err=%d errno=%d",
 			err, errno);
 		goto err;
diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c
index 79c8139b63b8..d0298dccedcd 100644
--- a/tools/testing/selftests/bpf/progs/sockopt_sk.c
+++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c
@@ -73,17 +73,17 @@ int _getsockopt(struct bpf_sockopt *ctx)
 		 */
 
 		if (optval + sizeof(struct tcp_zerocopy_receive) > optval_end)
-			return 0; /* EPERM, bounds check */
+			return 0; /* bounds check */
 
 		if (((struct tcp_zerocopy_receive *)optval)->address != 0)
-			return 0; /* EPERM, unexpected data */
+			return 0; /* unexpected data */
 
 		return 1;
 	}
 
 	if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
 		if (optval + 1 > optval_end)
-			return 0; /* EPERM, bounds check */
+			return 0; /* bounds check */
 
 		ctx->retval = 0; /* Reset system call return value to zero */
 
@@ -96,24 +96,24 @@ int _getsockopt(struct bpf_sockopt *ctx)
 		 * bytes of data.
 		 */
 		if (optval_end - optval != page_size)
-			return 0; /* EPERM, unexpected data size */
+			return 0; /* unexpected data size */
 
 		return 1;
 	}
 
 	if (ctx->level != SOL_CUSTOM)
-		return 0; /* EPERM, deny everything except custom level */
+		return 0; /* deny everything except custom level */
 
 	if (optval + 1 > optval_end)
-		return 0; /* EPERM, bounds check */
+		return 0; /* bounds check */
 
 	storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
 				     BPF_SK_STORAGE_GET_F_CREATE);
 	if (!storage)
-		return 0; /* EPERM, couldn't get sk storage */
+		return 0; /* couldn't get sk storage */
 
 	if (!ctx->retval)
-		return 0; /* EPERM, kernel should not have handled
+		return 0; /* kernel should not have handled
 			   * SOL_CUSTOM, something is wrong!
 			   */
 	ctx->retval = 0; /* Reset system call return value to zero */
@@ -152,7 +152,7 @@ int _setsockopt(struct bpf_sockopt *ctx)
 		/* Overwrite SO_SNDBUF value */
 
 		if (optval + sizeof(__u32) > optval_end)
-			return 0; /* EPERM, bounds check */
+			return 0; /* bounds check */
 
 		*(__u32 *)optval = 0x55AA;
 		ctx->optlen = 4;
@@ -164,7 +164,7 @@ int _setsockopt(struct bpf_sockopt *ctx)
 		/* Always use cubic */
 
 		if (optval + 5 > optval_end)
-			return 0; /* EPERM, bounds check */
+			return 0; /* bounds check */
 
 		memcpy(optval, "cubic", 5);
 		ctx->optlen = 5;
@@ -175,10 +175,10 @@ int _setsockopt(struct bpf_sockopt *ctx)
 	if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
 		/* Original optlen is larger than PAGE_SIZE. */
 		if (ctx->optlen != page_size * 2)
-			return 0; /* EPERM, unexpected data size */
+			return 0; /* unexpected data size */
 
 		if (optval + 1 > optval_end)
-			return 0; /* EPERM, bounds check */
+			return 0; /* bounds check */
 
 		/* Make sure we can trim the buffer. */
 		optval[0] = 0;
@@ -189,21 +189,21 @@ int _setsockopt(struct bpf_sockopt *ctx)
 		 * bytes of data.
 		 */
 		if (optval_end - optval != page_size)
-			return 0; /* EPERM, unexpected data size */
+			return 0; /* unexpected data size */
 
 		return 1;
 	}
 
 	if (ctx->level != SOL_CUSTOM)
-		return 0; /* EPERM, deny everything except custom level */
+		return 0; /* deny everything except custom level */
 
 	if (optval + 1 > optval_end)
-		return 0; /* EPERM, bounds check */
+		return 0; /* bounds check */
 
 	storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
 				     BPF_SK_STORAGE_GET_F_CREATE);
 	if (!storage)
-		return 0; /* EPERM, couldn't get sk storage */
+		return 0; /* couldn't get sk storage */
 
 	storage->val = optval[0];
 	ctx->optlen = -1; /* BPF has consumed this option, don't call kernel
-- 
cgit 


From 791cad025051773daed5512a9109eba53e73a575 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 20 Jan 2022 12:50:26 +0100
Subject: bpf: selftests: Get rid of CHECK macro in xdp_adjust_tail.c

Rely on ASSERT* macros and get rid of deprecated CHECK ones in
xdp_adjust_tail bpf selftest.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/c0ab002ffa647a20ec9e584214bf0d4373142b54.1642679130.git.lorenzo@kernel.org
---
 .../selftests/bpf/prog_tests/xdp_adjust_tail.c     | 68 +++++++++-------------
 1 file changed, 28 insertions(+), 40 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
index 3f5a17c38be5..d5ebe92b0ebb 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
@@ -11,22 +11,21 @@ static void test_xdp_adjust_tail_shrink(void)
 	char buf[128];
 
 	err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
-	if (CHECK_FAIL(err))
+	if (ASSERT_OK(err, "test_xdp_adjust_tail_shrink"))
 		return;
 
 	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
 				buf, &size, &retval, &duration);
-
-	CHECK(err || retval != XDP_DROP,
-	      "ipv4", "err %d errno %d retval %d size %d\n",
-	      err, errno, retval, size);
+	ASSERT_OK(err, "ipv4");
+	ASSERT_EQ(retval, XDP_DROP, "ipv4 retval");
 
 	expect_sz = sizeof(pkt_v6) - 20;  /* Test shrink with 20 bytes */
 	err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
 				buf, &size, &retval, &duration);
-	CHECK(err || retval != XDP_TX || size != expect_sz,
-	      "ipv6", "err %d errno %d retval %d size %d expect-size %d\n",
-	      err, errno, retval, size, expect_sz);
+	ASSERT_OK(err, "ipv6");
+	ASSERT_EQ(retval, XDP_TX, "ipv6 retval");
+	ASSERT_EQ(size, expect_sz, "ipv6 size");
+
 	bpf_object__close(obj);
 }
 
@@ -39,21 +38,20 @@ static void test_xdp_adjust_tail_grow(void)
 	int err, prog_fd;
 
 	err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
-	if (CHECK_FAIL(err))
+	if (ASSERT_OK(err, "test_xdp_adjust_tail_grow"))
 		return;
 
 	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
 				buf, &size, &retval, &duration);
-	CHECK(err || retval != XDP_DROP,
-	      "ipv4", "err %d errno %d retval %d size %d\n",
-	      err, errno, retval, size);
+	ASSERT_OK(err, "ipv4");
+	ASSERT_EQ(retval, XDP_DROP, "ipv4 retval");
 
 	expect_sz = sizeof(pkt_v6) + 40; /* Test grow with 40 bytes */
 	err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6) /* 74 */,
 				buf, &size, &retval, &duration);
-	CHECK(err || retval != XDP_TX || size != expect_sz,
-	      "ipv6", "err %d errno %d retval %d size %d expect-size %d\n",
-	      err, errno, retval, size, expect_sz);
+	ASSERT_OK(err, "ipv6");
+	ASSERT_EQ(retval, XDP_TX, "ipv6 retval");
+	ASSERT_EQ(size, expect_sz, "ipv6 size");
 
 	bpf_object__close(obj);
 }
@@ -76,7 +74,7 @@ static void test_xdp_adjust_tail_grow2(void)
 	};
 
 	err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &tattr.prog_fd);
-	if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
+	if (ASSERT_OK(err, "test_xdp_adjust_tail_grow"))
 		return;
 
 	/* Test case-64 */
@@ -86,21 +84,17 @@ static void test_xdp_adjust_tail_grow2(void)
 	/* Kernel side alloc packet memory area that is zero init */
 	err = bpf_prog_test_run_xattr(&tattr);
 
-	CHECK_ATTR(errno != ENOSPC /* Due limit copy_size in bpf_test_finish */
-		   || tattr.retval != XDP_TX
-		   || tattr.data_size_out != 192, /* Expected grow size */
-		   "case-64",
-		   "err %d errno %d retval %d size %d\n",
-		   err, errno, tattr.retval, tattr.data_size_out);
+	ASSERT_EQ(errno, ENOSPC, "case-64 errno"); /* Due limit copy_size in bpf_test_finish */
+	ASSERT_EQ(tattr.retval, XDP_TX, "case-64 retval");
+	ASSERT_EQ(tattr.data_size_out, 192, "case-64 data_size_out"); /* Expected grow size */
 
 	/* Extra checks for data contents */
-	CHECK_ATTR(tattr.data_size_out != 192
-		   || buf[0]   != 1 ||  buf[63]  != 1  /*  0-63  memset to 1 */
-		   || buf[64]  != 0 ||  buf[127] != 0  /* 64-127 memset to 0 */
-		   || buf[128] != 1 ||  buf[191] != 1, /*128-191 memset to 1 */
-		   "case-64-data",
-		   "err %d errno %d retval %d size %d\n",
-		   err, errno, tattr.retval, tattr.data_size_out);
+	ASSERT_EQ(buf[0], 1, "case-64-data buf[0]"); /*  0-63  memset to 1 */
+	ASSERT_EQ(buf[63], 1, "case-64-data buf[63]");
+	ASSERT_EQ(buf[64], 0, "case-64-data buf[64]"); /* 64-127 memset to 0 */
+	ASSERT_EQ(buf[127], 0, "case-64-data buf[127]");
+	ASSERT_EQ(buf[128], 1, "case-64-data buf[128]"); /* 128-191 memset to 1 */
+	ASSERT_EQ(buf[191], 1, "case-64-data buf[191]");
 
 	/* Test case-128 */
 	memset(buf, 2, sizeof(buf));
@@ -109,23 +103,17 @@ static void test_xdp_adjust_tail_grow2(void)
 	err = bpf_prog_test_run_xattr(&tattr);
 
 	max_grow = 4096 - XDP_PACKET_HEADROOM -	tailroom; /* 3520 */
-	CHECK_ATTR(err
-		   || tattr.retval != XDP_TX
-		   || tattr.data_size_out != max_grow,/* Expect max grow size */
-		   "case-128",
-		   "err %d errno %d retval %d size %d expect-size %d\n",
-		   err, errno, tattr.retval, tattr.data_size_out, max_grow);
+	ASSERT_OK(err, "case-128");
+	ASSERT_EQ(tattr.retval, XDP_TX, "case-128 retval");
+	ASSERT_EQ(tattr.data_size_out, max_grow, "case-128 data_size_out"); /* Expect max grow */
 
 	/* Extra checks for data content: Count grow size, will contain zeros */
 	for (i = 0, cnt = 0; i < sizeof(buf); i++) {
 		if (buf[i] == 0)
 			cnt++;
 	}
-	CHECK_ATTR((cnt != (max_grow - tattr.data_size_in)) /* Grow increase */
-		   || tattr.data_size_out != max_grow, /* Total grow size */
-		   "case-128-data",
-		   "err %d errno %d retval %d size %d grow-size %d\n",
-		   err, errno, tattr.retval, tattr.data_size_out, cnt);
+	ASSERT_EQ(cnt, max_grow - tattr.data_size_in, "case-128-data cnt"); /* Grow increase */
+	ASSERT_EQ(tattr.data_size_out, max_grow, "case-128-data data_size_out"); /* Total grow */
 
 	bpf_object__close(obj);
 }
-- 
cgit 


From fa6fde350b166a8a9019da9ce3268c4db86cbb1d Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 20 Jan 2022 12:50:27 +0100
Subject: bpf: selftests: Get rid of CHECK macro in xdp_bpf2bpf.c

Rely on ASSERT* macros and get rid of deprecated CHECK ones in
xdp_bpf2bpf bpf selftest.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/df7e5098465016e27d91f2c69a376a35d63a7621.1642679130.git.lorenzo@kernel.org
---
 .../testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c | 60 ++++++++--------------
 1 file changed, 20 insertions(+), 40 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
index c98a897ad692..500a302cb3e9 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
@@ -12,26 +12,14 @@ struct meta {
 
 static void on_sample(void *ctx, int cpu, void *data, __u32 size)
 {
-	int duration = 0;
 	struct meta *meta = (struct meta *)data;
 	struct ipv4_packet *trace_pkt_v4 = data + sizeof(*meta);
 
-	if (CHECK(size < sizeof(pkt_v4) + sizeof(*meta),
-		  "check_size", "size %u < %zu\n",
-		  size, sizeof(pkt_v4) + sizeof(*meta)))
-		return;
-
-	if (CHECK(meta->ifindex != if_nametoindex("lo"), "check_meta_ifindex",
-		  "meta->ifindex = %d\n", meta->ifindex))
-		return;
-
-	if (CHECK(meta->pkt_len != sizeof(pkt_v4), "check_meta_pkt_len",
-		  "meta->pkt_len = %zd\n", sizeof(pkt_v4)))
-		return;
-
-	if (CHECK(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)),
-		  "check_packet_content", "content not the same\n"))
-		return;
+	ASSERT_GE(size, sizeof(pkt_v4) + sizeof(*meta), "check_size");
+	ASSERT_EQ(meta->ifindex, if_nametoindex("lo"), "check_meta_ifindex");
+	ASSERT_EQ(meta->pkt_len, sizeof(pkt_v4), "check_meta_pkt_len");
+	ASSERT_EQ(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)), 0,
+		  "check_packet_content");
 
 	*(bool *)ctx = true;
 }
@@ -52,7 +40,7 @@ void test_xdp_bpf2bpf(void)
 
 	/* Load XDP program to introspect */
 	pkt_skel = test_xdp__open_and_load();
-	if (CHECK(!pkt_skel, "pkt_skel_load", "test_xdp skeleton failed\n"))
+	if (!ASSERT_OK_PTR(pkt_skel, "test_xdp__open_and_load"))
 		return;
 
 	pkt_fd = bpf_program__fd(pkt_skel->progs._xdp_tx_iptunnel);
@@ -62,7 +50,7 @@ void test_xdp_bpf2bpf(void)
 
 	/* Load trace program */
 	ftrace_skel = test_xdp_bpf2bpf__open();
-	if (CHECK(!ftrace_skel, "__open", "ftrace skeleton failed\n"))
+	if (!ASSERT_OK_PTR(ftrace_skel, "test_xdp_bpf2bpf__open"))
 		goto out;
 
 	/* Demonstrate the bpf_program__set_attach_target() API rather than
@@ -77,11 +65,11 @@ void test_xdp_bpf2bpf(void)
 	bpf_program__set_attach_target(prog, pkt_fd, "_xdp_tx_iptunnel");
 
 	err = test_xdp_bpf2bpf__load(ftrace_skel);
-	if (CHECK(err, "__load", "ftrace skeleton failed\n"))
+	if (!ASSERT_OK(err, "test_xdp_bpf2bpf__load"))
 		goto out;
 
 	err = test_xdp_bpf2bpf__attach(ftrace_skel);
-	if (CHECK(err, "ftrace_attach", "ftrace attach failed: %d\n", err))
+	if (!ASSERT_OK(err, "test_xdp_bpf2bpf__attach"))
 		goto out;
 
 	/* Set up perf buffer */
@@ -94,33 +82,25 @@ void test_xdp_bpf2bpf(void)
 	err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4),
 				buf, &size, &retval, &duration);
 	memcpy(&iph, buf + sizeof(struct ethhdr), sizeof(iph));
-	if (CHECK(err || retval != XDP_TX || size != 74 ||
-		  iph.protocol != IPPROTO_IPIP, "ipv4",
-		  "err %d errno %d retval %d size %d\n",
-		  err, errno, retval, size))
-		goto out;
+
+	ASSERT_OK(err, "ipv4");
+	ASSERT_EQ(retval, XDP_TX, "ipv4 retval");
+	ASSERT_EQ(size, 74, "ipv4 size");
+	ASSERT_EQ(iph.protocol, IPPROTO_IPIP, "ipv4 proto");
 
 	/* Make sure bpf_xdp_output() was triggered and it sent the expected
 	 * data to the perf ring buffer.
 	 */
 	err = perf_buffer__poll(pb, 100);
-	if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err))
-		goto out;
-
-	CHECK_FAIL(!passed);
 
+	ASSERT_GE(err, 0, "perf_buffer__poll");
+	ASSERT_TRUE(passed, "test passed");
 	/* Verify test results */
-	if (CHECK(ftrace_skel->bss->test_result_fentry != if_nametoindex("lo"),
-		  "result", "fentry failed err %llu\n",
-		  ftrace_skel->bss->test_result_fentry))
-		goto out;
-
-	CHECK(ftrace_skel->bss->test_result_fexit != XDP_TX, "result",
-	      "fexit failed err %llu\n", ftrace_skel->bss->test_result_fexit);
-
+	ASSERT_EQ(ftrace_skel->bss->test_result_fentry, if_nametoindex("lo"),
+		  "fentry result");
+	ASSERT_EQ(ftrace_skel->bss->test_result_fexit, XDP_TX, "fexit result");
 out:
-	if (pb)
-		perf_buffer__free(pb);
+	perf_buffer__free(pb);
 	test_xdp__destroy(pkt_skel);
 	test_xdp_bpf2bpf__destroy(ftrace_skel);
 }
-- 
cgit 


From 8c0be0631d81e48f77d0ebf0534c86e32bef5f89 Mon Sep 17 00:00:00 2001
From: Felix Maurer <fmaurer@redhat.com>
Date: Tue, 18 Jan 2022 16:11:56 +0100
Subject: selftests: bpf: Fix bind on used port

The bind_perm BPF selftest failed when port 111/tcp was already in use
during the test. To fix this, the test now runs in its own network name
space.

To use unshare, it is necessary to reorder the includes. The style of
the includes is adapted to be consistent with the other prog_tests.

v2: Replace deprecated CHECK macro with ASSERT_OK

Fixes: 8259fdeb30326 ("selftests/bpf: Verify that rebinding to port < 1024 from BPF works")
Signed-off-by: Felix Maurer <fmaurer@redhat.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/551ee65533bb987a43f93d88eaf2368b416ccd32.1642518457.git.fmaurer@redhat.com
---
 tools/testing/selftests/bpf/prog_tests/bind_perm.c | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/bind_perm.c b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
index d0f06e40c16d..eac71fbb24ce 100644
--- a/tools/testing/selftests/bpf/prog_tests/bind_perm.c
+++ b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
@@ -1,13 +1,24 @@
 // SPDX-License-Identifier: GPL-2.0
-#include <test_progs.h>
-#include "bind_perm.skel.h"
-
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdlib.h>
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <sys/capability.h>
 
+#include "test_progs.h"
+#include "bind_perm.skel.h"
+
 static int duration;
 
+static int create_netns(void)
+{
+	if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
+		return -1;
+
+	return 0;
+}
+
 void try_bind(int family, int port, int expected_errno)
 {
 	struct sockaddr_storage addr = {};
@@ -75,6 +86,9 @@ void test_bind_perm(void)
 	struct bind_perm *skel;
 	int cgroup_fd;
 
+	if (create_netns())
+		return;
+
 	cgroup_fd = test__join_cgroup("/bind_perm");
 	if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno))
 		return;
-- 
cgit 


From 1058b6a78db21e3f503362ac4719b3d83b3dd745 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Thu, 20 Jan 2022 22:19:32 +0530
Subject: selftests/bpf: Do not fail build if CONFIG_NF_CONNTRACK=m/n

Some users have complained that selftests fail to build when
CONFIG_NF_CONNTRACK=m. It would be useful to allow building as long as
it is set to module or built-in, even though in case of building as
module, user would need to load it before running the selftest. Note
that this also allows building selftest when CONFIG_NF_CONNTRACK is
disabled.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220120164932.2798544-1-memxor@gmail.com
---
 tools/testing/selftests/bpf/progs/test_bpf_nf.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
index 6f131c993c0b..f00a9731930e 100644
--- a/tools/testing/selftests/bpf/progs/test_bpf_nf.c
+++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
@@ -17,18 +17,27 @@ int test_enonet_netns_id = 0;
 int test_enoent_lookup = 0;
 int test_eafnosupport = 0;
 
+struct nf_conn;
+
+struct bpf_ct_opts___local {
+	s32 netns_id;
+	s32 error;
+	u8 l4proto;
+	u8 reserved[3];
+} __attribute__((preserve_access_index));
+
 struct nf_conn *bpf_xdp_ct_lookup(struct xdp_md *, struct bpf_sock_tuple *, u32,
-				  struct bpf_ct_opts *, u32) __ksym;
+				  struct bpf_ct_opts___local *, u32) __ksym;
 struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *, struct bpf_sock_tuple *, u32,
-				  struct bpf_ct_opts *, u32) __ksym;
+				  struct bpf_ct_opts___local *, u32) __ksym;
 void bpf_ct_release(struct nf_conn *) __ksym;
 
 static __always_inline void
 nf_ct_test(struct nf_conn *(*func)(void *, struct bpf_sock_tuple *, u32,
-				   struct bpf_ct_opts *, u32),
+				   struct bpf_ct_opts___local *, u32),
 	   void *ctx)
 {
-	struct bpf_ct_opts opts_def = { .l4proto = IPPROTO_TCP, .netns_id = -1 };
+	struct bpf_ct_opts___local opts_def = { .l4proto = IPPROTO_TCP, .netns_id = -1 };
 	struct bpf_sock_tuple bpf_tuple;
 	struct nf_conn *ct;
 
-- 
cgit 


From 32b3429479ea9a1259bbca8b3b3db5b9b8eb3293 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 19 Jan 2022 22:05:26 -0800
Subject: selftests/bpf: fail build on compilation warning

It's very easy to miss compilation warnings without -Werror, which is
not set for selftests. libbpf and bpftool are already strict about this,
so make selftests/bpf also treat compilation warnings as errors to catch
such regressions early.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20220120060529.1890907-2-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 42ffc24e9e71..945f92d71db3 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -21,7 +21,7 @@ endif
 
 BPF_GCC		?= $(shell command -v bpf-gcc;)
 SAN_CFLAGS	?=
-CFLAGS += -g -O0 -rdynamic -Wall $(GENFLAGS) $(SAN_CFLAGS)		\
+CFLAGS += -g -O0 -rdynamic -Wall -Werror $(GENFLAGS) $(SAN_CFLAGS)	\
 	  -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR)		\
 	  -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT)
 LDFLAGS += $(SAN_CFLAGS)
@@ -292,7 +292,7 @@ IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null | \
 MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian)
 
 CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG))
-BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) 			\
+BPF_CFLAGS = -g -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) 		\
 	     -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR)			\
 	     -I$(abspath $(OUTPUT)/../usr/include)
 
-- 
cgit 


From ccc3f56918f670ccb5b8df2c6a15cbd083a4dd03 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 19 Jan 2022 22:05:27 -0800
Subject: selftests/bpf: convert remaining legacy map definitions

Converted few remaining legacy BPF map definition to BTF-defined ones.
For the remaining two bpf_map_def-based legacy definitions that we want
to keep for testing purposes until libbpf 1.0 release, guard them in
pragma to suppres deprecation warnings which will be added in libbpf in
the next commit.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20220120060529.1890907-3-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../selftests/bpf/progs/freplace_cls_redirect.c    | 12 +++++------
 .../testing/selftests/bpf/progs/sample_map_ret0.c  | 24 +++++++++++-----------
 tools/testing/selftests/bpf/progs/test_btf_haskv.c |  3 +++
 tools/testing/selftests/bpf/progs/test_btf_newkv.c |  3 +++
 tools/testing/selftests/bpf/progs/test_btf_nokv.c  | 12 +++++------
 .../selftests/bpf/progs/test_skb_cgroup_id_kern.c  | 12 +++++------
 tools/testing/selftests/bpf/progs/test_tc_edt.c    | 12 +++++------
 .../bpf/progs/test_tcp_check_syncookie_kern.c      | 12 +++++------
 8 files changed, 48 insertions(+), 42 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c b/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c
index 68a5a9db928a..7e94412d47a5 100644
--- a/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c
+++ b/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c
@@ -7,12 +7,12 @@
 #include <bpf/bpf_endian.h>
 #include <bpf/bpf_helpers.h>
 
-struct bpf_map_def SEC("maps") sock_map = {
-	.type = BPF_MAP_TYPE_SOCKMAP,
-	.key_size = sizeof(int),
-	.value_size = sizeof(int),
-	.max_entries = 2,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKMAP);
+	__type(key, int);
+	__type(value, int);
+	__uint(max_entries, 2);
+} sock_map SEC(".maps");
 
 SEC("freplace/cls_redirect")
 int freplace_cls_redirect_test(struct __sk_buff *skb)
diff --git a/tools/testing/selftests/bpf/progs/sample_map_ret0.c b/tools/testing/selftests/bpf/progs/sample_map_ret0.c
index 1612a32007b6..495990d355ef 100644
--- a/tools/testing/selftests/bpf/progs/sample_map_ret0.c
+++ b/tools/testing/selftests/bpf/progs/sample_map_ret0.c
@@ -2,19 +2,19 @@
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 
-struct bpf_map_def SEC("maps") htab = {
-	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(long),
-	.max_entries = 2,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, __u32);
+	__type(value, long);
+	__uint(max_entries, 2);
+} htab SEC(".maps");
 
-struct bpf_map_def SEC("maps") array = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(long),
-	.max_entries = 2,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__type(key, __u32);
+	__type(value, long);
+	__uint(max_entries, 2);
+} array SEC(".maps");
 
 /* Sample program which should always load for testing control paths. */
 SEC(".text") int func()
diff --git a/tools/testing/selftests/bpf/progs/test_btf_haskv.c b/tools/testing/selftests/bpf/progs/test_btf_haskv.c
index 160ead6c67b2..07c94df13660 100644
--- a/tools/testing/selftests/bpf/progs/test_btf_haskv.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_haskv.c
@@ -9,12 +9,15 @@ struct ipv_counts {
 	unsigned int v6;
 };
 
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
 struct bpf_map_def SEC("maps") btf_map = {
 	.type = BPF_MAP_TYPE_ARRAY,
 	.key_size = sizeof(int),
 	.value_size = sizeof(struct ipv_counts),
 	.max_entries = 4,
 };
+#pragma GCC diagnostic pop
 
 BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts);
 
diff --git a/tools/testing/selftests/bpf/progs/test_btf_newkv.c b/tools/testing/selftests/bpf/progs/test_btf_newkv.c
index 1884a5bd10f5..762671a2e90c 100644
--- a/tools/testing/selftests/bpf/progs/test_btf_newkv.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_newkv.c
@@ -9,6 +9,8 @@ struct ipv_counts {
 	unsigned int v6;
 };
 
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
 /* just to validate we can handle maps in multiple sections */
 struct bpf_map_def SEC("maps") btf_map_legacy = {
 	.type = BPF_MAP_TYPE_ARRAY,
@@ -16,6 +18,7 @@ struct bpf_map_def SEC("maps") btf_map_legacy = {
 	.value_size = sizeof(long long),
 	.max_entries = 4,
 };
+#pragma GCC diagnostic pop
 
 BPF_ANNOTATE_KV_PAIR(btf_map_legacy, int, struct ipv_counts);
 
diff --git a/tools/testing/selftests/bpf/progs/test_btf_nokv.c b/tools/testing/selftests/bpf/progs/test_btf_nokv.c
index 15e0f9945fe4..1dabb88f8cb4 100644
--- a/tools/testing/selftests/bpf/progs/test_btf_nokv.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_nokv.c
@@ -8,12 +8,12 @@ struct ipv_counts {
 	unsigned int v6;
 };
 
-struct bpf_map_def SEC("maps") btf_map = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(int),
-	.value_size = sizeof(struct ipv_counts),
-	.max_entries = 4,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(struct ipv_counts));
+	__uint(max_entries, 4);
+} btf_map SEC(".maps");
 
 __attribute__((noinline))
 int test_long_fname_2(void)
diff --git a/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c b/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c
index c304cd5b8cad..37aacc66cd68 100644
--- a/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c
@@ -10,12 +10,12 @@
 
 #define NUM_CGROUP_LEVELS	4
 
-struct bpf_map_def SEC("maps") cgroup_ids = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u64),
-	.max_entries = NUM_CGROUP_LEVELS,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__type(key, __u32);
+	__type(value, __u64);
+	__uint(max_entries, NUM_CGROUP_LEVELS);
+} cgroup_ids SEC(".maps");
 
 static __always_inline void log_nth_level(struct __sk_buff *skb, __u32 level)
 {
diff --git a/tools/testing/selftests/bpf/progs/test_tc_edt.c b/tools/testing/selftests/bpf/progs/test_tc_edt.c
index bf28814bfde5..950a70b61e74 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_edt.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_edt.c
@@ -17,12 +17,12 @@
 #define THROTTLE_RATE_BPS (5 * 1000 * 1000)
 
 /* flow_key => last_tstamp timestamp used */
-struct bpf_map_def SEC("maps") flow_map = {
-	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(uint32_t),
-	.value_size = sizeof(uint64_t),
-	.max_entries = 1,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, uint32_t);
+	__type(value, uint64_t);
+	__uint(max_entries, 1);
+} flow_map SEC(".maps");
 
 static inline int throttle_flow(struct __sk_buff *skb)
 {
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c
index cd747cd93dbe..6edebce563b5 100644
--- a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c
@@ -16,12 +16,12 @@
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
 
-struct bpf_map_def SEC("maps") results = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u32),
-	.max_entries = 3,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__type(key, __u32);
+	__type(value, __u32);
+	__uint(max_entries, 3);
+} results SEC(".maps");
 
 static __always_inline __s64 gen_syncookie(void *data_end, struct bpf_sock *sk,
 					   void *iph, __u32 ip_size,
-- 
cgit 


From 93b8952d223af03c51fba0c6258173d2ffbd2cb7 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 19 Jan 2022 22:05:28 -0800
Subject: libbpf: deprecate legacy BPF map definitions

Enact deprecation of legacy BPF map definition in SEC("maps") ([0]). For
the definitions themselves introduce LIBBPF_STRICT_MAP_DEFINITIONS flag
for libbpf strict mode. If it is set, error out on any struct
bpf_map_def-based map definition. If not set, libbpf will print out
a warning for each legacy BPF map to raise awareness that it goes away.

For any use of BPF_ANNOTATE_KV_PAIR() macro providing a legacy way to
associate BTF key/value type information with legacy BPF map definition,
warn through libbpf's pr_warn() error message (but don't fail BPF object
open).

BPF-side struct bpf_map_def is marked as deprecated. User-space struct
bpf_map_def has to be used internally in libbpf, so it is left
untouched. It should be enough for bpf_map__def() to be marked
deprecated to raise awareness that it goes away.

bpftool is an interesting case that utilizes libbpf to open BPF ELF
object to generate skeleton. As such, even though bpftool itself uses
full on strict libbpf mode (LIBBPF_STRICT_ALL), it has to relax it a bit
for BPF map definition handling to minimize unnecessary disruptions. So
opt-out of LIBBPF_STRICT_MAP_DEFINITIONS for bpftool. User's code that
will later use generated skeleton will make its own decision whether to
enforce LIBBPF_STRICT_MAP_DEFINITIONS or not.

There are few tests in selftests/bpf that are consciously using legacy
BPF map definitions to test libbpf functionality. For those, temporary
opt out of LIBBPF_STRICT_MAP_DEFINITIONS mode for the duration of those
tests.

  [0] Closes: https://github.com/libbpf/libbpf/issues/272

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20220120060529.1890907-4-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/bpf/bpftool/main.c                     | 9 ++++++++-
 tools/lib/bpf/bpf_helpers.h                  | 2 +-
 tools/lib/bpf/libbpf.c                       | 8 ++++++++
 tools/lib/bpf/libbpf_legacy.h                | 5 +++++
 tools/testing/selftests/bpf/prog_tests/btf.c | 4 ++++
 5 files changed, 26 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 020e91a542d5..9d01fa9de033 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -478,7 +478,14 @@ int main(int argc, char **argv)
 	}
 
 	if (!legacy_libbpf) {
-		ret = libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+		enum libbpf_strict_mode mode;
+
+		/* Allow legacy map definitions for skeleton generation.
+		 * It will still be rejected if users use LIBBPF_STRICT_ALL
+		 * mode for loading generated skeleton.
+		 */
+		mode = (__LIBBPF_STRICT_LAST - 1) & ~LIBBPF_STRICT_MAP_DEFINITIONS;
+		ret = libbpf_set_strict_mode(mode);
 		if (ret)
 			p_err("failed to enable libbpf strict mode: %d", ret);
 	}
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index 963b1060d944..44df982d2a5c 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -133,7 +133,7 @@ struct bpf_map_def {
 	unsigned int value_size;
 	unsigned int max_entries;
 	unsigned int map_flags;
-};
+} __attribute__((deprecated("use BTF-defined maps in .maps section")));
 
 enum libbpf_pin_type {
 	LIBBPF_PIN_NONE,
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index fdb3536afa7d..fc6d530e20db 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1937,6 +1937,11 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
 	if (obj->efile.maps_shndx < 0)
 		return 0;
 
+	if (libbpf_mode & LIBBPF_STRICT_MAP_DEFINITIONS) {
+		pr_warn("legacy map definitions in SEC(\"maps\") are not supported\n");
+		return -EOPNOTSUPP;
+	}
+
 	if (!symbols)
 		return -EINVAL;
 
@@ -1999,6 +2004,8 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
 			return -LIBBPF_ERRNO__FORMAT;
 		}
 
+		pr_warn("map '%s' (legacy): legacy map definitions are deprecated, use BTF-defined maps instead\n", map_name);
+
 		if (ELF64_ST_BIND(sym->st_info) == STB_LOCAL) {
 			pr_warn("map '%s' (legacy): static maps are not supported\n", map_name);
 			return -ENOTSUP;
@@ -4190,6 +4197,7 @@ static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
 		return 0;
 
 	if (!bpf_map__is_internal(map)) {
+		pr_warn("Use of BPF_ANNOTATE_KV_PAIR is deprecated, use BTF-defined maps in .maps section instead\n");
 		ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size,
 					   def->value_size, &key_type_id,
 					   &value_type_id);
diff --git a/tools/lib/bpf/libbpf_legacy.h b/tools/lib/bpf/libbpf_legacy.h
index 79131f761a27..3c2b281c2bc3 100644
--- a/tools/lib/bpf/libbpf_legacy.h
+++ b/tools/lib/bpf/libbpf_legacy.h
@@ -73,6 +73,11 @@ enum libbpf_strict_mode {
 	 * operation.
 	 */
 	LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK = 0x10,
+	/*
+	 * Error out on any SEC("maps") map definition, which are deprecated
+	 * in favor of BTF-defined map definitions in SEC(".maps").
+	 */
+	LIBBPF_STRICT_MAP_DEFINITIONS = 0x20,
 
 	__LIBBPF_STRICT_LAST,
 };
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 8ba53acf9eb4..14f9b6136794 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -4560,6 +4560,8 @@ static void do_test_file(unsigned int test_num)
 	has_btf_ext = btf_ext != NULL;
 	btf_ext__free(btf_ext);
 
+	/* temporary disable LIBBPF_STRICT_MAP_DEFINITIONS to test legacy maps */
+	libbpf_set_strict_mode((__LIBBPF_STRICT_LAST - 1) & ~LIBBPF_STRICT_MAP_DEFINITIONS);
 	obj = bpf_object__open(test->file);
 	err = libbpf_get_error(obj);
 	if (CHECK(err, "obj: %d", err))
@@ -4684,6 +4686,8 @@ skip:
 	fprintf(stderr, "OK");
 
 done:
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
 	btf__free(btf);
 	free(func_info);
 	bpf_object__close(obj);
-- 
cgit 


From 544356524dd6ff5b0bb9099861ab8493a4387def Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 19 Jan 2022 22:14:21 -0800
Subject: selftests/bpf: switch to new libbpf XDP APIs

Switch to using new bpf_xdp_*() APIs across all selftests. Take
advantage of a more straightforward and user-friendly semantics of
old_prog_fd (0 means "don't care") in few places.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20220120061422.2710637-4-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../testing/selftests/bpf/prog_tests/xdp_attach.c  | 29 ++++++++++------------
 .../selftests/bpf/prog_tests/xdp_cpumap_attach.c   |  8 +++---
 .../selftests/bpf/prog_tests/xdp_devmap_attach.c   |  8 +++---
 tools/testing/selftests/bpf/prog_tests/xdp_info.c  | 14 +++++------
 tools/testing/selftests/bpf/prog_tests/xdp_link.c  | 26 +++++++++----------
 tools/testing/selftests/bpf/xdp_redirect_multi.c   |  8 +++---
 tools/testing/selftests/bpf/xdping.c               |  4 +--
 7 files changed, 47 insertions(+), 50 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
index c6fa390e3aa1..62aa3edda5e6 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
@@ -11,8 +11,7 @@ void serial_test_xdp_attach(void)
 	const char *file = "./test_xdp.o";
 	struct bpf_prog_info info = {};
 	int err, fd1, fd2, fd3;
-	DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts,
-			    .old_fd = -1);
+	LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
 
 	len = sizeof(info);
 
@@ -38,49 +37,47 @@ void serial_test_xdp_attach(void)
 	if (CHECK_FAIL(err))
 		goto out_2;
 
-	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd1, XDP_FLAGS_REPLACE,
-				       &opts);
+	err = bpf_xdp_attach(IFINDEX_LO, fd1, XDP_FLAGS_REPLACE, &opts);
 	if (CHECK(err, "load_ok", "initial load failed"))
 		goto out_close;
 
-	err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+	err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0);
 	if (CHECK(err || id0 != id1, "id1_check",
 		  "loaded prog id %u != id1 %u, err %d", id0, id1, err))
 		goto out_close;
 
-	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd2, XDP_FLAGS_REPLACE,
-				       &opts);
+	err = bpf_xdp_attach(IFINDEX_LO, fd2, XDP_FLAGS_REPLACE, &opts);
 	if (CHECK(!err, "load_fail", "load with expected id didn't fail"))
 		goto out;
 
-	opts.old_fd = fd1;
-	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd2, 0, &opts);
+	opts.old_prog_fd = fd1;
+	err = bpf_xdp_attach(IFINDEX_LO, fd2, 0, &opts);
 	if (CHECK(err, "replace_ok", "replace valid old_fd failed"))
 		goto out;
-	err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+	err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0);
 	if (CHECK(err || id0 != id2, "id2_check",
 		  "loaded prog id %u != id2 %u, err %d", id0, id2, err))
 		goto out_close;
 
-	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd3, 0, &opts);
+	err = bpf_xdp_attach(IFINDEX_LO, fd3, 0, &opts);
 	if (CHECK(!err, "replace_fail", "replace invalid old_fd didn't fail"))
 		goto out;
 
-	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, 0, &opts);
+	err = bpf_xdp_detach(IFINDEX_LO, 0, &opts);
 	if (CHECK(!err, "remove_fail", "remove invalid old_fd didn't fail"))
 		goto out;
 
-	opts.old_fd = fd2;
-	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, 0, &opts);
+	opts.old_prog_fd = fd2;
+	err = bpf_xdp_detach(IFINDEX_LO, 0, &opts);
 	if (CHECK(err, "remove_ok", "remove valid old_fd failed"))
 		goto out;
 
-	err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+	err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0);
 	if (CHECK(err || id0 != 0, "unload_check",
 		  "loaded prog id %u != 0, err %d", id0, err))
 		goto out_close;
 out:
-	bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0);
+	bpf_xdp_detach(IFINDEX_LO, 0, NULL);
 out_close:
 	bpf_object__close(obj3);
 out_2:
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
index fd812bd43600..abe9d9f988ec 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
@@ -23,11 +23,11 @@ void serial_test_xdp_cpumap_attach(void)
 		return;
 
 	prog_fd = bpf_program__fd(skel->progs.xdp_redir_prog);
-	err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
+	err = bpf_xdp_attach(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE, NULL);
 	if (!ASSERT_OK(err, "Generic attach of program with 8-byte CPUMAP"))
 		goto out_close;
 
-	err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
+	err = bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL);
 	ASSERT_OK(err, "XDP program detach");
 
 	prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm);
@@ -45,9 +45,9 @@ void serial_test_xdp_cpumap_attach(void)
 	ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to cpumap entry prog_id");
 
 	/* can not attach BPF_XDP_CPUMAP program to a device */
-	err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
+	err = bpf_xdp_attach(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE, NULL);
 	if (!ASSERT_NEQ(err, 0, "Attach of BPF_XDP_CPUMAP program"))
-		bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
+		bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL);
 
 	val.qsize = 192;
 	val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog);
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
index 3079d5568f8f..fc1a40c3193b 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
@@ -25,11 +25,11 @@ static void test_xdp_with_devmap_helpers(void)
 		return;
 
 	dm_fd = bpf_program__fd(skel->progs.xdp_redir_prog);
-	err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE);
+	err = bpf_xdp_attach(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE, NULL);
 	if (!ASSERT_OK(err, "Generic attach of program with 8-byte devmap"))
 		goto out_close;
 
-	err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
+	err = bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL);
 	ASSERT_OK(err, "XDP program detach");
 
 	dm_fd = bpf_program__fd(skel->progs.xdp_dummy_dm);
@@ -47,9 +47,9 @@ static void test_xdp_with_devmap_helpers(void)
 	ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to devmap entry prog_id");
 
 	/* can not attach BPF_XDP_DEVMAP program to a device */
-	err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE);
+	err = bpf_xdp_attach(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE, NULL);
 	if (!ASSERT_NEQ(err, 0, "Attach of BPF_XDP_DEVMAP program"))
-		bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
+		bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL);
 
 	val.ifindex = 1;
 	val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog);
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_info.c b/tools/testing/selftests/bpf/prog_tests/xdp_info.c
index abe48e82e1dc..0d01ff6cb91a 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_info.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_info.c
@@ -14,13 +14,13 @@ void serial_test_xdp_info(void)
 
 	/* Get prog_id for XDP_ATTACHED_NONE mode */
 
-	err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, 0);
+	err = bpf_xdp_query_id(IFINDEX_LO, 0, &prog_id);
 	if (CHECK(err, "get_xdp_none", "errno=%d\n", errno))
 		return;
 	if (CHECK(prog_id, "prog_id_none", "unexpected prog_id=%u\n", prog_id))
 		return;
 
-	err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, XDP_FLAGS_SKB_MODE);
+	err = bpf_xdp_query_id(IFINDEX_LO, XDP_FLAGS_SKB_MODE, &prog_id);
 	if (CHECK(err, "get_xdp_none_skb", "errno=%d\n", errno))
 		return;
 	if (CHECK(prog_id, "prog_id_none_skb", "unexpected prog_id=%u\n",
@@ -37,32 +37,32 @@ void serial_test_xdp_info(void)
 	if (CHECK(err, "get_prog_info", "errno=%d\n", errno))
 		goto out_close;
 
-	err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
+	err = bpf_xdp_attach(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE, NULL);
 	if (CHECK(err, "set_xdp_skb", "errno=%d\n", errno))
 		goto out_close;
 
 	/* Get prog_id for single prog mode */
 
-	err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, 0);
+	err = bpf_xdp_query_id(IFINDEX_LO, 0, &prog_id);
 	if (CHECK(err, "get_xdp", "errno=%d\n", errno))
 		goto out;
 	if (CHECK(prog_id != info.id, "prog_id", "prog_id not available\n"))
 		goto out;
 
-	err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, XDP_FLAGS_SKB_MODE);
+	err = bpf_xdp_query_id(IFINDEX_LO, XDP_FLAGS_SKB_MODE, &prog_id);
 	if (CHECK(err, "get_xdp_skb", "errno=%d\n", errno))
 		goto out;
 	if (CHECK(prog_id != info.id, "prog_id_skb", "prog_id not available\n"))
 		goto out;
 
-	err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, XDP_FLAGS_DRV_MODE);
+	err = bpf_xdp_query_id(IFINDEX_LO, XDP_FLAGS_DRV_MODE, &prog_id);
 	if (CHECK(err, "get_xdp_drv", "errno=%d\n", errno))
 		goto out;
 	if (CHECK(prog_id, "prog_id_drv", "unexpected prog_id=%u\n", prog_id))
 		goto out;
 
 out:
-	bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0);
+	bpf_xdp_detach(IFINDEX_LO, 0, NULL);
 out_close:
 	bpf_object__close(obj);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_link.c b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
index 983ab0b47d30..0c5e4ea8eaae 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_link.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
@@ -9,8 +9,8 @@
 void serial_test_xdp_link(void)
 {
 	__u32 duration = 0, id1, id2, id0 = 0, prog_fd1, prog_fd2, err;
-	DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts, .old_fd = -1);
 	struct test_xdp_link *skel1 = NULL, *skel2 = NULL;
+	LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
 	struct bpf_link_info link_info;
 	struct bpf_prog_info prog_info;
 	struct bpf_link *link;
@@ -40,12 +40,12 @@ void serial_test_xdp_link(void)
 	id2 = prog_info.id;
 
 	/* set initial prog attachment */
-	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd1, XDP_FLAGS_REPLACE, &opts);
+	err = bpf_xdp_attach(IFINDEX_LO, prog_fd1, XDP_FLAGS_REPLACE, &opts);
 	if (CHECK(err, "fd_attach", "initial prog attach failed: %d\n", err))
 		goto cleanup;
 
 	/* validate prog ID */
-	err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+	err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0);
 	CHECK(err || id0 != id1, "id1_check",
 	      "loaded prog id %u != id1 %u, err %d", id0, id1, err);
 
@@ -54,14 +54,14 @@ void serial_test_xdp_link(void)
 	if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
 		bpf_link__destroy(link);
 		/* best-effort detach prog */
-		opts.old_fd = prog_fd1;
-		bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, XDP_FLAGS_REPLACE, &opts);
+		opts.old_prog_fd = prog_fd1;
+		bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_REPLACE, &opts);
 		goto cleanup;
 	}
 
 	/* detach BPF program */
-	opts.old_fd = prog_fd1;
-	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, XDP_FLAGS_REPLACE, &opts);
+	opts.old_prog_fd = prog_fd1;
+	err = bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_REPLACE, &opts);
 	if (CHECK(err, "prog_detach", "failed %d\n", err))
 		goto cleanup;
 
@@ -72,24 +72,24 @@ void serial_test_xdp_link(void)
 	skel1->links.xdp_handler = link;
 
 	/* validate prog ID */
-	err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+	err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0);
 	if (CHECK(err || id0 != id1, "id1_check",
 		  "loaded prog id %u != id1 %u, err %d", id0, id1, err))
 		goto cleanup;
 
 	/* BPF prog attach is not allowed to replace BPF link */
-	opts.old_fd = prog_fd1;
-	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd2, XDP_FLAGS_REPLACE, &opts);
+	opts.old_prog_fd = prog_fd1;
+	err = bpf_xdp_attach(IFINDEX_LO, prog_fd2, XDP_FLAGS_REPLACE, &opts);
 	if (CHECK(!err, "prog_attach_fail", "unexpected success\n"))
 		goto cleanup;
 
 	/* Can't force-update when BPF link is active */
-	err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd2, 0);
+	err = bpf_xdp_attach(IFINDEX_LO, prog_fd2, 0, NULL);
 	if (CHECK(!err, "prog_update_fail", "unexpected success\n"))
 		goto cleanup;
 
 	/* Can't force-detach when BPF link is active */
-	err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0);
+	err = bpf_xdp_detach(IFINDEX_LO, 0, NULL);
 	if (CHECK(!err, "prog_detach_fail", "unexpected success\n"))
 		goto cleanup;
 
@@ -109,7 +109,7 @@ void serial_test_xdp_link(void)
 		goto cleanup;
 	skel2->links.xdp_handler = link;
 
-	err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+	err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0);
 	if (CHECK(err || id0 != id2, "id2_check",
 		  "loaded prog id %u != id2 %u, err %d", id0, id1, err))
 		goto cleanup;
diff --git a/tools/testing/selftests/bpf/xdp_redirect_multi.c b/tools/testing/selftests/bpf/xdp_redirect_multi.c
index 51c8224b4ccc..aaedbf4955c3 100644
--- a/tools/testing/selftests/bpf/xdp_redirect_multi.c
+++ b/tools/testing/selftests/bpf/xdp_redirect_multi.c
@@ -32,12 +32,12 @@ static void int_exit(int sig)
 	int i;
 
 	for (i = 0; ifaces[i] > 0; i++) {
-		if (bpf_get_link_xdp_id(ifaces[i], &prog_id, xdp_flags)) {
-			printf("bpf_get_link_xdp_id failed\n");
+		if (bpf_xdp_query_id(ifaces[i], xdp_flags, &prog_id)) {
+			printf("bpf_xdp_query_id failed\n");
 			exit(1);
 		}
 		if (prog_id)
-			bpf_set_link_xdp_fd(ifaces[i], -1, xdp_flags);
+			bpf_xdp_detach(ifaces[i], xdp_flags, NULL);
 	}
 
 	exit(0);
@@ -210,7 +210,7 @@ int main(int argc, char **argv)
 		}
 
 		/* bind prog_fd to each interface */
-		ret = bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags);
+		ret = bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL);
 		if (ret) {
 			printf("Set xdp fd failed on %d\n", ifindex);
 			goto err_out;
diff --git a/tools/testing/selftests/bpf/xdping.c b/tools/testing/selftests/bpf/xdping.c
index baa870a759a2..c567856fd1bc 100644
--- a/tools/testing/selftests/bpf/xdping.c
+++ b/tools/testing/selftests/bpf/xdping.c
@@ -29,7 +29,7 @@ static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
 
 static void cleanup(int sig)
 {
-	bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+	bpf_xdp_detach(ifindex, xdp_flags, NULL);
 	if (sig)
 		exit(1);
 }
@@ -203,7 +203,7 @@ int main(int argc, char **argv)
 
 	printf("XDP setup disrupts network connectivity, hit Ctrl+C to quit\n");
 
-	if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
+	if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) {
 		fprintf(stderr, "Link set xdp fd failed for %s\n", ifname);
 		goto done;
 	}
-- 
cgit 


From 820e6e227c4053b6b631ae65ef1f65d560cb392b Mon Sep 17 00:00:00 2001
From: Di Zhu <zhudi2@huawei.com>
Date: Wed, 19 Jan 2022 09:40:05 +0800
Subject: selftests: bpf: test BPF_PROG_QUERY for progs attached to sockmap

Add test for querying progs attached to sockmap. we use an existing
libbpf query interface to query prog cnt before and after progs
attaching to sockmap and check whether the queried prog id is right.

Signed-off-by: Di Zhu <zhudi2@huawei.com>
Acked-by: Yonghong Song <yhs@fb.com>
Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/r/20220119014005.1209-2-zhudi2@huawei.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../selftests/bpf/prog_tests/sockmap_basic.c       | 66 ++++++++++++++++++++++
 .../selftests/bpf/progs/test_sockmap_progs_query.c | 24 ++++++++
 2 files changed, 90 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index 85db0f4cdd95..b97a8f236b3a 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -8,6 +8,7 @@
 #include "test_sockmap_update.skel.h"
 #include "test_sockmap_invalid_update.skel.h"
 #include "test_sockmap_skb_verdict_attach.skel.h"
+#include "test_sockmap_progs_query.skel.h"
 #include "bpf_iter_sockmap.skel.h"
 
 #define TCP_REPAIR		19	/* TCP sock is under repair right now */
@@ -315,6 +316,63 @@ out:
 	test_sockmap_skb_verdict_attach__destroy(skel);
 }
 
+static __u32 query_prog_id(int prog_fd)
+{
+	struct bpf_prog_info info = {};
+	__u32 info_len = sizeof(info);
+	int err;
+
+	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+	if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd") ||
+	    !ASSERT_EQ(info_len, sizeof(info), "bpf_obj_get_info_by_fd"))
+		return 0;
+
+	return info.id;
+}
+
+static void test_sockmap_progs_query(enum bpf_attach_type attach_type)
+{
+	struct test_sockmap_progs_query *skel;
+	int err, map_fd, verdict_fd;
+	__u32 attach_flags = 0;
+	__u32 prog_ids[3] = {};
+	__u32 prog_cnt = 3;
+
+	skel = test_sockmap_progs_query__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "test_sockmap_progs_query__open_and_load"))
+		return;
+
+	map_fd = bpf_map__fd(skel->maps.sock_map);
+
+	if (attach_type == BPF_SK_MSG_VERDICT)
+		verdict_fd = bpf_program__fd(skel->progs.prog_skmsg_verdict);
+	else
+		verdict_fd = bpf_program__fd(skel->progs.prog_skb_verdict);
+
+	err = bpf_prog_query(map_fd, attach_type, 0 /* query flags */,
+			     &attach_flags, prog_ids, &prog_cnt);
+	ASSERT_OK(err, "bpf_prog_query failed");
+	ASSERT_EQ(attach_flags,  0, "wrong attach_flags on query");
+	ASSERT_EQ(prog_cnt, 0, "wrong program count on query");
+
+	err = bpf_prog_attach(verdict_fd, map_fd, attach_type, 0);
+	if (!ASSERT_OK(err, "bpf_prog_attach failed"))
+		goto out;
+
+	prog_cnt = 1;
+	err = bpf_prog_query(map_fd, attach_type, 0 /* query flags */,
+			     &attach_flags, prog_ids, &prog_cnt);
+	ASSERT_OK(err, "bpf_prog_query failed");
+	ASSERT_EQ(attach_flags, 0, "wrong attach_flags on query");
+	ASSERT_EQ(prog_cnt, 1, "wrong program count on query");
+	ASSERT_EQ(prog_ids[0], query_prog_id(verdict_fd),
+		  "wrong prog_ids on query");
+
+	bpf_prog_detach2(verdict_fd, map_fd, attach_type);
+out:
+	test_sockmap_progs_query__destroy(skel);
+}
+
 void test_sockmap_basic(void)
 {
 	if (test__start_subtest("sockmap create_update_free"))
@@ -341,4 +399,12 @@ void test_sockmap_basic(void)
 		test_sockmap_skb_verdict_attach(BPF_SK_SKB_STREAM_VERDICT,
 						BPF_SK_SKB_VERDICT);
 	}
+	if (test__start_subtest("sockmap msg_verdict progs query"))
+		test_sockmap_progs_query(BPF_SK_MSG_VERDICT);
+	if (test__start_subtest("sockmap stream_parser progs query"))
+		test_sockmap_progs_query(BPF_SK_SKB_STREAM_PARSER);
+	if (test__start_subtest("sockmap stream_verdict progs query"))
+		test_sockmap_progs_query(BPF_SK_SKB_STREAM_VERDICT);
+	if (test__start_subtest("sockmap skb_verdict progs query"))
+		test_sockmap_progs_query(BPF_SK_SKB_VERDICT);
 }
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c b/tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c
new file mode 100644
index 000000000000..9d58d61c0dee
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKMAP);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u64);
+} sock_map SEC(".maps");
+
+SEC("sk_skb")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+	return SK_PASS;
+}
+
+SEC("sk_msg")
+int prog_skmsg_verdict(struct sk_msg_md *msg)
+{
+	return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit 


From d99173027d6803430fd60e61aab3006644e18628 Mon Sep 17 00:00:00 2001
From: Eelco Chaudron <echaudro@redhat.com>
Date: Fri, 21 Jan 2022 11:09:56 +0100
Subject: bpf: add frags support to xdp copy helpers

This patch adds support for frags for the following helpers:
  - bpf_xdp_output()
  - bpf_perf_event_output()

Acked-by: Toke Hoiland-Jorgensen <toke@redhat.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://lore.kernel.org/r/340b4a99cdc24337b40eaf8bb597f9f9e7b0373e.1642758637.git.lorenzo@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/trace/bpf_trace.c                           |   3 +
 net/core/filter.c                                  |  57 ++++++++++-
 .../testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c | 111 +++++++++++++++------
 .../testing/selftests/bpf/progs/test_xdp_bpf2bpf.c |   2 +-
 4 files changed, 137 insertions(+), 36 deletions(-)

(limited to 'tools/testing')

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 21aa30644219..06a9e220069e 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1562,6 +1562,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
 
 extern const struct bpf_func_proto bpf_skb_output_proto;
 extern const struct bpf_func_proto bpf_xdp_output_proto;
+extern const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto;
 
 BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
 	   struct bpf_map *, map, u64, flags)
@@ -1661,6 +1662,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_sock_from_file_proto;
 	case BPF_FUNC_get_socket_cookie:
 		return &bpf_get_socket_ptr_cookie_proto;
+	case BPF_FUNC_xdp_get_buff_len:
+		return &bpf_xdp_get_buff_len_trace_proto;
 #endif
 	case BPF_FUNC_seq_printf:
 		return prog->expected_attach_type == BPF_TRACE_ITER ?
diff --git a/net/core/filter.c b/net/core/filter.c
index 70e5874f19c3..e4ce138bf925 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3796,6 +3796,15 @@ static const struct bpf_func_proto bpf_xdp_get_buff_len_proto = {
 	.arg1_type	= ARG_PTR_TO_CTX,
 };
 
+BTF_ID_LIST_SINGLE(bpf_xdp_get_buff_len_bpf_ids, struct, xdp_buff)
+
+const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto = {
+	.func		= bpf_xdp_get_buff_len,
+	.gpl_only	= false,
+	.arg1_type	= ARG_PTR_TO_BTF_ID,
+	.arg1_btf_id	= &bpf_xdp_get_buff_len_bpf_ids[0],
+};
+
 static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
 {
 	return xdp_data_meta_unsupported(xdp) ? 0 :
@@ -4668,10 +4677,48 @@ static const struct bpf_func_proto bpf_sk_ancestor_cgroup_id_proto = {
 };
 #endif
 
-static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
+static unsigned long bpf_xdp_copy(void *dst_buff, const void *ctx,
 				  unsigned long off, unsigned long len)
 {
-	memcpy(dst_buff, src_buff + off, len);
+	struct xdp_buff *xdp = (struct xdp_buff *)ctx;
+	unsigned long ptr_len, ptr_off = 0;
+	skb_frag_t *next_frag, *end_frag;
+	struct skb_shared_info *sinfo;
+	u8 *ptr_buf;
+
+	if (likely(xdp->data_end - xdp->data >= off + len)) {
+		memcpy(dst_buff, xdp->data + off, len);
+		return 0;
+	}
+
+	sinfo = xdp_get_shared_info_from_buff(xdp);
+	end_frag = &sinfo->frags[sinfo->nr_frags];
+	next_frag = &sinfo->frags[0];
+
+	ptr_len = xdp->data_end - xdp->data;
+	ptr_buf = xdp->data;
+
+	while (true) {
+		if (off < ptr_off + ptr_len) {
+			unsigned long copy_off = off - ptr_off;
+			unsigned long copy_len = min(len, ptr_len - copy_off);
+
+			memcpy(dst_buff, ptr_buf + copy_off, copy_len);
+
+			off += copy_len;
+			len -= copy_len;
+			dst_buff += copy_len;
+		}
+
+		if (!len || next_frag == end_frag)
+			break;
+
+		ptr_off += ptr_len;
+		ptr_buf = skb_frag_address(next_frag);
+		ptr_len = skb_frag_size(next_frag);
+		next_frag++;
+	}
+
 	return 0;
 }
 
@@ -4682,11 +4729,11 @@ BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map,
 
 	if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
 		return -EINVAL;
-	if (unlikely(!xdp ||
-		     xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
+
+	if (unlikely(!xdp || xdp_size > xdp_get_buff_len(xdp)))
 		return -EFAULT;
 
-	return bpf_event_output(map, flags, meta, meta_size, xdp->data,
+	return bpf_event_output(map, flags, meta, meta_size, xdp,
 				xdp_size, bpf_xdp_copy);
 }
 
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
index 500a302cb3e9..9c395ea680c6 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
@@ -10,28 +10,97 @@ struct meta {
 	int pkt_len;
 };
 
+struct test_ctx_s {
+	bool passed;
+	int pkt_size;
+};
+
+struct test_ctx_s test_ctx;
+
 static void on_sample(void *ctx, int cpu, void *data, __u32 size)
 {
 	struct meta *meta = (struct meta *)data;
 	struct ipv4_packet *trace_pkt_v4 = data + sizeof(*meta);
+	unsigned char *raw_pkt = data + sizeof(*meta);
+	struct test_ctx_s *tst_ctx = ctx;
 
 	ASSERT_GE(size, sizeof(pkt_v4) + sizeof(*meta), "check_size");
 	ASSERT_EQ(meta->ifindex, if_nametoindex("lo"), "check_meta_ifindex");
-	ASSERT_EQ(meta->pkt_len, sizeof(pkt_v4), "check_meta_pkt_len");
+	ASSERT_EQ(meta->pkt_len, tst_ctx->pkt_size, "check_meta_pkt_len");
 	ASSERT_EQ(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)), 0,
 		  "check_packet_content");
 
-	*(bool *)ctx = true;
+	if (meta->pkt_len > sizeof(pkt_v4)) {
+		for (int i = 0; i < meta->pkt_len - sizeof(pkt_v4); i++)
+			ASSERT_EQ(raw_pkt[i + sizeof(pkt_v4)], (unsigned char)i,
+				  "check_packet_content");
+	}
+
+	tst_ctx->passed = true;
 }
 
-void test_xdp_bpf2bpf(void)
+#define BUF_SZ	9000
+
+static void run_xdp_bpf2bpf_pkt_size(int pkt_fd, struct perf_buffer *pb,
+				     struct test_xdp_bpf2bpf *ftrace_skel,
+				     int pkt_size)
 {
 	__u32 duration = 0, retval, size;
-	char buf[128];
+	__u8 *buf, *buf_in;
+	int err;
+
+	if (!ASSERT_LE(pkt_size, BUF_SZ, "pkt_size") ||
+	    !ASSERT_GE(pkt_size, sizeof(pkt_v4), "pkt_size"))
+		return;
+
+	buf_in = malloc(BUF_SZ);
+	if (!ASSERT_OK_PTR(buf_in, "buf_in malloc()"))
+		return;
+
+	buf = malloc(BUF_SZ);
+	if (!ASSERT_OK_PTR(buf, "buf malloc()")) {
+		free(buf_in);
+		return;
+	}
+
+	test_ctx.passed = false;
+	test_ctx.pkt_size = pkt_size;
+
+	memcpy(buf_in, &pkt_v4, sizeof(pkt_v4));
+	if (pkt_size > sizeof(pkt_v4)) {
+		for (int i = 0; i < (pkt_size - sizeof(pkt_v4)); i++)
+			buf_in[i + sizeof(pkt_v4)] = i;
+	}
+
+	/* Run test program */
+	err = bpf_prog_test_run(pkt_fd, 1, buf_in, pkt_size,
+				buf, &size, &retval, &duration);
+
+	ASSERT_OK(err, "ipv4");
+	ASSERT_EQ(retval, XDP_PASS, "ipv4 retval");
+	ASSERT_EQ(size, pkt_size, "ipv4 size");
+
+	/* Make sure bpf_xdp_output() was triggered and it sent the expected
+	 * data to the perf ring buffer.
+	 */
+	err = perf_buffer__poll(pb, 100);
+
+	ASSERT_GE(err, 0, "perf_buffer__poll");
+	ASSERT_TRUE(test_ctx.passed, "test passed");
+	/* Verify test results */
+	ASSERT_EQ(ftrace_skel->bss->test_result_fentry, if_nametoindex("lo"),
+		  "fentry result");
+	ASSERT_EQ(ftrace_skel->bss->test_result_fexit, XDP_PASS, "fexit result");
+
+	free(buf);
+	free(buf_in);
+}
+
+void test_xdp_bpf2bpf(void)
+{
 	int err, pkt_fd, map_fd;
-	bool passed = false;
-	struct iphdr iph;
-	struct iptnl_info value4 = {.family = AF_INET};
+	int pkt_sizes[] = {sizeof(pkt_v4), 1024, 4100, 8200};
+	struct iptnl_info value4 = {.family = AF_INET6};
 	struct test_xdp *pkt_skel = NULL;
 	struct test_xdp_bpf2bpf *ftrace_skel = NULL;
 	struct vip key4 = {.protocol = 6, .family = AF_INET};
@@ -73,32 +142,14 @@ void test_xdp_bpf2bpf(void)
 		goto out;
 
 	/* Set up perf buffer */
-	pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map), 1,
-			      on_sample, NULL, &passed, NULL);
+	pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map), 8,
+			      on_sample, NULL, &test_ctx, NULL);
 	if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
 		goto out;
 
-	/* Run test program */
-	err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4),
-				buf, &size, &retval, &duration);
-	memcpy(&iph, buf + sizeof(struct ethhdr), sizeof(iph));
-
-	ASSERT_OK(err, "ipv4");
-	ASSERT_EQ(retval, XDP_TX, "ipv4 retval");
-	ASSERT_EQ(size, 74, "ipv4 size");
-	ASSERT_EQ(iph.protocol, IPPROTO_IPIP, "ipv4 proto");
-
-	/* Make sure bpf_xdp_output() was triggered and it sent the expected
-	 * data to the perf ring buffer.
-	 */
-	err = perf_buffer__poll(pb, 100);
-
-	ASSERT_GE(err, 0, "perf_buffer__poll");
-	ASSERT_TRUE(passed, "test passed");
-	/* Verify test results */
-	ASSERT_EQ(ftrace_skel->bss->test_result_fentry, if_nametoindex("lo"),
-		  "fentry result");
-	ASSERT_EQ(ftrace_skel->bss->test_result_fexit, XDP_TX, "fexit result");
+	for (int i = 0; i < ARRAY_SIZE(pkt_sizes); i++)
+		run_xdp_bpf2bpf_pkt_size(pkt_fd, pb, ftrace_skel,
+					 pkt_sizes[i]);
 out:
 	perf_buffer__free(pb);
 	test_xdp__destroy(pkt_skel);
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
index 58cf4345f5cc..3379d303f41a 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
@@ -49,7 +49,7 @@ int BPF_PROG(trace_on_entry, struct xdp_buff *xdp)
 	void *data = (void *)(long)xdp->data;
 
 	meta.ifindex = xdp->rxq->dev->ifindex;
-	meta.pkt_len = data_end - data;
+	meta.pkt_len = bpf_xdp_get_buff_len((struct xdp_md *)xdp);
 	bpf_xdp_output(xdp, &perf_buf_map,
 		       ((__u64) meta.pkt_len << 32) |
 		       BPF_F_CURRENT_CPU,
-- 
cgit 


From 110221081aac19ae147e472f590abe20a750dd25 Mon Sep 17 00:00:00 2001
From: Eelco Chaudron <echaudro@redhat.com>
Date: Fri, 21 Jan 2022 11:10:00 +0100
Subject: bpf: selftests: update xdp_adjust_tail selftest to include xdp frags

This change adds test cases for the xdp frags scenarios when shrinking
and growing.

Acked-by: Toke Hoiland-Jorgensen <toke@redhat.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Co-developed-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Link: https://lore.kernel.org/r/d2e6a0ebc52db6f89e62b9befe045032e5e0a5fe.1642758637.git.lorenzo@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../selftests/bpf/prog_tests/xdp_adjust_tail.c     | 125 +++++++++++++++++++++
 .../bpf/progs/test_xdp_adjust_tail_grow.c          |  10 +-
 .../bpf/progs/test_xdp_adjust_tail_shrink.c        |  32 +++++-
 3 files changed, 160 insertions(+), 7 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
index d5ebe92b0ebb..ccc9e63254a8 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
@@ -118,6 +118,127 @@ static void test_xdp_adjust_tail_grow2(void)
 	bpf_object__close(obj);
 }
 
+void test_xdp_adjust_frags_tail_shrink(void)
+{
+	const char *file = "./test_xdp_adjust_tail_shrink.o";
+	__u32 duration, retval, size, exp_size;
+	struct bpf_program *prog;
+	struct bpf_object *obj;
+	int err, prog_fd;
+	__u8 *buf;
+
+	/* For the individual test cases, the first byte in the packet
+	 * indicates which test will be run.
+	 */
+	obj = bpf_object__open(file);
+	if (libbpf_get_error(obj))
+		return;
+
+	prog = bpf_object__next_program(obj, NULL);
+	if (bpf_object__load(obj))
+		return;
+
+	prog_fd = bpf_program__fd(prog);
+
+	buf = malloc(9000);
+	if (!ASSERT_OK_PTR(buf, "alloc buf 9Kb"))
+		goto out;
+
+	memset(buf, 0, 9000);
+
+	/* Test case removing 10 bytes from last frag, NOT freeing it */
+	exp_size = 8990; /* 9000 - 10 */
+	err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
+				buf, &size, &retval, &duration);
+
+	ASSERT_OK(err, "9Kb-10b");
+	ASSERT_EQ(retval, XDP_TX, "9Kb-10b retval");
+	ASSERT_EQ(size, exp_size, "9Kb-10b size");
+
+	/* Test case removing one of two pages, assuming 4K pages */
+	buf[0] = 1;
+	exp_size = 4900; /* 9000 - 4100 */
+	err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
+				buf, &size, &retval, &duration);
+
+	ASSERT_OK(err, "9Kb-4Kb");
+	ASSERT_EQ(retval, XDP_TX, "9Kb-4Kb retval");
+	ASSERT_EQ(size, exp_size, "9Kb-4Kb size");
+
+	/* Test case removing two pages resulting in a linear xdp_buff */
+	buf[0] = 2;
+	exp_size = 800; /* 9000 - 8200 */
+	err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
+				buf, &size, &retval, &duration);
+
+	ASSERT_OK(err, "9Kb-9Kb");
+	ASSERT_EQ(retval, XDP_TX, "9Kb-9Kb retval");
+	ASSERT_EQ(size, exp_size, "9Kb-9Kb size");
+
+	free(buf);
+out:
+	bpf_object__close(obj);
+}
+
+void test_xdp_adjust_frags_tail_grow(void)
+{
+	const char *file = "./test_xdp_adjust_tail_grow.o";
+	__u32 duration, retval, size, exp_size;
+	struct bpf_program *prog;
+	struct bpf_object *obj;
+	int err, i, prog_fd;
+	__u8 *buf;
+
+	obj = bpf_object__open(file);
+	if (libbpf_get_error(obj))
+		return;
+
+	prog = bpf_object__next_program(obj, NULL);
+	if (bpf_object__load(obj))
+		return;
+
+	prog_fd = bpf_program__fd(prog);
+
+	buf = malloc(16384);
+	if (!ASSERT_OK_PTR(buf, "alloc buf 16Kb"))
+		goto out;
+
+	/* Test case add 10 bytes to last frag */
+	memset(buf, 1, 16384);
+	size = 9000;
+	exp_size = size + 10;
+	err = bpf_prog_test_run(prog_fd, 1, buf, size,
+				buf, &size, &retval, &duration);
+
+	ASSERT_OK(err, "9Kb+10b");
+	ASSERT_EQ(retval, XDP_TX, "9Kb+10b retval");
+	ASSERT_EQ(size, exp_size, "9Kb+10b size");
+
+	for (i = 0; i < 9000; i++)
+		ASSERT_EQ(buf[i], 1, "9Kb+10b-old");
+
+	for (i = 9000; i < 9010; i++)
+		ASSERT_EQ(buf[i], 0, "9Kb+10b-new");
+
+	for (i = 9010; i < 16384; i++)
+		ASSERT_EQ(buf[i], 1, "9Kb+10b-untouched");
+
+	/* Test a too large grow */
+	memset(buf, 1, 16384);
+	size = 9001;
+	exp_size = size;
+	err = bpf_prog_test_run(prog_fd, 1, buf, size,
+				buf, &size, &retval, &duration);
+
+	ASSERT_OK(err, "9Kb+10b");
+	ASSERT_EQ(retval, XDP_DROP, "9Kb+10b retval");
+	ASSERT_EQ(size, exp_size, "9Kb+10b size");
+
+	free(buf);
+out:
+	bpf_object__close(obj);
+}
+
 void test_xdp_adjust_tail(void)
 {
 	if (test__start_subtest("xdp_adjust_tail_shrink"))
@@ -126,4 +247,8 @@ void test_xdp_adjust_tail(void)
 		test_xdp_adjust_tail_grow();
 	if (test__start_subtest("xdp_adjust_tail_grow2"))
 		test_xdp_adjust_tail_grow2();
+	if (test__start_subtest("xdp_adjust_frags_tail_shrink"))
+		test_xdp_adjust_frags_tail_shrink();
+	if (test__start_subtest("xdp_adjust_frags_tail_grow"))
+		test_xdp_adjust_frags_tail_grow();
 }
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
index 199c61b7d062..53b64c999450 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
@@ -7,11 +7,10 @@ int _xdp_adjust_tail_grow(struct xdp_md *xdp)
 {
 	void *data_end = (void *)(long)xdp->data_end;
 	void *data = (void *)(long)xdp->data;
-	unsigned int data_len;
+	int data_len = bpf_xdp_get_buff_len(xdp);
 	int offset = 0;
 
 	/* Data length determine test case */
-	data_len = data_end - data;
 
 	if (data_len == 54) { /* sizeof(pkt_v4) */
 		offset = 4096; /* test too large offset */
@@ -20,7 +19,12 @@ int _xdp_adjust_tail_grow(struct xdp_md *xdp)
 	} else if (data_len == 64) {
 		offset = 128;
 	} else if (data_len == 128) {
-		offset = 4096 - 256 - 320 - data_len; /* Max tail grow 3520 */
+		/* Max tail grow 3520 */
+		offset = 4096 - 256 - 320 - data_len;
+	} else if (data_len == 9000) {
+		offset = 10;
+	} else if (data_len == 9001) {
+		offset = 4096;
 	} else {
 		return XDP_ABORTED; /* No matching test */
 	}
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c
index b7448253d135..ca68c038357c 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c
@@ -12,14 +12,38 @@
 SEC("xdp")
 int _xdp_adjust_tail_shrink(struct xdp_md *xdp)
 {
-	void *data_end = (void *)(long)xdp->data_end;
-	void *data = (void *)(long)xdp->data;
+	__u8 *data_end = (void *)(long)xdp->data_end;
+	__u8 *data = (void *)(long)xdp->data;
 	int offset = 0;
 
-	if (data_end - data == 54) /* sizeof(pkt_v4) */
+	switch (bpf_xdp_get_buff_len(xdp)) {
+	case 54:
+		/* sizeof(pkt_v4) */
 		offset = 256; /* shrink too much */
-	else
+		break;
+	case 9000:
+		/* non-linear buff test cases */
+		if (data + 1 > data_end)
+			return XDP_DROP;
+
+		switch (data[0]) {
+		case 0:
+			offset = 10;
+			break;
+		case 1:
+			offset = 4100;
+			break;
+		case 2:
+			offset = 8200;
+			break;
+		default:
+			return XDP_DROP;
+		}
+		break;
+	default:
 		offset = 20;
+		break;
+	}
 	if (bpf_xdp_adjust_tail(xdp, 0 - offset))
 		return XDP_DROP;
 	return XDP_TX;
-- 
cgit 


From 6db28e24ae46d037481f344fee6abf2d088748f8 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 21 Jan 2022 11:10:04 +0100
Subject: bpf: selftests: introduce bpf_xdp_{load,store}_bytes selftest

Introduce kernel selftest for new bpf_xdp_{load,store}_bytes helpers.
and bpf_xdp_pointer/bpf_xdp_copy_buf utility routines.

Acked-by: Toke Hoiland-Jorgensen <toke@redhat.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://lore.kernel.org/r/2c99ae663a5dcfbd9240b1d0489ad55dea4f4601.1642758637.git.lorenzo@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../selftests/bpf/prog_tests/xdp_adjust_frags.c    | 104 +++++++++++++++++++++
 .../selftests/bpf/progs/test_xdp_update_frags.c    |  42 +++++++++
 2 files changed, 146 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_xdp_update_frags.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c
new file mode 100644
index 000000000000..31c188666e81
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+
+void test_xdp_update_frags(void)
+{
+	const char *file = "./test_xdp_update_frags.o";
+	__u32 duration, retval, size;
+	struct bpf_program *prog;
+	struct bpf_object *obj;
+	int err, prog_fd;
+	__u32 *offset;
+	__u8 *buf;
+
+	obj = bpf_object__open(file);
+	if (libbpf_get_error(obj))
+		return;
+
+	prog = bpf_object__next_program(obj, NULL);
+	if (bpf_object__load(obj))
+		return;
+
+	prog_fd = bpf_program__fd(prog);
+
+	buf = malloc(128);
+	if (!ASSERT_OK_PTR(buf, "alloc buf 128b"))
+		goto out;
+
+	memset(buf, 0, 128);
+	offset = (__u32 *)buf;
+	*offset = 16;
+	buf[*offset] = 0xaa;		/* marker at offset 16 (head) */
+	buf[*offset + 15] = 0xaa;	/* marker at offset 31 (head) */
+
+	err = bpf_prog_test_run(prog_fd, 1, buf, 128,
+				buf, &size, &retval, &duration);
+
+	/* test_xdp_update_frags: buf[16,31]: 0xaa -> 0xbb */
+	ASSERT_OK(err, "xdp_update_frag");
+	ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval");
+	ASSERT_EQ(buf[16], 0xbb, "xdp_update_frag buf[16]");
+	ASSERT_EQ(buf[31], 0xbb, "xdp_update_frag buf[31]");
+
+	free(buf);
+
+	buf = malloc(9000);
+	if (!ASSERT_OK_PTR(buf, "alloc buf 9Kb"))
+		goto out;
+
+	memset(buf, 0, 9000);
+	offset = (__u32 *)buf;
+	*offset = 5000;
+	buf[*offset] = 0xaa;		/* marker at offset 5000 (frag0) */
+	buf[*offset + 15] = 0xaa;	/* marker at offset 5015 (frag0) */
+
+	err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
+				buf, &size, &retval, &duration);
+
+	/* test_xdp_update_frags: buf[5000,5015]: 0xaa -> 0xbb */
+	ASSERT_OK(err, "xdp_update_frag");
+	ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval");
+	ASSERT_EQ(buf[5000], 0xbb, "xdp_update_frag buf[5000]");
+	ASSERT_EQ(buf[5015], 0xbb, "xdp_update_frag buf[5015]");
+
+	memset(buf, 0, 9000);
+	offset = (__u32 *)buf;
+	*offset = 3510;
+	buf[*offset] = 0xaa;		/* marker at offset 3510 (head) */
+	buf[*offset + 15] = 0xaa;	/* marker at offset 3525 (frag0) */
+
+	err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
+				buf, &size, &retval, &duration);
+
+	/* test_xdp_update_frags: buf[3510,3525]: 0xaa -> 0xbb */
+	ASSERT_OK(err, "xdp_update_frag");
+	ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval");
+	ASSERT_EQ(buf[3510], 0xbb, "xdp_update_frag buf[3510]");
+	ASSERT_EQ(buf[3525], 0xbb, "xdp_update_frag buf[3525]");
+
+	memset(buf, 0, 9000);
+	offset = (__u32 *)buf;
+	*offset = 7606;
+	buf[*offset] = 0xaa;		/* marker at offset 7606 (frag0) */
+	buf[*offset + 15] = 0xaa;	/* marker at offset 7621 (frag1) */
+
+	err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
+				buf, &size, &retval, &duration);
+
+	/* test_xdp_update_frags: buf[7606,7621]: 0xaa -> 0xbb */
+	ASSERT_OK(err, "xdp_update_frag");
+	ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval");
+	ASSERT_EQ(buf[7606], 0xbb, "xdp_update_frag buf[7606]");
+	ASSERT_EQ(buf[7621], 0xbb, "xdp_update_frag buf[7621]");
+
+	free(buf);
+out:
+	bpf_object__close(obj);
+}
+
+void test_xdp_adjust_frags(void)
+{
+	if (test__start_subtest("xdp_adjust_frags"))
+		test_xdp_update_frags();
+}
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_update_frags.c b/tools/testing/selftests/bpf/progs/test_xdp_update_frags.c
new file mode 100644
index 000000000000..2a3496d8e327
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_update_frags.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <bpf/bpf_helpers.h>
+
+int _version SEC("version") = 1;
+
+SEC("xdp.frags")
+int xdp_adjust_frags(struct xdp_md *xdp)
+{
+	__u8 *data_end = (void *)(long)xdp->data_end;
+	__u8 *data = (void *)(long)xdp->data;
+	__u8 val[16] = {};
+	__u32 offset;
+	int err;
+
+	if (data + sizeof(__u32) > data_end)
+		return XDP_DROP;
+
+	offset = *(__u32 *)data;
+	err = bpf_xdp_load_bytes(xdp, offset, val, sizeof(val));
+	if (err < 0)
+		return XDP_DROP;
+
+	if (val[0] != 0xaa || val[15] != 0xaa) /* marker */
+		return XDP_DROP;
+
+	val[0] = 0xbb; /* update the marker */
+	val[15] = 0xbb;
+	err = bpf_xdp_store_bytes(xdp, offset, val, sizeof(val));
+	if (err < 0)
+		return XDP_DROP;
+
+	return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit 


From 0c5e118cb4b8a885622f76e528135e86f722ba7f Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 21 Jan 2022 11:10:05 +0100
Subject: bpf: selftests: add CPUMAP/DEVMAP selftests for xdp frags

Verify compatibility checks attaching a XDP frags program to a
CPUMAP/DEVMAP

Acked-by: Toke Hoiland-Jorgensen <toke@redhat.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://lore.kernel.org/r/d94b4d35adc1e42c9ca5004e6b2cdfd75992304d.1642758637.git.lorenzo@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../selftests/bpf/prog_tests/xdp_cpumap_attach.c   | 64 +++++++++++++++++++++-
 .../selftests/bpf/prog_tests/xdp_devmap_attach.c   | 55 +++++++++++++++++++
 .../bpf/progs/test_xdp_with_cpumap_frags_helpers.c | 27 +++++++++
 .../bpf/progs/test_xdp_with_cpumap_helpers.c       |  6 ++
 .../bpf/progs/test_xdp_with_devmap_frags_helpers.c | 27 +++++++++
 .../bpf/progs/test_xdp_with_devmap_helpers.c       |  7 +++
 6 files changed, 185 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
index abe9d9f988ec..b353e1f3acb5 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
@@ -3,11 +3,12 @@
 #include <linux/if_link.h>
 #include <test_progs.h>
 
+#include "test_xdp_with_cpumap_frags_helpers.skel.h"
 #include "test_xdp_with_cpumap_helpers.skel.h"
 
 #define IFINDEX_LO	1
 
-void serial_test_xdp_cpumap_attach(void)
+void test_xdp_with_cpumap_helpers(void)
 {
 	struct test_xdp_with_cpumap_helpers *skel;
 	struct bpf_prog_info info = {};
@@ -54,6 +55,67 @@ void serial_test_xdp_cpumap_attach(void)
 	err = bpf_map_update_elem(map_fd, &idx, &val, 0);
 	ASSERT_NEQ(err, 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry");
 
+	/* Try to attach BPF_XDP program with frags to cpumap when we have
+	 * already loaded a BPF_XDP program on the map
+	 */
+	idx = 1;
+	val.qsize = 192;
+	val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_cm_frags);
+	err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+	ASSERT_NEQ(err, 0, "Add BPF_XDP program with frags to cpumap entry");
+
 out_close:
 	test_xdp_with_cpumap_helpers__destroy(skel);
 }
+
+void test_xdp_with_cpumap_frags_helpers(void)
+{
+	struct test_xdp_with_cpumap_frags_helpers *skel;
+	struct bpf_prog_info info = {};
+	__u32 len = sizeof(info);
+	struct bpf_cpumap_val val = {
+		.qsize = 192,
+	};
+	int err, frags_prog_fd, map_fd;
+	__u32 idx = 0;
+
+	skel = test_xdp_with_cpumap_frags_helpers__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "test_xdp_with_cpumap_helpers__open_and_load"))
+		return;
+
+	frags_prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm_frags);
+	map_fd = bpf_map__fd(skel->maps.cpu_map);
+	err = bpf_obj_get_info_by_fd(frags_prog_fd, &info, &len);
+	if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd"))
+		goto out_close;
+
+	val.bpf_prog.fd = frags_prog_fd;
+	err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+	ASSERT_OK(err, "Add program to cpumap entry");
+
+	err = bpf_map_lookup_elem(map_fd, &idx, &val);
+	ASSERT_OK(err, "Read cpumap entry");
+	ASSERT_EQ(info.id, val.bpf_prog.id,
+		  "Match program id to cpumap entry prog_id");
+
+	/* Try to attach BPF_XDP program to cpumap when we have
+	 * already loaded a BPF_XDP program with frags on the map
+	 */
+	idx = 1;
+	val.qsize = 192;
+	val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_cm);
+	err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+	ASSERT_NEQ(err, 0, "Add BPF_XDP program to cpumap entry");
+
+out_close:
+	test_xdp_with_cpumap_frags_helpers__destroy(skel);
+}
+
+void serial_test_xdp_cpumap_attach(void)
+{
+	if (test__start_subtest("CPUMAP with programs in entries"))
+		test_xdp_with_cpumap_helpers();
+
+	if (test__start_subtest("CPUMAP with frags programs in entries"))
+		test_xdp_with_cpumap_frags_helpers();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
index fc1a40c3193b..463a72fc3e70 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
@@ -4,6 +4,7 @@
 #include <test_progs.h>
 
 #include "test_xdp_devmap_helpers.skel.h"
+#include "test_xdp_with_devmap_frags_helpers.skel.h"
 #include "test_xdp_with_devmap_helpers.skel.h"
 
 #define IFINDEX_LO 1
@@ -56,6 +57,15 @@ static void test_xdp_with_devmap_helpers(void)
 	err = bpf_map_update_elem(map_fd, &idx, &val, 0);
 	ASSERT_NEQ(err, 0, "Add non-BPF_XDP_DEVMAP program to devmap entry");
 
+	/* Try to attach BPF_XDP program with frags to devmap when we have
+	 * already loaded a BPF_XDP program on the map
+	 */
+	idx = 1;
+	val.ifindex = 1;
+	val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_dm_frags);
+	err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+	ASSERT_NEQ(err, 0, "Add BPF_XDP program with frags to devmap entry");
+
 out_close:
 	test_xdp_with_devmap_helpers__destroy(skel);
 }
@@ -71,12 +81,57 @@ static void test_neg_xdp_devmap_helpers(void)
 	}
 }
 
+void test_xdp_with_devmap_frags_helpers(void)
+{
+	struct test_xdp_with_devmap_frags_helpers *skel;
+	struct bpf_prog_info info = {};
+	struct bpf_devmap_val val = {
+		.ifindex = IFINDEX_LO,
+	};
+	__u32 len = sizeof(info);
+	int err, dm_fd_frags, map_fd;
+	__u32 idx = 0;
+
+	skel = test_xdp_with_devmap_frags_helpers__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "test_xdp_with_devmap_helpers__open_and_load"))
+		return;
+
+	dm_fd_frags = bpf_program__fd(skel->progs.xdp_dummy_dm_frags);
+	map_fd = bpf_map__fd(skel->maps.dm_ports);
+	err = bpf_obj_get_info_by_fd(dm_fd_frags, &info, &len);
+	if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd"))
+		goto out_close;
+
+	val.bpf_prog.fd = dm_fd_frags;
+	err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+	ASSERT_OK(err, "Add frags program to devmap entry");
+
+	err = bpf_map_lookup_elem(map_fd, &idx, &val);
+	ASSERT_OK(err, "Read devmap entry");
+	ASSERT_EQ(info.id, val.bpf_prog.id,
+		  "Match program id to devmap entry prog_id");
+
+	/* Try to attach BPF_XDP program to devmap when we have
+	 * already loaded a BPF_XDP program with frags on the map
+	 */
+	idx = 1;
+	val.ifindex = 1;
+	val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_dm);
+	err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+	ASSERT_NEQ(err, 0, "Add BPF_XDP program to devmap entry");
+
+out_close:
+	test_xdp_with_devmap_frags_helpers__destroy(skel);
+}
 
 void serial_test_xdp_devmap_attach(void)
 {
 	if (test__start_subtest("DEVMAP with programs in entries"))
 		test_xdp_with_devmap_helpers();
 
+	if (test__start_subtest("DEVMAP with frags programs in entries"))
+		test_xdp_with_devmap_frags_helpers();
+
 	if (test__start_subtest("Verifier check of DEVMAP programs"))
 		test_neg_xdp_devmap_helpers();
 }
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c
new file mode 100644
index 000000000000..62fb7cd4d87a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+#define IFINDEX_LO	1
+
+struct {
+	__uint(type, BPF_MAP_TYPE_CPUMAP);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(struct bpf_cpumap_val));
+	__uint(max_entries, 4);
+} cpu_map SEC(".maps");
+
+SEC("xdp_cpumap/dummy_cm")
+int xdp_dummy_cm(struct xdp_md *ctx)
+{
+	return XDP_PASS;
+}
+
+SEC("xdp.frags/cpumap")
+int xdp_dummy_cm_frags(struct xdp_md *ctx)
+{
+	return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
index 532025057711..48007f17dfa8 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
@@ -33,4 +33,10 @@ int xdp_dummy_cm(struct xdp_md *ctx)
 	return XDP_PASS;
 }
 
+SEC("xdp.frags/cpumap")
+int xdp_dummy_cm_frags(struct xdp_md *ctx)
+{
+	return XDP_PASS;
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c
new file mode 100644
index 000000000000..e1caf510b7d2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+	__uint(type, BPF_MAP_TYPE_DEVMAP);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(struct bpf_devmap_val));
+	__uint(max_entries, 4);
+} dm_ports SEC(".maps");
+
+/* valid program on DEVMAP entry via SEC name;
+ * has access to egress and ingress ifindex
+ */
+SEC("xdp_devmap/map_prog")
+int xdp_dummy_dm(struct xdp_md *ctx)
+{
+	return XDP_PASS;
+}
+
+SEC("xdp.frags/devmap")
+int xdp_dummy_dm_frags(struct xdp_md *ctx)
+{
+	return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
index 1e6b9c38ea6d..8ae11fab8316 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
@@ -40,4 +40,11 @@ int xdp_dummy_dm(struct xdp_md *ctx)
 
 	return XDP_PASS;
 }
+
+SEC("xdp.frags/devmap")
+int xdp_dummy_dm_frags(struct xdp_md *ctx)
+{
+	return XDP_PASS;
+}
+
 char _license[] SEC("license") = "GPL";
-- 
cgit 


From b4ec6a19231224f6b08dc54ea07da4c4090e8ee3 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Fri, 21 Jan 2022 13:35:08 +0100
Subject: selftests, xsk: Fix rx_full stats test

Fix the rx_full stats test so that it correctly reports pass even when
the fill ring is not full of buffers.

Fixes: 872a1184dbf2 ("selftests: xsk: Put the same buffer only once in the fill ring")
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/bpf/20220121123508.12759-1-magnus.karlsson@gmail.com
---
 tools/testing/selftests/bpf/xdpxceiver.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 0a5d23da486d..ffa5502ad95e 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -906,7 +906,10 @@ static bool rx_stats_are_valid(struct ifobject *ifobject)
 			return true;
 		case STAT_TEST_RX_FULL:
 			xsk_stat = stats.rx_ring_full;
-			expected_stat -= RX_FULL_RXQSIZE;
+			if (ifobject->umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS)
+				expected_stat = ifobject->umem->num_frames - RX_FULL_RXQSIZE;
+			else
+				expected_stat = XSK_RING_PROD__DEFAULT_NUM_DESCS - RX_FULL_RXQSIZE;
 			break;
 		case STAT_TEST_RX_FILL_EMPTY:
 			xsk_stat = stats.rx_fill_ring_empty_descs;
-- 
cgit 


From 0bfb95f59a6613e30c0672b8ef2c9502302bf6bb Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Mon, 24 Jan 2022 23:01:28 +0100
Subject: selftests, bpf: Do not yet switch to new libbpf XDP APIs

Revert commit 544356524dd6 ("selftests/bpf: switch to new libbpf XDP APIs")
for now given this will heavily conflict with 4b27480dcaa7 ("bpf/selftests:
convert xdp_link test to ASSERT_* macros") upon merge. Andrii agreed to redo
the conversion cleanly after trees merged.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
---
 .../testing/selftests/bpf/prog_tests/xdp_attach.c  | 29 ++++++++++++----------
 .../selftests/bpf/prog_tests/xdp_cpumap_attach.c   |  8 +++---
 .../selftests/bpf/prog_tests/xdp_devmap_attach.c   |  8 +++---
 tools/testing/selftests/bpf/prog_tests/xdp_info.c  | 14 +++++------
 tools/testing/selftests/bpf/prog_tests/xdp_link.c  | 26 +++++++++----------
 tools/testing/selftests/bpf/xdp_redirect_multi.c   |  8 +++---
 tools/testing/selftests/bpf/xdping.c               |  4 +--
 7 files changed, 50 insertions(+), 47 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
index 62aa3edda5e6..c6fa390e3aa1 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
@@ -11,7 +11,8 @@ void serial_test_xdp_attach(void)
 	const char *file = "./test_xdp.o";
 	struct bpf_prog_info info = {};
 	int err, fd1, fd2, fd3;
-	LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
+	DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts,
+			    .old_fd = -1);
 
 	len = sizeof(info);
 
@@ -37,47 +38,49 @@ void serial_test_xdp_attach(void)
 	if (CHECK_FAIL(err))
 		goto out_2;
 
-	err = bpf_xdp_attach(IFINDEX_LO, fd1, XDP_FLAGS_REPLACE, &opts);
+	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd1, XDP_FLAGS_REPLACE,
+				       &opts);
 	if (CHECK(err, "load_ok", "initial load failed"))
 		goto out_close;
 
-	err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0);
+	err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
 	if (CHECK(err || id0 != id1, "id1_check",
 		  "loaded prog id %u != id1 %u, err %d", id0, id1, err))
 		goto out_close;
 
-	err = bpf_xdp_attach(IFINDEX_LO, fd2, XDP_FLAGS_REPLACE, &opts);
+	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd2, XDP_FLAGS_REPLACE,
+				       &opts);
 	if (CHECK(!err, "load_fail", "load with expected id didn't fail"))
 		goto out;
 
-	opts.old_prog_fd = fd1;
-	err = bpf_xdp_attach(IFINDEX_LO, fd2, 0, &opts);
+	opts.old_fd = fd1;
+	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd2, 0, &opts);
 	if (CHECK(err, "replace_ok", "replace valid old_fd failed"))
 		goto out;
-	err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0);
+	err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
 	if (CHECK(err || id0 != id2, "id2_check",
 		  "loaded prog id %u != id2 %u, err %d", id0, id2, err))
 		goto out_close;
 
-	err = bpf_xdp_attach(IFINDEX_LO, fd3, 0, &opts);
+	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd3, 0, &opts);
 	if (CHECK(!err, "replace_fail", "replace invalid old_fd didn't fail"))
 		goto out;
 
-	err = bpf_xdp_detach(IFINDEX_LO, 0, &opts);
+	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, 0, &opts);
 	if (CHECK(!err, "remove_fail", "remove invalid old_fd didn't fail"))
 		goto out;
 
-	opts.old_prog_fd = fd2;
-	err = bpf_xdp_detach(IFINDEX_LO, 0, &opts);
+	opts.old_fd = fd2;
+	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, 0, &opts);
 	if (CHECK(err, "remove_ok", "remove valid old_fd failed"))
 		goto out;
 
-	err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0);
+	err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
 	if (CHECK(err || id0 != 0, "unload_check",
 		  "loaded prog id %u != 0, err %d", id0, err))
 		goto out_close;
 out:
-	bpf_xdp_detach(IFINDEX_LO, 0, NULL);
+	bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0);
 out_close:
 	bpf_object__close(obj3);
 out_2:
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
index b353e1f3acb5..13aabb3b6cf2 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
@@ -24,11 +24,11 @@ void test_xdp_with_cpumap_helpers(void)
 		return;
 
 	prog_fd = bpf_program__fd(skel->progs.xdp_redir_prog);
-	err = bpf_xdp_attach(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE, NULL);
+	err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
 	if (!ASSERT_OK(err, "Generic attach of program with 8-byte CPUMAP"))
 		goto out_close;
 
-	err = bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL);
+	err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
 	ASSERT_OK(err, "XDP program detach");
 
 	prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm);
@@ -46,9 +46,9 @@ void test_xdp_with_cpumap_helpers(void)
 	ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to cpumap entry prog_id");
 
 	/* can not attach BPF_XDP_CPUMAP program to a device */
-	err = bpf_xdp_attach(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE, NULL);
+	err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
 	if (!ASSERT_NEQ(err, 0, "Attach of BPF_XDP_CPUMAP program"))
-		bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL);
+		bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
 
 	val.qsize = 192;
 	val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog);
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
index 463a72fc3e70..2a784ccd3136 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
@@ -26,11 +26,11 @@ static void test_xdp_with_devmap_helpers(void)
 		return;
 
 	dm_fd = bpf_program__fd(skel->progs.xdp_redir_prog);
-	err = bpf_xdp_attach(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE, NULL);
+	err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE);
 	if (!ASSERT_OK(err, "Generic attach of program with 8-byte devmap"))
 		goto out_close;
 
-	err = bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL);
+	err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
 	ASSERT_OK(err, "XDP program detach");
 
 	dm_fd = bpf_program__fd(skel->progs.xdp_dummy_dm);
@@ -48,9 +48,9 @@ static void test_xdp_with_devmap_helpers(void)
 	ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to devmap entry prog_id");
 
 	/* can not attach BPF_XDP_DEVMAP program to a device */
-	err = bpf_xdp_attach(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE, NULL);
+	err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE);
 	if (!ASSERT_NEQ(err, 0, "Attach of BPF_XDP_DEVMAP program"))
-		bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL);
+		bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
 
 	val.ifindex = 1;
 	val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog);
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_info.c b/tools/testing/selftests/bpf/prog_tests/xdp_info.c
index 0d01ff6cb91a..abe48e82e1dc 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_info.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_info.c
@@ -14,13 +14,13 @@ void serial_test_xdp_info(void)
 
 	/* Get prog_id for XDP_ATTACHED_NONE mode */
 
-	err = bpf_xdp_query_id(IFINDEX_LO, 0, &prog_id);
+	err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, 0);
 	if (CHECK(err, "get_xdp_none", "errno=%d\n", errno))
 		return;
 	if (CHECK(prog_id, "prog_id_none", "unexpected prog_id=%u\n", prog_id))
 		return;
 
-	err = bpf_xdp_query_id(IFINDEX_LO, XDP_FLAGS_SKB_MODE, &prog_id);
+	err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, XDP_FLAGS_SKB_MODE);
 	if (CHECK(err, "get_xdp_none_skb", "errno=%d\n", errno))
 		return;
 	if (CHECK(prog_id, "prog_id_none_skb", "unexpected prog_id=%u\n",
@@ -37,32 +37,32 @@ void serial_test_xdp_info(void)
 	if (CHECK(err, "get_prog_info", "errno=%d\n", errno))
 		goto out_close;
 
-	err = bpf_xdp_attach(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE, NULL);
+	err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
 	if (CHECK(err, "set_xdp_skb", "errno=%d\n", errno))
 		goto out_close;
 
 	/* Get prog_id for single prog mode */
 
-	err = bpf_xdp_query_id(IFINDEX_LO, 0, &prog_id);
+	err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, 0);
 	if (CHECK(err, "get_xdp", "errno=%d\n", errno))
 		goto out;
 	if (CHECK(prog_id != info.id, "prog_id", "prog_id not available\n"))
 		goto out;
 
-	err = bpf_xdp_query_id(IFINDEX_LO, XDP_FLAGS_SKB_MODE, &prog_id);
+	err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, XDP_FLAGS_SKB_MODE);
 	if (CHECK(err, "get_xdp_skb", "errno=%d\n", errno))
 		goto out;
 	if (CHECK(prog_id != info.id, "prog_id_skb", "prog_id not available\n"))
 		goto out;
 
-	err = bpf_xdp_query_id(IFINDEX_LO, XDP_FLAGS_DRV_MODE, &prog_id);
+	err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, XDP_FLAGS_DRV_MODE);
 	if (CHECK(err, "get_xdp_drv", "errno=%d\n", errno))
 		goto out;
 	if (CHECK(prog_id, "prog_id_drv", "unexpected prog_id=%u\n", prog_id))
 		goto out;
 
 out:
-	bpf_xdp_detach(IFINDEX_LO, 0, NULL);
+	bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0);
 out_close:
 	bpf_object__close(obj);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_link.c b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
index 0c5e4ea8eaae..983ab0b47d30 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_link.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
@@ -9,8 +9,8 @@
 void serial_test_xdp_link(void)
 {
 	__u32 duration = 0, id1, id2, id0 = 0, prog_fd1, prog_fd2, err;
+	DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts, .old_fd = -1);
 	struct test_xdp_link *skel1 = NULL, *skel2 = NULL;
-	LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
 	struct bpf_link_info link_info;
 	struct bpf_prog_info prog_info;
 	struct bpf_link *link;
@@ -40,12 +40,12 @@ void serial_test_xdp_link(void)
 	id2 = prog_info.id;
 
 	/* set initial prog attachment */
-	err = bpf_xdp_attach(IFINDEX_LO, prog_fd1, XDP_FLAGS_REPLACE, &opts);
+	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd1, XDP_FLAGS_REPLACE, &opts);
 	if (CHECK(err, "fd_attach", "initial prog attach failed: %d\n", err))
 		goto cleanup;
 
 	/* validate prog ID */
-	err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0);
+	err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
 	CHECK(err || id0 != id1, "id1_check",
 	      "loaded prog id %u != id1 %u, err %d", id0, id1, err);
 
@@ -54,14 +54,14 @@ void serial_test_xdp_link(void)
 	if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
 		bpf_link__destroy(link);
 		/* best-effort detach prog */
-		opts.old_prog_fd = prog_fd1;
-		bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_REPLACE, &opts);
+		opts.old_fd = prog_fd1;
+		bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, XDP_FLAGS_REPLACE, &opts);
 		goto cleanup;
 	}
 
 	/* detach BPF program */
-	opts.old_prog_fd = prog_fd1;
-	err = bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_REPLACE, &opts);
+	opts.old_fd = prog_fd1;
+	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, XDP_FLAGS_REPLACE, &opts);
 	if (CHECK(err, "prog_detach", "failed %d\n", err))
 		goto cleanup;
 
@@ -72,24 +72,24 @@ void serial_test_xdp_link(void)
 	skel1->links.xdp_handler = link;
 
 	/* validate prog ID */
-	err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0);
+	err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
 	if (CHECK(err || id0 != id1, "id1_check",
 		  "loaded prog id %u != id1 %u, err %d", id0, id1, err))
 		goto cleanup;
 
 	/* BPF prog attach is not allowed to replace BPF link */
-	opts.old_prog_fd = prog_fd1;
-	err = bpf_xdp_attach(IFINDEX_LO, prog_fd2, XDP_FLAGS_REPLACE, &opts);
+	opts.old_fd = prog_fd1;
+	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd2, XDP_FLAGS_REPLACE, &opts);
 	if (CHECK(!err, "prog_attach_fail", "unexpected success\n"))
 		goto cleanup;
 
 	/* Can't force-update when BPF link is active */
-	err = bpf_xdp_attach(IFINDEX_LO, prog_fd2, 0, NULL);
+	err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd2, 0);
 	if (CHECK(!err, "prog_update_fail", "unexpected success\n"))
 		goto cleanup;
 
 	/* Can't force-detach when BPF link is active */
-	err = bpf_xdp_detach(IFINDEX_LO, 0, NULL);
+	err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0);
 	if (CHECK(!err, "prog_detach_fail", "unexpected success\n"))
 		goto cleanup;
 
@@ -109,7 +109,7 @@ void serial_test_xdp_link(void)
 		goto cleanup;
 	skel2->links.xdp_handler = link;
 
-	err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0);
+	err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
 	if (CHECK(err || id0 != id2, "id2_check",
 		  "loaded prog id %u != id2 %u, err %d", id0, id1, err))
 		goto cleanup;
diff --git a/tools/testing/selftests/bpf/xdp_redirect_multi.c b/tools/testing/selftests/bpf/xdp_redirect_multi.c
index aaedbf4955c3..51c8224b4ccc 100644
--- a/tools/testing/selftests/bpf/xdp_redirect_multi.c
+++ b/tools/testing/selftests/bpf/xdp_redirect_multi.c
@@ -32,12 +32,12 @@ static void int_exit(int sig)
 	int i;
 
 	for (i = 0; ifaces[i] > 0; i++) {
-		if (bpf_xdp_query_id(ifaces[i], xdp_flags, &prog_id)) {
-			printf("bpf_xdp_query_id failed\n");
+		if (bpf_get_link_xdp_id(ifaces[i], &prog_id, xdp_flags)) {
+			printf("bpf_get_link_xdp_id failed\n");
 			exit(1);
 		}
 		if (prog_id)
-			bpf_xdp_detach(ifaces[i], xdp_flags, NULL);
+			bpf_set_link_xdp_fd(ifaces[i], -1, xdp_flags);
 	}
 
 	exit(0);
@@ -210,7 +210,7 @@ int main(int argc, char **argv)
 		}
 
 		/* bind prog_fd to each interface */
-		ret = bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL);
+		ret = bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags);
 		if (ret) {
 			printf("Set xdp fd failed on %d\n", ifindex);
 			goto err_out;
diff --git a/tools/testing/selftests/bpf/xdping.c b/tools/testing/selftests/bpf/xdping.c
index c567856fd1bc..baa870a759a2 100644
--- a/tools/testing/selftests/bpf/xdping.c
+++ b/tools/testing/selftests/bpf/xdping.c
@@ -29,7 +29,7 @@ static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
 
 static void cleanup(int sig)
 {
-	bpf_xdp_detach(ifindex, xdp_flags, NULL);
+	bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
 	if (sig)
 		exit(1);
 }
@@ -203,7 +203,7 @@ int main(int argc, char **argv)
 
 	printf("XDP setup disrupts network connectivity, hit Ctrl+C to quit\n");
 
-	if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) {
+	if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
 		fprintf(stderr, "Link set xdp fd failed for %s\n", ifname);
 		goto done;
 	}
-- 
cgit 


From 45105c2eb751e2a3dd9858622cf72e2d588209a0 Mon Sep 17 00:00:00 2001
From: Kenny Yu <kennyyu@fb.com>
Date: Mon, 24 Jan 2022 10:54:03 -0800
Subject: selftests/bpf: Add test for sleepable bpf iterator programs

This adds a test for bpf iterator programs to make use of sleepable
bpf helpers.

Signed-off-by: Kenny Yu <kennyyu@fb.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20220124185403.468466-5-kennyyu@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 20 +++++++++
 tools/testing/selftests/bpf/progs/bpf_iter_task.c | 54 +++++++++++++++++++++++
 2 files changed, 74 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index b84f859b1267..5142a7d130b2 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -138,6 +138,24 @@ static void test_task(void)
 	bpf_iter_task__destroy(skel);
 }
 
+static void test_task_sleepable(void)
+{
+	struct bpf_iter_task *skel;
+
+	skel = bpf_iter_task__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_task__open_and_load"))
+		return;
+
+	do_dummy_read(skel->progs.dump_task_sleepable);
+
+	ASSERT_GT(skel->bss->num_expected_failure_copy_from_user_task, 0,
+		  "num_expected_failure_copy_from_user_task");
+	ASSERT_GT(skel->bss->num_success_copy_from_user_task, 0,
+		  "num_success_copy_from_user_task");
+
+	bpf_iter_task__destroy(skel);
+}
+
 static void test_task_stack(void)
 {
 	struct bpf_iter_task_stack *skel;
@@ -1252,6 +1270,8 @@ void test_bpf_iter(void)
 		test_bpf_map();
 	if (test__start_subtest("task"))
 		test_task();
+	if (test__start_subtest("task_sleepable"))
+		test_task_sleepable();
 	if (test__start_subtest("task_stack"))
 		test_task_stack();
 	if (test__start_subtest("task_file"))
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
index c86b93f33b32..d22741272692 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
@@ -2,6 +2,7 @@
 /* Copyright (c) 2020 Facebook */
 #include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
@@ -23,3 +24,56 @@ int dump_task(struct bpf_iter__task *ctx)
 	BPF_SEQ_PRINTF(seq, "%8d %8d\n", task->tgid, task->pid);
 	return 0;
 }
+
+int num_expected_failure_copy_from_user_task = 0;
+int num_success_copy_from_user_task = 0;
+
+SEC("iter.s/task")
+int dump_task_sleepable(struct bpf_iter__task *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	struct task_struct *task = ctx->task;
+	static const char info[] = "    === END ===";
+	struct pt_regs *regs;
+	void *ptr;
+	uint32_t user_data = 0;
+	int ret;
+
+	if (task == (void *)0) {
+		BPF_SEQ_PRINTF(seq, "%s\n", info);
+		return 0;
+	}
+
+	/* Read an invalid pointer and ensure we get an error */
+	ptr = NULL;
+	ret = bpf_copy_from_user_task(&user_data, sizeof(uint32_t), ptr, task, 0);
+	if (ret) {
+		++num_expected_failure_copy_from_user_task;
+	} else {
+		BPF_SEQ_PRINTF(seq, "%s\n", info);
+		return 0;
+	}
+
+	/* Try to read the contents of the task's instruction pointer from the
+	 * remote task's address space.
+	 */
+	regs = (struct pt_regs *)bpf_task_pt_regs(task);
+	if (regs == (void *)0) {
+		BPF_SEQ_PRINTF(seq, "%s\n", info);
+		return 0;
+	}
+	ptr = (void *)PT_REGS_IP(regs);
+
+	ret = bpf_copy_from_user_task(&user_data, sizeof(uint32_t), ptr, task, 0);
+	if (ret) {
+		BPF_SEQ_PRINTF(seq, "%s\n", info);
+		return 0;
+	}
+	++num_success_copy_from_user_task;
+
+	if (ctx->meta->seq_num == 0)
+		BPF_SEQ_PRINTF(seq, "    tgid      gid     data\n");
+
+	BPF_SEQ_PRINTF(seq, "%8d %8d %8d\n", task->tgid, task->pid, user_data);
+	return 0;
+}
-- 
cgit 


From 78a2054156dd6265619b230cc5372b74f9ba5233 Mon Sep 17 00:00:00 2001
From: Kenta Tada <Kenta.Tada@sony.com>
Date: Mon, 24 Jan 2022 23:16:20 +0900
Subject: selftests/bpf: Extract syscall wrapper

Extract the helper to set up SYS_PREFIX for fentry and kprobe selftests
that use __x86_sys_* attach functions.

Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Kenta Tada <Kenta.Tada@sony.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220124141622.4378-2-Kenta.Tada@sony.com
---
 tools/testing/selftests/bpf/progs/bpf_misc.h        | 19 +++++++++++++++++++
 tools/testing/selftests/bpf/progs/test_probe_user.c | 15 +--------------
 2 files changed, 20 insertions(+), 14 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_misc.h

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
new file mode 100644
index 000000000000..0b78bc9b1b4c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __BPF_MISC_H__
+#define __BPF_MISC_H__
+
+#if defined(__TARGET_ARCH_x86)
+#define SYSCALL_WRAPPER 1
+#define SYS_PREFIX "__x64_"
+#elif defined(__TARGET_ARCH_s390)
+#define SYSCALL_WRAPPER 1
+#define SYS_PREFIX "__s390x_"
+#elif defined(__TARGET_ARCH_arm64)
+#define SYSCALL_WRAPPER 1
+#define SYS_PREFIX "__arm64_"
+#else
+#define SYSCALL_WRAPPER 0
+#define SYS_PREFIX ""
+#endif
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/test_probe_user.c b/tools/testing/selftests/bpf/progs/test_probe_user.c
index 8812a90da4eb..702578a5e496 100644
--- a/tools/testing/selftests/bpf/progs/test_probe_user.c
+++ b/tools/testing/selftests/bpf/progs/test_probe_user.c
@@ -7,20 +7,7 @@
 
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
-
-#if defined(__TARGET_ARCH_x86)
-#define SYSCALL_WRAPPER 1
-#define SYS_PREFIX "__x64_"
-#elif defined(__TARGET_ARCH_s390)
-#define SYSCALL_WRAPPER 1
-#define SYS_PREFIX "__s390x_"
-#elif defined(__TARGET_ARCH_arm64)
-#define SYSCALL_WRAPPER 1
-#define SYS_PREFIX "__arm64_"
-#else
-#define SYSCALL_WRAPPER 0
-#define SYS_PREFIX ""
-#endif
+#include "bpf_misc.h"
 
 static struct sockaddr_in old;
 
-- 
cgit 


From 77fc0330dfe5abf9b7ec336f173d2e1fd7258cd5 Mon Sep 17 00:00:00 2001
From: Kenta Tada <Kenta.Tada@sony.com>
Date: Mon, 24 Jan 2022 23:16:22 +0900
Subject: selftests/bpf: Add a test to confirm PT_REGS_PARM4_SYSCALL

Add a selftest to verify the behavior of PT_REGS_xxx
and the CORE variant.

Signed-off-by: Kenta Tada <Kenta.Tada@sony.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220124141622.4378-4-Kenta.Tada@sony.com
---
 .../bpf/prog_tests/test_bpf_syscall_macro.c        | 63 ++++++++++++++++++++++
 .../selftests/bpf/progs/bpf_syscall_macro.c        | 56 +++++++++++++++++++
 2 files changed, 119 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_syscall_macro.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c b/tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c
new file mode 100644
index 000000000000..f5f4c8adf539
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2022 Sony Group Corporation */
+#include <sys/prctl.h>
+#include <test_progs.h>
+#include "bpf_syscall_macro.skel.h"
+
+void test_bpf_syscall_macro(void)
+{
+	struct bpf_syscall_macro *skel = NULL;
+	int err;
+	int exp_arg1 = 1001;
+	unsigned long exp_arg2 = 12;
+	unsigned long exp_arg3 = 13;
+	unsigned long exp_arg4 = 14;
+	unsigned long exp_arg5 = 15;
+
+	/* check whether it can open program */
+	skel = bpf_syscall_macro__open();
+	if (!ASSERT_OK_PTR(skel, "bpf_syscall_macro__open"))
+		return;
+
+	skel->rodata->filter_pid = getpid();
+
+	/* check whether it can load program */
+	err = bpf_syscall_macro__load(skel);
+	if (!ASSERT_OK(err, "bpf_syscall_macro__load"))
+		goto cleanup;
+
+	/* check whether it can attach kprobe */
+	err = bpf_syscall_macro__attach(skel);
+	if (!ASSERT_OK(err, "bpf_syscall_macro__attach"))
+		goto cleanup;
+
+	/* check whether args of syscall are copied correctly */
+	prctl(exp_arg1, exp_arg2, exp_arg3, exp_arg4, exp_arg5);
+	ASSERT_EQ(skel->bss->arg1, exp_arg1, "syscall_arg1");
+	ASSERT_EQ(skel->bss->arg2, exp_arg2, "syscall_arg2");
+	ASSERT_EQ(skel->bss->arg3, exp_arg3, "syscall_arg3");
+	/* it cannot copy arg4 when uses PT_REGS_PARM4 on x86_64 */
+#ifdef __x86_64__
+	ASSERT_NEQ(skel->bss->arg4_cx, exp_arg4, "syscall_arg4_from_cx");
+#else
+	ASSERT_EQ(skel->bss->arg4_cx, exp_arg4, "syscall_arg4_from_cx");
+#endif
+	ASSERT_EQ(skel->bss->arg4, exp_arg4, "syscall_arg4");
+	ASSERT_EQ(skel->bss->arg5, exp_arg5, "syscall_arg5");
+
+	/* check whether args of syscall are copied correctly for CORE variants */
+	ASSERT_EQ(skel->bss->arg1_core, exp_arg1, "syscall_arg1_core_variant");
+	ASSERT_EQ(skel->bss->arg2_core, exp_arg2, "syscall_arg2_core_variant");
+	ASSERT_EQ(skel->bss->arg3_core, exp_arg3, "syscall_arg3_core_variant");
+	/* it cannot copy arg4 when uses PT_REGS_PARM4_CORE on x86_64 */
+#ifdef __x86_64__
+	ASSERT_NEQ(skel->bss->arg4_core_cx, exp_arg4, "syscall_arg4_from_cx_core_variant");
+#else
+	ASSERT_EQ(skel->bss->arg4_core_cx, exp_arg4, "syscall_arg4_from_cx_core_variant");
+#endif
+	ASSERT_EQ(skel->bss->arg4_core, exp_arg4, "syscall_arg4_core_variant");
+	ASSERT_EQ(skel->bss->arg5_core, exp_arg5, "syscall_arg5_core_variant");
+
+cleanup:
+	bpf_syscall_macro__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c b/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c
new file mode 100644
index 000000000000..c8e60220cda8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2022 Sony Group Corporation */
+#include <vmlinux.h>
+
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+int arg1 = 0;
+unsigned long arg2 = 0;
+unsigned long arg3 = 0;
+unsigned long arg4_cx = 0;
+unsigned long arg4 = 0;
+unsigned long arg5 = 0;
+
+int arg1_core = 0;
+unsigned long arg2_core = 0;
+unsigned long arg3_core = 0;
+unsigned long arg4_core_cx = 0;
+unsigned long arg4_core = 0;
+unsigned long arg5_core = 0;
+
+const volatile pid_t filter_pid = 0;
+
+SEC("kprobe/" SYS_PREFIX "sys_prctl")
+int BPF_KPROBE(handle_sys_prctl)
+{
+	struct pt_regs *real_regs;
+	pid_t pid = bpf_get_current_pid_tgid() >> 32;
+
+	if (pid != filter_pid)
+		return 0;
+
+	real_regs = (struct pt_regs *)PT_REGS_PARM1(ctx);
+
+	/* test for PT_REGS_PARM */
+	bpf_probe_read_kernel(&arg1, sizeof(arg1), &PT_REGS_PARM1_SYSCALL(real_regs));
+	bpf_probe_read_kernel(&arg2, sizeof(arg2), &PT_REGS_PARM2_SYSCALL(real_regs));
+	bpf_probe_read_kernel(&arg3, sizeof(arg3), &PT_REGS_PARM3_SYSCALL(real_regs));
+	bpf_probe_read_kernel(&arg4_cx, sizeof(arg4_cx), &PT_REGS_PARM4(real_regs));
+	bpf_probe_read_kernel(&arg4, sizeof(arg4), &PT_REGS_PARM4_SYSCALL(real_regs));
+	bpf_probe_read_kernel(&arg5, sizeof(arg5), &PT_REGS_PARM5_SYSCALL(real_regs));
+
+	/* test for the CORE variant of PT_REGS_PARM */
+	arg1_core = PT_REGS_PARM1_CORE_SYSCALL(real_regs);
+	arg2_core = PT_REGS_PARM2_CORE_SYSCALL(real_regs);
+	arg3_core = PT_REGS_PARM3_CORE_SYSCALL(real_regs);
+	arg4_core_cx = PT_REGS_PARM4_CORE(real_regs);
+	arg4_core = PT_REGS_PARM4_CORE_SYSCALL(real_regs);
+	arg5_core = PT_REGS_PARM5_CORE_SYSCALL(real_regs);
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit 


From 379d19ecdc208c7b8d3d221f29e39d84a5f3b00b Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 24 Jan 2022 11:42:52 -0800
Subject: selftests/bpf: use preferred setter/getter APIs instead of deprecated
 ones

Switch to using preferred setters and getters instead of deprecated ones.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20220124194254.2051434-6-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/benchs/bench_ringbufs.c                | 2 +-
 tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c             | 2 +-
 tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c | 2 +-
 tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c   | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
index da8593b3494a..c2554f9695ff 100644
--- a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
+++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
@@ -151,7 +151,7 @@ static struct ringbuf_bench *ringbuf_setup_skeleton(void)
 		/* record data + header take 16 bytes */
 		skel->rodata->wakeup_data_size = args.sample_rate * 16;
 
-	bpf_map__resize(skel->maps.ringbuf, args.ringbuf_sz);
+	bpf_map__set_max_entries(skel->maps.ringbuf, args.ringbuf_sz);
 
 	if (ringbuf_bench__load(skel)) {
 		fprintf(stderr, "failed to load skeleton\n");
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
index c52f99f6a909..e83575e5480f 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
@@ -132,7 +132,7 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
 					     &link_info, &info_len);
 		ASSERT_OK(err, "link_fd_get_info");
 		ASSERT_EQ(link_info.tracing.attach_type,
-			  bpf_program__get_expected_attach_type(prog[i]),
+			  bpf_program__expected_attach_type(prog[i]),
 			  "link_attach_type");
 		ASSERT_EQ(link_info.tracing.target_obj_id, tgt_prog_id, "link_tgt_obj_id");
 		ASSERT_EQ(link_info.tracing.target_btf_id, btf_id, "link_tgt_btf_id");
diff --git a/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c
index 8d5a6023a1bb..5308de1ed478 100644
--- a/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c
@@ -27,7 +27,7 @@ void test_get_stackid_cannot_attach(void)
 		return;
 
 	/* override program type */
-	bpf_program__set_perf_event(skel->progs.oncpu);
+	bpf_program__set_type(skel->progs.oncpu, BPF_PROG_TYPE_PERF_EVENT);
 
 	err = test_stacktrace_build_id__load(skel);
 	if (CHECK(err, "skel_load", "skeleton load failed: %d\n", err))
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
index 0a91d8d9954b..f45a1d7b0a28 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
@@ -42,7 +42,7 @@ retry:
 		return;
 
 	/* override program type */
-	bpf_program__set_perf_event(skel->progs.oncpu);
+	bpf_program__set_type(skel->progs.oncpu, BPF_PROG_TYPE_PERF_EVENT);
 
 	err = test_stacktrace_build_id__load(skel);
 	if (CHECK(err, "skel_load", "skeleton load failed: %d\n", err))
-- 
cgit 


From fc1ca95585aa4f51e9776f01dffedc1591458c31 Mon Sep 17 00:00:00 2001
From: Felix Maurer <fmaurer@redhat.com>
Date: Tue, 25 Jan 2022 17:58:23 +0100
Subject: selftests: bpf: Less strict size check in sockopt_sk

Originally, the kernel strictly checked the size of the optval in
getsockopt(TCP_ZEROCOPY_RECEIVE) to be equal to sizeof(struct
tcp_zerocopy_receive). With c8856c0514549, this was changed to allow
optvals of different sizes.

The bpf code in the sockopt_sk test was still performing the strict size
check. This fix adapts the kernel behavior from c8856c0514549 in the
selftest, i.e., just check if the required fields are there.

Fixes: 9cacf81f81611 ("bpf: Remove extra lock_sock for TCP_ZEROCOPY_RECEIVE")
Signed-off-by: Felix Maurer <fmaurer@redhat.com>
Reviewed-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/r/6f569cca2e45473f9a724d54d03fdfb45f29e35f.1643129402.git.fmaurer@redhat.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/progs/sockopt_sk.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c
index d0298dccedcd..c8d810010a94 100644
--- a/tools/testing/selftests/bpf/progs/sockopt_sk.c
+++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c
@@ -72,7 +72,8 @@ int _getsockopt(struct bpf_sockopt *ctx)
 		 * reasons.
 		 */
 
-		if (optval + sizeof(struct tcp_zerocopy_receive) > optval_end)
+		/* Check that optval contains address (__u64) */
+		if (optval + sizeof(__u64) > optval_end)
 			return 0; /* bounds check */
 
 		if (((struct tcp_zerocopy_receive *)optval)->address != 0)
-- 
cgit 


From 88b6132248940954b958cd4e6d0432c640a9abd2 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 25 Jan 2022 16:28:24 +0000
Subject: kselftest: alsa: Add test case for writing invalid values

Attempt to write various invalid values for control types we know about and
check that something sensible happens. The ABI isn't quite as clearly
defined as one might like, rather than generating an error when an invalid
value is written many devices will silently rewrite the value into one that
is valid for the control. The exact value chosen is not predictable so in
the case the write succeeds we just check that the value we read back is
one that is valid for the control.

Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20220125162824.3816659-1-broonie@kernel.org
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 tools/testing/selftests/alsa/mixer-test.c | 222 +++++++++++++++++++++++++++++-
 1 file changed, 221 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/alsa/mixer-test.c b/tools/testing/selftests/alsa/mixer-test.c
index 17f158d7a767..0e88f4f3d802 100644
--- a/tools/testing/selftests/alsa/mixer-test.c
+++ b/tools/testing/selftests/alsa/mixer-test.c
@@ -13,6 +13,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdbool.h>
+#include <limits.h>
 #include <string.h>
 #include <getopt.h>
 #include <stdarg.h>
@@ -26,7 +27,7 @@
 
 #include "../kselftest.h"
 
-#define TESTS_PER_CONTROL 3
+#define TESTS_PER_CONTROL 4
 
 struct card_data {
 	snd_ctl_t *handle;
@@ -679,6 +680,224 @@ void test_ctl_write_valid(struct ctl_data *ctl)
 			 ctl->card->card, ctl->elem);
 }
 
+bool test_ctl_write_invalid_value(struct ctl_data *ctl,
+				  snd_ctl_elem_value_t *val)
+{
+	int err;
+	long val_read;
+
+	/* Ideally this will fail... */
+	err = snd_ctl_elem_write(ctl->card->handle, val);
+	if (err < 0)
+		return false;
+
+	/* ...but some devices will clamp to an in range value */
+	err = snd_ctl_elem_read(ctl->card->handle, val);
+	if (err < 0) {
+		ksft_print_msg("%s failed to read: %s\n",
+			       ctl->name, snd_strerror(err));
+		return true;
+	}
+
+	return !ctl_value_valid(ctl, val);
+}
+
+bool test_ctl_write_invalid_boolean(struct ctl_data *ctl)
+{
+	int err, i;
+	long val_read;
+	bool fail = false;
+	snd_ctl_elem_value_t *val;
+	snd_ctl_elem_value_alloca(&val);
+
+	for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) {
+		snd_ctl_elem_value_copy(val, ctl->def_val);
+		snd_ctl_elem_value_set_boolean(val, i, 2);
+
+		if (test_ctl_write_invalid_value(ctl, val))
+			fail = true;
+	}
+
+	return !fail;
+}
+
+bool test_ctl_write_invalid_integer(struct ctl_data *ctl)
+{
+	int i;
+	bool fail = false;
+	snd_ctl_elem_value_t *val;
+	snd_ctl_elem_value_alloca(&val);
+
+	for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) {
+		if (snd_ctl_elem_info_get_min(ctl->info) != LONG_MIN) {
+			/* Just under range */
+			snd_ctl_elem_value_copy(val, ctl->def_val);
+			snd_ctl_elem_value_set_integer(val, i,
+			       snd_ctl_elem_info_get_min(ctl->info) - 1);
+
+			if (test_ctl_write_invalid_value(ctl, val))
+				fail = true;
+
+			/* Minimum representable value */
+			snd_ctl_elem_value_copy(val, ctl->def_val);
+			snd_ctl_elem_value_set_integer(val, i, LONG_MIN);
+
+			if (test_ctl_write_invalid_value(ctl, val))
+				fail = true;
+		}
+
+		if (snd_ctl_elem_info_get_max(ctl->info) != LONG_MAX) {
+			/* Just over range */
+			snd_ctl_elem_value_copy(val, ctl->def_val);
+			snd_ctl_elem_value_set_integer(val, i,
+			       snd_ctl_elem_info_get_max(ctl->info) + 1);
+
+			if (test_ctl_write_invalid_value(ctl, val))
+				fail = true;
+
+			/* Maximum representable value */
+			snd_ctl_elem_value_copy(val, ctl->def_val);
+			snd_ctl_elem_value_set_integer(val, i, LONG_MAX);
+
+			if (test_ctl_write_invalid_value(ctl, val))
+				fail = true;
+		}
+	}
+
+	return !fail;
+}
+
+bool test_ctl_write_invalid_integer64(struct ctl_data *ctl)
+{
+	int i;
+	bool fail = false;
+	snd_ctl_elem_value_t *val;
+	snd_ctl_elem_value_alloca(&val);
+
+	for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) {
+		if (snd_ctl_elem_info_get_min64(ctl->info) != LLONG_MIN) {
+			/* Just under range */
+			snd_ctl_elem_value_copy(val, ctl->def_val);
+			snd_ctl_elem_value_set_integer64(val, i,
+				snd_ctl_elem_info_get_min64(ctl->info) - 1);
+
+			if (test_ctl_write_invalid_value(ctl, val))
+				fail = true;
+
+			/* Minimum representable value */
+			snd_ctl_elem_value_copy(val, ctl->def_val);
+			snd_ctl_elem_value_set_integer64(val, i, LLONG_MIN);
+
+			if (test_ctl_write_invalid_value(ctl, val))
+				fail = true;
+		}
+
+		if (snd_ctl_elem_info_get_max64(ctl->info) != LLONG_MAX) {
+			/* Just over range */
+			snd_ctl_elem_value_copy(val, ctl->def_val);
+			snd_ctl_elem_value_set_integer64(val, i,
+				snd_ctl_elem_info_get_max64(ctl->info) + 1);
+
+			if (test_ctl_write_invalid_value(ctl, val))
+				fail = true;
+
+			/* Maximum representable value */
+			snd_ctl_elem_value_copy(val, ctl->def_val);
+			snd_ctl_elem_value_set_integer64(val, i, LLONG_MAX);
+
+			if (test_ctl_write_invalid_value(ctl, val))
+				fail = true;
+		}
+	}
+
+	return !fail;
+}
+
+bool test_ctl_write_invalid_enumerated(struct ctl_data *ctl)
+{
+	int err, i;
+	unsigned int val_read;
+	bool fail = false;
+	snd_ctl_elem_value_t *val;
+	snd_ctl_elem_value_alloca(&val);
+
+	snd_ctl_elem_value_set_id(val, ctl->id);
+
+	for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) {
+		/* One beyond maximum */
+		snd_ctl_elem_value_copy(val, ctl->def_val);
+		snd_ctl_elem_value_set_enumerated(val, i,
+				  snd_ctl_elem_info_get_items(ctl->info));
+
+		if (test_ctl_write_invalid_value(ctl, val))
+			fail = true;
+
+		/* Maximum representable value */
+		snd_ctl_elem_value_copy(val, ctl->def_val);
+		snd_ctl_elem_value_set_enumerated(val, i, UINT_MAX);
+
+		if (test_ctl_write_invalid_value(ctl, val))
+			fail = true;
+
+	}
+
+	return !fail;
+}
+
+
+void test_ctl_write_invalid(struct ctl_data *ctl)
+{
+	bool pass;
+	int err;
+
+	/* If the control is turned off let's be polite */
+	if (snd_ctl_elem_info_is_inactive(ctl->info)) {
+		ksft_print_msg("%s is inactive\n", ctl->name);
+		ksft_test_result_skip("write_invalid.%d.%d\n",
+				      ctl->card->card, ctl->elem);
+		return;
+	}
+
+	if (!snd_ctl_elem_info_is_writable(ctl->info)) {
+		ksft_print_msg("%s is not writeable\n", ctl->name);
+		ksft_test_result_skip("write_invalid.%d.%d\n",
+				      ctl->card->card, ctl->elem);
+		return;
+	}
+
+	switch (snd_ctl_elem_info_get_type(ctl->info)) {
+	case SND_CTL_ELEM_TYPE_BOOLEAN:
+		pass = test_ctl_write_invalid_boolean(ctl);
+		break;
+
+	case SND_CTL_ELEM_TYPE_INTEGER:
+		pass = test_ctl_write_invalid_integer(ctl);
+		break;
+
+	case SND_CTL_ELEM_TYPE_INTEGER64:
+		pass = test_ctl_write_invalid_integer64(ctl);
+		break;
+
+	case SND_CTL_ELEM_TYPE_ENUMERATED:
+		pass = test_ctl_write_invalid_enumerated(ctl);
+		break;
+
+	default:
+		/* No tests for this yet */
+		ksft_test_result_skip("write_invalid.%d.%d\n",
+				      ctl->card->card, ctl->elem);
+		return;
+	}
+
+	/* Restore the default value to minimise disruption */
+	err = write_and_verify(ctl, ctl->def_val, NULL);
+	if (err < 0)
+		pass = false;
+
+	ksft_test_result(pass, "write_invalid.%d.%d\n",
+			 ctl->card->card, ctl->elem);
+}
+
 int main(void)
 {
 	struct ctl_data *ctl;
@@ -697,6 +916,7 @@ int main(void)
 		test_ctl_get_value(ctl);
 		test_ctl_write_default(ctl);
 		test_ctl_write_valid(ctl);
+		test_ctl_write_invalid(ctl);
 	}
 
 	ksft_exit_pass();
-- 
cgit 


From e5465a9027e9703d8f5ec9c0387e32fb3a02b58f Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Wed, 26 Jan 2022 10:19:40 -0800
Subject: selftests/bpf: Fix a clang compilation error

Compiling kernel and selftests/bpf with latest llvm like blow:
  make -j LLVM=1
  make -C tools/testing/selftests/bpf -j LLVM=1
I hit the following compilation error:
  /.../prog_tests/log_buf.c:215:6: error: variable 'log_buf' is used uninitialized whenever 'if' condition is true [-Werror,-Wsometimes-uninitialized]
          if (!ASSERT_OK_PTR(raw_btf_data, "raw_btf_data_good"))
              ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  /.../prog_tests/log_buf.c:264:7: note: uninitialized use occurs here
          free(log_buf);
               ^~~~~~~
  /.../prog_tests/log_buf.c:215:2: note: remove the 'if' if its condition is always false
          if (!ASSERT_OK_PTR(raw_btf_data, "raw_btf_data_good"))
          ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  /.../prog_tests/log_buf.c:205:15: note: initialize the variable 'log_buf' to silence this warning
          char *log_buf;
                       ^
                        = NULL
  1 error generated.

Compiler rightfully detected that log_buf is uninitialized in one of failure path as indicated
in the above.

Proper initialization of 'log_buf' variable fixed the issue.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220126181940.4105997-1-yhs@fb.com
---
 tools/testing/selftests/bpf/prog_tests/log_buf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/log_buf.c b/tools/testing/selftests/bpf/prog_tests/log_buf.c
index e469b023962b..1ef377a7e731 100644
--- a/tools/testing/selftests/bpf/prog_tests/log_buf.c
+++ b/tools/testing/selftests/bpf/prog_tests/log_buf.c
@@ -202,7 +202,7 @@ static void bpf_btf_load_log_buf(void)
 	const void *raw_btf_data;
 	__u32 raw_btf_size;
 	struct btf *btf;
-	char *log_buf;
+	char *log_buf = NULL;
 	int fd = -1;
 
 	btf = btf__new_empty();
-- 
cgit 


From ff943683f8a6dbf887c16275d0e80c1c5391b7bb Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 26 Jan 2022 11:30:58 -0800
Subject: selftests/bpf: fix uprobe offset calculation in selftests

Fix how selftests determine relative offset of a function that is
uprobed. Previously, there was an assumption that uprobed function is
always in the first executable region, which is not always the case
(libbpf CI hits this case now). So get_base_addr() approach in isolation
doesn't work anymore. So teach get_uprobe_offset() to determine correct
memory mapping and calculate uprobe offset correctly.

While at it, I merged together two implementations of
get_uprobe_offset() helper, moving powerpc64-specific logic inside (had
to add extra {} block to avoid unused variable error for insn).

Also ensured that uprobed functions are never inlined, but are still
static (and thus local to each selftest), by using a no-op asm volatile
block internally. I didn't want to keep them global __weak, because some
tests use uprobe's ref counter offset (to test USDT-like logic) which is
not compatible with non-refcounted uprobe. So it's nicer to have each
test uprobe target local to the file and guaranteed to not be inlined or
skipped by the compiler (which can happen with static functions,
especially if compiling selftests with -O2).

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20220126193058.3390292-1-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/benchs/bench_trigger.c |  6 +-
 .../selftests/bpf/prog_tests/attach_probe.c        | 18 +++---
 .../testing/selftests/bpf/prog_tests/bpf_cookie.c  | 16 +++--
 .../selftests/bpf/prog_tests/task_pt_regs.c        | 16 +++--
 tools/testing/selftests/bpf/trace_helpers.c        | 70 ++++++++++------------
 tools/testing/selftests/bpf/trace_helpers.h        |  3 +-
 6 files changed, 63 insertions(+), 66 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index 7f957c55a3ca..0c481de2833d 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -154,7 +154,6 @@ static void *uprobe_producer_without_nop(void *input)
 static void usetup(bool use_retprobe, bool use_nop)
 {
 	size_t uprobe_offset;
-	ssize_t base_addr;
 	struct bpf_link *link;
 
 	setup_libbpf();
@@ -165,11 +164,10 @@ static void usetup(bool use_retprobe, bool use_nop)
 		exit(1);
 	}
 
-	base_addr = get_base_addr();
 	if (use_nop)
-		uprobe_offset = get_uprobe_offset(&uprobe_target_with_nop, base_addr);
+		uprobe_offset = get_uprobe_offset(&uprobe_target_with_nop);
 	else
-		uprobe_offset = get_uprobe_offset(&uprobe_target_without_nop, base_addr);
+		uprobe_offset = get_uprobe_offset(&uprobe_target_without_nop);
 
 	link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe,
 					  use_retprobe,
diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index d0bd51eb23c8..d48f6e533e1e 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -5,9 +5,10 @@
 /* this is how USDT semaphore is actually defined, except volatile modifier */
 volatile unsigned short uprobe_ref_ctr __attribute__((unused)) __attribute((section(".probes")));
 
-/* attach point */
-static void method(void) {
-	return ;
+/* uprobe attach point */
+static void trigger_func(void)
+{
+	asm volatile ("");
 }
 
 void test_attach_probe(void)
@@ -17,8 +18,7 @@ void test_attach_probe(void)
 	struct bpf_link *kprobe_link, *kretprobe_link;
 	struct bpf_link *uprobe_link, *uretprobe_link;
 	struct test_attach_probe* skel;
-	size_t uprobe_offset;
-	ssize_t base_addr, ref_ctr_offset;
+	ssize_t uprobe_offset, ref_ctr_offset;
 	bool legacy;
 
 	/* Check if new-style kprobe/uprobe API is supported.
@@ -34,11 +34,9 @@ void test_attach_probe(void)
 	 */
 	legacy = access("/sys/bus/event_source/devices/kprobe/type", F_OK) != 0;
 
-	base_addr = get_base_addr();
-	if (CHECK(base_addr < 0, "get_base_addr",
-		  "failed to find base addr: %zd", base_addr))
+	uprobe_offset = get_uprobe_offset(&trigger_func);
+	if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset"))
 		return;
-	uprobe_offset = get_uprobe_offset(&method, base_addr);
 
 	ref_ctr_offset = get_rel_offset((uintptr_t)&uprobe_ref_ctr);
 	if (!ASSERT_GE(ref_ctr_offset, 0, "ref_ctr_offset"))
@@ -103,7 +101,7 @@ void test_attach_probe(void)
 		goto cleanup;
 
 	/* trigger & validate uprobe & uretprobe */
-	method();
+	trigger_func();
 
 	if (CHECK(skel->bss->uprobe_res != 3, "check_uprobe_res",
 		  "wrong uprobe res: %d\n", skel->bss->uprobe_res))
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
index 5eea3c3a40fe..cd10df6cd0fc 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
@@ -8,6 +8,12 @@
 #include <test_progs.h>
 #include "test_bpf_cookie.skel.h"
 
+/* uprobe attach point */
+static void trigger_func(void)
+{
+	asm volatile ("");
+}
+
 static void kprobe_subtest(struct test_bpf_cookie *skel)
 {
 	DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts);
@@ -62,11 +68,11 @@ static void uprobe_subtest(struct test_bpf_cookie *skel)
 	DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts);
 	struct bpf_link *link1 = NULL, *link2 = NULL;
 	struct bpf_link *retlink1 = NULL, *retlink2 = NULL;
-	size_t uprobe_offset;
-	ssize_t base_addr;
+	ssize_t uprobe_offset;
 
-	base_addr = get_base_addr();
-	uprobe_offset = get_uprobe_offset(&get_base_addr, base_addr);
+	uprobe_offset = get_uprobe_offset(&trigger_func);
+	if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset"))
+		goto cleanup;
 
 	/* attach two uprobes */
 	opts.bpf_cookie = 0x100;
@@ -99,7 +105,7 @@ static void uprobe_subtest(struct test_bpf_cookie *skel)
 		goto cleanup;
 
 	/* trigger uprobe && uretprobe */
-	get_base_addr();
+	trigger_func();
 
 	ASSERT_EQ(skel->bss->uprobe_res, 0x100 | 0x200, "uprobe_res");
 	ASSERT_EQ(skel->bss->uretprobe_res, 0x1000 | 0x2000, "uretprobe_res");
diff --git a/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c b/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c
index 37c20b5ffa70..61935e7e056a 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c
@@ -3,18 +3,22 @@
 #include <test_progs.h>
 #include "test_task_pt_regs.skel.h"
 
+/* uprobe attach point */
+static void trigger_func(void)
+{
+	asm volatile ("");
+}
+
 void test_task_pt_regs(void)
 {
 	struct test_task_pt_regs *skel;
 	struct bpf_link *uprobe_link;
-	size_t uprobe_offset;
-	ssize_t base_addr;
+	ssize_t uprobe_offset;
 	bool match;
 
-	base_addr = get_base_addr();
-	if (!ASSERT_GT(base_addr, 0, "get_base_addr"))
+	uprobe_offset = get_uprobe_offset(&trigger_func);
+	if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset"))
 		return;
-	uprobe_offset = get_uprobe_offset(&get_base_addr, base_addr);
 
 	skel = test_task_pt_regs__open_and_load();
 	if (!ASSERT_OK_PTR(skel, "skel_open"))
@@ -32,7 +36,7 @@ void test_task_pt_regs(void)
 	skel->links.handle_uprobe = uprobe_link;
 
 	/* trigger & validate uprobe */
-	get_base_addr();
+	trigger_func();
 
 	if (!ASSERT_EQ(skel->bss->uprobe_res, 1, "check_uprobe_res"))
 		goto cleanup;
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 7b7f918eda77..65ab533c2516 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -138,6 +138,29 @@ void read_trace_pipe(void)
 	}
 }
 
+ssize_t get_uprobe_offset(const void *addr)
+{
+	size_t start, end, base;
+	char buf[256];
+	bool found;
+	FILE *f;
+
+	f = fopen("/proc/self/maps", "r");
+	if (!f)
+		return -errno;
+
+	while (fscanf(f, "%zx-%zx %s %zx %*[^\n]\n", &start, &end, buf, &base) == 4) {
+		if (buf[2] == 'x' && (uintptr_t)addr >= start && (uintptr_t)addr < end) {
+			found = true;
+			break;
+		}
+	}
+
+	fclose(f);
+
+	if (!found)
+		return -ESRCH;
+
 #if defined(__powerpc64__) && defined(_CALL_ELF) && _CALL_ELF == 2
 
 #define OP_RT_RA_MASK   0xffff0000UL
@@ -145,10 +168,6 @@ void read_trace_pipe(void)
 #define ADDIS_R2_R12    0x3c4c0000UL
 #define ADDI_R2_R2      0x38420000UL
 
-ssize_t get_uprobe_offset(const void *addr, ssize_t base)
-{
-	u32 *insn = (u32 *)(uintptr_t)addr;
-
 	/*
 	 * A PPC64 ABIv2 function may have a local and a global entry
 	 * point. We need to use the local entry point when patching
@@ -165,43 +184,16 @@ ssize_t get_uprobe_offset(const void *addr, ssize_t base)
 	 * lis   r2,XXXX
 	 * addi  r2,r2,XXXX
 	 */
-	if ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) ||
-	     ((*insn & OP_RT_RA_MASK) == LIS_R2)) &&
-	    ((*(insn + 1) & OP_RT_RA_MASK) == ADDI_R2_R2))
-		return (ssize_t)(insn + 2) - base;
-	else
-		return (uintptr_t)addr - base;
-}
-
-#else
+	{
+		const u32 *insn = (const u32 *)(uintptr_t)addr;
 
-ssize_t get_uprobe_offset(const void *addr, ssize_t base)
-{
-	return (uintptr_t)addr - base;
-}
-
-#endif
-
-ssize_t get_base_addr(void)
-{
-	size_t start, offset;
-	char buf[256];
-	FILE *f;
-
-	f = fopen("/proc/self/maps", "r");
-	if (!f)
-		return -errno;
-
-	while (fscanf(f, "%zx-%*x %s %zx %*[^\n]\n",
-		      &start, buf, &offset) == 3) {
-		if (strcmp(buf, "r-xp") == 0) {
-			fclose(f);
-			return start - offset;
-		}
+		if ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) ||
+		     ((*insn & OP_RT_RA_MASK) == LIS_R2)) &&
+		    ((*(insn + 1) & OP_RT_RA_MASK) == ADDI_R2_R2))
+			return (uintptr_t)(insn + 2) - start + base;
 	}
-
-	fclose(f);
-	return -EINVAL;
+#endif
+	return (uintptr_t)addr - start + base;
 }
 
 ssize_t get_rel_offset(uintptr_t addr)
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
index d907b445524d..238a9c98cde2 100644
--- a/tools/testing/selftests/bpf/trace_helpers.h
+++ b/tools/testing/selftests/bpf/trace_helpers.h
@@ -18,8 +18,7 @@ int kallsyms_find(const char *sym, unsigned long long *addr);
 
 void read_trace_pipe(void);
 
-ssize_t get_uprobe_offset(const void *addr, ssize_t base);
-ssize_t get_base_addr(void);
+ssize_t get_uprobe_offset(const void *addr);
 ssize_t get_rel_offset(uintptr_t addr);
 
 #endif
-- 
cgit 


From 3b22523bca02b0d5618c08b93d8fd1fb578e1cc3 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 25 Jan 2022 09:29:45 +0100
Subject: selftests, xsk: Fix bpf_res cleanup test

After commit 710ad98c363a ("veth: Do not record rx queue hint in veth_xmit"),
veth no longer receives traffic on the same queue as it was sent on. This
breaks the bpf_res test for the AF_XDP selftests as the socket tied to
queue 1 will not receive traffic anymore.

Modify the test so that two sockets are tied to queue id 0 using a shared
umem instead. When killing the first socket enter the second socket into
the xskmap so that traffic will flow to it. This will still test that the
resources are not cleaned up until after the second socket dies, without
having to rely on veth supporting rx_queue hints.

Reported-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20220125082945.26179-1-magnus.karlsson@gmail.com
---
 tools/testing/selftests/bpf/xdpxceiver.c | 80 +++++++++++++++++++-------------
 tools/testing/selftests/bpf/xdpxceiver.h |  2 +-
 2 files changed, 50 insertions(+), 32 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index ffa5502ad95e..5f8296d29e77 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -266,22 +266,24 @@ static int xsk_configure_umem(struct xsk_umem_info *umem, void *buffer, u64 size
 }
 
 static int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem,
-				struct ifobject *ifobject, u32 qid)
+				struct ifobject *ifobject, bool shared)
 {
-	struct xsk_socket_config cfg;
+	struct xsk_socket_config cfg = {};
 	struct xsk_ring_cons *rxr;
 	struct xsk_ring_prod *txr;
 
 	xsk->umem = umem;
 	cfg.rx_size = xsk->rxqsize;
 	cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
-	cfg.libbpf_flags = 0;
+	cfg.libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD;
 	cfg.xdp_flags = ifobject->xdp_flags;
 	cfg.bind_flags = ifobject->bind_flags;
+	if (shared)
+		cfg.bind_flags |= XDP_SHARED_UMEM;
 
 	txr = ifobject->tx_on ? &xsk->tx : NULL;
 	rxr = ifobject->rx_on ? &xsk->rx : NULL;
-	return xsk_socket__create(&xsk->xsk, ifobject->ifname, qid, umem->umem, rxr, txr, &cfg);
+	return xsk_socket__create(&xsk->xsk, ifobject->ifname, 0, umem->umem, rxr, txr, &cfg);
 }
 
 static struct option long_options[] = {
@@ -387,7 +389,6 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
 	for (i = 0; i < MAX_INTERFACES; i++) {
 		struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx;
 
-		ifobj->umem = &ifobj->umem_arr[0];
 		ifobj->xsk = &ifobj->xsk_arr[0];
 		ifobj->use_poll = false;
 		ifobj->pacing_on = true;
@@ -401,11 +402,12 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
 			ifobj->tx_on = false;
 		}
 
+		memset(ifobj->umem, 0, sizeof(*ifobj->umem));
+		ifobj->umem->num_frames = DEFAULT_UMEM_BUFFERS;
+		ifobj->umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
+
 		for (j = 0; j < MAX_SOCKETS; j++) {
-			memset(&ifobj->umem_arr[j], 0, sizeof(ifobj->umem_arr[j]));
 			memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j]));
-			ifobj->umem_arr[j].num_frames = DEFAULT_UMEM_BUFFERS;
-			ifobj->umem_arr[j].frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
 			ifobj->xsk_arr[j].rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
 		}
 	}
@@ -950,7 +952,10 @@ static void tx_stats_validate(struct ifobject *ifobject)
 
 static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
 {
+	u64 umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size;
 	int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
+	int ret, ifindex;
+	void *bufs;
 	u32 i;
 
 	ifobject->ns_fd = switch_namespace(ifobject->nsname);
@@ -958,23 +963,20 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
 	if (ifobject->umem->unaligned_mode)
 		mmap_flags |= MAP_HUGETLB;
 
-	for (i = 0; i < test->nb_sockets; i++) {
-		u64 umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size;
-		u32 ctr = 0;
-		void *bufs;
-		int ret;
+	bufs = mmap(NULL, umem_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
+	if (bufs == MAP_FAILED)
+		exit_with_error(errno);
 
-		bufs = mmap(NULL, umem_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
-		if (bufs == MAP_FAILED)
-			exit_with_error(errno);
+	ret = xsk_configure_umem(ifobject->umem, bufs, umem_sz);
+	if (ret)
+		exit_with_error(-ret);
 
-		ret = xsk_configure_umem(&ifobject->umem_arr[i], bufs, umem_sz);
-		if (ret)
-			exit_with_error(-ret);
+	for (i = 0; i < test->nb_sockets; i++) {
+		u32 ctr = 0;
 
 		while (ctr++ < SOCK_RECONF_CTR) {
-			ret = xsk_configure_socket(&ifobject->xsk_arr[i], &ifobject->umem_arr[i],
-						   ifobject, i);
+			ret = xsk_configure_socket(&ifobject->xsk_arr[i], ifobject->umem,
+						   ifobject, !!i);
 			if (!ret)
 				break;
 
@@ -985,8 +987,22 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
 		}
 	}
 
-	ifobject->umem = &ifobject->umem_arr[0];
 	ifobject->xsk = &ifobject->xsk_arr[0];
+
+	if (!ifobject->rx_on)
+		return;
+
+	ifindex = if_nametoindex(ifobject->ifname);
+	if (!ifindex)
+		exit_with_error(errno);
+
+	ret = xsk_setup_xdp_prog(ifindex, &ifobject->xsk_map_fd);
+	if (ret)
+		exit_with_error(-ret);
+
+	ret = xsk_socket__update_xskmap(ifobject->xsk->xsk, ifobject->xsk_map_fd);
+	if (ret)
+		exit_with_error(-ret);
 }
 
 static void testapp_cleanup_xsk_res(struct ifobject *ifobj)
@@ -1142,14 +1158,16 @@ static void testapp_bidi(struct test_spec *test)
 
 static void swap_xsk_resources(struct ifobject *ifobj_tx, struct ifobject *ifobj_rx)
 {
+	int ret;
+
 	xsk_socket__delete(ifobj_tx->xsk->xsk);
-	xsk_umem__delete(ifobj_tx->umem->umem);
 	xsk_socket__delete(ifobj_rx->xsk->xsk);
-	xsk_umem__delete(ifobj_rx->umem->umem);
-	ifobj_tx->umem = &ifobj_tx->umem_arr[1];
 	ifobj_tx->xsk = &ifobj_tx->xsk_arr[1];
-	ifobj_rx->umem = &ifobj_rx->umem_arr[1];
 	ifobj_rx->xsk = &ifobj_rx->xsk_arr[1];
+
+	ret = xsk_socket__update_xskmap(ifobj_rx->xsk->xsk, ifobj_rx->xsk_map_fd);
+	if (ret)
+		exit_with_error(-ret);
 }
 
 static void testapp_bpf_res(struct test_spec *test)
@@ -1408,13 +1426,13 @@ static struct ifobject *ifobject_create(void)
 	if (!ifobj->xsk_arr)
 		goto out_xsk_arr;
 
-	ifobj->umem_arr = calloc(MAX_SOCKETS, sizeof(*ifobj->umem_arr));
-	if (!ifobj->umem_arr)
-		goto out_umem_arr;
+	ifobj->umem = calloc(1, sizeof(*ifobj->umem));
+	if (!ifobj->umem)
+		goto out_umem;
 
 	return ifobj;
 
-out_umem_arr:
+out_umem:
 	free(ifobj->xsk_arr);
 out_xsk_arr:
 	free(ifobj);
@@ -1423,7 +1441,7 @@ out_xsk_arr:
 
 static void ifobject_delete(struct ifobject *ifobj)
 {
-	free(ifobj->umem_arr);
+	free(ifobj->umem);
 	free(ifobj->xsk_arr);
 	free(ifobj);
 }
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 2f705f44b748..62a3e6388632 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -125,10 +125,10 @@ struct ifobject {
 	struct xsk_socket_info *xsk;
 	struct xsk_socket_info *xsk_arr;
 	struct xsk_umem_info *umem;
-	struct xsk_umem_info *umem_arr;
 	thread_func_t func_ptr;
 	struct pkt_stream *pkt_stream;
 	int ns_fd;
+	int xsk_map_fd;
 	u32 dst_ip;
 	u32 src_ip;
 	u32 xdp_flags;
-- 
cgit 


From cdb5ed9796e70ca666863eff65cf4907da5fe13c Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Thu, 27 Jan 2022 08:37:26 -0800
Subject: selftests/bpf: fix a clang compilation error

When building selftests/bpf with clang
  make -j LLVM=1
  make -C tools/testing/selftests/bpf -j LLVM=1
I hit the following compilation error:

  trace_helpers.c:152:9: error: variable 'found' is used uninitialized whenever 'while' loop exits because its condition is false [-Werror,-Wsometimes-uninitialized]
          while (fscanf(f, "%zx-%zx %s %zx %*[^\n]\n", &start, &end, buf, &base) == 4) {
                 ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  trace_helpers.c:161:7: note: uninitialized use occurs here
          if (!found)
               ^~~~~
  trace_helpers.c:152:9: note: remove the condition if it is always true
          while (fscanf(f, "%zx-%zx %s %zx %*[^\n]\n", &start, &end, buf, &base) == 4) {
                 ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                 1
  trace_helpers.c:145:12: note: initialize the variable 'found' to silence this warning
          bool found;
                    ^
                     = false

It is possible that for sane /proc/self/maps we may never hit the above issue
in practice. But let us initialize variable 'found' properly to silence the
compilation error.

Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220127163726.1442032-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/trace_helpers.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 65ab533c2516..ca6abae9b09c 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -142,7 +142,7 @@ ssize_t get_uprobe_offset(const void *addr)
 {
 	size_t start, end, base;
 	char buf[256];
-	bool found;
+	bool found = false;
 	FILE *f;
 
 	f = fopen("/proc/self/maps", "r");
-- 
cgit 


From 571d01a9d06f984ce51bd7625036e67f7f1f886c Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Thu, 27 Jan 2022 07:46:11 -0800
Subject: selftests/bpf: rename btf_decl_tag.c to test_btf_decl_tag.c

The uapi btf.h contains the following declaration:
  struct btf_decl_tag {
       __s32   component_idx;
  };

The skeleton will also generate a struct with name
"btf_decl_tag" for bpf program btf_decl_tag.c.

Rename btf_decl_tag.c to test_btf_decl_tag.c so
the corresponding skeleton struct name becomes
"test_btf_decl_tag". This way, we could include
uapi btf.h in prog_tests/btf_tag.c.
There is no functionality change for this patch.

Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220127154611.656699-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/btf_tag.c   |  8 ++--
 tools/testing/selftests/bpf/progs/btf_decl_tag.c   | 50 ----------------------
 .../selftests/bpf/progs/test_btf_decl_tag.c        | 50 ++++++++++++++++++++++
 3 files changed, 54 insertions(+), 54 deletions(-)
 delete mode 100644 tools/testing/selftests/bpf/progs/btf_decl_tag.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_btf_decl_tag.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/btf_tag.c b/tools/testing/selftests/bpf/prog_tests/btf_tag.c
index 88d63e23e35f..c4cf27777ff7 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_tag.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_tag.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2021 Facebook */
 #include <test_progs.h>
-#include "btf_decl_tag.skel.h"
+#include "test_btf_decl_tag.skel.h"
 
 /* struct btf_type_tag_test is referenced in btf_type_tag.skel.h */
 struct btf_type_tag_test {
@@ -11,9 +11,9 @@ struct btf_type_tag_test {
 
 static void test_btf_decl_tag(void)
 {
-	struct btf_decl_tag *skel;
+	struct test_btf_decl_tag *skel;
 
-	skel = btf_decl_tag__open_and_load();
+	skel = test_btf_decl_tag__open_and_load();
 	if (!ASSERT_OK_PTR(skel, "btf_decl_tag"))
 		return;
 
@@ -22,7 +22,7 @@ static void test_btf_decl_tag(void)
 		test__skip();
 	}
 
-	btf_decl_tag__destroy(skel);
+	test_btf_decl_tag__destroy(skel);
 }
 
 static void test_btf_type_tag(void)
diff --git a/tools/testing/selftests/bpf/progs/btf_decl_tag.c b/tools/testing/selftests/bpf/progs/btf_decl_tag.c
deleted file mode 100644
index c88ccc53529a..000000000000
--- a/tools/testing/selftests/bpf/progs/btf_decl_tag.c
+++ /dev/null
@@ -1,50 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright (c) 2021 Facebook */
-#include "vmlinux.h"
-#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
-
-#if __has_attribute(btf_decl_tag)
-#define __tag1 __attribute__((btf_decl_tag("tag1")))
-#define __tag2 __attribute__((btf_decl_tag("tag2")))
-volatile const bool skip_tests __tag1 __tag2 = false;
-#else
-#define __tag1
-#define __tag2
-volatile const bool skip_tests = true;
-#endif
-
-struct key_t {
-	int a;
-	int b __tag1 __tag2;
-	int c;
-} __tag1 __tag2;
-
-typedef struct {
-	int a;
-	int b;
-} value_t __tag1 __tag2;
-
-struct {
-	__uint(type, BPF_MAP_TYPE_HASH);
-	__uint(max_entries, 3);
-	__type(key, struct key_t);
-	__type(value, value_t);
-} hashmap1 SEC(".maps");
-
-
-static __noinline int foo(int x __tag1 __tag2) __tag1 __tag2
-{
-	struct key_t key;
-	value_t val = {};
-
-	key.a = key.b = key.c = x;
-	bpf_map_update_elem(&hashmap1, &key, &val, 0);
-	return 0;
-}
-
-SEC("fentry/bpf_fentry_test1")
-int BPF_PROG(sub, int x)
-{
-	return foo(x);
-}
diff --git a/tools/testing/selftests/bpf/progs/test_btf_decl_tag.c b/tools/testing/selftests/bpf/progs/test_btf_decl_tag.c
new file mode 100644
index 000000000000..c88ccc53529a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_btf_decl_tag.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#if __has_attribute(btf_decl_tag)
+#define __tag1 __attribute__((btf_decl_tag("tag1")))
+#define __tag2 __attribute__((btf_decl_tag("tag2")))
+volatile const bool skip_tests __tag1 __tag2 = false;
+#else
+#define __tag1
+#define __tag2
+volatile const bool skip_tests = true;
+#endif
+
+struct key_t {
+	int a;
+	int b __tag1 __tag2;
+	int c;
+} __tag1 __tag2;
+
+typedef struct {
+	int a;
+	int b;
+} value_t __tag1 __tag2;
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 3);
+	__type(key, struct key_t);
+	__type(value, value_t);
+} hashmap1 SEC(".maps");
+
+
+static __noinline int foo(int x __tag1 __tag2) __tag1 __tag2
+{
+	struct key_t key;
+	value_t val = {};
+
+	key.a = key.b = key.c = x;
+	bpf_map_update_elem(&hashmap1, &key, &val, 0);
+	return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(sub, int x)
+{
+	return foo(x);
+}
-- 
cgit 


From 696c39011538fe0124fcc0e81ebc44ff7f8623b4 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Thu, 27 Jan 2022 07:46:16 -0800
Subject: selftests/bpf: add a selftest with __user tag

Added a selftest with three__user usages: a __user pointer-type argument
in bpf_testmod, a __user pointer-type struct member in bpf_testmod,
and a __user pointer-type struct member in vmlinux. In all cases,
directly accessing the user memory will result verification failure.

  $ ./test_progs -v -n 22/3
  ...
  libbpf: prog 'test_user1': BPF program load failed: Permission denied
  libbpf: prog 'test_user1': -- BEGIN PROG LOAD LOG --
  R1 type=ctx expected=fp
  0: R1=ctx(id=0,off=0,imm=0) R10=fp0
  ; int BPF_PROG(test_user1, struct bpf_testmod_btf_type_tag_1 *arg)
  0: (79) r1 = *(u64 *)(r1 +0)
  func 'bpf_testmod_test_btf_type_tag_user_1' arg0 has btf_id 136561 type STRUCT 'bpf_testmod_btf_type_tag_1'
  1: R1_w=user_ptr_bpf_testmod_btf_type_tag_1(id=0,off=0,imm=0)
  ; g = arg->a;
  1: (61) r1 = *(u32 *)(r1 +0)
  R1 invalid mem access 'user_ptr_'
  ...
  #22/3 btf_tag/btf_type_tag_user_mod1:OK

  $ ./test_progs -v -n 22/4
  ...
  libbpf: prog 'test_user2': BPF program load failed: Permission denied
  libbpf: prog 'test_user2': -- BEGIN PROG LOAD LOG --
  R1 type=ctx expected=fp
  0: R1=ctx(id=0,off=0,imm=0) R10=fp0
  ; int BPF_PROG(test_user2, struct bpf_testmod_btf_type_tag_2 *arg)
  0: (79) r1 = *(u64 *)(r1 +0)
  func 'bpf_testmod_test_btf_type_tag_user_2' arg0 has btf_id 136563 type STRUCT 'bpf_testmod_btf_type_tag_2'
  1: R1_w=ptr_bpf_testmod_btf_type_tag_2(id=0,off=0,imm=0)
  ; g = arg->p->a;
  1: (79) r1 = *(u64 *)(r1 +0)          ; R1_w=user_ptr_bpf_testmod_btf_type_tag_1(id=0,off=0,imm=0)
  ; g = arg->p->a;
  2: (61) r1 = *(u32 *)(r1 +0)
  R1 invalid mem access 'user_ptr_'
  ...
  #22/4 btf_tag/btf_type_tag_user_mod2:OK

  $ ./test_progs -v -n 22/5
  ...
  libbpf: prog 'test_sys_getsockname': BPF program load failed: Permission denied
  libbpf: prog 'test_sys_getsockname': -- BEGIN PROG LOAD LOG --
  R1 type=ctx expected=fp
  0: R1=ctx(id=0,off=0,imm=0) R10=fp0
  ; int BPF_PROG(test_sys_getsockname, int fd, struct sockaddr *usockaddr,
  0: (79) r1 = *(u64 *)(r1 +8)
  func '__sys_getsockname' arg1 has btf_id 2319 type STRUCT 'sockaddr'
  1: R1_w=user_ptr_sockaddr(id=0,off=0,imm=0)
  ; g = usockaddr->sa_family;
  1: (69) r1 = *(u16 *)(r1 +0)
  R1 invalid mem access 'user_ptr_'
  ...
  #22/5 btf_tag/btf_type_tag_user_vmlinux:OK

Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220127154616.659314-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../selftests/bpf/bpf_testmod/bpf_testmod.c        | 18 +++++
 tools/testing/selftests/bpf/prog_tests/btf_tag.c   | 93 ++++++++++++++++++++++
 .../selftests/bpf/progs/btf_type_tag_user.c        | 40 ++++++++++
 3 files changed, 151 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/btf_type_tag_user.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
index bdbacf5adcd2..595d32ab285a 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -21,6 +21,24 @@ bpf_testmod_test_mod_kfunc(int i)
 	*(int *)this_cpu_ptr(&bpf_testmod_ksym_percpu) = i;
 }
 
+struct bpf_testmod_btf_type_tag_1 {
+	int a;
+};
+
+struct bpf_testmod_btf_type_tag_2 {
+	struct bpf_testmod_btf_type_tag_1 __user *p;
+};
+
+noinline int
+bpf_testmod_test_btf_type_tag_user_1(struct bpf_testmod_btf_type_tag_1 __user *arg) {
+	return arg->a;
+}
+
+noinline int
+bpf_testmod_test_btf_type_tag_user_2(struct bpf_testmod_btf_type_tag_2 *arg) {
+	return arg->p->a;
+}
+
 noinline int bpf_testmod_loop_test(int n)
 {
 	int i, sum = 0;
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_tag.c b/tools/testing/selftests/bpf/prog_tests/btf_tag.c
index c4cf27777ff7..f7560b54a6bb 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_tag.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_tag.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2021 Facebook */
 #include <test_progs.h>
+#include <bpf/btf.h>
 #include "test_btf_decl_tag.skel.h"
 
 /* struct btf_type_tag_test is referenced in btf_type_tag.skel.h */
@@ -8,6 +9,7 @@ struct btf_type_tag_test {
         int **p;
 };
 #include "btf_type_tag.skel.h"
+#include "btf_type_tag_user.skel.h"
 
 static void test_btf_decl_tag(void)
 {
@@ -41,10 +43,101 @@ static void test_btf_type_tag(void)
 	btf_type_tag__destroy(skel);
 }
 
+static void test_btf_type_tag_mod_user(bool load_test_user1)
+{
+	const char *module_name = "bpf_testmod";
+	struct btf *vmlinux_btf, *module_btf;
+	struct btf_type_tag_user *skel;
+	__s32 type_id;
+	int err;
+
+	if (!env.has_testmod) {
+		test__skip();
+		return;
+	}
+
+	/* skip the test if the module does not have __user tags */
+	vmlinux_btf = btf__load_vmlinux_btf();
+	if (!ASSERT_OK_PTR(vmlinux_btf, "could not load vmlinux BTF"))
+		return;
+
+	module_btf = btf__load_module_btf(module_name, vmlinux_btf);
+	if (!ASSERT_OK_PTR(module_btf, "could not load module BTF"))
+		goto free_vmlinux_btf;
+
+	type_id = btf__find_by_name_kind(module_btf, "user", BTF_KIND_TYPE_TAG);
+	if (type_id <= 0) {
+		printf("%s:SKIP: btf_type_tag attribute not in %s", __func__, module_name);
+		test__skip();
+		goto free_module_btf;
+	}
+
+	skel = btf_type_tag_user__open();
+	if (!ASSERT_OK_PTR(skel, "btf_type_tag_user"))
+		goto free_module_btf;
+
+	bpf_program__set_autoload(skel->progs.test_sys_getsockname, false);
+	if (load_test_user1)
+		bpf_program__set_autoload(skel->progs.test_user2, false);
+	else
+		bpf_program__set_autoload(skel->progs.test_user1, false);
+
+	err = btf_type_tag_user__load(skel);
+	ASSERT_ERR(err, "btf_type_tag_user");
+
+	btf_type_tag_user__destroy(skel);
+
+free_module_btf:
+	btf__free(module_btf);
+free_vmlinux_btf:
+	btf__free(vmlinux_btf);
+}
+
+static void test_btf_type_tag_vmlinux_user(void)
+{
+	struct btf_type_tag_user *skel;
+	struct btf *vmlinux_btf;
+	__s32 type_id;
+	int err;
+
+	/* skip the test if the vmlinux does not have __user tags */
+	vmlinux_btf = btf__load_vmlinux_btf();
+	if (!ASSERT_OK_PTR(vmlinux_btf, "could not load vmlinux BTF"))
+		return;
+
+	type_id = btf__find_by_name_kind(vmlinux_btf, "user", BTF_KIND_TYPE_TAG);
+	if (type_id <= 0) {
+		printf("%s:SKIP: btf_type_tag attribute not in vmlinux btf", __func__);
+		test__skip();
+		goto free_vmlinux_btf;
+	}
+
+	skel = btf_type_tag_user__open();
+	if (!ASSERT_OK_PTR(skel, "btf_type_tag_user"))
+		goto free_vmlinux_btf;
+
+	bpf_program__set_autoload(skel->progs.test_user2, false);
+	bpf_program__set_autoload(skel->progs.test_user1, false);
+
+	err = btf_type_tag_user__load(skel);
+	ASSERT_ERR(err, "btf_type_tag_user");
+
+	btf_type_tag_user__destroy(skel);
+
+free_vmlinux_btf:
+	btf__free(vmlinux_btf);
+}
+
 void test_btf_tag(void)
 {
 	if (test__start_subtest("btf_decl_tag"))
 		test_btf_decl_tag();
 	if (test__start_subtest("btf_type_tag"))
 		test_btf_type_tag();
+	if (test__start_subtest("btf_type_tag_user_mod1"))
+		test_btf_type_tag_mod_user(true);
+	if (test__start_subtest("btf_type_tag_user_mod2"))
+		test_btf_type_tag_mod_user(false);
+	if (test__start_subtest("btf_type_tag_sys_user_vmlinux"))
+		test_btf_type_tag_vmlinux_user();
 }
diff --git a/tools/testing/selftests/bpf/progs/btf_type_tag_user.c b/tools/testing/selftests/bpf/progs/btf_type_tag_user.c
new file mode 100644
index 000000000000..5523f77c5a44
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf_type_tag_user.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+struct bpf_testmod_btf_type_tag_1 {
+	int a;
+};
+
+struct bpf_testmod_btf_type_tag_2 {
+	struct bpf_testmod_btf_type_tag_1 *p;
+};
+
+int g;
+
+SEC("fentry/bpf_testmod_test_btf_type_tag_user_1")
+int BPF_PROG(test_user1, struct bpf_testmod_btf_type_tag_1 *arg)
+{
+	g = arg->a;
+	return 0;
+}
+
+SEC("fentry/bpf_testmod_test_btf_type_tag_user_2")
+int BPF_PROG(test_user2, struct bpf_testmod_btf_type_tag_2 *arg)
+{
+	g = arg->p->a;
+	return 0;
+}
+
+/* int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
+ *                       int __user *usockaddr_len);
+ */
+SEC("fentry/__sys_getsockname")
+int BPF_PROG(test_sys_getsockname, int fd, struct sockaddr *usockaddr,
+	     int *usockaddr_len)
+{
+	g = usockaddr->sa_family;
+	return 0;
+}
-- 
cgit 


From 67ef7e1a759e4268e2102e7aa93c226eb75589f4 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Thu, 27 Jan 2022 07:46:22 -0800
Subject: selftests/bpf: specify pahole version requirement for btf_tag test

Specify pahole version requirement (1.23) for btf_tag subtests
btf_type_tag_user_{mod1, mod2, vmlinux}.

Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220127154622.663337-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/README.rst | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
index 42ef250c7acc..d099d91adc3b 100644
--- a/tools/testing/selftests/bpf/README.rst
+++ b/tools/testing/selftests/bpf/README.rst
@@ -206,6 +206,8 @@ btf_tag test and Clang version
 
 The btf_tag selftest requires LLVM support to recognize the btf_decl_tag and
 btf_type_tag attributes. They are introduced in `Clang 14` [0_, 1_].
+The subtests ``btf_type_tag_user_{mod1, mod2, vmlinux}`` also requires
+pahole version ``1.23``.
 
 Without them, the btf_tag selftest will be skipped and you will observe:
 
-- 
cgit 


From cec74489a8dee93053340ec88ea938ff4008c3c0 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Tue, 25 Jan 2022 16:17:11 +0800
Subject: selftests/bpf/test_xdp_redirect_multi: use temp netns for testing

Use temp netns instead of hard code name for testing in case the netns
already exists.

Remove the hard code interface index when creating the veth interfaces.
Because when the system loads some virtual interface modules, e.g. tunnels.
the ifindex of 2 will be used and the cmd will fail.

As the netns has not created if checking environment failed. Trap the
clean up function after checking env.

Fixes: 8955c1a32987 ("selftests/bpf/xdp_redirect_multi: Limit the tests in netns")
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Acked-by: William Tu <u9012063@gmail.com>
Link: https://lore.kernel.org/r/20220125081717.1260849-2-liuhangbin@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../selftests/bpf/test_xdp_redirect_multi.sh       | 60 +++++++++++-----------
 1 file changed, 31 insertions(+), 29 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
index 05f872740999..cc57cb87e65f 100755
--- a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
+++ b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
@@ -32,6 +32,11 @@ DRV_MODE="xdpgeneric xdpdrv xdpegress"
 PASS=0
 FAIL=0
 LOG_DIR=$(mktemp -d)
+declare -a NS
+NS[0]="ns0-$(mktemp -u XXXXXX)"
+NS[1]="ns1-$(mktemp -u XXXXXX)"
+NS[2]="ns2-$(mktemp -u XXXXXX)"
+NS[3]="ns3-$(mktemp -u XXXXXX)"
 
 test_pass()
 {
@@ -47,11 +52,9 @@ test_fail()
 
 clean_up()
 {
-	for i in $(seq $NUM); do
-		ip link del veth$i 2> /dev/null
-		ip netns del ns$i 2> /dev/null
+	for i in $(seq 0 $NUM); do
+		ip netns del ${NS[$i]} 2> /dev/null
 	done
-	ip netns del ns0 2> /dev/null
 }
 
 # Kselftest framework requirement - SKIP code is 4.
@@ -79,23 +82,22 @@ setup_ns()
 		mode="xdpdrv"
 	fi
 
-	ip netns add ns0
+	ip netns add ${NS[0]}
 	for i in $(seq $NUM); do
-	        ip netns add ns$i
-		ip -n ns$i link add veth0 index 2 type veth \
-			peer name veth$i netns ns0 index $((1 + $i))
-		ip -n ns0 link set veth$i up
-		ip -n ns$i link set veth0 up
-
-		ip -n ns$i addr add 192.0.2.$i/24 dev veth0
-		ip -n ns$i addr add 2001:db8::$i/64 dev veth0
+	        ip netns add ${NS[$i]}
+		ip -n ${NS[$i]} link add veth0 type veth peer name veth$i netns ${NS[0]}
+		ip -n ${NS[$i]} link set veth0 up
+		ip -n ${NS[0]} link set veth$i up
+
+		ip -n ${NS[$i]} addr add 192.0.2.$i/24 dev veth0
+		ip -n ${NS[$i]} addr add 2001:db8::$i/64 dev veth0
 		# Add a neigh entry for IPv4 ping test
-		ip -n ns$i neigh add 192.0.2.253 lladdr 00:00:00:00:00:01 dev veth0
-		ip -n ns$i link set veth0 $mode obj \
+		ip -n ${NS[$i]} neigh add 192.0.2.253 lladdr 00:00:00:00:00:01 dev veth0
+		ip -n ${NS[$i]} link set veth0 $mode obj \
 			xdp_dummy.o sec xdp &> /dev/null || \
 			{ test_fail "Unable to load dummy xdp" && exit 1; }
 		IFACES="$IFACES veth$i"
-		veth_mac[$i]=$(ip -n ns0 link show veth$i | awk '/link\/ether/ {print $2}')
+		veth_mac[$i]=$(ip -n ${NS[0]} link show veth$i | awk '/link\/ether/ {print $2}')
 	done
 }
 
@@ -104,10 +106,10 @@ do_egress_tests()
 	local mode=$1
 
 	# mac test
-	ip netns exec ns2 tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-2_${mode}.log &
-	ip netns exec ns3 tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-3_${mode}.log &
+	ip netns exec ${NS[2]} tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-2_${mode}.log &
+	ip netns exec ${NS[3]} tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-3_${mode}.log &
 	sleep 0.5
-	ip netns exec ns1 ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null
+	ip netns exec ${NS[1]} ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null
 	sleep 0.5
 	pkill tcpdump
 
@@ -123,18 +125,18 @@ do_ping_tests()
 	local mode=$1
 
 	# ping6 test: echo request should be redirect back to itself, not others
-	ip netns exec ns1 ip neigh add 2001:db8::2 dev veth0 lladdr 00:00:00:00:00:02
+	ip netns exec ${NS[1]} ip neigh add 2001:db8::2 dev veth0 lladdr 00:00:00:00:00:02
 
-	ip netns exec ns1 tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-1_${mode}.log &
-	ip netns exec ns2 tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-2_${mode}.log &
-	ip netns exec ns3 tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-3_${mode}.log &
+	ip netns exec ${NS[1]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-1_${mode}.log &
+	ip netns exec ${NS[2]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-2_${mode}.log &
+	ip netns exec ${NS[3]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-3_${mode}.log &
 	sleep 0.5
 	# ARP test
-	ip netns exec ns1 arping -q -c 2 -I veth0 192.0.2.254
+	ip netns exec ${NS[1]} arping -q -c 2 -I veth0 192.0.2.254
 	# IPv4 test
-	ip netns exec ns1 ping 192.0.2.253 -i 0.1 -c 4 &> /dev/null
+	ip netns exec ${NS[1]} ping 192.0.2.253 -i 0.1 -c 4 &> /dev/null
 	# IPv6 test
-	ip netns exec ns1 ping6 2001:db8::2 -i 0.1 -c 2 &> /dev/null
+	ip netns exec ${NS[1]} ping6 2001:db8::2 -i 0.1 -c 2 &> /dev/null
 	sleep 0.5
 	pkill tcpdump
 
@@ -180,7 +182,7 @@ do_tests()
 		xdpgeneric) drv_p="-S";;
 	esac
 
-	ip netns exec ns0 ./xdp_redirect_multi $drv_p $IFACES &> ${LOG_DIR}/xdp_redirect_${mode}.log &
+	ip netns exec ${NS[0]} ./xdp_redirect_multi $drv_p $IFACES &> ${LOG_DIR}/xdp_redirect_${mode}.log &
 	xdp_pid=$!
 	sleep 1
 	if ! ps -p $xdp_pid > /dev/null; then
@@ -197,10 +199,10 @@ do_tests()
 	kill $xdp_pid
 }
 
-trap clean_up EXIT
-
 check_env
 
+trap clean_up EXIT
+
 for mode in ${DRV_MODE}; do
 	setup_ns $mode
 	do_tests $mode
-- 
cgit 


From 9d66c9ddc9fc660ed8bb58469c86baf2ca46de61 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Tue, 25 Jan 2022 16:17:12 +0800
Subject: selftests/bpf/test_xdp_veth: use temp netns for testing

Use temp netns instead of hard code name for testing in case the
netns already exists.

Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Link: https://lore.kernel.org/r/20220125081717.1260849-3-liuhangbin@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_xdp_veth.sh | 39 +++++++++++++++-------------
 1 file changed, 21 insertions(+), 18 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/test_xdp_veth.sh b/tools/testing/selftests/bpf/test_xdp_veth.sh
index a3a1eaee26ea..392d28cc4e58 100755
--- a/tools/testing/selftests/bpf/test_xdp_veth.sh
+++ b/tools/testing/selftests/bpf/test_xdp_veth.sh
@@ -22,6 +22,9 @@ ksft_skip=4
 TESTNAME=xdp_veth
 BPF_FS=$(awk '$3 == "bpf" {print $2; exit}' /proc/mounts)
 BPF_DIR=$BPF_FS/test_$TESTNAME
+readonly NS1="ns1-$(mktemp -u XXXXXX)"
+readonly NS2="ns2-$(mktemp -u XXXXXX)"
+readonly NS3="ns3-$(mktemp -u XXXXXX)"
 
 _cleanup()
 {
@@ -29,9 +32,9 @@ _cleanup()
 	ip link del veth1 2> /dev/null
 	ip link del veth2 2> /dev/null
 	ip link del veth3 2> /dev/null
-	ip netns del ns1 2> /dev/null
-	ip netns del ns2 2> /dev/null
-	ip netns del ns3 2> /dev/null
+	ip netns del ${NS1} 2> /dev/null
+	ip netns del ${NS2} 2> /dev/null
+	ip netns del ${NS3} 2> /dev/null
 	rm -rf $BPF_DIR 2> /dev/null
 }
 
@@ -77,24 +80,24 @@ set -e
 
 trap cleanup_skip EXIT
 
-ip netns add ns1
-ip netns add ns2
-ip netns add ns3
+ip netns add ${NS1}
+ip netns add ${NS2}
+ip netns add ${NS3}
 
-ip link add veth1 index 111 type veth peer name veth11 netns ns1
-ip link add veth2 index 122 type veth peer name veth22 netns ns2
-ip link add veth3 index 133 type veth peer name veth33 netns ns3
+ip link add veth1 index 111 type veth peer name veth11 netns ${NS1}
+ip link add veth2 index 122 type veth peer name veth22 netns ${NS2}
+ip link add veth3 index 133 type veth peer name veth33 netns ${NS3}
 
 ip link set veth1 up
 ip link set veth2 up
 ip link set veth3 up
 
-ip -n ns1 addr add 10.1.1.11/24 dev veth11
-ip -n ns3 addr add 10.1.1.33/24 dev veth33
+ip -n ${NS1} addr add 10.1.1.11/24 dev veth11
+ip -n ${NS3} addr add 10.1.1.33/24 dev veth33
 
-ip -n ns1 link set dev veth11 up
-ip -n ns2 link set dev veth22 up
-ip -n ns3 link set dev veth33 up
+ip -n ${NS1} link set dev veth11 up
+ip -n ${NS2} link set dev veth22 up
+ip -n ${NS3} link set dev veth33 up
 
 mkdir $BPF_DIR
 bpftool prog loadall \
@@ -107,12 +110,12 @@ ip link set dev veth1 xdp pinned $BPF_DIR/progs/redirect_map_0
 ip link set dev veth2 xdp pinned $BPF_DIR/progs/redirect_map_1
 ip link set dev veth3 xdp pinned $BPF_DIR/progs/redirect_map_2
 
-ip -n ns1 link set dev veth11 xdp obj xdp_dummy.o sec xdp
-ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec xdp
-ip -n ns3 link set dev veth33 xdp obj xdp_dummy.o sec xdp
+ip -n ${NS1} link set dev veth11 xdp obj xdp_dummy.o sec xdp
+ip -n ${NS2} link set dev veth22 xdp obj xdp_tx.o sec xdp
+ip -n ${NS3} link set dev veth33 xdp obj xdp_dummy.o sec xdp
 
 trap cleanup EXIT
 
-ip netns exec ns1 ping -c 1 -W 1 10.1.1.33
+ip netns exec ${NS1} ping -c 1 -W 1 10.1.1.33
 
 exit 0
-- 
cgit 


From 3cc382e02f5930a54e08b64541262a1224debebd Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Tue, 25 Jan 2022 16:17:13 +0800
Subject: selftests/bpf/test_xdp_vlan: use temp netns for testing

Use temp netns instead of hard code name for testing in case the
netns already exists.

Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Link: https://lore.kernel.org/r/20220125081717.1260849-4-liuhangbin@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_xdp_vlan.sh | 66 ++++++++++++++--------------
 1 file changed, 34 insertions(+), 32 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/test_xdp_vlan.sh b/tools/testing/selftests/bpf/test_xdp_vlan.sh
index 0cbc7604a2f8..810c407e0286 100755
--- a/tools/testing/selftests/bpf/test_xdp_vlan.sh
+++ b/tools/testing/selftests/bpf/test_xdp_vlan.sh
@@ -4,6 +4,8 @@
 
 # Kselftest framework requirement - SKIP code is 4.
 readonly KSFT_SKIP=4
+readonly NS1="ns1-$(mktemp -u XXXXXX)"
+readonly NS2="ns2-$(mktemp -u XXXXXX)"
 
 # Allow wrapper scripts to name test
 if [ -z "$TESTNAME" ]; then
@@ -49,15 +51,15 @@ cleanup()
 
 	if [ -n "$INTERACTIVE" ]; then
 		echo "Namespace setup still active explore with:"
-		echo " ip netns exec ns1 bash"
-		echo " ip netns exec ns2 bash"
+		echo " ip netns exec ${NS1} bash"
+		echo " ip netns exec ${NS2} bash"
 		exit $status
 	fi
 
 	set +e
 	ip link del veth1 2> /dev/null
-	ip netns del ns1 2> /dev/null
-	ip netns del ns2 2> /dev/null
+	ip netns del ${NS1} 2> /dev/null
+	ip netns del ${NS2} 2> /dev/null
 }
 
 # Using external program "getopt" to get --long-options
@@ -126,8 +128,8 @@ fi
 # Interactive mode likely require us to cleanup netns
 if [ -n "$INTERACTIVE" ]; then
 	ip link del veth1 2> /dev/null
-	ip netns del ns1 2> /dev/null
-	ip netns del ns2 2> /dev/null
+	ip netns del ${NS1} 2> /dev/null
+	ip netns del ${NS2} 2> /dev/null
 fi
 
 # Exit on failure
@@ -144,8 +146,8 @@ if [ -n "$VERBOSE" ]; then
 fi
 
 # Create two namespaces
-ip netns add ns1
-ip netns add ns2
+ip netns add ${NS1}
+ip netns add ${NS2}
 
 # Run cleanup if failing or on kill
 trap cleanup 0 2 3 6 9
@@ -154,44 +156,44 @@ trap cleanup 0 2 3 6 9
 ip link add veth1 type veth peer name veth2
 
 # Move veth1 and veth2 into the respective namespaces
-ip link set veth1 netns ns1
-ip link set veth2 netns ns2
+ip link set veth1 netns ${NS1}
+ip link set veth2 netns ${NS2}
 
 # NOTICE: XDP require VLAN header inside packet payload
 #  - Thus, disable VLAN offloading driver features
 #  - For veth REMEMBER TX side VLAN-offload
 #
 # Disable rx-vlan-offload (mostly needed on ns1)
-ip netns exec ns1 ethtool -K veth1 rxvlan off
-ip netns exec ns2 ethtool -K veth2 rxvlan off
+ip netns exec ${NS1} ethtool -K veth1 rxvlan off
+ip netns exec ${NS2} ethtool -K veth2 rxvlan off
 #
 # Disable tx-vlan-offload (mostly needed on ns2)
-ip netns exec ns2 ethtool -K veth2 txvlan off
-ip netns exec ns1 ethtool -K veth1 txvlan off
+ip netns exec ${NS2} ethtool -K veth2 txvlan off
+ip netns exec ${NS1} ethtool -K veth1 txvlan off
 
 export IPADDR1=100.64.41.1
 export IPADDR2=100.64.41.2
 
 # In ns1/veth1 add IP-addr on plain net_device
-ip netns exec ns1 ip addr add ${IPADDR1}/24 dev veth1
-ip netns exec ns1 ip link set veth1 up
+ip netns exec ${NS1} ip addr add ${IPADDR1}/24 dev veth1
+ip netns exec ${NS1} ip link set veth1 up
 
 # In ns2/veth2 create VLAN device
 export VLAN=4011
 export DEVNS2=veth2
-ip netns exec ns2 ip link add link $DEVNS2 name $DEVNS2.$VLAN type vlan id $VLAN
-ip netns exec ns2 ip addr add ${IPADDR2}/24 dev $DEVNS2.$VLAN
-ip netns exec ns2 ip link set $DEVNS2 up
-ip netns exec ns2 ip link set $DEVNS2.$VLAN up
+ip netns exec ${NS2} ip link add link $DEVNS2 name $DEVNS2.$VLAN type vlan id $VLAN
+ip netns exec ${NS2} ip addr add ${IPADDR2}/24 dev $DEVNS2.$VLAN
+ip netns exec ${NS2} ip link set $DEVNS2 up
+ip netns exec ${NS2} ip link set $DEVNS2.$VLAN up
 
 # Bringup lo in netns (to avoids confusing people using --interactive)
-ip netns exec ns1 ip link set lo up
-ip netns exec ns2 ip link set lo up
+ip netns exec ${NS1} ip link set lo up
+ip netns exec ${NS2} ip link set lo up
 
 # At this point, the hosts cannot reach each-other,
 # because ns2 are using VLAN tags on the packets.
 
-ip netns exec ns2 sh -c 'ping -W 1 -c 1 100.64.41.1 || echo "Success: First ping must fail"'
+ip netns exec ${NS2} sh -c 'ping -W 1 -c 1 100.64.41.1 || echo "Success: First ping must fail"'
 
 
 # Now we can use the test_xdp_vlan.c program to pop/push these VLAN tags
@@ -202,19 +204,19 @@ export FILE=test_xdp_vlan.o
 
 # First test: Remove VLAN by setting VLAN ID 0, using "xdp_vlan_change"
 export XDP_PROG=xdp_vlan_change
-ip netns exec ns1 ip link set $DEVNS1 $XDP_MODE object $FILE section $XDP_PROG
+ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $FILE section $XDP_PROG
 
 # In ns1: egress use TC to add back VLAN tag 4011
 #  (del cmd)
 #  tc qdisc del dev $DEVNS1 clsact 2> /dev/null
 #
-ip netns exec ns1 tc qdisc add dev $DEVNS1 clsact
-ip netns exec ns1 tc filter add dev $DEVNS1 egress \
+ip netns exec ${NS1} tc qdisc add dev $DEVNS1 clsact
+ip netns exec ${NS1} tc filter add dev $DEVNS1 egress \
   prio 1 handle 1 bpf da obj $FILE sec tc_vlan_push
 
 # Now the namespaces can reach each-other, test with ping:
-ip netns exec ns2 ping -i 0.2 -W 2 -c 2 $IPADDR1
-ip netns exec ns1 ping -i 0.2 -W 2 -c 2 $IPADDR2
+ip netns exec ${NS2} ping -i 0.2 -W 2 -c 2 $IPADDR1
+ip netns exec ${NS1} ping -i 0.2 -W 2 -c 2 $IPADDR2
 
 # Second test: Replace xdp prog, that fully remove vlan header
 #
@@ -223,9 +225,9 @@ ip netns exec ns1 ping -i 0.2 -W 2 -c 2 $IPADDR2
 # ETH_P_8021Q indication, and this cause overwriting of our changes.
 #
 export XDP_PROG=xdp_vlan_remove_outer2
-ip netns exec ns1 ip link set $DEVNS1 $XDP_MODE off
-ip netns exec ns1 ip link set $DEVNS1 $XDP_MODE object $FILE section $XDP_PROG
+ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE off
+ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $FILE section $XDP_PROG
 
 # Now the namespaces should still be able reach each-other, test with ping:
-ip netns exec ns2 ping -i 0.2 -W 2 -c 2 $IPADDR1
-ip netns exec ns1 ping -i 0.2 -W 2 -c 2 $IPADDR2
+ip netns exec ${NS2} ping -i 0.2 -W 2 -c 2 $IPADDR1
+ip netns exec ${NS1} ping -i 0.2 -W 2 -c 2 $IPADDR2
-- 
cgit 


From 07c5855461084c1f14dfec17f21c6b2beb5ed6cc Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Tue, 25 Jan 2022 16:17:14 +0800
Subject: selftests/bpf/test_lwt_seg6local: use temp netns for testing

Use temp netns instead of hard code name for testing in case the
netns already exists.

Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Link: https://lore.kernel.org/r/20220125081717.1260849-5-liuhangbin@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_lwt_seg6local.sh | 170 +++++++++++-----------
 1 file changed, 88 insertions(+), 82 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.sh b/tools/testing/selftests/bpf/test_lwt_seg6local.sh
index 5620919fde9e..826f4423ce02 100755
--- a/tools/testing/selftests/bpf/test_lwt_seg6local.sh
+++ b/tools/testing/selftests/bpf/test_lwt_seg6local.sh
@@ -23,6 +23,12 @@
 
 # Kselftest framework requirement - SKIP code is 4.
 ksft_skip=4
+readonly NS1="ns1-$(mktemp -u XXXXXX)"
+readonly NS2="ns2-$(mktemp -u XXXXXX)"
+readonly NS3="ns3-$(mktemp -u XXXXXX)"
+readonly NS4="ns4-$(mktemp -u XXXXXX)"
+readonly NS5="ns5-$(mktemp -u XXXXXX)"
+readonly NS6="ns6-$(mktemp -u XXXXXX)"
 
 msg="skip all tests:"
 if [ $UID != 0 ]; then
@@ -41,23 +47,23 @@ cleanup()
 	fi
 
 	set +e
-	ip netns del ns1 2> /dev/null
-	ip netns del ns2 2> /dev/null
-	ip netns del ns3 2> /dev/null
-	ip netns del ns4 2> /dev/null
-	ip netns del ns5 2> /dev/null
-	ip netns del ns6 2> /dev/null
+	ip netns del ${NS1} 2> /dev/null
+	ip netns del ${NS2} 2> /dev/null
+	ip netns del ${NS3} 2> /dev/null
+	ip netns del ${NS4} 2> /dev/null
+	ip netns del ${NS5} 2> /dev/null
+	ip netns del ${NS6} 2> /dev/null
 	rm -f $TMP_FILE
 }
 
 set -e
 
-ip netns add ns1
-ip netns add ns2
-ip netns add ns3
-ip netns add ns4
-ip netns add ns5
-ip netns add ns6
+ip netns add ${NS1}
+ip netns add ${NS2}
+ip netns add ${NS3}
+ip netns add ${NS4}
+ip netns add ${NS5}
+ip netns add ${NS6}
 
 trap cleanup 0 2 3 6 9
 
@@ -67,78 +73,78 @@ ip link add veth5 type veth peer name veth6
 ip link add veth7 type veth peer name veth8
 ip link add veth9 type veth peer name veth10
 
-ip link set veth1 netns ns1
-ip link set veth2 netns ns2
-ip link set veth3 netns ns2
-ip link set veth4 netns ns3
-ip link set veth5 netns ns3
-ip link set veth6 netns ns4
-ip link set veth7 netns ns4
-ip link set veth8 netns ns5
-ip link set veth9 netns ns5
-ip link set veth10 netns ns6
-
-ip netns exec ns1 ip link set dev veth1 up
-ip netns exec ns2 ip link set dev veth2 up
-ip netns exec ns2 ip link set dev veth3 up
-ip netns exec ns3 ip link set dev veth4 up
-ip netns exec ns3 ip link set dev veth5 up
-ip netns exec ns4 ip link set dev veth6 up
-ip netns exec ns4 ip link set dev veth7 up
-ip netns exec ns5 ip link set dev veth8 up
-ip netns exec ns5 ip link set dev veth9 up
-ip netns exec ns6 ip link set dev veth10 up
-ip netns exec ns6 ip link set dev lo up
+ip link set veth1 netns ${NS1}
+ip link set veth2 netns ${NS2}
+ip link set veth3 netns ${NS2}
+ip link set veth4 netns ${NS3}
+ip link set veth5 netns ${NS3}
+ip link set veth6 netns ${NS4}
+ip link set veth7 netns ${NS4}
+ip link set veth8 netns ${NS5}
+ip link set veth9 netns ${NS5}
+ip link set veth10 netns ${NS6}
+
+ip netns exec ${NS1} ip link set dev veth1 up
+ip netns exec ${NS2} ip link set dev veth2 up
+ip netns exec ${NS2} ip link set dev veth3 up
+ip netns exec ${NS3} ip link set dev veth4 up
+ip netns exec ${NS3} ip link set dev veth5 up
+ip netns exec ${NS4} ip link set dev veth6 up
+ip netns exec ${NS4} ip link set dev veth7 up
+ip netns exec ${NS5} ip link set dev veth8 up
+ip netns exec ${NS5} ip link set dev veth9 up
+ip netns exec ${NS6} ip link set dev veth10 up
+ip netns exec ${NS6} ip link set dev lo up
 
 # All link scope addresses and routes required between veths
-ip netns exec ns1 ip -6 addr add fb00::12/16 dev veth1 scope link
-ip netns exec ns1 ip -6 route add fb00::21 dev veth1 scope link
-ip netns exec ns2 ip -6 addr add fb00::21/16 dev veth2 scope link
-ip netns exec ns2 ip -6 addr add fb00::34/16 dev veth3 scope link
-ip netns exec ns2 ip -6 route add fb00::43 dev veth3 scope link
-ip netns exec ns3 ip -6 route add fb00::65 dev veth5 scope link
-ip netns exec ns3 ip -6 addr add fb00::43/16 dev veth4 scope link
-ip netns exec ns3 ip -6 addr add fb00::56/16 dev veth5 scope link
-ip netns exec ns4 ip -6 addr add fb00::65/16 dev veth6 scope link
-ip netns exec ns4 ip -6 addr add fb00::78/16 dev veth7 scope link
-ip netns exec ns4 ip -6 route add fb00::87 dev veth7 scope link
-ip netns exec ns5 ip -6 addr add fb00::87/16 dev veth8 scope link
-ip netns exec ns5 ip -6 addr add fb00::910/16 dev veth9 scope link
-ip netns exec ns5 ip -6 route add fb00::109 dev veth9 scope link
-ip netns exec ns5 ip -6 route add fb00::109 table 117 dev veth9 scope link
-ip netns exec ns6 ip -6 addr add fb00::109/16 dev veth10 scope link
-
-ip netns exec ns1 ip -6 addr add fb00::1/16 dev lo
-ip netns exec ns1 ip -6 route add fb00::6 dev veth1 via fb00::21
-
-ip netns exec ns2 ip -6 route add fb00::6 encap bpf in obj test_lwt_seg6local.o sec encap_srh dev veth2
-ip netns exec ns2 ip -6 route add fd00::1 dev veth3 via fb00::43 scope link
-
-ip netns exec ns3 ip -6 route add fc42::1 dev veth5 via fb00::65
-ip netns exec ns3 ip -6 route add fd00::1 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec add_egr_x dev veth4
-
-ip netns exec ns4 ip -6 route add fd00::2 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec pop_egr dev veth6
-ip netns exec ns4 ip -6 addr add fc42::1 dev lo
-ip netns exec ns4 ip -6 route add fd00::3 dev veth7 via fb00::87
-
-ip netns exec ns5 ip -6 route add fd00::4 table 117 dev veth9 via fb00::109
-ip netns exec ns5 ip -6 route add fd00::3 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec inspect_t dev veth8
-
-ip netns exec ns6 ip -6 addr add fb00::6/16 dev lo
-ip netns exec ns6 ip -6 addr add fd00::4/16 dev lo
-
-ip netns exec ns1 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec ns2 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec ns3 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec ns4 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec ns5 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-
-ip netns exec ns6 sysctl net.ipv6.conf.all.seg6_enabled=1 > /dev/null
-ip netns exec ns6 sysctl net.ipv6.conf.lo.seg6_enabled=1 > /dev/null
-ip netns exec ns6 sysctl net.ipv6.conf.veth10.seg6_enabled=1 > /dev/null
-
-ip netns exec ns6 nc -l -6 -u -d 7330 > $TMP_FILE &
-ip netns exec ns1 bash -c "echo 'foobar' | nc -w0 -6 -u -p 2121 -s fb00::1 fb00::6 7330"
+ip netns exec ${NS1} ip -6 addr add fb00::12/16 dev veth1 scope link
+ip netns exec ${NS1} ip -6 route add fb00::21 dev veth1 scope link
+ip netns exec ${NS2} ip -6 addr add fb00::21/16 dev veth2 scope link
+ip netns exec ${NS2} ip -6 addr add fb00::34/16 dev veth3 scope link
+ip netns exec ${NS2} ip -6 route add fb00::43 dev veth3 scope link
+ip netns exec ${NS3} ip -6 route add fb00::65 dev veth5 scope link
+ip netns exec ${NS3} ip -6 addr add fb00::43/16 dev veth4 scope link
+ip netns exec ${NS3} ip -6 addr add fb00::56/16 dev veth5 scope link
+ip netns exec ${NS4} ip -6 addr add fb00::65/16 dev veth6 scope link
+ip netns exec ${NS4} ip -6 addr add fb00::78/16 dev veth7 scope link
+ip netns exec ${NS4} ip -6 route add fb00::87 dev veth7 scope link
+ip netns exec ${NS5} ip -6 addr add fb00::87/16 dev veth8 scope link
+ip netns exec ${NS5} ip -6 addr add fb00::910/16 dev veth9 scope link
+ip netns exec ${NS5} ip -6 route add fb00::109 dev veth9 scope link
+ip netns exec ${NS5} ip -6 route add fb00::109 table 117 dev veth9 scope link
+ip netns exec ${NS6} ip -6 addr add fb00::109/16 dev veth10 scope link
+
+ip netns exec ${NS1} ip -6 addr add fb00::1/16 dev lo
+ip netns exec ${NS1} ip -6 route add fb00::6 dev veth1 via fb00::21
+
+ip netns exec ${NS2} ip -6 route add fb00::6 encap bpf in obj test_lwt_seg6local.o sec encap_srh dev veth2
+ip netns exec ${NS2} ip -6 route add fd00::1 dev veth3 via fb00::43 scope link
+
+ip netns exec ${NS3} ip -6 route add fc42::1 dev veth5 via fb00::65
+ip netns exec ${NS3} ip -6 route add fd00::1 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec add_egr_x dev veth4
+
+ip netns exec ${NS4} ip -6 route add fd00::2 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec pop_egr dev veth6
+ip netns exec ${NS4} ip -6 addr add fc42::1 dev lo
+ip netns exec ${NS4} ip -6 route add fd00::3 dev veth7 via fb00::87
+
+ip netns exec ${NS5} ip -6 route add fd00::4 table 117 dev veth9 via fb00::109
+ip netns exec ${NS5} ip -6 route add fd00::3 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec inspect_t dev veth8
+
+ip netns exec ${NS6} ip -6 addr add fb00::6/16 dev lo
+ip netns exec ${NS6} ip -6 addr add fd00::4/16 dev lo
+
+ip netns exec ${NS1} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ${NS2} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ${NS3} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ${NS4} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ${NS5} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+
+ip netns exec ${NS6} sysctl net.ipv6.conf.all.seg6_enabled=1 > /dev/null
+ip netns exec ${NS6} sysctl net.ipv6.conf.lo.seg6_enabled=1 > /dev/null
+ip netns exec ${NS6} sysctl net.ipv6.conf.veth10.seg6_enabled=1 > /dev/null
+
+ip netns exec ${NS6} nc -l -6 -u -d 7330 > $TMP_FILE &
+ip netns exec ${NS1} bash -c "echo 'foobar' | nc -w0 -6 -u -p 2121 -s fb00::1 fb00::6 7330"
 sleep 5 # wait enough time to ensure the UDP datagram arrived to the last segment
 kill -TERM $!
 
-- 
cgit 


From ab6bcc20722775022c5ab0086824e9d2e3ceefcd Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Tue, 25 Jan 2022 16:17:15 +0800
Subject: selftests/bpf/test_tcp_check_syncookie: use temp netns for testing

Use temp netns instead of hard code name for testing in case the
netns already exists.

Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Acked-by: Lorenz Bauer <lmb@cloudflare.com>
Link: https://lore.kernel.org/r/20220125081717.1260849-6-liuhangbin@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_tcp_check_syncookie.sh | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh
index 6413c1472554..102e6588e2fe 100755
--- a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh
+++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh
@@ -4,6 +4,7 @@
 # Copyright (c) 2019 Cloudflare
 
 set -eu
+readonly NS1="ns1-$(mktemp -u XXXXXX)"
 
 wait_for_ip()
 {
@@ -28,12 +29,12 @@ get_prog_id()
 
 ns1_exec()
 {
-	ip netns exec ns1 "$@"
+	ip netns exec ${NS1} "$@"
 }
 
 setup()
 {
-	ip netns add ns1
+	ip netns add ${NS1}
 	ns1_exec ip link set lo up
 
 	ns1_exec sysctl -w net.ipv4.tcp_syncookies=2
-- 
cgit 


From 36d9970e52709bb4ba87bdf9a3c321da721e45f0 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Tue, 25 Jan 2022 16:17:16 +0800
Subject: selftests/bpf/test_xdp_meta: use temp netns for testing

Use temp netns instead of hard code name for testing in case the
netns already exists.

Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Link: https://lore.kernel.org/r/20220125081717.1260849-7-liuhangbin@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_xdp_meta.sh | 38 +++++++++++++++-------------
 1 file changed, 20 insertions(+), 18 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/test_xdp_meta.sh b/tools/testing/selftests/bpf/test_xdp_meta.sh
index d10cefd6eb09..ea69370caae3 100755
--- a/tools/testing/selftests/bpf/test_xdp_meta.sh
+++ b/tools/testing/selftests/bpf/test_xdp_meta.sh
@@ -2,6 +2,8 @@
 
 # Kselftest framework requirement - SKIP code is 4.
 readonly KSFT_SKIP=4
+readonly NS1="ns1-$(mktemp -u XXXXXX)"
+readonly NS2="ns2-$(mktemp -u XXXXXX)"
 
 cleanup()
 {
@@ -13,8 +15,8 @@ cleanup()
 
 	set +e
 	ip link del veth1 2> /dev/null
-	ip netns del ns1 2> /dev/null
-	ip netns del ns2 2> /dev/null
+	ip netns del ${NS1} 2> /dev/null
+	ip netns del ${NS2} 2> /dev/null
 }
 
 ip link set dev lo xdp off 2>/dev/null > /dev/null
@@ -24,32 +26,32 @@ if [ $? -ne 0 ];then
 fi
 set -e
 
-ip netns add ns1
-ip netns add ns2
+ip netns add ${NS1}
+ip netns add ${NS2}
 
 trap cleanup 0 2 3 6 9
 
 ip link add veth1 type veth peer name veth2
 
-ip link set veth1 netns ns1
-ip link set veth2 netns ns2
+ip link set veth1 netns ${NS1}
+ip link set veth2 netns ${NS2}
 
-ip netns exec ns1 ip addr add 10.1.1.11/24 dev veth1
-ip netns exec ns2 ip addr add 10.1.1.22/24 dev veth2
+ip netns exec ${NS1} ip addr add 10.1.1.11/24 dev veth1
+ip netns exec ${NS2} ip addr add 10.1.1.22/24 dev veth2
 
-ip netns exec ns1 tc qdisc add dev veth1 clsact
-ip netns exec ns2 tc qdisc add dev veth2 clsact
+ip netns exec ${NS1} tc qdisc add dev veth1 clsact
+ip netns exec ${NS2} tc qdisc add dev veth2 clsact
 
-ip netns exec ns1 tc filter add dev veth1 ingress bpf da obj test_xdp_meta.o sec t
-ip netns exec ns2 tc filter add dev veth2 ingress bpf da obj test_xdp_meta.o sec t
+ip netns exec ${NS1} tc filter add dev veth1 ingress bpf da obj test_xdp_meta.o sec t
+ip netns exec ${NS2} tc filter add dev veth2 ingress bpf da obj test_xdp_meta.o sec t
 
-ip netns exec ns1 ip link set dev veth1 xdp obj test_xdp_meta.o sec x
-ip netns exec ns2 ip link set dev veth2 xdp obj test_xdp_meta.o sec x
+ip netns exec ${NS1} ip link set dev veth1 xdp obj test_xdp_meta.o sec x
+ip netns exec ${NS2} ip link set dev veth2 xdp obj test_xdp_meta.o sec x
 
-ip netns exec ns1 ip link set dev veth1 up
-ip netns exec ns2 ip link set dev veth2 up
+ip netns exec ${NS1} ip link set dev veth1 up
+ip netns exec ${NS2} ip link set dev veth2 up
 
-ip netns exec ns1 ping -c 1 10.1.1.22
-ip netns exec ns2 ping -c 1 10.1.1.11
+ip netns exec ${NS1} ping -c 1 10.1.1.22
+ip netns exec ${NS2} ping -c 1 10.1.1.11
 
 exit 0
-- 
cgit 


From 4ec25b49f4522d64473cd347a9d9e832b8be2a60 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Tue, 25 Jan 2022 16:17:17 +0800
Subject: selftests/bpf/test_xdp_redirect: use temp netns for testing

Use temp netns instead of hard code name for testing in case the
netns already exists.

Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Acked-by: William Tu <u9012063@gmail.com>
Link: https://lore.kernel.org/r/20220125081717.1260849-8-liuhangbin@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_xdp_redirect.sh | 30 +++++++++++++-----------
 1 file changed, 16 insertions(+), 14 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.sh b/tools/testing/selftests/bpf/test_xdp_redirect.sh
index 57c8db9972a6..1d79f31480ad 100755
--- a/tools/testing/selftests/bpf/test_xdp_redirect.sh
+++ b/tools/testing/selftests/bpf/test_xdp_redirect.sh
@@ -10,6 +10,8 @@
 #     | xdp forwarding |
 #     ------------------
 
+readonly NS1="ns1-$(mktemp -u XXXXXX)"
+readonly NS2="ns2-$(mktemp -u XXXXXX)"
 ret=0
 
 setup()
@@ -17,27 +19,27 @@ setup()
 
 	local xdpmode=$1
 
-	ip netns add ns1
-	ip netns add ns2
+	ip netns add ${NS1}
+	ip netns add ${NS2}
 
-	ip link add veth1 index 111 type veth peer name veth11 netns ns1
-	ip link add veth2 index 222 type veth peer name veth22 netns ns2
+	ip link add veth1 index 111 type veth peer name veth11 netns ${NS1}
+	ip link add veth2 index 222 type veth peer name veth22 netns ${NS2}
 
 	ip link set veth1 up
 	ip link set veth2 up
-	ip -n ns1 link set dev veth11 up
-	ip -n ns2 link set dev veth22 up
+	ip -n ${NS1} link set dev veth11 up
+	ip -n ${NS2} link set dev veth22 up
 
-	ip -n ns1 addr add 10.1.1.11/24 dev veth11
-	ip -n ns2 addr add 10.1.1.22/24 dev veth22
+	ip -n ${NS1} addr add 10.1.1.11/24 dev veth11
+	ip -n ${NS2} addr add 10.1.1.22/24 dev veth22
 }
 
 cleanup()
 {
 	ip link del veth1 2> /dev/null
 	ip link del veth2 2> /dev/null
-	ip netns del ns1 2> /dev/null
-	ip netns del ns2 2> /dev/null
+	ip netns del ${NS1} 2> /dev/null
+	ip netns del ${NS2} 2> /dev/null
 }
 
 test_xdp_redirect()
@@ -52,13 +54,13 @@ test_xdp_redirect()
 		return 0
 	fi
 
-	ip -n ns1 link set veth11 $xdpmode obj xdp_dummy.o sec xdp &> /dev/null
-	ip -n ns2 link set veth22 $xdpmode obj xdp_dummy.o sec xdp &> /dev/null
+	ip -n ${NS1} link set veth11 $xdpmode obj xdp_dummy.o sec xdp &> /dev/null
+	ip -n ${NS2} link set veth22 $xdpmode obj xdp_dummy.o sec xdp &> /dev/null
 	ip link set dev veth1 $xdpmode obj test_xdp_redirect.o sec redirect_to_222 &> /dev/null
 	ip link set dev veth2 $xdpmode obj test_xdp_redirect.o sec redirect_to_111 &> /dev/null
 
-	if ip netns exec ns1 ping -c 1 10.1.1.22 &> /dev/null &&
-	   ip netns exec ns2 ping -c 1 10.1.1.11 &> /dev/null; then
+	if ip netns exec ${NS1} ping -c 1 10.1.1.22 &> /dev/null &&
+	   ip netns exec ${NS2} ping -c 1 10.1.1.11 &> /dev/null; then
 		echo "selftests: test_xdp_redirect $xdpmode [PASS]";
 	else
 		ret=1
-- 
cgit 


From 678dfd5280341d877ca646499bfdc82a3d8b4356 Mon Sep 17 00:00:00 2001
From: Gerhard Engleder <gerhard@engleder-embedded.com>
Date: Sun, 30 Jan 2022 10:54:22 +0100
Subject: selftests/net: timestamping: Fix bind_phc check

timestamping checks socket options during initialisation. For the field
bind_phc of the socket option SO_TIMESTAMPING it expects the value -1 if
PHC is not bound. Actually the value of bind_phc is 0 if PHC is not
bound. This results in the following output:

SIOCSHWTSTAMP: tx_type 0 requested, got 0; rx_filter 0 requested, got 0
SO_TIMESTAMP 0
SO_TIMESTAMPNS 0
SO_TIMESTAMPING flags 0, bind phc 0
   not expected, flags 0, bind phc -1

This is fixed by setting default value and expected value of bind_phc to
0.

Fixes: 2214d7032479 ("selftests/net: timestamping: support binding PHC")
Signed-off-by: Gerhard Engleder <gerhard@engleder-embedded.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/timestamping.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/timestamping.c b/tools/testing/selftests/net/timestamping.c
index aee631c5284e..044bc0e9ed81 100644
--- a/tools/testing/selftests/net/timestamping.c
+++ b/tools/testing/selftests/net/timestamping.c
@@ -325,8 +325,8 @@ int main(int argc, char **argv)
 	struct ifreq device;
 	struct ifreq hwtstamp;
 	struct hwtstamp_config hwconfig, hwconfig_requested;
-	struct so_timestamping so_timestamping_get = { 0, -1 };
-	struct so_timestamping so_timestamping = { 0, -1 };
+	struct so_timestamping so_timestamping_get = { 0, 0 };
+	struct so_timestamping so_timestamping = { 0, 0 };
 	struct sockaddr_in addr;
 	struct ip_mreq imr;
 	struct in_addr iaddr;
-- 
cgit 


From 95dcbc55fe4ffe262795bea02a42695c85a22dc4 Mon Sep 17 00:00:00 2001
From: Daniel Latypov <dlatypov@google.com>
Date: Tue, 18 Jan 2022 11:09:18 -0800
Subject: kunit: tool: drop mostly unused KunitResult.result field

This field is only used to pass along the parsed Test object from
parse_tests().
Everywhere else the `result` field is ignored.

Instead make parse_tests() explicitly return a KunitResult and Test so
we can retire the `result` field.

Signed-off-by: Daniel Latypov <dlatypov@google.com>
Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/kunit/kunit.py | 24 ++++++++----------------
 1 file changed, 8 insertions(+), 16 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py
index 7a706f96f68d..9274c6355809 100755
--- a/tools/testing/kunit/kunit.py
+++ b/tools/testing/kunit/kunit.py
@@ -17,7 +17,7 @@ assert sys.version_info >= (3, 7), "Python version is too old"
 
 from dataclasses import dataclass
 from enum import Enum, auto
-from typing import Any, Iterable, Sequence, List, Optional
+from typing import Iterable, List, Optional, Sequence, Tuple
 
 import kunit_json
 import kunit_kernel
@@ -32,7 +32,6 @@ class KunitStatus(Enum):
 @dataclass
 class KunitResult:
 	status: KunitStatus
-	result: Any
 	elapsed_time: float
 
 @dataclass
@@ -82,10 +81,8 @@ def config_tests(linux: kunit_kernel.LinuxSourceTree,
 	config_end = time.time()
 	if not success:
 		return KunitResult(KunitStatus.CONFIG_FAILURE,
-				   'could not configure kernel',
 				   config_end - config_start)
 	return KunitResult(KunitStatus.SUCCESS,
-			   'configured kernel successfully',
 			   config_end - config_start)
 
 def build_tests(linux: kunit_kernel.LinuxSourceTree,
@@ -100,14 +97,11 @@ def build_tests(linux: kunit_kernel.LinuxSourceTree,
 	build_end = time.time()
 	if not success:
 		return KunitResult(KunitStatus.BUILD_FAILURE,
-				   'could not build kernel',
 				   build_end - build_start)
 	if not success:
 		return KunitResult(KunitStatus.BUILD_FAILURE,
-				   'could not build kernel',
 				   build_end - build_start)
 	return KunitResult(KunitStatus.SUCCESS,
-			   'built kernel successfully',
 			   build_end - build_start)
 
 def config_and_build_tests(linux: kunit_kernel.LinuxSourceTree,
@@ -173,14 +167,14 @@ def exec_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest) -
 			filter_glob=filter_glob,
 			build_dir=request.build_dir)
 
-		result = parse_tests(request, run_result)
+		_, test_result = parse_tests(request, run_result)
 		# run_kernel() doesn't block on the kernel exiting.
 		# That only happens after we get the last line of output from `run_result`.
 		# So exec_time here actually contains parsing + execution time, which is fine.
 		test_end = time.time()
 		exec_time += test_end - test_start
 
-		test_counts.add_subtest_counts(result.result.counts)
+		test_counts.add_subtest_counts(test_result.counts)
 
 	if len(filter_globs) == 1 and test_counts.crashed > 0:
 		bd = request.build_dir
@@ -189,7 +183,7 @@ def exec_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest) -
 				bd, bd, kunit_kernel.get_outfile_path(bd), bd, sys.argv[0]))
 
 	kunit_status = _map_to_overall_status(test_counts.get_status())
-	return KunitResult(status=kunit_status, result=result, elapsed_time=exec_time)
+	return KunitResult(status=kunit_status, elapsed_time=exec_time)
 
 def _map_to_overall_status(test_status: kunit_parser.TestStatus) -> KunitStatus:
 	if test_status in (kunit_parser.TestStatus.SUCCESS, kunit_parser.TestStatus.SKIPPED):
@@ -197,7 +191,7 @@ def _map_to_overall_status(test_status: kunit_parser.TestStatus) -> KunitStatus:
 	else:
 		return KunitStatus.TEST_FAILURE
 
-def parse_tests(request: KunitParseRequest, input_data: Iterable[str]) -> KunitResult:
+def parse_tests(request: KunitParseRequest, input_data: Iterable[str]) -> Tuple[KunitResult, kunit_parser.Test]:
 	parse_start = time.time()
 
 	test_result = kunit_parser.Test()
@@ -231,11 +225,9 @@ def parse_tests(request: KunitParseRequest, input_data: Iterable[str]) -> KunitR
 			print(json_obj)
 
 	if test_result.status != kunit_parser.TestStatus.SUCCESS:
-		return KunitResult(KunitStatus.TEST_FAILURE, test_result,
-				   parse_end - parse_start)
+		return KunitResult(KunitStatus.TEST_FAILURE, parse_end - parse_start), test_result
 
-	return KunitResult(KunitStatus.SUCCESS, test_result,
-				parse_end - parse_start)
+	return KunitResult(KunitStatus.SUCCESS, parse_end - parse_start), test_result
 
 def run_tests(linux: kunit_kernel.LinuxSourceTree,
 	      request: KunitRequest) -> KunitResult:
@@ -513,7 +505,7 @@ def main(argv, linux=None):
 		request = KunitParseRequest(raw_output=cli_args.raw_output,
 					    build_dir='',
 					    json=cli_args.json)
-		result = parse_tests(request, kunit_output)
+		result, _ = parse_tests(request, kunit_output)
 		if result.status != KunitStatus.SUCCESS:
 			sys.exit(1)
 	else:
-- 
cgit 


From 8f50f16ff39dd4e2d43d1548ca66925652f8aff7 Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Sun, 30 Jan 2022 12:55:18 +0100
Subject: selftests/bpf: Extend verifier and bpf_sock tests for dst_port loads

Add coverage to the verifier tests and tests for reading bpf_sock fields to
ensure that 32-bit, 16-bit, and 8-bit loads from dst_port field are allowed
only at intended offsets and produce expected values.

While 16-bit and 8-bit access to dst_port field is straight-forward, 32-bit
wide loads need be allowed and produce a zero-padded 16-bit value for
backward compatibility.

Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/r/20220130115518.213259-3-jakub@cloudflare.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/include/uapi/linux/bpf.h                     |  3 +-
 .../testing/selftests/bpf/prog_tests/sock_fields.c | 58 +++++++++++-----
 .../testing/selftests/bpf/progs/test_sock_fields.c | 41 +++++++++++
 tools/testing/selftests/bpf/verifier/sock.c        | 81 +++++++++++++++++++++-
 4 files changed, 162 insertions(+), 21 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 4a2f7041ebae..a7f0ddedac1f 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5574,7 +5574,8 @@ struct bpf_sock {
 	__u32 src_ip4;
 	__u32 src_ip6[4];
 	__u32 src_port;		/* host byte order */
-	__u32 dst_port;		/* network byte order */
+	__be16 dst_port;	/* network byte order */
+	__u16 :16;		/* zero padding */
 	__u32 dst_ip4;
 	__u32 dst_ip6[4];
 	__u32 state;
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c
index 9fc040eaa482..9d211b5c22c4 100644
--- a/tools/testing/selftests/bpf/prog_tests/sock_fields.c
+++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c
@@ -1,9 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2019 Facebook */
 
+#define _GNU_SOURCE
 #include <netinet/in.h>
 #include <arpa/inet.h>
 #include <unistd.h>
+#include <sched.h>
 #include <stdlib.h>
 #include <string.h>
 #include <errno.h>
@@ -20,6 +22,7 @@
 enum bpf_linum_array_idx {
 	EGRESS_LINUM_IDX,
 	INGRESS_LINUM_IDX,
+	READ_SK_DST_PORT_LINUM_IDX,
 	__NR_BPF_LINUM_ARRAY_IDX,
 };
 
@@ -42,8 +45,16 @@ static __u64 child_cg_id;
 static int linum_map_fd;
 static __u32 duration;
 
-static __u32 egress_linum_idx = EGRESS_LINUM_IDX;
-static __u32 ingress_linum_idx = INGRESS_LINUM_IDX;
+static bool create_netns(void)
+{
+	if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
+		return false;
+
+	if (!ASSERT_OK(system("ip link set dev lo up"), "bring up lo"))
+		return false;
+
+	return true;
+}
 
 static void print_sk(const struct bpf_sock *sk, const char *prefix)
 {
@@ -91,19 +102,24 @@ static void check_result(void)
 {
 	struct bpf_tcp_sock srv_tp, cli_tp, listen_tp;
 	struct bpf_sock srv_sk, cli_sk, listen_sk;
-	__u32 ingress_linum, egress_linum;
+	__u32 idx, ingress_linum, egress_linum, linum;
 	int err;
 
-	err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx,
-				  &egress_linum);
+	idx = EGRESS_LINUM_IDX;
+	err = bpf_map_lookup_elem(linum_map_fd, &idx, &egress_linum);
 	CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)",
 	      "err:%d errno:%d\n", err, errno);
 
-	err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx,
-				  &ingress_linum);
+	idx = INGRESS_LINUM_IDX;
+	err = bpf_map_lookup_elem(linum_map_fd, &idx, &ingress_linum);
 	CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)",
 	      "err:%d errno:%d\n", err, errno);
 
+	idx = READ_SK_DST_PORT_LINUM_IDX;
+	err = bpf_map_lookup_elem(linum_map_fd, &idx, &linum);
+	ASSERT_OK(err, "bpf_map_lookup_elem(linum_map_fd, READ_SK_DST_PORT_IDX)");
+	ASSERT_EQ(linum, 0, "failure in read_sk_dst_port on line");
+
 	memcpy(&srv_sk, &skel->bss->srv_sk, sizeof(srv_sk));
 	memcpy(&srv_tp, &skel->bss->srv_tp, sizeof(srv_tp));
 	memcpy(&cli_sk, &skel->bss->cli_sk, sizeof(cli_sk));
@@ -262,7 +278,7 @@ static void test(void)
 	char buf[DATA_LEN];
 
 	/* Prepare listen_fd */
-	listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+	listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0xcafe, 0);
 	/* start_server() has logged the error details */
 	if (CHECK_FAIL(listen_fd == -1))
 		goto done;
@@ -330,8 +346,12 @@ done:
 
 void serial_test_sock_fields(void)
 {
-	struct bpf_link *egress_link = NULL, *ingress_link = NULL;
 	int parent_cg_fd = -1, child_cg_fd = -1;
+	struct bpf_link *link;
+
+	/* Use a dedicated netns to have a fixed listen port */
+	if (!create_netns())
+		return;
 
 	/* Create a cgroup, get fd, and join it */
 	parent_cg_fd = test__join_cgroup(PARENT_CGROUP);
@@ -352,15 +372,20 @@ void serial_test_sock_fields(void)
 	if (CHECK(!skel, "test_sock_fields__open_and_load", "failed\n"))
 		goto done;
 
-	egress_link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields,
-						 child_cg_fd);
-	if (!ASSERT_OK_PTR(egress_link, "attach_cgroup(egress)"))
+	link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields, child_cg_fd);
+	if (!ASSERT_OK_PTR(link, "attach_cgroup(egress_read_sock_fields)"))
+		goto done;
+	skel->links.egress_read_sock_fields = link;
+
+	link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields, child_cg_fd);
+	if (!ASSERT_OK_PTR(link, "attach_cgroup(ingress_read_sock_fields)"))
 		goto done;
+	skel->links.ingress_read_sock_fields = link;
 
-	ingress_link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields,
-						  child_cg_fd);
-	if (!ASSERT_OK_PTR(ingress_link, "attach_cgroup(ingress)"))
+	link = bpf_program__attach_cgroup(skel->progs.read_sk_dst_port, child_cg_fd);
+	if (!ASSERT_OK_PTR(link, "attach_cgroup(read_sk_dst_port"))
 		goto done;
+	skel->links.read_sk_dst_port = link;
 
 	linum_map_fd = bpf_map__fd(skel->maps.linum_map);
 	sk_pkt_out_cnt_fd = bpf_map__fd(skel->maps.sk_pkt_out_cnt);
@@ -369,8 +394,7 @@ void serial_test_sock_fields(void)
 	test();
 
 done:
-	bpf_link__destroy(egress_link);
-	bpf_link__destroy(ingress_link);
+	test_sock_fields__detach(skel);
 	test_sock_fields__destroy(skel);
 	if (child_cg_fd >= 0)
 		close(child_cg_fd);
diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields.c b/tools/testing/selftests/bpf/progs/test_sock_fields.c
index 81b57b9aaaea..246f1f001813 100644
--- a/tools/testing/selftests/bpf/progs/test_sock_fields.c
+++ b/tools/testing/selftests/bpf/progs/test_sock_fields.c
@@ -12,6 +12,7 @@
 enum bpf_linum_array_idx {
 	EGRESS_LINUM_IDX,
 	INGRESS_LINUM_IDX,
+	READ_SK_DST_PORT_LINUM_IDX,
 	__NR_BPF_LINUM_ARRAY_IDX,
 };
 
@@ -250,4 +251,44 @@ int ingress_read_sock_fields(struct __sk_buff *skb)
 	return CG_OK;
 }
 
+static __noinline bool sk_dst_port__load_word(struct bpf_sock *sk)
+{
+	__u32 *word = (__u32 *)&sk->dst_port;
+	return word[0] == bpf_htonl(0xcafe0000);
+}
+
+static __noinline bool sk_dst_port__load_half(struct bpf_sock *sk)
+{
+	__u16 *half = (__u16 *)&sk->dst_port;
+	return half[0] == bpf_htons(0xcafe);
+}
+
+static __noinline bool sk_dst_port__load_byte(struct bpf_sock *sk)
+{
+	__u8 *byte = (__u8 *)&sk->dst_port;
+	return byte[0] == 0xca && byte[1] == 0xfe;
+}
+
+SEC("cgroup_skb/egress")
+int read_sk_dst_port(struct __sk_buff *skb)
+{
+	__u32 linum, linum_idx;
+	struct bpf_sock *sk;
+
+	linum_idx = READ_SK_DST_PORT_LINUM_IDX;
+
+	sk = skb->sk;
+	if (!sk)
+		RET_LOG();
+
+	if (!sk_dst_port__load_word(sk))
+		RET_LOG();
+	if (!sk_dst_port__load_half(sk))
+		RET_LOG();
+	if (!sk_dst_port__load_byte(sk))
+		RET_LOG();
+
+	return CG_OK;
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c
index ce13ece08d51..8c224eac93df 100644
--- a/tools/testing/selftests/bpf/verifier/sock.c
+++ b/tools/testing/selftests/bpf/verifier/sock.c
@@ -121,7 +121,25 @@
 	.result = ACCEPT,
 },
 {
-	"sk_fullsock(skb->sk): sk->dst_port [narrow load]",
+	"sk_fullsock(skb->sk): sk->dst_port [word load] (backward compatibility)",
+	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port)),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	.result = ACCEPT,
+},
+{
+	"sk_fullsock(skb->sk): sk->dst_port [half load]",
 	.insns = {
 	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
 	BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
@@ -139,7 +157,64 @@
 	.result = ACCEPT,
 },
 {
-	"sk_fullsock(skb->sk): sk->dst_port [load 2nd byte]",
+	"sk_fullsock(skb->sk): sk->dst_port [half load] (invalid)",
+	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	.result = REJECT,
+	.errstr = "invalid sock access",
+},
+{
+	"sk_fullsock(skb->sk): sk->dst_port [byte load]",
+	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_0, offsetof(struct bpf_sock, dst_port)),
+	BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 1),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	.result = ACCEPT,
+},
+{
+	"sk_fullsock(skb->sk): sk->dst_port [byte load] (invalid)",
+	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	.result = REJECT,
+	.errstr = "invalid sock access",
+},
+{
+	"sk_fullsock(skb->sk): past sk->dst_port [half load] (invalid)",
 	.insns = {
 	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
 	BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
@@ -149,7 +224,7 @@
 	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
-	BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 1),
+	BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_0, offsetofend(struct bpf_sock, dst_port)),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
-- 
cgit 


From 8af2ba9a78115cb1189504f9690483969a35c07c Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Mon, 31 Jan 2022 16:42:01 +0100
Subject: selftests: fib rule: Make 'getmatch' and 'match' local variables

Let's restrict the scope of these variables to avoid possible
interferences.

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/fib_rule_tests.sh | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index 43ea8407a82e..f04f337dd7c6 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -115,6 +115,9 @@ fib_rule6_test_match_n_redirect()
 
 fib_rule6_test()
 {
+	local getmatch
+	local match
+
 	# setup the fib rule redirect route
 	$IP -6 route add table $RTABLE default via $GW_IP6 dev $DEV onlink
 
@@ -184,6 +187,9 @@ fib_rule4_test_match_n_redirect()
 
 fib_rule4_test()
 {
+	local getmatch
+	local match
+
 	# setup the fib rule redirect route
 	$IP route add table $RTABLE default via $GW_IP4 dev $DEV onlink
 
-- 
cgit 


From 2e252113632704a04676a963bd1bc10e8d52bed7 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Mon, 31 Jan 2022 16:42:04 +0100
Subject: selftests: fib rule: Drop erroneous TABLE variable

The fib_rule6_del_by_pref() and fib_rule4_del_by_pref() functions use
an uninitialised $TABLE variable. They should use $RTABLE instead.
This doesn't alter the result of the test, as it just makes the grep
command less specific (but since the script always uses the same table
number, that doesn't really matter).

Let's fix it anyway and, while there, specify the filtering parameters
directly in 'ip -X rule show' to avoid the extra grep command entirely.

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/fib_rule_tests.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index f04f337dd7c6..012f9385d68c 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -96,7 +96,7 @@ fib_rule6_del()
 
 fib_rule6_del_by_pref()
 {
-	pref=$($IP -6 rule show | grep "$1 lookup $TABLE" | cut -d ":" -f 1)
+	pref=$($IP -6 rule show $1 table $RTABLE | cut -d ":" -f 1)
 	$IP -6 rule del pref $pref
 }
 
@@ -168,7 +168,7 @@ fib_rule4_del()
 
 fib_rule4_del_by_pref()
 {
-	pref=$($IP rule show | grep "$1 lookup $TABLE" | cut -d ":" -f 1)
+	pref=$($IP rule show $1 table $RTABLE | cut -d ":" -f 1)
 	$IP rule del pref $pref
 }
 
-- 
cgit 


From 21f25cd43672c8eb8d80adcb498bbdf855a6405a Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Mon, 31 Jan 2022 16:42:06 +0100
Subject: selftests: fib rule: Log test description

All callers of fib_rule6_test_match_n_redirect() and
fib_rule4_test_match_n_redirect() pass a third argument containing a
description of the test being run. Instead of ignoring this argument,
let's use it for logging instead of printing a truncated version of the
command.

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/fib_rule_tests.sh | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index 012f9385d68c..6a05e81fc81d 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -104,13 +104,14 @@ fib_rule6_test_match_n_redirect()
 {
 	local match="$1"
 	local getmatch="$2"
+	local description="$3"
 
 	$IP -6 rule add $match table $RTABLE
 	$IP -6 route get $GW_IP6 $getmatch | grep -q "table $RTABLE"
-	log_test $? 0 "rule6 check: $1"
+	log_test $? 0 "rule6 check: $description"
 
 	fib_rule6_del_by_pref "$match"
-	log_test $? 0 "rule6 del by pref: $match"
+	log_test $? 0 "rule6 del by pref: $description"
 }
 
 fib_rule6_test()
@@ -176,13 +177,14 @@ fib_rule4_test_match_n_redirect()
 {
 	local match="$1"
 	local getmatch="$2"
+	local description="$3"
 
 	$IP rule add $match table $RTABLE
 	$IP route get $GW_IP4 $getmatch | grep -q "table $RTABLE"
-	log_test $? 0 "rule4 check: $1"
+	log_test $? 0 "rule4 check: $description"
 
 	fib_rule4_del_by_pref "$match"
-	log_test $? 0 "rule4 del by pref: $match"
+	log_test $? 0 "rule4 del by pref: $description"
 }
 
 fib_rule4_test()
-- 
cgit 


From 9f397dd5f15522f8066816ee832974f7f4252366 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Mon, 31 Jan 2022 16:42:09 +0100
Subject: selftests: fib rule: Don't echo modified sysctls

Run sysctl in quiet mode. Echoing the modified sysctl doesn't bring any
useful information.

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/fib_rule_tests.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index 6a05e81fc81d..3b0489910422 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -200,11 +200,11 @@ fib_rule4_test()
 
 	# need enable forwarding and disable rp_filter temporarily as all the
 	# addresses are in the same subnet and egress device == ingress device.
-	ip netns exec testns sysctl -w net.ipv4.ip_forward=1
-	ip netns exec testns sysctl -w net.ipv4.conf.$DEV.rp_filter=0
+	ip netns exec testns sysctl -qw net.ipv4.ip_forward=1
+	ip netns exec testns sysctl -qw net.ipv4.conf.$DEV.rp_filter=0
 	match="from $SRC_IP iif $DEV"
 	fib_rule4_test_match_n_redirect "$match" "$match" "iif redirect to table"
-	ip netns exec testns sysctl -w net.ipv4.ip_forward=0
+	ip netns exec testns sysctl -qw net.ipv4.ip_forward=0
 
 	match="tos 0x10"
 	fib_rule4_test_match_n_redirect "$match" "$match" "tos redirect to table"
-- 
cgit 


From 439f0336566cb9b917f5286df599ec2a3a9d1475 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Tue, 1 Feb 2022 15:58:09 +0100
Subject: selftests/bpf: Update cpumap/devmap sec_name

Substitute deprecated xdp_cpumap and xdp_devmap sec_name with
xdp/cpumap and xdp/devmap respectively in bpf kselftests.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/9a4286cd36781e2c31ba3773bfdcf45cf1bbaa9e.1643727185.git.lorenzo@kernel.org
---
 tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c | 2 +-
 tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c       | 2 +-
 tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c | 2 +-
 tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c       | 2 +-
 tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c            | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c
index 62fb7cd4d87a..97ed625bb70a 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c
@@ -12,7 +12,7 @@ struct {
 	__uint(max_entries, 4);
 } cpu_map SEC(".maps");
 
-SEC("xdp_cpumap/dummy_cm")
+SEC("xdp/cpumap")
 int xdp_dummy_cm(struct xdp_md *ctx)
 {
 	return XDP_PASS;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
index 48007f17dfa8..20ec6723df18 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
@@ -24,7 +24,7 @@ int xdp_dummy_prog(struct xdp_md *ctx)
 	return XDP_PASS;
 }
 
-SEC("xdp_cpumap/dummy_cm")
+SEC("xdp/cpumap")
 int xdp_dummy_cm(struct xdp_md *ctx)
 {
 	if (ctx->ingress_ifindex == IFINDEX_LO)
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c
index e1caf510b7d2..cdcf7de7ec8c 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c
@@ -12,7 +12,7 @@ struct {
 /* valid program on DEVMAP entry via SEC name;
  * has access to egress and ingress ifindex
  */
-SEC("xdp_devmap/map_prog")
+SEC("xdp/devmap")
 int xdp_dummy_dm(struct xdp_md *ctx)
 {
 	return XDP_PASS;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
index 8ae11fab8316..4139a14f9996 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
@@ -27,7 +27,7 @@ int xdp_dummy_prog(struct xdp_md *ctx)
 /* valid program on DEVMAP entry via SEC name;
  * has access to egress and ingress ifindex
  */
-SEC("xdp_devmap/map_prog")
+SEC("xdp/devmap")
 int xdp_dummy_dm(struct xdp_md *ctx)
 {
 	char fmt[] = "devmap redirect: dev %u -> dev %u len %u\n";
diff --git a/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
index 8395782b6e0a..97b26a30b59a 100644
--- a/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
+++ b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
@@ -70,7 +70,7 @@ int xdp_redirect_map_all_prog(struct xdp_md *ctx)
 				BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
 }
 
-SEC("xdp_devmap/map_prog")
+SEC("xdp/devmap")
 int xdp_devmap_prog(struct xdp_md *ctx)
 {
 	void *data_end = (void *)(long)ctx->data_end;
-- 
cgit 


From bee6f2169935658fc405128a8b763ea49f50985c Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Wed, 1 Dec 2021 19:19:13 -0800
Subject: torture: Drop trailing ^M from console output

Console logs can sometimes have trailing control-M characters, which the
forward-progress evaluation code in kvm-recheck-rcu.sh passes through
to the user output.  Which does not cause a technical problem, but which
can look ugly.  This commit therefore strips the control-M characters.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
index 1c4c2c727dad..43e1387234d1 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
@@ -25,7 +25,7 @@ stopstate="`grep 'End-test grace-period state: g' $i/console.log 2> /dev/null |
 	    tail -1 | sed -e 's/^\[[ 0-9.]*] //' |
 	    awk '{ print \"[\" $1 \" \" $5 \" \" $6 \" \" $7 \"]\"; }' |
 	    tr -d '\012\015'`"
-fwdprog="`grep 'rcu_torture_fwd_prog n_max_cbs: ' $i/console.log 2> /dev/null | sed -e 's/^\[[^]]*] //' | sort -k3nr | head -1 | awk '{ print $2 " " $3 }'`"
+fwdprog="`grep 'rcu_torture_fwd_prog n_max_cbs: ' $i/console.log 2> /dev/null | sed -e 's/^\[[^]]*] //' | sort -k3nr | head -1 | awk '{ print $2 " " $3 }' | tr -d '\015'`"
 if test -z "$ngps"
 then
 	echo "$configfile ------- " $stopstate
-- 
cgit 


From 010e5773b2050db260395a240e2f7adee3108603 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 2 Dec 2021 11:24:05 -0800
Subject: torture: Allow four-digit repetition numbers for --configs parameter

In a clear-cut case of "not thinking big enough", kvm.sh limits the
multipliers for torture-test scenarios to three digits.  Although this is
large enough for any single system that I have ever run rcutorture on,
it does become a problem when you want to use kvm-remote.sh to run as
many instances of TREE09 as fit on a set of 20 systems with 80 CPUs each.

Yes, one could simply say "--configs '800*TREE09 800*TREE09'", but this
commit removes the need for that sort of hacky workaround by permitting
four-digit repetition numbers, thus allowing "--configs '1600*TREE09'".

Five-digit repetition numbers remain off the menu.  Should they ever
really be needed, they can easily be added!

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/kvm.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 6de0c183db5b..348ad177a5ac 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -280,7 +280,7 @@ configs_derep=
 for CF in $configs
 do
 	case $CF in
-	[0-9]\**|[0-9][0-9]\**|[0-9][0-9][0-9]\**)
+	[0-9]\**|[0-9][0-9]\**|[0-9][0-9][0-9]\**|[0-9][0-9][0-9][0-9]\**)
 		config_reps=`echo $CF | sed -e 's/\*.*$//'`
 		CF1=`echo $CF | sed -e 's/^[^*]*\*//'`
 		;;
-- 
cgit 


From 21fbc62576b67d6db66cae88710f82bd51a556e1 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Sat, 4 Dec 2021 13:53:24 -0800
Subject: torture: Output per-failed-run summary lines from torture.sh

Currently, torture.sh lists the failed runs, but it is up to the user
to work out what failed.  This is especially annoying for KCSAN runs,
where RCU's tighter definitions result in failures being reported for
other parts of the kernel.  This commit therefore outputs "Summary:"
lines for each failed run, allowing the user to more quickly identify
which failed runs need focused attention.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/torture.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index eae88aacca2a..894f589dd562 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -414,7 +414,7 @@ nfailures=0
 echo FAILURES: | tee -a $T/log
 if test -s "$T/failures"
 then
-	cat "$T/failures" | tee -a $T/log
+	awk < "$T/failures" -v sq="'" '{ print "echo " sq $0 sq; print "grep Summary: " $2 "/log | sed -e " sq "s/^[^S]*/  /" sq; }' | sh | tee -a $T/log
 	nfailures="`wc -l "$T/failures" | awk '{ print $1 }'`"
 	ret=2
 fi
-- 
cgit 


From a711aaccf6b389b7fa7f3e35b4f99c437a36b9b2 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Sat, 4 Dec 2021 21:00:55 -0800
Subject: torture: Make kvm.sh summaries note runs having only KCSAN reports

Runs having only KCSAN reports will normally print a summary line
containing only a "Bugs:" entry.  However, these bugs might or might
not be KCSAN reports.  This commit therefore flags runs in which all the
"Bugs:" entries are KCSAN reports.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/console-badness.sh |  2 +-
 tools/testing/selftests/rcutorture/bin/parse-console.sh   | 10 ++++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rcutorture/bin/console-badness.sh b/tools/testing/selftests/rcutorture/bin/console-badness.sh
index e6a132df6172..69f8a5958cef 100755
--- a/tools/testing/selftests/rcutorture/bin/console-badness.sh
+++ b/tools/testing/selftests/rcutorture/bin/console-badness.sh
@@ -10,7 +10,7 @@
 #
 # Authors: Paul E. McKenney <paulmck@kernel.org>
 
-egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' |
+egrep 'Badness|WARNING:|Warn|BUG|===========|BUG: KCSAN:|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' |
 grep -v 'ODEBUG: ' |
 grep -v 'This means that this is a DEBUG kernel and it is' |
 grep -v 'Warning: unable to open an initial console' |
diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh
index 9f624bd53c27..822eb037a057 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-console.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh
@@ -138,6 +138,16 @@ then
 	then
 		summary="$summary  Bugs: $n_bugs"
 	fi
+	n_kcsan=`egrep -c 'BUG: KCSAN: ' $file`
+	if test "$n_kcsan" -ne 0
+	then
+		if test "$n_bugs" = "$n_kcsan"
+		then
+			summary="$summary (all bugs kcsan)"
+		else
+			summary="$summary  KCSAN: $n_kcsan"
+		fi
+	fi
 	n_calltrace=`grep -c 'Call Trace:' $file`
 	if test "$n_calltrace" -ne 0
 	then
-- 
cgit 


From 9a32ed1cf6cdebff7ccd82b91f8021267ed0d9cb Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 6 Dec 2021 09:13:37 -0800
Subject: torture: Indicate which torture.sh runs' bugs are all KCSAN reports

This commit further improves torture.sh run summaries by indicating
which runs' "Bugs:" counts are all KCSAN reports, and further printing
an additional end-of-run summary line when all errors reported in all
runs were KCSAN reports.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/torture.sh | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index 894f589dd562..bddce72ea5ce 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -414,8 +414,14 @@ nfailures=0
 echo FAILURES: | tee -a $T/log
 if test -s "$T/failures"
 then
-	awk < "$T/failures" -v sq="'" '{ print "echo " sq $0 sq; print "grep Summary: " $2 "/log | sed -e " sq "s/^[^S]*/  /" sq; }' | sh | tee -a $T/log
+	awk < "$T/failures" -v sq="'" '{ print "echo " sq $0 sq; print "grep Summary: " $2 "/log | sed -e " sq "s/^[^S]*/  /" sq; }' | sh | tee -a $T/log | tee "$T/failuresum"
 	nfailures="`wc -l "$T/failures" | awk '{ print $1 }'`"
+	grep "^  Summary: " "$T/failuresum" |
+		grep -v '^  Summary: Bugs: [0-9]* (all bugs kcsan)$' > "$T/nonkcsan"
+	if test -s "$T/nonkcsan"
+	then
+		nonkcsanbug="yes"
+	fi
 	ret=2
 fi
 if test "$do_kcsan" = "yes"
@@ -424,6 +430,10 @@ then
 fi
 echo Started at $startdate, ended at `date`, duration `get_starttime_duration $starttime`. | tee -a $T/log
 echo Summary: Successes: $nsuccesses Failures: $nfailures. | tee -a $T/log
+if test -z "$nonkcsanbug" && test -s "$T/failuresum"
+then
+	echo "  All bugs were KCSAN failures."
+fi
 tdir="`cat $T/successes $T/failures | head -1 | awk '{ print $NF }' | sed -e 's,/[^/]\+/*$,,'`"
 if test -n "$tdir" && test $compress_kasan_vmlinux -gt 0
 then
-- 
cgit 


From 99c80a96a512f32a234687343ff1c8e5e033976b Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Fri, 17 Dec 2021 16:14:31 -0800
Subject: torture: Compress KCSAN as well as KASAN vmlinux files

Compressing KASAN vmlinux files reduces torture.sh res file size from
about 100G to about 50G, which is good, but the KCSAN vmlinux files
are also large.  Compressing them reduces their size from about 700M to
about 100M (but of course your mileage may vary).  This commit therefore
compresses both KASAN and KCSAN vmlinux files.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/torture.sh | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index bddce72ea5ce..a5f1c5fbefe4 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -37,7 +37,7 @@ configs_scftorture=
 kcsan_kmake_args=
 
 # Default compression, duration, and apportionment.
-compress_kasan_vmlinux="`identify_qemu_vcpus`"
+compress_concurrency="`identify_qemu_vcpus`"
 duration_base=10
 duration_rcutorture_frac=7
 duration_locktorture_frac=1
@@ -67,7 +67,7 @@ function doyesno () {
 
 usage () {
 	echo "Usage: $scriptname optional arguments:"
-	echo "       --compress-kasan-vmlinux concurrency"
+	echo "       --compress-concurrency concurrency"
 	echo "       --configs-rcutorture \"config-file list w/ repeat factor (3*TINY01)\""
 	echo "       --configs-locktorture \"config-file list w/ repeat factor (10*LOCK01)\""
 	echo "       --configs-scftorture \"config-file list w/ repeat factor (2*CFLIST)\""
@@ -91,9 +91,9 @@ usage () {
 while test $# -gt 0
 do
 	case "$1" in
-	--compress-kasan-vmlinux)
-		checkarg --compress-kasan-vmlinux "(concurrency level)" $# "$2" '^[0-9][0-9]*$' '^error'
-		compress_kasan_vmlinux=$2
+	--compress-concurrency)
+		checkarg --compress-concurrency "(concurrency level)" $# "$2" '^[0-9][0-9]*$' '^error'
+		compress_concurrency=$2
 		shift
 		;;
 	--config-rcutorture|--configs-rcutorture)
@@ -435,11 +435,11 @@ then
 	echo "  All bugs were KCSAN failures."
 fi
 tdir="`cat $T/successes $T/failures | head -1 | awk '{ print $NF }' | sed -e 's,/[^/]\+/*$,,'`"
-if test -n "$tdir" && test $compress_kasan_vmlinux -gt 0
+if test -n "$tdir" && test $compress_concurrency -gt 0
 then
 	# KASAN vmlinux files can approach 1GB in size, so compress them.
-	echo Looking for KASAN files to compress: `date` > "$tdir/log-xz" 2>&1
-	find "$tdir" -type d -name '*-kasan' -print > $T/xz-todo
+	echo Looking for K[AC]SAN files to compress: `date` > "$tdir/log-xz" 2>&1
+	find "$tdir" -type d -name '*-k[ac]san' -print > $T/xz-todo
 	ncompresses=0
 	batchno=1
 	if test -s $T/xz-todo
@@ -457,7 +457,7 @@ then
 			do
 				xz "$j" >> "$tdir/log-xz" 2>&1 &
 				ncompresses=$((ncompresses+1))
-				if test $ncompresses -ge $compress_kasan_vmlinux
+				if test $ncompresses -ge $compress_concurrency
 				then
 					echo Waiting for batch $batchno of $ncompresses compressions `date` | tee -a "$tdir/log-xz" | tee -a $T/log
 					wait
-- 
cgit 


From 2bc9062e7f394b3c6cbe408a13523c0a4bd13d2a Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 20 Dec 2021 20:24:25 -0800
Subject: torture: Make kvm-remote.sh try multiple times to download tarball

This commit ups the retries for downloading the build-product tarball
to a given remote system from once to five times, the better to handle
transient network failures.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/kvm-remote.sh | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
index e09b1bc78708..29b068a55b46 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
@@ -155,18 +155,23 @@ do
 	echo Downloading tarball to $i `date` | tee -a "$oldrun/remote-log"
 	cat $T/binres.tgz | ssh $i "cd /tmp; tar -xzf -"
 	ret=$?
-	if test "$ret" -ne 0
-	then
-		echo Unable to download $T/binres.tgz to system $i, waiting and then retrying. | tee -a "$oldrun/remote-log"
+	tries=0
+	while test "$ret" -ne 0
+	do
+		echo Unable to download $T/binres.tgz to system $i, waiting and then retrying.  $tries prior retries. | tee -a "$oldrun/remote-log"
 		sleep 60
 		cat $T/binres.tgz | ssh $i "cd /tmp; tar -xzf -"
 		ret=$?
 		if test "$ret" -ne 0
 		then
-			echo Unable to download $T/binres.tgz to system $i, giving up. | tee -a "$oldrun/remote-log"
-			exit 10
+			if test "$tries" > 5
+			then
+				echo Unable to download $T/binres.tgz to system $i, giving up. | tee -a "$oldrun/remote-log"
+				exit 10
+			fi
 		fi
-	fi
+		tries=$((tries+1))
+	done
 done
 
 # Function to check for presence of a file on the specified system.
-- 
cgit 


From b376005eb3a8aedcde437d3d495418d63ce5451c Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 27 Dec 2021 21:21:35 -0800
Subject: torture: Print only one summary line per run

The torture.sh scripts currently duplicates the summary lines, getting
one during the run phase and one during the summary phase of each run.
This commit therefore removes the run phase from consideration so as to
get only one summary line per run.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/torture.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index a5f1c5fbefe4..d1cb60085d8f 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -414,7 +414,7 @@ nfailures=0
 echo FAILURES: | tee -a $T/log
 if test -s "$T/failures"
 then
-	awk < "$T/failures" -v sq="'" '{ print "echo " sq $0 sq; print "grep Summary: " $2 "/log | sed -e " sq "s/^[^S]*/  /" sq; }' | sh | tee -a $T/log | tee "$T/failuresum"
+	awk < "$T/failures" -v sq="'" '{ print "echo " sq $0 sq; print "sed -e " sq "1,/^ --- .* Test summary:$/d" sq " " $2 "/log | grep Summary: | sed -e " sq "s/^[^S]*/  /" sq; }' | sh | tee -a $T/log | tee "$T/failuresum"
 	nfailures="`wc -l "$T/failures" | awk '{ print $1 }'`"
 	grep "^  Summary: " "$T/failuresum" |
 		grep -v '^  Summary: Bugs: [0-9]* (all bugs kcsan)$' > "$T/nonkcsan"
-- 
cgit 


From e31ccc1ddd2ad3e14f02a53cfa22e6b2b98c6dab Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 18 Jan 2022 15:40:49 -0800
Subject: torture: Make kvm-find-errors.sh notice missing vmlinux file

Currently, an obtuse compiler diagnostic can fool kvm-find-errors.sh
into believing that the build was successful.  This commit therefore
adds a check for a missing vmlinux file.  Note that in the case of
repeated torture-test scenarios ("--configs '2*TREE01'"), the vmlinux
file will only be present in the first directory, that is, in TREE01
but not TREE01.2.

Link: https://lore.kernel.org/lkml/36bd91e4-8eda-5677-7fde-40295932a640@molgen.mpg.de/
Reported-by: Paul Menzel <pmenzel@molgen.mpg.de>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
index 2e9e9e2eedb6..5f682fc892dd 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
@@ -30,10 +30,16 @@ editor=${EDITOR-vi}
 files=
 for i in ${rundir}/*/Make.out
 do
+	scenariodir="`dirname $i`"
+	scenariobasedir="`echo ${scenariodir} | sed -e 's/\.[0-9]*$//'`"
 	if egrep -q "error:|warning:|^ld: .*undefined reference to" < $i
 	then
 		egrep "error:|warning:|^ld: .*undefined reference to" < $i > $i.diags
 		files="$files $i.diags $i"
+	elif ! test -f ${scenariobasedir}/vmlinux
+	then
+		echo No ${scenariobasedir}/vmlinux file > $i.diags
+		files="$files $i.diags $i"
 	fi
 done
 if test -n "$files"
-- 
cgit 


From a7d89cfb8e1269cb6d22453adba56b8d0218589f Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 25 Jan 2022 21:08:55 -0800
Subject: torture: Change KVM environment variable to RCUTORTURE

The torture-test scripting's long-standing use of KVM as the environment
variable tracking the pathname of the rcutorture directory now conflicts
with allmodconfig builds due to the virt/kvm/Makefile.kvm file's use
of this as a makefile variable.  This commit therefore changes the
torture-test scripting from KVM to RCUTORTURE, avoiding the name conflict.

Reported-by: Zhouyi Zhou <zhouzhouyi@gmail.com>
Tested-by: Zhouyi Zhou <zhouzhouyi@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/kvm-again.sh        |  4 ++--
 .../testing/selftests/rcutorture/bin/kvm-check-branches.sh |  4 ++--
 .../testing/selftests/rcutorture/bin/kvm-end-run-stats.sh  |  4 ++--
 tools/testing/selftests/rcutorture/bin/kvm-remote.sh       |  8 ++++----
 tools/testing/selftests/rcutorture/bin/kvm.sh              | 14 +++++++-------
 tools/testing/selftests/rcutorture/bin/torture.sh          |  4 ++--
 6 files changed, 19 insertions(+), 19 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rcutorture/bin/kvm-again.sh b/tools/testing/selftests/rcutorture/bin/kvm-again.sh
index 5a0023d183da..0941f1ddab65 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-again.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-again.sh
@@ -47,8 +47,8 @@ else
 	exit 1
 fi
 
-KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
-PATH=${KVM}/bin:$PATH; export PATH
+RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
+PATH=${RCUTORTURE}/bin:$PATH; export PATH
 . functions.sh
 
 dryrun=
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh b/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh
index 370406bbfeed..f17000a2ccf1 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh
@@ -49,8 +49,8 @@ fi
 mkdir $resdir/$ds
 echo Results directory: $resdir/$ds
 
-KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
-PATH=${KVM}/bin:$PATH; export PATH
+RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
+PATH=${RCUTORTURE}/bin:$PATH; export PATH
 . functions.sh
 echo Using all `identify_qemu_vcpus` CPUs.
 
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh b/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh
index e4a00779b8c6..ee886b40a5d2 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh
@@ -22,8 +22,8 @@ T=${TMPDIR-/tmp}/kvm-end-run-stats.sh.$$
 trap 'rm -rf $T' 0
 mkdir $T
 
-KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
-PATH=${KVM}/bin:$PATH; export PATH
+RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
+PATH=${RCUTORTURE}/bin:$PATH; export PATH
 . functions.sh
 default_starttime="`get_starttime`"
 starttime="${2-default_starttime}"
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
index 29b068a55b46..8c4c1e4792d0 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
@@ -19,8 +19,8 @@ then
 	exit 1
 fi
 
-KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
-PATH=${KVM}/bin:$PATH; export PATH
+RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
+PATH=${RCUTORTURE}/bin:$PATH; export PATH
 . functions.sh
 
 starttime="`get_starttime`"
@@ -108,8 +108,8 @@ else
 		cat $T/kvm-again.sh.out | tee -a "$oldrun/remote-log"
 		exit 2
 	fi
-	cp -a "$rundir" "$KVM/res/"
-	oldrun="$KVM/res/$ds"
+	cp -a "$rundir" "$RCUTORTURE/res/"
+	oldrun="$RCUTORTURE/res/$ds"
 fi
 echo | tee -a "$oldrun/remote-log"
 echo " ----" kvm-again.sh output: "(`date`)" | tee -a "$oldrun/remote-log"
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 348ad177a5ac..55b2c1533282 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -25,15 +25,15 @@ LANG=en_US.UTF-8; export LANG
 
 dur=$((30*60))
 dryrun=""
-KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
-PATH=${KVM}/bin:$PATH; export PATH
+RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
+PATH=${RCUTORTURE}/bin:$PATH; export PATH
 . functions.sh
 
 TORTURE_ALLOTED_CPUS="`identify_qemu_vcpus`"
 TORTURE_DEFCONFIG=defconfig
 TORTURE_BOOT_IMAGE=""
 TORTURE_BUILDONLY=
-TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD
+TORTURE_INITRD="$RCUTORTURE/initrd"; export TORTURE_INITRD
 TORTURE_KCONFIG_ARG=""
 TORTURE_KCONFIG_GDB_ARG=""
 TORTURE_BOOT_GDB_ARG=""
@@ -262,7 +262,7 @@ else
 	exit 1
 fi
 
-CONFIGFRAG=${KVM}/configs/${TORTURE_SUITE}; export CONFIGFRAG
+CONFIGFRAG=${RCUTORTURE}/configs/${TORTURE_SUITE}; export CONFIGFRAG
 
 defaultconfigs="`tr '\012' ' ' < $CONFIGFRAG/CFLIST`"
 if test -z "$configs"
@@ -272,7 +272,7 @@ fi
 
 if test -z "$resdir"
 then
-	resdir=$KVM/res
+	resdir=$RCUTORTURE/res
 fi
 
 # Create a file of test-name/#cpus pairs, sorted by decreasing #cpus.
@@ -386,7 +386,7 @@ END {
 # Generate a script to execute the tests in appropriate batches.
 cat << ___EOF___ > $T/script
 CONFIGFRAG="$CONFIGFRAG"; export CONFIGFRAG
-KVM="$KVM"; export KVM
+RCUTORTURE="$RCUTORTURE"; export RCUTORTURE
 PATH="$PATH"; export PATH
 TORTURE_ALLOTED_CPUS="$TORTURE_ALLOTED_CPUS"; export TORTURE_ALLOTED_CPUS
 TORTURE_BOOT_IMAGE="$TORTURE_BOOT_IMAGE"; export TORTURE_BOOT_IMAGE
@@ -569,7 +569,7 @@ ___EOF___
 awk < $T/cfgcpu.pack \
 	-v TORTURE_BUILDONLY="$TORTURE_BUILDONLY" \
 	-v CONFIGDIR="$CONFIGFRAG/" \
-	-v KVM="$KVM" \
+	-v RCUTORTURE="$RCUTORTURE" \
 	-v ncpus=$cpus \
 	-v jitter="$jitter" \
 	-v rd=$resdir/$ds/ \
diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index d1cb60085d8f..e00e60efb231 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -13,8 +13,8 @@
 scriptname=$0
 args="$*"
 
-KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
-PATH=${KVM}/bin:$PATH; export PATH
+RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
+PATH=${RCUTORTURE}/bin:$PATH; export PATH
 . functions.sh
 
 TORTURE_ALLOTED_CPUS="`identify_qemu_vcpus`"
-- 
cgit 


From 5c105d55a9dc9e01535116ccfc26e703168a574f Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Mon, 24 Jan 2022 12:12:39 -0500
Subject: selftests/rseq: introduce own copy of rseq uapi header

The Linux kernel rseq uapi header has a broken layout for the
rseq_cs.ptr field on 32-bit little endian architectures. The entire
rseq_cs.ptr field is planned for removal, leaving only the 64-bit
rseq_cs.ptr64 field available.

Both glibc and librseq use their own copy of the Linux kernel uapi
header, where they introduce proper union fields to access to the 32-bit
low order bits of the rseq_cs pointer on 32-bit architectures.

Introduce a copy of the Linux kernel uapi headers in the Linux kernel
selftests.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20220124171253.22072-2-mathieu.desnoyers@efficios.com
---
 tools/testing/selftests/rseq/rseq-abi.h | 151 ++++++++++++++++++++++++++++++++
 tools/testing/selftests/rseq/rseq.c     |  14 +--
 tools/testing/selftests/rseq/rseq.h     |  10 +--
 3 files changed, 161 insertions(+), 14 deletions(-)
 create mode 100644 tools/testing/selftests/rseq/rseq-abi.h

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rseq/rseq-abi.h b/tools/testing/selftests/rseq/rseq-abi.h
new file mode 100644
index 000000000000..a8c44d9af71f
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-abi.h
@@ -0,0 +1,151 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+#ifndef _RSEQ_ABI_H
+#define _RSEQ_ABI_H
+
+/*
+ * rseq-abi.h
+ *
+ * Restartable sequences system call API
+ *
+ * Copyright (c) 2015-2022 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+enum rseq_abi_cpu_id_state {
+	RSEQ_ABI_CPU_ID_UNINITIALIZED			= -1,
+	RSEQ_ABI_CPU_ID_REGISTRATION_FAILED		= -2,
+};
+
+enum rseq_abi_flags {
+	RSEQ_ABI_FLAG_UNREGISTER = (1 << 0),
+};
+
+enum rseq_abi_cs_flags_bit {
+	RSEQ_ABI_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT	= 0,
+	RSEQ_ABI_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT	= 1,
+	RSEQ_ABI_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT	= 2,
+};
+
+enum rseq_abi_cs_flags {
+	RSEQ_ABI_CS_FLAG_NO_RESTART_ON_PREEMPT	=
+		(1U << RSEQ_ABI_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT),
+	RSEQ_ABI_CS_FLAG_NO_RESTART_ON_SIGNAL	=
+		(1U << RSEQ_ABI_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT),
+	RSEQ_ABI_CS_FLAG_NO_RESTART_ON_MIGRATE	=
+		(1U << RSEQ_ABI_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT),
+};
+
+/*
+ * struct rseq_abi_cs is aligned on 4 * 8 bytes to ensure it is always
+ * contained within a single cache-line. It is usually declared as
+ * link-time constant data.
+ */
+struct rseq_abi_cs {
+	/* Version of this structure. */
+	__u32 version;
+	/* enum rseq_abi_cs_flags */
+	__u32 flags;
+	__u64 start_ip;
+	/* Offset from start_ip. */
+	__u64 post_commit_offset;
+	__u64 abort_ip;
+} __attribute__((aligned(4 * sizeof(__u64))));
+
+/*
+ * struct rseq_abi is aligned on 4 * 8 bytes to ensure it is always
+ * contained within a single cache-line.
+ *
+ * A single struct rseq_abi per thread is allowed.
+ */
+struct rseq_abi {
+	/*
+	 * Restartable sequences cpu_id_start field. Updated by the
+	 * kernel. Read by user-space with single-copy atomicity
+	 * semantics. This field should only be read by the thread which
+	 * registered this data structure. Aligned on 32-bit. Always
+	 * contains a value in the range of possible CPUs, although the
+	 * value may not be the actual current CPU (e.g. if rseq is not
+	 * initialized). This CPU number value should always be compared
+	 * against the value of the cpu_id field before performing a rseq
+	 * commit or returning a value read from a data structure indexed
+	 * using the cpu_id_start value.
+	 */
+	__u32 cpu_id_start;
+	/*
+	 * Restartable sequences cpu_id field. Updated by the kernel.
+	 * Read by user-space with single-copy atomicity semantics. This
+	 * field should only be read by the thread which registered this
+	 * data structure. Aligned on 32-bit. Values
+	 * RSEQ_CPU_ID_UNINITIALIZED and RSEQ_CPU_ID_REGISTRATION_FAILED
+	 * have a special semantic: the former means "rseq uninitialized",
+	 * and latter means "rseq initialization failed". This value is
+	 * meant to be read within rseq critical sections and compared
+	 * with the cpu_id_start value previously read, before performing
+	 * the commit instruction, or read and compared with the
+	 * cpu_id_start value before returning a value loaded from a data
+	 * structure indexed using the cpu_id_start value.
+	 */
+	__u32 cpu_id;
+	/*
+	 * Restartable sequences rseq_cs field.
+	 *
+	 * Contains NULL when no critical section is active for the current
+	 * thread, or holds a pointer to the currently active struct rseq_cs.
+	 *
+	 * Updated by user-space, which sets the address of the currently
+	 * active rseq_cs at the beginning of assembly instruction sequence
+	 * block, and set to NULL by the kernel when it restarts an assembly
+	 * instruction sequence block, as well as when the kernel detects that
+	 * it is preempting or delivering a signal outside of the range
+	 * targeted by the rseq_cs. Also needs to be set to NULL by user-space
+	 * before reclaiming memory that contains the targeted struct rseq_cs.
+	 *
+	 * Read and set by the kernel. Set by user-space with single-copy
+	 * atomicity semantics. This field should only be updated by the
+	 * thread which registered this data structure. Aligned on 64-bit.
+	 */
+	union {
+		__u64 ptr64;
+
+		/*
+		 * The "arch" field provides architecture accessor for
+		 * the ptr field based on architecture pointer size and
+		 * endianness.
+		 */
+		struct {
+#ifdef __LP64__
+			__u64 ptr;
+#elif defined(__BYTE_ORDER) ? (__BYTE_ORDER == __BIG_ENDIAN) : defined(__BIG_ENDIAN)
+			__u32 padding;		/* Initialized to zero. */
+			__u32 ptr;
+#else
+			__u32 ptr;
+			__u32 padding;		/* Initialized to zero. */
+#endif
+		} arch;
+	} rseq_cs;
+
+	/*
+	 * Restartable sequences flags field.
+	 *
+	 * This field should only be updated by the thread which
+	 * registered this data structure. Read by the kernel.
+	 * Mainly used for single-stepping through rseq critical sections
+	 * with debuggers.
+	 *
+	 * - RSEQ_ABI_CS_FLAG_NO_RESTART_ON_PREEMPT
+	 *     Inhibit instruction sequence block restart on preemption
+	 *     for this thread.
+	 * - RSEQ_ABI_CS_FLAG_NO_RESTART_ON_SIGNAL
+	 *     Inhibit instruction sequence block restart on signal
+	 *     delivery for this thread.
+	 * - RSEQ_ABI_CS_FLAG_NO_RESTART_ON_MIGRATE
+	 *     Inhibit instruction sequence block restart on migration for
+	 *     this thread.
+	 */
+	__u32 flags;
+} __attribute__((aligned(4 * sizeof(__u64))));
+
+#endif /* _RSEQ_ABI_H */
diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c
index fb440dfca158..bfe1b2692ffc 100644
--- a/tools/testing/selftests/rseq/rseq.c
+++ b/tools/testing/selftests/rseq/rseq.c
@@ -30,8 +30,8 @@
 #include "../kselftest.h"
 #include "rseq.h"
 
-__thread volatile struct rseq __rseq_abi = {
-	.cpu_id = RSEQ_CPU_ID_UNINITIALIZED,
+__thread volatile struct rseq_abi __rseq_abi = {
+	.cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED,
 };
 
 /*
@@ -66,7 +66,7 @@ static void signal_restore(sigset_t oldset)
 		abort();
 }
 
-static int sys_rseq(volatile struct rseq *rseq_abi, uint32_t rseq_len,
+static int sys_rseq(volatile struct rseq_abi *rseq_abi, uint32_t rseq_len,
 		    int flags, uint32_t sig)
 {
 	return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig);
@@ -86,13 +86,13 @@ int rseq_register_current_thread(void)
 	}
 	if (__rseq_refcount++)
 		goto end;
-	rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), 0, RSEQ_SIG);
+	rc = sys_rseq(&__rseq_abi, sizeof(struct rseq_abi), 0, RSEQ_SIG);
 	if (!rc) {
 		assert(rseq_current_cpu_raw() >= 0);
 		goto end;
 	}
 	if (errno != EBUSY)
-		__rseq_abi.cpu_id = RSEQ_CPU_ID_REGISTRATION_FAILED;
+		__rseq_abi.cpu_id = RSEQ_ABI_CPU_ID_REGISTRATION_FAILED;
 	ret = -1;
 	__rseq_refcount--;
 end:
@@ -114,8 +114,8 @@ int rseq_unregister_current_thread(void)
 	}
 	if (--__rseq_refcount)
 		goto end;
-	rc = sys_rseq(&__rseq_abi, sizeof(struct rseq),
-		      RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
+	rc = sys_rseq(&__rseq_abi, sizeof(struct rseq_abi),
+		      RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG);
 	if (!rc)
 		goto end;
 	__rseq_refcount = 1;
diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h
index 3f63eb362b92..cb6bbc53b586 100644
--- a/tools/testing/selftests/rseq/rseq.h
+++ b/tools/testing/selftests/rseq/rseq.h
@@ -16,7 +16,7 @@
 #include <errno.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <linux/rseq.h>
+#include "rseq-abi.h"
 
 /*
  * Empty code injection macros, override when testing.
@@ -43,7 +43,7 @@
 #define RSEQ_INJECT_FAILED
 #endif
 
-extern __thread volatile struct rseq __rseq_abi;
+extern __thread volatile struct rseq_abi __rseq_abi;
 extern int __rseq_handled;
 
 #define rseq_likely(x)		__builtin_expect(!!(x), 1)
@@ -139,11 +139,7 @@ static inline uint32_t rseq_current_cpu(void)
 
 static inline void rseq_clear_rseq_cs(void)
 {
-#ifdef __LP64__
-	__rseq_abi.rseq_cs.ptr = 0;
-#else
-	__rseq_abi.rseq_cs.ptr.ptr32 = 0;
-#endif
+	__rseq_abi.rseq_cs.arch.ptr = 0;
 }
 
 /*
-- 
cgit 


From 930378d056eac2c96407b02aafe4938d0ac9cc37 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Mon, 24 Jan 2022 12:12:41 -0500
Subject: selftests/rseq: Remove useless assignment to cpu variable

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20220124171253.22072-4-mathieu.desnoyers@efficios.com
---
 tools/testing/selftests/rseq/param_test.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c
index 699ad5f93c34..cc2cfc1da938 100644
--- a/tools/testing/selftests/rseq/param_test.c
+++ b/tools/testing/selftests/rseq/param_test.c
@@ -368,9 +368,7 @@ void *test_percpu_spinlock_thread(void *arg)
 		abort();
 	reps = thread_data->reps;
 	for (i = 0; i < reps; i++) {
-		int cpu = rseq_cpu_start();
-
-		cpu = rseq_this_cpu_lock(&data->lock);
+		int cpu = rseq_this_cpu_lock(&data->lock);
 		data->c[cpu].count++;
 		rseq_percpu_unlock(&data->lock, cpu);
 #ifndef BENCHMARK
-- 
cgit 


From 94b80a19ebfe347a01301d750040a61c38200e2b Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Mon, 24 Jan 2022 12:12:42 -0500
Subject: selftests/rseq: Remove volatile from __rseq_abi

This is done in preparation for the selftest uplift to become compatible
with glibc-2.35.

All accesses to the __rseq_abi fields are volatile, but remove the
volatile from the TLS variable declaration, otherwise we are stuck with
volatile for the upcoming rseq_get_abi() helper.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20220124171253.22072-5-mathieu.desnoyers@efficios.com
---
 tools/testing/selftests/rseq/rseq.c | 4 ++--
 tools/testing/selftests/rseq/rseq.h | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c
index bfe1b2692ffc..1f905b60728a 100644
--- a/tools/testing/selftests/rseq/rseq.c
+++ b/tools/testing/selftests/rseq/rseq.c
@@ -30,7 +30,7 @@
 #include "../kselftest.h"
 #include "rseq.h"
 
-__thread volatile struct rseq_abi __rseq_abi = {
+__thread struct rseq_abi __rseq_abi = {
 	.cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED,
 };
 
@@ -92,7 +92,7 @@ int rseq_register_current_thread(void)
 		goto end;
 	}
 	if (errno != EBUSY)
-		__rseq_abi.cpu_id = RSEQ_ABI_CPU_ID_REGISTRATION_FAILED;
+		RSEQ_WRITE_ONCE(__rseq_abi.cpu_id, RSEQ_ABI_CPU_ID_REGISTRATION_FAILED);
 	ret = -1;
 	__rseq_refcount--;
 end:
diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h
index cb6bbc53b586..d580f8e9c001 100644
--- a/tools/testing/selftests/rseq/rseq.h
+++ b/tools/testing/selftests/rseq/rseq.h
@@ -43,7 +43,7 @@
 #define RSEQ_INJECT_FAILED
 #endif
 
-extern __thread volatile struct rseq_abi __rseq_abi;
+extern __thread struct rseq_abi __rseq_abi;
 extern int __rseq_handled;
 
 #define rseq_likely(x)		__builtin_expect(!!(x), 1)
@@ -139,7 +139,7 @@ static inline uint32_t rseq_current_cpu(void)
 
 static inline void rseq_clear_rseq_cs(void)
 {
-	__rseq_abi.rseq_cs.arch.ptr = 0;
+	RSEQ_WRITE_ONCE(__rseq_abi.rseq_cs.arch.ptr, 0);
 }
 
 /*
-- 
cgit 


From e546cd48ccc456074ddb8920732aef4af65d7ca7 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Mon, 24 Jan 2022 12:12:43 -0500
Subject: selftests/rseq: Introduce rseq_get_abi() helper

This is done in preparation for the selftest uplift to become compatible
with glibc-2.35.

glibc-2.35 exposes the rseq per-thread data in the TCB, accessible
at an offset from the thread pointer, rather than through an actual
Thread-Local Storage (TLS) variable, as the kernel selftests initially
expected.

Introduce a rseq_get_abi() helper, initially using the __rseq_abi
TLS variable, in preparation for changing this userspace ABI for one
which is compatible with glibc-2.35.

Note that the __rseq_abi TLS and glibc-2.35's ABI for per-thread data
cannot actively coexist in a process, because the kernel supports only
a single rseq registration per thread.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20220124171253.22072-6-mathieu.desnoyers@efficios.com
---
 tools/testing/selftests/rseq/rseq-arm.h   | 32 +++++++++++++++----------------
 tools/testing/selftests/rseq/rseq-arm64.h | 32 +++++++++++++++----------------
 tools/testing/selftests/rseq/rseq-mips.h  | 32 +++++++++++++++----------------
 tools/testing/selftests/rseq/rseq-ppc.h   | 32 +++++++++++++++----------------
 tools/testing/selftests/rseq/rseq-s390.h  | 24 +++++++++++------------
 tools/testing/selftests/rseq/rseq-x86.h   | 30 ++++++++++++++---------------
 tools/testing/selftests/rseq/rseq.h       | 11 ++++++++---
 7 files changed, 99 insertions(+), 94 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rseq/rseq-arm.h b/tools/testing/selftests/rseq/rseq-arm.h
index 5943c816c07c..6716540e17c6 100644
--- a/tools/testing/selftests/rseq/rseq-arm.h
+++ b/tools/testing/selftests/rseq/rseq-arm.h
@@ -185,8 +185,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
 		"5:\n\t"
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
-		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
 		  [v]			"m" (*v),
 		  [expect]		"r" (expect),
 		  [newv]		"r" (newv)
@@ -255,8 +255,8 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
 		"5:\n\t"
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
-		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [expectnot]		"r" (expectnot),
@@ -316,8 +316,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
 		"5:\n\t"
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
-		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
 		  [v]			"m" (*v),
 		  [count]		"Ir" (count)
 		  RSEQ_INJECT_INPUT
@@ -381,8 +381,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
 		"5:\n\t"
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
-		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
 		  /* try store input */
 		  [v2]			"m" (*v2),
 		  [newv2]		"r" (newv2),
@@ -457,8 +457,8 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
 		"5:\n\t"
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
-		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
 		  /* try store input */
 		  [v2]			"m" (*v2),
 		  [newv2]		"r" (newv2),
@@ -537,8 +537,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
 		"5:\n\t"
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
-		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
 		  /* cmp2 input */
 		  [v2]			"m" (*v2),
 		  [expect2]		"r" (expect2),
@@ -657,8 +657,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
 		"8:\n\t"
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
-		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [expect]		"r" (expect),
@@ -782,8 +782,8 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
 		"8:\n\t"
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
-		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [expect]		"r" (expect),
diff --git a/tools/testing/selftests/rseq/rseq-arm64.h b/tools/testing/selftests/rseq/rseq-arm64.h
index 200dae9e4208..b9d9b3aa6e9b 100644
--- a/tools/testing/selftests/rseq/rseq-arm64.h
+++ b/tools/testing/selftests/rseq/rseq-arm64.h
@@ -230,8 +230,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
 		RSEQ_ASM_DEFINE_ABORT(4, abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [current_cpu_id]	"Qo" (__rseq_abi.cpu_id),
-		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [current_cpu_id]	"Qo" (rseq_get_abi()->cpu_id),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
 		  [v]			"Qo" (*v),
 		  [expect]		"r" (expect),
 		  [newv]		"r" (newv)
@@ -287,8 +287,8 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
 		RSEQ_ASM_DEFINE_ABORT(4, abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [current_cpu_id]	"Qo" (__rseq_abi.cpu_id),
-		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [current_cpu_id]	"Qo" (rseq_get_abi()->cpu_id),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
 		  [v]			"Qo" (*v),
 		  [expectnot]		"r" (expectnot),
 		  [load]		"Qo" (*load),
@@ -337,8 +337,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
 		RSEQ_ASM_DEFINE_ABORT(4, abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [current_cpu_id]	"Qo" (__rseq_abi.cpu_id),
-		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [current_cpu_id]	"Qo" (rseq_get_abi()->cpu_id),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
 		  [v]			"Qo" (*v),
 		  [count]		"r" (count)
 		  RSEQ_INJECT_INPUT
@@ -388,8 +388,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
 		RSEQ_ASM_DEFINE_ABORT(4, abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [current_cpu_id]	"Qo" (__rseq_abi.cpu_id),
-		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [current_cpu_id]	"Qo" (rseq_get_abi()->cpu_id),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
 		  [expect]		"r" (expect),
 		  [v]			"Qo" (*v),
 		  [newv]		"r" (newv),
@@ -447,8 +447,8 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
 		RSEQ_ASM_DEFINE_ABORT(4, abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [current_cpu_id]	"Qo" (__rseq_abi.cpu_id),
-		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [current_cpu_id]	"Qo" (rseq_get_abi()->cpu_id),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
 		  [expect]		"r" (expect),
 		  [v]			"Qo" (*v),
 		  [newv]		"r" (newv),
@@ -508,8 +508,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
 		RSEQ_ASM_DEFINE_ABORT(4, abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [current_cpu_id]	"Qo" (__rseq_abi.cpu_id),
-		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [current_cpu_id]	"Qo" (rseq_get_abi()->cpu_id),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
 		  [v]			"Qo" (*v),
 		  [expect]		"r" (expect),
 		  [v2]			"Qo" (*v2),
@@ -569,8 +569,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
 		RSEQ_ASM_DEFINE_ABORT(4, abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [current_cpu_id]	"Qo" (__rseq_abi.cpu_id),
-		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [current_cpu_id]	"Qo" (rseq_get_abi()->cpu_id),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
 		  [expect]		"r" (expect),
 		  [v]			"Qo" (*v),
 		  [newv]		"r" (newv),
@@ -629,8 +629,8 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
 		RSEQ_ASM_DEFINE_ABORT(4, abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [current_cpu_id]	"Qo" (__rseq_abi.cpu_id),
-		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [current_cpu_id]	"Qo" (rseq_get_abi()->cpu_id),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
 		  [expect]		"r" (expect),
 		  [v]			"Qo" (*v),
 		  [newv]		"r" (newv),
diff --git a/tools/testing/selftests/rseq/rseq-mips.h b/tools/testing/selftests/rseq/rseq-mips.h
index e989e7c14b09..2b1f5bd95268 100644
--- a/tools/testing/selftests/rseq/rseq-mips.h
+++ b/tools/testing/selftests/rseq/rseq-mips.h
@@ -190,8 +190,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
 		"5:\n\t"
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
-		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
 		  [v]			"m" (*v),
 		  [expect]		"r" (expect),
 		  [newv]		"r" (newv)
@@ -258,8 +258,8 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
 		"5:\n\t"
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
-		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [expectnot]		"r" (expectnot),
@@ -319,8 +319,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
 		"5:\n\t"
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
-		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
 		  [v]			"m" (*v),
 		  [count]		"Ir" (count)
 		  RSEQ_INJECT_INPUT
@@ -382,8 +382,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
 		"5:\n\t"
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
-		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
 		  /* try store input */
 		  [v2]			"m" (*v2),
 		  [newv2]		"r" (newv2),
@@ -456,8 +456,8 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
 		"5:\n\t"
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
-		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
 		  /* try store input */
 		  [v2]			"m" (*v2),
 		  [newv2]		"r" (newv2),
@@ -532,8 +532,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
 		"5:\n\t"
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
-		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
 		  /* cmp2 input */
 		  [v2]			"m" (*v2),
 		  [expect2]		"r" (expect2),
@@ -649,8 +649,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
 		"8:\n\t"
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
-		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [expect]		"r" (expect),
@@ -771,8 +771,8 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
 		"8:\n\t"
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
-		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [expect]		"r" (expect),
diff --git a/tools/testing/selftests/rseq/rseq-ppc.h b/tools/testing/selftests/rseq/rseq-ppc.h
index 76be90196fe4..2e6b7572ba08 100644
--- a/tools/testing/selftests/rseq/rseq-ppc.h
+++ b/tools/testing/selftests/rseq/rseq-ppc.h
@@ -235,8 +235,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
 		RSEQ_ASM_DEFINE_ABORT(4, abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
-		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
 		  [v]			"m" (*v),
 		  [expect]		"r" (expect),
 		  [newv]		"r" (newv)
@@ -301,8 +301,8 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
 		RSEQ_ASM_DEFINE_ABORT(4, abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
-		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [expectnot]		"r" (expectnot),
@@ -359,8 +359,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
 		RSEQ_ASM_DEFINE_ABORT(4, abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
-		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [count]		"r" (count)
@@ -419,8 +419,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
 		RSEQ_ASM_DEFINE_ABORT(4, abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
-		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
 		  /* try store input */
 		  [v2]			"m" (*v2),
 		  [newv2]		"r" (newv2),
@@ -489,8 +489,8 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
 		RSEQ_ASM_DEFINE_ABORT(4, abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
-		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
 		  /* try store input */
 		  [v2]			"m" (*v2),
 		  [newv2]		"r" (newv2),
@@ -560,8 +560,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
 		RSEQ_ASM_DEFINE_ABORT(4, abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
-		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
 		  /* cmp2 input */
 		  [v2]			"m" (*v2),
 		  [expect2]		"r" (expect2),
@@ -635,8 +635,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
 		RSEQ_ASM_DEFINE_ABORT(4, abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
-		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [expect]		"r" (expect),
@@ -711,8 +711,8 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
 		RSEQ_ASM_DEFINE_ABORT(4, abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
-		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [expect]		"r" (expect),
diff --git a/tools/testing/selftests/rseq/rseq-s390.h b/tools/testing/selftests/rseq/rseq-s390.h
index 8ef94ad1cbb4..b906e044d2a3 100644
--- a/tools/testing/selftests/rseq/rseq-s390.h
+++ b/tools/testing/selftests/rseq/rseq-s390.h
@@ -165,8 +165,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
 		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
-		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
 		  [v]			"m" (*v),
 		  [expect]		"r" (expect),
 		  [newv]		"r" (newv)
@@ -233,8 +233,8 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
 		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
-		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [expectnot]		"r" (expectnot),
@@ -288,8 +288,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
 		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
-		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [count]		"r" (count)
@@ -347,8 +347,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
 		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
-		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
 		  /* try store input */
 		  [v2]			"m" (*v2),
 		  [newv2]		"r" (newv2),
@@ -426,8 +426,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
 		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
-		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
 		  /* cmp2 input */
 		  [v2]			"m" (*v2),
 		  [expect2]		"r" (expect2),
@@ -534,8 +534,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
 #endif
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
-		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [expect]		"r" (expect),
diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h
index 640411518e46..1d9fa0516e53 100644
--- a/tools/testing/selftests/rseq/rseq-x86.h
+++ b/tools/testing/selftests/rseq/rseq-x86.h
@@ -141,7 +141,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
 		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [rseq_abi]		"r" (&__rseq_abi),
+		  [rseq_abi]		"r" (rseq_get_abi()),
 		  [v]			"m" (*v),
 		  [expect]		"r" (expect),
 		  [newv]		"r" (newv)
@@ -207,7 +207,7 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
 		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [rseq_abi]		"r" (&__rseq_abi),
+		  [rseq_abi]		"r" (rseq_get_abi()),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [expectnot]		"r" (expectnot),
@@ -258,7 +258,7 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
 		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [rseq_abi]		"r" (&__rseq_abi),
+		  [rseq_abi]		"r" (rseq_get_abi()),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [count]		"er" (count)
@@ -314,7 +314,7 @@ int rseq_offset_deref_addv(intptr_t *ptr, off_t off, intptr_t inc, int cpu)
 		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [rseq_abi]		"r" (&__rseq_abi),
+		  [rseq_abi]		"r" (rseq_get_abi()),
 		  /* final store input */
 		  [ptr]			"m" (*ptr),
 		  [off]			"er" (off),
@@ -372,7 +372,7 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
 		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [rseq_abi]		"r" (&__rseq_abi),
+		  [rseq_abi]		"r" (rseq_get_abi()),
 		  /* try store input */
 		  [v2]			"m" (*v2),
 		  [newv2]		"r" (newv2),
@@ -449,7 +449,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
 		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [rseq_abi]		"r" (&__rseq_abi),
+		  [rseq_abi]		"r" (rseq_get_abi()),
 		  /* cmp2 input */
 		  [v2]			"m" (*v2),
 		  [expect2]		"r" (expect2),
@@ -555,7 +555,7 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
 #endif
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [rseq_abi]		"r" (&__rseq_abi),
+		  [rseq_abi]		"r" (rseq_get_abi()),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [expect]		"r" (expect),
@@ -719,7 +719,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
 		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [rseq_abi]		"r" (&__rseq_abi),
+		  [rseq_abi]		"r" (rseq_get_abi()),
 		  [v]			"m" (*v),
 		  [expect]		"r" (expect),
 		  [newv]		"r" (newv)
@@ -785,7 +785,7 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
 		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [rseq_abi]		"r" (&__rseq_abi),
+		  [rseq_abi]		"r" (rseq_get_abi()),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [expectnot]		"r" (expectnot),
@@ -836,7 +836,7 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
 		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [rseq_abi]		"r" (&__rseq_abi),
+		  [rseq_abi]		"r" (rseq_get_abi()),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [count]		"ir" (count)
@@ -894,7 +894,7 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
 		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [rseq_abi]		"r" (&__rseq_abi),
+		  [rseq_abi]		"r" (rseq_get_abi()),
 		  /* try store input */
 		  [v2]			"m" (*v2),
 		  [newv2]		"m" (newv2),
@@ -962,7 +962,7 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
 		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [rseq_abi]		"r" (&__rseq_abi),
+		  [rseq_abi]		"r" (rseq_get_abi()),
 		  /* try store input */
 		  [v2]			"m" (*v2),
 		  [newv2]		"r" (newv2),
@@ -1032,7 +1032,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
 		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [rseq_abi]		"r" (&__rseq_abi),
+		  [rseq_abi]		"r" (rseq_get_abi()),
 		  /* cmp2 input */
 		  [v2]			"m" (*v2),
 		  [expect2]		"r" (expect2),
@@ -1142,7 +1142,7 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
 #endif
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [rseq_abi]		"r" (&__rseq_abi),
+		  [rseq_abi]		"r" (rseq_get_abi()),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [expect]		"m" (expect),
@@ -1255,7 +1255,7 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
 #endif
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [rseq_abi]		"r" (&__rseq_abi),
+		  [rseq_abi]		"r" (rseq_get_abi()),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [expect]		"m" (expect),
diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h
index d580f8e9c001..ca668a2af172 100644
--- a/tools/testing/selftests/rseq/rseq.h
+++ b/tools/testing/selftests/rseq/rseq.h
@@ -46,6 +46,11 @@
 extern __thread struct rseq_abi __rseq_abi;
 extern int __rseq_handled;
 
+static inline struct rseq_abi *rseq_get_abi(void)
+{
+	return &__rseq_abi;
+}
+
 #define rseq_likely(x)		__builtin_expect(!!(x), 1)
 #define rseq_unlikely(x)	__builtin_expect(!!(x), 0)
 #define rseq_barrier()		__asm__ __volatile__("" : : : "memory")
@@ -108,7 +113,7 @@ int32_t rseq_fallback_current_cpu(void);
  */
 static inline int32_t rseq_current_cpu_raw(void)
 {
-	return RSEQ_ACCESS_ONCE(__rseq_abi.cpu_id);
+	return RSEQ_ACCESS_ONCE(rseq_get_abi()->cpu_id);
 }
 
 /*
@@ -124,7 +129,7 @@ static inline int32_t rseq_current_cpu_raw(void)
  */
 static inline uint32_t rseq_cpu_start(void)
 {
-	return RSEQ_ACCESS_ONCE(__rseq_abi.cpu_id_start);
+	return RSEQ_ACCESS_ONCE(rseq_get_abi()->cpu_id_start);
 }
 
 static inline uint32_t rseq_current_cpu(void)
@@ -139,7 +144,7 @@ static inline uint32_t rseq_current_cpu(void)
 
 static inline void rseq_clear_rseq_cs(void)
 {
-	RSEQ_WRITE_ONCE(__rseq_abi.rseq_cs.arch.ptr, 0);
+	RSEQ_WRITE_ONCE(rseq_get_abi()->rseq_cs.arch.ptr, 0);
 }
 
 /*
-- 
cgit 


From 886ddfba933f5ce9d76c278165d834d114ba4ffc Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Mon, 24 Jan 2022 12:12:44 -0500
Subject: selftests/rseq: Introduce thread pointer getters

This is done in preparation for the selftest uplift to become compatible
with glibc-2.35.

glibc-2.35 exposes the rseq per-thread data in the TCB, accessible
at an offset from the thread pointer.

The toolchains do not implement accessing the thread pointer on all
architectures. Provide thread pointer getters for ppc and x86 which
lack (or lacked until recently) toolchain support.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20220124171253.22072-7-mathieu.desnoyers@efficios.com
---
 .../selftests/rseq/rseq-generic-thread-pointer.h   | 25 ++++++++++++++
 .../selftests/rseq/rseq-ppc-thread-pointer.h       | 30 ++++++++++++++++
 tools/testing/selftests/rseq/rseq-thread-pointer.h | 19 ++++++++++
 .../selftests/rseq/rseq-x86-thread-pointer.h       | 40 ++++++++++++++++++++++
 4 files changed, 114 insertions(+)
 create mode 100644 tools/testing/selftests/rseq/rseq-generic-thread-pointer.h
 create mode 100644 tools/testing/selftests/rseq/rseq-ppc-thread-pointer.h
 create mode 100644 tools/testing/selftests/rseq/rseq-thread-pointer.h
 create mode 100644 tools/testing/selftests/rseq/rseq-x86-thread-pointer.h

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rseq/rseq-generic-thread-pointer.h b/tools/testing/selftests/rseq/rseq-generic-thread-pointer.h
new file mode 100644
index 000000000000..38c584661571
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-generic-thread-pointer.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */
+/*
+ * rseq-generic-thread-pointer.h
+ *
+ * (C) Copyright 2021 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#ifndef _RSEQ_GENERIC_THREAD_POINTER
+#define _RSEQ_GENERIC_THREAD_POINTER
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Use gcc builtin thread pointer. */
+static inline void *rseq_thread_pointer(void)
+{
+	return __builtin_thread_pointer();
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/tools/testing/selftests/rseq/rseq-ppc-thread-pointer.h b/tools/testing/selftests/rseq/rseq-ppc-thread-pointer.h
new file mode 100644
index 000000000000..263eee84fb76
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-ppc-thread-pointer.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */
+/*
+ * rseq-ppc-thread-pointer.h
+ *
+ * (C) Copyright 2021 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#ifndef _RSEQ_PPC_THREAD_POINTER
+#define _RSEQ_PPC_THREAD_POINTER
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static inline void *rseq_thread_pointer(void)
+{
+#ifdef __powerpc64__
+	register void *__result asm ("r13");
+#else
+	register void *__result asm ("r2");
+#endif
+	asm ("" : "=r" (__result));
+	return __result;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/tools/testing/selftests/rseq/rseq-thread-pointer.h b/tools/testing/selftests/rseq/rseq-thread-pointer.h
new file mode 100644
index 000000000000..977c25d758b2
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-thread-pointer.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */
+/*
+ * rseq-thread-pointer.h
+ *
+ * (C) Copyright 2021 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#ifndef _RSEQ_THREAD_POINTER
+#define _RSEQ_THREAD_POINTER
+
+#if defined(__x86_64__) || defined(__i386__)
+#include "rseq-x86-thread-pointer.h"
+#elif defined(__PPC__)
+#include "rseq-ppc-thread-pointer.h"
+#else
+#include "rseq-generic-thread-pointer.h"
+#endif
+
+#endif
diff --git a/tools/testing/selftests/rseq/rseq-x86-thread-pointer.h b/tools/testing/selftests/rseq/rseq-x86-thread-pointer.h
new file mode 100644
index 000000000000..d3133587d996
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-x86-thread-pointer.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */
+/*
+ * rseq-x86-thread-pointer.h
+ *
+ * (C) Copyright 2021 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#ifndef _RSEQ_X86_THREAD_POINTER
+#define _RSEQ_X86_THREAD_POINTER
+
+#include <features.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if __GNUC_PREREQ (11, 1)
+static inline void *rseq_thread_pointer(void)
+{
+	return __builtin_thread_pointer();
+}
+#else
+static inline void *rseq_thread_pointer(void)
+{
+	void *__result;
+
+# ifdef __x86_64__
+	__asm__ ("mov %%fs:0, %0" : "=r" (__result));
+# else
+	__asm__ ("mov %%gs:0, %0" : "=r" (__result));
+# endif
+	return __result;
+}
+#endif /* !GCC 11 */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
-- 
cgit 


From 233e667e1ae3e348686bd9dd0172e62a09d852e1 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Mon, 24 Jan 2022 12:12:45 -0500
Subject: selftests/rseq: Uplift rseq selftests for compatibility with
 glibc-2.35

glibc-2.35 (upcoming release date 2022-02-01) exposes the rseq per-thread
data in the TCB, accessible at an offset from the thread pointer, rather
than through an actual Thread-Local Storage (TLS) variable, as the
Linux kernel selftests initially expected.

The __rseq_abi TLS and glibc-2.35's ABI for per-thread data cannot
actively coexist in a process, because the kernel supports only a single
rseq registration per thread.

Here is the scheme introduced to ensure selftests can work both with an
older glibc and with glibc-2.35+:

- librseq exposes its own "rseq_offset, rseq_size, rseq_flags" ABI.

- librseq queries for glibc rseq ABI (__rseq_offset, __rseq_size,
  __rseq_flags) using dlsym() in a librseq library constructor. If those
  are found, copy their values into rseq_offset, rseq_size, and
  rseq_flags.

- Else, if those glibc symbols are not found, handle rseq registration
  from librseq and use its own IE-model TLS to implement the rseq ABI
  per-thread storage.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20220124171253.22072-8-mathieu.desnoyers@efficios.com
---
 tools/testing/selftests/rseq/Makefile |   2 +-
 tools/testing/selftests/rseq/rseq.c   | 161 ++++++++++++++++------------------
 tools/testing/selftests/rseq/rseq.h   |  13 ++-
 3 files changed, 88 insertions(+), 88 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile
index 2af9d39a9716..215e1067f037 100644
--- a/tools/testing/selftests/rseq/Makefile
+++ b/tools/testing/selftests/rseq/Makefile
@@ -6,7 +6,7 @@ endif
 
 CFLAGS += -O2 -Wall -g -I./ -I../../../../usr/include/ -L$(OUTPUT) -Wl,-rpath=./ \
 	  $(CLANG_FLAGS)
-LDLIBS += -lpthread
+LDLIBS += -lpthread -ldl
 
 # Own dependencies because we only want to build against 1st prerequisite, but
 # still track changes to header files and depend on shared object.
diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c
index 1f905b60728a..07ba0d463a96 100644
--- a/tools/testing/selftests/rseq/rseq.c
+++ b/tools/testing/selftests/rseq/rseq.c
@@ -26,130 +26,123 @@
 #include <assert.h>
 #include <signal.h>
 #include <limits.h>
+#include <dlfcn.h>
 
 #include "../kselftest.h"
 #include "rseq.h"
 
-__thread struct rseq_abi __rseq_abi = {
-	.cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED,
-};
+static const int *libc_rseq_offset_p;
+static const unsigned int *libc_rseq_size_p;
+static const unsigned int *libc_rseq_flags_p;
 
-/*
- * Shared with other libraries. This library may take rseq ownership if it is
- * still 0 when executing the library constructor. Set to 1 by library
- * constructor when handling rseq. Set to 0 in destructor if handling rseq.
- */
-int __rseq_handled;
+/* Offset from the thread pointer to the rseq area.  */
+int rseq_offset;
+
+/* Size of the registered rseq area.  0 if the registration was
+   unsuccessful.  */
+unsigned int rseq_size = -1U;
+
+/* Flags used during rseq registration.  */
+unsigned int rseq_flags;
 
-/* Whether this library have ownership of rseq registration. */
 static int rseq_ownership;
 
-static __thread volatile uint32_t __rseq_refcount;
+static
+__thread struct rseq_abi __rseq_abi __attribute__((tls_model("initial-exec"))) = {
+	.cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED,
+};
 
-static void signal_off_save(sigset_t *oldset)
+static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len,
+		    int flags, uint32_t sig)
 {
-	sigset_t set;
-	int ret;
-
-	sigfillset(&set);
-	ret = pthread_sigmask(SIG_BLOCK, &set, oldset);
-	if (ret)
-		abort();
+	return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig);
 }
 
-static void signal_restore(sigset_t oldset)
+int rseq_available(void)
 {
-	int ret;
+	int rc;
 
-	ret = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
-	if (ret)
+	rc = sys_rseq(NULL, 0, 0, 0);
+	if (rc != -1)
 		abort();
-}
-
-static int sys_rseq(volatile struct rseq_abi *rseq_abi, uint32_t rseq_len,
-		    int flags, uint32_t sig)
-{
-	return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig);
+	switch (errno) {
+	case ENOSYS:
+		return 0;
+	case EINVAL:
+		return 1;
+	default:
+		abort();
+	}
 }
 
 int rseq_register_current_thread(void)
 {
-	int rc, ret = 0;
-	sigset_t oldset;
+	int rc;
 
-	if (!rseq_ownership)
+	if (!rseq_ownership) {
+		/* Treat libc's ownership as a successful registration. */
 		return 0;
-	signal_off_save(&oldset);
-	if (__rseq_refcount == UINT_MAX) {
-		ret = -1;
-		goto end;
 	}
-	if (__rseq_refcount++)
-		goto end;
 	rc = sys_rseq(&__rseq_abi, sizeof(struct rseq_abi), 0, RSEQ_SIG);
-	if (!rc) {
-		assert(rseq_current_cpu_raw() >= 0);
-		goto end;
-	}
-	if (errno != EBUSY)
-		RSEQ_WRITE_ONCE(__rseq_abi.cpu_id, RSEQ_ABI_CPU_ID_REGISTRATION_FAILED);
-	ret = -1;
-	__rseq_refcount--;
-end:
-	signal_restore(oldset);
-	return ret;
+	if (rc)
+		return -1;
+	assert(rseq_current_cpu_raw() >= 0);
+	return 0;
 }
 
 int rseq_unregister_current_thread(void)
 {
-	int rc, ret = 0;
-	sigset_t oldset;
+	int rc;
 
-	if (!rseq_ownership)
+	if (!rseq_ownership) {
+		/* Treat libc's ownership as a successful unregistration. */
 		return 0;
-	signal_off_save(&oldset);
-	if (!__rseq_refcount) {
-		ret = -1;
-		goto end;
 	}
-	if (--__rseq_refcount)
-		goto end;
-	rc = sys_rseq(&__rseq_abi, sizeof(struct rseq_abi),
-		      RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG);
-	if (!rc)
-		goto end;
-	__rseq_refcount = 1;
-	ret = -1;
-end:
-	signal_restore(oldset);
-	return ret;
+	rc = sys_rseq(&__rseq_abi, sizeof(struct rseq_abi), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG);
+	if (rc)
+		return -1;
+	return 0;
 }
 
-int32_t rseq_fallback_current_cpu(void)
+static __attribute__((constructor))
+void rseq_init(void)
 {
-	int32_t cpu;
-
-	cpu = sched_getcpu();
-	if (cpu < 0) {
-		perror("sched_getcpu()");
-		abort();
+	libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset");
+	libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size");
+	libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags");
+	if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p) {
+		/* rseq registration owned by glibc */
+		rseq_offset = *libc_rseq_offset_p;
+		rseq_size = *libc_rseq_size_p;
+		rseq_flags = *libc_rseq_flags_p;
+		return;
 	}
-	return cpu;
-}
-
-void __attribute__((constructor)) rseq_init(void)
-{
-	/* Check whether rseq is handled by another library. */
-	if (__rseq_handled)
+	if (!rseq_available())
 		return;
-	__rseq_handled = 1;
 	rseq_ownership = 1;
+	rseq_offset = (void *)&__rseq_abi - rseq_thread_pointer();
+	rseq_size = sizeof(struct rseq_abi);
+	rseq_flags = 0;
 }
 
-void __attribute__((destructor)) rseq_fini(void)
+static __attribute__((destructor))
+void rseq_exit(void)
 {
 	if (!rseq_ownership)
 		return;
-	__rseq_handled = 0;
+	rseq_offset = 0;
+	rseq_size = -1U;
 	rseq_ownership = 0;
 }
+
+int32_t rseq_fallback_current_cpu(void)
+{
+	int32_t cpu;
+
+	cpu = sched_getcpu();
+	if (cpu < 0) {
+		perror("sched_getcpu()");
+		abort();
+	}
+	return cpu;
+}
diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h
index ca668a2af172..17531ccd3090 100644
--- a/tools/testing/selftests/rseq/rseq.h
+++ b/tools/testing/selftests/rseq/rseq.h
@@ -43,12 +43,19 @@
 #define RSEQ_INJECT_FAILED
 #endif
 
-extern __thread struct rseq_abi __rseq_abi;
-extern int __rseq_handled;
+#include "rseq-thread-pointer.h"
+
+/* Offset from the thread pointer to the rseq area.  */
+extern int rseq_offset;
+/* Size of the registered rseq area.  0 if the registration was
+   unsuccessful.  */
+extern unsigned int rseq_size;
+/* Flags used during rseq registration.  */
+extern unsigned int rseq_flags;
 
 static inline struct rseq_abi *rseq_get_abi(void)
 {
-	return &__rseq_abi;
+	return (struct rseq_abi *) ((uintptr_t) rseq_thread_pointer() + rseq_offset);
 }
 
 #define rseq_likely(x)		__builtin_expect(!!(x), 1)
-- 
cgit 


From 24d1136a29da5953de5c0cbc6c83eb62a1e0bf14 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Mon, 24 Jan 2022 12:12:46 -0500
Subject: selftests/rseq: Fix ppc32: wrong rseq_cs 32-bit field pointer on big
 endian

ppc32 incorrectly uses padding as rseq_cs pointer field. Fix this by
using the rseq_cs.arch.ptr field.

Use this field across all architectures.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20220124171253.22072-9-mathieu.desnoyers@efficios.com
---
 tools/testing/selftests/rseq/rseq-arm.h   | 16 ++++++++--------
 tools/testing/selftests/rseq/rseq-arm64.h | 16 ++++++++--------
 tools/testing/selftests/rseq/rseq-mips.h  | 16 ++++++++--------
 tools/testing/selftests/rseq/rseq-ppc.h   | 16 ++++++++--------
 tools/testing/selftests/rseq/rseq-s390.h  | 12 ++++++------
 5 files changed, 38 insertions(+), 38 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rseq/rseq-arm.h b/tools/testing/selftests/rseq/rseq-arm.h
index 6716540e17c6..5f567b3b40f2 100644
--- a/tools/testing/selftests/rseq/rseq-arm.h
+++ b/tools/testing/selftests/rseq/rseq-arm.h
@@ -186,7 +186,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
 		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
-		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
 		  [v]			"m" (*v),
 		  [expect]		"r" (expect),
 		  [newv]		"r" (newv)
@@ -256,7 +256,7 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
 		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
-		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [expectnot]		"r" (expectnot),
@@ -317,7 +317,7 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
 		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
-		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
 		  [v]			"m" (*v),
 		  [count]		"Ir" (count)
 		  RSEQ_INJECT_INPUT
@@ -382,7 +382,7 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
 		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
-		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
 		  /* try store input */
 		  [v2]			"m" (*v2),
 		  [newv2]		"r" (newv2),
@@ -458,7 +458,7 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
 		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
-		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
 		  /* try store input */
 		  [v2]			"m" (*v2),
 		  [newv2]		"r" (newv2),
@@ -538,7 +538,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
 		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
-		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
 		  /* cmp2 input */
 		  [v2]			"m" (*v2),
 		  [expect2]		"r" (expect2),
@@ -658,7 +658,7 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
 		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
-		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [expect]		"r" (expect),
@@ -783,7 +783,7 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
 		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
-		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [expect]		"r" (expect),
diff --git a/tools/testing/selftests/rseq/rseq-arm64.h b/tools/testing/selftests/rseq/rseq-arm64.h
index b9d9b3aa6e9b..d0f2b7feee94 100644
--- a/tools/testing/selftests/rseq/rseq-arm64.h
+++ b/tools/testing/selftests/rseq/rseq-arm64.h
@@ -231,7 +231,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
 		  [current_cpu_id]	"Qo" (rseq_get_abi()->cpu_id),
-		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
 		  [v]			"Qo" (*v),
 		  [expect]		"r" (expect),
 		  [newv]		"r" (newv)
@@ -288,7 +288,7 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
 		  [current_cpu_id]	"Qo" (rseq_get_abi()->cpu_id),
-		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
 		  [v]			"Qo" (*v),
 		  [expectnot]		"r" (expectnot),
 		  [load]		"Qo" (*load),
@@ -338,7 +338,7 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
 		  [current_cpu_id]	"Qo" (rseq_get_abi()->cpu_id),
-		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
 		  [v]			"Qo" (*v),
 		  [count]		"r" (count)
 		  RSEQ_INJECT_INPUT
@@ -389,7 +389,7 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
 		  [current_cpu_id]	"Qo" (rseq_get_abi()->cpu_id),
-		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
 		  [expect]		"r" (expect),
 		  [v]			"Qo" (*v),
 		  [newv]		"r" (newv),
@@ -448,7 +448,7 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
 		  [current_cpu_id]	"Qo" (rseq_get_abi()->cpu_id),
-		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
 		  [expect]		"r" (expect),
 		  [v]			"Qo" (*v),
 		  [newv]		"r" (newv),
@@ -509,7 +509,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
 		  [current_cpu_id]	"Qo" (rseq_get_abi()->cpu_id),
-		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
 		  [v]			"Qo" (*v),
 		  [expect]		"r" (expect),
 		  [v2]			"Qo" (*v2),
@@ -570,7 +570,7 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
 		  [current_cpu_id]	"Qo" (rseq_get_abi()->cpu_id),
-		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
 		  [expect]		"r" (expect),
 		  [v]			"Qo" (*v),
 		  [newv]		"r" (newv),
@@ -630,7 +630,7 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
 		  [current_cpu_id]	"Qo" (rseq_get_abi()->cpu_id),
-		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
 		  [expect]		"r" (expect),
 		  [v]			"Qo" (*v),
 		  [newv]		"r" (newv),
diff --git a/tools/testing/selftests/rseq/rseq-mips.h b/tools/testing/selftests/rseq/rseq-mips.h
index 2b1f5bd95268..6df54273825d 100644
--- a/tools/testing/selftests/rseq/rseq-mips.h
+++ b/tools/testing/selftests/rseq/rseq-mips.h
@@ -191,7 +191,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
 		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
-		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
 		  [v]			"m" (*v),
 		  [expect]		"r" (expect),
 		  [newv]		"r" (newv)
@@ -259,7 +259,7 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
 		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
-		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [expectnot]		"r" (expectnot),
@@ -320,7 +320,7 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
 		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
-		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
 		  [v]			"m" (*v),
 		  [count]		"Ir" (count)
 		  RSEQ_INJECT_INPUT
@@ -383,7 +383,7 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
 		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
-		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
 		  /* try store input */
 		  [v2]			"m" (*v2),
 		  [newv2]		"r" (newv2),
@@ -457,7 +457,7 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
 		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
-		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
 		  /* try store input */
 		  [v2]			"m" (*v2),
 		  [newv2]		"r" (newv2),
@@ -533,7 +533,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
 		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
-		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
 		  /* cmp2 input */
 		  [v2]			"m" (*v2),
 		  [expect2]		"r" (expect2),
@@ -650,7 +650,7 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
 		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
-		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [expect]		"r" (expect),
@@ -772,7 +772,7 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
 		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
-		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [expect]		"r" (expect),
diff --git a/tools/testing/selftests/rseq/rseq-ppc.h b/tools/testing/selftests/rseq/rseq-ppc.h
index 2e6b7572ba08..c4ba1375285d 100644
--- a/tools/testing/selftests/rseq/rseq-ppc.h
+++ b/tools/testing/selftests/rseq/rseq-ppc.h
@@ -236,7 +236,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
 		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
-		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
 		  [v]			"m" (*v),
 		  [expect]		"r" (expect),
 		  [newv]		"r" (newv)
@@ -302,7 +302,7 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
 		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
-		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [expectnot]		"r" (expectnot),
@@ -360,7 +360,7 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
 		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
-		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [count]		"r" (count)
@@ -420,7 +420,7 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
 		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
-		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
 		  /* try store input */
 		  [v2]			"m" (*v2),
 		  [newv2]		"r" (newv2),
@@ -490,7 +490,7 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
 		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
-		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
 		  /* try store input */
 		  [v2]			"m" (*v2),
 		  [newv2]		"r" (newv2),
@@ -561,7 +561,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
 		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
-		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
 		  /* cmp2 input */
 		  [v2]			"m" (*v2),
 		  [expect2]		"r" (expect2),
@@ -636,7 +636,7 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
 		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
-		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [expect]		"r" (expect),
@@ -712,7 +712,7 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
 		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
-		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [expect]		"r" (expect),
diff --git a/tools/testing/selftests/rseq/rseq-s390.h b/tools/testing/selftests/rseq/rseq-s390.h
index b906e044d2a3..9927021f8bd0 100644
--- a/tools/testing/selftests/rseq/rseq-s390.h
+++ b/tools/testing/selftests/rseq/rseq-s390.h
@@ -166,7 +166,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
 		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
-		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
 		  [v]			"m" (*v),
 		  [expect]		"r" (expect),
 		  [newv]		"r" (newv)
@@ -234,7 +234,7 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
 		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
-		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [expectnot]		"r" (expectnot),
@@ -289,7 +289,7 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
 		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
-		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [count]		"r" (count)
@@ -348,7 +348,7 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
 		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
-		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
 		  /* try store input */
 		  [v2]			"m" (*v2),
 		  [newv2]		"r" (newv2),
@@ -427,7 +427,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
 		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
-		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
 		  /* cmp2 input */
 		  [v2]			"m" (*v2),
 		  [expect2]		"r" (expect2),
@@ -535,7 +535,7 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
 		  [current_cpu_id]	"m" (rseq_get_abi()->cpu_id),
-		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs),
+		  [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [expect]		"r" (expect),
-- 
cgit 


From de6b52a21420a18dc8a36438d581efd1313d5fe3 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Mon, 24 Jan 2022 12:12:47 -0500
Subject: selftests/rseq: Fix ppc32 missing instruction selection "u" and "x"
 for load/store

Building the rseq basic test  with
gcc version 5.4.0 20160609 (Ubuntu 5.4.0-6ubuntu1~16.04.12)
Target: powerpc-linux-gnu

leads to these errors:

/tmp/ccieEWxU.s: Assembler messages:
/tmp/ccieEWxU.s:118: Error: syntax error; found `,', expected `('
/tmp/ccieEWxU.s:118: Error: junk at end of line: `,8'
/tmp/ccieEWxU.s:121: Error: syntax error; found `,', expected `('
/tmp/ccieEWxU.s:121: Error: junk at end of line: `,8'
/tmp/ccieEWxU.s:626: Error: syntax error; found `,', expected `('
/tmp/ccieEWxU.s:626: Error: junk at end of line: `,8'
/tmp/ccieEWxU.s:629: Error: syntax error; found `,', expected `('
/tmp/ccieEWxU.s:629: Error: junk at end of line: `,8'
/tmp/ccieEWxU.s:735: Error: syntax error; found `,', expected `('
/tmp/ccieEWxU.s:735: Error: junk at end of line: `,8'
/tmp/ccieEWxU.s:738: Error: syntax error; found `,', expected `('
/tmp/ccieEWxU.s:738: Error: junk at end of line: `,8'
/tmp/ccieEWxU.s:741: Error: syntax error; found `,', expected `('
/tmp/ccieEWxU.s:741: Error: junk at end of line: `,8'
Makefile:581: recipe for target 'basic_percpu_ops_test.o' failed

Based on discussion with Linux powerpc maintainers and review of
the use of the "m" operand in powerpc kernel code, add the missing
%Un%Xn (where n is operand number) to the lwz, stw, ld, and std
instructions when used with "m" operands.

Using "WORD" to mean either a 32-bit or 64-bit type depending on
the architecture is misleading. The term "WORD" really means a
32-bit type in both 32-bit and 64-bit powerpc assembler. The intent
here is to wrap load/store to intptr_t into common macros for both
32-bit and 64-bit.

Rename the macros with a RSEQ_ prefix, and use the terms "INT"
for always 32-bit type, and "LONG" for architecture bitness-sized
type.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20220124171253.22072-10-mathieu.desnoyers@efficios.com
---
 tools/testing/selftests/rseq/rseq-ppc.h | 55 +++++++++++++++++----------------
 1 file changed, 28 insertions(+), 27 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rseq/rseq-ppc.h b/tools/testing/selftests/rseq/rseq-ppc.h
index c4ba1375285d..87befda47ba4 100644
--- a/tools/testing/selftests/rseq/rseq-ppc.h
+++ b/tools/testing/selftests/rseq/rseq-ppc.h
@@ -47,10 +47,13 @@ do {									\
 
 #ifdef __PPC64__
 
-#define STORE_WORD	"std "
-#define LOAD_WORD	"ld "
-#define LOADX_WORD	"ldx "
-#define CMP_WORD	"cmpd "
+#define RSEQ_STORE_LONG(arg)	"std%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] "	/* To memory ("m" constraint) */
+#define RSEQ_STORE_INT(arg)	"stw%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] "	/* To memory ("m" constraint) */
+#define RSEQ_LOAD_LONG(arg)	"ld%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] "	/* From memory ("m" constraint) */
+#define RSEQ_LOAD_INT(arg)	"lwz%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] "	/* From memory ("m" constraint) */
+#define RSEQ_LOADX_LONG		"ldx "							/* From base register ("b" constraint) */
+#define RSEQ_CMP_LONG		"cmpd "
+#define RSEQ_CMP_LONG_INT	"cmpdi "
 
 #define __RSEQ_ASM_DEFINE_TABLE(label, version, flags,				\
 			start_ip, post_commit_offset, abort_ip)			\
@@ -89,10 +92,13 @@ do {									\
 
 #else /* #ifdef __PPC64__ */
 
-#define STORE_WORD	"stw "
-#define LOAD_WORD	"lwz "
-#define LOADX_WORD	"lwzx "
-#define CMP_WORD	"cmpw "
+#define RSEQ_STORE_LONG(arg)	"stw%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] "	/* To memory ("m" constraint) */
+#define RSEQ_STORE_INT(arg)	RSEQ_STORE_LONG(arg)					/* To memory ("m" constraint) */
+#define RSEQ_LOAD_LONG(arg)	"lwz%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] "	/* From memory ("m" constraint) */
+#define RSEQ_LOAD_INT(arg)	RSEQ_LOAD_LONG(arg)					/* From memory ("m" constraint) */
+#define RSEQ_LOADX_LONG		"lwzx "							/* From base register ("b" constraint) */
+#define RSEQ_CMP_LONG		"cmpw "
+#define RSEQ_CMP_LONG_INT	"cmpwi "
 
 #define __RSEQ_ASM_DEFINE_TABLE(label, version, flags,				\
 			start_ip, post_commit_offset, abort_ip)			\
@@ -125,7 +131,7 @@ do {									\
 		RSEQ_INJECT_ASM(1)						\
 		"lis %%r17, (" __rseq_str(cs_label) ")@ha\n\t"			\
 		"addi %%r17, %%r17, (" __rseq_str(cs_label) ")@l\n\t"		\
-		"stw %%r17, %[" __rseq_str(rseq_cs) "]\n\t"			\
+		RSEQ_STORE_INT(rseq_cs) "%%r17, %[" __rseq_str(rseq_cs) "]\n\t"	\
 		__rseq_str(label) ":\n\t"
 
 #endif /* #ifdef __PPC64__ */
@@ -136,7 +142,7 @@ do {									\
 
 #define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label)			\
 		RSEQ_INJECT_ASM(2)						\
-		"lwz %%r17, %[" __rseq_str(current_cpu_id) "]\n\t"		\
+		RSEQ_LOAD_INT(current_cpu_id) "%%r17, %[" __rseq_str(current_cpu_id) "]\n\t" \
 		"cmpw cr7, %[" __rseq_str(cpu_id) "], %%r17\n\t"		\
 		"bne- cr7, " __rseq_str(label) "\n\t"
 
@@ -153,25 +159,25 @@ do {									\
  * 	RSEQ_ASM_OP_* (else): doesn't have hard-code registers(unless cr7)
  */
 #define RSEQ_ASM_OP_CMPEQ(var, expect, label)					\
-		LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t"			\
-		CMP_WORD "cr7, %%r17, %[" __rseq_str(expect) "]\n\t"		\
+		RSEQ_LOAD_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t"		\
+		RSEQ_CMP_LONG "cr7, %%r17, %[" __rseq_str(expect) "]\n\t"		\
 		"bne- cr7, " __rseq_str(label) "\n\t"
 
 #define RSEQ_ASM_OP_CMPNE(var, expectnot, label)				\
-		LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t"			\
-		CMP_WORD "cr7, %%r17, %[" __rseq_str(expectnot) "]\n\t"		\
+		RSEQ_LOAD_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t"		\
+		RSEQ_CMP_LONG "cr7, %%r17, %[" __rseq_str(expectnot) "]\n\t"		\
 		"beq- cr7, " __rseq_str(label) "\n\t"
 
 #define RSEQ_ASM_OP_STORE(value, var)						\
-		STORE_WORD "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t"
+		RSEQ_STORE_LONG(var) "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t"
 
 /* Load @var to r17 */
 #define RSEQ_ASM_OP_R_LOAD(var)							\
-		LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t"
+		RSEQ_LOAD_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t"
 
 /* Store r17 to @var */
 #define RSEQ_ASM_OP_R_STORE(var)						\
-		STORE_WORD "%%r17, %[" __rseq_str(var) "]\n\t"
+		RSEQ_STORE_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t"
 
 /* Add @count to r17 */
 #define RSEQ_ASM_OP_R_ADD(count)						\
@@ -179,11 +185,11 @@ do {									\
 
 /* Load (r17 + voffp) to r17 */
 #define RSEQ_ASM_OP_R_LOADX(voffp)						\
-		LOADX_WORD "%%r17, %[" __rseq_str(voffp) "], %%r17\n\t"
+		RSEQ_LOADX_LONG "%%r17, %[" __rseq_str(voffp) "], %%r17\n\t"
 
 /* TODO: implement a faster memcpy. */
 #define RSEQ_ASM_OP_R_MEMCPY() \
-		"cmpdi %%r19, 0\n\t" \
+		RSEQ_CMP_LONG_INT "%%r19, 0\n\t" \
 		"beq 333f\n\t" \
 		"addi %%r20, %%r20, -1\n\t" \
 		"addi %%r21, %%r21, -1\n\t" \
@@ -191,16 +197,16 @@ do {									\
 		"lbzu %%r18, 1(%%r20)\n\t" \
 		"stbu %%r18, 1(%%r21)\n\t" \
 		"addi %%r19, %%r19, -1\n\t" \
-		"cmpdi %%r19, 0\n\t" \
+		RSEQ_CMP_LONG_INT "%%r19, 0\n\t" \
 		"bne 222b\n\t" \
 		"333:\n\t" \
 
 #define RSEQ_ASM_OP_R_FINAL_STORE(var, post_commit_label)			\
-		STORE_WORD "%%r17, %[" __rseq_str(var) "]\n\t"			\
+		RSEQ_STORE_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t"			\
 		__rseq_str(post_commit_label) ":\n\t"
 
 #define RSEQ_ASM_OP_FINAL_STORE(value, var, post_commit_label)			\
-		STORE_WORD "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t" \
+		RSEQ_STORE_LONG(var) "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t" \
 		__rseq_str(post_commit_label) ":\n\t"
 
 static inline __attribute__((always_inline))
@@ -743,9 +749,4 @@ error2:
 #endif
 }
 
-#undef STORE_WORD
-#undef LOAD_WORD
-#undef LOADX_WORD
-#undef CMP_WORD
-
 #endif /* !RSEQ_SKIP_FASTPATH */
-- 
cgit 


From 26dc8a6d8e11552f3b797b5aafe01071ca32d692 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Mon, 24 Jan 2022 12:12:48 -0500
Subject: selftests/rseq: Fix ppc32 offsets by using long rather than off_t

The semantic of off_t is for file offsets. We mean to use it as an
offset from a pointer. We really expect it to fit in a single register,
and not use a 64-bit type on 32-bit architectures.

Fix runtime issues on ppc32 where the offset is always 0 due to
inconsistency between the argument type (off_t -> 64-bit) and type
expected by the inline assembler (32-bit).

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20220124171253.22072-11-mathieu.desnoyers@efficios.com
---
 tools/testing/selftests/rseq/basic_percpu_ops_test.c | 2 +-
 tools/testing/selftests/rseq/param_test.c            | 2 +-
 tools/testing/selftests/rseq/rseq-arm.h              | 2 +-
 tools/testing/selftests/rseq/rseq-arm64.h            | 2 +-
 tools/testing/selftests/rseq/rseq-mips.h             | 2 +-
 tools/testing/selftests/rseq/rseq-ppc.h              | 2 +-
 tools/testing/selftests/rseq/rseq-s390.h             | 2 +-
 tools/testing/selftests/rseq/rseq-skip.h             | 2 +-
 tools/testing/selftests/rseq/rseq-x86.h              | 6 +++---
 9 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rseq/basic_percpu_ops_test.c b/tools/testing/selftests/rseq/basic_percpu_ops_test.c
index b953a52ff706..517756afc2a4 100644
--- a/tools/testing/selftests/rseq/basic_percpu_ops_test.c
+++ b/tools/testing/selftests/rseq/basic_percpu_ops_test.c
@@ -167,7 +167,7 @@ struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
 	for (;;) {
 		struct percpu_list_node *head;
 		intptr_t *targetptr, expectnot, *load;
-		off_t offset;
+		long offset;
 		int ret, cpu;
 
 		cpu = rseq_cpu_start();
diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c
index cc2cfc1da938..335c290b39e7 100644
--- a/tools/testing/selftests/rseq/param_test.c
+++ b/tools/testing/selftests/rseq/param_test.c
@@ -549,7 +549,7 @@ struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
 	for (;;) {
 		struct percpu_list_node *head;
 		intptr_t *targetptr, expectnot, *load;
-		off_t offset;
+		long offset;
 		int ret;
 
 		cpu = rseq_cpu_start();
diff --git a/tools/testing/selftests/rseq/rseq-arm.h b/tools/testing/selftests/rseq/rseq-arm.h
index 5f567b3b40f2..ae476af70152 100644
--- a/tools/testing/selftests/rseq/rseq-arm.h
+++ b/tools/testing/selftests/rseq/rseq-arm.h
@@ -217,7 +217,7 @@ error2:
 
 static inline __attribute__((always_inline))
 int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
-			       off_t voffp, intptr_t *load, int cpu)
+			       long voffp, intptr_t *load, int cpu)
 {
 	RSEQ_INJECT_C(9)
 
diff --git a/tools/testing/selftests/rseq/rseq-arm64.h b/tools/testing/selftests/rseq/rseq-arm64.h
index d0f2b7feee94..7806817062c2 100644
--- a/tools/testing/selftests/rseq/rseq-arm64.h
+++ b/tools/testing/selftests/rseq/rseq-arm64.h
@@ -259,7 +259,7 @@ error2:
 
 static inline __attribute__((always_inline))
 int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
-			       off_t voffp, intptr_t *load, int cpu)
+			       long voffp, intptr_t *load, int cpu)
 {
 	RSEQ_INJECT_C(9)
 
diff --git a/tools/testing/selftests/rseq/rseq-mips.h b/tools/testing/selftests/rseq/rseq-mips.h
index 6df54273825d..0d1d9255da70 100644
--- a/tools/testing/selftests/rseq/rseq-mips.h
+++ b/tools/testing/selftests/rseq/rseq-mips.h
@@ -222,7 +222,7 @@ error2:
 
 static inline __attribute__((always_inline))
 int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
-			       off_t voffp, intptr_t *load, int cpu)
+			       long voffp, intptr_t *load, int cpu)
 {
 	RSEQ_INJECT_C(9)
 
diff --git a/tools/testing/selftests/rseq/rseq-ppc.h b/tools/testing/selftests/rseq/rseq-ppc.h
index 87befda47ba4..aa18c0eabf40 100644
--- a/tools/testing/selftests/rseq/rseq-ppc.h
+++ b/tools/testing/selftests/rseq/rseq-ppc.h
@@ -270,7 +270,7 @@ error2:
 
 static inline __attribute__((always_inline))
 int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
-			       off_t voffp, intptr_t *load, int cpu)
+			       long voffp, intptr_t *load, int cpu)
 {
 	RSEQ_INJECT_C(9)
 
diff --git a/tools/testing/selftests/rseq/rseq-s390.h b/tools/testing/selftests/rseq/rseq-s390.h
index 9927021f8bd0..0f523b3ecdef 100644
--- a/tools/testing/selftests/rseq/rseq-s390.h
+++ b/tools/testing/selftests/rseq/rseq-s390.h
@@ -198,7 +198,7 @@ error2:
  */
 static inline __attribute__((always_inline))
 int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
-			       off_t voffp, intptr_t *load, int cpu)
+			       long voffp, intptr_t *load, int cpu)
 {
 	RSEQ_INJECT_C(9)
 
diff --git a/tools/testing/selftests/rseq/rseq-skip.h b/tools/testing/selftests/rseq/rseq-skip.h
index 72750b5905a9..7b53dac1fcdd 100644
--- a/tools/testing/selftests/rseq/rseq-skip.h
+++ b/tools/testing/selftests/rseq/rseq-skip.h
@@ -13,7 +13,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
 
 static inline __attribute__((always_inline))
 int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
-			       off_t voffp, intptr_t *load, int cpu)
+			       long voffp, intptr_t *load, int cpu)
 {
 	return -1;
 }
diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h
index 1d9fa0516e53..0ee6c041d4be 100644
--- a/tools/testing/selftests/rseq/rseq-x86.h
+++ b/tools/testing/selftests/rseq/rseq-x86.h
@@ -172,7 +172,7 @@ error2:
  */
 static inline __attribute__((always_inline))
 int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
-			       off_t voffp, intptr_t *load, int cpu)
+			       long voffp, intptr_t *load, int cpu)
 {
 	RSEQ_INJECT_C(9)
 
@@ -286,7 +286,7 @@ error1:
  *  *pval += inc;
  */
 static inline __attribute__((always_inline))
-int rseq_offset_deref_addv(intptr_t *ptr, off_t off, intptr_t inc, int cpu)
+int rseq_offset_deref_addv(intptr_t *ptr, long off, intptr_t inc, int cpu)
 {
 	RSEQ_INJECT_C(9)
 
@@ -750,7 +750,7 @@ error2:
  */
 static inline __attribute__((always_inline))
 int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
-			       off_t voffp, intptr_t *load, int cpu)
+			       long voffp, intptr_t *load, int cpu)
 {
 	RSEQ_INJECT_C(9)
 
-- 
cgit 


From d7ed99ade3e62b755584eea07b4e499e79240527 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Mon, 24 Jan 2022 12:12:49 -0500
Subject: selftests/rseq: Fix warnings about #if checks of undefined tokens

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20220124171253.22072-12-mathieu.desnoyers@efficios.com
---
 tools/testing/selftests/rseq/param_test.c | 2 +-
 tools/testing/selftests/rseq/rseq-x86.h   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c
index 335c290b39e7..da23c22d5882 100644
--- a/tools/testing/selftests/rseq/param_test.c
+++ b/tools/testing/selftests/rseq/param_test.c
@@ -161,7 +161,7 @@ unsigned int yield_mod_cnt, nr_abort;
 	"	cbnz	" INJECT_ASM_REG ", 222b\n"			\
 	"333:\n"
 
-#elif __PPC__
+#elif defined(__PPC__)
 
 #define RSEQ_INJECT_INPUT \
 	, [loop_cnt_1]"m"(loop_cnt[1]) \
diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h
index 0ee6c041d4be..e444563c6999 100644
--- a/tools/testing/selftests/rseq/rseq-x86.h
+++ b/tools/testing/selftests/rseq/rseq-x86.h
@@ -600,7 +600,7 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
 
 #endif /* !RSEQ_SKIP_FASTPATH */
 
-#elif __i386__
+#elif defined(__i386__)
 
 #define rseq_smp_mb()	\
 	__asm__ __volatile__ ("lock; addl $0,-128(%%esp)" ::: "memory", "cc")
-- 
cgit 


From 94c5cf2a0e193afffef8de48ddc42de6df7cac93 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Mon, 24 Jan 2022 12:12:50 -0500
Subject: selftests/rseq: Remove arm/mips asm goto compiler work-around

The arm and mips work-around for asm goto size guess issues are not
properly documented, and lack reference to specific compiler versions,
upstream compiler bug tracker entry, and reproducer.

I can only find a loosely documented patch in my original LKML rseq post
refering to gcc < 7 on ARM, but it does not appear to be sufficient to
track the exact issue. Also, I am not sure MIPS really has the same
limitation.

Therefore, remove the work-around until we can properly document this.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/lkml/20171121141900.18471-17-mathieu.desnoyers@efficios.com/
---
 tools/testing/selftests/rseq/rseq-arm.h  | 37 --------------------------------
 tools/testing/selftests/rseq/rseq-mips.h | 37 --------------------------------
 2 files changed, 74 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rseq/rseq-arm.h b/tools/testing/selftests/rseq/rseq-arm.h
index ae476af70152..64c3a622107b 100644
--- a/tools/testing/selftests/rseq/rseq-arm.h
+++ b/tools/testing/selftests/rseq/rseq-arm.h
@@ -147,14 +147,11 @@ do {									\
 		teardown						\
 		"b %l[" __rseq_str(cmpfail_label) "]\n\t"
 
-#define rseq_workaround_gcc_asm_size_guess()	__asm__ __volatile__("")
-
 static inline __attribute__((always_inline))
 int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
 {
 	RSEQ_INJECT_C(9)
 
-	rseq_workaround_gcc_asm_size_guess();
 	__asm__ __volatile__ goto (
 		RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
 		RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
@@ -198,14 +195,11 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
 		  , error1, error2
 #endif
 	);
-	rseq_workaround_gcc_asm_size_guess();
 	return 0;
 abort:
-	rseq_workaround_gcc_asm_size_guess();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
-	rseq_workaround_gcc_asm_size_guess();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
@@ -221,7 +215,6 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
 {
 	RSEQ_INJECT_C(9)
 
-	rseq_workaround_gcc_asm_size_guess();
 	__asm__ __volatile__ goto (
 		RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
 		RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
@@ -270,14 +263,11 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
 		  , error1, error2
 #endif
 	);
-	rseq_workaround_gcc_asm_size_guess();
 	return 0;
 abort:
-	rseq_workaround_gcc_asm_size_guess();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
-	rseq_workaround_gcc_asm_size_guess();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
@@ -292,7 +282,6 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
 {
 	RSEQ_INJECT_C(9)
 
-	rseq_workaround_gcc_asm_size_guess();
 	__asm__ __volatile__ goto (
 		RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
 #ifdef RSEQ_COMPARE_TWICE
@@ -328,10 +317,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
 		  , error1
 #endif
 	);
-	rseq_workaround_gcc_asm_size_guess();
 	return 0;
 abort:
-	rseq_workaround_gcc_asm_size_guess();
 	RSEQ_INJECT_FAILED
 	return -1;
 #ifdef RSEQ_COMPARE_TWICE
@@ -347,7 +334,6 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
 {
 	RSEQ_INJECT_C(9)
 
-	rseq_workaround_gcc_asm_size_guess();
 	__asm__ __volatile__ goto (
 		RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
 		RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
@@ -398,14 +384,11 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
 		  , error1, error2
 #endif
 	);
-	rseq_workaround_gcc_asm_size_guess();
 	return 0;
 abort:
-	rseq_workaround_gcc_asm_size_guess();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
-	rseq_workaround_gcc_asm_size_guess();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
@@ -422,7 +405,6 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
 {
 	RSEQ_INJECT_C(9)
 
-	rseq_workaround_gcc_asm_size_guess();
 	__asm__ __volatile__ goto (
 		RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
 		RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
@@ -474,14 +456,11 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
 		  , error1, error2
 #endif
 	);
-	rseq_workaround_gcc_asm_size_guess();
 	return 0;
 abort:
-	rseq_workaround_gcc_asm_size_guess();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
-	rseq_workaround_gcc_asm_size_guess();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
@@ -498,7 +477,6 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
 {
 	RSEQ_INJECT_C(9)
 
-	rseq_workaround_gcc_asm_size_guess();
 	__asm__ __volatile__ goto (
 		RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
 		RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
@@ -554,14 +532,11 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
 		  , error1, error2, error3
 #endif
 	);
-	rseq_workaround_gcc_asm_size_guess();
 	return 0;
 abort:
-	rseq_workaround_gcc_asm_size_guess();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
-	rseq_workaround_gcc_asm_size_guess();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
@@ -582,7 +557,6 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
 
 	RSEQ_INJECT_C(9)
 
-	rseq_workaround_gcc_asm_size_guess();
 	__asm__ __volatile__ goto (
 		RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
 		RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
@@ -678,21 +652,16 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
 		  , error1, error2
 #endif
 	);
-	rseq_workaround_gcc_asm_size_guess();
 	return 0;
 abort:
-	rseq_workaround_gcc_asm_size_guess();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
-	rseq_workaround_gcc_asm_size_guess();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
-	rseq_workaround_gcc_asm_size_guess();
 	rseq_bug("cpu_id comparison failed");
 error2:
-	rseq_workaround_gcc_asm_size_guess();
 	rseq_bug("expected value comparison failed");
 #endif
 }
@@ -706,7 +675,6 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
 
 	RSEQ_INJECT_C(9)
 
-	rseq_workaround_gcc_asm_size_guess();
 	__asm__ __volatile__ goto (
 		RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
 		RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
@@ -803,21 +771,16 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
 		  , error1, error2
 #endif
 	);
-	rseq_workaround_gcc_asm_size_guess();
 	return 0;
 abort:
-	rseq_workaround_gcc_asm_size_guess();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
-	rseq_workaround_gcc_asm_size_guess();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
-	rseq_workaround_gcc_asm_size_guess();
 	rseq_bug("cpu_id comparison failed");
 error2:
-	rseq_workaround_gcc_asm_size_guess();
 	rseq_bug("expected value comparison failed");
 #endif
 }
diff --git a/tools/testing/selftests/rseq/rseq-mips.h b/tools/testing/selftests/rseq/rseq-mips.h
index 0d1d9255da70..878739fae2fd 100644
--- a/tools/testing/selftests/rseq/rseq-mips.h
+++ b/tools/testing/selftests/rseq/rseq-mips.h
@@ -154,14 +154,11 @@ do {									\
 		teardown \
 		"b %l[" __rseq_str(cmpfail_label) "]\n\t"
 
-#define rseq_workaround_gcc_asm_size_guess()	__asm__ __volatile__("")
-
 static inline __attribute__((always_inline))
 int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
 {
 	RSEQ_INJECT_C(9)
 
-	rseq_workaround_gcc_asm_size_guess();
 	__asm__ __volatile__ goto (
 		RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
 		RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
@@ -203,14 +200,11 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
 		  , error1, error2
 #endif
 	);
-	rseq_workaround_gcc_asm_size_guess();
 	return 0;
 abort:
-	rseq_workaround_gcc_asm_size_guess();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
-	rseq_workaround_gcc_asm_size_guess();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
@@ -226,7 +220,6 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
 {
 	RSEQ_INJECT_C(9)
 
-	rseq_workaround_gcc_asm_size_guess();
 	__asm__ __volatile__ goto (
 		RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
 		RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
@@ -273,14 +266,11 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
 		  , error1, error2
 #endif
 	);
-	rseq_workaround_gcc_asm_size_guess();
 	return 0;
 abort:
-	rseq_workaround_gcc_asm_size_guess();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
-	rseq_workaround_gcc_asm_size_guess();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
@@ -295,7 +285,6 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
 {
 	RSEQ_INJECT_C(9)
 
-	rseq_workaround_gcc_asm_size_guess();
 	__asm__ __volatile__ goto (
 		RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
 #ifdef RSEQ_COMPARE_TWICE
@@ -331,10 +320,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
 		  , error1
 #endif
 	);
-	rseq_workaround_gcc_asm_size_guess();
 	return 0;
 abort:
-	rseq_workaround_gcc_asm_size_guess();
 	RSEQ_INJECT_FAILED
 	return -1;
 #ifdef RSEQ_COMPARE_TWICE
@@ -350,7 +337,6 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
 {
 	RSEQ_INJECT_C(9)
 
-	rseq_workaround_gcc_asm_size_guess();
 	__asm__ __volatile__ goto (
 		RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
 		RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
@@ -399,14 +385,11 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
 		  , error1, error2
 #endif
 	);
-	rseq_workaround_gcc_asm_size_guess();
 	return 0;
 abort:
-	rseq_workaround_gcc_asm_size_guess();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
-	rseq_workaround_gcc_asm_size_guess();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
@@ -423,7 +406,6 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
 {
 	RSEQ_INJECT_C(9)
 
-	rseq_workaround_gcc_asm_size_guess();
 	__asm__ __volatile__ goto (
 		RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
 		RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
@@ -473,14 +455,11 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
 		  , error1, error2
 #endif
 	);
-	rseq_workaround_gcc_asm_size_guess();
 	return 0;
 abort:
-	rseq_workaround_gcc_asm_size_guess();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
-	rseq_workaround_gcc_asm_size_guess();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
@@ -497,7 +476,6 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
 {
 	RSEQ_INJECT_C(9)
 
-	rseq_workaround_gcc_asm_size_guess();
 	__asm__ __volatile__ goto (
 		RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
 		RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
@@ -549,14 +527,11 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
 		  , error1, error2, error3
 #endif
 	);
-	rseq_workaround_gcc_asm_size_guess();
 	return 0;
 abort:
-	rseq_workaround_gcc_asm_size_guess();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
-	rseq_workaround_gcc_asm_size_guess();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
@@ -577,7 +552,6 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
 
 	RSEQ_INJECT_C(9)
 
-	rseq_workaround_gcc_asm_size_guess();
 	__asm__ __volatile__ goto (
 		RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
 		RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
@@ -670,21 +644,16 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
 		  , error1, error2
 #endif
 	);
-	rseq_workaround_gcc_asm_size_guess();
 	return 0;
 abort:
-	rseq_workaround_gcc_asm_size_guess();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
-	rseq_workaround_gcc_asm_size_guess();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
-	rseq_workaround_gcc_asm_size_guess();
 	rseq_bug("cpu_id comparison failed");
 error2:
-	rseq_workaround_gcc_asm_size_guess();
 	rseq_bug("expected value comparison failed");
 #endif
 }
@@ -698,7 +667,6 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
 
 	RSEQ_INJECT_C(9)
 
-	rseq_workaround_gcc_asm_size_guess();
 	__asm__ __volatile__ goto (
 		RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
 		RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
@@ -792,21 +760,16 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
 		  , error1, error2
 #endif
 	);
-	rseq_workaround_gcc_asm_size_guess();
 	return 0;
 abort:
-	rseq_workaround_gcc_asm_size_guess();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
-	rseq_workaround_gcc_asm_size_guess();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
-	rseq_workaround_gcc_asm_size_guess();
 	rseq_bug("cpu_id comparison failed");
 error2:
-	rseq_workaround_gcc_asm_size_guess();
 	rseq_bug("expected value comparison failed");
 #endif
 }
-- 
cgit 


From b53823fb2ef854222853be164f3b1e815f315144 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Mon, 24 Jan 2022 12:12:51 -0500
Subject: selftests/rseq: Fix: work-around asm goto compiler bugs

gcc and clang each have their own compiler bugs with respect to asm
goto. Implement a work-around for compiler versions known to have those
bugs.

gcc prior to 4.8.2 miscompiles asm goto.
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670

gcc prior to 8.1.0 miscompiles asm goto at O1.
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103908

clang prior to version 13.0.1 miscompiles asm goto at O2.
https://github.com/llvm/llvm-project/issues/52735

Work around these issues by adding a volatile inline asm with
memory clobber in the fallthrough after the asm goto and at each
label target.  Emit this for all compilers in case other similar
issues are found in the future.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20220124171253.22072-14-mathieu.desnoyers@efficios.com
---
 tools/testing/selftests/rseq/compiler.h   | 30 ++++++++++++++
 tools/testing/selftests/rseq/rseq-arm.h   | 39 ++++++++++++++++++
 tools/testing/selftests/rseq/rseq-arm64.h | 45 +++++++++++++++++---
 tools/testing/selftests/rseq/rseq-ppc.h   | 39 ++++++++++++++++++
 tools/testing/selftests/rseq/rseq-s390.h  | 29 +++++++++++++
 tools/testing/selftests/rseq/rseq-x86.h   | 68 +++++++++++++++++++++++++++++++
 tools/testing/selftests/rseq/rseq.h       |  1 +
 7 files changed, 245 insertions(+), 6 deletions(-)
 create mode 100644 tools/testing/selftests/rseq/compiler.h

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rseq/compiler.h b/tools/testing/selftests/rseq/compiler.h
new file mode 100644
index 000000000000..876eb6a7f75b
--- /dev/null
+++ b/tools/testing/selftests/rseq/compiler.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */
+/*
+ * rseq/compiler.h
+ *
+ * Work-around asm goto compiler bugs.
+ *
+ * (C) Copyright 2021 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#ifndef RSEQ_COMPILER_H
+#define RSEQ_COMPILER_H
+
+/*
+ * gcc prior to 4.8.2 miscompiles asm goto.
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670
+ *
+ * gcc prior to 8.1.0 miscompiles asm goto at O1.
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103908
+ *
+ * clang prior to version 13.0.1 miscompiles asm goto at O2.
+ * https://github.com/llvm/llvm-project/issues/52735
+ *
+ * Work around these issues by adding a volatile inline asm with
+ * memory clobber in the fallthrough after the asm goto and at each
+ * label target.  Emit this for all compilers in case other similar
+ * issues are found in the future.
+ */
+#define rseq_after_asm_goto()	asm volatile ("" : : : "memory")
+
+#endif  /* RSEQ_COMPILER_H_ */
diff --git a/tools/testing/selftests/rseq/rseq-arm.h b/tools/testing/selftests/rseq/rseq-arm.h
index 64c3a622107b..893a11eca9d5 100644
--- a/tools/testing/selftests/rseq/rseq-arm.h
+++ b/tools/testing/selftests/rseq/rseq-arm.h
@@ -195,16 +195,21 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
 		  , error1, error2
 #endif
 	);
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
+	rseq_after_asm_goto();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 error2:
+	rseq_after_asm_goto();
 	rseq_bug("expected value comparison failed");
 #endif
 }
@@ -263,16 +268,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
 		  , error1, error2
 #endif
 	);
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
+	rseq_after_asm_goto();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 error2:
+	rseq_after_asm_goto();
 	rseq_bug("expected value comparison failed");
 #endif
 }
@@ -317,12 +327,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
 		  , error1
 #endif
 	);
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 #endif
 }
@@ -384,16 +397,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
 		  , error1, error2
 #endif
 	);
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
+	rseq_after_asm_goto();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 error2:
+	rseq_after_asm_goto();
 	rseq_bug("expected value comparison failed");
 #endif
 }
@@ -456,16 +474,21 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
 		  , error1, error2
 #endif
 	);
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
+	rseq_after_asm_goto();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 error2:
+	rseq_after_asm_goto();
 	rseq_bug("expected value comparison failed");
 #endif
 }
@@ -532,18 +555,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
 		  , error1, error2, error3
 #endif
 	);
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
+	rseq_after_asm_goto();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 error2:
+	rseq_after_asm_goto();
 	rseq_bug("1st expected value comparison failed");
 error3:
+	rseq_after_asm_goto();
 	rseq_bug("2nd expected value comparison failed");
 #endif
 }
@@ -652,16 +681,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
 		  , error1, error2
 #endif
 	);
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
+	rseq_after_asm_goto();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 error2:
+	rseq_after_asm_goto();
 	rseq_bug("expected value comparison failed");
 #endif
 }
@@ -771,16 +805,21 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
 		  , error1, error2
 #endif
 	);
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
+	rseq_after_asm_goto();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 error2:
+	rseq_after_asm_goto();
 	rseq_bug("expected value comparison failed");
 #endif
 }
diff --git a/tools/testing/selftests/rseq/rseq-arm64.h b/tools/testing/selftests/rseq/rseq-arm64.h
index 7806817062c2..cbe190a4d005 100644
--- a/tools/testing/selftests/rseq/rseq-arm64.h
+++ b/tools/testing/selftests/rseq/rseq-arm64.h
@@ -242,17 +242,21 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
 		  , error1, error2
 #endif
 	);
-
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
+	rseq_after_asm_goto();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 error2:
+	rseq_after_asm_goto();
 	rseq_bug("expected value comparison failed");
 #endif
 }
@@ -300,16 +304,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
 		  , error1, error2
 #endif
 	);
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
+	rseq_after_asm_goto();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 error2:
+	rseq_after_asm_goto();
 	rseq_bug("expected value comparison failed");
 #endif
 }
@@ -348,12 +357,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
 		  , error1
 #endif
 	);
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 #endif
 }
@@ -402,17 +414,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
 		  , error1, error2
 #endif
 	);
-
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
+	rseq_after_asm_goto();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 error2:
+	rseq_after_asm_goto();
 	rseq_bug("expected value comparison failed");
 #endif
 }
@@ -461,17 +477,21 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
 		  , error1, error2
 #endif
 	);
-
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
+	rseq_after_asm_goto();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 error2:
+	rseq_after_asm_goto();
 	rseq_bug("expected value comparison failed");
 #endif
 }
@@ -522,19 +542,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
 		  , error1, error2, error3
 #endif
 	);
-
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
+	rseq_after_asm_goto();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 error2:
+	rseq_after_asm_goto();
 	rseq_bug("expected value comparison failed");
 error3:
+	rseq_after_asm_goto();
 	rseq_bug("2nd expected value comparison failed");
 #endif
 }
@@ -584,17 +609,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
 		  , error1, error2
 #endif
 	);
-
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
+	rseq_after_asm_goto();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 error2:
+	rseq_after_asm_goto();
 	rseq_bug("expected value comparison failed");
 #endif
 }
@@ -644,17 +673,21 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
 		  , error1, error2
 #endif
 	);
-
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
+	rseq_after_asm_goto();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 error2:
+	rseq_after_asm_goto();
 	rseq_bug("expected value comparison failed");
 #endif
 }
diff --git a/tools/testing/selftests/rseq/rseq-ppc.h b/tools/testing/selftests/rseq/rseq-ppc.h
index aa18c0eabf40..bab8e0b9fb11 100644
--- a/tools/testing/selftests/rseq/rseq-ppc.h
+++ b/tools/testing/selftests/rseq/rseq-ppc.h
@@ -254,16 +254,21 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
 		  , error1, error2
 #endif
 	);
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
+	rseq_after_asm_goto();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 error2:
+	rseq_after_asm_goto();
 	rseq_bug("expected value comparison failed");
 #endif
 }
@@ -322,16 +327,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
 		  , error1, error2
 #endif
 	);
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
+	rseq_after_asm_goto();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 error2:
+	rseq_after_asm_goto();
 	rseq_bug("expected value comparison failed");
 #endif
 }
@@ -378,12 +388,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
 		  , error1
 #endif
 	);
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 #endif
 }
@@ -442,16 +455,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
 		  , error1, error2
 #endif
 	);
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
+	rseq_after_asm_goto();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 error2:
+	rseq_after_asm_goto();
 	rseq_bug("expected value comparison failed");
 #endif
 }
@@ -512,16 +530,21 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
 		  , error1, error2
 #endif
 	);
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
+	rseq_after_asm_goto();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 error2:
+	rseq_after_asm_goto();
 	rseq_bug("expected value comparison failed");
 #endif
 }
@@ -583,18 +606,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
 		  , error1, error2, error3
 #endif
 	);
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
+	rseq_after_asm_goto();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 error2:
+	rseq_after_asm_goto();
 	rseq_bug("1st expected value comparison failed");
 error3:
+	rseq_after_asm_goto();
 	rseq_bug("2nd expected value comparison failed");
 #endif
 }
@@ -659,16 +688,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
 		  , error1, error2
 #endif
 	);
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
+	rseq_after_asm_goto();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 error2:
+	rseq_after_asm_goto();
 	rseq_bug("expected value comparison failed");
 #endif
 }
@@ -735,16 +769,21 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
 		  , error1, error2
 #endif
 	);
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
+	rseq_after_asm_goto();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 error2:
+	rseq_after_asm_goto();
 	rseq_bug("expected value comparison failed");
 #endif
 }
diff --git a/tools/testing/selftests/rseq/rseq-s390.h b/tools/testing/selftests/rseq/rseq-s390.h
index 0f523b3ecdef..4e6dc5f0cb42 100644
--- a/tools/testing/selftests/rseq/rseq-s390.h
+++ b/tools/testing/selftests/rseq/rseq-s390.h
@@ -178,16 +178,21 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
 		  , error1, error2
 #endif
 	);
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
+	rseq_after_asm_goto();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 error2:
+	rseq_after_asm_goto();
 	rseq_bug("expected value comparison failed");
 #endif
 }
@@ -248,16 +253,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
 		  , error1, error2
 #endif
 	);
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
+	rseq_after_asm_goto();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 error2:
+	rseq_after_asm_goto();
 	rseq_bug("expected value comparison failed");
 #endif
 }
@@ -301,12 +311,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
 		  , error1
 #endif
 	);
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 #endif
 }
@@ -364,16 +377,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
 		  , error1, error2
 #endif
 	);
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
+	rseq_after_asm_goto();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 error2:
+	rseq_after_asm_goto();
 	rseq_bug("expected value comparison failed");
 #endif
 }
@@ -443,18 +461,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
 		  , error1, error2, error3
 #endif
 	);
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
+	rseq_after_asm_goto();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 error2:
+	rseq_after_asm_goto();
 	rseq_bug("1st expected value comparison failed");
 error3:
+	rseq_after_asm_goto();
 	rseq_bug("2nd expected value comparison failed");
 #endif
 }
@@ -555,16 +579,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
 		  , error1, error2
 #endif
 	);
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
+	rseq_after_asm_goto();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 error2:
+	rseq_after_asm_goto();
 	rseq_bug("expected value comparison failed");
 #endif
 }
diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h
index e444563c6999..4ab2e74811ab 100644
--- a/tools/testing/selftests/rseq/rseq-x86.h
+++ b/tools/testing/selftests/rseq/rseq-x86.h
@@ -152,16 +152,21 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
 		  , error1, error2
 #endif
 	);
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
+	rseq_after_asm_goto();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 error2:
+	rseq_after_asm_goto();
 	rseq_bug("expected value comparison failed");
 #endif
 }
@@ -220,16 +225,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
 		  , error1, error2
 #endif
 	);
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
+	rseq_after_asm_goto();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 error2:
+	rseq_after_asm_goto();
 	rseq_bug("expected value comparison failed");
 #endif
 }
@@ -269,12 +279,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
 		  , error1
 #endif
 	);
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 #endif
 }
@@ -387,16 +400,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
 		  , error1, error2
 #endif
 	);
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
+	rseq_after_asm_goto();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 error2:
+	rseq_after_asm_goto();
 	rseq_bug("expected value comparison failed");
 #endif
 }
@@ -464,18 +482,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
 		  , error1, error2, error3
 #endif
 	);
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
+	rseq_after_asm_goto();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 error2:
+	rseq_after_asm_goto();
 	rseq_bug("1st expected value comparison failed");
 error3:
+	rseq_after_asm_goto();
 	rseq_bug("2nd expected value comparison failed");
 #endif
 }
@@ -574,16 +598,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
 		  , error1, error2
 #endif
 	);
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
+	rseq_after_asm_goto();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 error2:
+	rseq_after_asm_goto();
 	rseq_bug("expected value comparison failed");
 #endif
 }
@@ -730,16 +759,21 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
 		  , error1, error2
 #endif
 	);
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
+	rseq_after_asm_goto();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 error2:
+	rseq_after_asm_goto();
 	rseq_bug("expected value comparison failed");
 #endif
 }
@@ -798,16 +832,21 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
 		  , error1, error2
 #endif
 	);
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
+	rseq_after_asm_goto();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 error2:
+	rseq_after_asm_goto();
 	rseq_bug("expected value comparison failed");
 #endif
 }
@@ -847,12 +886,15 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
 		  , error1
 #endif
 	);
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 #endif
 }
@@ -909,16 +951,21 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
 		  , error1, error2
 #endif
 	);
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
+	rseq_after_asm_goto();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 error2:
+	rseq_after_asm_goto();
 	rseq_bug("expected value comparison failed");
 #endif
 }
@@ -977,16 +1024,21 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
 		  , error1, error2
 #endif
 	);
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
+	rseq_after_asm_goto();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 error2:
+	rseq_after_asm_goto();
 	rseq_bug("expected value comparison failed");
 #endif
 
@@ -1047,18 +1099,24 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
 		  , error1, error2, error3
 #endif
 	);
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
+	rseq_after_asm_goto();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 error2:
+	rseq_after_asm_goto();
 	rseq_bug("1st expected value comparison failed");
 error3:
+	rseq_after_asm_goto();
 	rseq_bug("2nd expected value comparison failed");
 #endif
 }
@@ -1161,16 +1219,21 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
 		  , error1, error2
 #endif
 	);
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
+	rseq_after_asm_goto();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 error2:
+	rseq_after_asm_goto();
 	rseq_bug("expected value comparison failed");
 #endif
 }
@@ -1274,16 +1337,21 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
 		  , error1, error2
 #endif
 	);
+	rseq_after_asm_goto();
 	return 0;
 abort:
+	rseq_after_asm_goto();
 	RSEQ_INJECT_FAILED
 	return -1;
 cmpfail:
+	rseq_after_asm_goto();
 	return 1;
 #ifdef RSEQ_COMPARE_TWICE
 error1:
+	rseq_after_asm_goto();
 	rseq_bug("cpu_id comparison failed");
 error2:
+	rseq_after_asm_goto();
 	rseq_bug("expected value comparison failed");
 #endif
 }
diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h
index 17531ccd3090..6bd0ac466b4a 100644
--- a/tools/testing/selftests/rseq/rseq.h
+++ b/tools/testing/selftests/rseq/rseq.h
@@ -17,6 +17,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include "rseq-abi.h"
+#include "compiler.h"
 
 /*
  * Empty code injection macros, override when testing.
-- 
cgit 


From 4e15bb766b6c6e963a4d33629034d0ec3b7637df Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Mon, 24 Jan 2022 12:12:52 -0500
Subject: selftests/rseq: x86-64: use %fs segment selector for accessing rseq
 thread area

Rather than use rseq_get_abi() and pass its result through a register to
the inline assembler, directly access the per-thread rseq area through a
memory reference combining the %fs segment selector, the constant offset
of the field in struct rseq, and the rseq_offset value (in a register).

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20220124171253.22072-15-mathieu.desnoyers@efficios.com
---
 tools/testing/selftests/rseq/rseq-x86.h | 58 +++++++++++++++++----------------
 1 file changed, 30 insertions(+), 28 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h
index 4ab2e74811ab..29769664edaa 100644
--- a/tools/testing/selftests/rseq/rseq-x86.h
+++ b/tools/testing/selftests/rseq/rseq-x86.h
@@ -28,6 +28,8 @@
 
 #ifdef __x86_64__
 
+#define RSEQ_ASM_TP_SEGMENT	%%fs
+
 #define rseq_smp_mb()	\
 	__asm__ __volatile__ ("lock; addl $0,-128(%%rsp)" ::: "memory", "cc")
 #define rseq_smp_rmb()	rseq_barrier()
@@ -123,14 +125,14 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
 		RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
 #endif
 		/* Start rseq by storing table entry pointer into rseq_cs. */
-		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
-		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
 		RSEQ_INJECT_ASM(3)
 		"cmpq %[v], %[expect]\n\t"
 		"jnz %l[cmpfail]\n\t"
 		RSEQ_INJECT_ASM(4)
 #ifdef RSEQ_COMPARE_TWICE
-		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
 		"cmpq %[v], %[expect]\n\t"
 		"jnz %l[error2]\n\t"
 #endif
@@ -141,7 +143,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
 		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [rseq_abi]		"r" (rseq_get_abi()),
+		  [rseq_offset]		"r" ((long)rseq_offset),
 		  [v]			"m" (*v),
 		  [expect]		"r" (expect),
 		  [newv]		"r" (newv)
@@ -189,15 +191,15 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
 		RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
 #endif
 		/* Start rseq by storing table entry pointer into rseq_cs. */
-		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
-		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
 		RSEQ_INJECT_ASM(3)
 		"movq %[v], %%rbx\n\t"
 		"cmpq %%rbx, %[expectnot]\n\t"
 		"je %l[cmpfail]\n\t"
 		RSEQ_INJECT_ASM(4)
 #ifdef RSEQ_COMPARE_TWICE
-		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
 		"movq %[v], %%rbx\n\t"
 		"cmpq %%rbx, %[expectnot]\n\t"
 		"je %l[error2]\n\t"
@@ -212,7 +214,7 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
 		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [rseq_abi]		"r" (rseq_get_abi()),
+		  [rseq_offset]		"r" ((long)rseq_offset),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [expectnot]		"r" (expectnot),
@@ -255,11 +257,11 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
 		RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
 #endif
 		/* Start rseq by storing table entry pointer into rseq_cs. */
-		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
-		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
 		RSEQ_INJECT_ASM(3)
 #ifdef RSEQ_COMPARE_TWICE
-		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
 #endif
 		/* final store */
 		"addq %[count], %[v]\n\t"
@@ -268,7 +270,7 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
 		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [rseq_abi]		"r" (rseq_get_abi()),
+		  [rseq_offset]		"r" ((long)rseq_offset),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [count]		"er" (count)
@@ -309,11 +311,11 @@ int rseq_offset_deref_addv(intptr_t *ptr, long off, intptr_t inc, int cpu)
 		RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
 #endif
 		/* Start rseq by storing table entry pointer into rseq_cs. */
-		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
-		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
 		RSEQ_INJECT_ASM(3)
 #ifdef RSEQ_COMPARE_TWICE
-		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
 #endif
 		/* get p+v */
 		"movq %[ptr], %%rbx\n\t"
@@ -327,7 +329,7 @@ int rseq_offset_deref_addv(intptr_t *ptr, long off, intptr_t inc, int cpu)
 		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [rseq_abi]		"r" (rseq_get_abi()),
+		  [rseq_offset]		"r" ((long)rseq_offset),
 		  /* final store input */
 		  [ptr]			"m" (*ptr),
 		  [off]			"er" (off),
@@ -364,14 +366,14 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
 		RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
 #endif
 		/* Start rseq by storing table entry pointer into rseq_cs. */
-		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
-		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
 		RSEQ_INJECT_ASM(3)
 		"cmpq %[v], %[expect]\n\t"
 		"jnz %l[cmpfail]\n\t"
 		RSEQ_INJECT_ASM(4)
 #ifdef RSEQ_COMPARE_TWICE
-		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
 		"cmpq %[v], %[expect]\n\t"
 		"jnz %l[error2]\n\t"
 #endif
@@ -385,7 +387,7 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
 		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [rseq_abi]		"r" (rseq_get_abi()),
+		  [rseq_offset]		"r" ((long)rseq_offset),
 		  /* try store input */
 		  [v2]			"m" (*v2),
 		  [newv2]		"r" (newv2),
@@ -444,8 +446,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
 		RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3])
 #endif
 		/* Start rseq by storing table entry pointer into rseq_cs. */
-		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
-		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
 		RSEQ_INJECT_ASM(3)
 		"cmpq %[v], %[expect]\n\t"
 		"jnz %l[cmpfail]\n\t"
@@ -454,7 +456,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
 		"jnz %l[cmpfail]\n\t"
 		RSEQ_INJECT_ASM(5)
 #ifdef RSEQ_COMPARE_TWICE
-		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
 		"cmpq %[v], %[expect]\n\t"
 		"jnz %l[error2]\n\t"
 		"cmpq %[v2], %[expect2]\n\t"
@@ -467,7 +469,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
 		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [rseq_abi]		"r" (rseq_get_abi()),
+		  [rseq_offset]		"r" ((long)rseq_offset),
 		  /* cmp2 input */
 		  [v2]			"m" (*v2),
 		  [expect2]		"r" (expect2),
@@ -524,14 +526,14 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
 		"movq %[dst], %[rseq_scratch1]\n\t"
 		"movq %[len], %[rseq_scratch2]\n\t"
 		/* Start rseq by storing table entry pointer into rseq_cs. */
-		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
-		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
 		RSEQ_INJECT_ASM(3)
 		"cmpq %[v], %[expect]\n\t"
 		"jnz 5f\n\t"
 		RSEQ_INJECT_ASM(4)
 #ifdef RSEQ_COMPARE_TWICE
-		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 6f)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 6f)
 		"cmpq %[v], %[expect]\n\t"
 		"jnz 7f\n\t"
 #endif
@@ -579,7 +581,7 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
 #endif
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [rseq_abi]		"r" (rseq_get_abi()),
+		  [rseq_offset]		"r" ((long)rseq_offset),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [expect]		"r" (expect),
-- 
cgit 


From 127b6429d235ab7c358223bbfd8a8b8d8cc799b6 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Mon, 24 Jan 2022 12:12:53 -0500
Subject: selftests/rseq: x86-32: use %gs segment selector for accessing rseq
 thread area

Rather than use rseq_get_abi() and pass its result through a register to
the inline assembler, directly access the per-thread rseq area through a
memory reference combining the %gs segment selector, the constant offset
of the field in struct rseq, and the rseq_offset value (in a register).

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20220124171253.22072-16-mathieu.desnoyers@efficios.com
---
 tools/testing/selftests/rseq/rseq-x86.h | 66 +++++++++++++++++----------------
 1 file changed, 34 insertions(+), 32 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h
index 29769664edaa..f704d3664327 100644
--- a/tools/testing/selftests/rseq/rseq-x86.h
+++ b/tools/testing/selftests/rseq/rseq-x86.h
@@ -633,6 +633,8 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
 
 #elif defined(__i386__)
 
+#define RSEQ_ASM_TP_SEGMENT	%%gs
+
 #define rseq_smp_mb()	\
 	__asm__ __volatile__ ("lock; addl $0,-128(%%esp)" ::: "memory", "cc")
 #define rseq_smp_rmb()	\
@@ -732,14 +734,14 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
 		RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
 #endif
 		/* Start rseq by storing table entry pointer into rseq_cs. */
-		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
-		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
 		RSEQ_INJECT_ASM(3)
 		"cmpl %[v], %[expect]\n\t"
 		"jnz %l[cmpfail]\n\t"
 		RSEQ_INJECT_ASM(4)
 #ifdef RSEQ_COMPARE_TWICE
-		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
 		"cmpl %[v], %[expect]\n\t"
 		"jnz %l[error2]\n\t"
 #endif
@@ -750,7 +752,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
 		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [rseq_abi]		"r" (rseq_get_abi()),
+		  [rseq_offset]		"r" (rseq_offset),
 		  [v]			"m" (*v),
 		  [expect]		"r" (expect),
 		  [newv]		"r" (newv)
@@ -798,15 +800,15 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
 		RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
 #endif
 		/* Start rseq by storing table entry pointer into rseq_cs. */
-		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
-		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
 		RSEQ_INJECT_ASM(3)
 		"movl %[v], %%ebx\n\t"
 		"cmpl %%ebx, %[expectnot]\n\t"
 		"je %l[cmpfail]\n\t"
 		RSEQ_INJECT_ASM(4)
 #ifdef RSEQ_COMPARE_TWICE
-		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
 		"movl %[v], %%ebx\n\t"
 		"cmpl %%ebx, %[expectnot]\n\t"
 		"je %l[error2]\n\t"
@@ -821,7 +823,7 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
 		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [rseq_abi]		"r" (rseq_get_abi()),
+		  [rseq_offset]		"r" (rseq_offset),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [expectnot]		"r" (expectnot),
@@ -864,11 +866,11 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
 		RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
 #endif
 		/* Start rseq by storing table entry pointer into rseq_cs. */
-		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
-		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
 		RSEQ_INJECT_ASM(3)
 #ifdef RSEQ_COMPARE_TWICE
-		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
 #endif
 		/* final store */
 		"addl %[count], %[v]\n\t"
@@ -877,7 +879,7 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
 		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [rseq_abi]		"r" (rseq_get_abi()),
+		  [rseq_offset]		"r" (rseq_offset),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [count]		"ir" (count)
@@ -916,14 +918,14 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
 		RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
 #endif
 		/* Start rseq by storing table entry pointer into rseq_cs. */
-		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
-		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
 		RSEQ_INJECT_ASM(3)
 		"cmpl %[v], %[expect]\n\t"
 		"jnz %l[cmpfail]\n\t"
 		RSEQ_INJECT_ASM(4)
 #ifdef RSEQ_COMPARE_TWICE
-		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
 		"cmpl %[v], %[expect]\n\t"
 		"jnz %l[error2]\n\t"
 #endif
@@ -938,7 +940,7 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
 		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [rseq_abi]		"r" (rseq_get_abi()),
+		  [rseq_offset]		"r" (rseq_offset),
 		  /* try store input */
 		  [v2]			"m" (*v2),
 		  [newv2]		"m" (newv2),
@@ -987,15 +989,15 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
 		RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
 #endif
 		/* Start rseq by storing table entry pointer into rseq_cs. */
-		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
-		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
 		RSEQ_INJECT_ASM(3)
 		"movl %[expect], %%eax\n\t"
 		"cmpl %[v], %%eax\n\t"
 		"jnz %l[cmpfail]\n\t"
 		RSEQ_INJECT_ASM(4)
 #ifdef RSEQ_COMPARE_TWICE
-		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
 		"movl %[expect], %%eax\n\t"
 		"cmpl %[v], %%eax\n\t"
 		"jnz %l[error2]\n\t"
@@ -1011,7 +1013,7 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
 		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [rseq_abi]		"r" (rseq_get_abi()),
+		  [rseq_offset]		"r" (rseq_offset),
 		  /* try store input */
 		  [v2]			"m" (*v2),
 		  [newv2]		"r" (newv2),
@@ -1062,8 +1064,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
 		RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3])
 #endif
 		/* Start rseq by storing table entry pointer into rseq_cs. */
-		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
-		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
 		RSEQ_INJECT_ASM(3)
 		"cmpl %[v], %[expect]\n\t"
 		"jnz %l[cmpfail]\n\t"
@@ -1072,7 +1074,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
 		"jnz %l[cmpfail]\n\t"
 		RSEQ_INJECT_ASM(5)
 #ifdef RSEQ_COMPARE_TWICE
-		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
 		"cmpl %[v], %[expect]\n\t"
 		"jnz %l[error2]\n\t"
 		"cmpl %[expect2], %[v2]\n\t"
@@ -1086,7 +1088,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
 		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [rseq_abi]		"r" (rseq_get_abi()),
+		  [rseq_offset]		"r" (rseq_offset),
 		  /* cmp2 input */
 		  [v2]			"m" (*v2),
 		  [expect2]		"r" (expect2),
@@ -1144,15 +1146,15 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
 		"movl %[dst], %[rseq_scratch1]\n\t"
 		"movl %[len], %[rseq_scratch2]\n\t"
 		/* Start rseq by storing table entry pointer into rseq_cs. */
-		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
-		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
 		RSEQ_INJECT_ASM(3)
 		"movl %[expect], %%eax\n\t"
 		"cmpl %%eax, %[v]\n\t"
 		"jnz 5f\n\t"
 		RSEQ_INJECT_ASM(4)
 #ifdef RSEQ_COMPARE_TWICE
-		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 6f)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 6f)
 		"movl %[expect], %%eax\n\t"
 		"cmpl %%eax, %[v]\n\t"
 		"jnz 7f\n\t"
@@ -1202,7 +1204,7 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
 #endif
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [rseq_abi]		"r" (rseq_get_abi()),
+		  [rseq_offset]		"r" (rseq_offset),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [expect]		"m" (expect),
@@ -1261,15 +1263,15 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
 		"movl %[dst], %[rseq_scratch1]\n\t"
 		"movl %[len], %[rseq_scratch2]\n\t"
 		/* Start rseq by storing table entry pointer into rseq_cs. */
-		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
-		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 4f)
 		RSEQ_INJECT_ASM(3)
 		"movl %[expect], %%eax\n\t"
 		"cmpl %%eax, %[v]\n\t"
 		"jnz 5f\n\t"
 		RSEQ_INJECT_ASM(4)
 #ifdef RSEQ_COMPARE_TWICE
-		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 6f)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_CPU_ID_OFFSET(%[rseq_offset]), 6f)
 		"movl %[expect], %%eax\n\t"
 		"cmpl %%eax, %[v]\n\t"
 		"jnz 7f\n\t"
@@ -1320,7 +1322,7 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
 #endif
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [rseq_abi]		"r" (rseq_get_abi()),
+		  [rseq_offset]		"r" (rseq_offset),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [expect]		"m" (expect),
-- 
cgit 


From 04fcb5f9a104f24278ad849c642cbdf0c8f48453 Mon Sep 17 00:00:00 2001
From: Delyan Kratunov <delyank@fb.com>
Date: Wed, 2 Feb 2022 15:54:20 -0800
Subject: selftests/bpf: Migrate from bpf_prog_test_run

bpf_prog_test_run is being deprecated in favor of the OPTS-based
bpf_prog_test_run_opts.
We end up unable to use CHECK in most cases, so replace usages with
ASSERT_* calls.

Signed-off-by: Delyan Kratunov <delyank@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220202235423.1097270-2-delyank@fb.com
---
 tools/testing/selftests/bpf/prog_tests/atomics.c   |  72 +++----
 tools/testing/selftests/bpf/prog_tests/bpf_nf.c    |  10 +-
 .../selftests/bpf/prog_tests/fentry_fexit.c        |  24 +--
 .../testing/selftests/bpf/prog_tests/fentry_test.c |   7 +-
 .../selftests/bpf/prog_tests/fexit_bpf2bpf.c       |  32 +--
 .../selftests/bpf/prog_tests/fexit_stress.c        |  22 +-
 .../testing/selftests/bpf/prog_tests/fexit_test.c  |   7 +-
 .../bpf/prog_tests/flow_dissector_load_bytes.c     |  24 ++-
 tools/testing/selftests/bpf/prog_tests/for_each.c  |  32 +--
 .../selftests/bpf/prog_tests/get_func_args_test.c  |  12 +-
 .../selftests/bpf/prog_tests/get_func_ip_test.c    |  10 +-
 .../testing/selftests/bpf/prog_tests/global_data.c |  24 ++-
 .../selftests/bpf/prog_tests/global_func_args.c    |  14 +-
 .../testing/selftests/bpf/prog_tests/kfunc_call.c  |  46 ++--
 .../selftests/bpf/prog_tests/ksyms_module.c        |  23 +-
 tools/testing/selftests/bpf/prog_tests/l4lb_all.c  |  35 +--
 tools/testing/selftests/bpf/prog_tests/map_lock.c  |  15 +-
 tools/testing/selftests/bpf/prog_tests/map_ptr.c   |  16 +-
 .../selftests/bpf/prog_tests/modify_return.c       |  33 ++-
 .../testing/selftests/bpf/prog_tests/pkt_access.c  |  26 ++-
 .../selftests/bpf/prog_tests/pkt_md_access.c       |  14 +-
 .../selftests/bpf/prog_tests/queue_stack_map.c     |  46 ++--
 .../bpf/prog_tests/raw_tp_writable_test_run.c      |  16 +-
 .../selftests/bpf/prog_tests/signal_pending.c      |  23 +-
 tools/testing/selftests/bpf/prog_tests/spinlock.c  |  14 +-
 tools/testing/selftests/bpf/prog_tests/tailcalls.c | 238 +++++++++++----------
 .../selftests/bpf/prog_tests/test_skb_pkt_end.c    |  15 +-
 tools/testing/selftests/bpf/prog_tests/timer.c     |   7 +-
 tools/testing/selftests/bpf/prog_tests/timer_mim.c |   7 +-
 tools/testing/selftests/bpf/prog_tests/trace_ext.c |  28 +--
 tools/testing/selftests/bpf/prog_tests/xdp.c       |  34 +--
 .../selftests/bpf/prog_tests/xdp_adjust_frags.c    |  32 +--
 .../selftests/bpf/prog_tests/xdp_adjust_tail.c     | 110 ++++++----
 .../testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c |  14 +-
 .../selftests/bpf/prog_tests/xdp_noinline.c        |  44 ++--
 tools/testing/selftests/bpf/prog_tests/xdp_perf.c  |  19 +-
 tools/testing/selftests/bpf/test_lru_map.c         |  11 +-
 tools/testing/selftests/bpf/test_verifier.c        |  16 +-
 38 files changed, 649 insertions(+), 523 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c b/tools/testing/selftests/bpf/prog_tests/atomics.c
index 86b7d5d84eec..ab62aba10e2b 100644
--- a/tools/testing/selftests/bpf/prog_tests/atomics.c
+++ b/tools/testing/selftests/bpf/prog_tests/atomics.c
@@ -7,18 +7,18 @@
 static void test_add(struct atomics_lskel *skel)
 {
 	int err, prog_fd;
-	__u32 duration = 0, retval;
 	int link_fd;
+	LIBBPF_OPTS(bpf_test_run_opts, topts);
 
 	link_fd = atomics_lskel__add__attach(skel);
 	if (!ASSERT_GT(link_fd, 0, "attach(add)"))
 		return;
 
 	prog_fd = skel->progs.add.prog_fd;
-	err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
-				NULL, NULL, &retval, &duration);
-	if (CHECK(err || retval, "test_run add",
-		  "err %d errno %d retval %d duration %d\n", err, errno, retval, duration))
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
+	if (!ASSERT_OK(err, "test_run_opts err"))
+		goto cleanup;
+	if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
 		goto cleanup;
 
 	ASSERT_EQ(skel->data->add64_value, 3, "add64_value");
@@ -39,19 +39,18 @@ cleanup:
 static void test_sub(struct atomics_lskel *skel)
 {
 	int err, prog_fd;
-	__u32 duration = 0, retval;
 	int link_fd;
+	LIBBPF_OPTS(bpf_test_run_opts, topts);
 
 	link_fd = atomics_lskel__sub__attach(skel);
 	if (!ASSERT_GT(link_fd, 0, "attach(sub)"))
 		return;
 
 	prog_fd = skel->progs.sub.prog_fd;
-	err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
-				NULL, NULL, &retval, &duration);
-	if (CHECK(err || retval, "test_run sub",
-		  "err %d errno %d retval %d duration %d\n",
-		  err, errno, retval, duration))
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
+	if (!ASSERT_OK(err, "test_run_opts err"))
+		goto cleanup;
+	if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
 		goto cleanup;
 
 	ASSERT_EQ(skel->data->sub64_value, -1, "sub64_value");
@@ -72,18 +71,18 @@ cleanup:
 static void test_and(struct atomics_lskel *skel)
 {
 	int err, prog_fd;
-	__u32 duration = 0, retval;
 	int link_fd;
+	LIBBPF_OPTS(bpf_test_run_opts, topts);
 
 	link_fd = atomics_lskel__and__attach(skel);
 	if (!ASSERT_GT(link_fd, 0, "attach(and)"))
 		return;
 
 	prog_fd = skel->progs.and.prog_fd;
-	err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
-				NULL, NULL, &retval, &duration);
-	if (CHECK(err || retval, "test_run and",
-		  "err %d errno %d retval %d duration %d\n", err, errno, retval, duration))
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
+	if (!ASSERT_OK(err, "test_run_opts err"))
+		goto cleanup;
+	if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
 		goto cleanup;
 
 	ASSERT_EQ(skel->data->and64_value, 0x010ull << 32, "and64_value");
@@ -100,19 +99,18 @@ cleanup:
 static void test_or(struct atomics_lskel *skel)
 {
 	int err, prog_fd;
-	__u32 duration = 0, retval;
 	int link_fd;
+	LIBBPF_OPTS(bpf_test_run_opts, topts);
 
 	link_fd = atomics_lskel__or__attach(skel);
 	if (!ASSERT_GT(link_fd, 0, "attach(or)"))
 		return;
 
 	prog_fd = skel->progs.or.prog_fd;
-	err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
-				NULL, NULL, &retval, &duration);
-	if (CHECK(err || retval, "test_run or",
-		  "err %d errno %d retval %d duration %d\n",
-		  err, errno, retval, duration))
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
+	if (!ASSERT_OK(err, "test_run_opts err"))
+		goto cleanup;
+	if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
 		goto cleanup;
 
 	ASSERT_EQ(skel->data->or64_value, 0x111ull << 32, "or64_value");
@@ -129,18 +127,18 @@ cleanup:
 static void test_xor(struct atomics_lskel *skel)
 {
 	int err, prog_fd;
-	__u32 duration = 0, retval;
 	int link_fd;
+	LIBBPF_OPTS(bpf_test_run_opts, topts);
 
 	link_fd = atomics_lskel__xor__attach(skel);
 	if (!ASSERT_GT(link_fd, 0, "attach(xor)"))
 		return;
 
 	prog_fd = skel->progs.xor.prog_fd;
-	err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
-				NULL, NULL, &retval, &duration);
-	if (CHECK(err || retval, "test_run xor",
-		  "err %d errno %d retval %d duration %d\n", err, errno, retval, duration))
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
+	if (!ASSERT_OK(err, "test_run_opts err"))
+		goto cleanup;
+	if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
 		goto cleanup;
 
 	ASSERT_EQ(skel->data->xor64_value, 0x101ull << 32, "xor64_value");
@@ -157,18 +155,18 @@ cleanup:
 static void test_cmpxchg(struct atomics_lskel *skel)
 {
 	int err, prog_fd;
-	__u32 duration = 0, retval;
 	int link_fd;
+	LIBBPF_OPTS(bpf_test_run_opts, topts);
 
 	link_fd = atomics_lskel__cmpxchg__attach(skel);
 	if (!ASSERT_GT(link_fd, 0, "attach(cmpxchg)"))
 		return;
 
 	prog_fd = skel->progs.cmpxchg.prog_fd;
-	err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
-				NULL, NULL, &retval, &duration);
-	if (CHECK(err || retval, "test_run cmpxchg",
-		  "err %d errno %d retval %d duration %d\n", err, errno, retval, duration))
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
+	if (!ASSERT_OK(err, "test_run_opts err"))
+		goto cleanup;
+	if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
 		goto cleanup;
 
 	ASSERT_EQ(skel->data->cmpxchg64_value, 2, "cmpxchg64_value");
@@ -186,18 +184,18 @@ cleanup:
 static void test_xchg(struct atomics_lskel *skel)
 {
 	int err, prog_fd;
-	__u32 duration = 0, retval;
 	int link_fd;
+	LIBBPF_OPTS(bpf_test_run_opts, topts);
 
 	link_fd = atomics_lskel__xchg__attach(skel);
 	if (!ASSERT_GT(link_fd, 0, "attach(xchg)"))
 		return;
 
 	prog_fd = skel->progs.xchg.prog_fd;
-	err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
-				NULL, NULL, &retval, &duration);
-	if (CHECK(err || retval, "test_run xchg",
-		  "err %d errno %d retval %d duration %d\n", err, errno, retval, duration))
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
+	if (!ASSERT_OK(err, "test_run_opts err"))
+		goto cleanup;
+	if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
 		goto cleanup;
 
 	ASSERT_EQ(skel->data->xchg64_value, 2, "xchg64_value");
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
index e3166a81e989..dd30b1e3a67c 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
@@ -11,7 +11,12 @@ enum {
 void test_bpf_nf_ct(int mode)
 {
 	struct test_bpf_nf *skel;
-	int prog_fd, err, retval;
+	int prog_fd, err;
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+		.repeat = 1,
+	);
 
 	skel = test_bpf_nf__open_and_load();
 	if (!ASSERT_OK_PTR(skel, "test_bpf_nf__open_and_load"))
@@ -22,8 +27,7 @@ void test_bpf_nf_ct(int mode)
 	else
 		prog_fd = bpf_program__fd(skel->progs.nf_skb_ct_test);
 
-	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), NULL, NULL,
-				(__u32 *)&retval, NULL);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
 	if (!ASSERT_OK(err, "bpf_prog_test_run"))
 		goto end;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
index 4374ac8a8a91..130f5b82d2e6 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
@@ -9,38 +9,34 @@ void test_fentry_fexit(void)
 	struct fentry_test_lskel *fentry_skel = NULL;
 	struct fexit_test_lskel *fexit_skel = NULL;
 	__u64 *fentry_res, *fexit_res;
-	__u32 duration = 0, retval;
 	int err, prog_fd, i;
+	LIBBPF_OPTS(bpf_test_run_opts, topts);
 
 	fentry_skel = fentry_test_lskel__open_and_load();
-	if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n"))
+	if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_load"))
 		goto close_prog;
 	fexit_skel = fexit_test_lskel__open_and_load();
-	if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n"))
+	if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_load"))
 		goto close_prog;
 
 	err = fentry_test_lskel__attach(fentry_skel);
-	if (CHECK(err, "fentry_attach", "fentry attach failed: %d\n", err))
+	if (!ASSERT_OK(err, "fentry_attach"))
 		goto close_prog;
 	err = fexit_test_lskel__attach(fexit_skel);
-	if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err))
+	if (!ASSERT_OK(err, "fexit_attach"))
 		goto close_prog;
 
 	prog_fd = fexit_skel->progs.test1.prog_fd;
-	err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
-				NULL, NULL, &retval, &duration);
-	CHECK(err || retval, "ipv6",
-	      "err %d errno %d retval %d duration %d\n",
-	      err, errno, retval, duration);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
+	ASSERT_OK(err, "ipv6 test_run");
+	ASSERT_OK(topts.retval, "ipv6 test retval");
 
 	fentry_res = (__u64 *)fentry_skel->bss;
 	fexit_res = (__u64 *)fexit_skel->bss;
 	printf("%lld\n", fentry_skel->bss->test1_result);
 	for (i = 0; i < 8; i++) {
-		CHECK(fentry_res[i] != 1, "result",
-		      "fentry_test%d failed err %lld\n", i + 1, fentry_res[i]);
-		CHECK(fexit_res[i] != 1, "result",
-		      "fexit_test%d failed err %lld\n", i + 1, fexit_res[i]);
+		ASSERT_EQ(fentry_res[i], 1, "fentry result");
+		ASSERT_EQ(fexit_res[i], 1, "fexit result");
 	}
 
 close_prog:
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
index 12921b3850d2..c0d1d61d5f66 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
@@ -6,9 +6,9 @@
 static int fentry_test(struct fentry_test_lskel *fentry_skel)
 {
 	int err, prog_fd, i;
-	__u32 duration = 0, retval;
 	int link_fd;
 	__u64 *result;
+	LIBBPF_OPTS(bpf_test_run_opts, topts);
 
 	err = fentry_test_lskel__attach(fentry_skel);
 	if (!ASSERT_OK(err, "fentry_attach"))
@@ -20,10 +20,9 @@ static int fentry_test(struct fentry_test_lskel *fentry_skel)
 		return -1;
 
 	prog_fd = fentry_skel->progs.test1.prog_fd;
-	err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
-				NULL, NULL, &retval, &duration);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
 	ASSERT_OK(err, "test_run");
-	ASSERT_EQ(retval, 0, "test_run");
+	ASSERT_EQ(topts.retval, 0, "test_run");
 
 	result = (__u64 *)fentry_skel->bss;
 	for (i = 0; i < sizeof(*fentry_skel->bss) / sizeof(__u64); i++) {
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
index e83575e5480f..d9aad15e0d24 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
@@ -58,12 +58,17 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
 				      test_cb cb)
 {
 	struct bpf_object *obj = NULL, *tgt_obj;
-	__u32 retval, tgt_prog_id, info_len;
+	__u32 tgt_prog_id, info_len;
 	struct bpf_prog_info prog_info = {};
 	struct bpf_program **prog = NULL, *p;
 	struct bpf_link **link = NULL;
 	int err, tgt_fd, i;
 	struct btf *btf;
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = &pkt_v6,
+		.data_size_in = sizeof(pkt_v6),
+		.repeat = 1,
+	);
 
 	err = bpf_prog_test_load(target_obj_file, BPF_PROG_TYPE_UNSPEC,
 			    &tgt_obj, &tgt_fd);
@@ -147,10 +152,9 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
 	if (!run_prog)
 		goto close_prog;
 
-	err = bpf_prog_test_run(tgt_fd, 1, &pkt_v6, sizeof(pkt_v6),
-				NULL, NULL, &retval, NULL);
+	err = bpf_prog_test_run_opts(tgt_fd, &topts);
 	ASSERT_OK(err, "prog_run");
-	ASSERT_EQ(retval, 0, "prog_run_ret");
+	ASSERT_EQ(topts.retval, 0, "prog_run_ret");
 
 	if (check_data_map(obj, prog_cnt, false))
 		goto close_prog;
@@ -225,29 +229,31 @@ static int test_second_attach(struct bpf_object *obj)
 	const char *tgt_obj_file = "./test_pkt_access.o";
 	struct bpf_program *prog = NULL;
 	struct bpf_object *tgt_obj;
-	__u32 duration = 0, retval;
 	struct bpf_link *link;
 	int err = 0, tgt_fd;
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = &pkt_v6,
+		.data_size_in = sizeof(pkt_v6),
+		.repeat = 1,
+	);
 
 	prog = bpf_object__find_program_by_name(obj, prog_name);
-	if (CHECK(!prog, "find_prog", "prog %s not found\n", prog_name))
+	if (!ASSERT_OK_PTR(prog, "find_prog"))
 		return -ENOENT;
 
 	err = bpf_prog_test_load(tgt_obj_file, BPF_PROG_TYPE_UNSPEC,
 			    &tgt_obj, &tgt_fd);
-	if (CHECK(err, "second_prog_load", "file %s err %d errno %d\n",
-		  tgt_obj_file, err, errno))
+	if (!ASSERT_OK(err, "second_prog_load"))
 		return err;
 
 	link = bpf_program__attach_freplace(prog, tgt_fd, tgt_name);
 	if (!ASSERT_OK_PTR(link, "second_link"))
 		goto out;
 
-	err = bpf_prog_test_run(tgt_fd, 1, &pkt_v6, sizeof(pkt_v6),
-				NULL, NULL, &retval, &duration);
-	if (CHECK(err || retval, "ipv6",
-		  "err %d errno %d retval %d duration %d\n",
-		  err, errno, retval, duration))
+	err = bpf_prog_test_run_opts(tgt_fd, &topts);
+	if (!ASSERT_OK(err, "ipv6 test_run"))
+		goto out;
+	if (!ASSERT_OK(topts.retval, "ipv6 retval"))
 		goto out;
 
 	err = check_data_map(obj, 1, true);
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
index e4cede6b4b2d..3ee2107bbf7a 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
@@ -10,9 +10,7 @@ void test_fexit_stress(void)
 	char test_skb[128] = {};
 	int fexit_fd[CNT] = {};
 	int link_fd[CNT] = {};
-	__u32 duration = 0;
 	char error[4096];
-	__u32 prog_ret;
 	int err, i, filter_fd;
 
 	const struct bpf_insn trace_program[] = {
@@ -36,9 +34,15 @@ void test_fexit_stress(void)
 		.log_size = sizeof(error),
 	);
 
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = test_skb,
+		.data_size_in = sizeof(test_skb),
+		.repeat = 1,
+	);
+
 	err = libbpf_find_vmlinux_btf_id("bpf_fentry_test1",
 					 trace_opts.expected_attach_type);
-	if (CHECK(err <= 0, "find_vmlinux_btf_id", "failed: %d\n", err))
+	if (!ASSERT_GT(err, 0, "find_vmlinux_btf_id"))
 		goto out;
 	trace_opts.attach_btf_id = err;
 
@@ -47,24 +51,20 @@ void test_fexit_stress(void)
 					    trace_program,
 					    sizeof(trace_program) / sizeof(struct bpf_insn),
 					    &trace_opts);
-		if (CHECK(fexit_fd[i] < 0, "fexit loaded",
-			  "failed: %d errno %d\n", fexit_fd[i], errno))
+		if (!ASSERT_GE(fexit_fd[i], 0, "fexit load"))
 			goto out;
 		link_fd[i] = bpf_raw_tracepoint_open(NULL, fexit_fd[i]);
-		if (CHECK(link_fd[i] < 0, "fexit attach failed",
-			  "prog %d failed: %d err %d\n", i, link_fd[i], errno))
+		if (!ASSERT_GE(link_fd[i], 0, "fexit attach"))
 			goto out;
 	}
 
 	filter_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL",
 				  skb_program, sizeof(skb_program) / sizeof(struct bpf_insn),
 				  &skb_opts);
-	if (CHECK(filter_fd < 0, "test_program_loaded", "failed: %d errno %d\n",
-		  filter_fd, errno))
+	if (!ASSERT_GE(filter_fd, 0, "test_program_loaded"))
 		goto out;
 
-	err = bpf_prog_test_run(filter_fd, 1, test_skb, sizeof(test_skb), 0,
-				0, &prog_ret, 0);
+	err = bpf_prog_test_run_opts(filter_fd, &topts);
 	close(filter_fd);
 	CHECK_FAIL(err);
 out:
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
index d4887d8bb396..101b7343036b 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
@@ -6,9 +6,9 @@
 static int fexit_test(struct fexit_test_lskel *fexit_skel)
 {
 	int err, prog_fd, i;
-	__u32 duration = 0, retval;
 	int link_fd;
 	__u64 *result;
+	LIBBPF_OPTS(bpf_test_run_opts, topts);
 
 	err = fexit_test_lskel__attach(fexit_skel);
 	if (!ASSERT_OK(err, "fexit_attach"))
@@ -20,10 +20,9 @@ static int fexit_test(struct fexit_test_lskel *fexit_skel)
 		return -1;
 
 	prog_fd = fexit_skel->progs.test1.prog_fd;
-	err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
-				NULL, NULL, &retval, &duration);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
 	ASSERT_OK(err, "test_run");
-	ASSERT_EQ(retval, 0, "test_run");
+	ASSERT_EQ(topts.retval, 0, "test_run");
 
 	result = (__u64 *)fexit_skel->bss;
 	for (i = 0; i < sizeof(*fexit_skel->bss) / sizeof(__u64); i++) {
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c
index 93ac3f28226c..36afb409c25f 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c
@@ -5,7 +5,6 @@
 void serial_test_flow_dissector_load_bytes(void)
 {
 	struct bpf_flow_keys flow_keys;
-	__u32 duration = 0, retval, size;
 	struct bpf_insn prog[] = {
 		// BPF_REG_1 - 1st argument: context
 		// BPF_REG_2 - 2nd argument: offset, start at first byte
@@ -27,22 +26,25 @@ void serial_test_flow_dissector_load_bytes(void)
 		BPF_EXIT_INSN(),
 	};
 	int fd, err;
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+		.data_out = &flow_keys,
+		.data_size_out = sizeof(flow_keys),
+		.repeat = 1,
+	);
 
 	/* make sure bpf_skb_load_bytes is not allowed from skb-less context
 	 */
 	fd = bpf_test_load_program(BPF_PROG_TYPE_FLOW_DISSECTOR, prog,
 			      ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
-	CHECK(fd < 0,
-	      "flow_dissector-bpf_skb_load_bytes-load",
-	      "fd %d errno %d\n",
-	      fd, errno);
+	ASSERT_GE(fd, 0, "bpf_test_load_program good fd");
 
-	err = bpf_prog_test_run(fd, 1, &pkt_v4, sizeof(pkt_v4),
-				&flow_keys, &size, &retval, &duration);
-	CHECK(size != sizeof(flow_keys) || err || retval != 1,
-	      "flow_dissector-bpf_skb_load_bytes",
-	      "err %d errno %d retval %d duration %d size %u/%zu\n",
-	      err, errno, retval, duration, size, sizeof(flow_keys));
+	err = bpf_prog_test_run_opts(fd, &topts);
+	ASSERT_OK(err, "test_run");
+	ASSERT_EQ(topts.data_size_out, sizeof(flow_keys),
+		  "test_run data_size_out");
+	ASSERT_EQ(topts.retval, 1, "test_run retval");
 
 	if (fd >= -1)
 		close(fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/for_each.c b/tools/testing/selftests/bpf/prog_tests/for_each.c
index 68eb12a287d4..044df13ee069 100644
--- a/tools/testing/selftests/bpf/prog_tests/for_each.c
+++ b/tools/testing/selftests/bpf/prog_tests/for_each.c
@@ -12,8 +12,13 @@ static void test_hash_map(void)
 	int i, err, hashmap_fd, max_entries, percpu_map_fd;
 	struct for_each_hash_map_elem *skel;
 	__u64 *percpu_valbuf = NULL;
-	__u32 key, num_cpus, retval;
+	__u32 key, num_cpus;
 	__u64 val;
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+		.repeat = 1,
+	);
 
 	skel = for_each_hash_map_elem__open_and_load();
 	if (!ASSERT_OK_PTR(skel, "for_each_hash_map_elem__open_and_load"))
@@ -42,11 +47,10 @@ static void test_hash_map(void)
 	if (!ASSERT_OK(err, "percpu_map_update"))
 		goto out;
 
-	err = bpf_prog_test_run(bpf_program__fd(skel->progs.test_pkt_access),
-				1, &pkt_v4, sizeof(pkt_v4), NULL, NULL,
-				&retval, &duration);
-	if (CHECK(err || retval, "ipv4", "err %d errno %d retval %d\n",
-		  err, errno, retval))
+	err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_pkt_access), &topts);
+	duration = topts.duration;
+	if (CHECK(err || topts.retval, "ipv4", "err %d errno %d retval %d\n",
+		  err, errno, topts.retval))
 		goto out;
 
 	ASSERT_EQ(skel->bss->hashmap_output, 4, "hashmap_output");
@@ -69,11 +73,16 @@ out:
 
 static void test_array_map(void)
 {
-	__u32 key, num_cpus, max_entries, retval;
+	__u32 key, num_cpus, max_entries;
 	int i, arraymap_fd, percpu_map_fd, err;
 	struct for_each_array_map_elem *skel;
 	__u64 *percpu_valbuf = NULL;
 	__u64 val, expected_total;
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+		.repeat = 1,
+	);
 
 	skel = for_each_array_map_elem__open_and_load();
 	if (!ASSERT_OK_PTR(skel, "for_each_array_map_elem__open_and_load"))
@@ -106,11 +115,10 @@ static void test_array_map(void)
 	if (!ASSERT_OK(err, "percpu_map_update"))
 		goto out;
 
-	err = bpf_prog_test_run(bpf_program__fd(skel->progs.test_pkt_access),
-				1, &pkt_v4, sizeof(pkt_v4), NULL, NULL,
-				&retval, &duration);
-	if (CHECK(err || retval, "ipv4", "err %d errno %d retval %d\n",
-		  err, errno, retval))
+	err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_pkt_access), &topts);
+	duration = topts.duration;
+	if (CHECK(err || topts.retval, "ipv4", "err %d errno %d retval %d\n",
+		  err, errno, topts.retval))
 		goto out;
 
 	ASSERT_EQ(skel->bss->arraymap_output, expected_total, "array_output");
diff --git a/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c b/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c
index 85c427119fe9..28cf63963cb7 100644
--- a/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c
@@ -5,8 +5,8 @@
 void test_get_func_args_test(void)
 {
 	struct get_func_args_test *skel = NULL;
-	__u32 duration = 0, retval;
 	int err, prog_fd;
+	LIBBPF_OPTS(bpf_test_run_opts, topts);
 
 	skel = get_func_args_test__open_and_load();
 	if (!ASSERT_OK_PTR(skel, "get_func_args_test__open_and_load"))
@@ -20,19 +20,17 @@ void test_get_func_args_test(void)
 	 * fentry/fexit programs.
 	 */
 	prog_fd = bpf_program__fd(skel->progs.test1);
-	err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
-				NULL, NULL, &retval, &duration);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
 	ASSERT_OK(err, "test_run");
-	ASSERT_EQ(retval, 0, "test_run");
+	ASSERT_EQ(topts.retval, 0, "test_run");
 
 	/* This runs bpf_modify_return_test function and triggers
 	 * fmod_ret_test and fexit_test programs.
 	 */
 	prog_fd = bpf_program__fd(skel->progs.fmod_ret_test);
-	err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
-				NULL, NULL, &retval, &duration);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
 	ASSERT_OK(err, "test_run");
-	ASSERT_EQ(retval, 1234, "test_run");
+	ASSERT_EQ(topts.retval, 1234, "test_run");
 
 	ASSERT_EQ(skel->bss->test1_result, 1, "test1_result");
 	ASSERT_EQ(skel->bss->test2_result, 1, "test2_result");
diff --git a/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c
index 02a465f36d59..938dbd4d7c2f 100644
--- a/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c
@@ -5,8 +5,8 @@
 void test_get_func_ip_test(void)
 {
 	struct get_func_ip_test *skel = NULL;
-	__u32 duration = 0, retval;
 	int err, prog_fd;
+	LIBBPF_OPTS(bpf_test_run_opts, topts);
 
 	skel = get_func_ip_test__open();
 	if (!ASSERT_OK_PTR(skel, "get_func_ip_test__open"))
@@ -29,14 +29,12 @@ void test_get_func_ip_test(void)
 		goto cleanup;
 
 	prog_fd = bpf_program__fd(skel->progs.test1);
-	err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
-				NULL, NULL, &retval, &duration);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
 	ASSERT_OK(err, "test_run");
-	ASSERT_EQ(retval, 0, "test_run");
+	ASSERT_EQ(topts.retval, 0, "test_run");
 
 	prog_fd = bpf_program__fd(skel->progs.test5);
-	err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
-				NULL, NULL, &retval, &duration);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
 
 	ASSERT_OK(err, "test_run");
 
diff --git a/tools/testing/selftests/bpf/prog_tests/global_data.c b/tools/testing/selftests/bpf/prog_tests/global_data.c
index 917165e04427..6fb3d3155c35 100644
--- a/tools/testing/selftests/bpf/prog_tests/global_data.c
+++ b/tools/testing/selftests/bpf/prog_tests/global_data.c
@@ -132,24 +132,26 @@ static void test_global_data_rdonly(struct bpf_object *obj, __u32 duration)
 void test_global_data(void)
 {
 	const char *file = "./test_global_data.o";
-	__u32 duration = 0, retval;
 	struct bpf_object *obj;
 	int err, prog_fd;
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+		.repeat = 1,
+	);
 
 	err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
-	if (CHECK(err, "load program", "error %d loading %s\n", err, file))
+	if (!ASSERT_OK(err, "load program"))
 		return;
 
-	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
-				NULL, NULL, &retval, &duration);
-	CHECK(err || retval, "pass global data run",
-	      "err %d errno %d retval %d duration %d\n",
-	      err, errno, retval, duration);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
+	ASSERT_OK(err, "pass global data run err");
+	ASSERT_OK(topts.retval, "pass global data run retval");
 
-	test_global_data_number(obj, duration);
-	test_global_data_string(obj, duration);
-	test_global_data_struct(obj, duration);
-	test_global_data_rdonly(obj, duration);
+	test_global_data_number(obj, topts.duration);
+	test_global_data_string(obj, topts.duration);
+	test_global_data_struct(obj, topts.duration);
+	test_global_data_rdonly(obj, topts.duration);
 
 	bpf_object__close(obj);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/global_func_args.c b/tools/testing/selftests/bpf/prog_tests/global_func_args.c
index 93a2439237b0..29039a36cce5 100644
--- a/tools/testing/selftests/bpf/prog_tests/global_func_args.c
+++ b/tools/testing/selftests/bpf/prog_tests/global_func_args.c
@@ -40,19 +40,21 @@ static void test_global_func_args0(struct bpf_object *obj)
 void test_global_func_args(void)
 {
 	const char *file = "./test_global_func_args.o";
-	__u32 retval;
 	struct bpf_object *obj;
 	int err, prog_fd;
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+		.repeat = 1,
+	);
 
 	err = bpf_prog_test_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd);
 	if (CHECK(err, "load program", "error %d loading %s\n", err, file))
 		return;
 
-	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
-				NULL, NULL, &retval, &duration);
-	CHECK(err || retval, "pass global func args run",
-	      "err %d errno %d retval %d duration %d\n",
-	      err, errno, retval, duration);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
+	ASSERT_OK(err, "test_run");
+	ASSERT_OK(topts.retval, "test_run retval");
 
 	test_global_func_args0(obj);
 
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
index b39a4f09aefd..c00eb974eb85 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
@@ -9,29 +9,31 @@
 static void test_main(void)
 {
 	struct kfunc_call_test_lskel *skel;
-	int prog_fd, retval, err;
+	int prog_fd, err;
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+		.repeat = 1,
+	);
 
 	skel = kfunc_call_test_lskel__open_and_load();
 	if (!ASSERT_OK_PTR(skel, "skel"))
 		return;
 
 	prog_fd = skel->progs.kfunc_call_test1.prog_fd;
-	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
-				NULL, NULL, (__u32 *)&retval, NULL);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
 	ASSERT_OK(err, "bpf_prog_test_run(test1)");
-	ASSERT_EQ(retval, 12, "test1-retval");
+	ASSERT_EQ(topts.retval, 12, "test1-retval");
 
 	prog_fd = skel->progs.kfunc_call_test2.prog_fd;
-	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
-				NULL, NULL, (__u32 *)&retval, NULL);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
 	ASSERT_OK(err, "bpf_prog_test_run(test2)");
-	ASSERT_EQ(retval, 3, "test2-retval");
+	ASSERT_EQ(topts.retval, 3, "test2-retval");
 
 	prog_fd = skel->progs.kfunc_call_test_ref_btf_id.prog_fd;
-	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
-				NULL, NULL, (__u32 *)&retval, NULL);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
 	ASSERT_OK(err, "bpf_prog_test_run(test_ref_btf_id)");
-	ASSERT_EQ(retval, 0, "test_ref_btf_id-retval");
+	ASSERT_EQ(topts.retval, 0, "test_ref_btf_id-retval");
 
 	kfunc_call_test_lskel__destroy(skel);
 }
@@ -39,17 +41,21 @@ static void test_main(void)
 static void test_subprog(void)
 {
 	struct kfunc_call_test_subprog *skel;
-	int prog_fd, retval, err;
+	int prog_fd, err;
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+		.repeat = 1,
+	);
 
 	skel = kfunc_call_test_subprog__open_and_load();
 	if (!ASSERT_OK_PTR(skel, "skel"))
 		return;
 
 	prog_fd = bpf_program__fd(skel->progs.kfunc_call_test1);
-	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
-				NULL, NULL, (__u32 *)&retval, NULL);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
 	ASSERT_OK(err, "bpf_prog_test_run(test1)");
-	ASSERT_EQ(retval, 10, "test1-retval");
+	ASSERT_EQ(topts.retval, 10, "test1-retval");
 	ASSERT_NEQ(skel->data->active_res, -1, "active_res");
 	ASSERT_EQ(skel->data->sk_state_res, BPF_TCP_CLOSE, "sk_state_res");
 
@@ -59,17 +65,21 @@ static void test_subprog(void)
 static void test_subprog_lskel(void)
 {
 	struct kfunc_call_test_subprog_lskel *skel;
-	int prog_fd, retval, err;
+	int prog_fd, err;
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+		.repeat = 1,
+	);
 
 	skel = kfunc_call_test_subprog_lskel__open_and_load();
 	if (!ASSERT_OK_PTR(skel, "skel"))
 		return;
 
 	prog_fd = skel->progs.kfunc_call_test1.prog_fd;
-	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
-				NULL, NULL, (__u32 *)&retval, NULL);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
 	ASSERT_OK(err, "bpf_prog_test_run(test1)");
-	ASSERT_EQ(retval, 10, "test1-retval");
+	ASSERT_EQ(topts.retval, 10, "test1-retval");
 	ASSERT_NEQ(skel->data->active_res, -1, "active_res");
 	ASSERT_EQ(skel->data->sk_state_res, BPF_TCP_CLOSE, "sk_state_res");
 
diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c
index d490ad80eccb..ecc58c9e7631 100644
--- a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c
@@ -9,8 +9,12 @@
 void test_ksyms_module_lskel(void)
 {
 	struct test_ksyms_module_lskel *skel;
-	int retval;
 	int err;
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+		.repeat = 1,
+	);
 
 	if (!env.has_testmod) {
 		test__skip();
@@ -20,11 +24,10 @@ void test_ksyms_module_lskel(void)
 	skel = test_ksyms_module_lskel__open_and_load();
 	if (!ASSERT_OK_PTR(skel, "test_ksyms_module_lskel__open_and_load"))
 		return;
-	err = bpf_prog_test_run(skel->progs.load.prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
-				NULL, NULL, (__u32 *)&retval, NULL);
+	err = bpf_prog_test_run_opts(skel->progs.load.prog_fd, &topts);
 	if (!ASSERT_OK(err, "bpf_prog_test_run"))
 		goto cleanup;
-	ASSERT_EQ(retval, 0, "retval");
+	ASSERT_EQ(topts.retval, 0, "retval");
 	ASSERT_EQ(skel->bss->out_bpf_testmod_ksym, 42, "bpf_testmod_ksym");
 cleanup:
 	test_ksyms_module_lskel__destroy(skel);
@@ -33,7 +36,12 @@ cleanup:
 void test_ksyms_module_libbpf(void)
 {
 	struct test_ksyms_module *skel;
-	int retval, err;
+	int err;
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+		.repeat = 1,
+	);
 
 	if (!env.has_testmod) {
 		test__skip();
@@ -43,11 +51,10 @@ void test_ksyms_module_libbpf(void)
 	skel = test_ksyms_module__open_and_load();
 	if (!ASSERT_OK_PTR(skel, "test_ksyms_module__open"))
 		return;
-	err = bpf_prog_test_run(bpf_program__fd(skel->progs.load), 1, &pkt_v4,
-				sizeof(pkt_v4), NULL, NULL, (__u32 *)&retval, NULL);
+	err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.load), &topts);
 	if (!ASSERT_OK(err, "bpf_prog_test_run"))
 		goto cleanup;
-	ASSERT_EQ(retval, 0, "retval");
+	ASSERT_EQ(topts.retval, 0, "retval");
 	ASSERT_EQ(skel->bss->out_bpf_testmod_ksym, 42, "bpf_testmod_ksym");
 cleanup:
 	test_ksyms_module__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
index 540ef28fabff..55f733ff4109 100644
--- a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
+++ b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
@@ -23,12 +23,16 @@ static void test_l4lb(const char *file)
 		__u8 flags;
 	} real_def = {.dst = MAGIC_VAL};
 	__u32 ch_key = 11, real_num = 3;
-	__u32 duration, retval, size;
 	int err, i, prog_fd, map_fd;
 	__u64 bytes = 0, pkts = 0;
 	struct bpf_object *obj;
 	char buf[128];
 	u32 *magic = (u32 *)buf;
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_out = buf,
+		.data_size_out = sizeof(buf),
+		.repeat = NUM_ITER,
+	);
 
 	err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
 	if (CHECK_FAIL(err))
@@ -49,19 +53,24 @@ static void test_l4lb(const char *file)
 		goto out;
 	bpf_map_update_elem(map_fd, &real_num, &real_def, 0);
 
-	err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4),
-				buf, &size, &retval, &duration);
-	CHECK(err || retval != 7/*TC_ACT_REDIRECT*/ || size != 54 ||
-	      *magic != MAGIC_VAL, "ipv4",
-	      "err %d errno %d retval %d size %d magic %x\n",
-	      err, errno, retval, size, *magic);
+	topts.data_in = &pkt_v4;
+	topts.data_size_in = sizeof(pkt_v4);
 
-	err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6),
-				buf, &size, &retval, &duration);
-	CHECK(err || retval != 7/*TC_ACT_REDIRECT*/ || size != 74 ||
-	      *magic != MAGIC_VAL, "ipv6",
-	      "err %d errno %d retval %d size %d magic %x\n",
-	      err, errno, retval, size, *magic);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
+	ASSERT_OK(err, "test_run");
+	ASSERT_EQ(topts.retval, 7 /*TC_ACT_REDIRECT*/, "ipv4 test_run retval");
+	ASSERT_EQ(topts.data_size_out, 54, "ipv4 test_run data_size_out");
+	ASSERT_EQ(*magic, MAGIC_VAL, "ipv4 magic");
+
+	topts.data_in = &pkt_v6;
+	topts.data_size_in = sizeof(pkt_v6);
+	topts.data_size_out = sizeof(buf); /* reset out size */
+
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
+	ASSERT_OK(err, "test_run");
+	ASSERT_EQ(topts.retval, 7 /*TC_ACT_REDIRECT*/, "ipv6 test_run retval");
+	ASSERT_EQ(topts.data_size_out, 74, "ipv6 test_run data_size_out");
+	ASSERT_EQ(*magic, MAGIC_VAL, "ipv6 magic");
 
 	map_fd = bpf_find_map(__func__, obj, "stats");
 	if (map_fd < 0)
diff --git a/tools/testing/selftests/bpf/prog_tests/map_lock.c b/tools/testing/selftests/bpf/prog_tests/map_lock.c
index 23d19e9cf26a..e4e99b37df64 100644
--- a/tools/testing/selftests/bpf/prog_tests/map_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/map_lock.c
@@ -4,14 +4,17 @@
 
 static void *spin_lock_thread(void *arg)
 {
-	__u32 duration, retval;
 	int err, prog_fd = *(u32 *) arg;
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+		.repeat = 10000,
+	);
+
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
+	ASSERT_OK(err, "test_run_opts err");
+	ASSERT_OK(topts.retval, "test_run_opts retval");
 
-	err = bpf_prog_test_run(prog_fd, 10000, &pkt_v4, sizeof(pkt_v4),
-				NULL, NULL, &retval, &duration);
-	CHECK(err || retval, "",
-	      "err %d errno %d retval %d duration %d\n",
-	      err, errno, retval, duration);
 	pthread_exit(arg);
 }
 
diff --git a/tools/testing/selftests/bpf/prog_tests/map_ptr.c b/tools/testing/selftests/bpf/prog_tests/map_ptr.c
index 273725504f11..43e502acf050 100644
--- a/tools/testing/selftests/bpf/prog_tests/map_ptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/map_ptr.c
@@ -9,10 +9,16 @@
 void test_map_ptr(void)
 {
 	struct map_ptr_kern_lskel *skel;
-	__u32 duration = 0, retval;
 	char buf[128];
 	int err;
 	int page_size = getpagesize();
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+		.data_out = buf,
+		.data_size_out = sizeof(buf),
+		.repeat = 1,
+	);
 
 	skel = map_ptr_kern_lskel__open();
 	if (!ASSERT_OK_PTR(skel, "skel_open"))
@@ -26,14 +32,12 @@ void test_map_ptr(void)
 
 	skel->bss->page_size = page_size;
 
-	err = bpf_prog_test_run(skel->progs.cg_skb.prog_fd, 1, &pkt_v4,
-				sizeof(pkt_v4), buf, NULL, &retval, NULL);
+	err = bpf_prog_test_run_opts(skel->progs.cg_skb.prog_fd, &topts);
 
-	if (CHECK(err, "test_run", "err=%d errno=%d\n", err, errno))
+	if (!ASSERT_OK(err, "test_run"))
 		goto cleanup;
 
-	if (CHECK(!retval, "retval", "retval=%d map_type=%u line=%u\n", retval,
-		  skel->bss->g_map_type, skel->bss->g_line))
+	if (!ASSERT_NEQ(topts.retval, 0, "test_run retval"))
 		goto cleanup;
 
 cleanup:
diff --git a/tools/testing/selftests/bpf/prog_tests/modify_return.c b/tools/testing/selftests/bpf/prog_tests/modify_return.c
index b772fe30ce9b..5d9955af6247 100644
--- a/tools/testing/selftests/bpf/prog_tests/modify_return.c
+++ b/tools/testing/selftests/bpf/prog_tests/modify_return.c
@@ -15,39 +15,31 @@ static void run_test(__u32 input_retval, __u16 want_side_effect, __s16 want_ret)
 {
 	struct modify_return *skel = NULL;
 	int err, prog_fd;
-	__u32 duration = 0, retval;
 	__u16 side_effect;
 	__s16 ret;
+	LIBBPF_OPTS(bpf_test_run_opts, topts);
 
 	skel = modify_return__open_and_load();
-	if (CHECK(!skel, "skel_load", "modify_return skeleton failed\n"))
+	if (!ASSERT_OK_PTR(skel, "skel_load"))
 		goto cleanup;
 
 	err = modify_return__attach(skel);
-	if (CHECK(err, "modify_return", "attach failed: %d\n", err))
+	if (!ASSERT_OK(err, "modify_return__attach failed"))
 		goto cleanup;
 
 	skel->bss->input_retval = input_retval;
 	prog_fd = bpf_program__fd(skel->progs.fmod_ret_test);
-	err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, 0,
-				&retval, &duration);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
+	ASSERT_OK(err, "test_run");
 
-	CHECK(err, "test_run", "err %d errno %d\n", err, errno);
+	side_effect = UPPER(topts.retval);
+	ret = LOWER(topts.retval);
 
-	side_effect = UPPER(retval);
-	ret  = LOWER(retval);
-
-	CHECK(ret != want_ret, "test_run",
-	      "unexpected ret: %d, expected: %d\n", ret, want_ret);
-	CHECK(side_effect != want_side_effect, "modify_return",
-	      "unexpected side_effect: %d\n", side_effect);
-
-	CHECK(skel->bss->fentry_result != 1, "modify_return",
-	      "fentry failed\n");
-	CHECK(skel->bss->fexit_result != 1, "modify_return",
-	      "fexit failed\n");
-	CHECK(skel->bss->fmod_ret_result != 1, "modify_return",
-	      "fmod_ret failed\n");
+	ASSERT_EQ(ret, want_ret, "test_run ret");
+	ASSERT_EQ(side_effect, want_side_effect, "modify_return side_effect");
+	ASSERT_EQ(skel->bss->fentry_result, 1, "modify_return fentry_result");
+	ASSERT_EQ(skel->bss->fexit_result, 1, "modify_return fexit_result");
+	ASSERT_EQ(skel->bss->fmod_ret_result, 1, "modify_return fmod_ret_result");
 
 cleanup:
 	modify_return__destroy(skel);
@@ -63,4 +55,3 @@ void serial_test_modify_return(void)
 		 0 /* want_side_effect */,
 		 -EINVAL /* want_ret */);
 }
-
diff --git a/tools/testing/selftests/bpf/prog_tests/pkt_access.c b/tools/testing/selftests/bpf/prog_tests/pkt_access.c
index 6628710ec3c6..0bcccdc34fbc 100644
--- a/tools/testing/selftests/bpf/prog_tests/pkt_access.c
+++ b/tools/testing/selftests/bpf/prog_tests/pkt_access.c
@@ -6,23 +6,27 @@ void test_pkt_access(void)
 {
 	const char *file = "./test_pkt_access.o";
 	struct bpf_object *obj;
-	__u32 duration, retval;
 	int err, prog_fd;
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+		.repeat = 100000,
+	);
 
 	err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
 	if (CHECK_FAIL(err))
 		return;
 
-	err = bpf_prog_test_run(prog_fd, 100000, &pkt_v4, sizeof(pkt_v4),
-				NULL, NULL, &retval, &duration);
-	CHECK(err || retval, "ipv4",
-	      "err %d errno %d retval %d duration %d\n",
-	      err, errno, retval, duration);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
+	ASSERT_OK(err, "ipv4 test_run_opts err");
+	ASSERT_OK(topts.retval, "ipv4 test_run_opts retval");
+
+	topts.data_in = &pkt_v6;
+	topts.data_size_in = sizeof(pkt_v6);
+	topts.data_size_out = 0; /* reset from last call */
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
+	ASSERT_OK(err, "ipv6 test_run_opts err");
+	ASSERT_OK(topts.retval, "ipv6 test_run_opts retval");
 
-	err = bpf_prog_test_run(prog_fd, 100000, &pkt_v6, sizeof(pkt_v6),
-				NULL, NULL, &retval, &duration);
-	CHECK(err || retval, "ipv6",
-	      "err %d errno %d retval %d duration %d\n",
-	      err, errno, retval, duration);
 	bpf_object__close(obj);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
index c9d2d6a1bfcc..00ee1dd792aa 100644
--- a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
+++ b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
@@ -6,18 +6,20 @@ void test_pkt_md_access(void)
 {
 	const char *file = "./test_pkt_md_access.o";
 	struct bpf_object *obj;
-	__u32 duration, retval;
 	int err, prog_fd;
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+		.repeat = 10,
+	);
 
 	err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
 	if (CHECK_FAIL(err))
 		return;
 
-	err = bpf_prog_test_run(prog_fd, 10, &pkt_v4, sizeof(pkt_v4),
-				NULL, NULL, &retval, &duration);
-	CHECK(err || retval, "",
-	      "err %d errno %d retval %d duration %d\n",
-	      err, errno, retval, duration);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
+	ASSERT_OK(err, "test_run_opts err");
+	ASSERT_OK(topts.retval, "test_run_opts retval");
 
 	bpf_object__close(obj);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
index b9822f914eeb..d2743fc10032 100644
--- a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
@@ -10,11 +10,18 @@ enum {
 static void test_queue_stack_map_by_type(int type)
 {
 	const int MAP_SIZE = 32;
-	__u32 vals[MAP_SIZE], duration, retval, size, val;
+	__u32 vals[MAP_SIZE], val;
 	int i, err, prog_fd, map_in_fd, map_out_fd;
 	char file[32], buf[128];
 	struct bpf_object *obj;
 	struct iphdr iph;
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+		.data_out = buf,
+		.data_size_out = sizeof(buf),
+		.repeat = 1,
+	);
 
 	/* Fill test values to be used */
 	for (i = 0; i < MAP_SIZE; i++)
@@ -58,38 +65,37 @@ static void test_queue_stack_map_by_type(int type)
 			pkt_v4.iph.saddr = vals[MAP_SIZE - 1 - i] * 5;
 		}
 
-		err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
-					buf, &size, &retval, &duration);
-		if (err || retval || size != sizeof(pkt_v4))
+		topts.data_size_out = sizeof(buf);
+		err = bpf_prog_test_run_opts(prog_fd, &topts);
+		if (err || topts.retval ||
+		    topts.data_size_out != sizeof(pkt_v4))
 			break;
 		memcpy(&iph, buf + sizeof(struct ethhdr), sizeof(iph));
 		if (iph.daddr != val)
 			break;
 	}
 
-	CHECK(err || retval || size != sizeof(pkt_v4) || iph.daddr != val,
-	      "bpf_map_pop_elem",
-	      "err %d errno %d retval %d size %d iph->daddr %u\n",
-	      err, errno, retval, size, iph.daddr);
+	ASSERT_OK(err, "bpf_map_pop_elem");
+	ASSERT_OK(topts.retval, "bpf_map_pop_elem test retval");
+	ASSERT_EQ(topts.data_size_out, sizeof(pkt_v4),
+		  "bpf_map_pop_elem data_size_out");
+	ASSERT_EQ(iph.daddr, val, "bpf_map_pop_elem iph.daddr");
 
 	/* Queue is empty, program should return TC_ACT_SHOT */
-	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
-				buf, &size, &retval, &duration);
-	CHECK(err || retval != 2 /* TC_ACT_SHOT */|| size != sizeof(pkt_v4),
-	      "check-queue-stack-map-empty",
-	      "err %d errno %d retval %d size %d\n",
-	      err, errno, retval, size);
+	topts.data_size_out = sizeof(buf);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
+	ASSERT_OK(err, "check-queue-stack-map-empty");
+	ASSERT_EQ(topts.retval, 2  /* TC_ACT_SHOT */,
+		  "check-queue-stack-map-empty test retval");
+	ASSERT_EQ(topts.data_size_out, sizeof(pkt_v4),
+		  "check-queue-stack-map-empty data_size_out");
 
 	/* Check that the program pushed elements correctly */
 	for (i = 0; i < MAP_SIZE; i++) {
 		err = bpf_map_lookup_and_delete_elem(map_out_fd, NULL, &val);
-		if (err || val != vals[i] * 5)
-			break;
+		ASSERT_OK(err, "bpf_map_lookup_and_delete_elem");
+		ASSERT_EQ(val, vals[i] * 5, "bpf_map_push_elem val");
 	}
-
-	CHECK(i != MAP_SIZE && (err || val != vals[i] * 5),
-	      "bpf_map_push_elem", "err %d value %u\n", err, val);
-
 out:
 	pkt_v4.iph.saddr = 0;
 	bpf_object__close(obj);
diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c
index 239baccabccb..f4aa7dab4766 100644
--- a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c
+++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c
@@ -56,21 +56,23 @@ void serial_test_raw_tp_writable_test_run(void)
 		0,
 	};
 
-	__u32 prog_ret;
-	int err = bpf_prog_test_run(filter_fd, 1, test_skb, sizeof(test_skb), 0,
-				    0, &prog_ret, 0);
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = test_skb,
+		.data_size_in = sizeof(test_skb),
+		.repeat = 1,
+	);
+	int err = bpf_prog_test_run_opts(filter_fd, &topts);
 	CHECK(err != 42, "test_run",
 	      "tracepoint did not modify return value\n");
-	CHECK(prog_ret != 0, "test_run_ret",
+	CHECK(topts.retval != 0, "test_run_ret",
 	      "socket_filter did not return 0\n");
 
 	close(tp_fd);
 
-	err = bpf_prog_test_run(filter_fd, 1, test_skb, sizeof(test_skb), 0, 0,
-				&prog_ret, 0);
+	err = bpf_prog_test_run_opts(filter_fd, &topts);
 	CHECK(err != 0, "test_run_notrace",
 	      "test_run failed with %d errno %d\n", err, errno);
-	CHECK(prog_ret != 0, "test_run_ret_notrace",
+	CHECK(topts.retval != 0, "test_run_ret_notrace",
 	      "socket_filter did not return 0\n");
 
 out_filterfd:
diff --git a/tools/testing/selftests/bpf/prog_tests/signal_pending.c b/tools/testing/selftests/bpf/prog_tests/signal_pending.c
index aecfe662c070..70b49da5ca0a 100644
--- a/tools/testing/selftests/bpf/prog_tests/signal_pending.c
+++ b/tools/testing/selftests/bpf/prog_tests/signal_pending.c
@@ -13,10 +13,14 @@ static void test_signal_pending_by_type(enum bpf_prog_type prog_type)
 	struct itimerval timeo = {
 		.it_value.tv_usec = 100000, /* 100ms */
 	};
-	__u32 duration = 0, retval;
 	int prog_fd;
 	int err;
 	int i;
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+		.repeat = 0xffffffff,
+	);
 
 	for (i = 0; i < ARRAY_SIZE(prog); i++)
 		prog[i] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0);
@@ -24,20 +28,17 @@ static void test_signal_pending_by_type(enum bpf_prog_type prog_type)
 
 	prog_fd = bpf_test_load_program(prog_type, prog, ARRAY_SIZE(prog),
 				   "GPL", 0, NULL, 0);
-	CHECK(prog_fd < 0, "test-run", "errno %d\n", errno);
+	ASSERT_GE(prog_fd, 0, "test-run load");
 
 	err = sigaction(SIGALRM, &sigalrm_action, NULL);
-	CHECK(err, "test-run-signal-sigaction", "errno %d\n", errno);
+	ASSERT_OK(err, "test-run-signal-sigaction");
 
 	err = setitimer(ITIMER_REAL, &timeo, NULL);
-	CHECK(err, "test-run-signal-timer", "errno %d\n", errno);
-
-	err = bpf_prog_test_run(prog_fd, 0xffffffff, &pkt_v4, sizeof(pkt_v4),
-				NULL, NULL, &retval, &duration);
-	CHECK(duration > 500000000, /* 500ms */
-	      "test-run-signal-duration",
-	      "duration %dns > 500ms\n",
-	      duration);
+	ASSERT_OK(err, "test-run-signal-timer");
+
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
+	ASSERT_LE(topts.duration, 500000000 /* 500ms */,
+		  "test-run-signal-duration");
 
 	signal(SIGALRM, SIG_DFL);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/spinlock.c b/tools/testing/selftests/bpf/prog_tests/spinlock.c
index 6307f5d2b417..8e329eaee6d7 100644
--- a/tools/testing/selftests/bpf/prog_tests/spinlock.c
+++ b/tools/testing/selftests/bpf/prog_tests/spinlock.c
@@ -4,14 +4,16 @@
 
 static void *spin_lock_thread(void *arg)
 {
-	__u32 duration, retval;
 	int err, prog_fd = *(u32 *) arg;
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+		.repeat = 10000,
+	);
 
-	err = bpf_prog_test_run(prog_fd, 10000, &pkt_v4, sizeof(pkt_v4),
-				NULL, NULL, &retval, &duration);
-	CHECK(err || retval, "",
-	      "err %d errno %d retval %d duration %d\n",
-	      err, errno, retval, duration);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
+	ASSERT_OK(err, "test_run");
+	ASSERT_OK(topts.retval, "test_run retval");
 	pthread_exit(arg);
 }
 
diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
index 796f231582f8..c4da87ec3ba4 100644
--- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c
+++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
@@ -12,9 +12,13 @@ static void test_tailcall_1(void)
 	struct bpf_map *prog_array;
 	struct bpf_program *prog;
 	struct bpf_object *obj;
-	__u32 retval, duration;
 	char prog_name[32];
 	char buff[128] = {};
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = buff,
+		.data_size_in = sizeof(buff),
+		.repeat = 1,
+	);
 
 	err = bpf_prog_test_load("tailcall1.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
 			    &prog_fd);
@@ -54,20 +58,18 @@ static void test_tailcall_1(void)
 	}
 
 	for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
-		err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
-					&duration, &retval, NULL);
-		CHECK(err || retval != i, "tailcall",
-		      "err %d errno %d retval %d\n", err, errno, retval);
+		err = bpf_prog_test_run_opts(main_fd, &topts);
+		ASSERT_OK(err, "tailcall");
+		ASSERT_EQ(topts.retval, i, "tailcall retval");
 
 		err = bpf_map_delete_elem(map_fd, &i);
 		if (CHECK_FAIL(err))
 			goto out;
 	}
 
-	err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
-				&duration, &retval, NULL);
-	CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n",
-	      err, errno, retval);
+	err = bpf_prog_test_run_opts(main_fd, &topts);
+	ASSERT_OK(err, "tailcall");
+	ASSERT_EQ(topts.retval, 3, "tailcall retval");
 
 	for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
 		snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
@@ -85,10 +87,9 @@ static void test_tailcall_1(void)
 			goto out;
 	}
 
-	err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
-				&duration, &retval, NULL);
-	CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n",
-	      err, errno, retval);
+	err = bpf_prog_test_run_opts(main_fd, &topts);
+	ASSERT_OK(err, "tailcall");
+	ASSERT_OK(topts.retval, "tailcall retval");
 
 	for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
 		j = bpf_map__max_entries(prog_array) - 1 - i;
@@ -110,30 +111,27 @@ static void test_tailcall_1(void)
 	for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
 		j = bpf_map__max_entries(prog_array) - 1 - i;
 
-		err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
-					&duration, &retval, NULL);
-		CHECK(err || retval != j, "tailcall",
-		      "err %d errno %d retval %d\n", err, errno, retval);
+		err = bpf_prog_test_run_opts(main_fd, &topts);
+		ASSERT_OK(err, "tailcall");
+		ASSERT_EQ(topts.retval, j, "tailcall retval");
 
 		err = bpf_map_delete_elem(map_fd, &i);
 		if (CHECK_FAIL(err))
 			goto out;
 	}
 
-	err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
-				&duration, &retval, NULL);
-	CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n",
-	      err, errno, retval);
+	err = bpf_prog_test_run_opts(main_fd, &topts);
+	ASSERT_OK(err, "tailcall");
+	ASSERT_EQ(topts.retval, 3, "tailcall retval");
 
 	for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
 		err = bpf_map_delete_elem(map_fd, &i);
 		if (CHECK_FAIL(err >= 0 || errno != ENOENT))
 			goto out;
 
-		err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
-					&duration, &retval, NULL);
-		CHECK(err || retval != 3, "tailcall",
-		      "err %d errno %d retval %d\n", err, errno, retval);
+		err = bpf_prog_test_run_opts(main_fd, &topts);
+		ASSERT_OK(err, "tailcall");
+		ASSERT_EQ(topts.retval, 3, "tailcall retval");
 	}
 
 out:
@@ -150,9 +148,13 @@ static void test_tailcall_2(void)
 	struct bpf_map *prog_array;
 	struct bpf_program *prog;
 	struct bpf_object *obj;
-	__u32 retval, duration;
 	char prog_name[32];
 	char buff[128] = {};
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = buff,
+		.data_size_in = sizeof(buff),
+		.repeat = 1,
+	);
 
 	err = bpf_prog_test_load("tailcall2.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
 			    &prog_fd);
@@ -191,30 +193,27 @@ static void test_tailcall_2(void)
 			goto out;
 	}
 
-	err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
-				&duration, &retval, NULL);
-	CHECK(err || retval != 2, "tailcall", "err %d errno %d retval %d\n",
-	      err, errno, retval);
+	err = bpf_prog_test_run_opts(main_fd, &topts);
+	ASSERT_OK(err, "tailcall");
+	ASSERT_EQ(topts.retval, 2, "tailcall retval");
 
 	i = 2;
 	err = bpf_map_delete_elem(map_fd, &i);
 	if (CHECK_FAIL(err))
 		goto out;
 
-	err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
-				&duration, &retval, NULL);
-	CHECK(err || retval != 1, "tailcall", "err %d errno %d retval %d\n",
-	      err, errno, retval);
+	err = bpf_prog_test_run_opts(main_fd, &topts);
+	ASSERT_OK(err, "tailcall");
+	ASSERT_EQ(topts.retval, 1, "tailcall retval");
 
 	i = 0;
 	err = bpf_map_delete_elem(map_fd, &i);
 	if (CHECK_FAIL(err))
 		goto out;
 
-	err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
-				&duration, &retval, NULL);
-	CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n",
-	      err, errno, retval);
+	err = bpf_prog_test_run_opts(main_fd, &topts);
+	ASSERT_OK(err, "tailcall");
+	ASSERT_EQ(topts.retval, 3, "tailcall retval");
 out:
 	bpf_object__close(obj);
 }
@@ -225,8 +224,12 @@ static void test_tailcall_count(const char *which)
 	struct bpf_map *prog_array, *data_map;
 	struct bpf_program *prog;
 	struct bpf_object *obj;
-	__u32 retval, duration;
 	char buff[128] = {};
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = buff,
+		.data_size_in = sizeof(buff),
+		.repeat = 1,
+	);
 
 	err = bpf_prog_test_load(which, BPF_PROG_TYPE_SCHED_CLS, &obj,
 			    &prog_fd);
@@ -262,10 +265,9 @@ static void test_tailcall_count(const char *which)
 	if (CHECK_FAIL(err))
 		goto out;
 
-	err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
-				&duration, &retval, NULL);
-	CHECK(err || retval != 1, "tailcall", "err %d errno %d retval %d\n",
-	      err, errno, retval);
+	err = bpf_prog_test_run_opts(main_fd, &topts);
+	ASSERT_OK(err, "tailcall");
+	ASSERT_EQ(topts.retval, 1, "tailcall retval");
 
 	data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
 	if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
@@ -277,18 +279,17 @@ static void test_tailcall_count(const char *which)
 
 	i = 0;
 	err = bpf_map_lookup_elem(data_fd, &i, &val);
-	CHECK(err || val != 33, "tailcall count", "err %d errno %d count %d\n",
-	      err, errno, val);
+	ASSERT_OK(err, "tailcall count");
+	ASSERT_EQ(val, 33, "tailcall count");
 
 	i = 0;
 	err = bpf_map_delete_elem(map_fd, &i);
 	if (CHECK_FAIL(err))
 		goto out;
 
-	err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
-				&duration, &retval, NULL);
-	CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n",
-	      err, errno, retval);
+	err = bpf_prog_test_run_opts(main_fd, &topts);
+	ASSERT_OK(err, "tailcall");
+	ASSERT_OK(topts.retval, "tailcall retval");
 out:
 	bpf_object__close(obj);
 }
@@ -319,10 +320,14 @@ static void test_tailcall_4(void)
 	struct bpf_map *prog_array, *data_map;
 	struct bpf_program *prog;
 	struct bpf_object *obj;
-	__u32 retval, duration;
 	static const int zero = 0;
 	char buff[128] = {};
 	char prog_name[32];
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = buff,
+		.data_size_in = sizeof(buff),
+		.repeat = 1,
+	);
 
 	err = bpf_prog_test_load("tailcall4.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
 			    &prog_fd);
@@ -374,10 +379,9 @@ static void test_tailcall_4(void)
 		if (CHECK_FAIL(err))
 			goto out;
 
-		err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
-					&duration, &retval, NULL);
-		CHECK(err || retval != i, "tailcall",
-		      "err %d errno %d retval %d\n", err, errno, retval);
+		err = bpf_prog_test_run_opts(main_fd, &topts);
+		ASSERT_OK(err, "tailcall");
+		ASSERT_EQ(topts.retval, i, "tailcall retval");
 	}
 
 	for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
@@ -389,10 +393,9 @@ static void test_tailcall_4(void)
 		if (CHECK_FAIL(err))
 			goto out;
 
-		err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
-					&duration, &retval, NULL);
-		CHECK(err || retval != 3, "tailcall",
-		      "err %d errno %d retval %d\n", err, errno, retval);
+		err = bpf_prog_test_run_opts(main_fd, &topts);
+		ASSERT_OK(err, "tailcall");
+		ASSERT_EQ(topts.retval, 3, "tailcall retval");
 	}
 out:
 	bpf_object__close(obj);
@@ -407,10 +410,14 @@ static void test_tailcall_5(void)
 	struct bpf_map *prog_array, *data_map;
 	struct bpf_program *prog;
 	struct bpf_object *obj;
-	__u32 retval, duration;
 	static const int zero = 0;
 	char buff[128] = {};
 	char prog_name[32];
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = buff,
+		.data_size_in = sizeof(buff),
+		.repeat = 1,
+	);
 
 	err = bpf_prog_test_load("tailcall5.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
 			    &prog_fd);
@@ -462,10 +469,9 @@ static void test_tailcall_5(void)
 		if (CHECK_FAIL(err))
 			goto out;
 
-		err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
-					&duration, &retval, NULL);
-		CHECK(err || retval != i, "tailcall",
-		      "err %d errno %d retval %d\n", err, errno, retval);
+		err = bpf_prog_test_run_opts(main_fd, &topts);
+		ASSERT_OK(err, "tailcall");
+		ASSERT_EQ(topts.retval, i, "tailcall retval");
 	}
 
 	for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
@@ -477,10 +483,9 @@ static void test_tailcall_5(void)
 		if (CHECK_FAIL(err))
 			goto out;
 
-		err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
-					&duration, &retval, NULL);
-		CHECK(err || retval != 3, "tailcall",
-		      "err %d errno %d retval %d\n", err, errno, retval);
+		err = bpf_prog_test_run_opts(main_fd, &topts);
+		ASSERT_OK(err, "tailcall");
+		ASSERT_EQ(topts.retval, 3, "tailcall retval");
 	}
 out:
 	bpf_object__close(obj);
@@ -495,8 +500,12 @@ static void test_tailcall_bpf2bpf_1(void)
 	struct bpf_map *prog_array;
 	struct bpf_program *prog;
 	struct bpf_object *obj;
-	__u32 retval, duration;
 	char prog_name[32];
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+		.repeat = 1,
+	);
 
 	err = bpf_prog_test_load("tailcall_bpf2bpf1.o", BPF_PROG_TYPE_SCHED_CLS,
 			    &obj, &prog_fd);
@@ -536,10 +545,9 @@ static void test_tailcall_bpf2bpf_1(void)
 			goto out;
 	}
 
-	err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
-				0, &retval, &duration);
-	CHECK(err || retval != 1, "tailcall",
-	      "err %d errno %d retval %d\n", err, errno, retval);
+	err = bpf_prog_test_run_opts(main_fd, &topts);
+	ASSERT_OK(err, "tailcall");
+	ASSERT_EQ(topts.retval, 1, "tailcall retval");
 
 	/* jmp -> nop, call subprog that will do tailcall */
 	i = 1;
@@ -547,10 +555,9 @@ static void test_tailcall_bpf2bpf_1(void)
 	if (CHECK_FAIL(err))
 		goto out;
 
-	err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
-				0, &retval, &duration);
-	CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n",
-	      err, errno, retval);
+	err = bpf_prog_test_run_opts(main_fd, &topts);
+	ASSERT_OK(err, "tailcall");
+	ASSERT_OK(topts.retval, "tailcall retval");
 
 	/* make sure that subprog can access ctx and entry prog that
 	 * called this subprog can properly return
@@ -560,11 +567,9 @@ static void test_tailcall_bpf2bpf_1(void)
 	if (CHECK_FAIL(err))
 		goto out;
 
-	err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
-				0, &retval, &duration);
-	CHECK(err || retval != sizeof(pkt_v4) * 2,
-	      "tailcall", "err %d errno %d retval %d\n",
-	      err, errno, retval);
+	err = bpf_prog_test_run_opts(main_fd, &topts);
+	ASSERT_OK(err, "tailcall");
+	ASSERT_EQ(topts.retval, sizeof(pkt_v4) * 2, "tailcall retval");
 out:
 	bpf_object__close(obj);
 }
@@ -579,8 +584,12 @@ static void test_tailcall_bpf2bpf_2(void)
 	struct bpf_map *prog_array, *data_map;
 	struct bpf_program *prog;
 	struct bpf_object *obj;
-	__u32 retval, duration;
 	char buff[128] = {};
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = buff,
+		.data_size_in = sizeof(buff),
+		.repeat = 1,
+	);
 
 	err = bpf_prog_test_load("tailcall_bpf2bpf2.o", BPF_PROG_TYPE_SCHED_CLS,
 			    &obj, &prog_fd);
@@ -616,10 +625,9 @@ static void test_tailcall_bpf2bpf_2(void)
 	if (CHECK_FAIL(err))
 		goto out;
 
-	err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
-				&duration, &retval, NULL);
-	CHECK(err || retval != 1, "tailcall", "err %d errno %d retval %d\n",
-	      err, errno, retval);
+	err = bpf_prog_test_run_opts(main_fd, &topts);
+	ASSERT_OK(err, "tailcall");
+	ASSERT_EQ(topts.retval, 1, "tailcall retval");
 
 	data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
 	if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
@@ -631,18 +639,17 @@ static void test_tailcall_bpf2bpf_2(void)
 
 	i = 0;
 	err = bpf_map_lookup_elem(data_fd, &i, &val);
-	CHECK(err || val != 33, "tailcall count", "err %d errno %d count %d\n",
-	      err, errno, val);
+	ASSERT_OK(err, "tailcall count");
+	ASSERT_EQ(val, 33, "tailcall count");
 
 	i = 0;
 	err = bpf_map_delete_elem(map_fd, &i);
 	if (CHECK_FAIL(err))
 		goto out;
 
-	err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
-				&duration, &retval, NULL);
-	CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n",
-	      err, errno, retval);
+	err = bpf_prog_test_run_opts(main_fd, &topts);
+	ASSERT_OK(err, "tailcall");
+	ASSERT_OK(topts.retval, "tailcall retval");
 out:
 	bpf_object__close(obj);
 }
@@ -657,8 +664,12 @@ static void test_tailcall_bpf2bpf_3(void)
 	struct bpf_map *prog_array;
 	struct bpf_program *prog;
 	struct bpf_object *obj;
-	__u32 retval, duration;
 	char prog_name[32];
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+		.repeat = 1,
+	);
 
 	err = bpf_prog_test_load("tailcall_bpf2bpf3.o", BPF_PROG_TYPE_SCHED_CLS,
 			    &obj, &prog_fd);
@@ -697,33 +708,27 @@ static void test_tailcall_bpf2bpf_3(void)
 			goto out;
 	}
 
-	err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
-				&duration, &retval, NULL);
-	CHECK(err || retval != sizeof(pkt_v4) * 3,
-	      "tailcall", "err %d errno %d retval %d\n",
-	      err, errno, retval);
+	err = bpf_prog_test_run_opts(main_fd, &topts);
+	ASSERT_OK(err, "tailcall");
+	ASSERT_EQ(topts.retval, sizeof(pkt_v4) * 3, "tailcall retval");
 
 	i = 1;
 	err = bpf_map_delete_elem(map_fd, &i);
 	if (CHECK_FAIL(err))
 		goto out;
 
-	err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
-				&duration, &retval, NULL);
-	CHECK(err || retval != sizeof(pkt_v4),
-	      "tailcall", "err %d errno %d retval %d\n",
-	      err, errno, retval);
+	err = bpf_prog_test_run_opts(main_fd, &topts);
+	ASSERT_OK(err, "tailcall");
+	ASSERT_EQ(topts.retval, sizeof(pkt_v4), "tailcall retval");
 
 	i = 0;
 	err = bpf_map_delete_elem(map_fd, &i);
 	if (CHECK_FAIL(err))
 		goto out;
 
-	err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
-				&duration, &retval, NULL);
-	CHECK(err || retval != sizeof(pkt_v4) * 2,
-	      "tailcall", "err %d errno %d retval %d\n",
-	      err, errno, retval);
+	err = bpf_prog_test_run_opts(main_fd, &topts);
+	ASSERT_OK(err, "tailcall");
+	ASSERT_EQ(topts.retval, sizeof(pkt_v4) * 2, "tailcall retval");
 out:
 	bpf_object__close(obj);
 }
@@ -754,8 +759,12 @@ static void test_tailcall_bpf2bpf_4(bool noise)
 	struct bpf_map *prog_array, *data_map;
 	struct bpf_program *prog;
 	struct bpf_object *obj;
-	__u32 retval, duration;
 	char prog_name[32];
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+		.repeat = 1,
+	);
 
 	err = bpf_prog_test_load("tailcall_bpf2bpf4.o", BPF_PROG_TYPE_SCHED_CLS,
 			    &obj, &prog_fd);
@@ -809,15 +818,14 @@ static void test_tailcall_bpf2bpf_4(bool noise)
 	if (CHECK_FAIL(err))
 		goto out;
 
-	err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
-				&duration, &retval, NULL);
-	CHECK(err || retval != sizeof(pkt_v4) * 3, "tailcall", "err %d errno %d retval %d\n",
-	      err, errno, retval);
+	err = bpf_prog_test_run_opts(main_fd, &topts);
+	ASSERT_OK(err, "tailcall");
+	ASSERT_EQ(topts.retval, sizeof(pkt_v4) * 3, "tailcall retval");
 
 	i = 0;
 	err = bpf_map_lookup_elem(data_fd, &i, &val);
-	CHECK(err || val.count != 31, "tailcall count", "err %d errno %d count %d\n",
-	      err, errno, val.count);
+	ASSERT_OK(err, "tailcall count");
+	ASSERT_EQ(val.count, 31, "tailcall count");
 
 out:
 	bpf_object__close(obj);
diff --git a/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c b/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c
index cf1215531920..ae93411fd582 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c
@@ -6,15 +6,18 @@
 
 static int sanity_run(struct bpf_program *prog)
 {
-	__u32 duration, retval;
 	int err, prog_fd;
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+		.repeat = 1,
+	);
 
 	prog_fd = bpf_program__fd(prog);
-	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
-				NULL, NULL, &retval, &duration);
-	if (CHECK(err || retval != 123, "test_run",
-		  "err %d errno %d retval %d duration %d\n",
-		  err, errno, retval, duration))
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
+	if (!ASSERT_OK(err, "test_run"))
+		return -1;
+	if (!ASSERT_EQ(topts.retval, 123, "test_run retval"))
 		return -1;
 	return 0;
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c
index 0f4e49e622cd..7eb049214859 100644
--- a/tools/testing/selftests/bpf/prog_tests/timer.c
+++ b/tools/testing/selftests/bpf/prog_tests/timer.c
@@ -6,7 +6,7 @@
 static int timer(struct timer *timer_skel)
 {
 	int err, prog_fd;
-	__u32 duration = 0, retval;
+	LIBBPF_OPTS(bpf_test_run_opts, topts);
 
 	err = timer__attach(timer_skel);
 	if (!ASSERT_OK(err, "timer_attach"))
@@ -16,10 +16,9 @@ static int timer(struct timer *timer_skel)
 	ASSERT_EQ(timer_skel->data->callback2_check, 52, "callback2_check1");
 
 	prog_fd = bpf_program__fd(timer_skel->progs.test1);
-	err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
-				NULL, NULL, &retval, &duration);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
 	ASSERT_OK(err, "test_run");
-	ASSERT_EQ(retval, 0, "test_run");
+	ASSERT_EQ(topts.retval, 0, "test_run");
 	timer__detach(timer_skel);
 
 	usleep(50); /* 10 usecs should be enough, but give it extra */
diff --git a/tools/testing/selftests/bpf/prog_tests/timer_mim.c b/tools/testing/selftests/bpf/prog_tests/timer_mim.c
index 949a0617869d..2ee5f5ae11d4 100644
--- a/tools/testing/selftests/bpf/prog_tests/timer_mim.c
+++ b/tools/testing/selftests/bpf/prog_tests/timer_mim.c
@@ -6,19 +6,18 @@
 
 static int timer_mim(struct timer_mim *timer_skel)
 {
-	__u32 duration = 0, retval;
 	__u64 cnt1, cnt2;
 	int err, prog_fd, key1 = 1;
+	LIBBPF_OPTS(bpf_test_run_opts, topts);
 
 	err = timer_mim__attach(timer_skel);
 	if (!ASSERT_OK(err, "timer_attach"))
 		return err;
 
 	prog_fd = bpf_program__fd(timer_skel->progs.test1);
-	err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
-				NULL, NULL, &retval, &duration);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
 	ASSERT_OK(err, "test_run");
-	ASSERT_EQ(retval, 0, "test_run");
+	ASSERT_EQ(topts.retval, 0, "test_run");
 	timer_mim__detach(timer_skel);
 
 	/* check that timer_cb[12] are incrementing 'cnt' */
diff --git a/tools/testing/selftests/bpf/prog_tests/trace_ext.c b/tools/testing/selftests/bpf/prog_tests/trace_ext.c
index 924441d4362d..aabdff7bea3e 100644
--- a/tools/testing/selftests/bpf/prog_tests/trace_ext.c
+++ b/tools/testing/selftests/bpf/prog_tests/trace_ext.c
@@ -23,8 +23,12 @@ void test_trace_ext(void)
 	int err, pkt_fd, ext_fd;
 	struct bpf_program *prog;
 	char buf[100];
-	__u32 retval;
 	__u64 len;
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+		.repeat = 1,
+	);
 
 	/* open/load/attach test_pkt_md_access */
 	skel_pkt = test_pkt_md_access__open_and_load();
@@ -77,32 +81,32 @@ void test_trace_ext(void)
 
 	/* load/attach tracing */
 	err = test_trace_ext_tracing__load(skel_trace);
-	if (CHECK(err, "setup", "tracing/test_pkt_md_access_new load failed\n")) {
+	if (!ASSERT_OK(err, "tracing/test_pkt_md_access_new load")) {
 		libbpf_strerror(err, buf, sizeof(buf));
 		fprintf(stderr, "%s\n", buf);
 		goto cleanup;
 	}
 
 	err = test_trace_ext_tracing__attach(skel_trace);
-	if (CHECK(err, "setup", "tracing/test_pkt_md_access_new attach failed: %d\n", err))
+	if (!ASSERT_OK(err, "tracing/test_pkt_md_access_new attach"))
 		goto cleanup;
 
 	/* trigger the test */
-	err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4),
-				NULL, NULL, &retval, &duration);
-	CHECK(err || retval, "run", "err %d errno %d retval %d\n", err, errno, retval);
+	err = bpf_prog_test_run_opts(pkt_fd, &topts);
+	ASSERT_OK(err, "test_run_opts err");
+	ASSERT_OK(topts.retval, "test_run_opts retval");
 
 	bss_ext = skel_ext->bss;
 	bss_trace = skel_trace->bss;
 
 	len = bss_ext->ext_called;
 
-	CHECK(bss_ext->ext_called == 0,
-		"check", "failed to trigger freplace/test_pkt_md_access\n");
-	CHECK(bss_trace->fentry_called != len,
-		"check", "failed to trigger fentry/test_pkt_md_access_new\n");
-	CHECK(bss_trace->fexit_called != len,
-		"check", "failed to trigger fexit/test_pkt_md_access_new\n");
+	ASSERT_NEQ(bss_ext->ext_called, 0,
+		  "failed to trigger freplace/test_pkt_md_access");
+	ASSERT_EQ(bss_trace->fentry_called, len,
+		  "failed to trigger fentry/test_pkt_md_access_new");
+	ASSERT_EQ(bss_trace->fexit_called, len,
+		   "failed to trigger fexit/test_pkt_md_access_new");
 
 cleanup:
 	test_trace_ext_tracing__destroy(skel_trace);
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp.c b/tools/testing/selftests/bpf/prog_tests/xdp.c
index ac65456b7ab8..ec21c53cb1da 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp.c
@@ -13,8 +13,14 @@ void test_xdp(void)
 	char buf[128];
 	struct ipv6hdr iph6;
 	struct iphdr iph;
-	__u32 duration, retval, size;
 	int err, prog_fd, map_fd;
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+		.data_out = buf,
+		.data_size_out = sizeof(buf),
+		.repeat = 1,
+	);
 
 	err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
 	if (CHECK_FAIL(err))
@@ -26,21 +32,23 @@ void test_xdp(void)
 	bpf_map_update_elem(map_fd, &key4, &value4, 0);
 	bpf_map_update_elem(map_fd, &key6, &value6, 0);
 
-	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
-				buf, &size, &retval, &duration);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
 	memcpy(&iph, buf + sizeof(struct ethhdr), sizeof(iph));
-	CHECK(err || retval != XDP_TX || size != 74 ||
-	      iph.protocol != IPPROTO_IPIP, "ipv4",
-	      "err %d errno %d retval %d size %d\n",
-	      err, errno, retval, size);
+	ASSERT_OK(err, "test_run");
+	ASSERT_EQ(topts.retval, XDP_TX, "ipv4 test_run retval");
+	ASSERT_EQ(topts.data_size_out, 74, "ipv4 test_run data_size_out");
+	ASSERT_EQ(iph.protocol, IPPROTO_IPIP, "ipv4 test_run iph.protocol");
 
-	err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
-				buf, &size, &retval, &duration);
+	topts.data_in = &pkt_v6;
+	topts.data_size_in = sizeof(pkt_v6);
+	topts.data_size_out = sizeof(buf);
+
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
 	memcpy(&iph6, buf + sizeof(struct ethhdr), sizeof(iph6));
-	CHECK(err || retval != XDP_TX || size != 114 ||
-	      iph6.nexthdr != IPPROTO_IPV6, "ipv6",
-	      "err %d errno %d retval %d size %d\n",
-	      err, errno, retval, size);
+	ASSERT_OK(err, "test_run");
+	ASSERT_EQ(topts.retval, XDP_TX, "ipv6 test_run retval");
+	ASSERT_EQ(topts.data_size_out, 114, "ipv6 test_run data_size_out");
+	ASSERT_EQ(iph6.nexthdr, IPPROTO_IPV6, "ipv6 test_run iph6.nexthdr");
 out:
 	bpf_object__close(obj);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c
index 31c188666e81..134d0ac32f59 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c
@@ -5,12 +5,12 @@
 void test_xdp_update_frags(void)
 {
 	const char *file = "./test_xdp_update_frags.o";
-	__u32 duration, retval, size;
 	struct bpf_program *prog;
 	struct bpf_object *obj;
 	int err, prog_fd;
 	__u32 *offset;
 	__u8 *buf;
+	LIBBPF_OPTS(bpf_test_run_opts, topts);
 
 	obj = bpf_object__open(file);
 	if (libbpf_get_error(obj))
@@ -32,12 +32,16 @@ void test_xdp_update_frags(void)
 	buf[*offset] = 0xaa;		/* marker at offset 16 (head) */
 	buf[*offset + 15] = 0xaa;	/* marker at offset 31 (head) */
 
-	err = bpf_prog_test_run(prog_fd, 1, buf, 128,
-				buf, &size, &retval, &duration);
+	topts.data_in = buf;
+	topts.data_out = buf;
+	topts.data_size_in = 128;
+	topts.data_size_out = 128;
+
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
 
 	/* test_xdp_update_frags: buf[16,31]: 0xaa -> 0xbb */
 	ASSERT_OK(err, "xdp_update_frag");
-	ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval");
+	ASSERT_EQ(topts.retval, XDP_PASS, "xdp_update_frag retval");
 	ASSERT_EQ(buf[16], 0xbb, "xdp_update_frag buf[16]");
 	ASSERT_EQ(buf[31], 0xbb, "xdp_update_frag buf[31]");
 
@@ -53,12 +57,16 @@ void test_xdp_update_frags(void)
 	buf[*offset] = 0xaa;		/* marker at offset 5000 (frag0) */
 	buf[*offset + 15] = 0xaa;	/* marker at offset 5015 (frag0) */
 
-	err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
-				buf, &size, &retval, &duration);
+	topts.data_in = buf;
+	topts.data_out = buf;
+	topts.data_size_in = 9000;
+	topts.data_size_out = 9000;
+
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
 
 	/* test_xdp_update_frags: buf[5000,5015]: 0xaa -> 0xbb */
 	ASSERT_OK(err, "xdp_update_frag");
-	ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval");
+	ASSERT_EQ(topts.retval, XDP_PASS, "xdp_update_frag retval");
 	ASSERT_EQ(buf[5000], 0xbb, "xdp_update_frag buf[5000]");
 	ASSERT_EQ(buf[5015], 0xbb, "xdp_update_frag buf[5015]");
 
@@ -68,12 +76,11 @@ void test_xdp_update_frags(void)
 	buf[*offset] = 0xaa;		/* marker at offset 3510 (head) */
 	buf[*offset + 15] = 0xaa;	/* marker at offset 3525 (frag0) */
 
-	err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
-				buf, &size, &retval, &duration);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
 
 	/* test_xdp_update_frags: buf[3510,3525]: 0xaa -> 0xbb */
 	ASSERT_OK(err, "xdp_update_frag");
-	ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval");
+	ASSERT_EQ(topts.retval, XDP_PASS, "xdp_update_frag retval");
 	ASSERT_EQ(buf[3510], 0xbb, "xdp_update_frag buf[3510]");
 	ASSERT_EQ(buf[3525], 0xbb, "xdp_update_frag buf[3525]");
 
@@ -83,12 +90,11 @@ void test_xdp_update_frags(void)
 	buf[*offset] = 0xaa;		/* marker at offset 7606 (frag0) */
 	buf[*offset + 15] = 0xaa;	/* marker at offset 7621 (frag1) */
 
-	err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
-				buf, &size, &retval, &duration);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
 
 	/* test_xdp_update_frags: buf[7606,7621]: 0xaa -> 0xbb */
 	ASSERT_OK(err, "xdp_update_frag");
-	ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval");
+	ASSERT_EQ(topts.retval, XDP_PASS, "xdp_update_frag retval");
 	ASSERT_EQ(buf[7606], 0xbb, "xdp_update_frag buf[7606]");
 	ASSERT_EQ(buf[7621], 0xbb, "xdp_update_frag buf[7621]");
 
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
index ccc9e63254a8..0289d09b350f 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
@@ -5,26 +5,34 @@
 static void test_xdp_adjust_tail_shrink(void)
 {
 	const char *file = "./test_xdp_adjust_tail_shrink.o";
-	__u32 duration, retval, size, expect_sz;
+	__u32 expect_sz;
 	struct bpf_object *obj;
 	int err, prog_fd;
 	char buf[128];
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+		.data_out = buf,
+		.data_size_out = sizeof(buf),
+		.repeat = 1,
+	);
 
 	err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
 	if (ASSERT_OK(err, "test_xdp_adjust_tail_shrink"))
 		return;
 
-	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
-				buf, &size, &retval, &duration);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
 	ASSERT_OK(err, "ipv4");
-	ASSERT_EQ(retval, XDP_DROP, "ipv4 retval");
+	ASSERT_EQ(topts.retval, XDP_DROP, "ipv4 retval");
 
 	expect_sz = sizeof(pkt_v6) - 20;  /* Test shrink with 20 bytes */
-	err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
-				buf, &size, &retval, &duration);
+	topts.data_in = &pkt_v6;
+	topts.data_size_in = sizeof(pkt_v6);
+	topts.data_size_out = sizeof(buf);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
 	ASSERT_OK(err, "ipv6");
-	ASSERT_EQ(retval, XDP_TX, "ipv6 retval");
-	ASSERT_EQ(size, expect_sz, "ipv6 size");
+	ASSERT_EQ(topts.retval, XDP_TX, "ipv6 retval");
+	ASSERT_EQ(topts.data_size_out, expect_sz, "ipv6 size");
 
 	bpf_object__close(obj);
 }
@@ -34,24 +42,31 @@ static void test_xdp_adjust_tail_grow(void)
 	const char *file = "./test_xdp_adjust_tail_grow.o";
 	struct bpf_object *obj;
 	char buf[4096]; /* avoid segfault: large buf to hold grow results */
-	__u32 duration, retval, size, expect_sz;
+	__u32 expect_sz;
 	int err, prog_fd;
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+		.data_out = buf,
+		.data_size_out = sizeof(buf),
+		.repeat = 1,
+	);
 
 	err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
 	if (ASSERT_OK(err, "test_xdp_adjust_tail_grow"))
 		return;
 
-	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
-				buf, &size, &retval, &duration);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
 	ASSERT_OK(err, "ipv4");
-	ASSERT_EQ(retval, XDP_DROP, "ipv4 retval");
+	ASSERT_EQ(topts.retval, XDP_DROP, "ipv4 retval");
 
 	expect_sz = sizeof(pkt_v6) + 40; /* Test grow with 40 bytes */
-	err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6) /* 74 */,
-				buf, &size, &retval, &duration);
+	topts.data_in = &pkt_v6;
+	topts.data_size_in = sizeof(pkt_v6);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
 	ASSERT_OK(err, "ipv6");
-	ASSERT_EQ(retval, XDP_TX, "ipv6 retval");
-	ASSERT_EQ(size, expect_sz, "ipv6 size");
+	ASSERT_EQ(topts.retval, XDP_TX, "ipv6 retval");
+	ASSERT_EQ(topts.data_size_out, expect_sz, "ipv6 size");
 
 	bpf_object__close(obj);
 }
@@ -121,11 +136,12 @@ static void test_xdp_adjust_tail_grow2(void)
 void test_xdp_adjust_frags_tail_shrink(void)
 {
 	const char *file = "./test_xdp_adjust_tail_shrink.o";
-	__u32 duration, retval, size, exp_size;
+	__u32 exp_size;
 	struct bpf_program *prog;
 	struct bpf_object *obj;
 	int err, prog_fd;
 	__u8 *buf;
+	LIBBPF_OPTS(bpf_test_run_opts, topts);
 
 	/* For the individual test cases, the first byte in the packet
 	 * indicates which test will be run.
@@ -148,32 +164,36 @@ void test_xdp_adjust_frags_tail_shrink(void)
 
 	/* Test case removing 10 bytes from last frag, NOT freeing it */
 	exp_size = 8990; /* 9000 - 10 */
-	err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
-				buf, &size, &retval, &duration);
+	topts.data_in = buf;
+	topts.data_out = buf;
+	topts.data_size_in = 9000;
+	topts.data_size_out = 9000;
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
 
 	ASSERT_OK(err, "9Kb-10b");
-	ASSERT_EQ(retval, XDP_TX, "9Kb-10b retval");
-	ASSERT_EQ(size, exp_size, "9Kb-10b size");
+	ASSERT_EQ(topts.retval, XDP_TX, "9Kb-10b retval");
+	ASSERT_EQ(topts.data_size_out, exp_size, "9Kb-10b size");
 
 	/* Test case removing one of two pages, assuming 4K pages */
 	buf[0] = 1;
 	exp_size = 4900; /* 9000 - 4100 */
-	err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
-				buf, &size, &retval, &duration);
+
+	topts.data_size_out = 9000; /* reset from previous invocation */
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
 
 	ASSERT_OK(err, "9Kb-4Kb");
-	ASSERT_EQ(retval, XDP_TX, "9Kb-4Kb retval");
-	ASSERT_EQ(size, exp_size, "9Kb-4Kb size");
+	ASSERT_EQ(topts.retval, XDP_TX, "9Kb-4Kb retval");
+	ASSERT_EQ(topts.data_size_out, exp_size, "9Kb-4Kb size");
 
 	/* Test case removing two pages resulting in a linear xdp_buff */
 	buf[0] = 2;
 	exp_size = 800; /* 9000 - 8200 */
-	err = bpf_prog_test_run(prog_fd, 1, buf, 9000,
-				buf, &size, &retval, &duration);
+	topts.data_size_out = 9000; /* reset from previous invocation */
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
 
 	ASSERT_OK(err, "9Kb-9Kb");
-	ASSERT_EQ(retval, XDP_TX, "9Kb-9Kb retval");
-	ASSERT_EQ(size, exp_size, "9Kb-9Kb size");
+	ASSERT_EQ(topts.retval, XDP_TX, "9Kb-9Kb retval");
+	ASSERT_EQ(topts.data_size_out, exp_size, "9Kb-9Kb size");
 
 	free(buf);
 out:
@@ -183,11 +203,12 @@ out:
 void test_xdp_adjust_frags_tail_grow(void)
 {
 	const char *file = "./test_xdp_adjust_tail_grow.o";
-	__u32 duration, retval, size, exp_size;
+	__u32 exp_size;
 	struct bpf_program *prog;
 	struct bpf_object *obj;
 	int err, i, prog_fd;
 	__u8 *buf;
+	LIBBPF_OPTS(bpf_test_run_opts, topts);
 
 	obj = bpf_object__open(file);
 	if (libbpf_get_error(obj))
@@ -205,14 +226,17 @@ void test_xdp_adjust_frags_tail_grow(void)
 
 	/* Test case add 10 bytes to last frag */
 	memset(buf, 1, 16384);
-	size = 9000;
-	exp_size = size + 10;
-	err = bpf_prog_test_run(prog_fd, 1, buf, size,
-				buf, &size, &retval, &duration);
+	exp_size = 9000 + 10;
+
+	topts.data_in = buf;
+	topts.data_out = buf;
+	topts.data_size_in = 9000;
+	topts.data_size_out = 16384;
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
 
 	ASSERT_OK(err, "9Kb+10b");
-	ASSERT_EQ(retval, XDP_TX, "9Kb+10b retval");
-	ASSERT_EQ(size, exp_size, "9Kb+10b size");
+	ASSERT_EQ(topts.retval, XDP_TX, "9Kb+10b retval");
+	ASSERT_EQ(topts.data_size_out, exp_size, "9Kb+10b size");
 
 	for (i = 0; i < 9000; i++)
 		ASSERT_EQ(buf[i], 1, "9Kb+10b-old");
@@ -225,14 +249,16 @@ void test_xdp_adjust_frags_tail_grow(void)
 
 	/* Test a too large grow */
 	memset(buf, 1, 16384);
-	size = 9001;
-	exp_size = size;
-	err = bpf_prog_test_run(prog_fd, 1, buf, size,
-				buf, &size, &retval, &duration);
+	exp_size = 9001;
+
+	topts.data_in = topts.data_out = buf;
+	topts.data_size_in = 9001;
+	topts.data_size_out = 16384;
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
 
 	ASSERT_OK(err, "9Kb+10b");
-	ASSERT_EQ(retval, XDP_DROP, "9Kb+10b retval");
-	ASSERT_EQ(size, exp_size, "9Kb+10b size");
+	ASSERT_EQ(topts.retval, XDP_DROP, "9Kb+10b retval");
+	ASSERT_EQ(topts.data_size_out, exp_size, "9Kb+10b size");
 
 	free(buf);
 out:
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
index 9c395ea680c6..76967d8ace9c 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
@@ -45,9 +45,9 @@ static void run_xdp_bpf2bpf_pkt_size(int pkt_fd, struct perf_buffer *pb,
 				     struct test_xdp_bpf2bpf *ftrace_skel,
 				     int pkt_size)
 {
-	__u32 duration = 0, retval, size;
 	__u8 *buf, *buf_in;
 	int err;
+	LIBBPF_OPTS(bpf_test_run_opts, topts);
 
 	if (!ASSERT_LE(pkt_size, BUF_SZ, "pkt_size") ||
 	    !ASSERT_GE(pkt_size, sizeof(pkt_v4), "pkt_size"))
@@ -73,12 +73,16 @@ static void run_xdp_bpf2bpf_pkt_size(int pkt_fd, struct perf_buffer *pb,
 	}
 
 	/* Run test program */
-	err = bpf_prog_test_run(pkt_fd, 1, buf_in, pkt_size,
-				buf, &size, &retval, &duration);
+	topts.data_in = buf_in;
+	topts.data_size_in = pkt_size;
+	topts.data_out = buf;
+	topts.data_size_out = BUF_SZ;
+
+	err = bpf_prog_test_run_opts(pkt_fd, &topts);
 
 	ASSERT_OK(err, "ipv4");
-	ASSERT_EQ(retval, XDP_PASS, "ipv4 retval");
-	ASSERT_EQ(size, pkt_size, "ipv4 size");
+	ASSERT_EQ(topts.retval, XDP_PASS, "ipv4 retval");
+	ASSERT_EQ(topts.data_size_out, pkt_size, "ipv4 size");
 
 	/* Make sure bpf_xdp_output() was triggered and it sent the expected
 	 * data to the perf ring buffer.
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
index 0281095de266..92ef0aa50866 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
@@ -25,43 +25,49 @@ void test_xdp_noinline(void)
 		__u8 flags;
 	} real_def = {.dst = MAGIC_VAL};
 	__u32 ch_key = 11, real_num = 3;
-	__u32 duration = 0, retval, size;
 	int err, i;
 	__u64 bytes = 0, pkts = 0;
 	char buf[128];
 	u32 *magic = (u32 *)buf;
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+		.data_out = buf,
+		.data_size_out = sizeof(buf),
+		.repeat = NUM_ITER,
+	);
 
 	skel = test_xdp_noinline__open_and_load();
-	if (CHECK(!skel, "skel_open_and_load", "failed\n"))
+	if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
 		return;
 
 	bpf_map_update_elem(bpf_map__fd(skel->maps.vip_map), &key, &value, 0);
 	bpf_map_update_elem(bpf_map__fd(skel->maps.ch_rings), &ch_key, &real_num, 0);
 	bpf_map_update_elem(bpf_map__fd(skel->maps.reals), &real_num, &real_def, 0);
 
-	err = bpf_prog_test_run(bpf_program__fd(skel->progs.balancer_ingress_v4),
-				NUM_ITER, &pkt_v4, sizeof(pkt_v4),
-				buf, &size, &retval, &duration);
-	CHECK(err || retval != 1 || size != 54 ||
-	      *magic != MAGIC_VAL, "ipv4",
-	      "err %d errno %d retval %d size %d magic %x\n",
-	      err, errno, retval, size, *magic);
+	err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.balancer_ingress_v4), &topts);
+	ASSERT_OK(err, "ipv4 test_run");
+	ASSERT_EQ(topts.retval, 1, "ipv4 test_run retval");
+	ASSERT_EQ(topts.data_size_out, 54, "ipv4 test_run data_size_out");
+	ASSERT_EQ(*magic, MAGIC_VAL, "ipv4 test_run magic");
 
-	err = bpf_prog_test_run(bpf_program__fd(skel->progs.balancer_ingress_v6),
-				NUM_ITER, &pkt_v6, sizeof(pkt_v6),
-				buf, &size, &retval, &duration);
-	CHECK(err || retval != 1 || size != 74 ||
-	      *magic != MAGIC_VAL, "ipv6",
-	      "err %d errno %d retval %d size %d magic %x\n",
-	      err, errno, retval, size, *magic);
+	topts.data_in = &pkt_v6;
+	topts.data_size_in = sizeof(pkt_v6);
+	topts.data_out = buf;
+	topts.data_size_out = sizeof(buf);
+
+	err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.balancer_ingress_v6), &topts);
+	ASSERT_OK(err, "ipv6 test_run");
+	ASSERT_EQ(topts.retval, 1, "ipv6 test_run retval");
+	ASSERT_EQ(topts.data_size_out, 74, "ipv6 test_run data_size_out");
+	ASSERT_EQ(*magic, MAGIC_VAL, "ipv6 test_run magic");
 
 	bpf_map_lookup_elem(bpf_map__fd(skel->maps.stats), &stats_key, stats);
 	for (i = 0; i < nr_cpus; i++) {
 		bytes += stats[i].bytes;
 		pkts += stats[i].pkts;
 	}
-	CHECK(bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2,
-	      "stats", "bytes %lld pkts %lld\n",
-	      (unsigned long long)bytes, (unsigned long long)pkts);
+	ASSERT_EQ(bytes, MAGIC_BYTES * NUM_ITER * 2, "stats bytes");
+	ASSERT_EQ(pkts, NUM_ITER * 2, "stats pkts");
 	test_xdp_noinline__destroy(skel);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_perf.c b/tools/testing/selftests/bpf/prog_tests/xdp_perf.c
index 15a3900e4370..f543d1bd21b8 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_perf.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_perf.c
@@ -4,22 +4,25 @@
 void test_xdp_perf(void)
 {
 	const char *file = "./xdp_dummy.o";
-	__u32 duration, retval, size;
 	struct bpf_object *obj;
 	char in[128], out[128];
 	int err, prog_fd;
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = in,
+		.data_size_in = sizeof(in),
+		.data_out = out,
+		.data_size_out = sizeof(out),
+		.repeat = 1000000,
+	);
 
 	err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
 	if (CHECK_FAIL(err))
 		return;
 
-	err = bpf_prog_test_run(prog_fd, 1000000, &in[0], 128,
-				out, &size, &retval, &duration);
-
-	CHECK(err || retval != XDP_PASS || size != 128,
-	      "xdp-perf",
-	      "err %d errno %d retval %d size %d\n",
-	      err, errno, retval, size);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
+	ASSERT_OK(err, "test_run");
+	ASSERT_EQ(topts.retval, XDP_PASS, "test_run retval");
+	ASSERT_EQ(topts.data_size_out, 128, "test_run data_size_out");
 
 	bpf_object__close(obj);
 }
diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
index b9f1bbbc8aba..6e6235185a86 100644
--- a/tools/testing/selftests/bpf/test_lru_map.c
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -61,7 +61,11 @@ static int bpf_map_lookup_elem_with_ref_bit(int fd, unsigned long long key,
 	};
 	__u8 data[64] = {};
 	int mfd, pfd, ret, zero = 0;
-	__u32 retval = 0;
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = data,
+		.data_size_in = sizeof(data),
+		.repeat = 1,
+	);
 
 	mfd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), sizeof(__u64), 1, NULL);
 	if (mfd < 0)
@@ -75,9 +79,8 @@ static int bpf_map_lookup_elem_with_ref_bit(int fd, unsigned long long key,
 		return -1;
 	}
 
-	ret = bpf_prog_test_run(pfd, 1, data, sizeof(data),
-				NULL, NULL, &retval, NULL);
-	if (ret < 0 || retval != 42) {
+	ret = bpf_prog_test_run_opts(pfd, &topts);
+	if (ret < 0 || topts.retval != 42) {
 		ret = -1;
 	} else {
 		assert(!bpf_map_lookup_elem(mfd, &zero, value));
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 29bbaa58233c..163b303e8a2a 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -1021,13 +1021,18 @@ static int do_prog_test_run(int fd_prog, bool unpriv, uint32_t expected_val,
 {
 	__u8 tmp[TEST_DATA_LEN << 2];
 	__u32 size_tmp = sizeof(tmp);
-	uint32_t retval;
 	int err, saved_errno;
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = data,
+		.data_size_in = size_data,
+		.data_out = tmp,
+		.data_size_out = size_tmp,
+		.repeat = 1,
+	);
 
 	if (unpriv)
 		set_admin(true);
-	err = bpf_prog_test_run(fd_prog, 1, data, size_data,
-				tmp, &size_tmp, &retval, NULL);
+	err = bpf_prog_test_run_opts(fd_prog, &topts);
 	saved_errno = errno;
 
 	if (unpriv)
@@ -1051,9 +1056,8 @@ static int do_prog_test_run(int fd_prog, bool unpriv, uint32_t expected_val,
 		}
 	}
 
-	if (retval != expected_val &&
-	    expected_val != POINTER_VALUE) {
-		printf("FAIL retval %d != %d ", retval, expected_val);
+	if (topts.retval != expected_val && expected_val != POINTER_VALUE) {
+		printf("FAIL retval %d != %d ", topts.retval, expected_val);
 		return 1;
 	}
 
-- 
cgit 


From 3931618378451f7ae884b14e4120e07560875cab Mon Sep 17 00:00:00 2001
From: Delyan Kratunov <delyank@fb.com>
Date: Wed, 2 Feb 2022 15:54:21 -0800
Subject: selftests/bpf: Migrate from bpf_prog_test_run_xattr

bpf_prog_test_run_xattr is being deprecated in favor of the OPTS-based
bpf_prog_test_run_opts.
We end up unable to use CHECK_ATTR so replace usages with ASSERT_* calls.
Also, prog_run_xattr is now prog_run_opts.

Signed-off-by: Delyan Kratunov <delyank@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220202235423.1097270-3-delyank@fb.com
---
 tools/testing/selftests/bpf/prog_tests/check_mtu.c | 40 ++++-------
 .../selftests/bpf/prog_tests/cls_redirect.c        | 10 +--
 .../selftests/bpf/prog_tests/dummy_st_ops.c        | 27 ++++---
 .../selftests/bpf/prog_tests/flow_dissector.c      | 31 ++++----
 tools/testing/selftests/bpf/prog_tests/kfree_skb.c | 16 ++---
 .../selftests/bpf/prog_tests/prog_run_opts.c       | 77 ++++++++++++++++++++
 .../selftests/bpf/prog_tests/prog_run_xattr.c      | 83 ----------------------
 .../selftests/bpf/prog_tests/raw_tp_test_run.c     | 64 +++++++----------
 tools/testing/selftests/bpf/prog_tests/skb_ctx.c   | 81 ++++++++-------------
 .../testing/selftests/bpf/prog_tests/skb_helpers.c | 16 ++---
 .../selftests/bpf/prog_tests/sockmap_basic.c       | 20 +++---
 tools/testing/selftests/bpf/prog_tests/syscall.c   | 10 +--
 .../selftests/bpf/prog_tests/test_profiler.c       | 14 ++--
 .../selftests/bpf/prog_tests/xdp_adjust_tail.c     | 12 ++--
 14 files changed, 218 insertions(+), 283 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/prog_run_opts.c
 delete mode 100644 tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/check_mtu.c b/tools/testing/selftests/bpf/prog_tests/check_mtu.c
index f73e6e36b74d..12f4395f18b3 100644
--- a/tools/testing/selftests/bpf/prog_tests/check_mtu.c
+++ b/tools/testing/selftests/bpf/prog_tests/check_mtu.c
@@ -79,28 +79,21 @@ static void test_check_mtu_run_xdp(struct test_check_mtu *skel,
 				   struct bpf_program *prog,
 				   __u32 mtu_expect)
 {
-	const char *prog_name = bpf_program__name(prog);
 	int retval_expect = XDP_PASS;
 	__u32 mtu_result = 0;
 	char buf[256] = {};
-	int err;
-	struct bpf_prog_test_run_attr tattr = {
+	int err, prog_fd = bpf_program__fd(prog);
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
 		.repeat = 1,
 		.data_in = &pkt_v4,
 		.data_size_in = sizeof(pkt_v4),
 		.data_out = buf,
 		.data_size_out = sizeof(buf),
-		.prog_fd = bpf_program__fd(prog),
-	};
-
-	err = bpf_prog_test_run_xattr(&tattr);
-	CHECK_ATTR(err != 0, "bpf_prog_test_run",
-		   "prog_name:%s (err %d errno %d retval %d)\n",
-		   prog_name, err, errno, tattr.retval);
+	);
 
-	CHECK(tattr.retval != retval_expect, "retval",
-	      "progname:%s unexpected retval=%d expected=%d\n",
-	      prog_name, tattr.retval, retval_expect);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
+	ASSERT_OK(err, "test_run");
+	ASSERT_EQ(topts.retval, retval_expect, "retval");
 
 	/* Extract MTU that BPF-prog got */
 	mtu_result = skel->bss->global_bpf_mtu_xdp;
@@ -139,28 +132,21 @@ static void test_check_mtu_run_tc(struct test_check_mtu *skel,
 				  struct bpf_program *prog,
 				  __u32 mtu_expect)
 {
-	const char *prog_name = bpf_program__name(prog);
 	int retval_expect = BPF_OK;
 	__u32 mtu_result = 0;
 	char buf[256] = {};
-	int err;
-	struct bpf_prog_test_run_attr tattr = {
-		.repeat = 1,
+	int err, prog_fd = bpf_program__fd(prog);
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
 		.data_in = &pkt_v4,
 		.data_size_in = sizeof(pkt_v4),
 		.data_out = buf,
 		.data_size_out = sizeof(buf),
-		.prog_fd = bpf_program__fd(prog),
-	};
-
-	err = bpf_prog_test_run_xattr(&tattr);
-	CHECK_ATTR(err != 0, "bpf_prog_test_run",
-		   "prog_name:%s (err %d errno %d retval %d)\n",
-		   prog_name, err, errno, tattr.retval);
+		.repeat = 1,
+	);
 
-	CHECK(tattr.retval != retval_expect, "retval",
-	      "progname:%s unexpected retval=%d expected=%d\n",
-	      prog_name, tattr.retval, retval_expect);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
+	ASSERT_OK(err, "test_run");
+	ASSERT_EQ(topts.retval, retval_expect, "retval");
 
 	/* Extract MTU that BPF-prog got */
 	mtu_result = skel->bss->global_bpf_mtu_tc;
diff --git a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
index e075d03ab630..224f016b0a53 100644
--- a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
@@ -161,7 +161,7 @@ static socklen_t prepare_addr(struct sockaddr_storage *addr, int family)
 	}
 }
 
-static bool was_decapsulated(struct bpf_prog_test_run_attr *tattr)
+static bool was_decapsulated(struct bpf_test_run_opts *tattr)
 {
 	return tattr->data_size_out < tattr->data_size_in;
 }
@@ -367,12 +367,12 @@ static void close_fds(int *fds, int n)
 
 static void test_cls_redirect_common(struct bpf_program *prog)
 {
-	struct bpf_prog_test_run_attr tattr = {};
+	LIBBPF_OPTS(bpf_test_run_opts, tattr);
 	int families[] = { AF_INET, AF_INET6 };
 	struct sockaddr_storage ss;
 	struct sockaddr *addr;
 	socklen_t slen;
-	int i, j, err;
+	int i, j, err, prog_fd;
 	int servers[__NR_KIND][ARRAY_SIZE(families)] = {};
 	int conns[__NR_KIND][ARRAY_SIZE(families)] = {};
 	struct tuple tuples[__NR_KIND][ARRAY_SIZE(families)];
@@ -394,7 +394,7 @@ static void test_cls_redirect_common(struct bpf_program *prog)
 			goto cleanup;
 	}
 
-	tattr.prog_fd = bpf_program__fd(prog);
+	prog_fd = bpf_program__fd(prog);
 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
 		struct test_cfg *test = &tests[i];
 
@@ -415,7 +415,7 @@ static void test_cls_redirect_common(struct bpf_program *prog)
 			if (CHECK_FAIL(!tattr.data_size_in))
 				continue;
 
-			err = bpf_prog_test_run_xattr(&tattr);
+			err = bpf_prog_test_run_opts(prog_fd, &tattr);
 			if (CHECK_FAIL(err))
 				continue;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c b/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c
index cbaa44ffb8c6..5aa52cc31dc2 100644
--- a/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c
+++ b/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c
@@ -26,10 +26,10 @@ static void test_dummy_st_ops_attach(void)
 static void test_dummy_init_ret_value(void)
 {
 	__u64 args[1] = {0};
-	struct bpf_prog_test_run_attr attr = {
-		.ctx_size_in = sizeof(args),
+	LIBBPF_OPTS(bpf_test_run_opts, attr,
 		.ctx_in = args,
-	};
+		.ctx_size_in = sizeof(args),
+	);
 	struct dummy_st_ops *skel;
 	int fd, err;
 
@@ -38,8 +38,7 @@ static void test_dummy_init_ret_value(void)
 		return;
 
 	fd = bpf_program__fd(skel->progs.test_1);
-	attr.prog_fd = fd;
-	err = bpf_prog_test_run_xattr(&attr);
+	err = bpf_prog_test_run_opts(fd, &attr);
 	ASSERT_OK(err, "test_run");
 	ASSERT_EQ(attr.retval, 0xf2f3f4f5, "test_ret");
 
@@ -53,10 +52,10 @@ static void test_dummy_init_ptr_arg(void)
 		.val = exp_retval,
 	};
 	__u64 args[1] = {(unsigned long)&in_state};
-	struct bpf_prog_test_run_attr attr = {
-		.ctx_size_in = sizeof(args),
+	LIBBPF_OPTS(bpf_test_run_opts, attr,
 		.ctx_in = args,
-	};
+		.ctx_size_in = sizeof(args),
+	);
 	struct dummy_st_ops *skel;
 	int fd, err;
 
@@ -65,8 +64,7 @@ static void test_dummy_init_ptr_arg(void)
 		return;
 
 	fd = bpf_program__fd(skel->progs.test_1);
-	attr.prog_fd = fd;
-	err = bpf_prog_test_run_xattr(&attr);
+	err = bpf_prog_test_run_opts(fd, &attr);
 	ASSERT_OK(err, "test_run");
 	ASSERT_EQ(in_state.val, 0x5a, "test_ptr_ret");
 	ASSERT_EQ(attr.retval, exp_retval, "test_ret");
@@ -77,10 +75,10 @@ static void test_dummy_init_ptr_arg(void)
 static void test_dummy_multiple_args(void)
 {
 	__u64 args[5] = {0, -100, 0x8a5f, 'c', 0x1234567887654321ULL};
-	struct bpf_prog_test_run_attr attr = {
-		.ctx_size_in = sizeof(args),
+	LIBBPF_OPTS(bpf_test_run_opts, attr,
 		.ctx_in = args,
-	};
+		.ctx_size_in = sizeof(args),
+	);
 	struct dummy_st_ops *skel;
 	int fd, err;
 	size_t i;
@@ -91,8 +89,7 @@ static void test_dummy_multiple_args(void)
 		return;
 
 	fd = bpf_program__fd(skel->progs.test_2);
-	attr.prog_fd = fd;
-	err = bpf_prog_test_run_xattr(&attr);
+	err = bpf_prog_test_run_opts(fd, &attr);
 	ASSERT_OK(err, "test_run");
 	for (i = 0; i < ARRAY_SIZE(args); i++) {
 		snprintf(name, sizeof(name), "arg %zu", i);
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index dfafd62df50b..0c1661ea996e 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -13,8 +13,9 @@
 #endif
 
 #define CHECK_FLOW_KEYS(desc, got, expected)				\
-	CHECK_ATTR(memcmp(&got, &expected, sizeof(got)) != 0,		\
+	_CHECK(memcmp(&got, &expected, sizeof(got)) != 0,		\
 	      desc,							\
+	      topts.duration,						\
 	      "nhoff=%u/%u "						\
 	      "thoff=%u/%u "						\
 	      "addr_proto=0x%x/0x%x "					\
@@ -487,7 +488,7 @@ static void run_tests_skb_less(int tap_fd, struct bpf_map *keys)
 		/* Keep in sync with 'flags' from eth_get_headlen. */
 		__u32 eth_get_headlen_flags =
 			BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG;
-		struct bpf_prog_test_run_attr tattr = {};
+		LIBBPF_OPTS(bpf_test_run_opts, topts);
 		struct bpf_flow_keys flow_keys = {};
 		__u32 key = (__u32)(tests[i].keys.sport) << 16 |
 			    tests[i].keys.dport;
@@ -503,13 +504,12 @@ static void run_tests_skb_less(int tap_fd, struct bpf_map *keys)
 		CHECK(err < 0, "tx_tap", "err %d errno %d\n", err, errno);
 
 		err = bpf_map_lookup_elem(keys_fd, &key, &flow_keys);
-		CHECK_ATTR(err, tests[i].name, "bpf_map_lookup_elem %d\n", err);
+		ASSERT_OK(err, "bpf_map_lookup_elem");
 
-		CHECK_ATTR(err, tests[i].name, "skb-less err %d\n", err);
 		CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys);
 
 		err = bpf_map_delete_elem(keys_fd, &key);
-		CHECK_ATTR(err, tests[i].name, "bpf_map_delete_elem %d\n", err);
+		ASSERT_OK(err, "bpf_map_delete_elem");
 	}
 }
 
@@ -573,27 +573,24 @@ void test_flow_dissector(void)
 
 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
 		struct bpf_flow_keys flow_keys;
-		struct bpf_prog_test_run_attr tattr = {
-			.prog_fd = prog_fd,
+		LIBBPF_OPTS(bpf_test_run_opts, topts,
 			.data_in = &tests[i].pkt,
 			.data_size_in = sizeof(tests[i].pkt),
 			.data_out = &flow_keys,
-		};
+		);
 		static struct bpf_flow_keys ctx = {};
 
 		if (tests[i].flags) {
-			tattr.ctx_in = &ctx;
-			tattr.ctx_size_in = sizeof(ctx);
+			topts.ctx_in = &ctx;
+			topts.ctx_size_in = sizeof(ctx);
 			ctx.flags = tests[i].flags;
 		}
 
-		err = bpf_prog_test_run_xattr(&tattr);
-		CHECK_ATTR(tattr.data_size_out != sizeof(flow_keys) ||
-			   err || tattr.retval != 1,
-			   tests[i].name,
-			   "err %d errno %d retval %d duration %d size %u/%zu\n",
-			   err, errno, tattr.retval, tattr.duration,
-			   tattr.data_size_out, sizeof(flow_keys));
+		err = bpf_prog_test_run_opts(prog_fd, &topts);
+		ASSERT_OK(err, "test_run");
+		ASSERT_EQ(topts.retval, 1, "test_run retval");
+		ASSERT_EQ(topts.data_size_out, sizeof(flow_keys),
+			  "test_run data_size_out");
 		CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys);
 	}
 
diff --git a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
index ce10d2fc3a6c..1cee6957285e 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
@@ -53,24 +53,24 @@ static void on_sample(void *ctx, int cpu, void *data, __u32 size)
 void serial_test_kfree_skb(void)
 {
 	struct __sk_buff skb = {};
-	struct bpf_prog_test_run_attr tattr = {
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
 		.data_in = &pkt_v6,
 		.data_size_in = sizeof(pkt_v6),
 		.ctx_in = &skb,
 		.ctx_size_in = sizeof(skb),
-	};
+	);
 	struct kfree_skb *skel = NULL;
 	struct bpf_link *link;
 	struct bpf_object *obj;
 	struct perf_buffer *pb = NULL;
-	int err;
+	int err, prog_fd;
 	bool passed = false;
 	__u32 duration = 0;
 	const int zero = 0;
 	bool test_ok[2];
 
 	err = bpf_prog_test_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS,
-			    &obj, &tattr.prog_fd);
+				 &obj, &prog_fd);
 	if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno))
 		return;
 
@@ -100,11 +100,9 @@ void serial_test_kfree_skb(void)
 		goto close_prog;
 
 	memcpy(skb.cb, &cb, sizeof(cb));
-	err = bpf_prog_test_run_xattr(&tattr);
-	duration = tattr.duration;
-	CHECK(err || tattr.retval, "ipv6",
-	      "err %d errno %d retval %d duration %d\n",
-	      err, errno, tattr.retval, duration);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
+	ASSERT_OK(err, "ipv6 test_run");
+	ASSERT_OK(topts.retval, "ipv6 test_run retval");
 
 	/* read perf buffer */
 	err = perf_buffer__poll(pb, 100);
diff --git a/tools/testing/selftests/bpf/prog_tests/prog_run_opts.c b/tools/testing/selftests/bpf/prog_tests/prog_run_opts.c
new file mode 100644
index 000000000000..1ccd2bdf8fa8
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/prog_run_opts.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "test_pkt_access.skel.h"
+
+static const __u32 duration;
+
+static void check_run_cnt(int prog_fd, __u64 run_cnt)
+{
+	struct bpf_prog_info info = {};
+	__u32 info_len = sizeof(info);
+	int err;
+
+	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+	if (CHECK(err, "get_prog_info", "failed to get bpf_prog_info for fd %d\n", prog_fd))
+		return;
+
+	CHECK(run_cnt != info.run_cnt, "run_cnt",
+	      "incorrect number of repetitions, want %llu have %llu\n", run_cnt, info.run_cnt);
+}
+
+void test_prog_run_opts(void)
+{
+	struct test_pkt_access *skel;
+	int err, stats_fd = -1, prog_fd;
+	char buf[10] = {};
+	__u64 run_cnt = 0;
+
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.repeat = 1,
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+		.data_out = buf,
+		.data_size_out = 5,
+	);
+
+	stats_fd = bpf_enable_stats(BPF_STATS_RUN_TIME);
+	if (!ASSERT_GE(stats_fd, 0, "enable_stats good fd"))
+		return;
+
+	skel = test_pkt_access__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open_and_load"))
+		goto cleanup;
+
+	prog_fd = bpf_program__fd(skel->progs.test_pkt_access);
+
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
+	ASSERT_EQ(errno, ENOSPC, "test_run errno");
+	ASSERT_ERR(err, "test_run");
+	ASSERT_OK(topts.retval, "test_run retval");
+
+	ASSERT_EQ(topts.data_size_out, sizeof(pkt_v4), "test_run data_size_out");
+	ASSERT_EQ(buf[5], 0, "overflow, BPF_PROG_TEST_RUN ignored size hint");
+
+	run_cnt += topts.repeat;
+	check_run_cnt(prog_fd, run_cnt);
+
+	topts.data_out = NULL;
+	topts.data_size_out = 0;
+	topts.repeat = 2;
+	errno = 0;
+
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
+	ASSERT_OK(errno, "run_no_output errno");
+	ASSERT_OK(err, "run_no_output err");
+	ASSERT_OK(topts.retval, "run_no_output retval");
+
+	run_cnt += topts.repeat;
+	check_run_cnt(prog_fd, run_cnt);
+
+cleanup:
+	if (skel)
+		test_pkt_access__destroy(skel);
+	if (stats_fd >= 0)
+		close(stats_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
deleted file mode 100644
index 89fc98faf19e..000000000000
--- a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
+++ /dev/null
@@ -1,83 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <test_progs.h>
-#include <network_helpers.h>
-
-#include "test_pkt_access.skel.h"
-
-static const __u32 duration;
-
-static void check_run_cnt(int prog_fd, __u64 run_cnt)
-{
-	struct bpf_prog_info info = {};
-	__u32 info_len = sizeof(info);
-	int err;
-
-	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
-	if (CHECK(err, "get_prog_info", "failed to get bpf_prog_info for fd %d\n", prog_fd))
-		return;
-
-	CHECK(run_cnt != info.run_cnt, "run_cnt",
-	      "incorrect number of repetitions, want %llu have %llu\n", run_cnt, info.run_cnt);
-}
-
-void test_prog_run_xattr(void)
-{
-	struct test_pkt_access *skel;
-	int err, stats_fd = -1;
-	char buf[10] = {};
-	__u64 run_cnt = 0;
-
-	struct bpf_prog_test_run_attr tattr = {
-		.repeat = 1,
-		.data_in = &pkt_v4,
-		.data_size_in = sizeof(pkt_v4),
-		.data_out = buf,
-		.data_size_out = 5,
-	};
-
-	stats_fd = bpf_enable_stats(BPF_STATS_RUN_TIME);
-	if (CHECK_ATTR(stats_fd < 0, "enable_stats", "failed %d\n", errno))
-		return;
-
-	skel = test_pkt_access__open_and_load();
-	if (CHECK_ATTR(!skel, "open_and_load", "failed\n"))
-		goto cleanup;
-
-	tattr.prog_fd = bpf_program__fd(skel->progs.test_pkt_access);
-
-	err = bpf_prog_test_run_xattr(&tattr);
-	CHECK_ATTR(err >= 0 || errno != ENOSPC || tattr.retval, "run",
-	      "err %d errno %d retval %d\n", err, errno, tattr.retval);
-
-	CHECK_ATTR(tattr.data_size_out != sizeof(pkt_v4), "data_size_out",
-	      "incorrect output size, want %zu have %u\n",
-	      sizeof(pkt_v4), tattr.data_size_out);
-
-	CHECK_ATTR(buf[5] != 0, "overflow",
-	      "BPF_PROG_TEST_RUN ignored size hint\n");
-
-	run_cnt += tattr.repeat;
-	check_run_cnt(tattr.prog_fd, run_cnt);
-
-	tattr.data_out = NULL;
-	tattr.data_size_out = 0;
-	tattr.repeat = 2;
-	errno = 0;
-
-	err = bpf_prog_test_run_xattr(&tattr);
-	CHECK_ATTR(err || errno || tattr.retval, "run_no_output",
-	      "err %d errno %d retval %d\n", err, errno, tattr.retval);
-
-	tattr.data_size_out = 1;
-	err = bpf_prog_test_run_xattr(&tattr);
-	CHECK_ATTR(err != -EINVAL, "run_wrong_size_out", "err %d\n", err);
-
-	run_cnt += tattr.repeat;
-	check_run_cnt(tattr.prog_fd, run_cnt);
-
-cleanup:
-	if (skel)
-		test_pkt_access__destroy(skel);
-	if (stats_fd >= 0)
-		close(stats_fd);
-}
diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c
index 41720a62c4fa..fe5b8fae2c36 100644
--- a/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c
+++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c
@@ -5,59 +5,54 @@
 #include "bpf/libbpf_internal.h"
 #include "test_raw_tp_test_run.skel.h"
 
-static int duration;
-
 void test_raw_tp_test_run(void)
 {
-	struct bpf_prog_test_run_attr test_attr = {};
 	int comm_fd = -1, err, nr_online, i, prog_fd;
 	__u64 args[2] = {0x1234ULL, 0x5678ULL};
 	int expected_retval = 0x1234 + 0x5678;
 	struct test_raw_tp_test_run *skel;
 	char buf[] = "new_name";
 	bool *online = NULL;
-	DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
-			    .ctx_in = args,
-			    .ctx_size_in = sizeof(args),
-			    .flags = BPF_F_TEST_RUN_ON_CPU,
-		);
+	LIBBPF_OPTS(bpf_test_run_opts, opts,
+		.ctx_in = args,
+		.ctx_size_in = sizeof(args),
+		.flags = BPF_F_TEST_RUN_ON_CPU,
+	);
 
 	err = parse_cpu_mask_file("/sys/devices/system/cpu/online", &online,
 				  &nr_online);
-	if (CHECK(err, "parse_cpu_mask_file", "err %d\n", err))
+	if (!ASSERT_OK(err, "parse_cpu_mask_file"))
 		return;
 
 	skel = test_raw_tp_test_run__open_and_load();
-	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
 		goto cleanup;
 
 	err = test_raw_tp_test_run__attach(skel);
-	if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+	if (!ASSERT_OK(err, "skel_attach"))
 		goto cleanup;
 
 	comm_fd = open("/proc/self/comm", O_WRONLY|O_TRUNC);
-	if (CHECK(comm_fd < 0, "open /proc/self/comm", "err %d\n", errno))
+	if (!ASSERT_GE(comm_fd, 0, "open /proc/self/comm"))
 		goto cleanup;
 
 	err = write(comm_fd, buf, sizeof(buf));
-	CHECK(err < 0, "task rename", "err %d", errno);
+	ASSERT_GE(err, 0, "task rename");
 
-	CHECK(skel->bss->count == 0, "check_count", "didn't increase\n");
-	CHECK(skel->data->on_cpu != 0xffffffff, "check_on_cpu", "got wrong value\n");
+	ASSERT_NEQ(skel->bss->count, 0, "check_count");
+	ASSERT_EQ(skel->data->on_cpu, 0xffffffff, "check_on_cpu");
 
 	prog_fd = bpf_program__fd(skel->progs.rename);
-	test_attr.prog_fd = prog_fd;
-	test_attr.ctx_in = args;
-	test_attr.ctx_size_in = sizeof(__u64);
+	opts.ctx_in = args;
+	opts.ctx_size_in = sizeof(__u64);
 
-	err = bpf_prog_test_run_xattr(&test_attr);
-	CHECK(err == 0, "test_run", "should fail for too small ctx\n");
+	err = bpf_prog_test_run_opts(prog_fd, &opts);
+	ASSERT_NEQ(err, 0, "test_run should fail for too small ctx");
 
-	test_attr.ctx_size_in = sizeof(args);
-	err = bpf_prog_test_run_xattr(&test_attr);
-	CHECK(err < 0, "test_run", "err %d\n", errno);
-	CHECK(test_attr.retval != expected_retval, "check_retval",
-	      "expect 0x%x, got 0x%x\n", expected_retval, test_attr.retval);
+	opts.ctx_size_in = sizeof(args);
+	err = bpf_prog_test_run_opts(prog_fd, &opts);
+	ASSERT_OK(err, "test_run");
+	ASSERT_EQ(opts.retval, expected_retval, "check_retval");
 
 	for (i = 0; i < nr_online; i++) {
 		if (!online[i])
@@ -66,28 +61,23 @@ void test_raw_tp_test_run(void)
 		opts.cpu = i;
 		opts.retval = 0;
 		err = bpf_prog_test_run_opts(prog_fd, &opts);
-		CHECK(err < 0, "test_run_opts", "err %d\n", errno);
-		CHECK(skel->data->on_cpu != i, "check_on_cpu",
-		      "expect %d got %d\n", i, skel->data->on_cpu);
-		CHECK(opts.retval != expected_retval,
-		      "check_retval", "expect 0x%x, got 0x%x\n",
-		      expected_retval, opts.retval);
+		ASSERT_OK(err, "test_run_opts");
+		ASSERT_EQ(skel->data->on_cpu, i, "check_on_cpu");
+		ASSERT_EQ(opts.retval, expected_retval, "check_retval");
 	}
 
 	/* invalid cpu ID should fail with ENXIO */
 	opts.cpu = 0xffffffff;
 	err = bpf_prog_test_run_opts(prog_fd, &opts);
-	CHECK(err >= 0 || errno != ENXIO,
-	      "test_run_opts_fail",
-	      "should failed with ENXIO\n");
+	ASSERT_EQ(errno, ENXIO, "test_run_opts should fail with ENXIO");
+	ASSERT_ERR(err, "test_run_opts_fail");
 
 	/* non-zero cpu w/o BPF_F_TEST_RUN_ON_CPU should fail with EINVAL */
 	opts.cpu = 1;
 	opts.flags = 0;
 	err = bpf_prog_test_run_opts(prog_fd, &opts);
-	CHECK(err >= 0 || errno != EINVAL,
-	      "test_run_opts_fail",
-	      "should failed with EINVAL\n");
+	ASSERT_EQ(errno, EINVAL, "test_run_opts should fail with EINVAL");
+	ASSERT_ERR(err, "test_run_opts_fail");
 
 cleanup:
 	close(comm_fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
index b5319ba2ee27..ce0e555b5e38 100644
--- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
+++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
@@ -20,97 +20,72 @@ void test_skb_ctx(void)
 		.gso_size = 10,
 		.hwtstamp = 11,
 	};
-	struct bpf_prog_test_run_attr tattr = {
+	LIBBPF_OPTS(bpf_test_run_opts, tattr,
 		.data_in = &pkt_v4,
 		.data_size_in = sizeof(pkt_v4),
 		.ctx_in = &skb,
 		.ctx_size_in = sizeof(skb),
 		.ctx_out = &skb,
 		.ctx_size_out = sizeof(skb),
-	};
+	);
 	struct bpf_object *obj;
-	int err;
-	int i;
+	int err, prog_fd, i;
 
-	err = bpf_prog_test_load("./test_skb_ctx.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
-			    &tattr.prog_fd);
-	if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
+	err = bpf_prog_test_load("./test_skb_ctx.o", BPF_PROG_TYPE_SCHED_CLS,
+				 &obj, &prog_fd);
+	if (!ASSERT_OK(err, "load"))
 		return;
 
 	/* ctx_in != NULL, ctx_size_in == 0 */
 
 	tattr.ctx_size_in = 0;
-	err = bpf_prog_test_run_xattr(&tattr);
-	CHECK_ATTR(err == 0, "ctx_size_in", "err %d errno %d\n", err, errno);
+	err = bpf_prog_test_run_opts(prog_fd, &tattr);
+	ASSERT_NEQ(err, 0, "ctx_size_in");
 	tattr.ctx_size_in = sizeof(skb);
 
 	/* ctx_out != NULL, ctx_size_out == 0 */
 
 	tattr.ctx_size_out = 0;
-	err = bpf_prog_test_run_xattr(&tattr);
-	CHECK_ATTR(err == 0, "ctx_size_out", "err %d errno %d\n", err, errno);
+	err = bpf_prog_test_run_opts(prog_fd, &tattr);
+	ASSERT_NEQ(err, 0, "ctx_size_out");
 	tattr.ctx_size_out = sizeof(skb);
 
 	/* non-zero [len, tc_index] fields should be rejected*/
 
 	skb.len = 1;
-	err = bpf_prog_test_run_xattr(&tattr);
-	CHECK_ATTR(err == 0, "len", "err %d errno %d\n", err, errno);
+	err = bpf_prog_test_run_opts(prog_fd, &tattr);
+	ASSERT_NEQ(err, 0, "len");
 	skb.len = 0;
 
 	skb.tc_index = 1;
-	err = bpf_prog_test_run_xattr(&tattr);
-	CHECK_ATTR(err == 0, "tc_index", "err %d errno %d\n", err, errno);
+	err = bpf_prog_test_run_opts(prog_fd, &tattr);
+	ASSERT_NEQ(err, 0, "tc_index");
 	skb.tc_index = 0;
 
 	/* non-zero [hash, sk] fields should be rejected */
 
 	skb.hash = 1;
-	err = bpf_prog_test_run_xattr(&tattr);
-	CHECK_ATTR(err == 0, "hash", "err %d errno %d\n", err, errno);
+	err = bpf_prog_test_run_opts(prog_fd, &tattr);
+	ASSERT_NEQ(err, 0, "hash");
 	skb.hash = 0;
 
 	skb.sk = (struct bpf_sock *)1;
-	err = bpf_prog_test_run_xattr(&tattr);
-	CHECK_ATTR(err == 0, "sk", "err %d errno %d\n", err, errno);
+	err = bpf_prog_test_run_opts(prog_fd, &tattr);
+	ASSERT_NEQ(err, 0, "sk");
 	skb.sk = 0;
 
-	err = bpf_prog_test_run_xattr(&tattr);
-	CHECK_ATTR(err != 0 || tattr.retval,
-		   "run",
-		   "err %d errno %d retval %d\n",
-		   err, errno, tattr.retval);
-
-	CHECK_ATTR(tattr.ctx_size_out != sizeof(skb),
-		   "ctx_size_out",
-		   "incorrect output size, want %zu have %u\n",
-		   sizeof(skb), tattr.ctx_size_out);
+	err = bpf_prog_test_run_opts(prog_fd, &tattr);
+	ASSERT_OK(err, "test_run");
+	ASSERT_OK(tattr.retval, "test_run retval");
+	ASSERT_EQ(tattr.ctx_size_out, sizeof(skb), "ctx_size_out");
 
 	for (i = 0; i < 5; i++)
-		CHECK_ATTR(skb.cb[i] != i + 2,
-			   "ctx_out_cb",
-			   "skb->cb[i] == %d, expected %d\n",
-			   skb.cb[i], i + 2);
-	CHECK_ATTR(skb.priority != 7,
-		   "ctx_out_priority",
-		   "skb->priority == %d, expected %d\n",
-		   skb.priority, 7);
-	CHECK_ATTR(skb.ifindex != 1,
-		   "ctx_out_ifindex",
-		   "skb->ifindex == %d, expected %d\n",
-		   skb.ifindex, 1);
-	CHECK_ATTR(skb.ingress_ifindex != 11,
-		   "ctx_out_ingress_ifindex",
-		   "skb->ingress_ifindex == %d, expected %d\n",
-		   skb.ingress_ifindex, 11);
-	CHECK_ATTR(skb.tstamp != 8,
-		   "ctx_out_tstamp",
-		   "skb->tstamp == %lld, expected %d\n",
-		   skb.tstamp, 8);
-	CHECK_ATTR(skb.mark != 10,
-		   "ctx_out_mark",
-		   "skb->mark == %u, expected %d\n",
-		   skb.mark, 10);
+		ASSERT_EQ(skb.cb[i], i + 2, "ctx_out_cb");
+	ASSERT_EQ(skb.priority, 7, "ctx_out_priority");
+	ASSERT_EQ(skb.ifindex, 1, "ctx_out_ifindex");
+	ASSERT_EQ(skb.ingress_ifindex, 11, "ctx_out_ingress_ifindex");
+	ASSERT_EQ(skb.tstamp, 8, "ctx_out_tstamp");
+	ASSERT_EQ(skb.mark, 10, "ctx_out_mark");
 
 	bpf_object__close(obj);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/skb_helpers.c b/tools/testing/selftests/bpf/prog_tests/skb_helpers.c
index 6f802a1c0800..97dc8b14be48 100644
--- a/tools/testing/selftests/bpf/prog_tests/skb_helpers.c
+++ b/tools/testing/selftests/bpf/prog_tests/skb_helpers.c
@@ -9,22 +9,22 @@ void test_skb_helpers(void)
 		.gso_segs = 8,
 		.gso_size = 10,
 	};
-	struct bpf_prog_test_run_attr tattr = {
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
 		.data_in = &pkt_v4,
 		.data_size_in = sizeof(pkt_v4),
 		.ctx_in = &skb,
 		.ctx_size_in = sizeof(skb),
 		.ctx_out = &skb,
 		.ctx_size_out = sizeof(skb),
-	};
+	);
 	struct bpf_object *obj;
-	int err;
+	int err, prog_fd;
 
-	err = bpf_prog_test_load("./test_skb_helpers.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
-			    &tattr.prog_fd);
-	if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
+	err = bpf_prog_test_load("./test_skb_helpers.o",
+				 BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+	if (!ASSERT_OK(err, "load"))
 		return;
-	err = bpf_prog_test_run_xattr(&tattr);
-	CHECK_ATTR(err, "len", "err %d errno %d\n", err, errno);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
+	ASSERT_OK(err, "test_run");
 	bpf_object__close(obj);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index b97a8f236b3a..cec5c0882372 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -140,12 +140,16 @@ out:
 
 static void test_sockmap_update(enum bpf_map_type map_type)
 {
-	struct bpf_prog_test_run_attr tattr;
 	int err, prog, src, duration = 0;
 	struct test_sockmap_update *skel;
 	struct bpf_map *dst_map;
 	const __u32 zero = 0;
 	char dummy[14] = {0};
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = dummy,
+		.data_size_in = sizeof(dummy),
+		.repeat = 1,
+	);
 	__s64 sk;
 
 	sk = connected_socket_v4();
@@ -167,16 +171,10 @@ static void test_sockmap_update(enum bpf_map_type map_type)
 	if (CHECK(err, "update_elem(src)", "errno=%u\n", errno))
 		goto out;
 
-	tattr = (struct bpf_prog_test_run_attr){
-		.prog_fd = prog,
-		.repeat = 1,
-		.data_in = dummy,
-		.data_size_in = sizeof(dummy),
-	};
-
-	err = bpf_prog_test_run_xattr(&tattr);
-	if (CHECK_ATTR(err || !tattr.retval, "bpf_prog_test_run",
-		       "errno=%u retval=%u\n", errno, tattr.retval))
+	err = bpf_prog_test_run_opts(prog, &topts);
+	if (!ASSERT_OK(err, "test_run"))
+		goto out;
+	if (!ASSERT_NEQ(topts.retval, 0, "test_run retval"))
 		goto out;
 
 	compare_cookies(skel->maps.src, dst_map);
diff --git a/tools/testing/selftests/bpf/prog_tests/syscall.c b/tools/testing/selftests/bpf/prog_tests/syscall.c
index 81e997a69f7a..f4d40001155a 100644
--- a/tools/testing/selftests/bpf/prog_tests/syscall.c
+++ b/tools/testing/selftests/bpf/prog_tests/syscall.c
@@ -20,20 +20,20 @@ void test_syscall(void)
 		.log_buf = (uintptr_t) verifier_log,
 		.log_size = sizeof(verifier_log),
 	};
-	struct bpf_prog_test_run_attr tattr = {
+	LIBBPF_OPTS(bpf_test_run_opts, tattr,
 		.ctx_in = &ctx,
 		.ctx_size_in = sizeof(ctx),
-	};
+	);
 	struct syscall *skel = NULL;
 	__u64 key = 12, value = 0;
-	int err;
+	int err, prog_fd;
 
 	skel = syscall__open_and_load();
 	if (!ASSERT_OK_PTR(skel, "skel_load"))
 		goto cleanup;
 
-	tattr.prog_fd = bpf_program__fd(skel->progs.bpf_prog);
-	err = bpf_prog_test_run_xattr(&tattr);
+	prog_fd = bpf_program__fd(skel->progs.bpf_prog);
+	err = bpf_prog_test_run_opts(prog_fd, &tattr);
 	ASSERT_EQ(err, 0, "err");
 	ASSERT_EQ(tattr.retval, 1, "retval");
 	ASSERT_GT(ctx.map_fd, 0, "ctx.map_fd");
diff --git a/tools/testing/selftests/bpf/prog_tests/test_profiler.c b/tools/testing/selftests/bpf/prog_tests/test_profiler.c
index 4ca275101ee0..de24e8f0e738 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_profiler.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_profiler.c
@@ -8,20 +8,20 @@
 
 static int sanity_run(struct bpf_program *prog)
 {
-	struct bpf_prog_test_run_attr test_attr = {};
+	LIBBPF_OPTS(bpf_test_run_opts, test_attr);
 	__u64 args[] = {1, 2, 3};
-	__u32 duration = 0;
 	int err, prog_fd;
 
 	prog_fd = bpf_program__fd(prog);
-	test_attr.prog_fd = prog_fd;
 	test_attr.ctx_in = args;
 	test_attr.ctx_size_in = sizeof(args);
-	err = bpf_prog_test_run_xattr(&test_attr);
-	if (CHECK(err || test_attr.retval, "test_run",
-		  "err %d errno %d retval %d duration %d\n",
-		  err, errno, test_attr.retval, duration))
+	err = bpf_prog_test_run_opts(prog_fd, &test_attr);
+	if (!ASSERT_OK(err, "test_run"))
+		return -1;
+
+	if (!ASSERT_OK(test_attr.retval, "test_run retval"))
 		return -1;
+
 	return 0;
 }
 
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
index 0289d09b350f..528a8c387720 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
@@ -78,17 +78,17 @@ static void test_xdp_adjust_tail_grow2(void)
 	int tailroom = 320; /* SKB_DATA_ALIGN(sizeof(struct skb_shared_info))*/;
 	struct bpf_object *obj;
 	int err, cnt, i;
-	int max_grow;
+	int max_grow, prog_fd;
 
-	struct bpf_prog_test_run_attr tattr = {
+	LIBBPF_OPTS(bpf_test_run_opts, tattr,
 		.repeat		= 1,
 		.data_in	= &buf,
 		.data_out	= &buf,
 		.data_size_in	= 0, /* Per test */
 		.data_size_out	= 0, /* Per test */
-	};
+	);
 
-	err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &tattr.prog_fd);
+	err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
 	if (ASSERT_OK(err, "test_xdp_adjust_tail_grow"))
 		return;
 
@@ -97,7 +97,7 @@ static void test_xdp_adjust_tail_grow2(void)
 	tattr.data_size_in  =  64; /* Determine test case via pkt size */
 	tattr.data_size_out = 128; /* Limit copy_size */
 	/* Kernel side alloc packet memory area that is zero init */
-	err = bpf_prog_test_run_xattr(&tattr);
+	err = bpf_prog_test_run_opts(prog_fd, &tattr);
 
 	ASSERT_EQ(errno, ENOSPC, "case-64 errno"); /* Due limit copy_size in bpf_test_finish */
 	ASSERT_EQ(tattr.retval, XDP_TX, "case-64 retval");
@@ -115,7 +115,7 @@ static void test_xdp_adjust_tail_grow2(void)
 	memset(buf, 2, sizeof(buf));
 	tattr.data_size_in  = 128; /* Determine test case via pkt size */
 	tattr.data_size_out = sizeof(buf);   /* Copy everything */
-	err = bpf_prog_test_run_xattr(&tattr);
+	err = bpf_prog_test_run_opts(prog_fd, &tattr);
 
 	max_grow = 4096 - XDP_PACKET_HEADROOM -	tailroom; /* 3520 */
 	ASSERT_OK(err, "case-128");
-- 
cgit 


From c25d29be00c1a1c53549b71d06a1fb666b598d3b Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Wed, 2 Feb 2022 17:03:42 -0800
Subject: selftests: mptcp: set fullmesh flag in pm_nl_ctl

This patch added the fullmesh flag setting and clearing support in
pm_nl_ctl:

 # pm_nl_ctl set ip flags fullmesh
 # pm_nl_ctl set ip flags nofullmesh

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/mptcp/pm_nl_ctl.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index 354784512748..152b84e44d69 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -28,7 +28,7 @@ static void syntax(char *argv[])
 	fprintf(stderr, "\tadd [flags signal|subflow|backup|fullmesh] [id <nr>] [dev <name>] <ip>\n");
 	fprintf(stderr, "\tdel <id> [<ip>]\n");
 	fprintf(stderr, "\tget <id>\n");
-	fprintf(stderr, "\tset <ip> [flags backup|nobackup]\n");
+	fprintf(stderr, "\tset <ip> [flags backup|nobackup|fullmesh|nofullmesh]\n");
 	fprintf(stderr, "\tflush\n");
 	fprintf(stderr, "\tdump\n");
 	fprintf(stderr, "\tlimits [<rcv addr max> <subflow max>]\n");
@@ -704,12 +704,14 @@ int set_flags(int fd, int pm_family, int argc, char *argv[])
 			if (++arg >= argc)
 				error(1, 0, " missing flags value");
 
-			/* do not support flag list yet */
 			for (str = argv[arg]; (tok = strtok(str, ","));
 			     str = NULL) {
 				if (!strcmp(tok, "backup"))
 					flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
-				else if (strcmp(tok, "nobackup"))
+				else if (!strcmp(tok, "fullmesh"))
+					flags |= MPTCP_PM_ADDR_FLAG_FULLMESH;
+				else if (strcmp(tok, "nobackup") &&
+					 strcmp(tok, "nofullmesh"))
 					error(1, errno,
 					      "unknown flag %s", argv[arg]);
 			}
-- 
cgit 


From 6a0653b96f5d103d5579475304d76e7017165090 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Wed, 2 Feb 2022 17:03:43 -0800
Subject: selftests: mptcp: add fullmesh setting tests

This patch added the fullmesh setting and clearing selftests in
mptcp_join.sh.

Now we can set both backup and fullmesh flags, so avoid using the
words 'backup' and 'bkup'.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 49 ++++++++++++++++++++++---
 1 file changed, 43 insertions(+), 6 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index b8bdbec0cf69..bd106c7ec232 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -289,7 +289,7 @@ do_transfer()
 	addr_nr_ns1="$7"
 	addr_nr_ns2="$8"
 	speed="$9"
-	bkup="${10}"
+	sflags="${10}"
 
 	port=$((10000+$TEST_COUNT))
 	TEST_COUNT=$((TEST_COUNT+1))
@@ -461,14 +461,13 @@ do_transfer()
 		fi
 	fi
 
-	if [ ! -z $bkup ]; then
+	if [ ! -z $sflags ]; then
 		sleep 1
 		for netns in "$ns1" "$ns2"; do
 			dump=(`ip netns exec $netns ./pm_nl_ctl dump`)
 			if [ ${#dump[@]} -gt 0 ]; then
 				addr=${dump[${#dump[@]} - 1]}
-				backup="ip netns exec $netns ./pm_nl_ctl set $addr flags $bkup"
-				$backup
+				ip netns exec $netns ./pm_nl_ctl set $addr flags $sflags
 			fi
 		done
 	fi
@@ -545,7 +544,7 @@ run_tests()
 	addr_nr_ns1="${5:-0}"
 	addr_nr_ns2="${6:-0}"
 	speed="${7:-fast}"
-	bkup="${8:-""}"
+	sflags="${8:-""}"
 	lret=0
 	oldin=""
 
@@ -574,7 +573,7 @@ run_tests()
 	fi
 
 	do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} \
-		${test_linkfail} ${addr_nr_ns1} ${addr_nr_ns2} ${speed} ${bkup}
+		${test_linkfail} ${addr_nr_ns1} ${addr_nr_ns2} ${speed} ${sflags}
 	lret=$?
 }
 
@@ -1888,6 +1887,44 @@ fullmesh_tests()
 	run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow
 	chk_join_nr "fullmesh test 1x2, limited" 4 4 4
 	chk_add_nr 1 1
+
+	# set fullmesh flag
+	reset
+	ip netns exec $ns1 ./pm_nl_ctl limits 4 4
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags subflow
+	ip netns exec $ns2 ./pm_nl_ctl limits 4 4
+	run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow fullmesh
+	chk_join_nr "set fullmesh flag test" 2 2 2
+	chk_rm_nr 0 1
+
+	# set nofullmesh flag
+	reset
+	ip netns exec $ns1 ./pm_nl_ctl limits 4 4
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags subflow,fullmesh
+	ip netns exec $ns2 ./pm_nl_ctl limits 4 4
+	run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_1 slow nofullmesh
+	chk_join_nr "set nofullmesh flag test" 2 2 2
+	chk_rm_nr 0 1
+
+	# set backup,fullmesh flags
+	reset
+	ip netns exec $ns1 ./pm_nl_ctl limits 4 4
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags subflow
+	ip netns exec $ns2 ./pm_nl_ctl limits 4 4
+	run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow backup,fullmesh
+	chk_join_nr "set backup,fullmesh flags test" 2 2 2
+	chk_prio_nr 0 1
+	chk_rm_nr 0 1
+
+	# set nobackup,nofullmesh flags
+	reset
+	ip netns exec $ns1 ./pm_nl_ctl limits 4 4
+	ip netns exec $ns2 ./pm_nl_ctl limits 4 4
+	ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow,backup,fullmesh
+	run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow nobackup,nofullmesh
+	chk_join_nr "set nobackup,nofullmesh flags test" 2 2 2
+	chk_prio_nr 0 1
+	chk_rm_nr 0 1
 }
 
 all_tests()
-- 
cgit 


From 081197591769a7389ef6696bc10f32cd43c0cb6e Mon Sep 17 00:00:00 2001
From: Tobias Waldekranz <tobias@waldekranz.com>
Date: Thu, 3 Feb 2022 11:16:57 +0100
Subject: selftests: net: bridge: Parameterize ageing timeout

Allow the ageing timeout that is set on bridges to be customized from
forwarding.config. This allows the tests to be run on hardware which
does not support a 10s timeout (e.g. mv88e6xxx).

Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh     | 5 +++--
 tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh   | 5 +++--
 tools/testing/selftests/net/forwarding/forwarding.config.sample | 2 ++
 tools/testing/selftests/net/forwarding/lib.sh                   | 1 +
 4 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
index b90dff8d3a94..64bd00fe9a4f 100755
--- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
@@ -28,8 +28,9 @@ h2_destroy()
 
 switch_create()
 {
-	# 10 Seconds ageing time.
-	ip link add dev br0 type bridge vlan_filtering 1 ageing_time 1000 \
+	ip link add dev br0 type bridge \
+		vlan_filtering 1 \
+		ageing_time $LOW_AGEING_TIME \
 		mcast_snooping 0
 
 	ip link set dev $swp1 master br0
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
index c15c6c85c984..1c8a26046589 100755
--- a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
@@ -27,8 +27,9 @@ h2_destroy()
 
 switch_create()
 {
-	# 10 Seconds ageing time.
-	ip link add dev br0 type bridge ageing_time 1000 mcast_snooping 0
+	ip link add dev br0 type bridge \
+		ageing_time $LOW_AGEING_TIME \
+		mcast_snooping 0
 
 	ip link set dev $swp1 master br0
 	ip link set dev $swp2 master br0
diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample
index b0980a2efa31..4a546509de90 100644
--- a/tools/testing/selftests/net/forwarding/forwarding.config.sample
+++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample
@@ -41,6 +41,8 @@ NETIF_CREATE=yes
 # Timeout (in seconds) before ping exits regardless of how many packets have
 # been sent or received
 PING_TIMEOUT=5
+# Minimum ageing_time (in centiseconds) supported by hardware
+LOW_AGEING_TIME=1000
 # Flag for tc match, supposed to be skip_sw/skip_hw which means do not process
 # filter by software/hardware
 TC_FLAG=skip_hw
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 7da783d6f453..e7e434a4758b 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -24,6 +24,7 @@ PING_COUNT=${PING_COUNT:=10}
 PING_TIMEOUT=${PING_TIMEOUT:=5}
 WAIT_TIMEOUT=${WAIT_TIMEOUT:=20}
 INTERFACE_TIMEOUT=${INTERFACE_TIMEOUT:=600}
+LOW_AGEING_TIME=${LOW_AGEING_TIME:=1000}
 REQUIRE_JQ=${REQUIRE_JQ:=yes}
 REQUIRE_MZ=${REQUIRE_MZ:=yes}
 
-- 
cgit 


From 32e608f82946e1e600f8c92b765c18b7189e596d Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 2 Feb 2022 14:59:14 -0800
Subject: selftests/bpf: Remove usage of deprecated feature probing APIs

Switch to libbpf_probe_*() APIs instead of the deprecated ones.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Link: https://lore.kernel.org/bpf/20220202225916.3313522-5-andrii@kernel.org
---
 tools/testing/selftests/bpf/test_maps.c     | 2 +-
 tools/testing/selftests/bpf/test_verifier.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 50f7e74ca0b9..cbebfaa7c1e8 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -738,7 +738,7 @@ static void test_sockmap(unsigned int tasks, void *data)
 			    sizeof(key), sizeof(value),
 			    6, NULL);
 	if (fd < 0) {
-		if (!bpf_probe_map_type(BPF_MAP_TYPE_SOCKMAP, 0)) {
+		if (!libbpf_probe_bpf_map_type(BPF_MAP_TYPE_SOCKMAP, NULL)) {
 			printf("%s SKIP (unsupported map type BPF_MAP_TYPE_SOCKMAP)\n",
 			       __func__);
 			skips++;
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 163b303e8a2a..92e3465fbae8 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -456,7 +456,7 @@ static int probe_filter_length(const struct bpf_insn *fp)
 
 static bool skip_unsupported_map(enum bpf_map_type map_type)
 {
-	if (!bpf_probe_map_type(map_type, 0)) {
+	if (!libbpf_probe_bpf_map_type(map_type, NULL)) {
 		printf("SKIP (unsupported map type %d)\n", map_type);
 		skips++;
 		return true;
@@ -1180,7 +1180,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 	 * bpf_probe_prog_type won't give correct answer
 	 */
 	if (fd_prog < 0 && prog_type != BPF_PROG_TYPE_TRACING &&
-	    !bpf_probe_prog_type(prog_type, 0)) {
+	    !libbpf_probe_bpf_prog_type(prog_type, NULL)) {
 		printf("SKIP (unsupported program type %d)\n", prog_type);
 		skips++;
 		goto close_fds;
-- 
cgit 


From e4e284a8c0d9823c07ee674445de05928be67231 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 2 Feb 2022 14:59:15 -0800
Subject: selftests/bpf: Redo the switch to new libbpf XDP APIs

Switch to using new bpf_xdp_*() APIs across all selftests. Take
advantage of a more straightforward and user-friendly semantics of
old_prog_fd (0 means "don't care") in few places.

This is a redo of 544356524dd6 ("selftests/bpf: switch to new libbpf XDP
APIs"), which was previously reverted to minimize conflicts during bpf
and bpf-next tree merge.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Link: https://lore.kernel.org/bpf/20220202225916.3313522-6-andrii@kernel.org
---
 .../testing/selftests/bpf/prog_tests/xdp_attach.c  | 29 ++++++++++------------
 .../selftests/bpf/prog_tests/xdp_cpumap_attach.c   |  8 +++---
 .../selftests/bpf/prog_tests/xdp_devmap_attach.c   |  8 +++---
 tools/testing/selftests/bpf/prog_tests/xdp_info.c  | 14 +++++------
 tools/testing/selftests/bpf/prog_tests/xdp_link.c  | 26 +++++++++----------
 tools/testing/selftests/bpf/xdp_redirect_multi.c   |  8 +++---
 tools/testing/selftests/bpf/xdping.c               |  4 +--
 7 files changed, 47 insertions(+), 50 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
index c6fa390e3aa1..62aa3edda5e6 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
@@ -11,8 +11,7 @@ void serial_test_xdp_attach(void)
 	const char *file = "./test_xdp.o";
 	struct bpf_prog_info info = {};
 	int err, fd1, fd2, fd3;
-	DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts,
-			    .old_fd = -1);
+	LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
 
 	len = sizeof(info);
 
@@ -38,49 +37,47 @@ void serial_test_xdp_attach(void)
 	if (CHECK_FAIL(err))
 		goto out_2;
 
-	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd1, XDP_FLAGS_REPLACE,
-				       &opts);
+	err = bpf_xdp_attach(IFINDEX_LO, fd1, XDP_FLAGS_REPLACE, &opts);
 	if (CHECK(err, "load_ok", "initial load failed"))
 		goto out_close;
 
-	err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+	err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0);
 	if (CHECK(err || id0 != id1, "id1_check",
 		  "loaded prog id %u != id1 %u, err %d", id0, id1, err))
 		goto out_close;
 
-	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd2, XDP_FLAGS_REPLACE,
-				       &opts);
+	err = bpf_xdp_attach(IFINDEX_LO, fd2, XDP_FLAGS_REPLACE, &opts);
 	if (CHECK(!err, "load_fail", "load with expected id didn't fail"))
 		goto out;
 
-	opts.old_fd = fd1;
-	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd2, 0, &opts);
+	opts.old_prog_fd = fd1;
+	err = bpf_xdp_attach(IFINDEX_LO, fd2, 0, &opts);
 	if (CHECK(err, "replace_ok", "replace valid old_fd failed"))
 		goto out;
-	err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+	err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0);
 	if (CHECK(err || id0 != id2, "id2_check",
 		  "loaded prog id %u != id2 %u, err %d", id0, id2, err))
 		goto out_close;
 
-	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd3, 0, &opts);
+	err = bpf_xdp_attach(IFINDEX_LO, fd3, 0, &opts);
 	if (CHECK(!err, "replace_fail", "replace invalid old_fd didn't fail"))
 		goto out;
 
-	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, 0, &opts);
+	err = bpf_xdp_detach(IFINDEX_LO, 0, &opts);
 	if (CHECK(!err, "remove_fail", "remove invalid old_fd didn't fail"))
 		goto out;
 
-	opts.old_fd = fd2;
-	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, 0, &opts);
+	opts.old_prog_fd = fd2;
+	err = bpf_xdp_detach(IFINDEX_LO, 0, &opts);
 	if (CHECK(err, "remove_ok", "remove valid old_fd failed"))
 		goto out;
 
-	err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+	err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0);
 	if (CHECK(err || id0 != 0, "unload_check",
 		  "loaded prog id %u != 0, err %d", id0, err))
 		goto out_close;
 out:
-	bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0);
+	bpf_xdp_detach(IFINDEX_LO, 0, NULL);
 out_close:
 	bpf_object__close(obj3);
 out_2:
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
index 13aabb3b6cf2..b353e1f3acb5 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
@@ -24,11 +24,11 @@ void test_xdp_with_cpumap_helpers(void)
 		return;
 
 	prog_fd = bpf_program__fd(skel->progs.xdp_redir_prog);
-	err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
+	err = bpf_xdp_attach(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE, NULL);
 	if (!ASSERT_OK(err, "Generic attach of program with 8-byte CPUMAP"))
 		goto out_close;
 
-	err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
+	err = bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL);
 	ASSERT_OK(err, "XDP program detach");
 
 	prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm);
@@ -46,9 +46,9 @@ void test_xdp_with_cpumap_helpers(void)
 	ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to cpumap entry prog_id");
 
 	/* can not attach BPF_XDP_CPUMAP program to a device */
-	err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
+	err = bpf_xdp_attach(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE, NULL);
 	if (!ASSERT_NEQ(err, 0, "Attach of BPF_XDP_CPUMAP program"))
-		bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
+		bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL);
 
 	val.qsize = 192;
 	val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog);
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
index 2a784ccd3136..463a72fc3e70 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
@@ -26,11 +26,11 @@ static void test_xdp_with_devmap_helpers(void)
 		return;
 
 	dm_fd = bpf_program__fd(skel->progs.xdp_redir_prog);
-	err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE);
+	err = bpf_xdp_attach(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE, NULL);
 	if (!ASSERT_OK(err, "Generic attach of program with 8-byte devmap"))
 		goto out_close;
 
-	err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
+	err = bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL);
 	ASSERT_OK(err, "XDP program detach");
 
 	dm_fd = bpf_program__fd(skel->progs.xdp_dummy_dm);
@@ -48,9 +48,9 @@ static void test_xdp_with_devmap_helpers(void)
 	ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to devmap entry prog_id");
 
 	/* can not attach BPF_XDP_DEVMAP program to a device */
-	err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE);
+	err = bpf_xdp_attach(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE, NULL);
 	if (!ASSERT_NEQ(err, 0, "Attach of BPF_XDP_DEVMAP program"))
-		bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
+		bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL);
 
 	val.ifindex = 1;
 	val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog);
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_info.c b/tools/testing/selftests/bpf/prog_tests/xdp_info.c
index abe48e82e1dc..0d01ff6cb91a 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_info.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_info.c
@@ -14,13 +14,13 @@ void serial_test_xdp_info(void)
 
 	/* Get prog_id for XDP_ATTACHED_NONE mode */
 
-	err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, 0);
+	err = bpf_xdp_query_id(IFINDEX_LO, 0, &prog_id);
 	if (CHECK(err, "get_xdp_none", "errno=%d\n", errno))
 		return;
 	if (CHECK(prog_id, "prog_id_none", "unexpected prog_id=%u\n", prog_id))
 		return;
 
-	err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, XDP_FLAGS_SKB_MODE);
+	err = bpf_xdp_query_id(IFINDEX_LO, XDP_FLAGS_SKB_MODE, &prog_id);
 	if (CHECK(err, "get_xdp_none_skb", "errno=%d\n", errno))
 		return;
 	if (CHECK(prog_id, "prog_id_none_skb", "unexpected prog_id=%u\n",
@@ -37,32 +37,32 @@ void serial_test_xdp_info(void)
 	if (CHECK(err, "get_prog_info", "errno=%d\n", errno))
 		goto out_close;
 
-	err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
+	err = bpf_xdp_attach(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE, NULL);
 	if (CHECK(err, "set_xdp_skb", "errno=%d\n", errno))
 		goto out_close;
 
 	/* Get prog_id for single prog mode */
 
-	err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, 0);
+	err = bpf_xdp_query_id(IFINDEX_LO, 0, &prog_id);
 	if (CHECK(err, "get_xdp", "errno=%d\n", errno))
 		goto out;
 	if (CHECK(prog_id != info.id, "prog_id", "prog_id not available\n"))
 		goto out;
 
-	err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, XDP_FLAGS_SKB_MODE);
+	err = bpf_xdp_query_id(IFINDEX_LO, XDP_FLAGS_SKB_MODE, &prog_id);
 	if (CHECK(err, "get_xdp_skb", "errno=%d\n", errno))
 		goto out;
 	if (CHECK(prog_id != info.id, "prog_id_skb", "prog_id not available\n"))
 		goto out;
 
-	err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, XDP_FLAGS_DRV_MODE);
+	err = bpf_xdp_query_id(IFINDEX_LO, XDP_FLAGS_DRV_MODE, &prog_id);
 	if (CHECK(err, "get_xdp_drv", "errno=%d\n", errno))
 		goto out;
 	if (CHECK(prog_id, "prog_id_drv", "unexpected prog_id=%u\n", prog_id))
 		goto out;
 
 out:
-	bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0);
+	bpf_xdp_detach(IFINDEX_LO, 0, NULL);
 out_close:
 	bpf_object__close(obj);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_link.c b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
index b2b357f8c74c..3e9d5c5521f0 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_link.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
@@ -8,9 +8,9 @@
 
 void serial_test_xdp_link(void)
 {
-	DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts, .old_fd = -1);
 	struct test_xdp_link *skel1 = NULL, *skel2 = NULL;
 	__u32 id1, id2, id0 = 0, prog_fd1, prog_fd2;
+	LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
 	struct bpf_link_info link_info;
 	struct bpf_prog_info prog_info;
 	struct bpf_link *link;
@@ -41,12 +41,12 @@ void serial_test_xdp_link(void)
 	id2 = prog_info.id;
 
 	/* set initial prog attachment */
-	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd1, XDP_FLAGS_REPLACE, &opts);
+	err = bpf_xdp_attach(IFINDEX_LO, prog_fd1, XDP_FLAGS_REPLACE, &opts);
 	if (!ASSERT_OK(err, "fd_attach"))
 		goto cleanup;
 
 	/* validate prog ID */
-	err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+	err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0);
 	if (!ASSERT_OK(err, "id1_check_err") || !ASSERT_EQ(id0, id1, "id1_check_val"))
 		goto cleanup;
 
@@ -55,14 +55,14 @@ void serial_test_xdp_link(void)
 	if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
 		bpf_link__destroy(link);
 		/* best-effort detach prog */
-		opts.old_fd = prog_fd1;
-		bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, XDP_FLAGS_REPLACE, &opts);
+		opts.old_prog_fd = prog_fd1;
+		bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_REPLACE, &opts);
 		goto cleanup;
 	}
 
 	/* detach BPF program */
-	opts.old_fd = prog_fd1;
-	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, XDP_FLAGS_REPLACE, &opts);
+	opts.old_prog_fd = prog_fd1;
+	err = bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_REPLACE, &opts);
 	if (!ASSERT_OK(err, "prog_detach"))
 		goto cleanup;
 
@@ -73,23 +73,23 @@ void serial_test_xdp_link(void)
 	skel1->links.xdp_handler = link;
 
 	/* validate prog ID */
-	err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+	err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0);
 	if (!ASSERT_OK(err, "id1_check_err") || !ASSERT_EQ(id0, id1, "id1_check_val"))
 		goto cleanup;
 
 	/* BPF prog attach is not allowed to replace BPF link */
-	opts.old_fd = prog_fd1;
-	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd2, XDP_FLAGS_REPLACE, &opts);
+	opts.old_prog_fd = prog_fd1;
+	err = bpf_xdp_attach(IFINDEX_LO, prog_fd2, XDP_FLAGS_REPLACE, &opts);
 	if (!ASSERT_ERR(err, "prog_attach_fail"))
 		goto cleanup;
 
 	/* Can't force-update when BPF link is active */
-	err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd2, 0);
+	err = bpf_xdp_attach(IFINDEX_LO, prog_fd2, 0, NULL);
 	if (!ASSERT_ERR(err, "prog_update_fail"))
 		goto cleanup;
 
 	/* Can't force-detach when BPF link is active */
-	err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0);
+	err = bpf_xdp_detach(IFINDEX_LO, 0, NULL);
 	if (!ASSERT_ERR(err, "prog_detach_fail"))
 		goto cleanup;
 
@@ -109,7 +109,7 @@ void serial_test_xdp_link(void)
 		goto cleanup;
 	skel2->links.xdp_handler = link;
 
-	err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+	err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0);
 	if (!ASSERT_OK(err, "id2_check_err") || !ASSERT_EQ(id0, id2, "id2_check_val"))
 		goto cleanup;
 
diff --git a/tools/testing/selftests/bpf/xdp_redirect_multi.c b/tools/testing/selftests/bpf/xdp_redirect_multi.c
index 51c8224b4ccc..aaedbf4955c3 100644
--- a/tools/testing/selftests/bpf/xdp_redirect_multi.c
+++ b/tools/testing/selftests/bpf/xdp_redirect_multi.c
@@ -32,12 +32,12 @@ static void int_exit(int sig)
 	int i;
 
 	for (i = 0; ifaces[i] > 0; i++) {
-		if (bpf_get_link_xdp_id(ifaces[i], &prog_id, xdp_flags)) {
-			printf("bpf_get_link_xdp_id failed\n");
+		if (bpf_xdp_query_id(ifaces[i], xdp_flags, &prog_id)) {
+			printf("bpf_xdp_query_id failed\n");
 			exit(1);
 		}
 		if (prog_id)
-			bpf_set_link_xdp_fd(ifaces[i], -1, xdp_flags);
+			bpf_xdp_detach(ifaces[i], xdp_flags, NULL);
 	}
 
 	exit(0);
@@ -210,7 +210,7 @@ int main(int argc, char **argv)
 		}
 
 		/* bind prog_fd to each interface */
-		ret = bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags);
+		ret = bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL);
 		if (ret) {
 			printf("Set xdp fd failed on %d\n", ifindex);
 			goto err_out;
diff --git a/tools/testing/selftests/bpf/xdping.c b/tools/testing/selftests/bpf/xdping.c
index baa870a759a2..c567856fd1bc 100644
--- a/tools/testing/selftests/bpf/xdping.c
+++ b/tools/testing/selftests/bpf/xdping.c
@@ -29,7 +29,7 @@ static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
 
 static void cleanup(int sig)
 {
-	bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+	bpf_xdp_detach(ifindex, xdp_flags, NULL);
 	if (sig)
 		exit(1);
 }
@@ -203,7 +203,7 @@ int main(int argc, char **argv)
 
 	printf("XDP setup disrupts network connectivity, hit Ctrl+C to quit\n");
 
-	if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
+	if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) {
 		fprintf(stderr, "Link set xdp fd failed for %s\n", ifname);
 		goto done;
 	}
-- 
cgit 


From cf1a4cbce63b766d3b7aa5eb57a56d9a2c45ca6c Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Thu, 3 Feb 2022 11:17:32 -0800
Subject: selftests/bpf: Add a selftest for invalid func btf with btf decl_tag

Added a selftest similar to [1] which exposed a kernel bug.
Without the fix in the previous patch, the similar kasan error will appear.

  [1] https://lore.kernel.org/bpf/0000000000009b6eaa05d71a8c06@google.com/

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20220203191732.742285-1-yhs@fb.com
---
 tools/testing/selftests/bpf/prog_tests/btf.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 14f9b6136794..4038108aa499 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -3938,6 +3938,25 @@ static struct btf_raw_test raw_tests[] = {
 	.btf_load_err = true,
 	.err_str = "Invalid component_idx",
 },
+{
+	.descr = "decl_tag test #15, func, invalid func proto",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_DECL_TAG_ENC(NAME_TBD, 3, 0),		/* [2] */
+		BTF_FUNC_ENC(NAME_TBD, 8),			/* [3] */
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0tag\0func"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "tag_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Invalid type_id",
+},
 {
 	.descr = "type_tag test #1",
 	.raw_types = {
-- 
cgit 


From bafe517af2995762010b78422131e5b7270292e4 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Wed, 2 Feb 2022 19:30:28 +0100
Subject: selftests: fib offload: use sensible tos values

Although both iproute2 and the kernel accept 1 and 2 as tos values for
new routes, those are invalid. These values only set ECN bits, which
are ignored during IPv4 fib lookups. Therefore, no packet can actually
match such routes. This selftest therefore only succeeds because it
doesn't verify that the new routes do actually work in practice (it
just checks if the routes are offloaded or not).

It makes more sense to use tos values that don't conflict with ECN.
This way, the selftest won't be affected if we later decide to warn or
even reject invalid tos configurations for new routes.

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Link: https://lore.kernel.org/r/5e43b343720360a1c0e4f5947d9e917b26f30fbf.1643826556.git.gnault@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/forwarding/fib_offload_lib.sh | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/forwarding/fib_offload_lib.sh b/tools/testing/selftests/net/forwarding/fib_offload_lib.sh
index e134a5f529c9..1b3b46292179 100644
--- a/tools/testing/selftests/net/forwarding/fib_offload_lib.sh
+++ b/tools/testing/selftests/net/forwarding/fib_offload_lib.sh
@@ -99,15 +99,15 @@ fib_ipv4_tos_test()
 	fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 0 metric 1024" false
 	check_err $? "Route not in hardware when should"
 
-	ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 2 metric 1024
-	fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 2 metric 1024" false
+	ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 8 metric 1024
+	fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 8 metric 1024" false
 	check_err $? "Highest TOS route not in hardware when should"
 
 	fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 0 metric 1024" true
 	check_err $? "Lowest TOS route still in hardware when should not"
 
-	ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 1 metric 1024
-	fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 1 metric 1024" true
+	ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 4 metric 1024
+	fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 4 metric 1024" true
 	check_err $? "Middle TOS route in hardware when should not"
 
 	log_test "IPv4 routes with TOS"
@@ -277,11 +277,11 @@ fib_ipv4_replay_tos_test()
 	ip -n $ns link set dev dummy1 up
 
 	ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 0
-	ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 1
+	ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 4
 
 	devlink -N $ns dev reload $devlink_dev
 
-	fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 1" false
+	fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 4" false
 	check_err $? "Highest TOS route not in hardware when should"
 
 	fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 0" true
-- 
cgit 


From 95eb6ef82b73255094a23b8cd9045aedbe847435 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Wed, 2 Feb 2022 16:24:21 +0100
Subject: selftests: rtnetlink: Use more sensible tos values

Using tos 0x1 with 'ip route get <IPv4 address> ...' doesn't test much
of the tos option handling: 0x1 just sets an ECN bit, which is cleared
by inet_rtm_getroute() before doing the fib lookup. Let's use 0x10
instead, which is actually taken into account in the route lookup (and
is less surprising for the reader).

For consistency, use 0x10 for the IPv6 route lookup too (IPv6 currently
doesn't clear ECN bits, but might do so in the future).

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Link: https://lore.kernel.org/r/d61119e68d01ba7ef3ba50c1345a5123a11de123.1643815297.git.gnault@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/rtnetlink.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index c9ce3dfa42ee..0900c5438fbb 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -216,9 +216,9 @@ kci_test_route_get()
 	check_err $?
 	ip route get fe80::1 dev "$devdummy" > /dev/null
 	check_err $?
-	ip route get 127.0.0.1 from 127.0.0.1 oif lo tos 0x1 mark 0x1 > /dev/null
+	ip route get 127.0.0.1 from 127.0.0.1 oif lo tos 0x10 mark 0x1 > /dev/null
 	check_err $?
-	ip route get ::1 from ::1 iif lo oif lo tos 0x1 mark 0x1 > /dev/null
+	ip route get ::1 from ::1 iif lo oif lo tos 0x10 mark 0x1 > /dev/null
 	check_err $?
 	ip addr add dev "$devdummy" 10.23.7.11/24
 	check_err $?
-- 
cgit 


From b1446bda56456887f8d439938163164fe916bc0d Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Wed, 2 Feb 2022 15:09:01 +0000
Subject: kselftest: alsa: Check for event generation when we write to controls

Add some coverage of event generation to mixer-test. Rather than doing a
separate set of writes designed to trigger events we add a step to the
existing write_and_verify() which checks to see if the value we read back
from non-volatile controls matches the value before writing and that an
event is or isn't generated as appropriate. The "tests" for events then
simply check that no spurious or missing events were detected. This avoids
needing further logic to generate appropriate values for each control type
and maximises coverage.

When checking for events we use a timeout of 0. This relies on the kernel
generating any event prior to returning to userspace when setting a control.
That is currently the case and it is difficult to see it changing, if it
does the test will need to be updated. Using a delay of 0 means that we
don't slow things down unduly when checking for no event or when events
fail to be generated.

We don't check behaviour for volatile controls since we can't tell what
the behaviour is supposed to be for any given control.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Reviewed-by: Jaroslav Kysela <perex@perex.cz>
Link: https://lore.kernel.org/r/20220202150902.19563-1-broonie@kernel.org
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 tools/testing/selftests/alsa/mixer-test.c | 154 +++++++++++++++++++++++++++++-
 1 file changed, 151 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/alsa/mixer-test.c b/tools/testing/selftests/alsa/mixer-test.c
index 0e88f4f3d802..6edb7dca32af 100644
--- a/tools/testing/selftests/alsa/mixer-test.c
+++ b/tools/testing/selftests/alsa/mixer-test.c
@@ -3,7 +3,7 @@
 // kselftest for the ALSA mixer API
 //
 // Original author: Mark Brown <broonie@kernel.org>
-// Copyright (c) 2021 Arm Limited
+// Copyright (c) 2021-2 Arm Limited
 
 // This test will iterate over all cards detected in the system, exercising
 // every mixer control it can find.  This may conflict with other system
@@ -27,11 +27,12 @@
 
 #include "../kselftest.h"
 
-#define TESTS_PER_CONTROL 4
+#define TESTS_PER_CONTROL 6
 
 struct card_data {
 	snd_ctl_t *handle;
 	int card;
+	struct pollfd pollfd;
 	int num_ctls;
 	snd_ctl_elem_list_t *ctls;
 	struct card_data *next;
@@ -43,6 +44,8 @@ struct ctl_data {
 	snd_ctl_elem_info_t *info;
 	snd_ctl_elem_value_t *def_val;
 	int elem;
+	int event_missing;
+	int event_spurious;
 	struct card_data *card;
 	struct ctl_data *next;
 };
@@ -149,6 +152,7 @@ void find_controls(void)
 			if (!ctl_data)
 				ksft_exit_fail_msg("Out of memory\n");
 
+			memset(ctl_data, 0, sizeof(*ctl_data));
 			ctl_data->card = card_data;
 			ctl_data->elem = ctl;
 			ctl_data->name = snd_ctl_elem_list_get_name(card_data->ctls,
@@ -184,6 +188,26 @@ void find_controls(void)
 			ctl_list = ctl_data;
 		}
 
+		/* Set up for events */
+		err = snd_ctl_subscribe_events(card_data->handle, true);
+		if (err < 0) {
+			ksft_exit_fail_msg("snd_ctl_subscribe_events() failed for card %d: %d\n",
+					   card, err);
+		}
+
+		err = snd_ctl_poll_descriptors_count(card_data->handle);
+		if (err != 1) {
+			ksft_exit_fail_msg("Unexpected desciptor count %d for card %d\n",
+					   err, card);
+		}
+
+		err = snd_ctl_poll_descriptors(card_data->handle,
+					       &card_data->pollfd, 1);
+		if (err != 1) {
+			ksft_exit_fail_msg("snd_ctl_poll_descriptors() failed for %d\n",
+				       card, err);
+		}
+
 	next_card:
 		if (snd_card_next(&card) < 0) {
 			ksft_print_msg("snd_card_next");
@@ -194,6 +218,79 @@ void find_controls(void)
 	snd_config_delete(config);
 }
 
+/*
+ * Block for up to timeout ms for an event, returns a negative value
+ * on error, 0 for no event and 1 for an event.
+ */
+int wait_for_event(struct ctl_data *ctl, int timeout)
+{
+	unsigned short revents;
+	snd_ctl_event_t *event;
+	int count, err;
+	unsigned int mask = 0;
+	unsigned int ev_id;
+
+	snd_ctl_event_alloca(&event);
+
+	do {
+		err = poll(&(ctl->card->pollfd), 1, timeout);
+		if (err < 0) {
+			ksft_print_msg("poll() failed for %s: %s (%d)\n",
+				       ctl->name, strerror(errno), errno);
+			return -1;
+		}
+		/* Timeout */
+		if (err == 0)
+			return 0;
+
+		err = snd_ctl_poll_descriptors_revents(ctl->card->handle,
+						       &(ctl->card->pollfd),
+						       1, &revents);
+		if (err < 0) {
+			ksft_print_msg("snd_ctl_poll_desciptors_revents() failed for %s: %d\n",
+				       ctl->name, err);
+			return err;
+		}
+		if (revents & POLLERR) {
+			ksft_print_msg("snd_ctl_poll_desciptors_revents() reported POLLERR for %s\n",
+				       ctl->name);
+			return -1;
+		}
+		/* No read events */
+		if (!(revents & POLLIN)) {
+			ksft_print_msg("No POLLIN\n");
+			continue;
+		}
+
+		err = snd_ctl_read(ctl->card->handle, event);
+		if (err < 0) {
+			ksft_print_msg("snd_ctl_read() failed for %s: %d\n",
+			       ctl->name, err);
+			return err;
+		}
+
+		if (snd_ctl_event_get_type(event) != SND_CTL_EVENT_ELEM)
+			continue;
+
+		/* The ID returned from the event is 1 less than numid */
+		mask = snd_ctl_event_elem_get_mask(event);
+		ev_id = snd_ctl_event_elem_get_numid(event);
+		if (ev_id != snd_ctl_elem_info_get_numid(ctl->info)) {
+			ksft_print_msg("Event for unexpected ctl %s\n",
+				       snd_ctl_event_elem_get_name(event));
+			continue;
+		}
+
+		if ((mask & SND_CTL_EVENT_MASK_REMOVE) == SND_CTL_EVENT_MASK_REMOVE) {
+			ksft_print_msg("Removal event for %s\n",
+				       ctl->name);
+			return -1;
+		}
+	} while ((mask & SND_CTL_EVENT_MASK_VALUE) != SND_CTL_EVENT_MASK_VALUE);
+
+	return 1;
+}
+
 bool ctl_value_index_valid(struct ctl_data *ctl, snd_ctl_elem_value_t *val,
 			   int index)
 {
@@ -428,7 +525,8 @@ int write_and_verify(struct ctl_data *ctl,
 {
 	int err, i;
 	bool error_expected, mismatch_shown;
-	snd_ctl_elem_value_t *read_val, *w_val;
+	snd_ctl_elem_value_t *initial_val, *read_val, *w_val;
+	snd_ctl_elem_value_alloca(&initial_val);
 	snd_ctl_elem_value_alloca(&read_val);
 	snd_ctl_elem_value_alloca(&w_val);
 
@@ -446,6 +544,18 @@ int write_and_verify(struct ctl_data *ctl,
 		snd_ctl_elem_value_copy(expected_val, write_val);
 	}
 
+	/* Store the value before we write */
+	if (snd_ctl_elem_info_is_readable(ctl->info)) {
+		snd_ctl_elem_value_set_id(initial_val, ctl->id);
+
+		err = snd_ctl_elem_read(ctl->card->handle, initial_val);
+		if (err < 0) {
+			ksft_print_msg("snd_ctl_elem_read() failed: %s\n",
+				       snd_strerror(err));
+			return err;
+		}
+	}
+
 	/*
 	 * Do the write, if we have an expected value ignore the error
 	 * and carry on to validate the expected value.
@@ -470,6 +580,30 @@ int write_and_verify(struct ctl_data *ctl,
 		return err;
 	}
 
+	/*
+	 * Check for an event if the value changed, or confirm that
+	 * there was none if it didn't.  We rely on the kernel
+	 * generating the notification before it returns from the
+	 * write, this is currently true, should that ever change this
+	 * will most likely break and need updating.
+	 */
+	if (!snd_ctl_elem_info_is_volatile(ctl->info)) {
+		err = wait_for_event(ctl, 0);
+		if (snd_ctl_elem_value_compare(initial_val, read_val)) {
+			if (err < 1) {
+				ksft_print_msg("No event generated for %s\n",
+					       ctl->name);
+				ctl->event_missing++;
+			}
+		} else {
+			if (err != 0) {
+				ksft_print_msg("Spurious event generated for %s\n",
+					       ctl->name);
+				ctl->event_spurious++;
+			}
+		}
+	}
+
 	/*
 	 * Use the libray to compare values, if there's a mismatch
 	 * carry on and try to provide a more useful diagnostic than
@@ -898,6 +1032,18 @@ void test_ctl_write_invalid(struct ctl_data *ctl)
 			 ctl->card->card, ctl->elem);
 }
 
+void test_ctl_event_missing(struct ctl_data *ctl)
+{
+	ksft_test_result(!ctl->event_missing, "event_missing.%d.%d\n",
+			 ctl->card->card, ctl->elem);
+}
+
+void test_ctl_event_spurious(struct ctl_data *ctl)
+{
+	ksft_test_result(!ctl->event_spurious, "event_spurious.%d.%d\n",
+			 ctl->card->card, ctl->elem);
+}
+
 int main(void)
 {
 	struct ctl_data *ctl;
@@ -917,6 +1063,8 @@ int main(void)
 		test_ctl_write_default(ctl);
 		test_ctl_write_valid(ctl);
 		test_ctl_write_invalid(ctl);
+		test_ctl_event_missing(ctl);
+		test_ctl_event_spurious(ctl);
 	}
 
 	ksft_exit_pass();
-- 
cgit 


From 9d73d1928eb861cb90ddad08724c5ceb83c9a13b Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Wed, 2 Feb 2022 15:09:02 +0000
Subject: kselftest: alsa: Declare most functions static

This program has only one file so most functions can be static.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Reviewed-by: Jaroslav Kysela <perex@perex.cz>
Link: https://lore.kernel.org/r/20220202150902.19563-2-broonie@kernel.org
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 tools/testing/selftests/alsa/mixer-test.c | 58 ++++++++++++++++---------------
 1 file changed, 30 insertions(+), 28 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/alsa/mixer-test.c b/tools/testing/selftests/alsa/mixer-test.c
index 6edb7dca32af..d0b788b8d287 100644
--- a/tools/testing/selftests/alsa/mixer-test.c
+++ b/tools/testing/selftests/alsa/mixer-test.c
@@ -71,7 +71,8 @@ struct ctl_data *ctl_list = NULL;
 #endif
 
 #ifndef LIB_HAS_LOAD_STRING
-int snd_config_load_string(snd_config_t **config, const char *s, size_t size)
+static int snd_config_load_string(snd_config_t **config, const char *s,
+				  size_t size)
 {
 	snd_input_t *input;
 	snd_config_t *dst;
@@ -99,7 +100,7 @@ int snd_config_load_string(snd_config_t **config, const char *s, size_t size)
 }
 #endif
 
-void find_controls(void)
+static void find_controls(void)
 {
 	char name[32];
 	int card, ctl, err;
@@ -222,7 +223,7 @@ void find_controls(void)
  * Block for up to timeout ms for an event, returns a negative value
  * on error, 0 for no event and 1 for an event.
  */
-int wait_for_event(struct ctl_data *ctl, int timeout)
+static int wait_for_event(struct ctl_data *ctl, int timeout)
 {
 	unsigned short revents;
 	snd_ctl_event_t *event;
@@ -291,8 +292,9 @@ int wait_for_event(struct ctl_data *ctl, int timeout)
 	return 1;
 }
 
-bool ctl_value_index_valid(struct ctl_data *ctl, snd_ctl_elem_value_t *val,
-			   int index)
+static bool ctl_value_index_valid(struct ctl_data *ctl,
+				  snd_ctl_elem_value_t *val,
+				  int index)
 {
 	long int_val;
 	long long int64_val;
@@ -403,7 +405,7 @@ bool ctl_value_index_valid(struct ctl_data *ctl, snd_ctl_elem_value_t *val,
  * Check that the provided value meets the constraints for the
  * provided control.
  */
-bool ctl_value_valid(struct ctl_data *ctl, snd_ctl_elem_value_t *val)
+static bool ctl_value_valid(struct ctl_data *ctl, snd_ctl_elem_value_t *val)
 {
 	int i;
 	bool valid = true;
@@ -419,7 +421,7 @@ bool ctl_value_valid(struct ctl_data *ctl, snd_ctl_elem_value_t *val)
  * Check that we can read the default value and it is valid. Write
  * tests use the read value to restore the default.
  */
-void test_ctl_get_value(struct ctl_data *ctl)
+static void test_ctl_get_value(struct ctl_data *ctl)
 {
 	int err;
 
@@ -454,9 +456,9 @@ out:
 			 ctl->card->card, ctl->elem);
 }
 
-bool show_mismatch(struct ctl_data *ctl, int index,
-		   snd_ctl_elem_value_t *read_val,
-		   snd_ctl_elem_value_t *expected_val)
+static bool show_mismatch(struct ctl_data *ctl, int index,
+			  snd_ctl_elem_value_t *read_val,
+			  snd_ctl_elem_value_t *expected_val)
 {
 	long long expected_int, read_int;
 
@@ -519,9 +521,9 @@ bool show_mismatch(struct ctl_data *ctl, int index,
  * the write to fail, for verifying that invalid writes don't corrupt
  * anything.
  */
-int write_and_verify(struct ctl_data *ctl,
-		     snd_ctl_elem_value_t *write_val,
-		     snd_ctl_elem_value_t *expected_val)
+static int write_and_verify(struct ctl_data *ctl,
+			    snd_ctl_elem_value_t *write_val,
+			    snd_ctl_elem_value_t *expected_val)
 {
 	int err, i;
 	bool error_expected, mismatch_shown;
@@ -628,7 +630,7 @@ int write_and_verify(struct ctl_data *ctl,
  * Make sure we can write the default value back to the control, this
  * should validate that at least some write works.
  */
-void test_ctl_write_default(struct ctl_data *ctl)
+static void test_ctl_write_default(struct ctl_data *ctl)
 {
 	int err;
 
@@ -661,7 +663,7 @@ void test_ctl_write_default(struct ctl_data *ctl)
 			 ctl->card->card, ctl->elem);
 }
 
-bool test_ctl_write_valid_boolean(struct ctl_data *ctl)
+static bool test_ctl_write_valid_boolean(struct ctl_data *ctl)
 {
 	int err, i, j;
 	bool fail = false;
@@ -682,7 +684,7 @@ bool test_ctl_write_valid_boolean(struct ctl_data *ctl)
 	return !fail;
 }
 
-bool test_ctl_write_valid_integer(struct ctl_data *ctl)
+static bool test_ctl_write_valid_integer(struct ctl_data *ctl)
 {
 	int err;
 	int i;
@@ -712,7 +714,7 @@ bool test_ctl_write_valid_integer(struct ctl_data *ctl)
 	return !fail;
 }
 
-bool test_ctl_write_valid_integer64(struct ctl_data *ctl)
+static bool test_ctl_write_valid_integer64(struct ctl_data *ctl)
 {
 	int err, i;
 	long long j, step;
@@ -740,7 +742,7 @@ bool test_ctl_write_valid_integer64(struct ctl_data *ctl)
 	return !fail;
 }
 
-bool test_ctl_write_valid_enumerated(struct ctl_data *ctl)
+static bool test_ctl_write_valid_enumerated(struct ctl_data *ctl)
 {
 	int err, i, j;
 	bool fail = false;
@@ -761,7 +763,7 @@ bool test_ctl_write_valid_enumerated(struct ctl_data *ctl)
 	return !fail;
 }
 
-void test_ctl_write_valid(struct ctl_data *ctl)
+static void test_ctl_write_valid(struct ctl_data *ctl)
 {
 	bool pass;
 	int err;
@@ -814,8 +816,8 @@ void test_ctl_write_valid(struct ctl_data *ctl)
 			 ctl->card->card, ctl->elem);
 }
 
-bool test_ctl_write_invalid_value(struct ctl_data *ctl,
-				  snd_ctl_elem_value_t *val)
+static bool test_ctl_write_invalid_value(struct ctl_data *ctl,
+					 snd_ctl_elem_value_t *val)
 {
 	int err;
 	long val_read;
@@ -836,7 +838,7 @@ bool test_ctl_write_invalid_value(struct ctl_data *ctl,
 	return !ctl_value_valid(ctl, val);
 }
 
-bool test_ctl_write_invalid_boolean(struct ctl_data *ctl)
+static bool test_ctl_write_invalid_boolean(struct ctl_data *ctl)
 {
 	int err, i;
 	long val_read;
@@ -855,7 +857,7 @@ bool test_ctl_write_invalid_boolean(struct ctl_data *ctl)
 	return !fail;
 }
 
-bool test_ctl_write_invalid_integer(struct ctl_data *ctl)
+static bool test_ctl_write_invalid_integer(struct ctl_data *ctl)
 {
 	int i;
 	bool fail = false;
@@ -901,7 +903,7 @@ bool test_ctl_write_invalid_integer(struct ctl_data *ctl)
 	return !fail;
 }
 
-bool test_ctl_write_invalid_integer64(struct ctl_data *ctl)
+static bool test_ctl_write_invalid_integer64(struct ctl_data *ctl)
 {
 	int i;
 	bool fail = false;
@@ -947,7 +949,7 @@ bool test_ctl_write_invalid_integer64(struct ctl_data *ctl)
 	return !fail;
 }
 
-bool test_ctl_write_invalid_enumerated(struct ctl_data *ctl)
+static bool test_ctl_write_invalid_enumerated(struct ctl_data *ctl)
 {
 	int err, i;
 	unsigned int val_read;
@@ -979,7 +981,7 @@ bool test_ctl_write_invalid_enumerated(struct ctl_data *ctl)
 }
 
 
-void test_ctl_write_invalid(struct ctl_data *ctl)
+static void test_ctl_write_invalid(struct ctl_data *ctl)
 {
 	bool pass;
 	int err;
@@ -1032,13 +1034,13 @@ void test_ctl_write_invalid(struct ctl_data *ctl)
 			 ctl->card->card, ctl->elem);
 }
 
-void test_ctl_event_missing(struct ctl_data *ctl)
+static void test_ctl_event_missing(struct ctl_data *ctl)
 {
 	ksft_test_result(!ctl->event_missing, "event_missing.%d.%d\n",
 			 ctl->card->card, ctl->elem);
 }
 
-void test_ctl_event_spurious(struct ctl_data *ctl)
+static void test_ctl_event_spurious(struct ctl_data *ctl)
 {
 	ksft_test_result(!ctl->event_spurious, "event_spurious.%d.%d\n",
 			 ctl->card->card, ctl->elem);
-- 
cgit 


From 976a38e05a49f401cf9ae3ae20273db6d69783cf Mon Sep 17 00:00:00 2001
From: Matteo Croce <mcroce@microsoft.com>
Date: Fri, 4 Feb 2022 01:55:19 +0100
Subject: selftests/bpf: Test bpf_core_types_are_compat() functionality.

Add several tests to check bpf_core_types_are_compat() functionality:
- candidate type name exists and types match
- candidate type name exists but types don't match
- nested func protos at kernel recursion limit
- nested func protos above kernel recursion limit. Such bpf prog
  is rejected during the load.

Signed-off-by: Matteo Croce <mcroce@microsoft.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220204005519.60361-3-mcroce@linux.microsoft.com
---
 tools/testing/selftests/bpf/Makefile               |  2 +-
 .../selftests/bpf/bpf_testmod/bpf_testmod.c        |  7 +++++++
 tools/testing/selftests/bpf/prog_tests/core_kern.c | 16 +++++++++++++++-
 .../selftests/bpf/prog_tests/core_kern_overflow.c  | 13 +++++++++++++
 tools/testing/selftests/bpf/progs/core_kern.c      | 16 ++++++++++++++++
 .../selftests/bpf/progs/core_kern_overflow.c       | 22 ++++++++++++++++++++++
 6 files changed, 74 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/core_kern_overflow.c
 create mode 100644 tools/testing/selftests/bpf/progs/core_kern_overflow.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 945f92d71db3..91ea729990da 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -330,7 +330,7 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h		\
 
 LSKELS := kfunc_call_test.c fentry_test.c fexit_test.c fexit_sleep.c \
 	test_ringbuf.c atomics.c trace_printk.c trace_vprintk.c \
-	map_ptr_kern.c core_kern.c
+	map_ptr_kern.c core_kern.c core_kern_overflow.c
 # Generate both light skeleton and libbpf skeleton for these
 LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test_subprog.c
 SKEL_BLACKLIST += $$(LSKELS)
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
index 595d32ab285a..27d63be47b95 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -13,6 +13,10 @@
 #define CREATE_TRACE_POINTS
 #include "bpf_testmod-events.h"
 
+typedef int (*func_proto_typedef)(long);
+typedef int (*func_proto_typedef_nested1)(func_proto_typedef);
+typedef int (*func_proto_typedef_nested2)(func_proto_typedef_nested1);
+
 DEFINE_PER_CPU(int, bpf_testmod_ksym_percpu) = 123;
 
 noinline void
@@ -31,6 +35,9 @@ struct bpf_testmod_btf_type_tag_2 {
 
 noinline int
 bpf_testmod_test_btf_type_tag_user_1(struct bpf_testmod_btf_type_tag_1 __user *arg) {
+	BTF_TYPE_EMIT(func_proto_typedef);
+	BTF_TYPE_EMIT(func_proto_typedef_nested1);
+	BTF_TYPE_EMIT(func_proto_typedef_nested2);
 	return arg->a;
 }
 
diff --git a/tools/testing/selftests/bpf/prog_tests/core_kern.c b/tools/testing/selftests/bpf/prog_tests/core_kern.c
index 561c5185d886..6a5a1c019a5d 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_kern.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_kern.c
@@ -7,8 +7,22 @@
 void test_core_kern_lskel(void)
 {
 	struct core_kern_lskel *skel;
+	int link_fd;
 
 	skel = core_kern_lskel__open_and_load();
-	ASSERT_OK_PTR(skel, "open_and_load");
+	if (!ASSERT_OK_PTR(skel, "open_and_load"))
+		return;
+
+	link_fd = core_kern_lskel__core_relo_proto__attach(skel);
+	if (!ASSERT_GT(link_fd, 0, "attach(core_relo_proto)"))
+		goto cleanup;
+
+	/* trigger tracepoints */
+	usleep(1);
+	ASSERT_TRUE(skel->bss->proto_out[0], "bpf_core_type_exists");
+	ASSERT_FALSE(skel->bss->proto_out[1], "!bpf_core_type_exists");
+	ASSERT_TRUE(skel->bss->proto_out[2], "bpf_core_type_exists. nested");
+
+cleanup:
 	core_kern_lskel__destroy(skel);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/core_kern_overflow.c b/tools/testing/selftests/bpf/prog_tests/core_kern_overflow.c
new file mode 100644
index 000000000000..04cc145bc26a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/core_kern_overflow.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "test_progs.h"
+#include "core_kern_overflow.lskel.h"
+
+void test_core_kern_overflow_lskel(void)
+{
+	struct core_kern_overflow_lskel *skel;
+
+	skel = core_kern_overflow_lskel__open_and_load();
+	if (!ASSERT_NULL(skel, "open_and_load"))
+		core_kern_overflow_lskel__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/core_kern.c b/tools/testing/selftests/bpf/progs/core_kern.c
index 13499cc15c7d..2715fe27d4cf 100644
--- a/tools/testing/selftests/bpf/progs/core_kern.c
+++ b/tools/testing/selftests/bpf/progs/core_kern.c
@@ -101,4 +101,20 @@ int balancer_ingress(struct __sk_buff *ctx)
 	return 0;
 }
 
+typedef int (*func_proto_typedef___match)(long);
+typedef int (*func_proto_typedef___doesnt_match)(char *);
+typedef int (*func_proto_typedef_nested1)(func_proto_typedef___match);
+
+int proto_out[3];
+
+SEC("raw_tracepoint/sys_enter")
+int core_relo_proto(void *ctx)
+{
+	proto_out[0] = bpf_core_type_exists(func_proto_typedef___match);
+	proto_out[1] = bpf_core_type_exists(func_proto_typedef___doesnt_match);
+	proto_out[2] = bpf_core_type_exists(func_proto_typedef_nested1);
+
+	return 0;
+}
+
 char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/core_kern_overflow.c b/tools/testing/selftests/bpf/progs/core_kern_overflow.c
new file mode 100644
index 000000000000..f0d5652256ba
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/core_kern_overflow.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+typedef int (*func_proto_typedef)(long);
+typedef int (*func_proto_typedef_nested1)(func_proto_typedef);
+typedef int (*func_proto_typedef_nested2)(func_proto_typedef_nested1);
+
+int proto_out;
+
+SEC("raw_tracepoint/sys_enter")
+int core_relo_proto(void *ctx)
+{
+	proto_out = bpf_core_type_exists(func_proto_typedef_nested2);
+
+	return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
-- 
cgit 


From d6a676e0e1a888c2b78de7544c48bd3cfd4d8902 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Fri, 4 Feb 2022 16:03:30 -0800
Subject: selftests: mptcp: add the port argument for set_flags

This patch added the port argument for setting the address flags in
pm_nl_ctl.

Usage:

    pm_nl_ctl set 10.0.2.1 flags backup port 10100

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/pm_nl_ctl.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index 152b84e44d69..2a57462764d0 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -28,7 +28,7 @@ static void syntax(char *argv[])
 	fprintf(stderr, "\tadd [flags signal|subflow|backup|fullmesh] [id <nr>] [dev <name>] <ip>\n");
 	fprintf(stderr, "\tdel <id> [<ip>]\n");
 	fprintf(stderr, "\tget <id>\n");
-	fprintf(stderr, "\tset <ip> [flags backup|nobackup|fullmesh|nofullmesh]\n");
+	fprintf(stderr, "\tset <ip> [flags backup|nobackup|fullmesh|nofullmesh] [port <nr>]\n");
 	fprintf(stderr, "\tflush\n");
 	fprintf(stderr, "\tdump\n");
 	fprintf(stderr, "\tlimits [<rcv addr max> <subflow max>]\n");
@@ -721,6 +721,18 @@ int set_flags(int fd, int pm_family, int argc, char *argv[])
 			rta->rta_len = RTA_LENGTH(4);
 			memcpy(RTA_DATA(rta), &flags, 4);
 			off += NLMSG_ALIGN(rta->rta_len);
+		} else if (!strcmp(argv[arg], "port")) {
+			u_int16_t port;
+
+			if (++arg >= argc)
+				error(1, 0, " missing port value");
+
+			port = atoi(argv[arg]);
+			rta = (void *)(data + off);
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT;
+			rta->rta_len = RTA_LENGTH(2);
+			memcpy(RTA_DATA(rta), &port, 2);
+			off += NLMSG_ALIGN(rta->rta_len);
 		} else {
 			error(1, 0, "unknown keyword %s", argv[arg]);
 		}
-- 
cgit 


From 33397b83eee6417ab144966743024cd7c0e55a9a Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Fri, 4 Feb 2022 16:03:31 -0800
Subject: selftests: mptcp: add backup with port testcase

This patch added the backup testcase using an address with a port number.

The original backup tests only work for the output of 'pm_nl_ctl dump'
without the port number. It chooses the last item in the dump to parse
the address in it, and in this case, the address is showed at the end
of the item.

But it doesn't work for the dump with the port number, in this case, the
port number is showed at the end of the item, not the address.

So implemented a more flexible approach to get the address and the port
number from the dump to fit for the port number case.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 44 ++++++++++++++++++++++---
 1 file changed, 39 insertions(+), 5 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index bd106c7ec232..eb945cebbd6d 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -239,6 +239,16 @@ is_v6()
 	[ -z "${1##*:*}" ]
 }
 
+is_addr()
+{
+	[ -z "${1##*[.:]*}" ]
+}
+
+is_number()
+{
+	[[ $1 == ?(-)+([0-9]) ]]
+}
+
 # $1: ns, $2: port
 wait_local_port_listen()
 {
@@ -464,11 +474,25 @@ do_transfer()
 	if [ ! -z $sflags ]; then
 		sleep 1
 		for netns in "$ns1" "$ns2"; do
-			dump=(`ip netns exec $netns ./pm_nl_ctl dump`)
-			if [ ${#dump[@]} -gt 0 ]; then
-				addr=${dump[${#dump[@]} - 1]}
-				ip netns exec $netns ./pm_nl_ctl set $addr flags $sflags
-			fi
+			ip netns exec $netns ./pm_nl_ctl dump | while read line; do
+				local arr=($line)
+				local addr
+				local port=0
+				local _port=""
+
+				for i in ${arr[@]}; do
+					if is_addr $i; then
+						addr=$i
+					elif is_number $i; then
+						# The minimum expected port number is 10000
+						if [ $i -gt 10000 ]; then
+							port=$i
+						fi
+					fi
+				done
+				if [ $port -ne 0 ]; then _port="port $port"; fi
+				ip netns exec $netns ./pm_nl_ctl set $addr flags $sflags $_port
+			done
 		done
 	fi
 
@@ -1616,6 +1640,16 @@ backup_tests()
 	chk_join_nr "single address, backup" 1 1 1
 	chk_add_nr 1 1
 	chk_prio_nr 1 0
+
+	# single address with port, backup
+	reset
+	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100
+	ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+	run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup
+	chk_join_nr "single address with port, backup" 1 1 1
+	chk_add_nr 1 1
+	chk_prio_nr 1 0
 }
 
 add_addr_ports_tests()
-- 
cgit 


From 34aa6e3bccd8620ca07f47b52dfd144c2418690a Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Fri, 4 Feb 2022 16:03:32 -0800
Subject: selftests: mptcp: add ip mptcp wrappers

This patch added four basic 'ip mptcp' wrappers:

pm_nl_set_limits()
pm_nl_add_endpoint()
pm_nl_del_endpoint()
pm_nl_flush_endpoint().

Wrapped the PM netlink commands 'ip mptcp' and 'pm_nl_ctl' in them, and
used a new argument 'ip_mptcp' to choose which one to use for setting the
PM limits, adding or deleting the PM endpoint.

Used the wrappers in all the selftests in mptcp_join.sh instead of using
the pm_nl_ctl commands directly.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 737 +++++++++++++-----------
 1 file changed, 407 insertions(+), 330 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index eb945cebbd6d..6ca6ed7336d0 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -15,6 +15,7 @@ timeout_test=$((timeout_poll * 2 + 1))
 mptcp_connect=""
 capture=0
 checksum=0
+ip_mptcp=0
 do_all_tests=1
 
 TEST_COUNT=0
@@ -288,6 +289,82 @@ wait_rm_addr()
 	done
 }
 
+pm_nl_set_limits()
+{
+	local ns=$1
+	local addrs=$2
+	local subflows=$3
+
+	if [ $ip_mptcp -eq 1 ]; then
+		ip -n $ns mptcp limits set add_addr_accepted $addrs subflows $subflows
+	else
+		ip netns exec $ns ./pm_nl_ctl limits $addrs $subflows
+	fi
+}
+
+pm_nl_add_endpoint()
+{
+	local ns=$1
+	local addr=$2
+	local flags
+	local port
+	local dev
+	local id
+	local nr=2
+
+	for p in $@
+	do
+		if [ $p = "flags" ]; then
+			eval _flags=\$"$nr"
+			[ ! -z $_flags ]; flags="flags $_flags"
+		fi
+		if [ $p = "dev" ]; then
+			eval _dev=\$"$nr"
+			[ ! -z $_dev ]; dev="dev $_dev"
+		fi
+		if [ $p = "id" ]; then
+			eval _id=\$"$nr"
+			[ ! -z $_id ]; id="id $_id"
+		fi
+		if [ $p = "port" ]; then
+			eval _port=\$"$nr"
+			[ ! -z $_port ]; port="port $_port"
+		fi
+
+		let nr+=1
+	done
+
+	if [ $ip_mptcp -eq 1 ]; then
+		ip -n $ns mptcp endpoint add $addr ${_flags//","/" "} $dev $id $port
+	else
+		ip netns exec $ns ./pm_nl_ctl add $addr $flags $dev $id $port
+	fi
+}
+
+pm_nl_del_endpoint()
+{
+	local ns=$1
+	local id=$2
+	local addr=$3
+
+	if [ $ip_mptcp -eq 1 ]; then
+		ip -n $ns mptcp endpoint delete id $id $addr
+	else
+		ip netns exec $ns ./pm_nl_ctl del $id $addr
+	fi
+}
+
+pm_nl_flush_endpoint()
+{
+	local ns=$1
+
+	if [ $ip_mptcp -eq 1 ]; then
+		ip -n $ns mptcp endpoint flush
+	else
+		ip netns exec $ns ./pm_nl_ctl flush
+	fi
+}
+
 do_transfer()
 {
 	listener_ns="$1"
@@ -388,7 +465,7 @@ do_transfer()
 			else
 				addr="10.0.$counter.1"
 			fi
-			ip netns exec $ns1 ./pm_nl_ctl add $addr flags signal
+			pm_nl_add_endpoint $ns1 $addr flags signal
 			let counter+=1
 			let add_nr_ns1-=1
 		done
@@ -403,16 +480,16 @@ do_transfer()
 				do
 					id=${dump[$pos]}
 					rm_addr=$(rm_addr_count ${connector_ns})
-					ip netns exec ${listener_ns} ./pm_nl_ctl del $id
+					pm_nl_del_endpoint ${listener_ns} $id
 					wait_rm_addr ${connector_ns} ${rm_addr}
 					let counter+=1
 					let pos+=5
 				done
 			fi
 		elif [ $rm_nr_ns1 -eq 8 ]; then
-			ip netns exec ${listener_ns} ./pm_nl_ctl flush
+			pm_nl_flush_endpoint ${listener_ns}
 		elif [ $rm_nr_ns1 -eq 9 ]; then
-			ip netns exec ${listener_ns} ./pm_nl_ctl del 0 ${connect_addr}
+			pm_nl_del_endpoint ${listener_ns} 0 ${connect_addr}
 		fi
 	fi
 
@@ -436,7 +513,7 @@ do_transfer()
 			else
 				addr="10.0.$counter.2"
 			fi
-			ip netns exec $ns2 ./pm_nl_ctl add $addr flags $flags
+			pm_nl_add_endpoint $ns2 $addr flags $flags
 			let counter+=1
 			let add_nr_ns2-=1
 		done
@@ -452,14 +529,14 @@ do_transfer()
 					# rm_addr are serialized, allow the previous one to complete
 					id=${dump[$pos]}
 					rm_addr=$(rm_addr_count ${listener_ns})
-					ip netns exec ${connector_ns} ./pm_nl_ctl del $id
+					pm_nl_del_endpoint ${connector_ns} $id
 					wait_rm_addr ${listener_ns} ${rm_addr}
 					let counter+=1
 					let pos+=5
 				done
 			fi
 		elif [ $rm_nr_ns2 -eq 8 ]; then
-			ip netns exec ${connector_ns} ./pm_nl_ctl flush
+			pm_nl_flush_endpoint ${connector_ns}
 		elif [ $rm_nr_ns2 -eq 9 ]; then
 			local addr
 			if is_v6 "${connect_addr}"; then
@@ -467,7 +544,7 @@ do_transfer()
 			else
 				addr="10.0.1.2"
 			fi
-			ip netns exec ${connector_ns} ./pm_nl_ctl del 0 $addr
+			pm_nl_del_endpoint ${connector_ns} 0 $addr
 		fi
 	fi
 
@@ -1001,51 +1078,51 @@ subflows_tests()
 
 	# subflow limited by client
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 0
-	ip netns exec $ns2 ./pm_nl_ctl limits 0 0
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+	pm_nl_set_limits $ns1 0 0
+	pm_nl_set_limits $ns2 0 0
+	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_join_nr "single subflow, limited by client" 0 0 0
 
 	# subflow limited by server
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 0
-	ip netns exec $ns2 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+	pm_nl_set_limits $ns1 0 0
+	pm_nl_set_limits $ns2 0 1
+	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_join_nr "single subflow, limited by server" 1 1 0
 
 	# subflow
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+	pm_nl_set_limits $ns1 0 1
+	pm_nl_set_limits $ns2 0 1
+	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_join_nr "single subflow" 1 1 1
 
 	# multiple subflows
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 2
-	ip netns exec $ns2 ./pm_nl_ctl limits 0 2
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
+	pm_nl_set_limits $ns1 0 2
+	pm_nl_set_limits $ns2 0 2
+	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+	pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_join_nr "multiple subflows" 2 2 2
 
 	# multiple subflows limited by server
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl limits 0 2
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
+	pm_nl_set_limits $ns1 0 1
+	pm_nl_set_limits $ns2 0 2
+	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+	pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_join_nr "multiple subflows, limited by server" 2 2 1
 
 	# single subflow, dev
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow dev ns2eth3
+	pm_nl_set_limits $ns1 0 1
+	pm_nl_set_limits $ns2 0 1
+	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow dev ns2eth3
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_join_nr "single subflow, dev" 1 1 1
 }
@@ -1055,28 +1132,28 @@ subflows_error_tests()
 	# If a single subflow is configured, and matches the MPC src
 	# address, no additional subflow should be created
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.1.2 flags subflow
+	pm_nl_set_limits $ns1 0 1
+	pm_nl_set_limits $ns2 0 1
+	pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow
 	run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
 	chk_join_nr "no MPC reuse with single endpoint" 0 0 0
 
 	# multiple subflows, with subflow creation error
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 2
-	ip netns exec $ns2 ./pm_nl_ctl limits 0 2
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
+	pm_nl_set_limits $ns1 0 2
+	pm_nl_set_limits $ns2 0 2
+	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+	pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
 	ip netns exec $ns1 iptables -A INPUT -s 10.0.3.2 -p tcp -j REJECT
 	run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
 	chk_join_nr "multi subflows, with failing subflow" 1 1 1
 
 	# multiple subflows, with subflow timeout on MPJ
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 2
-	ip netns exec $ns2 ./pm_nl_ctl limits 0 2
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
+	pm_nl_set_limits $ns1 0 2
+	pm_nl_set_limits $ns2 0 2
+	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+	pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
 	ip netns exec $ns1 iptables -A INPUT -s 10.0.3.2 -p tcp -j DROP
 	run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
 	chk_join_nr "multi subflows, with subflow timeout" 1 1 1
@@ -1085,9 +1162,9 @@ subflows_error_tests()
 	# closed subflow (due to reset) is not reused if additional
 	# subflows are added later
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+	pm_nl_set_limits $ns1 0 1
+	pm_nl_set_limits $ns2 0 1
+	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 	ip netns exec $ns1 iptables -A INPUT -s 10.0.3.2 -p tcp -j REJECT
 	run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow &
 
@@ -1097,7 +1174,7 @@ subflows_error_tests()
 
 	# mpj subflow will be in TW after the reset
 	wait_for_tw $ns2
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
+	pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
 	wait
 
 	# additional subflow could be created only if the PM select
@@ -1109,16 +1186,16 @@ signal_address_tests()
 {
 	# add_address, unused
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_join_nr "unused signal address" 0 0 0
 	chk_add_nr 1 1
 
 	# accept and use add_addr
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 1
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+	pm_nl_set_limits $ns1 0 1
+	pm_nl_set_limits $ns2 1 1
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_join_nr "signal address" 1 1 1
 	chk_add_nr 1 1
@@ -1128,59 +1205,59 @@ signal_address_tests()
 	# belong to different subnets or one of the listed local address could be
 	# used for 'add_addr' subflow
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 2
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 2
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+	pm_nl_set_limits $ns1 0 2
+	pm_nl_set_limits $ns2 1 2
+	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_join_nr "subflow and signal" 2 2 2
 	chk_add_nr 1 1
 
 	# accept and use add_addr with additional subflows
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 3
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 3
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
+	pm_nl_set_limits $ns1 0 3
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+	pm_nl_set_limits $ns2 1 3
+	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+	pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_join_nr "multiple subflows and signal" 3 3 3
 	chk_add_nr 1 1
 
 	# signal addresses
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 3 3
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal
-	ip netns exec $ns2 ./pm_nl_ctl limits 3 3
+	pm_nl_set_limits $ns1 3 3
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+	pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+	pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
+	pm_nl_set_limits $ns2 3 3
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_join_nr "signal addresses" 3 3 3
 	chk_add_nr 3 3
 
 	# signal invalid addresses
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 3 3
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.12.1 flags signal
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.14.1 flags signal
-	ip netns exec $ns2 ./pm_nl_ctl limits 3 3
+	pm_nl_set_limits $ns1 3 3
+	pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
+	pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+	pm_nl_add_endpoint $ns1 10.0.14.1 flags signal
+	pm_nl_set_limits $ns2 3 3
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_join_nr "signal invalid addresses" 1 1 1
 	chk_add_nr 3 3
 
 	# signal addresses race test
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 4 4
-	ip netns exec $ns2 ./pm_nl_ctl limits 4 4
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1 flags signal
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.1.2 flags signal
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags signal
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags signal
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags signal
+	pm_nl_set_limits $ns1 4 4
+	pm_nl_set_limits $ns2 4 4
+	pm_nl_add_endpoint $ns1 10.0.1.1 flags signal
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+	pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+	pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
+	pm_nl_add_endpoint $ns2 10.0.1.2 flags signal
+	pm_nl_add_endpoint $ns2 10.0.2.2 flags signal
+	pm_nl_add_endpoint $ns2 10.0.3.2 flags signal
+	pm_nl_add_endpoint $ns2 10.0.4.2 flags signal
 	run_tests $ns1 $ns2 10.0.1.1
 
 	# the server will not signal the address terminating
@@ -1200,11 +1277,11 @@ link_failure_tests()
 	# active backup and link switch-over.
 	# Let's set some arbitrary (low) virtual link limits.
 	init_shapers
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 3
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 3
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 dev ns2eth4 flags subflow
+	pm_nl_set_limits $ns1 0 3
+	pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
+	pm_nl_set_limits $ns2 1 3
+	pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow
+	pm_nl_add_endpoint $ns2 10.0.4.2 dev ns2eth4 flags subflow
 	run_tests $ns1 $ns2 10.0.1.1 1
 	chk_join_nr "multiple flows, signal, link failure" 3 3 3
 	chk_add_nr 1 1
@@ -1214,11 +1291,11 @@ link_failure_tests()
 	# for bidirectional transfer
 	reset
 	init_shapers
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 3
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 3
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 dev ns2eth4 flags subflow
+	pm_nl_set_limits $ns1 0 3
+	pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
+	pm_nl_set_limits $ns2 1 3
+	pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow
+	pm_nl_add_endpoint $ns2 10.0.4.2 dev ns2eth4 flags subflow
 	run_tests $ns1 $ns2 10.0.1.1 2
 	chk_join_nr "multi flows, signal, bidi, link fail" 3 3 3
 	chk_add_nr 1 1
@@ -1228,11 +1305,11 @@ link_failure_tests()
 	# will never be used
 	reset
 	init_shapers
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 2
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 2
+	pm_nl_set_limits $ns1 0 2
+	pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
+	pm_nl_set_limits $ns2 1 2
 	export FAILING_LINKS="1"
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow,backup
+	pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup
 	run_tests $ns1 $ns2 10.0.1.1 1
 	chk_join_nr "backup subflow unused, link failure" 2 2 2
 	chk_add_nr 1 1
@@ -1242,10 +1319,10 @@ link_failure_tests()
 	# the traffic
 	reset
 	init_shapers
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 2
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 2
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow,backup
+	pm_nl_set_limits $ns1 0 2
+	pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
+	pm_nl_set_limits $ns2 1 2
+	pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup
 	export FAILING_LINKS="1 2"
 	run_tests $ns1 $ns2 10.0.1.1 1
 	chk_join_nr "backup flow used, multi links fail" 2 2 2
@@ -1257,10 +1334,10 @@ link_failure_tests()
 	# for bidirectional transfer
 	reset
 	init_shapers
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 2
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 3
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow,backup
+	pm_nl_set_limits $ns1 0 2
+	pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
+	pm_nl_set_limits $ns2 1 3
+	pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup
 	run_tests $ns1 $ns2 10.0.1.1 2
 	chk_join_nr "backup flow used, bidi, link failure" 2 2 2
 	chk_add_nr 1 1
@@ -1272,38 +1349,38 @@ add_addr_timeout_tests()
 {
 	# add_addr timeout
 	reset_with_add_addr_timeout
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 1
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+	pm_nl_set_limits $ns1 0 1
+	pm_nl_set_limits $ns2 1 1
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
 	run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
 	chk_join_nr "signal address, ADD_ADDR timeout" 1 1 1
 	chk_add_nr 4 0
 
 	# add_addr timeout IPv6
 	reset_with_add_addr_timeout 6
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 1
-	ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal
+	pm_nl_set_limits $ns1 0 1
+	pm_nl_set_limits $ns2 1 1
+	pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
 	run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
 	chk_join_nr "signal address, ADD_ADDR6 timeout" 1 1 1
 	chk_add_nr 4 0
 
 	# signal addresses timeout
 	reset_with_add_addr_timeout
-	ip netns exec $ns1 ./pm_nl_ctl limits 2 2
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
-	ip netns exec $ns2 ./pm_nl_ctl limits 2 2
+	pm_nl_set_limits $ns1 2 2
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+	pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+	pm_nl_set_limits $ns2 2 2
 	run_tests $ns1 $ns2 10.0.1.1 0 0 0 least
 	chk_join_nr "signal addresses, ADD_ADDR timeout" 2 2 2
 	chk_add_nr 8 0
 
 	# signal invalid addresses timeout
 	reset_with_add_addr_timeout
-	ip netns exec $ns1 ./pm_nl_ctl limits 2 2
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.12.1 flags signal
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
-	ip netns exec $ns2 ./pm_nl_ctl limits 2 2
+	pm_nl_set_limits $ns1 2 2
+	pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
+	pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+	pm_nl_set_limits $ns2 2 2
 	run_tests $ns1 $ns2 10.0.1.1 0 0 0 least
 	chk_join_nr "invalid address, ADD_ADDR timeout" 1 1 1
 	chk_add_nr 8 0
@@ -1313,28 +1390,28 @@ remove_tests()
 {
 	# single subflow, remove
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+	pm_nl_set_limits $ns1 0 1
+	pm_nl_set_limits $ns2 0 1
+	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 	run_tests $ns1 $ns2 10.0.1.1 0 0 -1 slow
 	chk_join_nr "remove single subflow" 1 1 1
 	chk_rm_nr 1 1
 
 	# multiple subflows, remove
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 2
-	ip netns exec $ns2 ./pm_nl_ctl limits 0 2
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+	pm_nl_set_limits $ns1 0 2
+	pm_nl_set_limits $ns2 0 2
+	pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
+	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 	run_tests $ns1 $ns2 10.0.1.1 0 0 -2 slow
 	chk_join_nr "remove multiple subflows" 2 2 2
 	chk_rm_nr 2 2
 
 	# single address, remove
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+	pm_nl_set_limits $ns1 0 1
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+	pm_nl_set_limits $ns2 1 1
 	run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow
 	chk_join_nr "remove single address" 1 1 1
 	chk_add_nr 1 1
@@ -1342,10 +1419,10 @@ remove_tests()
 
 	# subflow and signal, remove
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 2
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 2
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+	pm_nl_set_limits $ns1 0 2
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+	pm_nl_set_limits $ns2 1 2
+	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 	run_tests $ns1 $ns2 10.0.1.1 0 -1 -1 slow
 	chk_join_nr "remove subflow and signal" 2 2 2
 	chk_add_nr 1 1
@@ -1353,11 +1430,11 @@ remove_tests()
 
 	# subflows and signal, remove
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 3
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 3
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
+	pm_nl_set_limits $ns1 0 3
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+	pm_nl_set_limits $ns2 1 3
+	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+	pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
 	run_tests $ns1 $ns2 10.0.1.1 0 -1 -2 slow
 	chk_join_nr "remove subflows and signal" 3 3 3
 	chk_add_nr 1 1
@@ -1365,11 +1442,11 @@ remove_tests()
 
 	# addresses remove
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 3 3
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal id 250
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal
-	ip netns exec $ns2 ./pm_nl_ctl limits 3 3
+	pm_nl_set_limits $ns1 3 3
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250
+	pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+	pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
+	pm_nl_set_limits $ns2 3 3
 	run_tests $ns1 $ns2 10.0.1.1 0 -3 0 slow
 	chk_join_nr "remove addresses" 3 3 3
 	chk_add_nr 3 3
@@ -1377,11 +1454,11 @@ remove_tests()
 
 	# invalid addresses remove
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 3 3
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.12.1 flags signal
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.14.1 flags signal
-	ip netns exec $ns2 ./pm_nl_ctl limits 3 3
+	pm_nl_set_limits $ns1 3 3
+	pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
+	pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+	pm_nl_add_endpoint $ns1 10.0.14.1 flags signal
+	pm_nl_set_limits $ns2 3 3
 	run_tests $ns1 $ns2 10.0.1.1 0 -3 0 slow
 	chk_join_nr "remove invalid addresses" 1 1 1
 	chk_add_nr 3 3
@@ -1389,11 +1466,11 @@ remove_tests()
 
 	# subflows and signal, flush
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 3
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 3
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
+	pm_nl_set_limits $ns1 0 3
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+	pm_nl_set_limits $ns2 1 3
+	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+	pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
 	run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow
 	chk_join_nr "flush subflows and signal" 3 3 3
 	chk_add_nr 1 1
@@ -1401,22 +1478,22 @@ remove_tests()
 
 	# subflows flush
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 3 3
-	ip netns exec $ns2 ./pm_nl_ctl limits 3 3
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow id 150
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
+	pm_nl_set_limits $ns1 3 3
+	pm_nl_set_limits $ns2 3 3
+	pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow id 150
+	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+	pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
 	run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow
 	chk_join_nr "flush subflows" 3 3 3
 	chk_rm_nr 3 3
 
 	# addresses flush
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 3 3
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal id 250
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal
-	ip netns exec $ns2 ./pm_nl_ctl limits 3 3
+	pm_nl_set_limits $ns1 3 3
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250
+	pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+	pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
+	pm_nl_set_limits $ns2 3 3
 	run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow
 	chk_join_nr "flush addresses" 3 3 3
 	chk_add_nr 3 3
@@ -1424,11 +1501,11 @@ remove_tests()
 
 	# invalid addresses flush
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 3 3
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.12.1 flags signal
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.14.1 flags signal
-	ip netns exec $ns2 ./pm_nl_ctl limits 3 3
+	pm_nl_set_limits $ns1 3 3
+	pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
+	pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+	pm_nl_add_endpoint $ns1 10.0.14.1 flags signal
+	pm_nl_set_limits $ns2 3 3
 	run_tests $ns1 $ns2 10.0.1.1 0 -8 0 slow
 	chk_join_nr "flush invalid addresses" 1 1 1
 	chk_add_nr 3 3
@@ -1436,18 +1513,18 @@ remove_tests()
 
 	# remove id 0 subflow
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+	pm_nl_set_limits $ns1 0 1
+	pm_nl_set_limits $ns2 0 1
+	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 	run_tests $ns1 $ns2 10.0.1.1 0 0 -9 slow
 	chk_join_nr "remove id 0 subflow" 1 1 1
 	chk_rm_nr 1 1
 
 	# remove id 0 address
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+	pm_nl_set_limits $ns1 0 1
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+	pm_nl_set_limits $ns2 1 1
 	run_tests $ns1 $ns2 10.0.1.1 0 -9 0 slow
 	chk_join_nr "remove id 0 address" 1 1 1
 	chk_add_nr 1 1
@@ -1458,37 +1535,37 @@ add_tests()
 {
 	# add single subflow
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+	pm_nl_set_limits $ns1 0 1
+	pm_nl_set_limits $ns2 0 1
 	run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow
 	chk_join_nr "add single subflow" 1 1 1
 
 	# add signal address
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+	pm_nl_set_limits $ns1 0 1
+	pm_nl_set_limits $ns2 1 1
 	run_tests $ns1 $ns2 10.0.1.1 0 1 0 slow
 	chk_join_nr "add signal address" 1 1 1
 	chk_add_nr 1 1
 
 	# add multiple subflows
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 2
-	ip netns exec $ns2 ./pm_nl_ctl limits 0 2
+	pm_nl_set_limits $ns1 0 2
+	pm_nl_set_limits $ns2 0 2
 	run_tests $ns1 $ns2 10.0.1.1 0 0 2 slow
 	chk_join_nr "add multiple subflows" 2 2 2
 
 	# add multiple subflows IPv6
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 2
-	ip netns exec $ns2 ./pm_nl_ctl limits 0 2
+	pm_nl_set_limits $ns1 0 2
+	pm_nl_set_limits $ns2 0 2
 	run_tests $ns1 $ns2 dead:beef:1::1 0 0 2 slow
 	chk_join_nr "add multiple subflows IPv6" 2 2 2
 
 	# add multiple addresses IPv6
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 2
-	ip netns exec $ns2 ./pm_nl_ctl limits 2 2
+	pm_nl_set_limits $ns1 0 2
+	pm_nl_set_limits $ns2 2 2
 	run_tests $ns1 $ns2 dead:beef:1::1 0 2 0 slow
 	chk_join_nr "add multiple addresses IPv6" 2 2 2
 	chk_add_nr 2 2
@@ -1498,33 +1575,33 @@ ipv6_tests()
 {
 	# subflow IPv6
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 dev ns2eth3 flags subflow
+	pm_nl_set_limits $ns1 0 1
+	pm_nl_set_limits $ns2 0 1
+	pm_nl_add_endpoint $ns2 dead:beef:3::2 dev ns2eth3 flags subflow
 	run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
 	chk_join_nr "single subflow IPv6" 1 1 1
 
 	# add_address, unused IPv6
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal
+	pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
 	run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
 	chk_join_nr "unused signal address IPv6" 0 0 0
 	chk_add_nr 1 1
 
 	# signal address IPv6
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+	pm_nl_set_limits $ns1 0 1
+	pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
+	pm_nl_set_limits $ns2 1 1
 	run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
 	chk_join_nr "single address IPv6" 1 1 1
 	chk_add_nr 1 1
 
 	# single address IPv6, remove
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+	pm_nl_set_limits $ns1 0 1
+	pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
+	pm_nl_set_limits $ns2 1 1
 	run_tests $ns1 $ns2 dead:beef:1::1 0 -1 0 slow
 	chk_join_nr "remove single address IPv6" 1 1 1
 	chk_add_nr 1 1
@@ -1532,10 +1609,10 @@ ipv6_tests()
 
 	# subflow and signal IPv6, remove
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 2
-	ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 2
-	ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 dev ns2eth3 flags subflow
+	pm_nl_set_limits $ns1 0 2
+	pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
+	pm_nl_set_limits $ns2 1 2
+	pm_nl_add_endpoint $ns2 dead:beef:3::2 dev ns2eth3 flags subflow
 	run_tests $ns1 $ns2 dead:beef:1::1 0 -1 -1 slow
 	chk_join_nr "remove subflow and signal IPv6" 2 2 2
 	chk_add_nr 1 1
@@ -1546,76 +1623,76 @@ v4mapped_tests()
 {
 	# subflow IPv4-mapped to IPv4-mapped
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl add "::ffff:10.0.3.2" flags subflow
+	pm_nl_set_limits $ns1 0 1
+	pm_nl_set_limits $ns2 0 1
+	pm_nl_add_endpoint $ns2 "::ffff:10.0.3.2" flags subflow
 	run_tests $ns1 $ns2 "::ffff:10.0.1.1"
 	chk_join_nr "single subflow IPv4-mapped" 1 1 1
 
 	# signal address IPv4-mapped with IPv4-mapped sk
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 1
-	ip netns exec $ns1 ./pm_nl_ctl add "::ffff:10.0.2.1" flags signal
+	pm_nl_set_limits $ns1 0 1
+	pm_nl_set_limits $ns2 1 1
+	pm_nl_add_endpoint $ns1 "::ffff:10.0.2.1" flags signal
 	run_tests $ns1 $ns2 "::ffff:10.0.1.1"
 	chk_join_nr "signal address IPv4-mapped" 1 1 1
 	chk_add_nr 1 1
 
 	# subflow v4-map-v6
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+	pm_nl_set_limits $ns1 0 1
+	pm_nl_set_limits $ns2 0 1
+	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 	run_tests $ns1 $ns2 "::ffff:10.0.1.1"
 	chk_join_nr "single subflow v4-map-v6" 1 1 1
 
 	# signal address v4-map-v6
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 1
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+	pm_nl_set_limits $ns1 0 1
+	pm_nl_set_limits $ns2 1 1
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
 	run_tests $ns1 $ns2 "::ffff:10.0.1.1"
 	chk_join_nr "signal address v4-map-v6" 1 1 1
 	chk_add_nr 1 1
 
 	# subflow v6-map-v4
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl add "::ffff:10.0.3.2" flags subflow
+	pm_nl_set_limits $ns1 0 1
+	pm_nl_set_limits $ns2 0 1
+	pm_nl_add_endpoint $ns2 "::ffff:10.0.3.2" flags subflow
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_join_nr "single subflow v6-map-v4" 1 1 1
 
 	# signal address v6-map-v4
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 1
-	ip netns exec $ns1 ./pm_nl_ctl add "::ffff:10.0.2.1" flags signal
+	pm_nl_set_limits $ns1 0 1
+	pm_nl_set_limits $ns2 1 1
+	pm_nl_add_endpoint $ns1 "::ffff:10.0.2.1" flags signal
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_join_nr "signal address v6-map-v4" 1 1 1
 	chk_add_nr 1 1
 
 	# no subflow IPv6 to v4 address
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl add dead:beef:2::2 flags subflow
+	pm_nl_set_limits $ns1 0 1
+	pm_nl_set_limits $ns2 0 1
+	pm_nl_add_endpoint $ns2 dead:beef:2::2 flags subflow
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_join_nr "no JOIN with diff families v4-v6" 0 0 0
 
 	# no subflow IPv6 to v4 address even if v6 has a valid v4 at the end
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl add dead:beef:2::10.0.3.2 flags subflow
+	pm_nl_set_limits $ns1 0 1
+	pm_nl_set_limits $ns2 0 1
+	pm_nl_add_endpoint $ns2 dead:beef:2::10.0.3.2 flags subflow
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_join_nr "no JOIN with diff families v4-v6-2" 0 0 0
 
 	# no subflow IPv4 to v6 address, no need to slow down too then
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+	pm_nl_set_limits $ns1 0 1
+	pm_nl_set_limits $ns2 0 1
+	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 	run_tests $ns1 $ns2 dead:beef:1::1
 	chk_join_nr "no JOIN with diff families v6-v4" 0 0 0
 }
@@ -1624,18 +1701,18 @@ backup_tests()
 {
 	# single subflow, backup
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow,backup
+	pm_nl_set_limits $ns1 0 1
+	pm_nl_set_limits $ns2 0 1
+	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup
 	run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow nobackup
 	chk_join_nr "single subflow, backup" 1 1 1
 	chk_prio_nr 0 1
 
 	# single address, backup
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+	pm_nl_set_limits $ns1 0 1
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+	pm_nl_set_limits $ns2 1 1
 	run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup
 	chk_join_nr "single address, backup" 1 1 1
 	chk_add_nr 1 1
@@ -1643,9 +1720,9 @@ backup_tests()
 
 	# single address with port, backup
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+	pm_nl_set_limits $ns1 0 1
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+	pm_nl_set_limits $ns2 1 1
 	run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup
 	chk_join_nr "single address with port, backup" 1 1 1
 	chk_add_nr 1 1
@@ -1656,28 +1733,28 @@ add_addr_ports_tests()
 {
 	# signal address with port
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 1
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100
+	pm_nl_set_limits $ns1 0 1
+	pm_nl_set_limits $ns2 1 1
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_join_nr "signal address with port" 1 1 1
 	chk_add_nr 1 1 1
 
 	# subflow and signal with port
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 2
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 2
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+	pm_nl_set_limits $ns1 0 2
+	pm_nl_set_limits $ns2 1 2
+	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_join_nr "subflow and signal with port" 2 2 2
 	chk_add_nr 1 1 1
 
 	# single address with port, remove
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+	pm_nl_set_limits $ns1 0 1
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+	pm_nl_set_limits $ns2 1 1
 	run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow
 	chk_join_nr "remove single address with port" 1 1 1
 	chk_add_nr 1 1 1
@@ -1685,10 +1762,10 @@ add_addr_ports_tests()
 
 	# subflow and signal with port, remove
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 2
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 2
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+	pm_nl_set_limits $ns1 0 2
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+	pm_nl_set_limits $ns2 1 2
+	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 	run_tests $ns1 $ns2 10.0.1.1 0 -1 -1 slow
 	chk_join_nr "remove subflow and signal with port" 2 2 2
 	chk_add_nr 1 1 1
@@ -1696,11 +1773,11 @@ add_addr_ports_tests()
 
 	# subflows and signal with port, flush
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 3
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 3
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
+	pm_nl_set_limits $ns1 0 3
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+	pm_nl_set_limits $ns2 1 3
+	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+	pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
 	run_tests $ns1 $ns2 10.0.1.1 0 -8 -2 slow
 	chk_join_nr "flush subflows and signal with port" 3 3 3
 	chk_add_nr 1 1
@@ -1708,20 +1785,20 @@ add_addr_ports_tests()
 
 	# multiple addresses with port
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 2 2
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal port 10100
-	ip netns exec $ns2 ./pm_nl_ctl limits 2 2
+	pm_nl_set_limits $ns1 2 2
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+	pm_nl_add_endpoint $ns1 10.0.3.1 flags signal port 10100
+	pm_nl_set_limits $ns2 2 2
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_join_nr "multiple addresses with port" 2 2 2
 	chk_add_nr 2 2 2
 
 	# multiple addresses with ports
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 2 2
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal port 10101
-	ip netns exec $ns2 ./pm_nl_ctl limits 2 2
+	pm_nl_set_limits $ns1 2 2
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+	pm_nl_add_endpoint $ns1 10.0.3.1 flags signal port 10101
+	pm_nl_set_limits $ns2 2 2
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_join_nr "multiple addresses with ports" 2 2 2
 	chk_add_nr 2 2 2
@@ -1731,56 +1808,56 @@ syncookies_tests()
 {
 	# single subflow, syncookies
 	reset_with_cookies
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+	pm_nl_set_limits $ns1 0 1
+	pm_nl_set_limits $ns2 0 1
+	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_join_nr "single subflow with syn cookies" 1 1 1
 
 	# multiple subflows with syn cookies
 	reset_with_cookies
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 2
-	ip netns exec $ns2 ./pm_nl_ctl limits 0 2
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
+	pm_nl_set_limits $ns1 0 2
+	pm_nl_set_limits $ns2 0 2
+	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+	pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_join_nr "multiple subflows with syn cookies" 2 2 2
 
 	# multiple subflows limited by server
 	reset_with_cookies
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl limits 0 2
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
+	pm_nl_set_limits $ns1 0 1
+	pm_nl_set_limits $ns2 0 2
+	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+	pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_join_nr "subflows limited by server w cookies" 2 1 1
 
 	# test signal address with cookies
 	reset_with_cookies
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 1
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+	pm_nl_set_limits $ns1 0 1
+	pm_nl_set_limits $ns2 1 1
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_join_nr "signal address with syn cookies" 1 1 1
 	chk_add_nr 1 1
 
 	# test cookie with subflow and signal
 	reset_with_cookies
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 2
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 2
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+	pm_nl_set_limits $ns1 0 2
+	pm_nl_set_limits $ns2 1 2
+	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_join_nr "subflow and signal w cookies" 2 2 2
 	chk_add_nr 1 1
 
 	# accept and use add_addr with additional subflows
 	reset_with_cookies
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 3
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 3
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
+	pm_nl_set_limits $ns1 0 3
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+	pm_nl_set_limits $ns2 1 3
+	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+	pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_join_nr "subflows and signal w. cookies" 3 3 3
 	chk_add_nr 1 1
@@ -1790,29 +1867,29 @@ checksum_tests()
 {
 	# checksum test 0 0
 	reset_with_checksum 0 0
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+	pm_nl_set_limits $ns1 0 1
+	pm_nl_set_limits $ns2 0 1
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_csum_nr "checksum test 0 0"
 
 	# checksum test 1 1
 	reset_with_checksum 1 1
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+	pm_nl_set_limits $ns1 0 1
+	pm_nl_set_limits $ns2 0 1
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_csum_nr "checksum test 1 1"
 
 	# checksum test 0 1
 	reset_with_checksum 0 1
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+	pm_nl_set_limits $ns1 0 1
+	pm_nl_set_limits $ns2 0 1
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_csum_nr "checksum test 0 1"
 
 	# checksum test 1 0
 	reset_with_checksum 1 0
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-	ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+	pm_nl_set_limits $ns1 0 1
+	pm_nl_set_limits $ns2 0 1
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_csum_nr "checksum test 1 0"
 }
@@ -1821,26 +1898,26 @@ deny_join_id0_tests()
 {
 	# subflow allow join id0 ns1
 	reset_with_allow_join_id0 1 0
-	ip netns exec $ns1 ./pm_nl_ctl limits 1 1
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 1
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+	pm_nl_set_limits $ns1 1 1
+	pm_nl_set_limits $ns2 1 1
+	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_join_nr "single subflow allow join id0 ns1" 1 1 1
 
 	# subflow allow join id0 ns2
 	reset_with_allow_join_id0 0 1
-	ip netns exec $ns1 ./pm_nl_ctl limits 1 1
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 1
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+	pm_nl_set_limits $ns1 1 1
+	pm_nl_set_limits $ns2 1 1
+	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_join_nr "single subflow allow join id0 ns2" 0 0 0
 
 	# signal address allow join id0 ns1
 	# ADD_ADDRs are not affected by allow_join_id0 value.
 	reset_with_allow_join_id0 1 0
-	ip netns exec $ns1 ./pm_nl_ctl limits 1 1
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 1
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+	pm_nl_set_limits $ns1 1 1
+	pm_nl_set_limits $ns2 1 1
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_join_nr "signal address allow join id0 ns1" 1 1 1
 	chk_add_nr 1 1
@@ -1848,28 +1925,28 @@ deny_join_id0_tests()
 	# signal address allow join id0 ns2
 	# ADD_ADDRs are not affected by allow_join_id0 value.
 	reset_with_allow_join_id0 0 1
-	ip netns exec $ns1 ./pm_nl_ctl limits 1 1
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 1
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+	pm_nl_set_limits $ns1 1 1
+	pm_nl_set_limits $ns2 1 1
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_join_nr "signal address allow join id0 ns2" 1 1 1
 	chk_add_nr 1 1
 
 	# subflow and address allow join id0 ns1
 	reset_with_allow_join_id0 1 0
-	ip netns exec $ns1 ./pm_nl_ctl limits 2 2
-	ip netns exec $ns2 ./pm_nl_ctl limits 2 2
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+	pm_nl_set_limits $ns1 2 2
+	pm_nl_set_limits $ns2 2 2
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_join_nr "subflow and address allow join id0 1" 2 2 2
 
 	# subflow and address allow join id0 ns2
 	reset_with_allow_join_id0 0 1
-	ip netns exec $ns1 ./pm_nl_ctl limits 2 2
-	ip netns exec $ns2 ./pm_nl_ctl limits 2 2
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+	pm_nl_set_limits $ns1 2 2
+	pm_nl_set_limits $ns2 2 2
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_join_nr "subflow and address allow join id0 2" 1 1 1
 }
@@ -1880,10 +1957,10 @@ fullmesh_tests()
 	# 2 fullmesh addrs in ns2, added before the connection,
 	# 1 non-fullmesh addr in ns1, added during the connection.
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 0 4
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 4
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow,fullmesh
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow,fullmesh
+	pm_nl_set_limits $ns1 0 4
+	pm_nl_set_limits $ns2 1 4
+	pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,fullmesh
+	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,fullmesh
 	run_tests $ns1 $ns2 10.0.1.1 0 1 0 slow
 	chk_join_nr "fullmesh test 2x1" 4 4 4
 	chk_add_nr 1 1
@@ -1892,9 +1969,9 @@ fullmesh_tests()
 	# 1 non-fullmesh addr in ns1, added before the connection,
 	# 1 fullmesh addr in ns2, added during the connection.
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 1 3
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 3
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+	pm_nl_set_limits $ns1 1 3
+	pm_nl_set_limits $ns2 1 3
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
 	run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_1 slow
 	chk_join_nr "fullmesh test 1x1" 3 3 3
 	chk_add_nr 1 1
@@ -1903,9 +1980,9 @@ fullmesh_tests()
 	# 1 non-fullmesh addr in ns1, added before the connection,
 	# 2 fullmesh addrs in ns2, added during the connection.
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 2 5
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 5
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+	pm_nl_set_limits $ns1 2 5
+	pm_nl_set_limits $ns2 1 5
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
 	run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow
 	chk_join_nr "fullmesh test 1x2" 5 5 5
 	chk_add_nr 1 1
@@ -1915,36 +1992,36 @@ fullmesh_tests()
 	# 2 fullmesh addrs in ns2, added during the connection,
 	# limit max_subflows to 4.
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 2 4
-	ip netns exec $ns2 ./pm_nl_ctl limits 1 4
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+	pm_nl_set_limits $ns1 2 4
+	pm_nl_set_limits $ns2 1 4
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
 	run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow
 	chk_join_nr "fullmesh test 1x2, limited" 4 4 4
 	chk_add_nr 1 1
 
 	# set fullmesh flag
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 4 4
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags subflow
-	ip netns exec $ns2 ./pm_nl_ctl limits 4 4
+	pm_nl_set_limits $ns1 4 4
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow
+	pm_nl_set_limits $ns2 4 4
 	run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow fullmesh
 	chk_join_nr "set fullmesh flag test" 2 2 2
 	chk_rm_nr 0 1
 
 	# set nofullmesh flag
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 4 4
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags subflow,fullmesh
-	ip netns exec $ns2 ./pm_nl_ctl limits 4 4
+	pm_nl_set_limits $ns1 4 4
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow,fullmesh
+	pm_nl_set_limits $ns2 4 4
 	run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_1 slow nofullmesh
 	chk_join_nr "set nofullmesh flag test" 2 2 2
 	chk_rm_nr 0 1
 
 	# set backup,fullmesh flags
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 4 4
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags subflow
-	ip netns exec $ns2 ./pm_nl_ctl limits 4 4
+	pm_nl_set_limits $ns1 4 4
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow
+	pm_nl_set_limits $ns2 4 4
 	run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow backup,fullmesh
 	chk_join_nr "set backup,fullmesh flags test" 2 2 2
 	chk_prio_nr 0 1
@@ -1952,9 +2029,9 @@ fullmesh_tests()
 
 	# set nobackup,nofullmesh flags
 	reset
-	ip netns exec $ns1 ./pm_nl_ctl limits 4 4
-	ip netns exec $ns2 ./pm_nl_ctl limits 4 4
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow,backup,fullmesh
+	pm_nl_set_limits $ns1 4 4
+	pm_nl_set_limits $ns2 4 4
+	pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup,fullmesh
 	run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow nobackup,nofullmesh
 	chk_join_nr "set nobackup,nofullmesh flags test" 2 2 2
 	chk_prio_nr 0 1
-- 
cgit 


From dda61b3dbea09b3c6d84f3c3f5e39b2e19a329dc Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Fri, 4 Feb 2022 16:03:33 -0800
Subject: selftests: mptcp: add wrapper for showing addrs

This patch implemented a new function named pm_nl_show_endpoints(), wrapped
the PM netlink commands 'ip mptcp endpoint show' and 'pm_nl_ctl dump' in
it, used a new argument 'ip_mptcp' to choose which one to use to show all
the PM endpoints.

Used this wrapper in do_transfer() instead of using the pm_nl_ctl commands
directly.

The original 'pos+=5' in the remoing tests only works for the output of
'pm_nl_ctl show':

  id 1 flags subflow 10.0.1.1

It doesn't work for the output of 'ip mptcp endpoint show':

  10.0.1.1 id 1 subflow

So implemented a more flexible approach to get the address ID from the PM
dump output to fit for both commands.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 78 ++++++++++++++++---------
 1 file changed, 50 insertions(+), 28 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 6ca6ed7336d0..093eb27f5c6d 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -365,6 +365,17 @@ pm_nl_flush_endpoint()
 	fi
 }
 
+pm_nl_show_endpoints()
+{
+	local ns=$1
+
+	if [ $ip_mptcp -eq 1 ]; then
+		ip -n $ns mptcp endpoint show
+	else
+		ip netns exec $ns ./pm_nl_ctl dump
+	fi
+}
+
 do_transfer()
 {
 	listener_ns="$1"
@@ -472,20 +483,25 @@ do_transfer()
 	elif [ $addr_nr_ns1 -lt 0 ]; then
 		let rm_nr_ns1=-addr_nr_ns1
 		if [ $rm_nr_ns1 -lt 8 ]; then
-			counter=1
-			pos=1
-			dump=(`ip netns exec ${listener_ns} ./pm_nl_ctl dump`)
-			if [ ${#dump[@]} -gt 0 ]; then
-				while [ $counter -le $rm_nr_ns1 ]
-				do
-					id=${dump[$pos]}
-					rm_addr=$(rm_addr_count ${connector_ns})
-					pm_nl_del_endpoint ${listener_ns} $id
-					wait_rm_addr ${connector_ns} ${rm_addr}
-					let counter+=1
-					let pos+=5
+			counter=0
+			pm_nl_show_endpoints ${listener_ns} | while read line; do
+				local arr=($line)
+				local nr=0
+
+				for i in ${arr[@]}; do
+					if [ $i = "id" ]; then
+						if [ $counter -eq $rm_nr_ns1 ]; then
+							break
+						fi
+						id=${arr[$nr+1]}
+						rm_addr=$(rm_addr_count ${connector_ns})
+						pm_nl_del_endpoint ${listener_ns} $id
+						wait_rm_addr ${connector_ns} ${rm_addr}
+						let counter+=1
+					fi
+					let nr+=1
 				done
-			fi
+			done
 		elif [ $rm_nr_ns1 -eq 8 ]; then
 			pm_nl_flush_endpoint ${listener_ns}
 		elif [ $rm_nr_ns1 -eq 9 ]; then
@@ -520,21 +536,27 @@ do_transfer()
 	elif [ $addr_nr_ns2 -lt 0 ]; then
 		let rm_nr_ns2=-addr_nr_ns2
 		if [ $rm_nr_ns2 -lt 8 ]; then
-			counter=1
-			pos=1
-			dump=(`ip netns exec ${connector_ns} ./pm_nl_ctl dump`)
-			if [ ${#dump[@]} -gt 0 ]; then
-				while [ $counter -le $rm_nr_ns2 ]
-				do
-					# rm_addr are serialized, allow the previous one to complete
-					id=${dump[$pos]}
-					rm_addr=$(rm_addr_count ${listener_ns})
-					pm_nl_del_endpoint ${connector_ns} $id
-					wait_rm_addr ${listener_ns} ${rm_addr}
-					let counter+=1
-					let pos+=5
+			counter=0
+			pm_nl_show_endpoints ${connector_ns} | while read line; do
+				local arr=($line)
+				local nr=0
+
+				for i in ${arr[@]}; do
+					if [ $i = "id" ]; then
+						if [ $counter -eq $rm_nr_ns2 ]; then
+							break
+						fi
+						# rm_addr are serialized, allow the previous one to
+						# complete
+						id=${arr[$nr+1]}
+						rm_addr=$(rm_addr_count ${listener_ns})
+						pm_nl_del_endpoint ${connector_ns} $id
+						wait_rm_addr ${listener_ns} ${rm_addr}
+						let counter+=1
+					fi
+					let nr+=1
 				done
-			fi
+			done
 		elif [ $rm_nr_ns2 -eq 8 ]; then
 			pm_nl_flush_endpoint ${connector_ns}
 		elif [ $rm_nr_ns2 -eq 9 ]; then
@@ -551,7 +573,7 @@ do_transfer()
 	if [ ! -z $sflags ]; then
 		sleep 1
 		for netns in "$ns1" "$ns2"; do
-			ip netns exec $netns ./pm_nl_ctl dump | while read line; do
+			pm_nl_show_endpoints $netns | while read line; do
 				local arr=($line)
 				local addr
 				local port=0
-- 
cgit 


From f01403862592c75b4c4096457016938c4a8d7e6f Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Fri, 4 Feb 2022 16:03:34 -0800
Subject: selftests: mptcp: add wrapper for setting flags

This patch implemented a new function named pm_nl_set_endpoint(), wrapped
the PM netlink commands 'ip mptcp endpoint change flags' and 'pm_nl_ctl
set flags' in it, and used a new argument 'ip_mptcp' to choose which one
to use to set the flags of the PM endpoint.

'ip mptcp' used the ID number argument to find out the address to change
flags, while 'pm_nl_ctl' used the address and port number arguments. So
we need to parse the address ID from the PM dump output as well as the
address and port number.

Used this wrapper in do_transfer() instead of using the pm_nl_ctl command
directly.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 093eb27f5c6d..757f26674c62 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -376,6 +376,22 @@ pm_nl_show_endpoints()
 	fi
 }
 
+pm_nl_change_endpoint()
+{
+	local ns=$1
+	local flags=$2
+	local id=$3
+	local addr=$4
+	local port=""
+
+	if [ $ip_mptcp -eq 1 ]; then
+		ip -n $ns mptcp endpoint change id $id ${flags//","/" "}
+	else
+		if [ $5 -ne 0 ]; then port="port $5"; fi
+		ip netns exec $ns ./pm_nl_ctl set $addr flags $flags $port
+	fi
+}
+
 do_transfer()
 {
 	listener_ns="$1"
@@ -577,7 +593,7 @@ do_transfer()
 				local arr=($line)
 				local addr
 				local port=0
-				local _port=""
+				local id
 
 				for i in ${arr[@]}; do
 					if is_addr $i; then
@@ -586,11 +602,13 @@ do_transfer()
 						# The minimum expected port number is 10000
 						if [ $i -gt 10000 ]; then
 							port=$i
+						# The maximum id number is 255
+						elif [ $i -lt 255 ]; then
+							id=$i
 						fi
 					fi
 				done
-				if [ $port -ne 0 ]; then _port="port $port"; fi
-				ip netns exec $netns ./pm_nl_ctl set $addr flags $sflags $_port
+				pm_nl_change_endpoint $netns $sflags $id $addr $port
 			done
 		done
 	fi
-- 
cgit 


From a224a847ae7aa7eb09b0d4c3d808c9725806c10d Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Fri, 4 Feb 2022 16:03:35 -0800
Subject: selftests: mptcp: add the id argument for set_flags

This patch added the id argument for setting the address flags in
pm_nl_ctl.

Usage:

    pm_nl_ctl set id 1 flags backup

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/pm_nl_ctl.c | 63 ++++++++++++++++++---------
 1 file changed, 42 insertions(+), 21 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index 2a57462764d0..22a5ec1e128e 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -28,7 +28,7 @@ static void syntax(char *argv[])
 	fprintf(stderr, "\tadd [flags signal|subflow|backup|fullmesh] [id <nr>] [dev <name>] <ip>\n");
 	fprintf(stderr, "\tdel <id> [<ip>]\n");
 	fprintf(stderr, "\tget <id>\n");
-	fprintf(stderr, "\tset <ip> [flags backup|nobackup|fullmesh|nofullmesh] [port <nr>]\n");
+	fprintf(stderr, "\tset [<ip>] [id <nr>] flags [no]backup|[no]fullmesh [port <nr>]\n");
 	fprintf(stderr, "\tflush\n");
 	fprintf(stderr, "\tdump\n");
 	fprintf(stderr, "\tlimits [<rcv addr max> <subflow max>]\n");
@@ -657,8 +657,10 @@ int set_flags(int fd, int pm_family, int argc, char *argv[])
 	u_int32_t flags = 0;
 	u_int16_t family;
 	int nest_start;
+	int use_id = 0;
+	u_int8_t id;
 	int off = 0;
-	int arg;
+	int arg = 2;
 
 	memset(data, 0, sizeof(data));
 	nh = (void *)data;
@@ -674,29 +676,45 @@ int set_flags(int fd, int pm_family, int argc, char *argv[])
 	nest->rta_len = RTA_LENGTH(0);
 	off += NLMSG_ALIGN(nest->rta_len);
 
-	/* addr data */
-	rta = (void *)(data + off);
-	if (inet_pton(AF_INET, argv[2], RTA_DATA(rta))) {
-		family = AF_INET;
-		rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4;
-		rta->rta_len = RTA_LENGTH(4);
-	} else if (inet_pton(AF_INET6, argv[2], RTA_DATA(rta))) {
-		family = AF_INET6;
-		rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6;
-		rta->rta_len = RTA_LENGTH(16);
+	if (!strcmp(argv[arg], "id")) {
+		if (++arg >= argc)
+			error(1, 0, " missing id value");
+
+		use_id = 1;
+		id = atoi(argv[arg]);
+		rta = (void *)(data + off);
+		rta->rta_type = MPTCP_PM_ADDR_ATTR_ID;
+		rta->rta_len = RTA_LENGTH(1);
+		memcpy(RTA_DATA(rta), &id, 1);
+		off += NLMSG_ALIGN(rta->rta_len);
 	} else {
-		error(1, errno, "can't parse ip %s", argv[2]);
+		/* addr data */
+		rta = (void *)(data + off);
+		if (inet_pton(AF_INET, argv[arg], RTA_DATA(rta))) {
+			family = AF_INET;
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4;
+			rta->rta_len = RTA_LENGTH(4);
+		} else if (inet_pton(AF_INET6, argv[arg], RTA_DATA(rta))) {
+			family = AF_INET6;
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6;
+			rta->rta_len = RTA_LENGTH(16);
+		} else {
+			error(1, errno, "can't parse ip %s", argv[arg]);
+		}
+		off += NLMSG_ALIGN(rta->rta_len);
+
+		/* family */
+		rta = (void *)(data + off);
+		rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY;
+		rta->rta_len = RTA_LENGTH(2);
+		memcpy(RTA_DATA(rta), &family, 2);
+		off += NLMSG_ALIGN(rta->rta_len);
 	}
-	off += NLMSG_ALIGN(rta->rta_len);
 
-	/* family */
-	rta = (void *)(data + off);
-	rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY;
-	rta->rta_len = RTA_LENGTH(2);
-	memcpy(RTA_DATA(rta), &family, 2);
-	off += NLMSG_ALIGN(rta->rta_len);
+	if (++arg >= argc)
+		error(1, 0, " missing flags keyword");
 
-	for (arg = 3; arg < argc; arg++) {
+	for (; arg < argc; arg++) {
 		if (!strcmp(argv[arg], "flags")) {
 			char *tok, *str;
 
@@ -724,6 +742,9 @@ int set_flags(int fd, int pm_family, int argc, char *argv[])
 		} else if (!strcmp(argv[arg], "port")) {
 			u_int16_t port;
 
+			if (use_id)
+				error(1, 0, " port can't be used with id");
+
 			if (++arg >= argc)
 				error(1, 0, " missing port value");
 
-- 
cgit 


From 6da1dfdd037eb1ed40ebbf478ecd05cda3c5868b Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Fri, 4 Feb 2022 16:03:36 -0800
Subject: selftests: mptcp: add set_flags tests in pm_netlink.sh

This patch added the setting flags test cases, using both addr-based and
id-based lookups for the setting address.

The output looks like this:

 set flags (backup)                                 [ OK ]
           (nobackup)                               [ OK ]
           (fullmesh)                               [ OK ]
           (nofullmesh)                             [ OK ]
           (backup,fullmesh)                        [ OK ]

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/pm_netlink.sh | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh
index cbacf9f6538b..89839d1ff9d8 100755
--- a/tools/testing/selftests/net/mptcp/pm_netlink.sh
+++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh
@@ -164,4 +164,22 @@ id 253 flags  10.0.0.5
 id 254 flags  10.0.0.2
 id 255 flags  10.0.0.3" "wrap-around ids"
 
+ip netns exec $ns1 ./pm_nl_ctl flush
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1 flags subflow
+ip netns exec $ns1 ./pm_nl_ctl set 10.0.1.1 flags backup
+check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
+subflow,backup 10.0.1.1" "set flags (backup)"
+ip netns exec $ns1 ./pm_nl_ctl set 10.0.1.1 flags nobackup
+check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
+subflow 10.0.1.1" "          (nobackup)"
+ip netns exec $ns1 ./pm_nl_ctl set id 1 flags fullmesh
+check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
+subflow,fullmesh 10.0.1.1" "          (fullmesh)"
+ip netns exec $ns1 ./pm_nl_ctl set id 1 flags nofullmesh
+check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
+subflow 10.0.1.1" "          (nofullmesh)"
+ip netns exec $ns1 ./pm_nl_ctl set id 1 flags backup,fullmesh
+check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
+subflow,backup,fullmesh 10.0.1.1" "          (backup,fullmesh)"
+
 exit $ret
-- 
cgit 


From 621bd393039e81533ad5f5e2a70ba3ce36202f57 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Fri, 4 Feb 2022 16:03:37 -0800
Subject: selftests: mptcp: set ip_mptcp in command line

This patch added a command line option '-i' for mptcp_join.sh to use
'ip mptcp' commands instead of using 'pm_nl_ctl' commands to deal with
PM netlink.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 757f26674c62..4a565fb84137 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -2117,6 +2117,7 @@ usage()
 	echo "  -m fullmesh_tests"
 	echo "  -c capture pcap files"
 	echo "  -C enable data checksum"
+	echo "  -i use ip mptcp"
 	echo "  -h help"
 }
 
@@ -2138,9 +2139,12 @@ for arg in "$@"; do
 	if [[ "${arg}" =~ ^"-"[0-9a-zA-Z]*"C"[0-9a-zA-Z]*$ ]]; then
 		checksum=1
 	fi
+	if [[ "${arg}" =~ ^"-"[0-9a-zA-Z]*"i"[0-9a-zA-Z]*$ ]]; then
+		ip_mptcp=1
+	fi
 
-	# exception for the capture/checksum options, the rest means: a part of the tests
-	if [ "${arg}" != "-c" ] && [ "${arg}" != "-C" ]; then
+	# exception for the capture/checksum/ip_mptcp options, the rest means: a part of the tests
+	if [ "${arg}" != "-c" ] && [ "${arg}" != "-C" ] && [ "${arg}" != "-i" ]; then
 		do_all_tests=0
 	fi
 done
@@ -2150,7 +2154,7 @@ if [ $do_all_tests -eq 1 ]; then
 	exit $ret
 fi
 
-while getopts 'fesltra64bpkdmchCS' opt; do
+while getopts 'fesltra64bpkdmchCSi' opt; do
 	case $opt in
 		f)
 			subflows_tests
@@ -2201,6 +2205,8 @@ while getopts 'fesltra64bpkdmchCS' opt; do
 			;;
 		C)
 			;;
+		i)
+			;;
 		h | *)
 			usage
 			;;
-- 
cgit 


From 92ad3828944e0c420990a41038920494272c255e Mon Sep 17 00:00:00 2001
From: Danielle Ratson <danieller@nvidia.com>
Date: Sun, 6 Feb 2022 17:36:13 +0200
Subject: selftests: forwarding: Add a test for pedit munge SIP and DIP

Add a test that checks that pedit adjusts source and destination
addresses of IPv4 and IPv6 packets.

Output example:

$ ./pedit_ip.sh
TEST: ping                                                          [ OK ]
TEST: ping6                                                         [ OK ]
TEST: dev swp2 ingress pedit ip src set 198.51.100.1                [ OK ]
TEST: dev swp3 egress pedit ip src set 198.51.100.1                 [ OK ]
TEST: dev swp2 ingress pedit ip dst set 198.51.100.1                [ OK ]
TEST: dev swp3 egress pedit ip dst set 198.51.100.1                 [ OK ]
TEST: dev swp2 ingress pedit ip6 src set 2001:db8:2::1              [ OK ]
TEST: dev swp3 egress pedit ip6 src set 2001:db8:2::1               [ OK ]
TEST: dev swp2 ingress pedit ip6 dst set 2001:db8:2::1              [ OK ]
TEST: dev swp3 egress pedit ip6 dst set 2001:db8:2::1               [ OK ]

Signed-off-by: Danielle Ratson <danieller@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/pedit_ip.sh | 201 +++++++++++++++++++++
 1 file changed, 201 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/pedit_ip.sh

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/forwarding/pedit_ip.sh b/tools/testing/selftests/net/forwarding/pedit_ip.sh
new file mode 100755
index 000000000000..d14efb2d23b2
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/pedit_ip.sh
@@ -0,0 +1,201 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test sends traffic from H1 to H2. Either on ingress of $swp1, or on
+# egress of $swp2, the traffic is acted upon by a pedit action. An ingress
+# filter installed on $h2 verifies that the packet looks like expected.
+#
+# +----------------------+                             +----------------------+
+# | H1                   |                             |                   H2 |
+# |    + $h1             |                             |            $h2 +     |
+# |    | 192.0.2.1/28    |                             |   192.0.2.2/28 |     |
+# +----|-----------------+                             +----------------|-----+
+#      |                                                                |
+# +----|----------------------------------------------------------------|-----+
+# | SW |                                                                |     |
+# |  +-|----------------------------------------------------------------|-+   |
+# |  | + $swp1                       BR                           $swp2 + |   |
+# |  +--------------------------------------------------------------------+   |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+	test_ip4_src
+	test_ip4_dst
+	test_ip6_src
+	test_ip6_dst
+"
+
+NUM_NETIFS=4
+source lib.sh
+source tc_common.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64
+	tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+	tc qdisc del dev $h2 clsact
+	simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64
+}
+
+switch_create()
+{
+	ip link add name br1 up type bridge vlan_filtering 1
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+	ip link set dev $swp2 master br1
+	ip link set dev $swp2 up
+
+	tc qdisc add dev $swp1 clsact
+	tc qdisc add dev $swp2 clsact
+}
+
+switch_destroy()
+{
+	tc qdisc del dev $swp2 clsact
+	tc qdisc del dev $swp1 clsact
+
+	ip link set dev $swp2 down
+	ip link set dev $swp2 nomaster
+	ip link set dev $swp1 down
+	ip link set dev $swp1 nomaster
+	ip link del dev br1
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	h2mac=$(mac_get $h2)
+
+	vrf_prepare
+	h1_create
+	h2_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.2
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:1::2
+}
+
+do_test_pedit_ip()
+{
+	local pedit_locus=$1; shift
+	local pedit_action=$1; shift
+	local match_prot=$1; shift
+	local match_flower=$1; shift
+	local mz_flags=$1; shift
+
+	tc filter add $pedit_locus handle 101 pref 1 \
+	   flower action pedit ex munge $pedit_action
+	tc filter add dev $h2 ingress handle 101 pref 1 prot $match_prot \
+	   flower skip_hw $match_flower action pass
+
+	RET=0
+
+	$MZ $mz_flags $h1 -c 10 -d 20msec -p 100 -a own -b $h2mac -q -t ip
+
+	local pkts
+	pkts=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= 10" \
+			tc_rule_handle_stats_get "dev $h2 ingress" 101)
+	check_err $? "Expected to get 10 packets, but got $pkts."
+
+	pkts=$(tc_rule_handle_stats_get "$pedit_locus" 101)
+	((pkts >= 10))
+	check_err $? "Expected to get 10 packets on pedit rule, but got $pkts."
+
+	log_test "$pedit_locus pedit $pedit_action"
+
+	tc filter del dev $h2 ingress pref 1
+	tc filter del $pedit_locus pref 1
+}
+
+do_test_pedit_ip6()
+{
+	local locus=$1; shift
+	local pedit_addr=$1; shift
+	local flower_addr=$1; shift
+
+	do_test_pedit_ip "$locus" "$pedit_addr set 2001:db8:2::1" ipv6	\
+			 "$flower_addr 2001:db8:2::1"			\
+			 "-6 -A 2001:db8:1::1 -B 2001:db8:1::2"
+}
+
+do_test_pedit_ip4()
+{
+	local locus=$1; shift
+	local pedit_addr=$1; shift
+	local flower_addr=$1; shift
+
+	do_test_pedit_ip "$locus" "$pedit_addr set 198.51.100.1" ip	\
+			 "$flower_addr 198.51.100.1"			\
+			 "-A 192.0.2.1 -B 192.0.2.2"
+}
+
+test_ip4_src()
+{
+	do_test_pedit_ip4 "dev $swp1 ingress" "ip src" src_ip
+	do_test_pedit_ip4 "dev $swp2 egress"  "ip src" src_ip
+}
+
+test_ip4_dst()
+{
+	do_test_pedit_ip4 "dev $swp1 ingress" "ip dst" dst_ip
+	do_test_pedit_ip4 "dev $swp2 egress"  "ip dst" dst_ip
+}
+
+test_ip6_src()
+{
+	do_test_pedit_ip6 "dev $swp1 ingress" "ip6 src" src_ip
+	do_test_pedit_ip6 "dev $swp2 egress"  "ip6 src" src_ip
+}
+
+test_ip6_dst()
+{
+	do_test_pedit_ip6 "dev $swp1 ingress" "ip6 dst" dst_ip
+	do_test_pedit_ip6 "dev $swp2 egress"  "ip6 dst" dst_ip
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit 


From 046b841ea7c528931e7d2e74d5e668aa6c94c1fc Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Fri, 4 Feb 2022 17:05:19 +0530
Subject: selftests/bpf: Use "__se_" prefix on architectures without syscall
 wrapper

On architectures that don't use a syscall wrapper, sys_* function names
are set as an alias of __se_sys_* functions. Due to this, there is no
BTF associated with sys_* function names. This results in some of the
test progs failing to load. Set the SYS_PREFIX to "__se_" to fix this
issue.

Fixes: 38261f369fb905 ("selftests/bpf: Fix probe_user test failure with clang build kernel")
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/013d632aacd3e41290445c0025db6a7055ec6e18.1643973917.git.naveen.n.rao@linux.vnet.ibm.com
---
 tools/testing/selftests/bpf/progs/bpf_misc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
index 0b78bc9b1b4c..5bb11fe595a4 100644
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -13,7 +13,7 @@
 #define SYS_PREFIX "__arm64_"
 #else
 #define SYSCALL_WRAPPER 0
-#define SYS_PREFIX ""
+#define SYS_PREFIX "__se_"
 #endif
 
 #endif
-- 
cgit 


From e91d280c840f133560072f246321f9a4d1f4eb14 Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Fri, 4 Feb 2022 17:05:20 +0530
Subject: selftests/bpf: Fix tests to use arch-dependent syscall entry points

Some of the tests are using x86_64 ABI-specific syscall entry points
(such as __x64_sys_nanosleep and __x64_sys_getpgid). Update them to use
architecture-dependent syscall entry names.

Also update fexit_sleep test to not use BPF_PROG() so that it is clear
that the syscall parameters aren't being accessed in the bpf prog.

Note that none of the bpf progs in these tests are actually accessing
any of the syscall parameters. The only exception is perfbuf_bench, which
passes on the bpf prog context into bpf_perf_event_output() as a pointer
to pt_regs, but that looks to be mostly ignored.

Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/e35f7051f03e269b623a68b139d8ed131325f7b7.1643973917.git.naveen.n.rao@linux.vnet.ibm.com
---
 tools/testing/selftests/bpf/progs/bloom_filter_bench.c | 7 ++++---
 tools/testing/selftests/bpf/progs/bloom_filter_map.c   | 5 +++--
 tools/testing/selftests/bpf/progs/bpf_loop.c           | 9 +++++----
 tools/testing/selftests/bpf/progs/bpf_loop_bench.c     | 3 ++-
 tools/testing/selftests/bpf/progs/fexit_sleep.c        | 9 +++++----
 tools/testing/selftests/bpf/progs/perfbuf_bench.c      | 3 ++-
 tools/testing/selftests/bpf/progs/ringbuf_bench.c      | 3 ++-
 tools/testing/selftests/bpf/progs/test_ringbuf.c       | 3 ++-
 tools/testing/selftests/bpf/progs/trace_printk.c       | 3 ++-
 tools/testing/selftests/bpf/progs/trace_vprintk.c      | 3 ++-
 tools/testing/selftests/bpf/progs/trigger_bench.c      | 9 +++++----
 11 files changed, 34 insertions(+), 23 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/bloom_filter_bench.c b/tools/testing/selftests/bpf/progs/bloom_filter_bench.c
index d9a88dd1ea65..7efcbdbe772d 100644
--- a/tools/testing/selftests/bpf/progs/bloom_filter_bench.c
+++ b/tools/testing/selftests/bpf/progs/bloom_filter_bench.c
@@ -5,6 +5,7 @@
 #include <linux/bpf.h>
 #include <stdbool.h>
 #include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
 
 char _license[] SEC("license") = "GPL";
 
@@ -87,7 +88,7 @@ bloom_callback(struct bpf_map *map, __u32 *key, void *val,
 	return 0;
 }
 
-SEC("fentry/__x64_sys_getpgid")
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
 int bloom_lookup(void *ctx)
 {
 	struct callback_ctx data;
@@ -100,7 +101,7 @@ int bloom_lookup(void *ctx)
 	return 0;
 }
 
-SEC("fentry/__x64_sys_getpgid")
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
 int bloom_update(void *ctx)
 {
 	struct callback_ctx data;
@@ -113,7 +114,7 @@ int bloom_update(void *ctx)
 	return 0;
 }
 
-SEC("fentry/__x64_sys_getpgid")
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
 int bloom_hashmap_lookup(void *ctx)
 {
 	__u64 *result;
diff --git a/tools/testing/selftests/bpf/progs/bloom_filter_map.c b/tools/testing/selftests/bpf/progs/bloom_filter_map.c
index 1316f3db79d9..f245fcfe0c61 100644
--- a/tools/testing/selftests/bpf/progs/bloom_filter_map.c
+++ b/tools/testing/selftests/bpf/progs/bloom_filter_map.c
@@ -3,6 +3,7 @@
 
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
 
 char _license[] SEC("license") = "GPL";
 
@@ -51,7 +52,7 @@ check_elem(struct bpf_map *map, __u32 *key, __u32 *val,
 	return 0;
 }
 
-SEC("fentry/__x64_sys_getpgid")
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
 int inner_map(void *ctx)
 {
 	struct bpf_map *inner_map;
@@ -70,7 +71,7 @@ int inner_map(void *ctx)
 	return 0;
 }
 
-SEC("fentry/__x64_sys_getpgid")
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
 int check_bloom(void *ctx)
 {
 	struct callback_ctx data;
diff --git a/tools/testing/selftests/bpf/progs/bpf_loop.c b/tools/testing/selftests/bpf/progs/bpf_loop.c
index 12349e4601e8..e08565282759 100644
--- a/tools/testing/selftests/bpf/progs/bpf_loop.c
+++ b/tools/testing/selftests/bpf/progs/bpf_loop.c
@@ -3,6 +3,7 @@
 
 #include "vmlinux.h"
 #include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
 
 char _license[] SEC("license") = "GPL";
 
@@ -53,7 +54,7 @@ static int nested_callback1(__u32 index, void *data)
 	return 0;
 }
 
-SEC("fentry/__x64_sys_nanosleep")
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
 int test_prog(void *ctx)
 {
 	struct callback_ctx data = {};
@@ -71,7 +72,7 @@ int test_prog(void *ctx)
 	return 0;
 }
 
-SEC("fentry/__x64_sys_nanosleep")
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
 int prog_null_ctx(void *ctx)
 {
 	if (bpf_get_current_pid_tgid() >> 32 != pid)
@@ -82,7 +83,7 @@ int prog_null_ctx(void *ctx)
 	return 0;
 }
 
-SEC("fentry/__x64_sys_nanosleep")
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
 int prog_invalid_flags(void *ctx)
 {
 	struct callback_ctx data = {};
@@ -95,7 +96,7 @@ int prog_invalid_flags(void *ctx)
 	return 0;
 }
 
-SEC("fentry/__x64_sys_nanosleep")
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
 int prog_nested_calls(void *ctx)
 {
 	struct callback_ctx data = {};
diff --git a/tools/testing/selftests/bpf/progs/bpf_loop_bench.c b/tools/testing/selftests/bpf/progs/bpf_loop_bench.c
index 9dafdc244462..4ce76eb064c4 100644
--- a/tools/testing/selftests/bpf/progs/bpf_loop_bench.c
+++ b/tools/testing/selftests/bpf/progs/bpf_loop_bench.c
@@ -3,6 +3,7 @@
 
 #include "vmlinux.h"
 #include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
 
 char _license[] SEC("license") = "GPL";
 
@@ -14,7 +15,7 @@ static int empty_callback(__u32 index, void *data)
 	return 0;
 }
 
-SEC("fentry/__x64_sys_getpgid")
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
 int benchmark(void *ctx)
 {
 	for (int i = 0; i < 1000; i++) {
diff --git a/tools/testing/selftests/bpf/progs/fexit_sleep.c b/tools/testing/selftests/bpf/progs/fexit_sleep.c
index bca92c9bd29a..106dc75efcc4 100644
--- a/tools/testing/selftests/bpf/progs/fexit_sleep.c
+++ b/tools/testing/selftests/bpf/progs/fexit_sleep.c
@@ -3,6 +3,7 @@
 #include "vmlinux.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
 
 char LICENSE[] SEC("license") = "GPL";
 
@@ -10,8 +11,8 @@ int pid = 0;
 int fentry_cnt = 0;
 int fexit_cnt = 0;
 
-SEC("fentry/__x64_sys_nanosleep")
-int BPF_PROG(nanosleep_fentry, const struct pt_regs *regs)
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int nanosleep_fentry(void *ctx)
 {
 	if (bpf_get_current_pid_tgid() >> 32 != pid)
 		return 0;
@@ -20,8 +21,8 @@ int BPF_PROG(nanosleep_fentry, const struct pt_regs *regs)
 	return 0;
 }
 
-SEC("fexit/__x64_sys_nanosleep")
-int BPF_PROG(nanosleep_fexit, const struct pt_regs *regs, int ret)
+SEC("fexit/" SYS_PREFIX "sys_nanosleep")
+int nanosleep_fexit(void *ctx)
 {
 	if (bpf_get_current_pid_tgid() >> 32 != pid)
 		return 0;
diff --git a/tools/testing/selftests/bpf/progs/perfbuf_bench.c b/tools/testing/selftests/bpf/progs/perfbuf_bench.c
index e5ab4836a641..45204fe0c570 100644
--- a/tools/testing/selftests/bpf/progs/perfbuf_bench.c
+++ b/tools/testing/selftests/bpf/progs/perfbuf_bench.c
@@ -4,6 +4,7 @@
 #include <linux/bpf.h>
 #include <stdint.h>
 #include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
 
 char _license[] SEC("license") = "GPL";
 
@@ -18,7 +19,7 @@ const volatile int batch_cnt = 0;
 long sample_val = 42;
 long dropped __attribute__((aligned(128))) = 0;
 
-SEC("fentry/__x64_sys_getpgid")
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
 int bench_perfbuf(void *ctx)
 {
 	__u64 *sample;
diff --git a/tools/testing/selftests/bpf/progs/ringbuf_bench.c b/tools/testing/selftests/bpf/progs/ringbuf_bench.c
index 123607d314d6..6a468496f539 100644
--- a/tools/testing/selftests/bpf/progs/ringbuf_bench.c
+++ b/tools/testing/selftests/bpf/progs/ringbuf_bench.c
@@ -4,6 +4,7 @@
 #include <linux/bpf.h>
 #include <stdint.h>
 #include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
 
 char _license[] SEC("license") = "GPL";
 
@@ -30,7 +31,7 @@ static __always_inline long get_flags()
 	return sz >= wakeup_data_size ? BPF_RB_FORCE_WAKEUP : BPF_RB_NO_WAKEUP;
 }
 
-SEC("fentry/__x64_sys_getpgid")
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
 int bench_ringbuf(void *ctx)
 {
 	long *sample, flags;
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf.c b/tools/testing/selftests/bpf/progs/test_ringbuf.c
index eaa7d9dba0be..5bdc0d38efc0 100644
--- a/tools/testing/selftests/bpf/progs/test_ringbuf.c
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf.c
@@ -3,6 +3,7 @@
 
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
 
 char _license[] SEC("license") = "GPL";
 
@@ -35,7 +36,7 @@ long prod_pos = 0;
 /* inner state */
 long seq = 0;
 
-SEC("fentry/__x64_sys_getpgid")
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
 int test_ringbuf(void *ctx)
 {
 	int cur_pid = bpf_get_current_pid_tgid() >> 32;
diff --git a/tools/testing/selftests/bpf/progs/trace_printk.c b/tools/testing/selftests/bpf/progs/trace_printk.c
index 119582aa105a..6695478c2b25 100644
--- a/tools/testing/selftests/bpf/progs/trace_printk.c
+++ b/tools/testing/selftests/bpf/progs/trace_printk.c
@@ -4,6 +4,7 @@
 #include "vmlinux.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
 
 char _license[] SEC("license") = "GPL";
 
@@ -12,7 +13,7 @@ int trace_printk_ran = 0;
 
 const char fmt[] = "Testing,testing %d\n";
 
-SEC("fentry/__x64_sys_nanosleep")
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
 int sys_enter(void *ctx)
 {
 	trace_printk_ret = bpf_trace_printk(fmt, sizeof(fmt),
diff --git a/tools/testing/selftests/bpf/progs/trace_vprintk.c b/tools/testing/selftests/bpf/progs/trace_vprintk.c
index d327241ba047..969306cd4f33 100644
--- a/tools/testing/selftests/bpf/progs/trace_vprintk.c
+++ b/tools/testing/selftests/bpf/progs/trace_vprintk.c
@@ -4,6 +4,7 @@
 #include "vmlinux.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
 
 char _license[] SEC("license") = "GPL";
 
@@ -11,7 +12,7 @@ int null_data_vprintk_ret = 0;
 int trace_vprintk_ret = 0;
 int trace_vprintk_ran = 0;
 
-SEC("fentry/__x64_sys_nanosleep")
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
 int sys_enter(void *ctx)
 {
 	static const char one[] = "1";
diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c
index 2098f3f27f18..2ab049b54d6c 100644
--- a/tools/testing/selftests/bpf/progs/trigger_bench.c
+++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
@@ -5,6 +5,7 @@
 #include <asm/unistd.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
 
 char _license[] SEC("license") = "GPL";
 
@@ -25,28 +26,28 @@ int BPF_PROG(bench_trigger_raw_tp, struct pt_regs *regs, long id)
 	return 0;
 }
 
-SEC("kprobe/__x64_sys_getpgid")
+SEC("kprobe/" SYS_PREFIX "sys_getpgid")
 int bench_trigger_kprobe(void *ctx)
 {
 	__sync_add_and_fetch(&hits, 1);
 	return 0;
 }
 
-SEC("fentry/__x64_sys_getpgid")
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
 int bench_trigger_fentry(void *ctx)
 {
 	__sync_add_and_fetch(&hits, 1);
 	return 0;
 }
 
-SEC("fentry.s/__x64_sys_getpgid")
+SEC("fentry.s/" SYS_PREFIX "sys_getpgid")
 int bench_trigger_fentry_sleep(void *ctx)
 {
 	__sync_add_and_fetch(&hits, 1);
 	return 0;
 }
 
-SEC("fmod_ret/__x64_sys_getpgid")
+SEC("fmod_ret/" SYS_PREFIX "sys_getpgid")
 int bench_trigger_fmodret(void *ctx)
 {
 	__sync_add_and_fetch(&hits, 1);
-- 
cgit 


From 2b9e2eadc9c899af3b508503677afecc85e44766 Mon Sep 17 00:00:00 2001
From: Mauricio Vásquez <mauricio@kinvolk.io>
Date: Mon, 7 Feb 2022 09:50:52 -0500
Subject: selftests/bpf: Fix strict mode calculation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

"(__LIBBPF_STRICT_LAST - 1) & ~LIBBPF_STRICT_MAP_DEFINITIONS" is wrong
as it is equal to 0 (LIBBPF_STRICT_NONE). Let's use
"LIBBPF_STRICT_ALL & ~LIBBPF_STRICT_MAP_DEFINITIONS" now that the
previous commit makes it possible in libbpf.

Fixes: 93b8952d223a ("libbpf: deprecate legacy BPF map definitions")
Signed-off-by: Mauricio Vásquez <mauricio@kinvolk.io>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220207145052.124421-4-mauricio@kinvolk.io
---
 tools/testing/selftests/bpf/prog_tests/btf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 4038108aa499..8b652f5ce423 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -4580,7 +4580,7 @@ static void do_test_file(unsigned int test_num)
 	btf_ext__free(btf_ext);
 
 	/* temporary disable LIBBPF_STRICT_MAP_DEFINITIONS to test legacy maps */
-	libbpf_set_strict_mode((__LIBBPF_STRICT_LAST - 1) & ~LIBBPF_STRICT_MAP_DEFINITIONS);
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL & ~LIBBPF_STRICT_MAP_DEFINITIONS);
 	obj = bpf_object__open(test->file);
 	err = libbpf_get_error(obj);
 	if (CHECK(err, "obj: %d", err))
-- 
cgit 


From a410a0cf98854a698a519bfbeb604145da384c0e Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Fri, 4 Feb 2022 14:58:11 +0100
Subject: ipv6: Define dscp_t and stop taking ECN bits into account in
 fib6-rules
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Define a dscp_t type and its appropriate helpers that ensure ECN bits
are not taken into account when handling DSCP.

Use this new type to replace the tclass field of struct fib6_rule, so
that fib6-rules don't get influenced by ECN bits anymore.

Before this patch, fib6-rules didn't make any distinction between the
DSCP and ECN bits. Therefore, rules specifying a DSCP (tos or dsfield
options in iproute2) stopped working as soon a packets had at least one
of its ECN bits set (as a work around one could create four rules for
each DSCP value to match, one for each possible ECN value).

After this patch fib6-rules only compare the DSCP bits. ECN doesn't
influence the result anymore. Also, fib6-rules now must have the ECN
bits cleared or they will be rejected.

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Acked-by: David Ahern <dsahern@kernel.org>
Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/inet_dscp.h                       | 57 +++++++++++++++++++++++++++
 include/net/ipv6.h                            |  6 +++
 net/ipv6/fib6_rules.c                         | 19 ++++++---
 tools/testing/selftests/net/fib_rule_tests.sh | 30 +++++++++++++-
 4 files changed, 105 insertions(+), 7 deletions(-)
 create mode 100644 include/net/inet_dscp.h

(limited to 'tools/testing')

diff --git a/include/net/inet_dscp.h b/include/net/inet_dscp.h
new file mode 100644
index 000000000000..72f250dffada
--- /dev/null
+++ b/include/net/inet_dscp.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * inet_dscp.h: helpers for handling differentiated services codepoints (DSCP)
+ *
+ * DSCP is defined in RFC 2474:
+ *
+ *        0   1   2   3   4   5   6   7
+ *      +---+---+---+---+---+---+---+---+
+ *      |         DSCP          |  CU   |
+ *      +---+---+---+---+---+---+---+---+
+ *
+ *        DSCP: differentiated services codepoint
+ *        CU:   currently unused
+ *
+ * The whole DSCP + CU bits form the DS field.
+ * The DS field is also commonly called TOS or Traffic Class (for IPv6).
+ *
+ * Note: the CU bits are now used for Explicit Congestion Notification
+ *       (RFC 3168).
+ */
+
+#ifndef _INET_DSCP_H
+#define _INET_DSCP_H
+
+#include <linux/types.h>
+
+/* Special type for storing DSCP values.
+ *
+ * A dscp_t variable stores a DS field with the CU (ECN) bits cleared.
+ * Using dscp_t allows to strictly separate DSCP and ECN bits, thus avoiding
+ * bugs where ECN bits are erroneously taken into account during FIB lookups
+ * or policy routing.
+ *
+ * Note: to get the real DSCP value contained in a dscp_t variable one would
+ * have to do a bit shift after calling inet_dscp_to_dsfield(). We could have
+ * a helper for that, but there's currently no users.
+ */
+typedef u8 __bitwise dscp_t;
+
+#define INET_DSCP_MASK 0xfc
+
+static inline dscp_t inet_dsfield_to_dscp(__u8 dsfield)
+{
+	return (__force dscp_t)(dsfield & INET_DSCP_MASK);
+}
+
+static inline __u8 inet_dscp_to_dsfield(dscp_t dscp)
+{
+	return (__force __u8)dscp;
+}
+
+static inline bool inet_validate_dscp(__u8 val)
+{
+	return !(val & ~INET_DSCP_MASK);
+}
+
+#endif /* _INET_DSCP_H */
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index cda1f205f391..f693784e1419 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -17,6 +17,7 @@
 #include <net/if_inet6.h>
 #include <net/flow.h>
 #include <net/flow_dissector.h>
+#include <net/inet_dscp.h>
 #include <net/snmp.h>
 #include <net/netns/hash.h>
 
@@ -974,6 +975,11 @@ static inline u8 ip6_tclass(__be32 flowinfo)
 	return ntohl(flowinfo & IPV6_TCLASS_MASK) >> IPV6_TCLASS_SHIFT;
 }
 
+static inline dscp_t ip6_dscp(__be32 flowinfo)
+{
+	return inet_dsfield_to_dscp(ip6_tclass(flowinfo));
+}
+
 static inline __be32 ip6_make_flowinfo(unsigned int tclass, __be32 flowlabel)
 {
 	return htonl(tclass << IPV6_TCLASS_SHIFT) | flowlabel;
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index ec029c86ae06..e2a7b0059669 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -16,6 +16,7 @@
 #include <linux/indirect_call_wrapper.h>
 
 #include <net/fib_rules.h>
+#include <net/inet_dscp.h>
 #include <net/ipv6.h>
 #include <net/addrconf.h>
 #include <net/ip6_route.h>
@@ -25,14 +26,14 @@ struct fib6_rule {
 	struct fib_rule		common;
 	struct rt6key		src;
 	struct rt6key		dst;
-	u8			tclass;
+	dscp_t			dscp;
 };
 
 static bool fib6_rule_matchall(const struct fib_rule *rule)
 {
 	struct fib6_rule *r = container_of(rule, struct fib6_rule, common);
 
-	if (r->dst.plen || r->src.plen || r->tclass)
+	if (r->dst.plen || r->src.plen || r->dscp)
 		return false;
 	return fib_rule_matchall(rule);
 }
@@ -323,7 +324,7 @@ INDIRECT_CALLABLE_SCOPE int fib6_rule_match(struct fib_rule *rule,
 			return 0;
 	}
 
-	if (r->tclass && r->tclass != ip6_tclass(fl6->flowlabel))
+	if (r->dscp && r->dscp != ip6_dscp(fl6->flowlabel))
 		return 0;
 
 	if (rule->ip_proto && (rule->ip_proto != fl6->flowi6_proto))
@@ -349,6 +350,13 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 	struct net *net = sock_net(skb->sk);
 	struct fib6_rule *rule6 = (struct fib6_rule *) rule;
 
+	if (!inet_validate_dscp(frh->tos)) {
+		NL_SET_ERR_MSG(extack,
+			       "Invalid dsfield (tos): ECN bits must be 0");
+		goto errout;
+	}
+	rule6->dscp = inet_dsfield_to_dscp(frh->tos);
+
 	if (rule->action == FR_ACT_TO_TBL && !rule->l3mdev) {
 		if (rule->table == RT6_TABLE_UNSPEC) {
 			NL_SET_ERR_MSG(extack, "Invalid table");
@@ -369,7 +377,6 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 
 	rule6->src.plen = frh->src_len;
 	rule6->dst.plen = frh->dst_len;
-	rule6->tclass = frh->tos;
 
 	if (fib_rule_requires_fldissect(rule))
 		net->ipv6.fib6_rules_require_fldissect++;
@@ -402,7 +409,7 @@ static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
 	if (frh->dst_len && (rule6->dst.plen != frh->dst_len))
 		return 0;
 
-	if (frh->tos && (rule6->tclass != frh->tos))
+	if (frh->tos && inet_dscp_to_dsfield(rule6->dscp) != frh->tos)
 		return 0;
 
 	if (frh->src_len &&
@@ -423,7 +430,7 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 
 	frh->dst_len = rule6->dst.plen;
 	frh->src_len = rule6->src.plen;
-	frh->tos = rule6->tclass;
+	frh->tos = inet_dscp_to_dsfield(rule6->dscp);
 
 	if ((rule6->dst.plen &&
 	     nla_put_in6_addr(skb, FRA_DST, &rule6->dst.addr)) ||
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index 3b0489910422..d7a9ab3be1d3 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -114,10 +114,25 @@ fib_rule6_test_match_n_redirect()
 	log_test $? 0 "rule6 del by pref: $description"
 }
 
+fib_rule6_test_reject()
+{
+	local match="$1"
+	local rc
+
+	$IP -6 rule add $match table $RTABLE 2>/dev/null
+	rc=$?
+	log_test $rc 2 "rule6 check: $match"
+
+	if [ $rc -eq 0 ]; then
+		$IP -6 rule del $match table $RTABLE
+	fi
+}
+
 fib_rule6_test()
 {
 	local getmatch
 	local match
+	local cnt
 
 	# setup the fib rule redirect route
 	$IP -6 route add table $RTABLE default via $GW_IP6 dev $DEV onlink
@@ -128,8 +143,21 @@ fib_rule6_test()
 	match="from $SRC_IP6 iif $DEV"
 	fib_rule6_test_match_n_redirect "$match" "$match" "iif redirect to table"
 
+	# Reject dsfield (tos) options which have ECN bits set
+	for cnt in $(seq 1 3); do
+		match="dsfield $cnt"
+		fib_rule6_test_reject "$match"
+	done
+
+	# Don't take ECN bits into account when matching on dsfield
 	match="tos 0x10"
-	fib_rule6_test_match_n_redirect "$match" "$match" "tos redirect to table"
+	for cnt in "0x10" "0x11" "0x12" "0x13"; do
+		# Using option 'tos' instead of 'dsfield' as old iproute2
+		# versions don't support 'dsfield' in ip rule show.
+		getmatch="tos $cnt"
+		fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+						"$getmatch redirect to table"
+	done
 
 	match="fwmark 0x64"
 	getmatch="mark 0x64"
-- 
cgit 


From 563f8e97e054451d167327336a53b7381517a998 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Fri, 4 Feb 2022 14:58:14 +0100
Subject: ipv4: Stop taking ECN bits into account in fib4-rules
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use the new dscp_t type to replace the tos field of struct fib4_rule,
so that fib4-rules consistently ignore ECN bits.

Before this patch, fib4-rules did accept rules with the high order ECN
bit set (but not the low order one). Also, it relied on its callers
masking the ECN bits of ->flowi4_tos to prevent those from influencing
the result. This was brittle and a few call paths still do the lookup
without masking the ECN bits first.

After this patch fib4-rules only compare the DSCP bits. ECN can't
influence the result anymore, even if the caller didn't mask these
bits. Also, fib4-rules now must have both ECN bits cleared or they will
be rejected.

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Acked-by: David Ahern <dsahern@kernel.org>
Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ipv4/fib_rules.c                          | 18 +++++++++-------
 tools/testing/selftests/net/fib_rule_tests.sh | 30 ++++++++++++++++++++++++++-
 2 files changed, 39 insertions(+), 9 deletions(-)

(limited to 'tools/testing')

diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index e0b6c8b6de57..117c48571cf0 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -23,6 +23,7 @@
 #include <linux/list.h>
 #include <linux/rcupdate.h>
 #include <linux/export.h>
+#include <net/inet_dscp.h>
 #include <net/ip.h>
 #include <net/route.h>
 #include <net/tcp.h>
@@ -35,7 +36,7 @@ struct fib4_rule {
 	struct fib_rule		common;
 	u8			dst_len;
 	u8			src_len;
-	u8			tos;
+	dscp_t			dscp;
 	__be32			src;
 	__be32			srcmask;
 	__be32			dst;
@@ -49,7 +50,7 @@ static bool fib4_rule_matchall(const struct fib_rule *rule)
 {
 	struct fib4_rule *r = container_of(rule, struct fib4_rule, common);
 
-	if (r->dst_len || r->src_len || r->tos)
+	if (r->dst_len || r->src_len || r->dscp)
 		return false;
 	return fib_rule_matchall(rule);
 }
@@ -185,7 +186,7 @@ INDIRECT_CALLABLE_SCOPE int fib4_rule_match(struct fib_rule *rule,
 	    ((daddr ^ r->dst) & r->dstmask))
 		return 0;
 
-	if (r->tos && (r->tos != fl4->flowi4_tos))
+	if (r->dscp && r->dscp != inet_dsfield_to_dscp(fl4->flowi4_tos))
 		return 0;
 
 	if (rule->ip_proto && (rule->ip_proto != fl4->flowi4_proto))
@@ -225,10 +226,12 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 	int err = -EINVAL;
 	struct fib4_rule *rule4 = (struct fib4_rule *) rule;
 
-	if (frh->tos & ~IPTOS_TOS_MASK) {
-		NL_SET_ERR_MSG(extack, "Invalid tos");
+	if (!inet_validate_dscp(frh->tos)) {
+		NL_SET_ERR_MSG(extack,
+			       "Invalid dsfield (tos): ECN bits must be 0");
 		goto errout;
 	}
+	rule4->dscp = inet_dsfield_to_dscp(frh->tos);
 
 	/* split local/main if they are not already split */
 	err = fib_unmerge(net);
@@ -270,7 +273,6 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 	rule4->srcmask = inet_make_mask(rule4->src_len);
 	rule4->dst_len = frh->dst_len;
 	rule4->dstmask = inet_make_mask(rule4->dst_len);
-	rule4->tos = frh->tos;
 
 	net->ipv4.fib_has_custom_rules = true;
 
@@ -313,7 +315,7 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
 	if (frh->dst_len && (rule4->dst_len != frh->dst_len))
 		return 0;
 
-	if (frh->tos && (rule4->tos != frh->tos))
+	if (frh->tos && inet_dscp_to_dsfield(rule4->dscp) != frh->tos)
 		return 0;
 
 #ifdef CONFIG_IP_ROUTE_CLASSID
@@ -337,7 +339,7 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 
 	frh->dst_len = rule4->dst_len;
 	frh->src_len = rule4->src_len;
-	frh->tos = rule4->tos;
+	frh->tos = inet_dscp_to_dsfield(rule4->dscp);
 
 	if ((rule4->dst_len &&
 	     nla_put_in_addr(skb, FRA_DST, rule4->dst)) ||
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index d7a9ab3be1d3..4f70baad867d 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -215,10 +215,25 @@ fib_rule4_test_match_n_redirect()
 	log_test $? 0 "rule4 del by pref: $description"
 }
 
+fib_rule4_test_reject()
+{
+	local match="$1"
+	local rc
+
+	$IP rule add $match table $RTABLE 2>/dev/null
+	rc=$?
+	log_test $rc 2 "rule4 check: $match"
+
+	if [ $rc -eq 0 ]; then
+		$IP rule del $match table $RTABLE
+	fi
+}
+
 fib_rule4_test()
 {
 	local getmatch
 	local match
+	local cnt
 
 	# setup the fib rule redirect route
 	$IP route add table $RTABLE default via $GW_IP4 dev $DEV onlink
@@ -234,8 +249,21 @@ fib_rule4_test()
 	fib_rule4_test_match_n_redirect "$match" "$match" "iif redirect to table"
 	ip netns exec testns sysctl -qw net.ipv4.ip_forward=0
 
+	# Reject dsfield (tos) options which have ECN bits set
+	for cnt in $(seq 1 3); do
+		match="dsfield $cnt"
+		fib_rule4_test_reject "$match"
+	done
+
+	# Don't take ECN bits into account when matching on dsfield
 	match="tos 0x10"
-	fib_rule4_test_match_n_redirect "$match" "$match" "tos redirect to table"
+	for cnt in "0x10" "0x11" "0x12" "0x13"; do
+		# Using option 'tos' instead of 'dsfield' as old iproute2
+		# versions don't support 'dsfield' in ip rule show.
+		getmatch="tos $cnt"
+		fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+						"$getmatch redirect to table"
+	done
 
 	match="fwmark 0x64"
 	getmatch="mark 0x64"
-- 
cgit 


From f55fbb6afb8d701e3185e31e73f5ea9503a66744 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Fri, 4 Feb 2022 14:58:16 +0100
Subject: ipv4: Reject routes specifying ECN bits in rtm_tos
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use the new dscp_t type to replace the fc_tos field of fib_config, to
ensure IPv4 routes aren't influenced by ECN bits when configured with
non-zero rtm_tos.

Before this patch, IPv4 routes specifying an rtm_tos with some of the
ECN bits set were accepted. However they wouldn't work (never match) as
IPv4 normally clears the ECN bits with IPTOS_RT_MASK before doing a FIB
lookup (although a few buggy code paths don't).

After this patch, IPv4 routes specifying an rtm_tos with any ECN bit
set is rejected.

Note: IPv6 routes ignore rtm_tos altogether, any rtm_tos is accepted,
but treated as if it were 0.

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Acked-by: David Ahern <dsahern@kernel.org>
Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/ip_fib.h                     |  3 +-
 net/ipv4/fib_frontend.c                  | 11 ++++-
 net/ipv4/fib_trie.c                      |  7 ++-
 tools/testing/selftests/net/fib_tests.sh | 76 ++++++++++++++++++++++++++++++++
 4 files changed, 93 insertions(+), 4 deletions(-)

(limited to 'tools/testing')

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index c4297704bbcb..6a82bcb8813b 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -17,6 +17,7 @@
 #include <linux/rcupdate.h>
 #include <net/fib_notifier.h>
 #include <net/fib_rules.h>
+#include <net/inet_dscp.h>
 #include <net/inetpeer.h>
 #include <linux/percpu.h>
 #include <linux/notifier.h>
@@ -24,7 +25,7 @@
 
 struct fib_config {
 	u8			fc_dst_len;
-	u8			fc_tos;
+	dscp_t			fc_dscp;
 	u8			fc_protocol;
 	u8			fc_scope;
 	u8			fc_type;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 4d61ddd8a0ec..c60e1d1ed2b0 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -32,6 +32,7 @@
 #include <linux/list.h>
 #include <linux/slab.h>
 
+#include <net/inet_dscp.h>
 #include <net/ip.h>
 #include <net/protocol.h>
 #include <net/route.h>
@@ -735,8 +736,16 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
 	memset(cfg, 0, sizeof(*cfg));
 
 	rtm = nlmsg_data(nlh);
+
+	if (!inet_validate_dscp(rtm->rtm_tos)) {
+		NL_SET_ERR_MSG(extack,
+			       "Invalid dsfield (tos): ECN bits must be 0");
+		err = -EINVAL;
+		goto errout;
+	}
+	cfg->fc_dscp = inet_dsfield_to_dscp(rtm->rtm_tos);
+
 	cfg->fc_dst_len = rtm->rtm_dst_len;
-	cfg->fc_tos = rtm->rtm_tos;
 	cfg->fc_table = rtm->rtm_table;
 	cfg->fc_protocol = rtm->rtm_protocol;
 	cfg->fc_scope = rtm->rtm_scope;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 8060524f4256..d937eeebb812 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -61,6 +61,7 @@
 #include <linux/vmalloc.h>
 #include <linux/notifier.h>
 #include <net/net_namespace.h>
+#include <net/inet_dscp.h>
 #include <net/ip.h>
 #include <net/protocol.h>
 #include <net/route.h>
@@ -1210,9 +1211,9 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
 	struct fib_info *fi;
 	u8 plen = cfg->fc_dst_len;
 	u8 slen = KEYLENGTH - plen;
-	u8 tos = cfg->fc_tos;
 	u32 key;
 	int err;
+	u8 tos;
 
 	key = ntohl(cfg->fc_dst);
 
@@ -1227,6 +1228,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
 		goto err;
 	}
 
+	tos = inet_dscp_to_dsfield(cfg->fc_dscp);
 	l = fib_find_node(t, &tp, key);
 	fa = l ? fib_find_alias(&l->leaf, slen, tos, fi->fib_priority,
 				tb->tb_id, false) : NULL;
@@ -1703,8 +1705,8 @@ int fib_table_delete(struct net *net, struct fib_table *tb,
 	struct key_vector *l, *tp;
 	u8 plen = cfg->fc_dst_len;
 	u8 slen = KEYLENGTH - plen;
-	u8 tos = cfg->fc_tos;
 	u32 key;
+	u8 tos;
 
 	key = ntohl(cfg->fc_dst);
 
@@ -1715,6 +1717,7 @@ int fib_table_delete(struct net *net, struct fib_table *tb,
 	if (!l)
 		return -ESRCH;
 
+	tos = inet_dscp_to_dsfield(cfg->fc_dscp);
 	fa = fib_find_alias(&l->leaf, slen, tos, 0, tb->tb_id, false);
 	if (!fa)
 		return -ESRCH;
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 996af1ae3d3d..bb73235976b3 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -1447,6 +1447,81 @@ ipv4_local_rt_cache()
 	log_test $? 0 "Cached route removed from VRF port device"
 }
 
+ipv4_rt_dsfield()
+{
+	echo
+	echo "IPv4 route with dsfield tests"
+
+	run_cmd "$IP route flush 172.16.102.0/24"
+
+	# New routes should reject dsfield options that interfere with ECN
+	run_cmd "$IP route add 172.16.102.0/24 dsfield 0x01 via 172.16.101.2"
+	log_test $? 2 "Reject route with dsfield 0x01"
+
+	run_cmd "$IP route add 172.16.102.0/24 dsfield 0x02 via 172.16.101.2"
+	log_test $? 2 "Reject route with dsfield 0x02"
+
+	run_cmd "$IP route add 172.16.102.0/24 dsfield 0x03 via 172.16.101.2"
+	log_test $? 2 "Reject route with dsfield 0x03"
+
+	# A generic route that doesn't take DSCP into account
+	run_cmd "$IP route add 172.16.102.0/24 via 172.16.101.2"
+
+	# A more specific route for DSCP 0x10
+	run_cmd "$IP route add 172.16.102.0/24 dsfield 0x10 via 172.16.103.2"
+
+	# DSCP 0x10 should match the specific route, no matter the ECN bits
+	$IP route get fibmatch 172.16.102.1 dsfield 0x10 | \
+		grep -q "via 172.16.103.2"
+	log_test $? 0 "IPv4 route with DSCP and ECN:Not-ECT"
+
+	$IP route get fibmatch 172.16.102.1 dsfield 0x11 | \
+		grep -q "via 172.16.103.2"
+	log_test $? 0 "IPv4 route with DSCP and ECN:ECT(1)"
+
+	$IP route get fibmatch 172.16.102.1 dsfield 0x12 | \
+		grep -q "via 172.16.103.2"
+	log_test $? 0 "IPv4 route with DSCP and ECN:ECT(0)"
+
+	$IP route get fibmatch 172.16.102.1 dsfield 0x13 | \
+		grep -q "via 172.16.103.2"
+	log_test $? 0 "IPv4 route with DSCP and ECN:CE"
+
+	# Unknown DSCP should match the generic route, no matter the ECN bits
+	$IP route get fibmatch 172.16.102.1 dsfield 0x14 | \
+		grep -q "via 172.16.101.2"
+	log_test $? 0 "IPv4 route with unknown DSCP and ECN:Not-ECT"
+
+	$IP route get fibmatch 172.16.102.1 dsfield 0x15 | \
+		grep -q "via 172.16.101.2"
+	log_test $? 0 "IPv4 route with unknown DSCP and ECN:ECT(1)"
+
+	$IP route get fibmatch 172.16.102.1 dsfield 0x16 | \
+		grep -q "via 172.16.101.2"
+	log_test $? 0 "IPv4 route with unknown DSCP and ECN:ECT(0)"
+
+	$IP route get fibmatch 172.16.102.1 dsfield 0x17 | \
+		grep -q "via 172.16.101.2"
+	log_test $? 0 "IPv4 route with unknown DSCP and ECN:CE"
+
+	# Null DSCP should match the generic route, no matter the ECN bits
+	$IP route get fibmatch 172.16.102.1 dsfield 0x00 | \
+		grep -q "via 172.16.101.2"
+	log_test $? 0 "IPv4 route with no DSCP and ECN:Not-ECT"
+
+	$IP route get fibmatch 172.16.102.1 dsfield 0x01 | \
+		grep -q "via 172.16.101.2"
+	log_test $? 0 "IPv4 route with no DSCP and ECN:ECT(1)"
+
+	$IP route get fibmatch 172.16.102.1 dsfield 0x02 | \
+		grep -q "via 172.16.101.2"
+	log_test $? 0 "IPv4 route with no DSCP and ECN:ECT(0)"
+
+	$IP route get fibmatch 172.16.102.1 dsfield 0x03 | \
+		grep -q "via 172.16.101.2"
+	log_test $? 0 "IPv4 route with no DSCP and ECN:CE"
+}
+
 ipv4_route_test()
 {
 	route_setup
@@ -1454,6 +1529,7 @@ ipv4_route_test()
 	ipv4_rt_add
 	ipv4_rt_replace
 	ipv4_local_rt_cache
+	ipv4_rt_dsfield
 
 	route_cleanup
 }
-- 
cgit 


From 8f85b4da579e006547c8cf3660220c54b99451da Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Mon, 7 Feb 2022 09:22:35 +0000
Subject: kselftest: alsa: fix spelling mistake "desciptor" -> "descriptor"

There are some spelling mistakes in some ksft messages. Fix them.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lore.kernel.org/r/20220207092235.240284-1-colin.i.king@gmail.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 tools/testing/selftests/alsa/mixer-test.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/alsa/mixer-test.c b/tools/testing/selftests/alsa/mixer-test.c
index d0b788b8d287..eb2213540fe3 100644
--- a/tools/testing/selftests/alsa/mixer-test.c
+++ b/tools/testing/selftests/alsa/mixer-test.c
@@ -198,7 +198,7 @@ static void find_controls(void)
 
 		err = snd_ctl_poll_descriptors_count(card_data->handle);
 		if (err != 1) {
-			ksft_exit_fail_msg("Unexpected desciptor count %d for card %d\n",
+			ksft_exit_fail_msg("Unexpected descriptor count %d for card %d\n",
 					   err, card);
 		}
 
@@ -248,12 +248,12 @@ static int wait_for_event(struct ctl_data *ctl, int timeout)
 						       &(ctl->card->pollfd),
 						       1, &revents);
 		if (err < 0) {
-			ksft_print_msg("snd_ctl_poll_desciptors_revents() failed for %s: %d\n",
+			ksft_print_msg("snd_ctl_poll_descriptors_revents() failed for %s: %d\n",
 				       ctl->name, err);
 			return err;
 		}
 		if (revents & POLLERR) {
-			ksft_print_msg("snd_ctl_poll_desciptors_revents() reported POLLERR for %s\n",
+			ksft_print_msg("snd_ctl_poll_descriptors_revents() reported POLLERR for %s\n",
 				       ctl->name);
 			return -1;
 		}
-- 
cgit 


From d134998838ac217a8427c1ddc83cf48888bb3fa3 Mon Sep 17 00:00:00 2001
From: Oliver Upton <oupton@google.com>
Date: Thu, 3 Feb 2022 17:41:58 +0000
Subject: selftests: KVM: Add OSLSR_EL1 to the list of blessed regs

OSLSR_EL1 is now part of the visible system register state. Add it to
the get-reg-list selftest to ensure we keep it that way.

Signed-off-by: Oliver Upton <oupton@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220203174159.2887882-6-oupton@google.com
---
 tools/testing/selftests/kvm/aarch64/get-reg-list.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
index f769fc6cd927..f12147c43464 100644
--- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
@@ -760,6 +760,7 @@ static __u64 base_regs[] = {
 	ARM64_SYS_REG(2, 0, 0, 15, 5),
 	ARM64_SYS_REG(2, 0, 0, 15, 6),
 	ARM64_SYS_REG(2, 0, 0, 15, 7),
+	ARM64_SYS_REG(2, 0, 1, 1, 4),	/* OSLSR_EL1 */
 	ARM64_SYS_REG(2, 4, 0, 7, 0),	/* DBGVCR32_EL2 */
 	ARM64_SYS_REG(3, 0, 0, 0, 5),	/* MPIDR_EL1 */
 	ARM64_SYS_REG(3, 0, 0, 1, 0),	/* ID_PFR0_EL1 */
-- 
cgit 


From 05c9324de1695b5e61dceca6d2ef0ab8c0f2f26b Mon Sep 17 00:00:00 2001
From: Oliver Upton <oupton@google.com>
Date: Thu, 3 Feb 2022 17:41:59 +0000
Subject: selftests: KVM: Test OS lock behavior

KVM now correctly handles the OS Lock for its guests. When set, KVM
blocks all debug exceptions originating from the guest. Add test cases
to the debug-exceptions test to assert that software breakpoint,
hardware breakpoint, watchpoint, and single-step exceptions are in fact
blocked.

Signed-off-by: Oliver Upton <oupton@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220203174159.2887882-7-oupton@google.com
---
 .../selftests/kvm/aarch64/debug-exceptions.c       | 58 +++++++++++++++++++++-
 1 file changed, 56 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
index ea189d83abf7..63b2178210c4 100644
--- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
+++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
@@ -23,7 +23,7 @@
 #define SPSR_D		(1 << 9)
 #define SPSR_SS		(1 << 21)
 
-extern unsigned char sw_bp, hw_bp, bp_svc, bp_brk, hw_wp, ss_start;
+extern unsigned char sw_bp, sw_bp2, hw_bp, hw_bp2, bp_svc, bp_brk, hw_wp, ss_start;
 static volatile uint64_t sw_bp_addr, hw_bp_addr;
 static volatile uint64_t wp_addr, wp_data_addr;
 static volatile uint64_t svc_addr;
@@ -47,6 +47,14 @@ static void reset_debug_state(void)
 	isb();
 }
 
+static void enable_os_lock(void)
+{
+	write_sysreg(1, oslar_el1);
+	isb();
+
+	GUEST_ASSERT(read_sysreg(oslsr_el1) & 2);
+}
+
 static void install_wp(uint64_t addr)
 {
 	uint32_t wcr;
@@ -99,6 +107,7 @@ static void guest_code(void)
 	GUEST_SYNC(0);
 
 	/* Software-breakpoint */
+	reset_debug_state();
 	asm volatile("sw_bp: brk #0");
 	GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp));
 
@@ -152,6 +161,51 @@ static void guest_code(void)
 	GUEST_ASSERT_EQ(ss_addr[1], PC(ss_start) + 4);
 	GUEST_ASSERT_EQ(ss_addr[2], PC(ss_start) + 8);
 
+	GUEST_SYNC(6);
+
+	/* OS Lock does not block software-breakpoint */
+	reset_debug_state();
+	enable_os_lock();
+	sw_bp_addr = 0;
+	asm volatile("sw_bp2: brk #0");
+	GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp2));
+
+	GUEST_SYNC(7);
+
+	/* OS Lock blocking hardware-breakpoint */
+	reset_debug_state();
+	enable_os_lock();
+	install_hw_bp(PC(hw_bp2));
+	hw_bp_addr = 0;
+	asm volatile("hw_bp2: nop");
+	GUEST_ASSERT_EQ(hw_bp_addr, 0);
+
+	GUEST_SYNC(8);
+
+	/* OS Lock blocking watchpoint */
+	reset_debug_state();
+	enable_os_lock();
+	write_data = '\0';
+	wp_data_addr = 0;
+	install_wp(PC(write_data));
+	write_data = 'x';
+	GUEST_ASSERT_EQ(write_data, 'x');
+	GUEST_ASSERT_EQ(wp_data_addr, 0);
+
+	GUEST_SYNC(9);
+
+	/* OS Lock blocking single-step */
+	reset_debug_state();
+	enable_os_lock();
+	ss_addr[0] = 0;
+	install_ss();
+	ss_idx = 0;
+	asm volatile("mrs x0, esr_el1\n\t"
+		     "add x0, x0, #1\n\t"
+		     "msr daifset, #8\n\t"
+		     : : : "x0");
+	GUEST_ASSERT_EQ(ss_addr[0], 0);
+
 	GUEST_DONE();
 }
 
@@ -223,7 +277,7 @@ int main(int argc, char *argv[])
 	vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
 				ESR_EC_SVC64, guest_svc_handler);
 
-	for (stage = 0; stage < 7; stage++) {
+	for (stage = 0; stage < 11; stage++) {
 		vcpu_run(vm, VCPU_ID);
 
 		switch (get_ucall(vm, VCPU_ID, &uc)) {
-- 
cgit 


From c340f7899af6f83bd937f8838949bb32da54c8a4 Mon Sep 17 00:00:00 2001
From: Jing Zhang <jingzhangos@google.com>
Date: Tue, 18 Jan 2022 01:57:03 +0000
Subject: KVM: selftests: Add vgic initialization for dirty log perf test for
 ARM

For ARM64, if no vgic is setup before the dirty log perf test, the
userspace irqchip would be used, which would affect the dirty log perf
test result.

Signed-off-by: Jing Zhang <jingzhangos@google.com>
Tested-by: Fuad Tabba <tabba@google.com>
Reviewed-by: Fuad Tabba <tabba@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220118015703.3630552-4-jingzhangos@google.com
---
 tools/testing/selftests/kvm/dirty_log_perf_test.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index 1954b964d1cf..b501338d9430 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -18,6 +18,12 @@
 #include "test_util.h"
 #include "perf_test_util.h"
 #include "guest_modes.h"
+#ifdef __aarch64__
+#include "aarch64/vgic.h"
+
+#define GICD_BASE_GPA			0x8000000ULL
+#define GICR_BASE_GPA			0x80A0000ULL
+#endif
 
 /* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/
 #define TEST_HOST_LOOP_N		2UL
@@ -200,6 +206,10 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 		vm_enable_cap(vm, &cap);
 	}
 
+#ifdef __aarch64__
+	vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA);
+#endif
+
 	/* Start the iterations */
 	iteration = 0;
 	host_quit = false;
-- 
cgit 


From cc94d47ce16d4147d546e47c8248e8bd12ba5fe5 Mon Sep 17 00:00:00 2001
From: Ricardo Koller <ricarkol@google.com>
Date: Wed, 26 Jan 2022 19:08:54 -0800
Subject: kvm: selftests: aarch64: fix assert in gicv3_access_reg

The val argument in gicv3_access_reg can have any value when used for a
read, not necessarily 0.  Fix the assert by checking val only for
writes.

Signed-off-by: Ricardo Koller <ricarkol@google.com>
Reported-by: Reiji Watanabe <reijiw@google.com>
Cc: Andrew Jones <drjones@redhat.com>
Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220127030858.3269036-2-ricarkol@google.com
---
 tools/testing/selftests/kvm/lib/aarch64/gic_v3.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c
index 00f613c0583c..e4945fe66620 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c
@@ -159,7 +159,7 @@ static void gicv3_access_reg(uint32_t intid, uint64_t offset,
 	uint32_t cpu_or_dist;
 
 	GUEST_ASSERT(bits_per_field <= reg_bits);
-	GUEST_ASSERT(*val < (1U << bits_per_field));
+	GUEST_ASSERT(!write || *val < (1U << bits_per_field));
 	/* Some registers like IROUTER are 64 bit long. Those are currently not
 	 * supported by readl nor writel, so just asserting here until then.
 	 */
-- 
cgit 


From 11024a7a0ac26dd31ddfa0f6590e158bdf9ab858 Mon Sep 17 00:00:00 2001
From: Ricardo Koller <ricarkol@google.com>
Date: Wed, 26 Jan 2022 19:08:55 -0800
Subject: kvm: selftests: aarch64: pass vgic_irq guest args as a pointer

The guest in vgic_irq gets its arguments in a struct. This struct used
to fit nicely in a single register so vcpu_args_set() was able to pass
it by value by setting x0 with it. Unfortunately, this args struct grew
after some commits and some guest args became random (specically
kvm_supports_irqfd).

Fix this by passing the guest args as a pointer (after allocating some
guest memory for it).

Signed-off-by: Ricardo Koller <ricarkol@google.com>
Reported-by: Reiji Watanabe <reijiw@google.com>
Cc: Andrew Jones <drjones@redhat.com>
Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220127030858.3269036-3-ricarkol@google.com
---
 tools/testing/selftests/kvm/aarch64/vgic_irq.c | 29 ++++++++++++++------------
 1 file changed, 16 insertions(+), 13 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c
index e6c7d7f8fbd1..b701eb80128d 100644
--- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c
+++ b/tools/testing/selftests/kvm/aarch64/vgic_irq.c
@@ -472,10 +472,10 @@ static void test_restore_active(struct test_args *args, struct kvm_inject_desc *
 		guest_restore_active(args, MIN_SPI, 4, f->cmd);
 }
 
-static void guest_code(struct test_args args)
+static void guest_code(struct test_args *args)
 {
-	uint32_t i, nr_irqs = args.nr_irqs;
-	bool level_sensitive = args.level_sensitive;
+	uint32_t i, nr_irqs = args->nr_irqs;
+	bool level_sensitive = args->level_sensitive;
 	struct kvm_inject_desc *f, *inject_fns;
 
 	gic_init(GIC_V3, 1, dist, redist);
@@ -484,11 +484,11 @@ static void guest_code(struct test_args args)
 		gic_irq_enable(i);
 
 	for (i = MIN_SPI; i < nr_irqs; i++)
-		gic_irq_set_config(i, !args.level_sensitive);
+		gic_irq_set_config(i, !level_sensitive);
 
-	gic_set_eoi_split(args.eoi_split);
+	gic_set_eoi_split(args->eoi_split);
 
-	reset_priorities(&args);
+	reset_priorities(args);
 	gic_set_priority_mask(CPU_PRIO_MASK);
 
 	inject_fns  = level_sensitive ? inject_level_fns
@@ -497,17 +497,17 @@ static void guest_code(struct test_args args)
 	local_irq_enable();
 
 	/* Start the tests. */
-	for_each_supported_inject_fn(&args, inject_fns, f) {
-		test_injection(&args, f);
-		test_preemption(&args, f);
-		test_injection_failure(&args, f);
+	for_each_supported_inject_fn(args, inject_fns, f) {
+		test_injection(args, f);
+		test_preemption(args, f);
+		test_injection_failure(args, f);
 	}
 
 	/* Restore the active state of IRQs. This would happen when live
 	 * migrating IRQs in the middle of being handled.
 	 */
-	for_each_supported_activate_fn(&args, set_active_fns, f)
-		test_restore_active(&args, f);
+	for_each_supported_activate_fn(args, set_active_fns, f)
+		test_restore_active(args, f);
 
 	GUEST_DONE();
 }
@@ -739,6 +739,7 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split)
 	int gic_fd;
 	struct kvm_vm *vm;
 	struct kvm_inject_args inject_args;
+	vm_vaddr_t args_gva;
 
 	struct test_args args = {
 		.nr_irqs = nr_irqs,
@@ -757,7 +758,9 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split)
 	vcpu_init_descriptor_tables(vm, VCPU_ID);
 
 	/* Setup the guest args page (so it gets the args). */
-	vcpu_args_set(vm, 0, 1, args);
+	args_gva = vm_vaddr_alloc_page(vm);
+	memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args));
+	vcpu_args_set(vm, 0, 1, args_gva);
 
 	gic_fd = vgic_v3_setup(vm, 1, nr_irqs,
 			GICD_BASE_GPA, GICR_BASE_GPA);
-- 
cgit 


From 5b7898648f02083012900e48d063e51ccbdad165 Mon Sep 17 00:00:00 2001
From: Ricardo Koller <ricarkol@google.com>
Date: Wed, 26 Jan 2022 19:08:56 -0800
Subject: kvm: selftests: aarch64: fix the failure check in
 kvm_set_gsi_routing_irqchip_check

kvm_set_gsi_routing_irqchip_check(expect_failure=true) is used to check
the error code returned by the kernel when trying to setup an invalid
gsi routing table. The ioctl fails if "pin >= KVM_IRQCHIP_NUM_PINS", so
kvm_set_gsi_routing_irqchip_check() should test the error only when
"intid >= KVM_IRQCHIP_NUM_PINS+32". The issue is that the test check is
"intid >= KVM_IRQCHIP_NUM_PINS", so for a case like "intid =
KVM_IRQCHIP_NUM_PINS" the test wrongly assumes that the kernel will
return an error.  Fix this by using the right check.

Signed-off-by: Ricardo Koller <ricarkol@google.com>
Reported-by: Reiji Watanabe <reijiw@google.com>
Cc: Andrew Jones <drjones@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220127030858.3269036-4-ricarkol@google.com
---
 tools/testing/selftests/kvm/aarch64/vgic_irq.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c
index b701eb80128d..0106fc464afe 100644
--- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c
+++ b/tools/testing/selftests/kvm/aarch64/vgic_irq.c
@@ -573,8 +573,8 @@ static void kvm_set_gsi_routing_irqchip_check(struct kvm_vm *vm,
 		kvm_gsi_routing_write(vm, routing);
 	} else {
 		ret = _kvm_gsi_routing_write(vm, routing);
-		/* The kernel only checks for KVM_IRQCHIP_NUM_PINS. */
-		if (intid >= KVM_IRQCHIP_NUM_PINS)
+		/* The kernel only checks e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS */
+		if (((uint64_t)intid + num - 1 - MIN_SPI) >= KVM_IRQCHIP_NUM_PINS)
 			TEST_ASSERT(ret != 0 && errno == EINVAL,
 				"Bad intid %u did not cause KVM_SET_GSI_ROUTING "
 				"error: rc: %i errno: %i", intid, ret, errno);
-- 
cgit 


From a5cd38fd9c47b23abc6df08d6ee6a71b39038185 Mon Sep 17 00:00:00 2001
From: Ricardo Koller <ricarkol@google.com>
Date: Wed, 26 Jan 2022 19:08:57 -0800
Subject: kvm: selftests: aarch64: fix some vgic related comments

Fix the formatting of some comments and the wording of one of them (in
gicv3_access_reg).

Signed-off-by: Ricardo Koller <ricarkol@google.com>
Reported-by: Reiji Watanabe <reijiw@google.com>
Cc: Andrew Jones <drjones@redhat.com>
Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220127030858.3269036-5-ricarkol@google.com
---
 tools/testing/selftests/kvm/aarch64/vgic_irq.c   | 12 ++++++++----
 tools/testing/selftests/kvm/lib/aarch64/gic_v3.c | 10 ++++++----
 tools/testing/selftests/kvm/lib/aarch64/vgic.c   |  3 ++-
 3 files changed, 16 insertions(+), 9 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c
index 0106fc464afe..f0230711fbe9 100644
--- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c
+++ b/tools/testing/selftests/kvm/aarch64/vgic_irq.c
@@ -306,7 +306,8 @@ static void guest_restore_active(struct test_args *args,
 	uint32_t prio, intid, ap1r;
 	int i;
 
-	/* Set the priorities of the first (KVM_NUM_PRIOS - 1) IRQs
+	/*
+	 * Set the priorities of the first (KVM_NUM_PRIOS - 1) IRQs
 	 * in descending order, so intid+1 can preempt intid.
 	 */
 	for (i = 0, prio = (num - 1) * 8; i < num; i++, prio -= 8) {
@@ -315,7 +316,8 @@ static void guest_restore_active(struct test_args *args,
 		gic_set_priority(intid, prio);
 	}
 
-	/* In a real migration, KVM would restore all GIC state before running
+	/*
+	 * In a real migration, KVM would restore all GIC state before running
 	 * guest code.
 	 */
 	for (i = 0; i < num; i++) {
@@ -503,7 +505,8 @@ static void guest_code(struct test_args *args)
 		test_injection_failure(args, f);
 	}
 
-	/* Restore the active state of IRQs. This would happen when live
+	/*
+	 * Restore the active state of IRQs. This would happen when live
 	 * migrating IRQs in the middle of being handled.
 	 */
 	for_each_supported_activate_fn(args, set_active_fns, f)
@@ -840,7 +843,8 @@ int main(int argc, char **argv)
 		}
 	}
 
-	/* If the user just specified nr_irqs and/or gic_version, then run all
+	/*
+	 * If the user just specified nr_irqs and/or gic_version, then run all
 	 * combinations.
 	 */
 	if (default_args) {
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c
index e4945fe66620..263bf3ed8fd5 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c
@@ -19,7 +19,7 @@ struct gicv3_data {
 	unsigned int nr_spis;
 };
 
-#define sgi_base_from_redist(redist_base) 	(redist_base + SZ_64K)
+#define sgi_base_from_redist(redist_base)	(redist_base + SZ_64K)
 #define DIST_BIT				(1U << 31)
 
 enum gicv3_intid_range {
@@ -105,7 +105,8 @@ static void gicv3_set_eoi_split(bool split)
 {
 	uint32_t val;
 
-	/* All other fields are read-only, so no need to read CTLR first. In
+	/*
+	 * All other fields are read-only, so no need to read CTLR first. In
 	 * fact, the kernel does the same.
 	 */
 	val = split ? (1U << 1) : 0;
@@ -160,8 +161,9 @@ static void gicv3_access_reg(uint32_t intid, uint64_t offset,
 
 	GUEST_ASSERT(bits_per_field <= reg_bits);
 	GUEST_ASSERT(!write || *val < (1U << bits_per_field));
-	/* Some registers like IROUTER are 64 bit long. Those are currently not
-	 * supported by readl nor writel, so just asserting here until then.
+	/*
+	 * This function does not support 64 bit accesses. Just asserting here
+	 * until we implement readq/writeq.
 	 */
 	GUEST_ASSERT(reg_bits == 32);
 
diff --git a/tools/testing/selftests/kvm/lib/aarch64/vgic.c b/tools/testing/selftests/kvm/lib/aarch64/vgic.c
index b3a0fca0d780..79864b941617 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/vgic.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/vgic.c
@@ -150,7 +150,8 @@ static void vgic_poke_irq(int gic_fd, uint32_t intid,
 		attr += SZ_64K;
 	}
 
-	/* All calls will succeed, even with invalid intid's, as long as the
+	/*
+	 * All calls will succeed, even with invalid intid's, as long as the
 	 * addr part of the attr is within 32 bits (checked above). An invalid
 	 * intid will just make the read/writes point to above the intended
 	 * register space (i.e., ICPENDR after ISPENDR).
-- 
cgit 


From b53de63a89244c196d8a2ea76b6754e3fdb4b626 Mon Sep 17 00:00:00 2001
From: Ricardo Koller <ricarkol@google.com>
Date: Wed, 26 Jan 2022 19:08:58 -0800
Subject: kvm: selftests: aarch64: use a tighter assert in vgic_poke_irq()

vgic_poke_irq() checks that the attr argument passed to the vgic device
ioctl is sane. Make this check tighter by moving it to after the last
attr update.

Signed-off-by: Ricardo Koller <ricarkol@google.com>
Reported-by: Reiji Watanabe <reijiw@google.com>
Cc: Andrew Jones <drjones@redhat.com>
Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220127030858.3269036-6-ricarkol@google.com
---
 tools/testing/selftests/kvm/lib/aarch64/vgic.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/lib/aarch64/vgic.c b/tools/testing/selftests/kvm/lib/aarch64/vgic.c
index 79864b941617..f365c32a7296 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/vgic.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/vgic.c
@@ -138,9 +138,6 @@ static void vgic_poke_irq(int gic_fd, uint32_t intid,
 	uint64_t val;
 	bool intid_is_private = INTID_IS_SGI(intid) || INTID_IS_PPI(intid);
 
-	/* Check that the addr part of the attr is within 32 bits. */
-	assert(attr <= KVM_DEV_ARM_VGIC_OFFSET_MASK);
-
 	uint32_t group = intid_is_private ? KVM_DEV_ARM_VGIC_GRP_REDIST_REGS
 					  : KVM_DEV_ARM_VGIC_GRP_DIST_REGS;
 
@@ -150,6 +147,9 @@ static void vgic_poke_irq(int gic_fd, uint32_t intid,
 		attr += SZ_64K;
 	}
 
+	/* Check that the addr part of the attr is within 32 bits. */
+	assert((attr & ~KVM_DEV_ARM_VGIC_OFFSET_MASK) == 0);
+
 	/*
 	 * All calls will succeed, even with invalid intid's, as long as the
 	 * addr part of the attr is within 32 bits (checked above). An invalid
-- 
cgit 


From f233673cd32a048f2eed69e56b61174c33fb740b Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 27 Jan 2022 09:39:15 -0800
Subject: torture: Make torture.sh help message match reality

This commit fixes a couple of typos: s/--doall/--do-all/ and
s/--doallmodconfig/--do-allmodconfig/.

[ paulmck: Add Fixes: supplied by Paul Menzel. ]

Fixes: a115a775a8d5 ("torture: Add "make allmodconfig" to torture.sh")
Reported-by: Paul Menzel <pmenzel@molgen.mpg.de>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/torture.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index e00e60efb231..bfe09e2829c8 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -71,8 +71,8 @@ usage () {
 	echo "       --configs-rcutorture \"config-file list w/ repeat factor (3*TINY01)\""
 	echo "       --configs-locktorture \"config-file list w/ repeat factor (10*LOCK01)\""
 	echo "       --configs-scftorture \"config-file list w/ repeat factor (2*CFLIST)\""
-	echo "       --doall"
-	echo "       --doallmodconfig / --do-no-allmodconfig"
+	echo "       --do-all"
+	echo "       --do-allmodconfig / --do-no-allmodconfig"
 	echo "       --do-clocksourcewd / --do-no-clocksourcewd"
 	echo "       --do-kasan / --do-no-kasan"
 	echo "       --do-kcsan / --do-no-kcsan"
-- 
cgit 


From b5597cb36f8bb29b244b2f90030d54bf81bf6fbc Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 31 Jan 2022 15:03:36 -0800
Subject: rcutorture: Test SRCU size transitions

Thie commit adds kernel boot parameters to the SRCU-N and SRCU-P
rcutorture scenarios to cause SRCU-N to test contention-based resizing
and SRCU-P to test init_srcu_struct()-time resizing.  Note that this
also tests never-resizing because the contention-based resizing normally
takes some minutes to make the shift.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot | 1 +
 tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot | 2 ++
 2 files changed, 3 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot
index 238bfe3bd0cc..ce0694fd9b92 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot
@@ -1 +1,2 @@
 rcutorture.torture_type=srcu
+rcutorture.fwd_progress=3
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
index ce48c7b82673..2db39f298d18 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
@@ -1,2 +1,4 @@
 rcutorture.torture_type=srcud
 rcupdate.rcu_self_test=1
+rcutorture.fwd_progress=3
+srcutree.big_cpu_lim=5
-- 
cgit 


From 8ea7a53daf3c9d26910ee9a115b2fb6b86cf3c01 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 1 Feb 2022 08:23:46 -0800
Subject: rcutorture: Provide non-power-of-two Tasks RCU scenarios

This commit adjusts RUDE01 to 3 CPUs and TRACE01 to 5 CPUs in order to
test Tasks RCU's ability to handle non-power-of-two numbers of CPUs.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/configs/rcu/RUDE01  | 2 +-
 tools/testing/selftests/rcutorture/configs/rcu/TRACE01 | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01 b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
index 3ca112444ce7..7093422050f6 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
@@ -1,5 +1,5 @@
 CONFIG_SMP=y
-CONFIG_NR_CPUS=4
+CONFIG_NR_CPUS=3
 CONFIG_HOTPLUG_CPU=y
 CONFIG_PREEMPT_NONE=n
 CONFIG_PREEMPT_VOLUNTARY=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
index 34c8ff5a12f2..e4d74e5fc1d0 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
@@ -1,5 +1,5 @@
 CONFIG_SMP=y
-CONFIG_NR_CPUS=4
+CONFIG_NR_CPUS=5
 CONFIG_HOTPLUG_CPU=y
 CONFIG_PREEMPT_NONE=y
 CONFIG_PREEMPT_VOLUNTARY=n
-- 
cgit 


From 5912fcb4bee1ab7a956ffe81b8ab30a8667ac034 Mon Sep 17 00:00:00 2001
From: Hou Tao <houtao1@huawei.com>
Date: Tue, 8 Feb 2022 14:54:44 +0800
Subject: selftests/bpf: Do not export subtest as standalone test

Two subtests in ksyms_module.c are not qualified as static, so these
subtests are exported as standalone tests in tests.h and lead to
confusion for the output of "./test_progs -t ksyms_module".

By using the following command ...

  grep "^void \(serial_\)\?test_[a-zA-Z0-9_]\+(\(void\)\?)" \
      tools/testing/selftests/bpf/prog_tests/*.c | \
	awk -F : '{print $1}' | sort | uniq -c | awk '$1 != 1'

... one finds out that other tests also have a similar problem, so
fix these tests by marking subtests in these tests as static.

Signed-off-by: Hou Tao <houtao1@huawei.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20220208065444.648778-1-houtao1@huawei.com
---
 tools/testing/selftests/bpf/prog_tests/ksyms_module.c      | 4 ++--
 tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c  | 2 +-
 tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c   | 4 ++--
 tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c | 4 ++--
 tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c | 2 +-
 5 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c
index ecc58c9e7631..a1ebac70ec29 100644
--- a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c
@@ -6,7 +6,7 @@
 #include "test_ksyms_module.lskel.h"
 #include "test_ksyms_module.skel.h"
 
-void test_ksyms_module_lskel(void)
+static void test_ksyms_module_lskel(void)
 {
 	struct test_ksyms_module_lskel *skel;
 	int err;
@@ -33,7 +33,7 @@ cleanup:
 	test_ksyms_module_lskel__destroy(skel);
 }
 
-void test_ksyms_module_libbpf(void)
+static void test_ksyms_module_libbpf(void)
 {
 	struct test_ksyms_module *skel;
 	int err;
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c
index 134d0ac32f59..d18e6f343c48 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c
@@ -2,7 +2,7 @@
 #include <test_progs.h>
 #include <network_helpers.h>
 
-void test_xdp_update_frags(void)
+static void test_xdp_update_frags(void)
 {
 	const char *file = "./test_xdp_update_frags.o";
 	struct bpf_program *prog;
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
index 528a8c387720..21ceac24e174 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
@@ -133,7 +133,7 @@ static void test_xdp_adjust_tail_grow2(void)
 	bpf_object__close(obj);
 }
 
-void test_xdp_adjust_frags_tail_shrink(void)
+static void test_xdp_adjust_frags_tail_shrink(void)
 {
 	const char *file = "./test_xdp_adjust_tail_shrink.o";
 	__u32 exp_size;
@@ -200,7 +200,7 @@ out:
 	bpf_object__close(obj);
 }
 
-void test_xdp_adjust_frags_tail_grow(void)
+static void test_xdp_adjust_frags_tail_grow(void)
 {
 	const char *file = "./test_xdp_adjust_tail_grow.o";
 	__u32 exp_size;
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
index b353e1f3acb5..f775a1613833 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
@@ -8,7 +8,7 @@
 
 #define IFINDEX_LO	1
 
-void test_xdp_with_cpumap_helpers(void)
+static void test_xdp_with_cpumap_helpers(void)
 {
 	struct test_xdp_with_cpumap_helpers *skel;
 	struct bpf_prog_info info = {};
@@ -68,7 +68,7 @@ out_close:
 	test_xdp_with_cpumap_helpers__destroy(skel);
 }
 
-void test_xdp_with_cpumap_frags_helpers(void)
+static void test_xdp_with_cpumap_frags_helpers(void)
 {
 	struct test_xdp_with_cpumap_frags_helpers *skel;
 	struct bpf_prog_info info = {};
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
index 463a72fc3e70..ead40016c324 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
@@ -81,7 +81,7 @@ static void test_neg_xdp_devmap_helpers(void)
 	}
 }
 
-void test_xdp_with_devmap_frags_helpers(void)
+static void test_xdp_with_devmap_frags_helpers(void)
 {
 	struct test_xdp_with_devmap_frags_helpers *skel;
 	struct bpf_prog_info info = {};
-- 
cgit 


From 4fc49b51ab9d58e830f9df37ea775625ea966d50 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Wed, 9 Feb 2022 03:17:36 +0100
Subject: selftests/bpf: Fix an endianness issue in bpf_syscall_macro test

bpf_syscall_macro reads a long argument into an int variable, which
produces a wrong value on big-endian systems. Fix by reading the
argument into an intermediate long variable first.

Fixes: 77fc0330dfe5 ("selftests/bpf: Add a test to confirm PT_REGS_PARM4_SYSCALL")
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220209021745.2215452-2-iii@linux.ibm.com
---
 tools/testing/selftests/bpf/progs/bpf_syscall_macro.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c b/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c
index c8e60220cda8..f5c6ef2ff6d1 100644
--- a/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c
+++ b/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c
@@ -28,6 +28,7 @@ int BPF_KPROBE(handle_sys_prctl)
 {
 	struct pt_regs *real_regs;
 	pid_t pid = bpf_get_current_pid_tgid() >> 32;
+	unsigned long tmp;
 
 	if (pid != filter_pid)
 		return 0;
@@ -35,7 +36,9 @@ int BPF_KPROBE(handle_sys_prctl)
 	real_regs = (struct pt_regs *)PT_REGS_PARM1(ctx);
 
 	/* test for PT_REGS_PARM */
-	bpf_probe_read_kernel(&arg1, sizeof(arg1), &PT_REGS_PARM1_SYSCALL(real_regs));
+
+	bpf_probe_read_kernel(&tmp, sizeof(tmp), &PT_REGS_PARM1_SYSCALL(real_regs));
+	arg1 = tmp;
 	bpf_probe_read_kernel(&arg2, sizeof(arg2), &PT_REGS_PARM2_SYSCALL(real_regs));
 	bpf_probe_read_kernel(&arg3, sizeof(arg3), &PT_REGS_PARM3_SYSCALL(real_regs));
 	bpf_probe_read_kernel(&arg4_cx, sizeof(arg4_cx), &PT_REGS_PARM4(real_regs));
-- 
cgit 


From 3f928cab927c576f3385f3e828c53a95b2199f58 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Wed, 9 Feb 2022 03:17:38 +0100
Subject: selftests/bpf: Use PT_REGS_SYSCALL_REGS in bpf_syscall_macro

Ensure that PT_REGS_SYSCALL_REGS works correctly.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220209021745.2215452-4-iii@linux.ibm.com
---
 tools/testing/selftests/bpf/progs/bpf_syscall_macro.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c b/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c
index f5c6ef2ff6d1..e7c622cb6a39 100644
--- a/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c
+++ b/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c
@@ -33,7 +33,7 @@ int BPF_KPROBE(handle_sys_prctl)
 	if (pid != filter_pid)
 		return 0;
 
-	real_regs = (struct pt_regs *)PT_REGS_PARM1(ctx);
+	real_regs = PT_REGS_SYSCALL_REGS(ctx);
 
 	/* test for PT_REGS_PARM */
 
-- 
cgit 


From 9e45a377f29b5a66f75c0c3a0d84ad5c583290e8 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Wed, 9 Feb 2022 03:17:42 +0100
Subject: selftests/bpf: Skip test_bpf_syscall_macro's syscall_arg1 on arm64
 and s390

These architectures can provide access to the first syscall argument
only through PT_REGS_PARM1_CORE_SYSCALL().

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220209021745.2215452-8-iii@linux.ibm.com
---
 tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c | 4 ++++
 tools/testing/selftests/bpf/progs/bpf_syscall_macro.c           | 4 +++-
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c b/tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c
index f5f4c8adf539..8bc58bda500d 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c
@@ -33,7 +33,11 @@ void test_bpf_syscall_macro(void)
 
 	/* check whether args of syscall are copied correctly */
 	prctl(exp_arg1, exp_arg2, exp_arg3, exp_arg4, exp_arg5);
+#if defined(__aarch64__) || defined(__s390__)
+	ASSERT_NEQ(skel->bss->arg1, exp_arg1, "syscall_arg1");
+#else
 	ASSERT_EQ(skel->bss->arg1, exp_arg1, "syscall_arg1");
+#endif
 	ASSERT_EQ(skel->bss->arg2, exp_arg2, "syscall_arg2");
 	ASSERT_EQ(skel->bss->arg3, exp_arg3, "syscall_arg3");
 	/* it cannot copy arg4 when uses PT_REGS_PARM4 on x86_64 */
diff --git a/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c b/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c
index e7c622cb6a39..496e54d0ac22 100644
--- a/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c
+++ b/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c
@@ -28,7 +28,7 @@ int BPF_KPROBE(handle_sys_prctl)
 {
 	struct pt_regs *real_regs;
 	pid_t pid = bpf_get_current_pid_tgid() >> 32;
-	unsigned long tmp;
+	unsigned long tmp = 0;
 
 	if (pid != filter_pid)
 		return 0;
@@ -37,7 +37,9 @@ int BPF_KPROBE(handle_sys_prctl)
 
 	/* test for PT_REGS_PARM */
 
+#if !defined(bpf_target_arm64) && !defined(bpf_target_s390)
 	bpf_probe_read_kernel(&tmp, sizeof(tmp), &PT_REGS_PARM1_SYSCALL(real_regs));
+#endif
 	arg1 = tmp;
 	bpf_probe_read_kernel(&arg2, sizeof(arg2), &PT_REGS_PARM2_SYSCALL(real_regs));
 	bpf_probe_read_kernel(&arg3, sizeof(arg3), &PT_REGS_PARM3_SYSCALL(real_regs));
-- 
cgit 


From c28748233b4736bd31b3d3c3011d42054cc738f5 Mon Sep 17 00:00:00 2001
From: Hengqi Chen <hengqi.chen@gmail.com>
Date: Mon, 7 Feb 2022 22:31:34 +0800
Subject: selftests/bpf: Test BPF_KPROBE_SYSCALL macro

Add tests for the newly added BPF_KPROBE_SYSCALL macro.

Signed-off-by: Hengqi Chen <hengqi.chen@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220207143134.2977852-3-hengqi.chen@gmail.com
---
 .../bpf/prog_tests/test_bpf_syscall_macro.c        |  6 ++++++
 .../selftests/bpf/progs/bpf_syscall_macro.c        | 23 ++++++++++++++++++++++
 2 files changed, 29 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c b/tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c
index 8bc58bda500d..c381faaae741 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c
@@ -62,6 +62,12 @@ void test_bpf_syscall_macro(void)
 	ASSERT_EQ(skel->bss->arg4_core, exp_arg4, "syscall_arg4_core_variant");
 	ASSERT_EQ(skel->bss->arg5_core, exp_arg5, "syscall_arg5_core_variant");
 
+	ASSERT_EQ(skel->bss->option_syscall, exp_arg1, "BPF_KPROBE_SYSCALL_option");
+	ASSERT_EQ(skel->bss->arg2_syscall, exp_arg2, "BPF_KPROBE_SYSCALL_arg2");
+	ASSERT_EQ(skel->bss->arg3_syscall, exp_arg3, "BPF_KPROBE_SYSCALL_arg3");
+	ASSERT_EQ(skel->bss->arg4_syscall, exp_arg4, "BPF_KPROBE_SYSCALL_arg4");
+	ASSERT_EQ(skel->bss->arg5_syscall, exp_arg5, "BPF_KPROBE_SYSCALL_arg5");
+
 cleanup:
 	bpf_syscall_macro__destroy(skel);
 }
diff --git a/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c b/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c
index 496e54d0ac22..05838ed9b89c 100644
--- a/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c
+++ b/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c
@@ -21,6 +21,12 @@ unsigned long arg4_core_cx = 0;
 unsigned long arg4_core = 0;
 unsigned long arg5_core = 0;
 
+int option_syscall = 0;
+unsigned long arg2_syscall = 0;
+unsigned long arg3_syscall = 0;
+unsigned long arg4_syscall = 0;
+unsigned long arg5_syscall = 0;
+
 const volatile pid_t filter_pid = 0;
 
 SEC("kprobe/" SYS_PREFIX "sys_prctl")
@@ -58,4 +64,21 @@ int BPF_KPROBE(handle_sys_prctl)
 	return 0;
 }
 
+SEC("kprobe/" SYS_PREFIX "sys_prctl")
+int BPF_KPROBE_SYSCALL(prctl_enter, int option, unsigned long arg2,
+		       unsigned long arg3, unsigned long arg4, unsigned long arg5)
+{
+	pid_t pid = bpf_get_current_pid_tgid() >> 32;
+
+	if (pid != filter_pid)
+		return 0;
+
+	option_syscall = option;
+	arg2_syscall = arg2;
+	arg3_syscall = arg3;
+	arg4_syscall = arg4;
+	arg5_syscall = arg5;
+	return 0;
+}
+
 char _license[] SEC("license") = "GPL";
-- 
cgit 


From 0ff0af18216436d0151af4e410400c7a19ca9437 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sun, 23 Jan 2022 16:29:21 -0800
Subject: cxl/core/port: Rename bus.c to port.c

Given it is dominated by port infrastructure, and will only acquire
more, rename bus.c to port.c.

Reviewed-by: Ben Widawsky <ben.widawsky@intel.com>
Link: https://lore.kernel.org/r/164298416136.3018233.15442880970000855425.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 Documentation/driver-api/cxl/memory-devices.rst |   4 +-
 drivers/cxl/core/Makefile                       |   2 +-
 drivers/cxl/core/bus.c                          | 675 ------------------------
 drivers/cxl/core/port.c                         | 675 ++++++++++++++++++++++++
 tools/testing/cxl/Kbuild                        |   2 +-
 5 files changed, 679 insertions(+), 679 deletions(-)
 delete mode 100644 drivers/cxl/core/bus.c
 create mode 100644 drivers/cxl/core/port.c

(limited to 'tools/testing')

diff --git a/Documentation/driver-api/cxl/memory-devices.rst b/Documentation/driver-api/cxl/memory-devices.rst
index 3b8f41395f6b..c8f7a16cd0e3 100644
--- a/Documentation/driver-api/cxl/memory-devices.rst
+++ b/Documentation/driver-api/cxl/memory-devices.rst
@@ -36,10 +36,10 @@ CXL Core
 .. kernel-doc:: drivers/cxl/cxl.h
    :internal:
 
-.. kernel-doc:: drivers/cxl/core/bus.c
+.. kernel-doc:: drivers/cxl/core/port.c
    :doc: cxl core
 
-.. kernel-doc:: drivers/cxl/core/bus.c
+.. kernel-doc:: drivers/cxl/core/port.c
    :identifiers:
 
 .. kernel-doc:: drivers/cxl/core/pmem.c
diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
index 40ab50318daf..a90202ac88d2 100644
--- a/drivers/cxl/core/Makefile
+++ b/drivers/cxl/core/Makefile
@@ -2,7 +2,7 @@
 obj-$(CONFIG_CXL_BUS) += cxl_core.o
 
 ccflags-y += -I$(srctree)/drivers/cxl
-cxl_core-y := bus.o
+cxl_core-y := port.o
 cxl_core-y += pmem.o
 cxl_core-y += regs.o
 cxl_core-y += memdev.o
diff --git a/drivers/cxl/core/bus.c b/drivers/cxl/core/bus.c
deleted file mode 100644
index 3f9b98ecd18b..000000000000
--- a/drivers/cxl/core/bus.c
+++ /dev/null
@@ -1,675 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright(c) 2020 Intel Corporation. All rights reserved. */
-#include <linux/io-64-nonatomic-lo-hi.h>
-#include <linux/device.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/slab.h>
-#include <linux/idr.h>
-#include <cxlmem.h>
-#include <cxl.h>
-#include "core.h"
-
-/**
- * DOC: cxl core
- *
- * The CXL core provides a set of interfaces that can be consumed by CXL aware
- * drivers. The interfaces allow for creation, modification, and destruction of
- * regions, memory devices, ports, and decoders. CXL aware drivers must register
- * with the CXL core via these interfaces in order to be able to participate in
- * cross-device interleave coordination. The CXL core also establishes and
- * maintains the bridge to the nvdimm subsystem.
- *
- * CXL core introduces sysfs hierarchy to control the devices that are
- * instantiated by the core.
- */
-
-static DEFINE_IDA(cxl_port_ida);
-
-static ssize_t devtype_show(struct device *dev, struct device_attribute *attr,
-			    char *buf)
-{
-	return sysfs_emit(buf, "%s\n", dev->type->name);
-}
-static DEVICE_ATTR_RO(devtype);
-
-static struct attribute *cxl_base_attributes[] = {
-	&dev_attr_devtype.attr,
-	NULL,
-};
-
-struct attribute_group cxl_base_attribute_group = {
-	.attrs = cxl_base_attributes,
-};
-
-static ssize_t start_show(struct device *dev, struct device_attribute *attr,
-			  char *buf)
-{
-	struct cxl_decoder *cxld = to_cxl_decoder(dev);
-
-	return sysfs_emit(buf, "%#llx\n", cxld->range.start);
-}
-static DEVICE_ATTR_RO(start);
-
-static ssize_t size_show(struct device *dev, struct device_attribute *attr,
-			char *buf)
-{
-	struct cxl_decoder *cxld = to_cxl_decoder(dev);
-
-	return sysfs_emit(buf, "%#llx\n", range_len(&cxld->range));
-}
-static DEVICE_ATTR_RO(size);
-
-#define CXL_DECODER_FLAG_ATTR(name, flag)                            \
-static ssize_t name##_show(struct device *dev,                       \
-			   struct device_attribute *attr, char *buf) \
-{                                                                    \
-	struct cxl_decoder *cxld = to_cxl_decoder(dev);              \
-                                                                     \
-	return sysfs_emit(buf, "%s\n",                               \
-			  (cxld->flags & (flag)) ? "1" : "0");       \
-}                                                                    \
-static DEVICE_ATTR_RO(name)
-
-CXL_DECODER_FLAG_ATTR(cap_pmem, CXL_DECODER_F_PMEM);
-CXL_DECODER_FLAG_ATTR(cap_ram, CXL_DECODER_F_RAM);
-CXL_DECODER_FLAG_ATTR(cap_type2, CXL_DECODER_F_TYPE2);
-CXL_DECODER_FLAG_ATTR(cap_type3, CXL_DECODER_F_TYPE3);
-CXL_DECODER_FLAG_ATTR(locked, CXL_DECODER_F_LOCK);
-
-static ssize_t target_type_show(struct device *dev,
-				struct device_attribute *attr, char *buf)
-{
-	struct cxl_decoder *cxld = to_cxl_decoder(dev);
-
-	switch (cxld->target_type) {
-	case CXL_DECODER_ACCELERATOR:
-		return sysfs_emit(buf, "accelerator\n");
-	case CXL_DECODER_EXPANDER:
-		return sysfs_emit(buf, "expander\n");
-	}
-	return -ENXIO;
-}
-static DEVICE_ATTR_RO(target_type);
-
-static ssize_t target_list_show(struct device *dev,
-			       struct device_attribute *attr, char *buf)
-{
-	struct cxl_decoder *cxld = to_cxl_decoder(dev);
-	ssize_t offset = 0;
-	int i, rc = 0;
-
-	device_lock(dev);
-	for (i = 0; i < cxld->interleave_ways; i++) {
-		struct cxl_dport *dport = cxld->target[i];
-		struct cxl_dport *next = NULL;
-
-		if (!dport)
-			break;
-
-		if (i + 1 < cxld->interleave_ways)
-			next = cxld->target[i + 1];
-		rc = sysfs_emit_at(buf, offset, "%d%s", dport->port_id,
-				   next ? "," : "");
-		if (rc < 0)
-			break;
-		offset += rc;
-	}
-	device_unlock(dev);
-
-	if (rc < 0)
-		return rc;
-
-	rc = sysfs_emit_at(buf, offset, "\n");
-	if (rc < 0)
-		return rc;
-
-	return offset + rc;
-}
-static DEVICE_ATTR_RO(target_list);
-
-static struct attribute *cxl_decoder_base_attrs[] = {
-	&dev_attr_start.attr,
-	&dev_attr_size.attr,
-	&dev_attr_locked.attr,
-	&dev_attr_target_list.attr,
-	NULL,
-};
-
-static struct attribute_group cxl_decoder_base_attribute_group = {
-	.attrs = cxl_decoder_base_attrs,
-};
-
-static struct attribute *cxl_decoder_root_attrs[] = {
-	&dev_attr_cap_pmem.attr,
-	&dev_attr_cap_ram.attr,
-	&dev_attr_cap_type2.attr,
-	&dev_attr_cap_type3.attr,
-	NULL,
-};
-
-static struct attribute_group cxl_decoder_root_attribute_group = {
-	.attrs = cxl_decoder_root_attrs,
-};
-
-static const struct attribute_group *cxl_decoder_root_attribute_groups[] = {
-	&cxl_decoder_root_attribute_group,
-	&cxl_decoder_base_attribute_group,
-	&cxl_base_attribute_group,
-	NULL,
-};
-
-static struct attribute *cxl_decoder_switch_attrs[] = {
-	&dev_attr_target_type.attr,
-	NULL,
-};
-
-static struct attribute_group cxl_decoder_switch_attribute_group = {
-	.attrs = cxl_decoder_switch_attrs,
-};
-
-static const struct attribute_group *cxl_decoder_switch_attribute_groups[] = {
-	&cxl_decoder_switch_attribute_group,
-	&cxl_decoder_base_attribute_group,
-	&cxl_base_attribute_group,
-	NULL,
-};
-
-static void cxl_decoder_release(struct device *dev)
-{
-	struct cxl_decoder *cxld = to_cxl_decoder(dev);
-	struct cxl_port *port = to_cxl_port(dev->parent);
-
-	ida_free(&port->decoder_ida, cxld->id);
-	kfree(cxld);
-}
-
-static const struct device_type cxl_decoder_switch_type = {
-	.name = "cxl_decoder_switch",
-	.release = cxl_decoder_release,
-	.groups = cxl_decoder_switch_attribute_groups,
-};
-
-static const struct device_type cxl_decoder_root_type = {
-	.name = "cxl_decoder_root",
-	.release = cxl_decoder_release,
-	.groups = cxl_decoder_root_attribute_groups,
-};
-
-bool is_root_decoder(struct device *dev)
-{
-	return dev->type == &cxl_decoder_root_type;
-}
-EXPORT_SYMBOL_NS_GPL(is_root_decoder, CXL);
-
-struct cxl_decoder *to_cxl_decoder(struct device *dev)
-{
-	if (dev_WARN_ONCE(dev, dev->type->release != cxl_decoder_release,
-			  "not a cxl_decoder device\n"))
-		return NULL;
-	return container_of(dev, struct cxl_decoder, dev);
-}
-EXPORT_SYMBOL_NS_GPL(to_cxl_decoder, CXL);
-
-static void cxl_dport_release(struct cxl_dport *dport)
-{
-	list_del(&dport->list);
-	put_device(dport->dport);
-	kfree(dport);
-}
-
-static void cxl_port_release(struct device *dev)
-{
-	struct cxl_port *port = to_cxl_port(dev);
-	struct cxl_dport *dport, *_d;
-
-	device_lock(dev);
-	list_for_each_entry_safe(dport, _d, &port->dports, list)
-		cxl_dport_release(dport);
-	device_unlock(dev);
-	ida_free(&cxl_port_ida, port->id);
-	kfree(port);
-}
-
-static const struct attribute_group *cxl_port_attribute_groups[] = {
-	&cxl_base_attribute_group,
-	NULL,
-};
-
-static const struct device_type cxl_port_type = {
-	.name = "cxl_port",
-	.release = cxl_port_release,
-	.groups = cxl_port_attribute_groups,
-};
-
-struct cxl_port *to_cxl_port(struct device *dev)
-{
-	if (dev_WARN_ONCE(dev, dev->type != &cxl_port_type,
-			  "not a cxl_port device\n"))
-		return NULL;
-	return container_of(dev, struct cxl_port, dev);
-}
-
-static void unregister_port(void *_port)
-{
-	struct cxl_port *port = _port;
-	struct cxl_dport *dport;
-
-	device_lock(&port->dev);
-	list_for_each_entry(dport, &port->dports, list) {
-		char link_name[CXL_TARGET_STRLEN];
-
-		if (snprintf(link_name, CXL_TARGET_STRLEN, "dport%d",
-			     dport->port_id) >= CXL_TARGET_STRLEN)
-			continue;
-		sysfs_remove_link(&port->dev.kobj, link_name);
-	}
-	device_unlock(&port->dev);
-	device_unregister(&port->dev);
-}
-
-static void cxl_unlink_uport(void *_port)
-{
-	struct cxl_port *port = _port;
-
-	sysfs_remove_link(&port->dev.kobj, "uport");
-}
-
-static int devm_cxl_link_uport(struct device *host, struct cxl_port *port)
-{
-	int rc;
-
-	rc = sysfs_create_link(&port->dev.kobj, &port->uport->kobj, "uport");
-	if (rc)
-		return rc;
-	return devm_add_action_or_reset(host, cxl_unlink_uport, port);
-}
-
-static struct cxl_port *cxl_port_alloc(struct device *uport,
-				       resource_size_t component_reg_phys,
-				       struct cxl_port *parent_port)
-{
-	struct cxl_port *port;
-	struct device *dev;
-	int rc;
-
-	port = kzalloc(sizeof(*port), GFP_KERNEL);
-	if (!port)
-		return ERR_PTR(-ENOMEM);
-
-	rc = ida_alloc(&cxl_port_ida, GFP_KERNEL);
-	if (rc < 0)
-		goto err;
-	port->id = rc;
-
-	/*
-	 * The top-level cxl_port "cxl_root" does not have a cxl_port as
-	 * its parent and it does not have any corresponding component
-	 * registers as its decode is described by a fixed platform
-	 * description.
-	 */
-	dev = &port->dev;
-	if (parent_port)
-		dev->parent = &parent_port->dev;
-	else
-		dev->parent = uport;
-
-	port->uport = uport;
-	port->component_reg_phys = component_reg_phys;
-	ida_init(&port->decoder_ida);
-	INIT_LIST_HEAD(&port->dports);
-
-	device_initialize(dev);
-	device_set_pm_not_required(dev);
-	dev->bus = &cxl_bus_type;
-	dev->type = &cxl_port_type;
-
-	return port;
-
-err:
-	kfree(port);
-	return ERR_PTR(rc);
-}
-
-/**
- * devm_cxl_add_port - register a cxl_port in CXL memory decode hierarchy
- * @host: host device for devm operations
- * @uport: "physical" device implementing this upstream port
- * @component_reg_phys: (optional) for configurable cxl_port instances
- * @parent_port: next hop up in the CXL memory decode hierarchy
- */
-struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
-				   resource_size_t component_reg_phys,
-				   struct cxl_port *parent_port)
-{
-	struct cxl_port *port;
-	struct device *dev;
-	int rc;
-
-	port = cxl_port_alloc(uport, component_reg_phys, parent_port);
-	if (IS_ERR(port))
-		return port;
-
-	dev = &port->dev;
-	if (parent_port)
-		rc = dev_set_name(dev, "port%d", port->id);
-	else
-		rc = dev_set_name(dev, "root%d", port->id);
-	if (rc)
-		goto err;
-
-	rc = device_add(dev);
-	if (rc)
-		goto err;
-
-	rc = devm_add_action_or_reset(host, unregister_port, port);
-	if (rc)
-		return ERR_PTR(rc);
-
-	rc = devm_cxl_link_uport(host, port);
-	if (rc)
-		return ERR_PTR(rc);
-
-	return port;
-
-err:
-	put_device(dev);
-	return ERR_PTR(rc);
-}
-EXPORT_SYMBOL_NS_GPL(devm_cxl_add_port, CXL);
-
-static struct cxl_dport *find_dport(struct cxl_port *port, int id)
-{
-	struct cxl_dport *dport;
-
-	device_lock_assert(&port->dev);
-	list_for_each_entry (dport, &port->dports, list)
-		if (dport->port_id == id)
-			return dport;
-	return NULL;
-}
-
-static int add_dport(struct cxl_port *port, struct cxl_dport *new)
-{
-	struct cxl_dport *dup;
-
-	device_lock(&port->dev);
-	dup = find_dport(port, new->port_id);
-	if (dup)
-		dev_err(&port->dev,
-			"unable to add dport%d-%s non-unique port id (%s)\n",
-			new->port_id, dev_name(new->dport),
-			dev_name(dup->dport));
-	else
-		list_add_tail(&new->list, &port->dports);
-	device_unlock(&port->dev);
-
-	return dup ? -EEXIST : 0;
-}
-
-/**
- * cxl_add_dport - append downstream port data to a cxl_port
- * @port: the cxl_port that references this dport
- * @dport_dev: firmware or PCI device representing the dport
- * @port_id: identifier for this dport in a decoder's target list
- * @component_reg_phys: optional location of CXL component registers
- *
- * Note that all allocations and links are undone by cxl_port deletion
- * and release.
- */
-int cxl_add_dport(struct cxl_port *port, struct device *dport_dev, int port_id,
-		  resource_size_t component_reg_phys)
-{
-	char link_name[CXL_TARGET_STRLEN];
-	struct cxl_dport *dport;
-	int rc;
-
-	if (snprintf(link_name, CXL_TARGET_STRLEN, "dport%d", port_id) >=
-	    CXL_TARGET_STRLEN)
-		return -EINVAL;
-
-	dport = kzalloc(sizeof(*dport), GFP_KERNEL);
-	if (!dport)
-		return -ENOMEM;
-
-	INIT_LIST_HEAD(&dport->list);
-	dport->dport = get_device(dport_dev);
-	dport->port_id = port_id;
-	dport->component_reg_phys = component_reg_phys;
-	dport->port = port;
-
-	rc = add_dport(port, dport);
-	if (rc)
-		goto err;
-
-	rc = sysfs_create_link(&port->dev.kobj, &dport_dev->kobj, link_name);
-	if (rc)
-		goto err;
-
-	return 0;
-err:
-	cxl_dport_release(dport);
-	return rc;
-}
-EXPORT_SYMBOL_NS_GPL(cxl_add_dport, CXL);
-
-static int decoder_populate_targets(struct cxl_decoder *cxld,
-				    struct cxl_port *port, int *target_map)
-{
-	int rc = 0, i;
-
-	if (!target_map)
-		return 0;
-
-	device_lock(&port->dev);
-	if (list_empty(&port->dports)) {
-		rc = -EINVAL;
-		goto out_unlock;
-	}
-
-	for (i = 0; i < cxld->nr_targets; i++) {
-		struct cxl_dport *dport = find_dport(port, target_map[i]);
-
-		if (!dport) {
-			rc = -ENXIO;
-			goto out_unlock;
-		}
-		cxld->target[i] = dport;
-	}
-
-out_unlock:
-	device_unlock(&port->dev);
-
-	return rc;
-}
-
-struct cxl_decoder *cxl_decoder_alloc(struct cxl_port *port, int nr_targets)
-{
-	struct cxl_decoder *cxld;
-	struct device *dev;
-	int rc = 0;
-
-	if (nr_targets > CXL_DECODER_MAX_INTERLEAVE || nr_targets < 1)
-		return ERR_PTR(-EINVAL);
-
-	cxld = kzalloc(struct_size(cxld, target, nr_targets), GFP_KERNEL);
-	if (!cxld)
-		return ERR_PTR(-ENOMEM);
-
-	rc = ida_alloc(&port->decoder_ida, GFP_KERNEL);
-	if (rc < 0)
-		goto err;
-
-	cxld->id = rc;
-	cxld->nr_targets = nr_targets;
-	dev = &cxld->dev;
-	device_initialize(dev);
-	device_set_pm_not_required(dev);
-	dev->parent = &port->dev;
-	dev->bus = &cxl_bus_type;
-
-	/* root ports do not have a cxl_port_type parent */
-	if (port->dev.parent->type == &cxl_port_type)
-		dev->type = &cxl_decoder_switch_type;
-	else
-		dev->type = &cxl_decoder_root_type;
-
-	return cxld;
-err:
-	kfree(cxld);
-	return ERR_PTR(rc);
-}
-EXPORT_SYMBOL_NS_GPL(cxl_decoder_alloc, CXL);
-
-int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map)
-{
-	struct cxl_port *port;
-	struct device *dev;
-	int rc;
-
-	if (WARN_ON_ONCE(!cxld))
-		return -EINVAL;
-
-	if (WARN_ON_ONCE(IS_ERR(cxld)))
-		return PTR_ERR(cxld);
-
-	if (cxld->interleave_ways < 1)
-		return -EINVAL;
-
-	port = to_cxl_port(cxld->dev.parent);
-	rc = decoder_populate_targets(cxld, port, target_map);
-	if (rc)
-		return rc;
-
-	dev = &cxld->dev;
-	rc = dev_set_name(dev, "decoder%d.%d", port->id, cxld->id);
-	if (rc)
-		return rc;
-
-	return device_add(dev);
-}
-EXPORT_SYMBOL_NS_GPL(cxl_decoder_add, CXL);
-
-static void cxld_unregister(void *dev)
-{
-	device_unregister(dev);
-}
-
-int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld)
-{
-	return devm_add_action_or_reset(host, cxld_unregister, &cxld->dev);
-}
-EXPORT_SYMBOL_NS_GPL(cxl_decoder_autoremove, CXL);
-
-/**
- * __cxl_driver_register - register a driver for the cxl bus
- * @cxl_drv: cxl driver structure to attach
- * @owner: owning module/driver
- * @modname: KBUILD_MODNAME for parent driver
- */
-int __cxl_driver_register(struct cxl_driver *cxl_drv, struct module *owner,
-			  const char *modname)
-{
-	if (!cxl_drv->probe) {
-		pr_debug("%s ->probe() must be specified\n", modname);
-		return -EINVAL;
-	}
-
-	if (!cxl_drv->name) {
-		pr_debug("%s ->name must be specified\n", modname);
-		return -EINVAL;
-	}
-
-	if (!cxl_drv->id) {
-		pr_debug("%s ->id must be specified\n", modname);
-		return -EINVAL;
-	}
-
-	cxl_drv->drv.bus = &cxl_bus_type;
-	cxl_drv->drv.owner = owner;
-	cxl_drv->drv.mod_name = modname;
-	cxl_drv->drv.name = cxl_drv->name;
-
-	return driver_register(&cxl_drv->drv);
-}
-EXPORT_SYMBOL_NS_GPL(__cxl_driver_register, CXL);
-
-void cxl_driver_unregister(struct cxl_driver *cxl_drv)
-{
-	driver_unregister(&cxl_drv->drv);
-}
-EXPORT_SYMBOL_NS_GPL(cxl_driver_unregister, CXL);
-
-static int cxl_device_id(struct device *dev)
-{
-	if (dev->type == &cxl_nvdimm_bridge_type)
-		return CXL_DEVICE_NVDIMM_BRIDGE;
-	if (dev->type == &cxl_nvdimm_type)
-		return CXL_DEVICE_NVDIMM;
-	return 0;
-}
-
-static int cxl_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
-{
-	return add_uevent_var(env, "MODALIAS=" CXL_MODALIAS_FMT,
-			      cxl_device_id(dev));
-}
-
-static int cxl_bus_match(struct device *dev, struct device_driver *drv)
-{
-	return cxl_device_id(dev) == to_cxl_drv(drv)->id;
-}
-
-static int cxl_bus_probe(struct device *dev)
-{
-	return to_cxl_drv(dev->driver)->probe(dev);
-}
-
-static void cxl_bus_remove(struct device *dev)
-{
-	struct cxl_driver *cxl_drv = to_cxl_drv(dev->driver);
-
-	if (cxl_drv->remove)
-		cxl_drv->remove(dev);
-}
-
-struct bus_type cxl_bus_type = {
-	.name = "cxl",
-	.uevent = cxl_bus_uevent,
-	.match = cxl_bus_match,
-	.probe = cxl_bus_probe,
-	.remove = cxl_bus_remove,
-};
-EXPORT_SYMBOL_NS_GPL(cxl_bus_type, CXL);
-
-static __init int cxl_core_init(void)
-{
-	int rc;
-
-	cxl_mbox_init();
-
-	rc = cxl_memdev_init();
-	if (rc)
-		return rc;
-
-	rc = bus_register(&cxl_bus_type);
-	if (rc)
-		goto err;
-	return 0;
-
-err:
-	cxl_memdev_exit();
-	cxl_mbox_exit();
-	return rc;
-}
-
-static void cxl_core_exit(void)
-{
-	bus_unregister(&cxl_bus_type);
-	cxl_memdev_exit();
-	cxl_mbox_exit();
-}
-
-module_init(cxl_core_init);
-module_exit(cxl_core_exit);
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
new file mode 100644
index 000000000000..3f9b98ecd18b
--- /dev/null
+++ b/drivers/cxl/core/port.c
@@ -0,0 +1,675 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2020 Intel Corporation. All rights reserved. */
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/idr.h>
+#include <cxlmem.h>
+#include <cxl.h>
+#include "core.h"
+
+/**
+ * DOC: cxl core
+ *
+ * The CXL core provides a set of interfaces that can be consumed by CXL aware
+ * drivers. The interfaces allow for creation, modification, and destruction of
+ * regions, memory devices, ports, and decoders. CXL aware drivers must register
+ * with the CXL core via these interfaces in order to be able to participate in
+ * cross-device interleave coordination. The CXL core also establishes and
+ * maintains the bridge to the nvdimm subsystem.
+ *
+ * CXL core introduces sysfs hierarchy to control the devices that are
+ * instantiated by the core.
+ */
+
+static DEFINE_IDA(cxl_port_ida);
+
+static ssize_t devtype_show(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	return sysfs_emit(buf, "%s\n", dev->type->name);
+}
+static DEVICE_ATTR_RO(devtype);
+
+static struct attribute *cxl_base_attributes[] = {
+	&dev_attr_devtype.attr,
+	NULL,
+};
+
+struct attribute_group cxl_base_attribute_group = {
+	.attrs = cxl_base_attributes,
+};
+
+static ssize_t start_show(struct device *dev, struct device_attribute *attr,
+			  char *buf)
+{
+	struct cxl_decoder *cxld = to_cxl_decoder(dev);
+
+	return sysfs_emit(buf, "%#llx\n", cxld->range.start);
+}
+static DEVICE_ATTR_RO(start);
+
+static ssize_t size_show(struct device *dev, struct device_attribute *attr,
+			char *buf)
+{
+	struct cxl_decoder *cxld = to_cxl_decoder(dev);
+
+	return sysfs_emit(buf, "%#llx\n", range_len(&cxld->range));
+}
+static DEVICE_ATTR_RO(size);
+
+#define CXL_DECODER_FLAG_ATTR(name, flag)                            \
+static ssize_t name##_show(struct device *dev,                       \
+			   struct device_attribute *attr, char *buf) \
+{                                                                    \
+	struct cxl_decoder *cxld = to_cxl_decoder(dev);              \
+                                                                     \
+	return sysfs_emit(buf, "%s\n",                               \
+			  (cxld->flags & (flag)) ? "1" : "0");       \
+}                                                                    \
+static DEVICE_ATTR_RO(name)
+
+CXL_DECODER_FLAG_ATTR(cap_pmem, CXL_DECODER_F_PMEM);
+CXL_DECODER_FLAG_ATTR(cap_ram, CXL_DECODER_F_RAM);
+CXL_DECODER_FLAG_ATTR(cap_type2, CXL_DECODER_F_TYPE2);
+CXL_DECODER_FLAG_ATTR(cap_type3, CXL_DECODER_F_TYPE3);
+CXL_DECODER_FLAG_ATTR(locked, CXL_DECODER_F_LOCK);
+
+static ssize_t target_type_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct cxl_decoder *cxld = to_cxl_decoder(dev);
+
+	switch (cxld->target_type) {
+	case CXL_DECODER_ACCELERATOR:
+		return sysfs_emit(buf, "accelerator\n");
+	case CXL_DECODER_EXPANDER:
+		return sysfs_emit(buf, "expander\n");
+	}
+	return -ENXIO;
+}
+static DEVICE_ATTR_RO(target_type);
+
+static ssize_t target_list_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct cxl_decoder *cxld = to_cxl_decoder(dev);
+	ssize_t offset = 0;
+	int i, rc = 0;
+
+	device_lock(dev);
+	for (i = 0; i < cxld->interleave_ways; i++) {
+		struct cxl_dport *dport = cxld->target[i];
+		struct cxl_dport *next = NULL;
+
+		if (!dport)
+			break;
+
+		if (i + 1 < cxld->interleave_ways)
+			next = cxld->target[i + 1];
+		rc = sysfs_emit_at(buf, offset, "%d%s", dport->port_id,
+				   next ? "," : "");
+		if (rc < 0)
+			break;
+		offset += rc;
+	}
+	device_unlock(dev);
+
+	if (rc < 0)
+		return rc;
+
+	rc = sysfs_emit_at(buf, offset, "\n");
+	if (rc < 0)
+		return rc;
+
+	return offset + rc;
+}
+static DEVICE_ATTR_RO(target_list);
+
+static struct attribute *cxl_decoder_base_attrs[] = {
+	&dev_attr_start.attr,
+	&dev_attr_size.attr,
+	&dev_attr_locked.attr,
+	&dev_attr_target_list.attr,
+	NULL,
+};
+
+static struct attribute_group cxl_decoder_base_attribute_group = {
+	.attrs = cxl_decoder_base_attrs,
+};
+
+static struct attribute *cxl_decoder_root_attrs[] = {
+	&dev_attr_cap_pmem.attr,
+	&dev_attr_cap_ram.attr,
+	&dev_attr_cap_type2.attr,
+	&dev_attr_cap_type3.attr,
+	NULL,
+};
+
+static struct attribute_group cxl_decoder_root_attribute_group = {
+	.attrs = cxl_decoder_root_attrs,
+};
+
+static const struct attribute_group *cxl_decoder_root_attribute_groups[] = {
+	&cxl_decoder_root_attribute_group,
+	&cxl_decoder_base_attribute_group,
+	&cxl_base_attribute_group,
+	NULL,
+};
+
+static struct attribute *cxl_decoder_switch_attrs[] = {
+	&dev_attr_target_type.attr,
+	NULL,
+};
+
+static struct attribute_group cxl_decoder_switch_attribute_group = {
+	.attrs = cxl_decoder_switch_attrs,
+};
+
+static const struct attribute_group *cxl_decoder_switch_attribute_groups[] = {
+	&cxl_decoder_switch_attribute_group,
+	&cxl_decoder_base_attribute_group,
+	&cxl_base_attribute_group,
+	NULL,
+};
+
+static void cxl_decoder_release(struct device *dev)
+{
+	struct cxl_decoder *cxld = to_cxl_decoder(dev);
+	struct cxl_port *port = to_cxl_port(dev->parent);
+
+	ida_free(&port->decoder_ida, cxld->id);
+	kfree(cxld);
+}
+
+static const struct device_type cxl_decoder_switch_type = {
+	.name = "cxl_decoder_switch",
+	.release = cxl_decoder_release,
+	.groups = cxl_decoder_switch_attribute_groups,
+};
+
+static const struct device_type cxl_decoder_root_type = {
+	.name = "cxl_decoder_root",
+	.release = cxl_decoder_release,
+	.groups = cxl_decoder_root_attribute_groups,
+};
+
+bool is_root_decoder(struct device *dev)
+{
+	return dev->type == &cxl_decoder_root_type;
+}
+EXPORT_SYMBOL_NS_GPL(is_root_decoder, CXL);
+
+struct cxl_decoder *to_cxl_decoder(struct device *dev)
+{
+	if (dev_WARN_ONCE(dev, dev->type->release != cxl_decoder_release,
+			  "not a cxl_decoder device\n"))
+		return NULL;
+	return container_of(dev, struct cxl_decoder, dev);
+}
+EXPORT_SYMBOL_NS_GPL(to_cxl_decoder, CXL);
+
+static void cxl_dport_release(struct cxl_dport *dport)
+{
+	list_del(&dport->list);
+	put_device(dport->dport);
+	kfree(dport);
+}
+
+static void cxl_port_release(struct device *dev)
+{
+	struct cxl_port *port = to_cxl_port(dev);
+	struct cxl_dport *dport, *_d;
+
+	device_lock(dev);
+	list_for_each_entry_safe(dport, _d, &port->dports, list)
+		cxl_dport_release(dport);
+	device_unlock(dev);
+	ida_free(&cxl_port_ida, port->id);
+	kfree(port);
+}
+
+static const struct attribute_group *cxl_port_attribute_groups[] = {
+	&cxl_base_attribute_group,
+	NULL,
+};
+
+static const struct device_type cxl_port_type = {
+	.name = "cxl_port",
+	.release = cxl_port_release,
+	.groups = cxl_port_attribute_groups,
+};
+
+struct cxl_port *to_cxl_port(struct device *dev)
+{
+	if (dev_WARN_ONCE(dev, dev->type != &cxl_port_type,
+			  "not a cxl_port device\n"))
+		return NULL;
+	return container_of(dev, struct cxl_port, dev);
+}
+
+static void unregister_port(void *_port)
+{
+	struct cxl_port *port = _port;
+	struct cxl_dport *dport;
+
+	device_lock(&port->dev);
+	list_for_each_entry(dport, &port->dports, list) {
+		char link_name[CXL_TARGET_STRLEN];
+
+		if (snprintf(link_name, CXL_TARGET_STRLEN, "dport%d",
+			     dport->port_id) >= CXL_TARGET_STRLEN)
+			continue;
+		sysfs_remove_link(&port->dev.kobj, link_name);
+	}
+	device_unlock(&port->dev);
+	device_unregister(&port->dev);
+}
+
+static void cxl_unlink_uport(void *_port)
+{
+	struct cxl_port *port = _port;
+
+	sysfs_remove_link(&port->dev.kobj, "uport");
+}
+
+static int devm_cxl_link_uport(struct device *host, struct cxl_port *port)
+{
+	int rc;
+
+	rc = sysfs_create_link(&port->dev.kobj, &port->uport->kobj, "uport");
+	if (rc)
+		return rc;
+	return devm_add_action_or_reset(host, cxl_unlink_uport, port);
+}
+
+static struct cxl_port *cxl_port_alloc(struct device *uport,
+				       resource_size_t component_reg_phys,
+				       struct cxl_port *parent_port)
+{
+	struct cxl_port *port;
+	struct device *dev;
+	int rc;
+
+	port = kzalloc(sizeof(*port), GFP_KERNEL);
+	if (!port)
+		return ERR_PTR(-ENOMEM);
+
+	rc = ida_alloc(&cxl_port_ida, GFP_KERNEL);
+	if (rc < 0)
+		goto err;
+	port->id = rc;
+
+	/*
+	 * The top-level cxl_port "cxl_root" does not have a cxl_port as
+	 * its parent and it does not have any corresponding component
+	 * registers as its decode is described by a fixed platform
+	 * description.
+	 */
+	dev = &port->dev;
+	if (parent_port)
+		dev->parent = &parent_port->dev;
+	else
+		dev->parent = uport;
+
+	port->uport = uport;
+	port->component_reg_phys = component_reg_phys;
+	ida_init(&port->decoder_ida);
+	INIT_LIST_HEAD(&port->dports);
+
+	device_initialize(dev);
+	device_set_pm_not_required(dev);
+	dev->bus = &cxl_bus_type;
+	dev->type = &cxl_port_type;
+
+	return port;
+
+err:
+	kfree(port);
+	return ERR_PTR(rc);
+}
+
+/**
+ * devm_cxl_add_port - register a cxl_port in CXL memory decode hierarchy
+ * @host: host device for devm operations
+ * @uport: "physical" device implementing this upstream port
+ * @component_reg_phys: (optional) for configurable cxl_port instances
+ * @parent_port: next hop up in the CXL memory decode hierarchy
+ */
+struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
+				   resource_size_t component_reg_phys,
+				   struct cxl_port *parent_port)
+{
+	struct cxl_port *port;
+	struct device *dev;
+	int rc;
+
+	port = cxl_port_alloc(uport, component_reg_phys, parent_port);
+	if (IS_ERR(port))
+		return port;
+
+	dev = &port->dev;
+	if (parent_port)
+		rc = dev_set_name(dev, "port%d", port->id);
+	else
+		rc = dev_set_name(dev, "root%d", port->id);
+	if (rc)
+		goto err;
+
+	rc = device_add(dev);
+	if (rc)
+		goto err;
+
+	rc = devm_add_action_or_reset(host, unregister_port, port);
+	if (rc)
+		return ERR_PTR(rc);
+
+	rc = devm_cxl_link_uport(host, port);
+	if (rc)
+		return ERR_PTR(rc);
+
+	return port;
+
+err:
+	put_device(dev);
+	return ERR_PTR(rc);
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_add_port, CXL);
+
+static struct cxl_dport *find_dport(struct cxl_port *port, int id)
+{
+	struct cxl_dport *dport;
+
+	device_lock_assert(&port->dev);
+	list_for_each_entry (dport, &port->dports, list)
+		if (dport->port_id == id)
+			return dport;
+	return NULL;
+}
+
+static int add_dport(struct cxl_port *port, struct cxl_dport *new)
+{
+	struct cxl_dport *dup;
+
+	device_lock(&port->dev);
+	dup = find_dport(port, new->port_id);
+	if (dup)
+		dev_err(&port->dev,
+			"unable to add dport%d-%s non-unique port id (%s)\n",
+			new->port_id, dev_name(new->dport),
+			dev_name(dup->dport));
+	else
+		list_add_tail(&new->list, &port->dports);
+	device_unlock(&port->dev);
+
+	return dup ? -EEXIST : 0;
+}
+
+/**
+ * cxl_add_dport - append downstream port data to a cxl_port
+ * @port: the cxl_port that references this dport
+ * @dport_dev: firmware or PCI device representing the dport
+ * @port_id: identifier for this dport in a decoder's target list
+ * @component_reg_phys: optional location of CXL component registers
+ *
+ * Note that all allocations and links are undone by cxl_port deletion
+ * and release.
+ */
+int cxl_add_dport(struct cxl_port *port, struct device *dport_dev, int port_id,
+		  resource_size_t component_reg_phys)
+{
+	char link_name[CXL_TARGET_STRLEN];
+	struct cxl_dport *dport;
+	int rc;
+
+	if (snprintf(link_name, CXL_TARGET_STRLEN, "dport%d", port_id) >=
+	    CXL_TARGET_STRLEN)
+		return -EINVAL;
+
+	dport = kzalloc(sizeof(*dport), GFP_KERNEL);
+	if (!dport)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&dport->list);
+	dport->dport = get_device(dport_dev);
+	dport->port_id = port_id;
+	dport->component_reg_phys = component_reg_phys;
+	dport->port = port;
+
+	rc = add_dport(port, dport);
+	if (rc)
+		goto err;
+
+	rc = sysfs_create_link(&port->dev.kobj, &dport_dev->kobj, link_name);
+	if (rc)
+		goto err;
+
+	return 0;
+err:
+	cxl_dport_release(dport);
+	return rc;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_add_dport, CXL);
+
+static int decoder_populate_targets(struct cxl_decoder *cxld,
+				    struct cxl_port *port, int *target_map)
+{
+	int rc = 0, i;
+
+	if (!target_map)
+		return 0;
+
+	device_lock(&port->dev);
+	if (list_empty(&port->dports)) {
+		rc = -EINVAL;
+		goto out_unlock;
+	}
+
+	for (i = 0; i < cxld->nr_targets; i++) {
+		struct cxl_dport *dport = find_dport(port, target_map[i]);
+
+		if (!dport) {
+			rc = -ENXIO;
+			goto out_unlock;
+		}
+		cxld->target[i] = dport;
+	}
+
+out_unlock:
+	device_unlock(&port->dev);
+
+	return rc;
+}
+
+struct cxl_decoder *cxl_decoder_alloc(struct cxl_port *port, int nr_targets)
+{
+	struct cxl_decoder *cxld;
+	struct device *dev;
+	int rc = 0;
+
+	if (nr_targets > CXL_DECODER_MAX_INTERLEAVE || nr_targets < 1)
+		return ERR_PTR(-EINVAL);
+
+	cxld = kzalloc(struct_size(cxld, target, nr_targets), GFP_KERNEL);
+	if (!cxld)
+		return ERR_PTR(-ENOMEM);
+
+	rc = ida_alloc(&port->decoder_ida, GFP_KERNEL);
+	if (rc < 0)
+		goto err;
+
+	cxld->id = rc;
+	cxld->nr_targets = nr_targets;
+	dev = &cxld->dev;
+	device_initialize(dev);
+	device_set_pm_not_required(dev);
+	dev->parent = &port->dev;
+	dev->bus = &cxl_bus_type;
+
+	/* root ports do not have a cxl_port_type parent */
+	if (port->dev.parent->type == &cxl_port_type)
+		dev->type = &cxl_decoder_switch_type;
+	else
+		dev->type = &cxl_decoder_root_type;
+
+	return cxld;
+err:
+	kfree(cxld);
+	return ERR_PTR(rc);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_decoder_alloc, CXL);
+
+int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map)
+{
+	struct cxl_port *port;
+	struct device *dev;
+	int rc;
+
+	if (WARN_ON_ONCE(!cxld))
+		return -EINVAL;
+
+	if (WARN_ON_ONCE(IS_ERR(cxld)))
+		return PTR_ERR(cxld);
+
+	if (cxld->interleave_ways < 1)
+		return -EINVAL;
+
+	port = to_cxl_port(cxld->dev.parent);
+	rc = decoder_populate_targets(cxld, port, target_map);
+	if (rc)
+		return rc;
+
+	dev = &cxld->dev;
+	rc = dev_set_name(dev, "decoder%d.%d", port->id, cxld->id);
+	if (rc)
+		return rc;
+
+	return device_add(dev);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_decoder_add, CXL);
+
+static void cxld_unregister(void *dev)
+{
+	device_unregister(dev);
+}
+
+int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld)
+{
+	return devm_add_action_or_reset(host, cxld_unregister, &cxld->dev);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_decoder_autoremove, CXL);
+
+/**
+ * __cxl_driver_register - register a driver for the cxl bus
+ * @cxl_drv: cxl driver structure to attach
+ * @owner: owning module/driver
+ * @modname: KBUILD_MODNAME for parent driver
+ */
+int __cxl_driver_register(struct cxl_driver *cxl_drv, struct module *owner,
+			  const char *modname)
+{
+	if (!cxl_drv->probe) {
+		pr_debug("%s ->probe() must be specified\n", modname);
+		return -EINVAL;
+	}
+
+	if (!cxl_drv->name) {
+		pr_debug("%s ->name must be specified\n", modname);
+		return -EINVAL;
+	}
+
+	if (!cxl_drv->id) {
+		pr_debug("%s ->id must be specified\n", modname);
+		return -EINVAL;
+	}
+
+	cxl_drv->drv.bus = &cxl_bus_type;
+	cxl_drv->drv.owner = owner;
+	cxl_drv->drv.mod_name = modname;
+	cxl_drv->drv.name = cxl_drv->name;
+
+	return driver_register(&cxl_drv->drv);
+}
+EXPORT_SYMBOL_NS_GPL(__cxl_driver_register, CXL);
+
+void cxl_driver_unregister(struct cxl_driver *cxl_drv)
+{
+	driver_unregister(&cxl_drv->drv);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_driver_unregister, CXL);
+
+static int cxl_device_id(struct device *dev)
+{
+	if (dev->type == &cxl_nvdimm_bridge_type)
+		return CXL_DEVICE_NVDIMM_BRIDGE;
+	if (dev->type == &cxl_nvdimm_type)
+		return CXL_DEVICE_NVDIMM;
+	return 0;
+}
+
+static int cxl_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+	return add_uevent_var(env, "MODALIAS=" CXL_MODALIAS_FMT,
+			      cxl_device_id(dev));
+}
+
+static int cxl_bus_match(struct device *dev, struct device_driver *drv)
+{
+	return cxl_device_id(dev) == to_cxl_drv(drv)->id;
+}
+
+static int cxl_bus_probe(struct device *dev)
+{
+	return to_cxl_drv(dev->driver)->probe(dev);
+}
+
+static void cxl_bus_remove(struct device *dev)
+{
+	struct cxl_driver *cxl_drv = to_cxl_drv(dev->driver);
+
+	if (cxl_drv->remove)
+		cxl_drv->remove(dev);
+}
+
+struct bus_type cxl_bus_type = {
+	.name = "cxl",
+	.uevent = cxl_bus_uevent,
+	.match = cxl_bus_match,
+	.probe = cxl_bus_probe,
+	.remove = cxl_bus_remove,
+};
+EXPORT_SYMBOL_NS_GPL(cxl_bus_type, CXL);
+
+static __init int cxl_core_init(void)
+{
+	int rc;
+
+	cxl_mbox_init();
+
+	rc = cxl_memdev_init();
+	if (rc)
+		return rc;
+
+	rc = bus_register(&cxl_bus_type);
+	if (rc)
+		goto err;
+	return 0;
+
+err:
+	cxl_memdev_exit();
+	cxl_mbox_exit();
+	return rc;
+}
+
+static void cxl_core_exit(void)
+{
+	bus_unregister(&cxl_bus_type);
+	cxl_memdev_exit();
+	cxl_mbox_exit();
+}
+
+module_init(cxl_core_init);
+module_exit(cxl_core_exit);
+MODULE_LICENSE("GPL v2");
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
index 1acdf2fc31c5..3299fb0977b2 100644
--- a/tools/testing/cxl/Kbuild
+++ b/tools/testing/cxl/Kbuild
@@ -25,7 +25,7 @@ cxl_pmem-y += config_check.o
 
 obj-m += cxl_core.o
 
-cxl_core-y := $(CXL_CORE_SRC)/bus.o
+cxl_core-y := $(CXL_CORE_SRC)/port.o
 cxl_core-y += $(CXL_CORE_SRC)/pmem.o
 cxl_core-y += $(CXL_CORE_SRC)/regs.o
 cxl_core-y += $(CXL_CORE_SRC)/memdev.o
-- 
cgit 


From a46cfc0f011ce77d120e1cdbf973f733d18f0105 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 31 Jan 2022 16:34:40 -0800
Subject: cxl/pmem: Introduce a find_cxl_root() helper

In preparation for switch port enumeration while also preserving the
potential for multi-domain / multi-root CXL topologies. Introduce a
'struct device' generic mechanism for retrieving a root CXL port, if one
is registered. Note that the only known multi-domain CXL configurations
are running the cxl_test unit test on a system that also publishes an
ACPI0017 device.

With this in hand the nvdimm-bridge lookup can be with
device_find_child() instead of bus_find_device() + custom mocked lookup
infrastructure in cxl_test.

The mechanism looks for a 2nd level port since the root level topology
is platform-firmware specific and the 2nd level down follows standard
PCIe topology expectations. The cxl_acpi 2nd level is associated with a
PCIe Root Port.

Reported-by: Ben Widawsky <ben.widawsky@intel.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Link: https://lore.kernel.org/r/164367562182.225521.9488555616768096049.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/cxl/core/pmem.c       | 14 +++++++++----
 drivers/cxl/core/port.c       | 49 +++++++++++++++++++++++++++++++++++++++++++
 drivers/cxl/cxl.h             |  1 +
 tools/testing/cxl/Kbuild      |  2 --
 tools/testing/cxl/mock_pmem.c | 24 ---------------------
 5 files changed, 60 insertions(+), 30 deletions(-)
 delete mode 100644 tools/testing/cxl/mock_pmem.c

(limited to 'tools/testing')

diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c
index 40b3f5030496..8de240c4d96b 100644
--- a/drivers/cxl/core/pmem.c
+++ b/drivers/cxl/core/pmem.c
@@ -57,24 +57,30 @@ bool is_cxl_nvdimm_bridge(struct device *dev)
 }
 EXPORT_SYMBOL_NS_GPL(is_cxl_nvdimm_bridge, CXL);
 
-__mock int match_nvdimm_bridge(struct device *dev, const void *data)
+static int match_nvdimm_bridge(struct device *dev, void *data)
 {
 	return is_cxl_nvdimm_bridge(dev);
 }
 
 struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_nvdimm *cxl_nvd)
 {
+	struct cxl_port *port = find_cxl_root(&cxl_nvd->dev);
 	struct device *dev;
 
-	dev = bus_find_device(&cxl_bus_type, NULL, cxl_nvd, match_nvdimm_bridge);
+	if (!port)
+		return NULL;
+
+	dev = device_find_child(&port->dev, NULL, match_nvdimm_bridge);
+	put_device(&port->dev);
+
 	if (!dev)
 		return NULL;
+
 	return to_cxl_nvdimm_bridge(dev);
 }
 EXPORT_SYMBOL_NS_GPL(cxl_find_nvdimm_bridge, CXL);
 
-static struct cxl_nvdimm_bridge *
-cxl_nvdimm_bridge_alloc(struct cxl_port *port)
+static struct cxl_nvdimm_bridge *cxl_nvdimm_bridge_alloc(struct cxl_port *port)
 {
 	struct cxl_nvdimm_bridge *cxl_nvb;
 	struct device *dev;
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 2a4230d685d5..af7a515e4572 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -455,6 +455,55 @@ int devm_cxl_register_pci_bus(struct device *host, struct device *uport,
 }
 EXPORT_SYMBOL_NS_GPL(devm_cxl_register_pci_bus, CXL);
 
+/* Find a 2nd level CXL port that has a dport that is an ancestor of @match */
+static int match_root_child(struct device *dev, const void *match)
+{
+	const struct device *iter = NULL;
+	struct cxl_port *port, *parent;
+	struct cxl_dport *dport;
+
+	if (!is_cxl_port(dev))
+		return 0;
+
+	port = to_cxl_port(dev);
+	if (is_cxl_root(port))
+		return 0;
+
+	parent = to_cxl_port(port->dev.parent);
+	if (!is_cxl_root(parent))
+		return 0;
+
+	cxl_device_lock(&port->dev);
+	list_for_each_entry(dport, &port->dports, list) {
+		iter = match;
+		while (iter) {
+			if (iter == dport->dport)
+				goto out;
+			iter = iter->parent;
+		}
+	}
+out:
+	cxl_device_unlock(&port->dev);
+
+	return !!iter;
+}
+
+struct cxl_port *find_cxl_root(struct device *dev)
+{
+	struct device *port_dev;
+	struct cxl_port *root;
+
+	port_dev = bus_find_device(&cxl_bus_type, NULL, dev, match_root_child);
+	if (!port_dev)
+		return NULL;
+
+	root = to_cxl_port(port_dev->parent);
+	get_device(&root->dev);
+	put_device(port_dev);
+	return root;
+}
+EXPORT_SYMBOL_NS_GPL(find_cxl_root, CXL);
+
 static struct cxl_dport *find_dport(struct cxl_port *port, int id)
 {
 	struct cxl_dport *dport;
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 4d4cc8292137..61b0db526fa2 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -304,6 +304,7 @@ struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
 
 int cxl_add_dport(struct cxl_port *port, struct device *dport, int port_id,
 		  resource_size_t component_reg_phys);
+struct cxl_port *find_cxl_root(struct device *dev);
 
 struct cxl_decoder *to_cxl_decoder(struct device *dev);
 bool is_root_decoder(struct device *dev);
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
index 3299fb0977b2..ddaee8a2c418 100644
--- a/tools/testing/cxl/Kbuild
+++ b/tools/testing/cxl/Kbuild
@@ -32,6 +32,4 @@ cxl_core-y += $(CXL_CORE_SRC)/memdev.o
 cxl_core-y += $(CXL_CORE_SRC)/mbox.o
 cxl_core-y += config_check.o
 
-cxl_core-y += mock_pmem.o
-
 obj-m += test/
diff --git a/tools/testing/cxl/mock_pmem.c b/tools/testing/cxl/mock_pmem.c
deleted file mode 100644
index f7315e6f52c0..000000000000
--- a/tools/testing/cxl/mock_pmem.c
+++ /dev/null
@@ -1,24 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright(c) 2021 Intel Corporation. All rights reserved. */
-#include <cxl.h>
-#include "test/mock.h"
-#include <core/core.h>
-
-int match_nvdimm_bridge(struct device *dev, const void *data)
-{
-	int index, rc = 0;
-	struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
-	const struct cxl_nvdimm *cxl_nvd = data;
-
-	if (ops) {
-		if (dev->type == &cxl_nvdimm_bridge_type &&
-		    (ops->is_mock_dev(dev->parent->parent) ==
-		     ops->is_mock_dev(cxl_nvd->dev.parent->parent)))
-			rc = 1;
-	} else
-		rc = dev->type == &cxl_nvdimm_bridge_type;
-
-	put_cxl_mock_ops(index);
-
-	return rc;
-}
-- 
cgit 


From c978f1b10aba8ce4f8e1f6fcc86b174e08a6e7f7 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 31 Jan 2022 17:07:38 -0800
Subject: cxl/port: Up-level cxl_add_dport() locking requirements to the caller

In preparation for moving dport enumeration into the core, require the
port device lock to be acquired by the caller.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Link: https://lore.kernel.org/r/164367759016.324231.105551648350470000.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/cxl/acpi.c            | 2 ++
 drivers/cxl/core/port.c       | 3 +--
 tools/testing/cxl/mock_acpi.c | 4 ++++
 3 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index ab2b76532272..5d848b77d8e8 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -342,7 +342,9 @@ static int add_host_bridge_dport(struct device *match, void *arg)
 		return 0;
 	}
 
+	cxl_device_lock(&root_port->dev);
 	rc = cxl_add_dport(root_port, match, uid, ctx.chbcr);
+	cxl_device_unlock(&root_port->dev);
 	if (rc) {
 		dev_err(host, "failed to add downstream port: %s\n",
 			dev_name(match));
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index af7a515e4572..369cc52e0837 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -519,7 +519,7 @@ static int add_dport(struct cxl_port *port, struct cxl_dport *new)
 {
 	struct cxl_dport *dup;
 
-	cxl_device_lock(&port->dev);
+	device_lock_assert(&port->dev);
 	dup = find_dport(port, new->port_id);
 	if (dup)
 		dev_err(&port->dev,
@@ -528,7 +528,6 @@ static int add_dport(struct cxl_port *port, struct cxl_dport *new)
 			dev_name(dup->dport));
 	else
 		list_add_tail(&new->list, &port->dports);
-	cxl_device_unlock(&port->dev);
 
 	return dup ? -EEXIST : 0;
 }
diff --git a/tools/testing/cxl/mock_acpi.c b/tools/testing/cxl/mock_acpi.c
index 4c8a493ace56..c953e3ab6494 100644
--- a/tools/testing/cxl/mock_acpi.c
+++ b/tools/testing/cxl/mock_acpi.c
@@ -57,7 +57,9 @@ static int match_add_root_port(struct pci_dev *pdev, void *data)
 
 	/* TODO walk DVSEC to find component register base */
 	port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap);
+	cxl_device_lock(&port->dev);
 	rc = cxl_add_dport(port, &pdev->dev, port_num, CXL_RESOURCE_NONE);
+	cxl_device_unlock(&port->dev);
 	if (rc) {
 		dev_err(dev, "failed to add dport: %s (%d)\n",
 			dev_name(&pdev->dev), rc);
@@ -78,7 +80,9 @@ static int mock_add_root_port(struct platform_device *pdev, void *data)
 	struct device *dev = ctx->dev;
 	int rc;
 
+	cxl_device_lock(&port->dev);
 	rc = cxl_add_dport(port, &pdev->dev, pdev->id, CXL_RESOURCE_NONE);
+	cxl_device_unlock(&port->dev);
 	if (rc) {
 		dev_err(dev, "failed to add dport: %s (%d)\n",
 			dev_name(&pdev->dev), rc);
-- 
cgit 


From 98d2d3a264543680281fd8a4e6ae490ca26b4f85 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 31 Jan 2022 18:10:04 -0800
Subject: cxl/core: Generalize dport enumeration in the core

The core houses infrastructure for decoder resources. A CXL port's
dports are more closely related to decoder infrastructure than topology
enumeration. Implement generic PCI based dport enumeration in the core,
i.e. arrange for existing root port enumeration from cxl_acpi to share
code with switch port enumeration which just amounts to a small
difference in a pci_walk_bus() invocation once the appropriate 'struct
pci_bus' has been retrieved.

Set the convention that decoder objects are registered after all dports
are enumerated. This enables userspace to know when the CXL core is
finished establishing 'dportX' links underneath the 'portX' object.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Link: https://lore.kernel.org/r/164368114191.354031.5270501846455462665.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/cxl/acpi.c            |  67 ++++------------------------
 drivers/cxl/core/Makefile     |   1 +
 drivers/cxl/core/pci.c        | 101 ++++++++++++++++++++++++++++++++++++++++++
 drivers/cxl/core/port.c       |  91 +++++++++++++++++++++----------------
 drivers/cxl/cxl.h             |  16 ++-----
 drivers/cxl/cxlpci.h          |   1 +
 tools/testing/cxl/Kbuild      |   3 +-
 tools/testing/cxl/mock_acpi.c |  78 --------------------------------
 tools/testing/cxl/test/cxl.c  |  67 +++++++++++++++++++---------
 tools/testing/cxl/test/mock.c |  45 ++++++++-----------
 tools/testing/cxl/test/mock.h |   6 ++-
 11 files changed, 238 insertions(+), 238 deletions(-)
 create mode 100644 drivers/cxl/core/pci.c

(limited to 'tools/testing')

diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index 8d4fd7534e1e..259441245687 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -130,48 +130,6 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
 	return 0;
 }
 
-__mock int match_add_root_ports(struct pci_dev *pdev, void *data)
-{
-	resource_size_t creg = CXL_RESOURCE_NONE;
-	struct cxl_walk_context *ctx = data;
-	struct pci_bus *root_bus = ctx->root;
-	struct cxl_port *port = ctx->port;
-	int type = pci_pcie_type(pdev);
-	struct device *dev = ctx->dev;
-	struct cxl_register_map map;
-	u32 lnkcap, port_num;
-	int rc;
-
-	if (pdev->bus != root_bus)
-		return 0;
-	if (!pci_is_pcie(pdev))
-		return 0;
-	if (type != PCI_EXP_TYPE_ROOT_PORT)
-		return 0;
-	if (pci_read_config_dword(pdev, pci_pcie_cap(pdev) + PCI_EXP_LNKCAP,
-				  &lnkcap) != PCIBIOS_SUCCESSFUL)
-		return 0;
-
-	/* The driver doesn't rely on component registers for Root Ports yet. */
-	rc = cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map);
-	if (!rc)
-		dev_info(&pdev->dev, "No component register block found\n");
-
-	creg = cxl_regmap_to_base(pdev, &map);
-
-	port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap);
-	rc = cxl_add_dport(port, &pdev->dev, port_num, creg);
-	if (rc) {
-		ctx->error = rc;
-		return rc;
-	}
-	ctx->count++;
-
-	dev_dbg(dev, "add dport%d: %s\n", port_num, dev_name(&pdev->dev));
-
-	return 0;
-}
-
 static struct cxl_dport *find_dport_by_dev(struct cxl_port *port, struct device *dev)
 {
 	struct cxl_dport *dport;
@@ -210,7 +168,6 @@ static int add_host_bridge_uport(struct device *match, void *arg)
 	struct device *host = root_port->dev.parent;
 	struct acpi_device *bridge = to_cxl_host_bridge(host, match);
 	struct acpi_pci_root *pci_root;
-	struct cxl_walk_context ctx;
 	int single_port_map[1], rc;
 	struct cxl_decoder *cxld;
 	struct cxl_dport *dport;
@@ -240,18 +197,10 @@ static int add_host_bridge_uport(struct device *match, void *arg)
 		return PTR_ERR(port);
 	dev_dbg(host, "%s: add: %s\n", dev_name(match), dev_name(&port->dev));
 
-	ctx = (struct cxl_walk_context){
-		.dev = host,
-		.root = pci_root->bus,
-		.port = port,
-	};
-	pci_walk_bus(pci_root->bus, match_add_root_ports, &ctx);
-
-	if (ctx.count == 0)
-		return -ENODEV;
-	if (ctx.error)
-		return ctx.error;
-	if (ctx.count > 1)
+	rc = devm_cxl_port_enumerate_dports(host, port);
+	if (rc < 0)
+		return rc;
+	if (rc > 1)
 		return 0;
 
 	/* TODO: Scan CHBCR for HDM Decoder resources */
@@ -311,9 +260,9 @@ static int cxl_get_chbcr(union acpi_subtable_headers *header, void *arg,
 
 static int add_host_bridge_dport(struct device *match, void *arg)
 {
-	int rc;
 	acpi_status status;
 	unsigned long long uid;
+	struct cxl_dport *dport;
 	struct cxl_chbs_context ctx;
 	struct cxl_port *root_port = arg;
 	struct device *host = root_port->dev.parent;
@@ -343,12 +292,12 @@ static int add_host_bridge_dport(struct device *match, void *arg)
 	}
 
 	cxl_device_lock(&root_port->dev);
-	rc = cxl_add_dport(root_port, match, uid, ctx.chbcr);
+	dport = devm_cxl_add_dport(host, root_port, match, uid, ctx.chbcr);
 	cxl_device_unlock(&root_port->dev);
-	if (rc) {
+	if (IS_ERR(dport)) {
 		dev_err(host, "failed to add downstream port: %s\n",
 			dev_name(match));
-		return rc;
+		return PTR_ERR(dport);
 	}
 	dev_dbg(host, "add dport%llu: %s\n", uid, dev_name(match));
 	return 0;
diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
index a90202ac88d2..91057f0ec763 100644
--- a/drivers/cxl/core/Makefile
+++ b/drivers/cxl/core/Makefile
@@ -7,3 +7,4 @@ cxl_core-y += pmem.o
 cxl_core-y += regs.o
 cxl_core-y += memdev.o
 cxl_core-y += mbox.o
+cxl_core-y += pci.o
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
new file mode 100644
index 000000000000..c5a9e03ed477
--- /dev/null
+++ b/drivers/cxl/core/pci.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2021 Intel Corporation. All rights reserved. */
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <cxlpci.h>
+#include <cxl.h>
+#include "core.h"
+
+/**
+ * DOC: cxl core pci
+ *
+ * Compute Express Link protocols are layered on top of PCIe. CXL core provides
+ * a set of helpers for CXL interactions which occur via PCIe.
+ */
+
+struct cxl_walk_context {
+	struct pci_bus *bus;
+	struct device *host;
+	struct cxl_port *port;
+	int type;
+	int error;
+	int count;
+};
+
+static int match_add_dports(struct pci_dev *pdev, void *data)
+{
+	struct cxl_walk_context *ctx = data;
+	struct cxl_port *port = ctx->port;
+	int type = pci_pcie_type(pdev);
+	struct cxl_register_map map;
+	struct cxl_dport *dport;
+	u32 lnkcap, port_num;
+	int rc;
+
+	if (pdev->bus != ctx->bus)
+		return 0;
+	if (!pci_is_pcie(pdev))
+		return 0;
+	if (type != ctx->type)
+		return 0;
+	if (pci_read_config_dword(pdev, pci_pcie_cap(pdev) + PCI_EXP_LNKCAP,
+				  &lnkcap))
+		return 0;
+
+	rc = cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map);
+	if (rc)
+		dev_dbg(&port->dev, "failed to find component registers\n");
+
+	port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap);
+	cxl_device_lock(&port->dev);
+	dport = devm_cxl_add_dport(ctx->host, port, &pdev->dev, port_num,
+				   cxl_regmap_to_base(pdev, &map));
+	cxl_device_unlock(&port->dev);
+	if (IS_ERR(dport)) {
+		ctx->error = PTR_ERR(dport);
+		return PTR_ERR(dport);
+	}
+	ctx->count++;
+
+	dev_dbg(&port->dev, "add dport%d: %s\n", port_num, dev_name(&pdev->dev));
+
+	return 0;
+}
+
+/**
+ * devm_cxl_port_enumerate_dports - enumerate downstream ports of the upstream port
+ * @host: devm context
+ * @port: cxl_port whose ->uport is the upstream of dports to be enumerated
+ *
+ * Returns a positive number of dports enumerated or a negative error
+ * code.
+ */
+int devm_cxl_port_enumerate_dports(struct device *host, struct cxl_port *port)
+{
+	struct pci_bus *bus = cxl_port_to_pci_bus(port);
+	struct cxl_walk_context ctx;
+	int type;
+
+	if (!bus)
+		return -ENXIO;
+
+	if (pci_is_root_bus(bus))
+		type = PCI_EXP_TYPE_ROOT_PORT;
+	else
+		type = PCI_EXP_TYPE_DOWNSTREAM;
+
+	ctx = (struct cxl_walk_context) {
+		.host = host,
+		.port = port,
+		.bus = bus,
+		.type = type,
+	};
+	pci_walk_bus(bus, match_add_dports, &ctx);
+
+	if (ctx.count == 0)
+		return -ENODEV;
+	if (ctx.error)
+		return ctx.error;
+	return ctx.count;
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_port_enumerate_dports, CXL);
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 369cc52e0837..fee9c7affef4 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -243,22 +243,10 @@ struct cxl_decoder *to_cxl_decoder(struct device *dev)
 }
 EXPORT_SYMBOL_NS_GPL(to_cxl_decoder, CXL);
 
-static void cxl_dport_release(struct cxl_dport *dport)
-{
-	list_del(&dport->list);
-	put_device(dport->dport);
-	kfree(dport);
-}
-
 static void cxl_port_release(struct device *dev)
 {
 	struct cxl_port *port = to_cxl_port(dev);
-	struct cxl_dport *dport, *_d;
 
-	cxl_device_lock(dev);
-	list_for_each_entry_safe(dport, _d, &port->dports, list)
-		cxl_dport_release(dport);
-	cxl_device_unlock(dev);
 	ida_free(&cxl_port_ida, port->id);
 	kfree(port);
 }
@@ -292,18 +280,7 @@ EXPORT_SYMBOL_NS_GPL(to_cxl_port, CXL);
 static void unregister_port(void *_port)
 {
 	struct cxl_port *port = _port;
-	struct cxl_dport *dport;
 
-	cxl_device_lock(&port->dev);
-	list_for_each_entry(dport, &port->dports, list) {
-		char link_name[CXL_TARGET_STRLEN];
-
-		if (snprintf(link_name, CXL_TARGET_STRLEN, "dport%d",
-			     dport->port_id) >= CXL_TARGET_STRLEN)
-			continue;
-		sysfs_remove_link(&port->dev.kobj, link_name);
-	}
-	cxl_device_unlock(&port->dev);
 	device_unregister(&port->dev);
 }
 
@@ -532,51 +509,87 @@ static int add_dport(struct cxl_port *port, struct cxl_dport *new)
 	return dup ? -EEXIST : 0;
 }
 
+static void cxl_dport_remove(void *data)
+{
+	struct cxl_dport *dport = data;
+	struct cxl_port *port = dport->port;
+
+	put_device(dport->dport);
+	cxl_device_lock(&port->dev);
+	list_del(&dport->list);
+	cxl_device_unlock(&port->dev);
+}
+
+static void cxl_dport_unlink(void *data)
+{
+	struct cxl_dport *dport = data;
+	struct cxl_port *port = dport->port;
+	char link_name[CXL_TARGET_STRLEN];
+
+	sprintf(link_name, "dport%d", dport->port_id);
+	sysfs_remove_link(&port->dev.kobj, link_name);
+}
+
 /**
- * cxl_add_dport - append downstream port data to a cxl_port
+ * devm_cxl_add_dport - append downstream port data to a cxl_port
+ * @host: devm context for allocations
  * @port: the cxl_port that references this dport
  * @dport_dev: firmware or PCI device representing the dport
  * @port_id: identifier for this dport in a decoder's target list
  * @component_reg_phys: optional location of CXL component registers
  *
- * Note that all allocations and links are undone by cxl_port deletion
- * and release.
+ * Note that dports are appended to the devm release action's of the
+ * either the port's host (for root ports), or the port itself (for
+ * switch ports)
  */
-int cxl_add_dport(struct cxl_port *port, struct device *dport_dev, int port_id,
-		  resource_size_t component_reg_phys)
+struct cxl_dport *devm_cxl_add_dport(struct device *host, struct cxl_port *port,
+				     struct device *dport_dev, int port_id,
+				     resource_size_t component_reg_phys)
 {
 	char link_name[CXL_TARGET_STRLEN];
 	struct cxl_dport *dport;
 	int rc;
 
+	if (!host->driver) {
+		dev_WARN_ONCE(&port->dev, 1, "dport:%s bad devm context\n",
+			      dev_name(dport_dev));
+		return ERR_PTR(-ENXIO);
+	}
+
 	if (snprintf(link_name, CXL_TARGET_STRLEN, "dport%d", port_id) >=
 	    CXL_TARGET_STRLEN)
-		return -EINVAL;
+		return ERR_PTR(-EINVAL);
 
-	dport = kzalloc(sizeof(*dport), GFP_KERNEL);
+	dport = devm_kzalloc(host, sizeof(*dport), GFP_KERNEL);
 	if (!dport)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	INIT_LIST_HEAD(&dport->list);
-	dport->dport = get_device(dport_dev);
+	dport->dport = dport_dev;
 	dport->port_id = port_id;
 	dport->component_reg_phys = component_reg_phys;
 	dport->port = port;
 
 	rc = add_dport(port, dport);
 	if (rc)
-		goto err;
+		return ERR_PTR(rc);
+
+	get_device(dport_dev);
+	rc = devm_add_action_or_reset(host, cxl_dport_remove, dport);
+	if (rc)
+		return ERR_PTR(rc);
 
 	rc = sysfs_create_link(&port->dev.kobj, &dport_dev->kobj, link_name);
 	if (rc)
-		goto err;
+		return ERR_PTR(rc);
 
-	return 0;
-err:
-	cxl_dport_release(dport);
-	return rc;
+	rc = devm_add_action_or_reset(host, cxl_dport_unlink, dport);
+	if (rc)
+		return ERR_PTR(rc);
+
+	return dport;
 }
-EXPORT_SYMBOL_NS_GPL(cxl_add_dport, CXL);
+EXPORT_SYMBOL_NS_GPL(devm_cxl_add_dport, CXL);
 
 static int decoder_populate_targets(struct cxl_decoder *cxld,
 				    struct cxl_port *port, int *target_map)
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 61b0db526fa2..0754c68ccd33 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -236,14 +236,6 @@ struct cxl_nvdimm {
 	struct nvdimm *nvdimm;
 };
 
-struct cxl_walk_context {
-	struct device *dev;
-	struct pci_bus *root;
-	struct cxl_port *port;
-	int error;
-	int count;
-};
-
 /**
  * struct cxl_port - logical collection of upstream port devices and
  *		     downstream port devices to construct a CXL memory
@@ -295,17 +287,17 @@ static inline bool is_cxl_root(struct cxl_port *port)
 
 bool is_cxl_port(struct device *dev);
 struct cxl_port *to_cxl_port(struct device *dev);
+struct pci_bus;
 int devm_cxl_register_pci_bus(struct device *host, struct device *uport,
 			      struct pci_bus *bus);
 struct pci_bus *cxl_port_to_pci_bus(struct cxl_port *port);
 struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
 				   resource_size_t component_reg_phys,
 				   struct cxl_port *parent_port);
-
-int cxl_add_dport(struct cxl_port *port, struct device *dport, int port_id,
-		  resource_size_t component_reg_phys);
 struct cxl_port *find_cxl_root(struct device *dev);
-
+struct cxl_dport *devm_cxl_add_dport(struct device *host, struct cxl_port *port,
+				     struct device *dport, int port_id,
+				     resource_size_t component_reg_phys);
 struct cxl_decoder *to_cxl_decoder(struct device *dev);
 bool is_root_decoder(struct device *dev);
 bool is_cxl_decoder(struct device *dev);
diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
index eb00f597a157..103636fda198 100644
--- a/drivers/cxl/cxlpci.h
+++ b/drivers/cxl/cxlpci.h
@@ -57,4 +57,5 @@ static inline resource_size_t cxl_regmap_to_base(struct pci_dev *pdev,
 	return pci_resource_start(pdev, map->barno) + map->block_offset;
 }
 
+int devm_cxl_port_enumerate_dports(struct device *host, struct cxl_port *port);
 #endif /* __CXL_PCI_H__ */
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
index ddaee8a2c418..61123544aa49 100644
--- a/tools/testing/cxl/Kbuild
+++ b/tools/testing/cxl/Kbuild
@@ -3,8 +3,8 @@ ldflags-y += --wrap=acpi_table_parse_cedt
 ldflags-y += --wrap=is_acpi_device_node
 ldflags-y += --wrap=acpi_evaluate_integer
 ldflags-y += --wrap=acpi_pci_find_root
-ldflags-y += --wrap=pci_walk_bus
 ldflags-y += --wrap=nvdimm_bus_register
+ldflags-y += --wrap=devm_cxl_port_enumerate_dports
 
 DRIVERS := ../../../drivers
 CXL_SRC := $(DRIVERS)/cxl
@@ -30,6 +30,7 @@ cxl_core-y += $(CXL_CORE_SRC)/pmem.o
 cxl_core-y += $(CXL_CORE_SRC)/regs.o
 cxl_core-y += $(CXL_CORE_SRC)/memdev.o
 cxl_core-y += $(CXL_CORE_SRC)/mbox.o
+cxl_core-y += $(CXL_CORE_SRC)/pci.o
 cxl_core-y += config_check.o
 
 obj-m += test/
diff --git a/tools/testing/cxl/mock_acpi.c b/tools/testing/cxl/mock_acpi.c
index c953e3ab6494..55813de26d46 100644
--- a/tools/testing/cxl/mock_acpi.c
+++ b/tools/testing/cxl/mock_acpi.c
@@ -4,7 +4,6 @@
 #include <linux/platform_device.h>
 #include <linux/device.h>
 #include <linux/acpi.h>
-#include <linux/pci.h>
 #include <cxl.h>
 #include "test/mock.h"
 
@@ -34,80 +33,3 @@ out:
 	put_cxl_mock_ops(index);
 	return found;
 }
-
-static int match_add_root_port(struct pci_dev *pdev, void *data)
-{
-	struct cxl_walk_context *ctx = data;
-	struct pci_bus *root_bus = ctx->root;
-	struct cxl_port *port = ctx->port;
-	int type = pci_pcie_type(pdev);
-	struct device *dev = ctx->dev;
-	u32 lnkcap, port_num;
-	int rc;
-
-	if (pdev->bus != root_bus)
-		return 0;
-	if (!pci_is_pcie(pdev))
-		return 0;
-	if (type != PCI_EXP_TYPE_ROOT_PORT)
-		return 0;
-	if (pci_read_config_dword(pdev, pci_pcie_cap(pdev) + PCI_EXP_LNKCAP,
-				  &lnkcap) != PCIBIOS_SUCCESSFUL)
-		return 0;
-
-	/* TODO walk DVSEC to find component register base */
-	port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap);
-	cxl_device_lock(&port->dev);
-	rc = cxl_add_dport(port, &pdev->dev, port_num, CXL_RESOURCE_NONE);
-	cxl_device_unlock(&port->dev);
-	if (rc) {
-		dev_err(dev, "failed to add dport: %s (%d)\n",
-			dev_name(&pdev->dev), rc);
-		ctx->error = rc;
-		return rc;
-	}
-	ctx->count++;
-
-	dev_dbg(dev, "add dport%d: %s\n", port_num, dev_name(&pdev->dev));
-
-	return 0;
-}
-
-static int mock_add_root_port(struct platform_device *pdev, void *data)
-{
-	struct cxl_walk_context *ctx = data;
-	struct cxl_port *port = ctx->port;
-	struct device *dev = ctx->dev;
-	int rc;
-
-	cxl_device_lock(&port->dev);
-	rc = cxl_add_dport(port, &pdev->dev, pdev->id, CXL_RESOURCE_NONE);
-	cxl_device_unlock(&port->dev);
-	if (rc) {
-		dev_err(dev, "failed to add dport: %s (%d)\n",
-			dev_name(&pdev->dev), rc);
-		ctx->error = rc;
-		return rc;
-	}
-	ctx->count++;
-
-	dev_dbg(dev, "add dport%d: %s\n", pdev->id, dev_name(&pdev->dev));
-
-	return 0;
-}
-
-int match_add_root_ports(struct pci_dev *dev, void *data)
-{
-	int index, rc;
-	struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
-	struct platform_device *pdev = (struct platform_device *) dev;
-
-	if (ops && ops->is_mock_port(pdev))
-		rc = mock_add_root_port(pdev, data);
-	else
-		rc = match_add_root_port(dev, data);
-
-	put_cxl_mock_ops(index);
-
-	return rc;
-}
diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index 736d99006fb7..ef002e909d38 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -317,6 +317,19 @@ static bool is_mock_bridge(struct device *dev)
 	for (i = 0; i < ARRAY_SIZE(cxl_host_bridge); i++)
 		if (dev == &cxl_host_bridge[i]->dev)
 			return true;
+	return false;
+}
+
+static bool is_mock_port(struct device *dev)
+{
+	int i;
+
+	if (is_mock_bridge(dev))
+		return true;
+
+	for (i = 0; i < ARRAY_SIZE(cxl_root_port); i++)
+		if (dev == &cxl_root_port[i]->dev)
+			return true;
 
 	return false;
 }
@@ -366,26 +379,6 @@ static struct acpi_pci_root mock_pci_root[NR_CXL_HOST_BRIDGES] = {
 	},
 };
 
-static struct platform_device *mock_cxl_root_port(struct pci_bus *bus, int index)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(mock_pci_bus); i++)
-		if (bus == &mock_pci_bus[i])
-			return cxl_root_port[index + i * NR_CXL_ROOT_PORTS];
-	return NULL;
-}
-
-static bool is_mock_port(struct platform_device *pdev)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(cxl_root_port); i++)
-		if (pdev == cxl_root_port[i])
-			return true;
-	return false;
-}
-
 static bool is_mock_bus(struct pci_bus *bus)
 {
 	int i;
@@ -405,16 +398,47 @@ static struct acpi_pci_root *mock_acpi_pci_find_root(acpi_handle handle)
 	return &mock_pci_root[host_bridge_index(adev)];
 }
 
+static int mock_cxl_port_enumerate_dports(struct device *host,
+					  struct cxl_port *port)
+{
+	struct device *dev = &port->dev;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(cxl_root_port); i++) {
+		struct platform_device *pdev = cxl_root_port[i];
+		struct cxl_dport *dport;
+
+		if (pdev->dev.parent != port->uport)
+			continue;
+
+		cxl_device_lock(&port->dev);
+		dport = devm_cxl_add_dport(host, port, &pdev->dev, pdev->id,
+					   CXL_RESOURCE_NONE);
+		cxl_device_unlock(&port->dev);
+
+		if (IS_ERR(dport)) {
+			dev_err(dev, "failed to add dport: %s (%ld)\n",
+				dev_name(&pdev->dev), PTR_ERR(dport));
+			return PTR_ERR(dport);
+		}
+
+		dev_dbg(dev, "add dport%d: %s\n", pdev->id,
+			dev_name(&pdev->dev));
+	}
+
+	return 0;
+}
+
 static struct cxl_mock_ops cxl_mock_ops = {
 	.is_mock_adev = is_mock_adev,
 	.is_mock_bridge = is_mock_bridge,
 	.is_mock_bus = is_mock_bus,
 	.is_mock_port = is_mock_port,
 	.is_mock_dev = is_mock_dev,
-	.mock_port = mock_cxl_root_port,
 	.acpi_table_parse_cedt = mock_acpi_table_parse_cedt,
 	.acpi_evaluate_integer = mock_acpi_evaluate_integer,
 	.acpi_pci_find_root = mock_acpi_pci_find_root,
+	.devm_cxl_port_enumerate_dports = mock_cxl_port_enumerate_dports,
 	.list = LIST_HEAD_INIT(cxl_mock_ops.list),
 };
 
@@ -598,3 +622,4 @@ module_init(cxl_test_init);
 module_exit(cxl_test_exit);
 MODULE_LICENSE("GPL v2");
 MODULE_IMPORT_NS(ACPI);
+MODULE_IMPORT_NS(CXL);
diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c
index 17408f892df4..56b4b7d734bc 100644
--- a/tools/testing/cxl/test/mock.c
+++ b/tools/testing/cxl/test/mock.c
@@ -7,6 +7,8 @@
 #include <linux/export.h>
 #include <linux/acpi.h>
 #include <linux/pci.h>
+#include <cxlmem.h>
+#include <cxlpci.h>
 #include "mock.h"
 
 static LIST_HEAD(mock);
@@ -114,32 +116,6 @@ struct acpi_pci_root *__wrap_acpi_pci_find_root(acpi_handle handle)
 }
 EXPORT_SYMBOL_GPL(__wrap_acpi_pci_find_root);
 
-void __wrap_pci_walk_bus(struct pci_bus *bus,
-			 int (*cb)(struct pci_dev *, void *), void *userdata)
-{
-	int index;
-	struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
-
-	if (ops && ops->is_mock_bus(bus)) {
-		int rc, i;
-
-		/*
-		 * Simulate 2 root ports per host-bridge and no
-		 * depth recursion.
-		 */
-		for (i = 0; i < 2; i++) {
-			rc = cb((struct pci_dev *) ops->mock_port(bus, i),
-				userdata);
-			if (rc)
-				break;
-		}
-	} else
-		pci_walk_bus(bus, cb, userdata);
-
-	put_cxl_mock_ops(index);
-}
-EXPORT_SYMBOL_GPL(__wrap_pci_walk_bus);
-
 struct nvdimm_bus *
 __wrap_nvdimm_bus_register(struct device *dev,
 			   struct nvdimm_bus_descriptor *nd_desc)
@@ -155,5 +131,22 @@ __wrap_nvdimm_bus_register(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(__wrap_nvdimm_bus_register);
 
+int __wrap_devm_cxl_port_enumerate_dports(struct device *host,
+					  struct cxl_port *port)
+{
+	int rc, index;
+	struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
+
+	if (ops && ops->is_mock_port(port->uport))
+		rc = ops->devm_cxl_port_enumerate_dports(host, port);
+	else
+		rc = devm_cxl_port_enumerate_dports(host, port);
+	put_cxl_mock_ops(index);
+
+	return rc;
+}
+EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_port_enumerate_dports, CXL);
+
 MODULE_LICENSE("GPL v2");
 MODULE_IMPORT_NS(ACPI);
+MODULE_IMPORT_NS(CXL);
diff --git a/tools/testing/cxl/test/mock.h b/tools/testing/cxl/test/mock.h
index 15ed0fd877e4..99e7ff38090d 100644
--- a/tools/testing/cxl/test/mock.h
+++ b/tools/testing/cxl/test/mock.h
@@ -2,6 +2,7 @@
 
 #include <linux/list.h>
 #include <linux/acpi.h>
+#include <cxl.h>
 
 struct cxl_mock_ops {
 	struct list_head list;
@@ -15,10 +16,11 @@ struct cxl_mock_ops {
 					     struct acpi_object_list *arguments,
 					     unsigned long long *data);
 	struct acpi_pci_root *(*acpi_pci_find_root)(acpi_handle handle);
-	struct platform_device *(*mock_port)(struct pci_bus *bus, int index);
 	bool (*is_mock_bus)(struct pci_bus *bus);
-	bool (*is_mock_port)(struct platform_device *pdev);
+	bool (*is_mock_port)(struct device *dev);
 	bool (*is_mock_dev)(struct device *dev);
+	int (*devm_cxl_port_enumerate_dports)(struct device *host,
+					      struct cxl_port *port);
 };
 
 void register_cxl_mock_ops(struct cxl_mock_ops *ops);
-- 
cgit 


From d17d0540a0dbf109210f7b57a37571e2978da0fa Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 1 Feb 2022 12:24:30 -0800
Subject: cxl/core/hdm: Add CXL standard decoder enumeration to the core

Unlike the decoder enumeration for "root decoders" described by platform
firmware, standard decoders can be enumerated from the component
registers space once the base address has been identified (via PCI,
ACPI, or another mechanism).

Add common infrastructure for HDM (Host-managed-Device-Memory) Decoder
enumeration and share it between host-bridge, upstream switch port, and
cxl_test defined decoders.

The locking model for switch level decoders is to hold the port lock
over the enumeration. This facilitates moving the dport and decoder
enumeration to a 'port' driver. For now, the only enumerator of decoder
resources is the cxl_acpi root driver.

Co-developed-by: Ben Widawsky <ben.widawsky@intel.com>
Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Link: https://lore.kernel.org/r/164374688404.395335.9239248252443123526.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/cxl/acpi.c            |  43 +++-----
 drivers/cxl/core/Makefile     |   1 +
 drivers/cxl/core/core.h       |   2 +
 drivers/cxl/core/hdm.c        | 248 ++++++++++++++++++++++++++++++++++++++++++
 drivers/cxl/core/port.c       |  57 ++++++++--
 drivers/cxl/core/regs.c       |   5 +-
 drivers/cxl/cxl.h             |  33 +++++-
 drivers/cxl/cxlmem.h          |   8 ++
 tools/testing/cxl/Kbuild      |   4 +
 tools/testing/cxl/test/cxl.c  |  29 +++++
 tools/testing/cxl/test/mock.c |  50 +++++++++
 tools/testing/cxl/test/mock.h |   3 +
 12 files changed, 434 insertions(+), 49 deletions(-)
 create mode 100644 drivers/cxl/core/hdm.c

(limited to 'tools/testing')

diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index 259441245687..8c2ced91518b 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -168,10 +168,10 @@ static int add_host_bridge_uport(struct device *match, void *arg)
 	struct device *host = root_port->dev.parent;
 	struct acpi_device *bridge = to_cxl_host_bridge(host, match);
 	struct acpi_pci_root *pci_root;
-	int single_port_map[1], rc;
-	struct cxl_decoder *cxld;
 	struct cxl_dport *dport;
+	struct cxl_hdm *cxlhdm;
 	struct cxl_port *port;
+	int rc;
 
 	if (!bridge)
 		return 0;
@@ -200,37 +200,24 @@ static int add_host_bridge_uport(struct device *match, void *arg)
 	rc = devm_cxl_port_enumerate_dports(host, port);
 	if (rc < 0)
 		return rc;
-	if (rc > 1)
-		return 0;
-
-	/* TODO: Scan CHBCR for HDM Decoder resources */
-
-	/*
-	 * Per the CXL specification (8.2.5.12 CXL HDM Decoder Capability
-	 * Structure) single ported host-bridges need not publish a decoder
-	 * capability when a passthrough decode can be assumed, i.e. all
-	 * transactions that the uport sees are claimed and passed to the single
-	 * dport. Disable the range until the first CXL region is enumerated /
-	 * activated.
-	 */
-	cxld = cxl_switch_decoder_alloc(port, 1);
-	if (IS_ERR(cxld))
-		return PTR_ERR(cxld);
-
 	cxl_device_lock(&port->dev);
-	dport = list_first_entry(&port->dports, typeof(*dport), list);
-	cxl_device_unlock(&port->dev);
+	if (rc == 1) {
+		rc = devm_cxl_add_passthrough_decoder(host, port);
+		goto out;
+	}
 
-	single_port_map[0] = dport->port_id;
+	cxlhdm = devm_cxl_setup_hdm(host, port);
+	if (IS_ERR(cxlhdm)) {
+		rc = PTR_ERR(cxlhdm);
+		goto out;
+	}
 
-	rc = cxl_decoder_add(cxld, single_port_map);
+	rc = devm_cxl_enumerate_decoders(host, cxlhdm);
 	if (rc)
-		put_device(&cxld->dev);
-	else
-		rc = cxl_decoder_autoremove(host, cxld);
+		dev_err(&port->dev, "Couldn't enumerate decoders (%d)\n", rc);
 
-	if (rc == 0)
-		dev_dbg(host, "add: %s\n", dev_name(&cxld->dev));
+out:
+	cxl_device_unlock(&port->dev);
 	return rc;
 }
 
diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
index 91057f0ec763..6d37cd78b151 100644
--- a/drivers/cxl/core/Makefile
+++ b/drivers/cxl/core/Makefile
@@ -8,3 +8,4 @@ cxl_core-y += regs.o
 cxl_core-y += memdev.o
 cxl_core-y += mbox.o
 cxl_core-y += pci.o
+cxl_core-y += hdm.o
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index e0c9aacc4e9c..1a50c0fc399c 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -14,6 +14,8 @@ struct cxl_mem_query_commands;
 int cxl_query_cmd(struct cxl_memdev *cxlmd,
 		  struct cxl_mem_query_commands __user *q);
 int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s);
+void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr,
+				   resource_size_t length);
 
 int cxl_memdev_init(void);
 void cxl_memdev_exit(void);
diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
new file mode 100644
index 000000000000..84f4ed288a88
--- /dev/null
+++ b/drivers/cxl/core/hdm.c
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
+#include <linux/io-64-nonatomic-hi-lo.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+
+#include "cxlmem.h"
+#include "core.h"
+
+/**
+ * DOC: cxl core hdm
+ *
+ * Compute Express Link Host Managed Device Memory, starting with the
+ * CXL 2.0 specification, is managed by an array of HDM Decoder register
+ * instances per CXL port and per CXL endpoint. Define common helpers
+ * for enumerating these registers and capabilities.
+ */
+
+static int add_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
+			   int *target_map)
+{
+	int rc;
+
+	rc = cxl_decoder_add_locked(cxld, target_map);
+	if (rc) {
+		put_device(&cxld->dev);
+		dev_err(&port->dev, "Failed to add decoder\n");
+		return rc;
+	}
+
+	rc = cxl_decoder_autoremove(&port->dev, cxld);
+	if (rc)
+		return rc;
+
+	dev_dbg(&cxld->dev, "Added to port %s\n", dev_name(&port->dev));
+
+	return 0;
+}
+
+/*
+ * Per the CXL specification (8.2.5.12 CXL HDM Decoder Capability Structure)
+ * single ported host-bridges need not publish a decoder capability when a
+ * passthrough decode can be assumed, i.e. all transactions that the uport sees
+ * are claimed and passed to the single dport. Disable the range until the first
+ * CXL region is enumerated / activated.
+ */
+int devm_cxl_add_passthrough_decoder(struct device *host, struct cxl_port *port)
+{
+	struct cxl_decoder *cxld;
+	struct cxl_dport *dport;
+	int single_port_map[1];
+
+	cxld = cxl_switch_decoder_alloc(port, 1);
+	if (IS_ERR(cxld))
+		return PTR_ERR(cxld);
+
+	device_lock_assert(&port->dev);
+
+	dport = list_first_entry(&port->dports, typeof(*dport), list);
+	single_port_map[0] = dport->port_id;
+
+	return add_hdm_decoder(port, cxld, single_port_map);
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_add_passthrough_decoder, CXL);
+
+static void parse_hdm_decoder_caps(struct cxl_hdm *cxlhdm)
+{
+	u32 hdm_cap;
+
+	hdm_cap = readl(cxlhdm->regs.hdm_decoder + CXL_HDM_DECODER_CAP_OFFSET);
+	cxlhdm->decoder_count = cxl_hdm_decoder_count(hdm_cap);
+	cxlhdm->target_count =
+		FIELD_GET(CXL_HDM_DECODER_TARGET_COUNT_MASK, hdm_cap);
+	if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_11_8, hdm_cap))
+		cxlhdm->interleave_mask |= GENMASK(11, 8);
+	if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_14_12, hdm_cap))
+		cxlhdm->interleave_mask |= GENMASK(14, 12);
+}
+
+static void __iomem *map_hdm_decoder_regs(struct cxl_port *port,
+					  void __iomem *crb)
+{
+	struct cxl_component_reg_map map;
+
+	cxl_probe_component_regs(&port->dev, crb, &map);
+	if (!map.hdm_decoder.valid) {
+		dev_err(&port->dev, "HDM decoder registers invalid\n");
+		return IOMEM_ERR_PTR(-ENXIO);
+	}
+
+	return crb + map.hdm_decoder.offset;
+}
+
+/**
+ * devm_cxl_setup_hdm - map HDM decoder component registers
+ * @host: devm context for allocations
+ * @port: cxl_port to map
+ */
+struct cxl_hdm *devm_cxl_setup_hdm(struct device *host, struct cxl_port *port)
+{
+	struct device *dev = &port->dev;
+	void __iomem *crb, *hdm;
+	struct cxl_hdm *cxlhdm;
+
+	cxlhdm = devm_kzalloc(host, sizeof(*cxlhdm), GFP_KERNEL);
+	if (!cxlhdm)
+		return ERR_PTR(-ENOMEM);
+
+	cxlhdm->port = port;
+	crb = devm_cxl_iomap_block(host, port->component_reg_phys,
+				   CXL_COMPONENT_REG_BLOCK_SIZE);
+	if (!crb) {
+		dev_err(dev, "No component registers mapped\n");
+		return ERR_PTR(-ENXIO);
+	}
+
+	hdm = map_hdm_decoder_regs(port, crb);
+	if (IS_ERR(hdm))
+		return ERR_CAST(hdm);
+	cxlhdm->regs.hdm_decoder = hdm;
+
+	parse_hdm_decoder_caps(cxlhdm);
+	if (cxlhdm->decoder_count == 0) {
+		dev_err(dev, "Spec violation. Caps invalid\n");
+		return ERR_PTR(-ENXIO);
+	}
+
+	return cxlhdm;
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_setup_hdm, CXL);
+
+static int to_interleave_granularity(u32 ctrl)
+{
+	int val = FIELD_GET(CXL_HDM_DECODER0_CTRL_IG_MASK, ctrl);
+
+	return 256 << val;
+}
+
+static int to_interleave_ways(u32 ctrl)
+{
+	int val = FIELD_GET(CXL_HDM_DECODER0_CTRL_IW_MASK, ctrl);
+
+	switch (val) {
+	case 0 ... 4:
+		return 1 << val;
+	case 8 ... 10:
+		return 3 << (val - 8);
+	default:
+		return 0;
+	}
+}
+
+static void init_hdm_decoder(struct cxl_decoder *cxld, int *target_map,
+			     void __iomem *hdm, int which)
+{
+	u64 size, base;
+	u32 ctrl;
+	int i;
+	union {
+		u64 value;
+		unsigned char target_id[8];
+	} target_list;
+
+	ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(which));
+	base = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(which));
+	size = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(which));
+
+	if (!(ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED))
+		size = 0;
+
+	cxld->decoder_range = (struct range) {
+		.start = base,
+		.end = base + size - 1,
+	};
+
+	/* switch decoders are always enabled if committed */
+	if (ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED) {
+		cxld->flags |= CXL_DECODER_F_ENABLE;
+		if (ctrl & CXL_HDM_DECODER0_CTRL_LOCK)
+			cxld->flags |= CXL_DECODER_F_LOCK;
+	}
+	cxld->interleave_ways = to_interleave_ways(ctrl);
+	cxld->interleave_granularity = to_interleave_granularity(ctrl);
+
+	if (FIELD_GET(CXL_HDM_DECODER0_CTRL_TYPE, ctrl))
+		cxld->target_type = CXL_DECODER_EXPANDER;
+	else
+		cxld->target_type = CXL_DECODER_ACCELERATOR;
+
+	target_list.value =
+		ioread64_hi_lo(hdm + CXL_HDM_DECODER0_TL_LOW(which));
+	for (i = 0; i < cxld->interleave_ways; i++)
+		target_map[i] = target_list.target_id[i];
+}
+
+/**
+ * devm_cxl_enumerate_decoders - add decoder objects per HDM register set
+ * @host: devm allocation context
+ * @cxlhdm: Structure to populate with HDM capabilities
+ */
+int devm_cxl_enumerate_decoders(struct device *host, struct cxl_hdm *cxlhdm)
+{
+	void __iomem *hdm = cxlhdm->regs.hdm_decoder;
+	struct cxl_port *port = cxlhdm->port;
+	int i, committed;
+	u32 ctrl;
+
+	/*
+	 * Since the register resource was recently claimed via request_region()
+	 * be careful about trusting the "not-committed" status until the commit
+	 * timeout has elapsed.  The commit timeout is 10ms (CXL 2.0
+	 * 8.2.5.12.20), but double it to be tolerant of any clock skew between
+	 * host and target.
+	 */
+	for (i = 0, committed = 0; i < cxlhdm->decoder_count; i++) {
+		ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(i));
+		if (ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED)
+			committed++;
+	}
+
+	/* ensure that future checks of committed can be trusted */
+	if (committed != cxlhdm->decoder_count)
+		msleep(20);
+
+	for (i = 0; i < cxlhdm->decoder_count; i++) {
+		int target_map[CXL_DECODER_MAX_INTERLEAVE] = { 0 };
+		int rc, target_count = cxlhdm->target_count;
+		struct cxl_decoder *cxld;
+
+		cxld = cxl_switch_decoder_alloc(port, target_count);
+		if (IS_ERR(cxld)) {
+			dev_warn(&port->dev,
+				 "Failed to allocate the decoder\n");
+			return PTR_ERR(cxld);
+		}
+
+		init_hdm_decoder(cxld, target_map, cxlhdm->regs.hdm_decoder, i);
+		rc = add_hdm_decoder(port, cxld, target_map);
+		if (rc) {
+			dev_warn(&port->dev,
+				 "Failed to add decoder to port\n");
+			return rc;
+		}
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_enumerate_decoders, CXL);
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index fee9c7affef4..4dfb9df9e648 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -594,16 +594,15 @@ EXPORT_SYMBOL_NS_GPL(devm_cxl_add_dport, CXL);
 static int decoder_populate_targets(struct cxl_decoder *cxld,
 				    struct cxl_port *port, int *target_map)
 {
-	int rc = 0, i;
+	int i, rc = 0;
 
 	if (!target_map)
 		return 0;
 
-	cxl_device_lock(&port->dev);
-	if (list_empty(&port->dports)) {
-		rc = -EINVAL;
-		goto out_unlock;
-	}
+	device_lock_assert(&port->dev);
+
+	if (list_empty(&port->dports))
+		return -EINVAL;
 
 	write_seqlock(&cxld->target_lock);
 	for (i = 0; i < cxld->nr_targets; i++) {
@@ -617,9 +616,6 @@ static int decoder_populate_targets(struct cxl_decoder *cxld,
 	}
 	write_sequnlock(&cxld->target_lock);
 
-out_unlock:
-	cxl_device_unlock(&port->dev);
-
 	return rc;
 }
 
@@ -721,7 +717,7 @@ struct cxl_decoder *cxl_switch_decoder_alloc(struct cxl_port *port,
 EXPORT_SYMBOL_NS_GPL(cxl_switch_decoder_alloc, CXL);
 
 /**
- * cxl_decoder_add - Add a decoder with targets
+ * cxl_decoder_add_locked - Add a decoder with targets
  * @cxld: The cxl decoder allocated by cxl_decoder_alloc()
  * @target_map: A list of downstream ports that this decoder can direct memory
  *              traffic to. These numbers should correspond with the port number
@@ -731,12 +727,15 @@ EXPORT_SYMBOL_NS_GPL(cxl_switch_decoder_alloc, CXL);
  * is an endpoint device. A more awkward example is a hostbridge whose root
  * ports get hot added (technically possible, though unlikely).
  *
- * Context: Process context. Takes and releases the cxld's device lock.
+ * This is the locked variant of cxl_decoder_add().
+ *
+ * Context: Process context. Expects the device lock of the port that owns the
+ *	    @cxld to be held.
  *
  * Return: Negative error code if the decoder wasn't properly configured; else
  *	   returns 0.
  */
-int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map)
+int cxl_decoder_add_locked(struct cxl_decoder *cxld, int *target_map)
 {
 	struct cxl_port *port;
 	struct device *dev;
@@ -770,6 +769,40 @@ int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map)
 
 	return device_add(dev);
 }
+EXPORT_SYMBOL_NS_GPL(cxl_decoder_add_locked, CXL);
+
+/**
+ * cxl_decoder_add - Add a decoder with targets
+ * @cxld: The cxl decoder allocated by cxl_decoder_alloc()
+ * @target_map: A list of downstream ports that this decoder can direct memory
+ *              traffic to. These numbers should correspond with the port number
+ *              in the PCIe Link Capabilities structure.
+ *
+ * This is the unlocked variant of cxl_decoder_add_locked().
+ * See cxl_decoder_add_locked().
+ *
+ * Context: Process context. Takes and releases the device lock of the port that
+ *	    owns the @cxld.
+ */
+int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map)
+{
+	struct cxl_port *port;
+	int rc;
+
+	if (WARN_ON_ONCE(!cxld))
+		return -EINVAL;
+
+	if (WARN_ON_ONCE(IS_ERR(cxld)))
+		return PTR_ERR(cxld);
+
+	port = to_cxl_port(cxld->dev.parent);
+
+	cxl_device_lock(&port->dev);
+	rc = cxl_decoder_add_locked(cxld, target_map);
+	cxl_device_unlock(&port->dev);
+
+	return rc;
+}
 EXPORT_SYMBOL_NS_GPL(cxl_decoder_add, CXL);
 
 static void cxld_unregister(void *dev)
diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c
index 65d7f5880671..718b6b0ae4b3 100644
--- a/drivers/cxl/core/regs.c
+++ b/drivers/cxl/core/regs.c
@@ -159,9 +159,8 @@ void cxl_probe_device_regs(struct device *dev, void __iomem *base,
 }
 EXPORT_SYMBOL_NS_GPL(cxl_probe_device_regs, CXL);
 
-static void __iomem *devm_cxl_iomap_block(struct device *dev,
-					  resource_size_t addr,
-					  resource_size_t length)
+void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr,
+				   resource_size_t length)
 {
 	void __iomem *ret_val;
 	struct resource *res;
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 0754c68ccd33..c127d5c0ac96 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -17,6 +17,9 @@
  * (port-driver, region-driver, nvdimm object-drivers... etc).
  */
 
+/* CXL 2.0 8.2.4 CXL Component Register Layout and Definition */
+#define CXL_COMPONENT_REG_BLOCK_SIZE SZ_64K
+
 /* CXL 2.0 8.2.5 CXL.cache and CXL.mem Registers*/
 #define CXL_CM_OFFSET 0x1000
 #define CXL_CM_CAP_HDR_OFFSET 0x0
@@ -36,11 +39,23 @@
 #define CXL_HDM_DECODER_CAP_OFFSET 0x0
 #define   CXL_HDM_DECODER_COUNT_MASK GENMASK(3, 0)
 #define   CXL_HDM_DECODER_TARGET_COUNT_MASK GENMASK(7, 4)
-#define CXL_HDM_DECODER0_BASE_LOW_OFFSET 0x10
-#define CXL_HDM_DECODER0_BASE_HIGH_OFFSET 0x14
-#define CXL_HDM_DECODER0_SIZE_LOW_OFFSET 0x18
-#define CXL_HDM_DECODER0_SIZE_HIGH_OFFSET 0x1c
-#define CXL_HDM_DECODER0_CTRL_OFFSET 0x20
+#define   CXL_HDM_DECODER_INTERLEAVE_11_8 BIT(8)
+#define   CXL_HDM_DECODER_INTERLEAVE_14_12 BIT(9)
+#define CXL_HDM_DECODER_CTRL_OFFSET 0x4
+#define   CXL_HDM_DECODER_ENABLE BIT(1)
+#define CXL_HDM_DECODER0_BASE_LOW_OFFSET(i) (0x20 * (i) + 0x10)
+#define CXL_HDM_DECODER0_BASE_HIGH_OFFSET(i) (0x20 * (i) + 0x14)
+#define CXL_HDM_DECODER0_SIZE_LOW_OFFSET(i) (0x20 * (i) + 0x18)
+#define CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(i) (0x20 * (i) + 0x1c)
+#define CXL_HDM_DECODER0_CTRL_OFFSET(i) (0x20 * (i) + 0x20)
+#define   CXL_HDM_DECODER0_CTRL_IG_MASK GENMASK(3, 0)
+#define   CXL_HDM_DECODER0_CTRL_IW_MASK GENMASK(7, 4)
+#define   CXL_HDM_DECODER0_CTRL_LOCK BIT(8)
+#define   CXL_HDM_DECODER0_CTRL_COMMIT BIT(9)
+#define   CXL_HDM_DECODER0_CTRL_COMMITTED BIT(10)
+#define   CXL_HDM_DECODER0_CTRL_TYPE BIT(12)
+#define CXL_HDM_DECODER0_TL_LOW(i) (0x20 * (i) + 0x24)
+#define CXL_HDM_DECODER0_TL_HIGH(i) (0x20 * (i) + 0x28)
 
 static inline int cxl_hdm_decoder_count(u32 cap_hdr)
 {
@@ -162,7 +177,8 @@ int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
 #define CXL_DECODER_F_TYPE2 BIT(2)
 #define CXL_DECODER_F_TYPE3 BIT(3)
 #define CXL_DECODER_F_LOCK  BIT(4)
-#define CXL_DECODER_F_MASK  GENMASK(4, 0)
+#define CXL_DECODER_F_ENABLE    BIT(5)
+#define CXL_DECODER_F_MASK  GENMASK(5, 0)
 
 enum cxl_decoder_type {
        CXL_DECODER_ACCELERATOR = 2,
@@ -306,7 +322,12 @@ struct cxl_decoder *cxl_root_decoder_alloc(struct cxl_port *port,
 struct cxl_decoder *cxl_switch_decoder_alloc(struct cxl_port *port,
 					     unsigned int nr_targets);
 int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map);
+int cxl_decoder_add_locked(struct cxl_decoder *cxld, int *target_map);
 int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld);
+struct cxl_hdm;
+struct cxl_hdm *devm_cxl_setup_hdm(struct device *host, struct cxl_port *port);
+int devm_cxl_enumerate_decoders(struct device *host, struct cxl_hdm *cxlhdm);
+int devm_cxl_add_passthrough_decoder(struct device *host, struct cxl_port *port);
 
 extern struct bus_type cxl_bus_type;
 
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 8d96d009ad90..fca2d1b5f6ff 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -264,4 +264,12 @@ int cxl_mem_create_range_info(struct cxl_dev_state *cxlds);
 struct cxl_dev_state *cxl_dev_state_create(struct device *dev);
 void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds);
 void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds);
+
+struct cxl_hdm {
+	struct cxl_component_regs regs;
+	unsigned int decoder_count;
+	unsigned int target_count;
+	unsigned int interleave_mask;
+	struct cxl_port *port;
+};
 #endif /* __CXL_MEM_H__ */
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
index 61123544aa49..3045d7cba0db 100644
--- a/tools/testing/cxl/Kbuild
+++ b/tools/testing/cxl/Kbuild
@@ -5,6 +5,9 @@ ldflags-y += --wrap=acpi_evaluate_integer
 ldflags-y += --wrap=acpi_pci_find_root
 ldflags-y += --wrap=nvdimm_bus_register
 ldflags-y += --wrap=devm_cxl_port_enumerate_dports
+ldflags-y += --wrap=devm_cxl_setup_hdm
+ldflags-y += --wrap=devm_cxl_add_passthrough_decoder
+ldflags-y += --wrap=devm_cxl_enumerate_decoders
 
 DRIVERS := ../../../drivers
 CXL_SRC := $(DRIVERS)/cxl
@@ -31,6 +34,7 @@ cxl_core-y += $(CXL_CORE_SRC)/regs.o
 cxl_core-y += $(CXL_CORE_SRC)/memdev.o
 cxl_core-y += $(CXL_CORE_SRC)/mbox.o
 cxl_core-y += $(CXL_CORE_SRC)/pci.o
+cxl_core-y += $(CXL_CORE_SRC)/hdm.o
 cxl_core-y += config_check.o
 
 obj-m += test/
diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index ef002e909d38..81c09380c537 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -8,6 +8,7 @@
 #include <linux/acpi.h>
 #include <linux/pci.h>
 #include <linux/mm.h>
+#include <cxlmem.h>
 #include "mock.h"
 
 #define NR_CXL_HOST_BRIDGES 4
@@ -398,6 +399,31 @@ static struct acpi_pci_root *mock_acpi_pci_find_root(acpi_handle handle)
 	return &mock_pci_root[host_bridge_index(adev)];
 }
 
+static struct cxl_hdm *mock_cxl_setup_hdm(struct device *host,
+					  struct cxl_port *port)
+{
+	struct cxl_hdm *cxlhdm = devm_kzalloc(&port->dev, sizeof(*cxlhdm), GFP_KERNEL);
+
+	if (!cxlhdm)
+		return ERR_PTR(-ENOMEM);
+
+	cxlhdm->port = port;
+	return cxlhdm;
+}
+
+static int mock_cxl_add_passthrough_decoder(struct device *host,
+					    struct cxl_port *port)
+{
+	dev_err(&port->dev, "unexpected passthrough decoder for cxl_test\n");
+	return -EOPNOTSUPP;
+}
+
+static int mock_cxl_enumerate_decoders(struct device *host,
+				       struct cxl_hdm *cxlhdm)
+{
+	return 0;
+}
+
 static int mock_cxl_port_enumerate_dports(struct device *host,
 					  struct cxl_port *port)
 {
@@ -439,6 +465,9 @@ static struct cxl_mock_ops cxl_mock_ops = {
 	.acpi_evaluate_integer = mock_acpi_evaluate_integer,
 	.acpi_pci_find_root = mock_acpi_pci_find_root,
 	.devm_cxl_port_enumerate_dports = mock_cxl_port_enumerate_dports,
+	.devm_cxl_setup_hdm = mock_cxl_setup_hdm,
+	.devm_cxl_add_passthrough_decoder = mock_cxl_add_passthrough_decoder,
+	.devm_cxl_enumerate_decoders = mock_cxl_enumerate_decoders,
 	.list = LIST_HEAD_INIT(cxl_mock_ops.list),
 };
 
diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c
index 56b4b7d734bc..18d3b65e2a9b 100644
--- a/tools/testing/cxl/test/mock.c
+++ b/tools/testing/cxl/test/mock.c
@@ -131,6 +131,56 @@ __wrap_nvdimm_bus_register(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(__wrap_nvdimm_bus_register);
 
+struct cxl_hdm *__wrap_devm_cxl_setup_hdm(struct device *host,
+					  struct cxl_port *port)
+{
+	int index;
+	struct cxl_hdm *cxlhdm;
+	struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
+
+	if (ops && ops->is_mock_port(port->uport))
+		cxlhdm = ops->devm_cxl_setup_hdm(host, port);
+	else
+		cxlhdm = devm_cxl_setup_hdm(host, port);
+	put_cxl_mock_ops(index);
+
+	return cxlhdm;
+}
+EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_setup_hdm, CXL);
+
+int __wrap_devm_cxl_add_passthrough_decoder(struct device *host,
+					    struct cxl_port *port)
+{
+	int rc, index;
+	struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
+
+	if (ops && ops->is_mock_port(port->uport))
+		rc = ops->devm_cxl_add_passthrough_decoder(host, port);
+	else
+		rc = devm_cxl_add_passthrough_decoder(host, port);
+	put_cxl_mock_ops(index);
+
+	return rc;
+}
+EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_add_passthrough_decoder, CXL);
+
+int __wrap_devm_cxl_enumerate_decoders(struct device *host,
+				       struct cxl_hdm *cxlhdm)
+{
+	int rc, index;
+	struct cxl_port *port = cxlhdm->port;
+	struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
+
+	if (ops && ops->is_mock_port(port->uport))
+		rc = ops->devm_cxl_enumerate_decoders(host, cxlhdm);
+	else
+		rc = devm_cxl_enumerate_decoders(host, cxlhdm);
+	put_cxl_mock_ops(index);
+
+	return rc;
+}
+EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_enumerate_decoders, CXL);
+
 int __wrap_devm_cxl_port_enumerate_dports(struct device *host,
 					  struct cxl_port *port)
 {
diff --git a/tools/testing/cxl/test/mock.h b/tools/testing/cxl/test/mock.h
index 99e7ff38090d..15e48063ea4b 100644
--- a/tools/testing/cxl/test/mock.h
+++ b/tools/testing/cxl/test/mock.h
@@ -21,6 +21,9 @@ struct cxl_mock_ops {
 	bool (*is_mock_dev)(struct device *dev);
 	int (*devm_cxl_port_enumerate_dports)(struct device *host,
 					      struct cxl_port *port);
+	struct cxl_hdm *(*devm_cxl_setup_hdm)(struct device *host, struct cxl_port *port);
+	int (*devm_cxl_add_passthrough_decoder)(struct device *host, struct cxl_port *port);
+	int (*devm_cxl_enumerate_decoders)(struct device *host, struct cxl_hdm *hdm);
 };
 
 void register_cxl_mock_ops(struct cxl_mock_ops *ops);
-- 
cgit 


From 54cdbf845cf719c09b45ae588cba469aabb3159c Mon Sep 17 00:00:00 2001
From: Ben Widawsky <ben.widawsky@intel.com>
Date: Tue, 1 Feb 2022 13:07:51 -0800
Subject: cxl/port: Add a driver for 'struct cxl_port' objects

The need for a CXL port driver and a dedicated cxl_bus_type is driven by
a need to simultaneously support 2 independent physical memory decode
domains (cache coherent CXL.mem and uncached PCI.mmio) that also
intersect at a single PCIe device node. A CXL Port is a device that
advertises a  CXL Component Register block with an "HDM Decoder
Capability Structure".

>From Documentation/driver-api/cxl/memory-devices.rst:

    Similar to how a RAID driver takes disk objects and assembles them into
    a new logical device, the CXL subsystem is tasked to take PCIe and ACPI
    objects and assemble them into a CXL.mem decode topology. The need for
    runtime configuration of the CXL.mem topology is also similar to RAID in
    that different environments with the same hardware configuration may
    decide to assemble the topology in contrasting ways. One may choose
    performance (RAID0) striping memory across multiple Host Bridges and
    endpoints while another may opt for fault tolerance and disable any
    striping in the CXL.mem topology.

The port driver identifies whether an endpoint Memory Expander is
connected to a CXL topology. If an active (bound to the 'cxl_port'
driver) CXL Port is not found at every PCIe Switch Upstream port and an
active "root" CXL Port then the device is just a plain PCIe endpoint
only capable of participating in PCI.mmio and DMA cycles, not CXL.mem
coherent interleave sets.

The 'cxl_port' driver lets the CXL subsystem leverage driver-core
infrastructure for setup and teardown of register resources and
communicating device activation status to userspace. The cxl_bus_type
can rendezvous the async arrival of platform level CXL resources (via
the 'cxl_acpi' driver) with the asynchronous enumeration of Memory
Expander endpoints, while also implementing a hierarchical locking model
independent of the associated 'struct pci_dev' locking model. The
locking for dport and decoder enumeration is now handled in the core
rather than callers.

For now the port driver only enumerates and registers CXL resources
(downstream port metadata and decoder resources) later it will be used
to take action on its decoders in response to CXL.mem region
provisioning requests.

Note1: cxlpci.h has long depended on pci.h, but port.c was the first to
not include pci.h. Carry that dependency in cxlpci.h.

Note2: cxl port enumeration and probing complicates CXL subsystem init
to the point that it helps to have centralized debug logging of probe
events in cxl_bus_probe().

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Co-developed-by: Dan Williams <dan.j.williams@intel.com>
Link: https://lore.kernel.org/r/164374948116.464348.1772618057599155408.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 Documentation/driver-api/cxl/memory-devices.rst | 302 ++++++++++++++++++++++++
 drivers/cxl/Kconfig                             |   5 +
 drivers/cxl/Makefile                            |   2 +
 drivers/cxl/acpi.c                              |  26 +-
 drivers/cxl/core/pci.c                          |   2 -
 drivers/cxl/core/port.c                         |  34 ++-
 drivers/cxl/cxl.h                               |   4 +
 drivers/cxl/cxlpci.h                            |   1 +
 drivers/cxl/port.c                              |  63 +++++
 tools/testing/cxl/Kbuild                        |   5 +
 tools/testing/cxl/test/cxl.c                    |   2 -
 11 files changed, 415 insertions(+), 31 deletions(-)
 create mode 100644 drivers/cxl/port.c

(limited to 'tools/testing')

diff --git a/Documentation/driver-api/cxl/memory-devices.rst b/Documentation/driver-api/cxl/memory-devices.rst
index c8f7a16cd0e3..3498d38d7cbd 100644
--- a/Documentation/driver-api/cxl/memory-devices.rst
+++ b/Documentation/driver-api/cxl/memory-devices.rst
@@ -14,6 +14,303 @@ that optionally define a device's contribution to an interleaved address
 range across multiple devices underneath a host-bridge or interleaved
 across host-bridges.
 
+CXL Bus: Theory of Operation
+============================
+Similar to how a RAID driver takes disk objects and assembles them into a new
+logical device, the CXL subsystem is tasked to take PCIe and ACPI objects and
+assemble them into a CXL.mem decode topology. The need for runtime configuration
+of the CXL.mem topology is also similar to RAID in that different environments
+with the same hardware configuration may decide to assemble the topology in
+contrasting ways. One may choose performance (RAID0) striping memory across
+multiple Host Bridges and endpoints while another may opt for fault tolerance
+and disable any striping in the CXL.mem topology.
+
+Platform firmware enumerates a menu of interleave options at the "CXL root port"
+(Linux term for the top of the CXL decode topology). From there, PCIe topology
+dictates which endpoints can participate in which Host Bridge decode regimes.
+Each PCIe Switch in the path between the root and an endpoint introduces a point
+at which the interleave can be split. For example platform firmware may say at a
+given range only decodes to 1 one Host Bridge, but that Host Bridge may in turn
+interleave cycles across multiple Root Ports. An intervening Switch between a
+port and an endpoint may interleave cycles across multiple Downstream Switch
+Ports, etc.
+
+Here is a sample listing of a CXL topology defined by 'cxl_test'. The 'cxl_test'
+module generates an emulated CXL topology of 2 Host Bridges each with 2 Root
+Ports. Each of those Root Ports are connected to 2-way switches with endpoints
+connected to those downstream ports for a total of 8 endpoints::
+
+    # cxl list -BEMPu -b cxl_test
+    {
+      "bus":"root3",
+      "provider":"cxl_test",
+      "ports:root3":[
+        {
+          "port":"port5",
+          "host":"cxl_host_bridge.1",
+          "ports:port5":[
+            {
+              "port":"port8",
+              "host":"cxl_switch_uport.1",
+              "endpoints:port8":[
+                {
+                  "endpoint":"endpoint9",
+                  "host":"mem2",
+                  "memdev":{
+                    "memdev":"mem2",
+                    "pmem_size":"256.00 MiB (268.44 MB)",
+                    "ram_size":"256.00 MiB (268.44 MB)",
+                    "serial":"0x1",
+                    "numa_node":1,
+                    "host":"cxl_mem.1"
+                  }
+                },
+                {
+                  "endpoint":"endpoint15",
+                  "host":"mem6",
+                  "memdev":{
+                    "memdev":"mem6",
+                    "pmem_size":"256.00 MiB (268.44 MB)",
+                    "ram_size":"256.00 MiB (268.44 MB)",
+                    "serial":"0x5",
+                    "numa_node":1,
+                    "host":"cxl_mem.5"
+                  }
+                }
+              ]
+            },
+            {
+              "port":"port12",
+              "host":"cxl_switch_uport.3",
+              "endpoints:port12":[
+                {
+                  "endpoint":"endpoint17",
+                  "host":"mem8",
+                  "memdev":{
+                    "memdev":"mem8",
+                    "pmem_size":"256.00 MiB (268.44 MB)",
+                    "ram_size":"256.00 MiB (268.44 MB)",
+                    "serial":"0x7",
+                    "numa_node":1,
+                    "host":"cxl_mem.7"
+                  }
+                },
+                {
+                  "endpoint":"endpoint13",
+                  "host":"mem4",
+                  "memdev":{
+                    "memdev":"mem4",
+                    "pmem_size":"256.00 MiB (268.44 MB)",
+                    "ram_size":"256.00 MiB (268.44 MB)",
+                    "serial":"0x3",
+                    "numa_node":1,
+                    "host":"cxl_mem.3"
+                  }
+                }
+              ]
+            }
+          ]
+        },
+        {
+          "port":"port4",
+          "host":"cxl_host_bridge.0",
+          "ports:port4":[
+            {
+              "port":"port6",
+              "host":"cxl_switch_uport.0",
+              "endpoints:port6":[
+                {
+                  "endpoint":"endpoint7",
+                  "host":"mem1",
+                  "memdev":{
+                    "memdev":"mem1",
+                    "pmem_size":"256.00 MiB (268.44 MB)",
+                    "ram_size":"256.00 MiB (268.44 MB)",
+                    "serial":"0",
+                    "numa_node":0,
+                    "host":"cxl_mem.0"
+                  }
+                },
+                {
+                  "endpoint":"endpoint14",
+                  "host":"mem5",
+                  "memdev":{
+                    "memdev":"mem5",
+                    "pmem_size":"256.00 MiB (268.44 MB)",
+                    "ram_size":"256.00 MiB (268.44 MB)",
+                    "serial":"0x4",
+                    "numa_node":0,
+                    "host":"cxl_mem.4"
+                  }
+                }
+              ]
+            },
+            {
+              "port":"port10",
+              "host":"cxl_switch_uport.2",
+              "endpoints:port10":[
+                {
+                  "endpoint":"endpoint16",
+                  "host":"mem7",
+                  "memdev":{
+                    "memdev":"mem7",
+                    "pmem_size":"256.00 MiB (268.44 MB)",
+                    "ram_size":"256.00 MiB (268.44 MB)",
+                    "serial":"0x6",
+                    "numa_node":0,
+                    "host":"cxl_mem.6"
+                  }
+                },
+                {
+                  "endpoint":"endpoint11",
+                  "host":"mem3",
+                  "memdev":{
+                    "memdev":"mem3",
+                    "pmem_size":"256.00 MiB (268.44 MB)",
+                    "ram_size":"256.00 MiB (268.44 MB)",
+                    "serial":"0x2",
+                    "numa_node":0,
+                    "host":"cxl_mem.2"
+                  }
+                }
+              ]
+            }
+          ]
+        }
+      ]
+    }
+
+In that listing each "root", "port", and "endpoint" object correspond a kernel
+'struct cxl_port' object. A 'cxl_port' is a device that can decode CXL.mem to
+its descendants. So "root" claims non-PCIe enumerable platform decode ranges and
+decodes them to "ports", "ports" decode to "endpoints", and "endpoints"
+represent the decode from SPA (System Physical Address) to DPA (Device Physical
+Address).
+
+Continuing the RAID analogy, disks have both topology metadata and on device
+metadata that determine RAID set assembly. CXL Port topology and CXL Port link
+status is metadata for CXL.mem set assembly. The CXL Port topology is enumerated
+by the arrival of a CXL.mem device. I.e. unless and until the PCIe core attaches
+the cxl_pci driver to a CXL Memory Expander there is no role for CXL Port
+objects. Conversely for hot-unplug / removal scenarios, there is no need for
+the Linux PCI core to tear down switch-level CXL resources because the endpoint
+->remove() event cleans up the port data that was established to support that
+Memory Expander.
+
+The port metadata and potential decode schemes that a give memory device may
+participate can be determined via a command like::
+
+    # cxl list -BDMu -d root -m mem3
+    {
+      "bus":"root3",
+      "provider":"cxl_test",
+      "decoders:root3":[
+        {
+          "decoder":"decoder3.1",
+          "resource":"0x8030000000",
+          "size":"512.00 MiB (536.87 MB)",
+          "volatile_capable":true,
+          "nr_targets":2
+        },
+        {
+          "decoder":"decoder3.3",
+          "resource":"0x8060000000",
+          "size":"512.00 MiB (536.87 MB)",
+          "pmem_capable":true,
+          "nr_targets":2
+        },
+        {
+          "decoder":"decoder3.0",
+          "resource":"0x8020000000",
+          "size":"256.00 MiB (268.44 MB)",
+          "volatile_capable":true,
+          "nr_targets":1
+        },
+        {
+          "decoder":"decoder3.2",
+          "resource":"0x8050000000",
+          "size":"256.00 MiB (268.44 MB)",
+          "pmem_capable":true,
+          "nr_targets":1
+        }
+      ],
+      "memdevs:root3":[
+        {
+          "memdev":"mem3",
+          "pmem_size":"256.00 MiB (268.44 MB)",
+          "ram_size":"256.00 MiB (268.44 MB)",
+          "serial":"0x2",
+          "numa_node":0,
+          "host":"cxl_mem.2"
+        }
+      ]
+    }
+
+...which queries the CXL topology to ask "given CXL Memory Expander with a kernel
+device name of 'mem3' which platform level decode ranges may this device
+participate". A given expander can participate in multiple CXL.mem interleave
+sets simultaneously depending on how many decoder resource it has. In this
+example mem3 can participate in one or more of a PMEM interleave that spans to
+Host Bridges, a PMEM interleave that targets a single Host Bridge, a Volatile
+memory interleave that spans 2 Host Bridges, and a Volatile memory interleave
+that only targets a single Host Bridge.
+
+Conversely the memory devices that can participate in a given platform level
+decode scheme can be determined via a command like the following::
+
+    # cxl list -MDu -d 3.2
+    [
+      {
+        "memdevs":[
+          {
+            "memdev":"mem1",
+            "pmem_size":"256.00 MiB (268.44 MB)",
+            "ram_size":"256.00 MiB (268.44 MB)",
+            "serial":"0",
+            "numa_node":0,
+            "host":"cxl_mem.0"
+          },
+          {
+            "memdev":"mem5",
+            "pmem_size":"256.00 MiB (268.44 MB)",
+            "ram_size":"256.00 MiB (268.44 MB)",
+            "serial":"0x4",
+            "numa_node":0,
+            "host":"cxl_mem.4"
+          },
+          {
+            "memdev":"mem7",
+            "pmem_size":"256.00 MiB (268.44 MB)",
+            "ram_size":"256.00 MiB (268.44 MB)",
+            "serial":"0x6",
+            "numa_node":0,
+            "host":"cxl_mem.6"
+          },
+          {
+            "memdev":"mem3",
+            "pmem_size":"256.00 MiB (268.44 MB)",
+            "ram_size":"256.00 MiB (268.44 MB)",
+            "serial":"0x2",
+            "numa_node":0,
+            "host":"cxl_mem.2"
+          }
+        ]
+      },
+      {
+        "root decoders":[
+          {
+            "decoder":"decoder3.2",
+            "resource":"0x8050000000",
+            "size":"256.00 MiB (268.44 MB)",
+            "pmem_capable":true,
+            "nr_targets":1
+          }
+        ]
+      }
+    ]
+
+...where the naming scheme for decoders is "decoder<port_id>.<instance_id>".
+
 Driver Infrastructure
 =====================
 
@@ -28,6 +325,11 @@ CXL Memory Device
 .. kernel-doc:: drivers/cxl/pci.c
    :internal:
 
+CXL Port
+--------
+.. kernel-doc:: drivers/cxl/port.c
+   :doc: cxl port
+
 CXL Core
 --------
 .. kernel-doc:: drivers/cxl/cxl.h
diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig
index ef05e96f8f97..4f4f7587f6ca 100644
--- a/drivers/cxl/Kconfig
+++ b/drivers/cxl/Kconfig
@@ -77,4 +77,9 @@ config CXL_PMEM
 	  provisioning the persistent memory capacity of CXL memory expanders.
 
 	  If unsure say 'm'.
+
+config CXL_PORT
+	default CXL_BUS
+	tristate
+
 endif
diff --git a/drivers/cxl/Makefile b/drivers/cxl/Makefile
index cf07ae6cea17..56fcac2323cb 100644
--- a/drivers/cxl/Makefile
+++ b/drivers/cxl/Makefile
@@ -3,7 +3,9 @@ obj-$(CONFIG_CXL_BUS) += core/
 obj-$(CONFIG_CXL_PCI) += cxl_pci.o
 obj-$(CONFIG_CXL_ACPI) += cxl_acpi.o
 obj-$(CONFIG_CXL_PMEM) += cxl_pmem.o
+obj-$(CONFIG_CXL_PORT) += cxl_port.o
 
 cxl_pci-y := pci.o
 cxl_acpi-y := acpi.o
 cxl_pmem-y := pmem.o
+cxl_port-y := port.o
diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index 8c2ced91518b..82591642ea90 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -169,7 +169,6 @@ static int add_host_bridge_uport(struct device *match, void *arg)
 	struct acpi_device *bridge = to_cxl_host_bridge(host, match);
 	struct acpi_pci_root *pci_root;
 	struct cxl_dport *dport;
-	struct cxl_hdm *cxlhdm;
 	struct cxl_port *port;
 	int rc;
 
@@ -197,28 +196,7 @@ static int add_host_bridge_uport(struct device *match, void *arg)
 		return PTR_ERR(port);
 	dev_dbg(host, "%s: add: %s\n", dev_name(match), dev_name(&port->dev));
 
-	rc = devm_cxl_port_enumerate_dports(host, port);
-	if (rc < 0)
-		return rc;
-	cxl_device_lock(&port->dev);
-	if (rc == 1) {
-		rc = devm_cxl_add_passthrough_decoder(host, port);
-		goto out;
-	}
-
-	cxlhdm = devm_cxl_setup_hdm(host, port);
-	if (IS_ERR(cxlhdm)) {
-		rc = PTR_ERR(cxlhdm);
-		goto out;
-	}
-
-	rc = devm_cxl_enumerate_decoders(host, cxlhdm);
-	if (rc)
-		dev_err(&port->dev, "Couldn't enumerate decoders (%d)\n", rc);
-
-out:
-	cxl_device_unlock(&port->dev);
-	return rc;
+	return 0;
 }
 
 struct cxl_chbs_context {
@@ -278,9 +256,7 @@ static int add_host_bridge_dport(struct device *match, void *arg)
 		return 0;
 	}
 
-	cxl_device_lock(&root_port->dev);
 	dport = devm_cxl_add_dport(host, root_port, match, uid, ctx.chbcr);
-	cxl_device_unlock(&root_port->dev);
 	if (IS_ERR(dport)) {
 		dev_err(host, "failed to add downstream port: %s\n",
 			dev_name(match));
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index c5a9e03ed477..8ec5f74da679 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -47,10 +47,8 @@ static int match_add_dports(struct pci_dev *pdev, void *data)
 		dev_dbg(&port->dev, "failed to find component registers\n");
 
 	port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap);
-	cxl_device_lock(&port->dev);
 	dport = devm_cxl_add_dport(ctx->host, port, &pdev->dev, port_num,
 				   cxl_regmap_to_base(pdev, &map));
-	cxl_device_unlock(&port->dev);
 	if (IS_ERR(dport)) {
 		ctx->error = PTR_ERR(dport);
 		return PTR_ERR(dport);
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 0bbd8fb8f35d..a66284b7eb1b 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -40,6 +40,11 @@ static int cxl_device_id(struct device *dev)
 		return CXL_DEVICE_NVDIMM_BRIDGE;
 	if (dev->type == &cxl_nvdimm_type)
 		return CXL_DEVICE_NVDIMM;
+	if (is_cxl_port(dev)) {
+		if (is_cxl_root(to_cxl_port(dev)))
+			return CXL_DEVICE_ROOT;
+		return CXL_DEVICE_PORT;
+	}
 	return 0;
 }
 
@@ -298,6 +303,9 @@ static void unregister_port(void *_port)
 {
 	struct cxl_port *port = _port;
 
+	if (!is_cxl_root(port))
+		device_lock_assert(port->dev.parent);
+
 	device_unregister(&port->dev);
 }
 
@@ -526,15 +534,34 @@ static int add_dport(struct cxl_port *port, struct cxl_dport *new)
 	return dup ? -EEXIST : 0;
 }
 
+/*
+ * Since root-level CXL dports cannot be enumerated by PCI they are not
+ * enumerated by the common port driver that acquires the port lock over
+ * dport add/remove. Instead, root dports are manually added by a
+ * platform driver and cond_cxl_root_lock() is used to take the missing
+ * port lock in that case.
+ */
+static void cond_cxl_root_lock(struct cxl_port *port)
+{
+	if (is_cxl_root(port))
+		cxl_device_lock(&port->dev);
+}
+
+static void cond_cxl_root_unlock(struct cxl_port *port)
+{
+	if (is_cxl_root(port))
+		cxl_device_unlock(&port->dev);
+}
+
 static void cxl_dport_remove(void *data)
 {
 	struct cxl_dport *dport = data;
 	struct cxl_port *port = dport->port;
 
 	put_device(dport->dport);
-	cxl_device_lock(&port->dev);
+	cond_cxl_root_lock(port);
 	list_del(&dport->list);
-	cxl_device_unlock(&port->dev);
+	cond_cxl_root_unlock(port);
 }
 
 static void cxl_dport_unlink(void *data)
@@ -587,7 +614,9 @@ struct cxl_dport *devm_cxl_add_dport(struct device *host, struct cxl_port *port,
 	dport->component_reg_phys = component_reg_phys;
 	dport->port = port;
 
+	cond_cxl_root_lock(port);
 	rc = add_dport(port, dport);
+	cond_cxl_root_unlock(port);
 	if (rc)
 		return ERR_PTR(rc);
 
@@ -895,6 +924,7 @@ static int cxl_bus_probe(struct device *dev)
 	rc = to_cxl_drv(dev->driver)->probe(dev);
 	cxl_nested_unlock(dev);
 
+	dev_dbg(dev, "probe: %d\n", rc);
 	return rc;
 }
 
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index c127d5c0ac96..2b24eb56618f 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -163,6 +163,8 @@ int cxl_map_device_regs(struct pci_dev *pdev,
 enum cxl_regloc_type;
 int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
 		      struct cxl_register_map *map);
+void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr,
+				   resource_size_t length);
 
 #define CXL_RESOURCE_NONE ((resource_size_t) -1)
 #define CXL_TARGET_STRLEN 20
@@ -354,6 +356,8 @@ void cxl_driver_unregister(struct cxl_driver *cxl_drv);
 
 #define CXL_DEVICE_NVDIMM_BRIDGE	1
 #define CXL_DEVICE_NVDIMM		2
+#define CXL_DEVICE_PORT			3
+#define CXL_DEVICE_ROOT			4
 
 #define MODULE_ALIAS_CXL(type) MODULE_ALIAS("cxl:t" __stringify(type) "*")
 #define CXL_MODALIAS_FMT "cxl:t%d"
diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
index 103636fda198..47640f19e899 100644
--- a/drivers/cxl/cxlpci.h
+++ b/drivers/cxl/cxlpci.h
@@ -2,6 +2,7 @@
 /* Copyright(c) 2020 Intel Corporation. All rights reserved. */
 #ifndef __CXL_PCI_H__
 #define __CXL_PCI_H__
+#include <linux/pci.h>
 #include "cxl.h"
 
 #define CXL_MEMORY_PROGIF	0x10
diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
new file mode 100644
index 000000000000..daa4c3c33aed
--- /dev/null
+++ b/drivers/cxl/port.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include "cxlmem.h"
+#include "cxlpci.h"
+
+/**
+ * DOC: cxl port
+ *
+ * The port driver enumerates dport via PCI and scans for HDM
+ * (Host-managed-Device-Memory) decoder resources via the
+ * @component_reg_phys value passed in by the agent that registered the
+ * port. All descendant ports of a CXL root port (described by platform
+ * firmware) are managed in this drivers context. Each driver instance
+ * is responsible for tearing down the driver context of immediate
+ * descendant ports. The locking for this is validated by
+ * CONFIG_PROVE_CXL_LOCKING.
+ *
+ * The primary service this driver provides is presenting APIs to other
+ * drivers to utilize the decoders, and indicating to userspace (via bind
+ * status) the connectivity of the CXL.mem protocol throughout the
+ * PCIe topology.
+ */
+
+static int cxl_port_probe(struct device *dev)
+{
+	struct cxl_port *port = to_cxl_port(dev);
+	struct cxl_hdm *cxlhdm;
+	int rc;
+
+	rc = devm_cxl_port_enumerate_dports(dev, port);
+	if (rc < 0)
+		return rc;
+
+	if (rc == 1)
+		return devm_cxl_add_passthrough_decoder(dev, port);
+
+	cxlhdm = devm_cxl_setup_hdm(dev, port);
+	if (IS_ERR(cxlhdm))
+		return PTR_ERR(cxlhdm);
+
+	rc = devm_cxl_enumerate_decoders(dev, cxlhdm);
+	if (rc) {
+		dev_err(dev, "Couldn't enumerate decoders (%d)\n", rc);
+		return rc;
+	}
+
+	return 0;
+}
+
+static struct cxl_driver cxl_port_driver = {
+	.name = "cxl_port",
+	.probe = cxl_port_probe,
+	.id = CXL_DEVICE_PORT,
+};
+
+module_cxl_driver(cxl_port_driver);
+MODULE_LICENSE("GPL v2");
+MODULE_IMPORT_NS(CXL);
+MODULE_ALIAS_CXL(CXL_DEVICE_PORT);
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
index 3045d7cba0db..27ae13e23e79 100644
--- a/tools/testing/cxl/Kbuild
+++ b/tools/testing/cxl/Kbuild
@@ -26,6 +26,11 @@ obj-m += cxl_pmem.o
 cxl_pmem-y := $(CXL_SRC)/pmem.o
 cxl_pmem-y += config_check.o
 
+obj-m += cxl_port.o
+
+cxl_port-y := $(CXL_SRC)/port.o
+cxl_port-y += config_check.o
+
 obj-m += cxl_core.o
 
 cxl_core-y := $(CXL_CORE_SRC)/port.o
diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index 81c09380c537..ce6ace286fc7 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -437,10 +437,8 @@ static int mock_cxl_port_enumerate_dports(struct device *host,
 		if (pdev->dev.parent != port->uport)
 			continue;
 
-		cxl_device_lock(&port->dev);
 		dport = devm_cxl_add_dport(host, port, &pdev->dev, pdev->id,
 					   CXL_RESOURCE_NONE);
-		cxl_device_unlock(&port->dev);
 
 		if (IS_ERR(dport)) {
 			dev_err(dev, "failed to add dport: %s (%ld)\n",
-- 
cgit 


From 664bf115833c2d4ee717ab63f4e6e72a25c66e77 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 1 Feb 2022 13:23:14 -0800
Subject: cxl/core/port: Remove @host argument for dport + decoder enumeration

Now that dport and decoder enumeration is centralized in the port
driver, the @host argument for these helpers can be made implicit. For
the root port the host is the port's uport device (ACPI0017 for
cxl_acpi), and for all other descendant ports the devm context is the
parent of @port.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Ben Widawsky <ben.widawsky@intel.com>
Link: https://lore.kernel.org/r/164375043390.484143.17617734732003230076.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/cxl/acpi.c            |  2 +-
 drivers/cxl/core/hdm.c        | 12 +++++-------
 drivers/cxl/core/pci.c        |  7 ++-----
 drivers/cxl/core/port.c       |  9 +++++++--
 drivers/cxl/cxl.h             |  8 ++++----
 drivers/cxl/cxlpci.h          |  2 +-
 drivers/cxl/port.c            |  8 ++++----
 tools/testing/cxl/test/cxl.c  | 14 +++++---------
 tools/testing/cxl/test/mock.c | 28 ++++++++++++----------------
 tools/testing/cxl/test/mock.h |  9 ++++-----
 10 files changed, 45 insertions(+), 54 deletions(-)

(limited to 'tools/testing')

diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index 82591642ea90..683f2ca32c97 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -256,7 +256,7 @@ static int add_host_bridge_dport(struct device *match, void *arg)
 		return 0;
 	}
 
-	dport = devm_cxl_add_dport(host, root_port, match, uid, ctx.chbcr);
+	dport = devm_cxl_add_dport(root_port, match, uid, ctx.chbcr);
 	if (IS_ERR(dport)) {
 		dev_err(host, "failed to add downstream port: %s\n",
 			dev_name(match));
diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index 84f4ed288a88..80280db316c0 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -44,7 +44,7 @@ static int add_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
  * are claimed and passed to the single dport. Disable the range until the first
  * CXL region is enumerated / activated.
  */
-int devm_cxl_add_passthrough_decoder(struct device *host, struct cxl_port *port)
+int devm_cxl_add_passthrough_decoder(struct cxl_port *port)
 {
 	struct cxl_decoder *cxld;
 	struct cxl_dport *dport;
@@ -93,21 +93,20 @@ static void __iomem *map_hdm_decoder_regs(struct cxl_port *port,
 
 /**
  * devm_cxl_setup_hdm - map HDM decoder component registers
- * @host: devm context for allocations
  * @port: cxl_port to map
  */
-struct cxl_hdm *devm_cxl_setup_hdm(struct device *host, struct cxl_port *port)
+struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port)
 {
 	struct device *dev = &port->dev;
 	void __iomem *crb, *hdm;
 	struct cxl_hdm *cxlhdm;
 
-	cxlhdm = devm_kzalloc(host, sizeof(*cxlhdm), GFP_KERNEL);
+	cxlhdm = devm_kzalloc(dev, sizeof(*cxlhdm), GFP_KERNEL);
 	if (!cxlhdm)
 		return ERR_PTR(-ENOMEM);
 
 	cxlhdm->port = port;
-	crb = devm_cxl_iomap_block(host, port->component_reg_phys,
+	crb = devm_cxl_iomap_block(dev, port->component_reg_phys,
 				   CXL_COMPONENT_REG_BLOCK_SIZE);
 	if (!crb) {
 		dev_err(dev, "No component registers mapped\n");
@@ -195,10 +194,9 @@ static void init_hdm_decoder(struct cxl_decoder *cxld, int *target_map,
 
 /**
  * devm_cxl_enumerate_decoders - add decoder objects per HDM register set
- * @host: devm allocation context
  * @cxlhdm: Structure to populate with HDM capabilities
  */
-int devm_cxl_enumerate_decoders(struct device *host, struct cxl_hdm *cxlhdm)
+int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm)
 {
 	void __iomem *hdm = cxlhdm->regs.hdm_decoder;
 	struct cxl_port *port = cxlhdm->port;
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 8ec5f74da679..c9a494d6976a 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -15,7 +15,6 @@
 
 struct cxl_walk_context {
 	struct pci_bus *bus;
-	struct device *host;
 	struct cxl_port *port;
 	int type;
 	int error;
@@ -47,7 +46,7 @@ static int match_add_dports(struct pci_dev *pdev, void *data)
 		dev_dbg(&port->dev, "failed to find component registers\n");
 
 	port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap);
-	dport = devm_cxl_add_dport(ctx->host, port, &pdev->dev, port_num,
+	dport = devm_cxl_add_dport(port, &pdev->dev, port_num,
 				   cxl_regmap_to_base(pdev, &map));
 	if (IS_ERR(dport)) {
 		ctx->error = PTR_ERR(dport);
@@ -62,13 +61,12 @@ static int match_add_dports(struct pci_dev *pdev, void *data)
 
 /**
  * devm_cxl_port_enumerate_dports - enumerate downstream ports of the upstream port
- * @host: devm context
  * @port: cxl_port whose ->uport is the upstream of dports to be enumerated
  *
  * Returns a positive number of dports enumerated or a negative error
  * code.
  */
-int devm_cxl_port_enumerate_dports(struct device *host, struct cxl_port *port)
+int devm_cxl_port_enumerate_dports(struct cxl_port *port)
 {
 	struct pci_bus *bus = cxl_port_to_pci_bus(port);
 	struct cxl_walk_context ctx;
@@ -83,7 +81,6 @@ int devm_cxl_port_enumerate_dports(struct device *host, struct cxl_port *port)
 		type = PCI_EXP_TYPE_DOWNSTREAM;
 
 	ctx = (struct cxl_walk_context) {
-		.host = host,
 		.port = port,
 		.bus = bus,
 		.type = type,
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index a66284b7eb1b..62b9f5dc64b5 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -576,7 +576,6 @@ static void cxl_dport_unlink(void *data)
 
 /**
  * devm_cxl_add_dport - append downstream port data to a cxl_port
- * @host: devm context for allocations
  * @port: the cxl_port that references this dport
  * @dport_dev: firmware or PCI device representing the dport
  * @port_id: identifier for this dport in a decoder's target list
@@ -586,14 +585,20 @@ static void cxl_dport_unlink(void *data)
  * either the port's host (for root ports), or the port itself (for
  * switch ports)
  */
-struct cxl_dport *devm_cxl_add_dport(struct device *host, struct cxl_port *port,
+struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port,
 				     struct device *dport_dev, int port_id,
 				     resource_size_t component_reg_phys)
 {
 	char link_name[CXL_TARGET_STRLEN];
 	struct cxl_dport *dport;
+	struct device *host;
 	int rc;
 
+	if (is_cxl_root(port))
+		host = port->uport;
+	else
+		host = &port->dev;
+
 	if (!host->driver) {
 		dev_WARN_ONCE(&port->dev, 1, "dport:%s bad devm context\n",
 			      dev_name(dport_dev));
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 2b24eb56618f..89fbf49ebf98 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -313,7 +313,7 @@ struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
 				   resource_size_t component_reg_phys,
 				   struct cxl_port *parent_port);
 struct cxl_port *find_cxl_root(struct device *dev);
-struct cxl_dport *devm_cxl_add_dport(struct device *host, struct cxl_port *port,
+struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port,
 				     struct device *dport, int port_id,
 				     resource_size_t component_reg_phys);
 struct cxl_decoder *to_cxl_decoder(struct device *dev);
@@ -327,9 +327,9 @@ int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map);
 int cxl_decoder_add_locked(struct cxl_decoder *cxld, int *target_map);
 int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld);
 struct cxl_hdm;
-struct cxl_hdm *devm_cxl_setup_hdm(struct device *host, struct cxl_port *port);
-int devm_cxl_enumerate_decoders(struct device *host, struct cxl_hdm *cxlhdm);
-int devm_cxl_add_passthrough_decoder(struct device *host, struct cxl_port *port);
+struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port);
+int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm);
+int devm_cxl_add_passthrough_decoder(struct cxl_port *port);
 
 extern struct bus_type cxl_bus_type;
 
diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
index 47640f19e899..766de340c4ce 100644
--- a/drivers/cxl/cxlpci.h
+++ b/drivers/cxl/cxlpci.h
@@ -58,5 +58,5 @@ static inline resource_size_t cxl_regmap_to_base(struct pci_dev *pdev,
 	return pci_resource_start(pdev, map->barno) + map->block_offset;
 }
 
-int devm_cxl_port_enumerate_dports(struct device *host, struct cxl_port *port);
+int devm_cxl_port_enumerate_dports(struct cxl_port *port);
 #endif /* __CXL_PCI_H__ */
diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
index daa4c3c33aed..5a1aec28dc46 100644
--- a/drivers/cxl/port.c
+++ b/drivers/cxl/port.c
@@ -31,18 +31,18 @@ static int cxl_port_probe(struct device *dev)
 	struct cxl_hdm *cxlhdm;
 	int rc;
 
-	rc = devm_cxl_port_enumerate_dports(dev, port);
+	rc = devm_cxl_port_enumerate_dports(port);
 	if (rc < 0)
 		return rc;
 
 	if (rc == 1)
-		return devm_cxl_add_passthrough_decoder(dev, port);
+		return devm_cxl_add_passthrough_decoder(port);
 
-	cxlhdm = devm_cxl_setup_hdm(dev, port);
+	cxlhdm = devm_cxl_setup_hdm(port);
 	if (IS_ERR(cxlhdm))
 		return PTR_ERR(cxlhdm);
 
-	rc = devm_cxl_enumerate_decoders(dev, cxlhdm);
+	rc = devm_cxl_enumerate_decoders(cxlhdm);
 	if (rc) {
 		dev_err(dev, "Couldn't enumerate decoders (%d)\n", rc);
 		return rc;
diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index ce6ace286fc7..40ed567952e6 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -399,8 +399,7 @@ static struct acpi_pci_root *mock_acpi_pci_find_root(acpi_handle handle)
 	return &mock_pci_root[host_bridge_index(adev)];
 }
 
-static struct cxl_hdm *mock_cxl_setup_hdm(struct device *host,
-					  struct cxl_port *port)
+static struct cxl_hdm *mock_cxl_setup_hdm(struct cxl_port *port)
 {
 	struct cxl_hdm *cxlhdm = devm_kzalloc(&port->dev, sizeof(*cxlhdm), GFP_KERNEL);
 
@@ -411,21 +410,18 @@ static struct cxl_hdm *mock_cxl_setup_hdm(struct device *host,
 	return cxlhdm;
 }
 
-static int mock_cxl_add_passthrough_decoder(struct device *host,
-					    struct cxl_port *port)
+static int mock_cxl_add_passthrough_decoder(struct cxl_port *port)
 {
 	dev_err(&port->dev, "unexpected passthrough decoder for cxl_test\n");
 	return -EOPNOTSUPP;
 }
 
-static int mock_cxl_enumerate_decoders(struct device *host,
-				       struct cxl_hdm *cxlhdm)
+static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm)
 {
 	return 0;
 }
 
-static int mock_cxl_port_enumerate_dports(struct device *host,
-					  struct cxl_port *port)
+static int mock_cxl_port_enumerate_dports(struct cxl_port *port)
 {
 	struct device *dev = &port->dev;
 	int i;
@@ -437,7 +433,7 @@ static int mock_cxl_port_enumerate_dports(struct device *host,
 		if (pdev->dev.parent != port->uport)
 			continue;
 
-		dport = devm_cxl_add_dport(host, port, &pdev->dev, pdev->id,
+		dport = devm_cxl_add_dport(port, &pdev->dev, pdev->id,
 					   CXL_RESOURCE_NONE);
 
 		if (IS_ERR(dport)) {
diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c
index 18d3b65e2a9b..6e8c9d63c92d 100644
--- a/tools/testing/cxl/test/mock.c
+++ b/tools/testing/cxl/test/mock.c
@@ -131,66 +131,62 @@ __wrap_nvdimm_bus_register(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(__wrap_nvdimm_bus_register);
 
-struct cxl_hdm *__wrap_devm_cxl_setup_hdm(struct device *host,
-					  struct cxl_port *port)
+struct cxl_hdm *__wrap_devm_cxl_setup_hdm(struct cxl_port *port)
 {
 	int index;
 	struct cxl_hdm *cxlhdm;
 	struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
 
 	if (ops && ops->is_mock_port(port->uport))
-		cxlhdm = ops->devm_cxl_setup_hdm(host, port);
+		cxlhdm = ops->devm_cxl_setup_hdm(port);
 	else
-		cxlhdm = devm_cxl_setup_hdm(host, port);
+		cxlhdm = devm_cxl_setup_hdm(port);
 	put_cxl_mock_ops(index);
 
 	return cxlhdm;
 }
 EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_setup_hdm, CXL);
 
-int __wrap_devm_cxl_add_passthrough_decoder(struct device *host,
-					    struct cxl_port *port)
+int __wrap_devm_cxl_add_passthrough_decoder(struct cxl_port *port)
 {
 	int rc, index;
 	struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
 
 	if (ops && ops->is_mock_port(port->uport))
-		rc = ops->devm_cxl_add_passthrough_decoder(host, port);
+		rc = ops->devm_cxl_add_passthrough_decoder(port);
 	else
-		rc = devm_cxl_add_passthrough_decoder(host, port);
+		rc = devm_cxl_add_passthrough_decoder(port);
 	put_cxl_mock_ops(index);
 
 	return rc;
 }
 EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_add_passthrough_decoder, CXL);
 
-int __wrap_devm_cxl_enumerate_decoders(struct device *host,
-				       struct cxl_hdm *cxlhdm)
+int __wrap_devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm)
 {
 	int rc, index;
 	struct cxl_port *port = cxlhdm->port;
 	struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
 
 	if (ops && ops->is_mock_port(port->uport))
-		rc = ops->devm_cxl_enumerate_decoders(host, cxlhdm);
+		rc = ops->devm_cxl_enumerate_decoders(cxlhdm);
 	else
-		rc = devm_cxl_enumerate_decoders(host, cxlhdm);
+		rc = devm_cxl_enumerate_decoders(cxlhdm);
 	put_cxl_mock_ops(index);
 
 	return rc;
 }
 EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_enumerate_decoders, CXL);
 
-int __wrap_devm_cxl_port_enumerate_dports(struct device *host,
-					  struct cxl_port *port)
+int __wrap_devm_cxl_port_enumerate_dports(struct cxl_port *port)
 {
 	int rc, index;
 	struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
 
 	if (ops && ops->is_mock_port(port->uport))
-		rc = ops->devm_cxl_port_enumerate_dports(host, port);
+		rc = ops->devm_cxl_port_enumerate_dports(port);
 	else
-		rc = devm_cxl_port_enumerate_dports(host, port);
+		rc = devm_cxl_port_enumerate_dports(port);
 	put_cxl_mock_ops(index);
 
 	return rc;
diff --git a/tools/testing/cxl/test/mock.h b/tools/testing/cxl/test/mock.h
index 15e48063ea4b..738f24e3988a 100644
--- a/tools/testing/cxl/test/mock.h
+++ b/tools/testing/cxl/test/mock.h
@@ -19,11 +19,10 @@ struct cxl_mock_ops {
 	bool (*is_mock_bus)(struct pci_bus *bus);
 	bool (*is_mock_port)(struct device *dev);
 	bool (*is_mock_dev)(struct device *dev);
-	int (*devm_cxl_port_enumerate_dports)(struct device *host,
-					      struct cxl_port *port);
-	struct cxl_hdm *(*devm_cxl_setup_hdm)(struct device *host, struct cxl_port *port);
-	int (*devm_cxl_add_passthrough_decoder)(struct device *host, struct cxl_port *port);
-	int (*devm_cxl_enumerate_decoders)(struct device *host, struct cxl_hdm *hdm);
+	int (*devm_cxl_port_enumerate_dports)(struct cxl_port *port);
+	struct cxl_hdm *(*devm_cxl_setup_hdm)(struct cxl_port *port);
+	int (*devm_cxl_add_passthrough_decoder)(struct cxl_port *port);
+	int (*devm_cxl_enumerate_decoders)(struct cxl_hdm *hdm);
 };
 
 void register_cxl_mock_ops(struct cxl_mock_ops *ops);
-- 
cgit 


From 523e594d9cc03db962c741ce02c8a58aab58a123 Mon Sep 17 00:00:00 2001
From: Ben Widawsky <ben.widawsky@intel.com>
Date: Sun, 23 Jan 2022 16:31:13 -0800
Subject: cxl/pci: Implement wait for media active

CXL 2.0 8.1.3.8.2 states:

  Memory_Active: When set, indicates that the CXL Range 1 memory is
  fully initialized and available for software use. Must be set within
  Range 1. Memory_Active_Timeout of deassertion of reset to CXL device
  if CXL.mem HwInit Mode=1

Unfortunately, Memory_Active can take quite a long time depending on
media size (up to 256s per 2.0 spec). Provide a callback for the
eventual establishment of CXL.mem operations via the 'cxl_mem' driver
the 'struct cxl_memdev'. The implementation waits for 60s by default for
now and can be overridden by the mbox_ready_time module parameter.

Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
[djbw: switch to sleeping wait]
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Link: https://lore.kernel.org/r/164298427373.3018233.9309741847039301834.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/cxl/cxlmem.h         |  2 ++
 drivers/cxl/pci.c            | 49 +++++++++++++++++++++++++++++++++++++++++++-
 tools/testing/cxl/test/mem.c |  8 ++++++++
 3 files changed, 58 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 00f55f4066b9..e70838e5dc17 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -132,6 +132,7 @@ struct cxl_endpoint_dvsec_info {
  * @component_reg_phys: register base of component registers
  * @info: Cached DVSEC information about the device.
  * @mbox_send: @dev specific transport for transmitting mailbox commands
+ * @wait_media_ready: @dev specific method to await media ready
  *
  * See section 8.2.9.5.2 Capacity Configuration and Label Storage for
  * details on capacity parameters.
@@ -165,6 +166,7 @@ struct cxl_dev_state {
 	struct cxl_endpoint_dvsec_info info;
 
 	int (*mbox_send)(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd);
+	int (*wait_media_ready)(struct cxl_dev_state *cxlds);
 };
 
 enum cxl_opcode {
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 6b3270246545..c01bd44d11c4 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -49,7 +49,7 @@
 static unsigned short mbox_ready_timeout = 60;
 module_param(mbox_ready_timeout, ushort, 0644);
 MODULE_PARM_DESC(mbox_ready_timeout,
-		 "seconds to wait for mailbox ready status");
+		 "seconds to wait for mailbox ready / memory active status");
 
 static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds)
 {
@@ -418,6 +418,51 @@ static int wait_for_valid(struct cxl_dev_state *cxlds)
 	return -ETIMEDOUT;
 }
 
+/*
+ * Wait up to @mbox_ready_timeout for the device to report memory
+ * active.
+ */
+static int wait_for_media_ready(struct cxl_dev_state *cxlds)
+{
+	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
+	int d = cxlds->cxl_dvsec;
+	bool active = false;
+	u64 md_status;
+	int rc, i;
+
+	rc = wait_for_valid(cxlds);
+	if (rc)
+		return rc;
+
+	for (i = mbox_ready_timeout; i; i--) {
+		u32 temp;
+		int rc;
+
+		rc = pci_read_config_dword(
+			pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &temp);
+		if (rc)
+			return rc;
+
+		active = FIELD_GET(CXL_DVSEC_MEM_ACTIVE, temp);
+		if (active)
+			break;
+		msleep(1000);
+	}
+
+	if (!active) {
+		dev_err(&pdev->dev,
+			"timeout awaiting memory active after %d seconds\n",
+			mbox_ready_timeout);
+		return -ETIMEDOUT;
+	}
+
+	md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
+	if (!CXLMDEV_READY(md_status))
+		return -EIO;
+
+	return 0;
+}
+
 static int cxl_dvsec_ranges(struct cxl_dev_state *cxlds)
 {
 	struct cxl_endpoint_dvsec_info *info = &cxlds->info;
@@ -528,6 +573,8 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		dev_warn(&pdev->dev,
 			 "Device DVSEC not present, skip CXL.mem init\n");
 
+	cxlds->wait_media_ready = wait_for_media_ready;
+
 	rc = cxl_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map);
 	if (rc)
 		return rc;
diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
index 8c2086c4caef..3af3f94de0c3 100644
--- a/tools/testing/cxl/test/mem.c
+++ b/tools/testing/cxl/test/mem.c
@@ -4,6 +4,7 @@
 #include <linux/platform_device.h>
 #include <linux/mod_devicetable.h>
 #include <linux/module.h>
+#include <linux/delay.h>
 #include <linux/sizes.h>
 #include <linux/bits.h>
 #include <cxlmem.h>
@@ -236,6 +237,12 @@ static int cxl_mock_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *
 	return rc;
 }
 
+static int cxl_mock_wait_media_ready(struct cxl_dev_state *cxlds)
+{
+	msleep(100);
+	return 0;
+}
+
 static void label_area_release(void *lsa)
 {
 	vfree(lsa);
@@ -262,6 +269,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
 		return PTR_ERR(cxlds);
 
 	cxlds->mbox_send = cxl_mock_mbox_send;
+	cxlds->wait_media_ready = cxl_mock_wait_media_ready;
 	cxlds->payload_size = SZ_4K;
 
 	rc = cxl_enumerate_cmds(cxlds);
-- 
cgit 


From bcc79ea34398845d814170ddc06a457b35ae1975 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 31 Jan 2022 13:56:11 -0800
Subject: cxl/pci: Emit device serial number

Per the CXL specification (8.1.12.2 Memory Device PCIe Capabilities and
Extended Capabilities) the Device Serial Number capability is mandatory.
Emit it for user tooling to identify devices.

It is reasonable to ask whether the attribute should be added to the
list of PCI sysfs device attributes. The PCI layer can optionally emit
it too, but the CXL subsystem is aiming to preserve its independence and
the possibility of CXL topologies with non-PCI devices in it. To date
that has only proven useful for the 'cxl_test' model, but as can be seen
with seen with ACPI0016 devices, sometimes all that is needed is a
platform firmware table to point to CXL Component Registers in MMIO
space to define a "CXL" device.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Link: https://lore.kernel.org/r/164366608838.196598.16856227191534267098.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 Documentation/ABI/testing/sysfs-bus-cxl |  9 +++++++++
 drivers/cxl/core/memdev.c               | 11 +++++++++++
 drivers/cxl/cxlmem.h                    |  2 ++
 drivers/cxl/pci.c                       |  1 +
 tools/testing/cxl/test/mem.c            |  1 +
 5 files changed, 24 insertions(+)

(limited to 'tools/testing')

diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl
index 6d8cbf3355b5..87c0e5e65322 100644
--- a/Documentation/ABI/testing/sysfs-bus-cxl
+++ b/Documentation/ABI/testing/sysfs-bus-cxl
@@ -25,6 +25,15 @@ Description:
 		identically named field in the Identify Memory Device Output
 		Payload in the CXL-2.0 specification.
 
+What:		/sys/bus/cxl/devices/memX/serial
+Date:		January, 2022
+KernelVersion:	v5.18
+Contact:	linux-cxl@vger.kernel.org
+Description:
+		(RO) 64-bit serial number per the PCIe Device Serial Number
+		capability. Mandatory for CXL devices, see CXL 2.0 8.1.12.2
+		Memory Device PCIe Capabilities and Extended Capabilities.
+
 What:		/sys/bus/cxl/devices/*/devtype
 Date:		June, 2021
 KernelVersion:	v5.14
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index 61029cb7ac62..1e574b052583 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -89,7 +89,18 @@ static ssize_t pmem_size_show(struct device *dev, struct device_attribute *attr,
 static struct device_attribute dev_attr_pmem_size =
 	__ATTR(size, 0444, pmem_size_show, NULL);
 
+static ssize_t serial_show(struct device *dev, struct device_attribute *attr,
+			   char *buf)
+{
+	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+	struct cxl_dev_state *cxlds = cxlmd->cxlds;
+
+	return sysfs_emit(buf, "%#llx\n", cxlds->serial);
+}
+static DEVICE_ATTR_RO(serial);
+
 static struct attribute *cxl_memdev_attributes[] = {
+	&dev_attr_serial.attr,
 	&dev_attr_firmware_version.attr,
 	&dev_attr_payload_max.attr,
 	&dev_attr_label_storage_size.attr,
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index e70838e5dc17..0ba0cf8dcdbc 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -131,6 +131,7 @@ struct cxl_endpoint_dvsec_info {
  * @next_persistent_bytes: persistent capacity change pending device reset
  * @component_reg_phys: register base of component registers
  * @info: Cached DVSEC information about the device.
+ * @serial: PCIe Device Serial Number
  * @mbox_send: @dev specific transport for transmitting mailbox commands
  * @wait_media_ready: @dev specific method to await media ready
  *
@@ -164,6 +165,7 @@ struct cxl_dev_state {
 
 	resource_size_t component_reg_phys;
 	struct cxl_endpoint_dvsec_info info;
+	u64 serial;
 
 	int (*mbox_send)(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd);
 	int (*wait_media_ready)(struct cxl_dev_state *cxlds);
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index c01bd44d11c4..8a7267d116b7 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -567,6 +567,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (IS_ERR(cxlds))
 		return PTR_ERR(cxlds);
 
+	cxlds->serial = pci_get_dsn(pdev);
 	cxlds->cxl_dvsec = pci_find_dvsec_capability(
 		pdev, PCI_DVSEC_VENDOR_ID_CXL, CXL_DVSEC_PCIE_DEVICE);
 	if (!cxlds->cxl_dvsec)
diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
index 3af3f94de0c3..36ef337c775c 100644
--- a/tools/testing/cxl/test/mem.c
+++ b/tools/testing/cxl/test/mem.c
@@ -268,6 +268,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
 	if (IS_ERR(cxlds))
 		return PTR_ERR(cxlds);
 
+	cxlds->serial = pdev->id;
 	cxlds->mbox_send = cxl_mock_mbox_send;
 	cxlds->wait_media_ready = cxl_mock_wait_media_ready;
 	cxlds->payload_size = SZ_4K;
-- 
cgit 


From cf1f6877b088cd9ddeb5f3db8ade3a61e3a3f9eb Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sun, 23 Jan 2022 16:31:24 -0800
Subject: cxl/memdev: Add numa_node attribute

While CXL memory targets will have their own memory target node,
individual memory devices may be affinitized like other PCI devices.
Emit that attribute for memdevs.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Link: https://lore.kernel.org/r/164298428430.3018233.16409089892707993289.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 Documentation/ABI/testing/sysfs-bus-cxl |  9 +++++++++
 drivers/cxl/core/memdev.c               | 17 +++++++++++++++++
 tools/testing/cxl/test/cxl.c            |  1 +
 3 files changed, 27 insertions(+)

(limited to 'tools/testing')

diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl
index 87c0e5e65322..0b51cfec0c66 100644
--- a/Documentation/ABI/testing/sysfs-bus-cxl
+++ b/Documentation/ABI/testing/sysfs-bus-cxl
@@ -34,6 +34,15 @@ Description:
 		capability. Mandatory for CXL devices, see CXL 2.0 8.1.12.2
 		Memory Device PCIe Capabilities and Extended Capabilities.
 
+What:		/sys/bus/cxl/devices/memX/numa_node
+Date:		January, 2022
+KernelVersion:	v5.18
+Contact:	linux-cxl@vger.kernel.org
+Description:
+		(RO) If NUMA is enabled and the platform has affinitized the
+		host PCI device for this memory device, emit the CPU node
+		affinity for this device.
+
 What:		/sys/bus/cxl/devices/*/devtype
 Date:		June, 2021
 KernelVersion:	v5.14
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index 1e574b052583..b2773664e407 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -99,11 +99,19 @@ static ssize_t serial_show(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RO(serial);
 
+static ssize_t numa_node_show(struct device *dev, struct device_attribute *attr,
+			      char *buf)
+{
+	return sprintf(buf, "%d\n", dev_to_node(dev));
+}
+static DEVICE_ATTR_RO(numa_node);
+
 static struct attribute *cxl_memdev_attributes[] = {
 	&dev_attr_serial.attr,
 	&dev_attr_firmware_version.attr,
 	&dev_attr_payload_max.attr,
 	&dev_attr_label_storage_size.attr,
+	&dev_attr_numa_node.attr,
 	NULL,
 };
 
@@ -117,8 +125,17 @@ static struct attribute *cxl_memdev_ram_attributes[] = {
 	NULL,
 };
 
+static umode_t cxl_memdev_visible(struct kobject *kobj, struct attribute *a,
+				  int n)
+{
+	if (!IS_ENABLED(CONFIG_NUMA) && a == &dev_attr_numa_node.attr)
+		return 0;
+	return a->mode;
+}
+
 static struct attribute_group cxl_memdev_attribute_group = {
 	.attrs = cxl_memdev_attributes,
+	.is_visible = cxl_memdev_visible,
 };
 
 static struct attribute_group cxl_memdev_ram_attribute_group = {
diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index 40ed567952e6..cd2f20f2707f 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -583,6 +583,7 @@ static __init int cxl_test_init(void)
 		if (!pdev)
 			goto err_mem;
 		pdev->dev.parent = &port->dev;
+		set_dev_node(&pdev->dev, i % 2);
 
 		rc = platform_device_add(pdev);
 		if (rc) {
-- 
cgit 


From 8dd2bc0f8e02d39bd80851ca787bcbdb7d495e69 Mon Sep 17 00:00:00 2001
From: Ben Widawsky <ben.widawsky@intel.com>
Date: Fri, 4 Feb 2022 07:18:31 -0800
Subject: cxl/mem: Add the cxl_mem driver

At this point the subsystem can enumerate all CXL ports (CXL.mem decode
resources in upstream switch ports and host bridges) in a system. The
last mile is connecting those ports to endpoints.

The cxl_mem driver connects an endpoint device to the platform CXL.mem
protoctol decode-topology. At ->probe() time it walks its
device-topology-ancestry and adds a CXL Port object at every Upstream
Port hop until it gets to CXL root. The CXL root object is only present
after a platform firmware driver registers platform CXL resources. For
ACPI based platform this is managed by the ACPI0017 device and the
cxl_acpi driver.

The ports are registered such that disabling a given port automatically
unregisters all descendant ports, and the chain can only be registered
after the root is established.

Given ACPI device scanning may run asynchronously compared to PCI device
scanning the root driver is tasked with rescanning the bus after the
root successfully probes.

Conversely if any ports in a chain between the root and an endpoint
becomes disconnected it subsequently triggers the endpoint to
unregister. Given lock depenedencies the endpoint unregistration happens
in a workqueue asynchronously. If userspace cares about synchronizing
delayed work after port events the /sys/bus/cxl/flush attribute is
available for that purpose.

Reported-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
[djbw: clarify changelog, rework hotplug support]
Link: https://lore.kernel.org/r/164398782997.903003.9725273241627693186.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 Documentation/ABI/testing/sysfs-bus-cxl         |   9 +
 Documentation/driver-api/cxl/memory-devices.rst |   9 +
 drivers/cxl/Kconfig                             |  16 ++
 drivers/cxl/Makefile                            |   2 +
 drivers/cxl/acpi.c                              |   3 +-
 drivers/cxl/core/memdev.c                       |  16 ++
 drivers/cxl/core/port.c                         | 105 ++++++++++-
 drivers/cxl/cxl.h                               |   6 +
 drivers/cxl/cxlmem.h                            |   8 +
 drivers/cxl/mem.c                               | 228 ++++++++++++++++++++++++
 drivers/cxl/port.c                              |  12 ++
 tools/testing/cxl/Kbuild                        |   6 +
 tools/testing/cxl/mock_mem.c                    |  10 ++
 13 files changed, 425 insertions(+), 5 deletions(-)
 create mode 100644 drivers/cxl/mem.c
 create mode 100644 tools/testing/cxl/mock_mem.c

(limited to 'tools/testing')

diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl
index 0b51cfec0c66..7c2b846521f3 100644
--- a/Documentation/ABI/testing/sysfs-bus-cxl
+++ b/Documentation/ABI/testing/sysfs-bus-cxl
@@ -1,3 +1,12 @@
+What:		/sys/bus/cxl/flush
+Date:		Januarry, 2022
+KernelVersion:	v5.18
+Contact:	linux-cxl@vger.kernel.org
+Description:
+		(WO) If userspace manually unbinds a port the kernel schedules
+		all descendant memdevs for unbind. Writing '1' to this attribute
+		flushes that work.
+
 What:		/sys/bus/cxl/devices/memX/firmware_version
 Date:		December, 2020
 KernelVersion:	v5.12
diff --git a/Documentation/driver-api/cxl/memory-devices.rst b/Documentation/driver-api/cxl/memory-devices.rst
index 3498d38d7cbd..db476bb170b6 100644
--- a/Documentation/driver-api/cxl/memory-devices.rst
+++ b/Documentation/driver-api/cxl/memory-devices.rst
@@ -325,6 +325,9 @@ CXL Memory Device
 .. kernel-doc:: drivers/cxl/pci.c
    :internal:
 
+.. kernel-doc:: drivers/cxl/mem.c
+   :doc: cxl mem
+
 CXL Port
 --------
 .. kernel-doc:: drivers/cxl/port.c
@@ -344,6 +347,12 @@ CXL Core
 .. kernel-doc:: drivers/cxl/core/port.c
    :identifiers:
 
+.. kernel-doc:: drivers/cxl/core/pci.c
+   :doc: cxl core pci
+
+.. kernel-doc:: drivers/cxl/core/pci.c
+   :identifiers:
+
 .. kernel-doc:: drivers/cxl/core/pmem.c
    :doc: cxl pmem
 
diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig
index 4f4f7587f6ca..b88ab956bb7c 100644
--- a/drivers/cxl/Kconfig
+++ b/drivers/cxl/Kconfig
@@ -78,6 +78,22 @@ config CXL_PMEM
 
 	  If unsure say 'm'.
 
+config CXL_MEM
+	tristate "CXL: Memory Expansion"
+	depends on CXL_PCI
+	default CXL_BUS
+	help
+	  The CXL.mem protocol allows a device to act as a provider of "System
+	  RAM" and/or "Persistent Memory" that is fully coherent as if the
+	  memory were attached to the typical CPU memory controller. This is
+	  known as HDM "Host-managed Device Memory".
+
+	  Say 'y/m' to enable a driver that will attach to CXL.mem devices for
+	  memory expansion and control of HDM. See Chapter 9.13 in the CXL 2.0
+	  specification for a detailed description of HDM.
+
+	  If unsure say 'm'.
+
 config CXL_PORT
 	default CXL_BUS
 	tristate
diff --git a/drivers/cxl/Makefile b/drivers/cxl/Makefile
index 56fcac2323cb..ce267ef11d93 100644
--- a/drivers/cxl/Makefile
+++ b/drivers/cxl/Makefile
@@ -1,10 +1,12 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_CXL_BUS) += core/
 obj-$(CONFIG_CXL_PCI) += cxl_pci.o
+obj-$(CONFIG_CXL_MEM) += cxl_mem.o
 obj-$(CONFIG_CXL_ACPI) += cxl_acpi.o
 obj-$(CONFIG_CXL_PMEM) += cxl_pmem.o
 obj-$(CONFIG_CXL_PORT) += cxl_port.o
 
+cxl_mem-y := mem.o
 cxl_pci-y := pci.o
 cxl_acpi-y := acpi.o
 cxl_pmem-y := pmem.o
diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index 7bd53dc691ec..d8295572bde9 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -314,7 +314,8 @@ static int cxl_acpi_probe(struct platform_device *pdev)
 	if (rc < 0)
 		return rc;
 
-	return 0;
+	/* In case PCI is scanned before ACPI re-trigger memdev attach */
+	return cxl_bus_rescan();
 }
 
 static const struct acpi_device_id cxl_acpi_ids[] = {
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index b2773664e407..1f76b28f9826 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -162,6 +162,12 @@ static const struct device_type cxl_memdev_type = {
 	.groups = cxl_memdev_attribute_groups,
 };
 
+bool is_cxl_memdev(struct device *dev)
+{
+	return dev->type == &cxl_memdev_type;
+}
+EXPORT_SYMBOL_NS_GPL(is_cxl_memdev, CXL);
+
 /**
  * set_exclusive_cxl_commands() - atomically disable user cxl commands
  * @cxlds: The device state to operate on
@@ -213,6 +219,15 @@ static void cxl_memdev_unregister(void *_cxlmd)
 	put_device(dev);
 }
 
+static void detach_memdev(struct work_struct *work)
+{
+	struct cxl_memdev *cxlmd;
+
+	cxlmd = container_of(work, typeof(*cxlmd), detach_work);
+	device_release_driver(&cxlmd->dev);
+	put_device(&cxlmd->dev);
+}
+
 static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds,
 					   const struct file_operations *fops)
 {
@@ -237,6 +252,7 @@ static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds,
 	dev->devt = MKDEV(cxl_mem_major, cxlmd->id);
 	dev->type = &cxl_memdev_type;
 	device_set_pm_not_required(dev);
+	INIT_WORK(&cxlmd->detach_work, detach_memdev);
 
 	cdev = &cxlmd->cdev;
 	cdev_init(cdev, fops);
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index c5779c982c80..f460460b12b3 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /* Copyright(c) 2020 Intel Corporation. All rights reserved. */
 #include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/workqueue.h>
 #include <linux/device.h>
 #include <linux/module.h>
 #include <linux/pci.h>
@@ -46,6 +47,8 @@ static int cxl_device_id(struct device *dev)
 			return CXL_DEVICE_ROOT;
 		return CXL_DEVICE_PORT;
 	}
+	if (is_cxl_memdev(dev))
+		return CXL_DEVICE_MEMORY_EXPANDER;
 	return 0;
 }
 
@@ -318,8 +321,10 @@ static void unregister_port(void *_port)
 {
 	struct cxl_port *port = _port;
 
-	if (!is_cxl_root(port))
+	if (!is_cxl_root(port)) {
 		device_lock_assert(port->dev.parent);
+		port->uport = NULL;
+	}
 
 	device_unregister(&port->dev);
 }
@@ -410,7 +415,9 @@ struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
 	if (parent_port)
 		port->depth = parent_port->depth + 1;
 	dev = &port->dev;
-	if (parent_port)
+	if (is_cxl_memdev(uport))
+		rc = dev_set_name(dev, "endpoint%d", port->id);
+	else if (parent_port)
 		rc = dev_set_name(dev, "port%d", port->id);
 	else
 		rc = dev_set_name(dev, "root%d", port->id);
@@ -790,6 +797,38 @@ static struct device *grandparent(struct device *dev)
 	return NULL;
 }
 
+static void delete_endpoint(void *data)
+{
+	struct cxl_memdev *cxlmd = data;
+	struct cxl_port *endpoint = dev_get_drvdata(&cxlmd->dev);
+	struct cxl_port *parent_port;
+	struct device *parent;
+
+	parent_port = cxl_mem_find_port(cxlmd);
+	if (!parent_port)
+		return;
+	parent = &parent_port->dev;
+
+	cxl_device_lock(parent);
+	if (parent->driver && endpoint->uport) {
+		devm_release_action(parent, cxl_unlink_uport, endpoint);
+		devm_release_action(parent, unregister_port, endpoint);
+	}
+	cxl_device_unlock(parent);
+	put_device(parent);
+	put_device(&endpoint->dev);
+}
+
+int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint)
+{
+	struct device *dev = &cxlmd->dev;
+
+	get_device(&endpoint->dev);
+	dev_set_drvdata(dev, endpoint);
+	return devm_add_action_or_reset(dev, delete_endpoint, cxlmd);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_endpoint_autoremove, CXL);
+
 /*
  * The natural end of life of a non-root 'cxl_port' is when its parent port goes
  * through a ->remove() event ("top-down" unregistration). The unnatural trigger
@@ -1034,6 +1073,12 @@ retry:
 }
 EXPORT_SYMBOL_NS_GPL(devm_cxl_enumerate_ports, CXL);
 
+struct cxl_port *cxl_mem_find_port(struct cxl_memdev *cxlmd)
+{
+	return find_cxl_port(grandparent(&cxlmd->dev));
+}
+EXPORT_SYMBOL_NS_GPL(cxl_mem_find_port, CXL);
+
 struct cxl_dport *cxl_find_dport_by_dev(struct cxl_port *port,
 					const struct device *dev)
 {
@@ -1352,12 +1397,54 @@ static void cxl_bus_remove(struct device *dev)
 	cxl_nested_unlock(dev);
 }
 
+static struct workqueue_struct *cxl_bus_wq;
+
+int cxl_bus_rescan(void)
+{
+	return bus_rescan_devices(&cxl_bus_type);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_bus_rescan, CXL);
+
+bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd)
+{
+	return queue_work(cxl_bus_wq, &cxlmd->detach_work);
+}
+EXPORT_SYMBOL_NS_GPL(schedule_cxl_memdev_detach, CXL);
+
+/* for user tooling to ensure port disable work has completed */
+static ssize_t flush_store(struct bus_type *bus, const char *buf, size_t count)
+{
+	if (sysfs_streq(buf, "1")) {
+		flush_workqueue(cxl_bus_wq);
+		return count;
+	}
+
+	return -EINVAL;
+}
+
+static BUS_ATTR_WO(flush);
+
+static struct attribute *cxl_bus_attributes[] = {
+	&bus_attr_flush.attr,
+	NULL,
+};
+
+static struct attribute_group cxl_bus_attribute_group = {
+	.attrs = cxl_bus_attributes,
+};
+
+static const struct attribute_group *cxl_bus_attribute_groups[] = {
+	&cxl_bus_attribute_group,
+	NULL,
+};
+
 struct bus_type cxl_bus_type = {
 	.name = "cxl",
 	.uevent = cxl_bus_uevent,
 	.match = cxl_bus_match,
 	.probe = cxl_bus_probe,
 	.remove = cxl_bus_remove,
+	.bus_groups = cxl_bus_attribute_groups,
 };
 EXPORT_SYMBOL_NS_GPL(cxl_bus_type, CXL);
 
@@ -1371,12 +1458,21 @@ static __init int cxl_core_init(void)
 	if (rc)
 		return rc;
 
+	cxl_bus_wq = alloc_ordered_workqueue("cxl_port", 0);
+	if (!cxl_bus_wq) {
+		rc = -ENOMEM;
+		goto err_wq;
+	}
+
 	rc = bus_register(&cxl_bus_type);
 	if (rc)
-		goto err;
+		goto err_bus;
+
 	return 0;
 
-err:
+err_bus:
+	destroy_workqueue(cxl_bus_wq);
+err_wq:
 	cxl_memdev_exit();
 	cxl_mbox_exit();
 	return rc;
@@ -1385,6 +1481,7 @@ err:
 static void cxl_core_exit(void)
 {
 	bus_unregister(&cxl_bus_type);
+	destroy_workqueue(cxl_bus_wq);
 	cxl_memdev_exit();
 	cxl_mbox_exit();
 }
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index de4fbe7cbf42..f5e5b4ac8228 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -328,6 +328,9 @@ struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
 				   struct cxl_port *parent_port);
 struct cxl_port *find_cxl_root(struct device *dev);
 int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd);
+int cxl_bus_rescan(void);
+struct cxl_port *cxl_mem_find_port(struct cxl_memdev *cxlmd);
+bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd);
 
 struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port,
 				     struct device *dport, int port_id,
@@ -345,6 +348,8 @@ struct cxl_decoder *cxl_switch_decoder_alloc(struct cxl_port *port,
 int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map);
 int cxl_decoder_add_locked(struct cxl_decoder *cxld, int *target_map);
 int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld);
+int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint);
+
 struct cxl_hdm;
 struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port);
 int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm);
@@ -377,6 +382,7 @@ void cxl_driver_unregister(struct cxl_driver *cxl_drv);
 #define CXL_DEVICE_NVDIMM		2
 #define CXL_DEVICE_PORT			3
 #define CXL_DEVICE_ROOT			4
+#define CXL_DEVICE_MEMORY_EXPANDER	5
 
 #define MODULE_ALIAS_CXL(type) MODULE_ALIAS("cxl:t" __stringify(type) "*")
 #define CXL_MODALIAS_FMT "cxl:t%d"
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 0ba0cf8dcdbc..5d33ce24fe09 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -34,12 +34,14 @@
  * @dev: driver core device object
  * @cdev: char dev core object for ioctl operations
  * @cxlds: The device state backing this device
+ * @detach_work: active memdev lost a port in its ancestry
  * @id: id number of this memdev instance.
  */
 struct cxl_memdev {
 	struct device dev;
 	struct cdev cdev;
 	struct cxl_dev_state *cxlds;
+	struct work_struct detach_work;
 	int id;
 };
 
@@ -48,6 +50,12 @@ static inline struct cxl_memdev *to_cxl_memdev(struct device *dev)
 	return container_of(dev, struct cxl_memdev, dev);
 }
 
+bool is_cxl_memdev(struct device *dev);
+static inline bool is_cxl_endpoint(struct cxl_port *port)
+{
+	return is_cxl_memdev(port->uport);
+}
+
 struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds);
 
 /**
diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
new file mode 100644
index 000000000000..49a4b1c47299
--- /dev/null
+++ b/drivers/cxl/mem.c
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "cxlmem.h"
+#include "cxlpci.h"
+
+/**
+ * DOC: cxl mem
+ *
+ * CXL memory endpoint devices and switches are CXL capable devices that are
+ * participating in CXL.mem protocol. Their functionality builds on top of the
+ * CXL.io protocol that allows enumerating and configuring components via
+ * standard PCI mechanisms.
+ *
+ * The cxl_mem driver owns kicking off the enumeration of this CXL.mem
+ * capability. With the detection of a CXL capable endpoint, the driver will
+ * walk up to find the platform specific port it is connected to, and determine
+ * if there are intervening switches in the path. If there are switches, a
+ * secondary action is to enumerate those (implemented in cxl_core). Finally the
+ * cxl_mem driver adds the device it is bound to as a CXL endpoint-port for use
+ * in higher level operations.
+ */
+
+static int wait_for_media(struct cxl_memdev *cxlmd)
+{
+	struct cxl_dev_state *cxlds = cxlmd->cxlds;
+	struct cxl_endpoint_dvsec_info *info = &cxlds->info;
+	int rc;
+
+	if (!info->mem_enabled)
+		return -EBUSY;
+
+	rc = cxlds->wait_media_ready(cxlds);
+	if (rc)
+		return rc;
+
+	/*
+	 * We know the device is active, and enabled, if any ranges are non-zero
+	 * we'll need to check later before adding the port since that owns the
+	 * HDM decoder registers.
+	 */
+	return 0;
+}
+
+static int create_endpoint(struct cxl_memdev *cxlmd,
+			   struct cxl_port *parent_port)
+{
+	struct cxl_dev_state *cxlds = cxlmd->cxlds;
+	struct cxl_port *endpoint;
+
+	endpoint = devm_cxl_add_port(&parent_port->dev, &cxlmd->dev,
+				     cxlds->component_reg_phys, parent_port);
+	if (IS_ERR(endpoint))
+		return PTR_ERR(endpoint);
+
+	dev_dbg(&cxlmd->dev, "add: %s\n", dev_name(&endpoint->dev));
+
+	if (!endpoint->dev.driver) {
+		dev_err(&cxlmd->dev, "%s failed probe\n",
+			dev_name(&endpoint->dev));
+		return -ENXIO;
+	}
+
+	return cxl_endpoint_autoremove(cxlmd, endpoint);
+}
+
+/**
+ * cxl_dvsec_decode_init() - Setup HDM decoding for the endpoint
+ * @cxlds: Device state
+ *
+ * Additionally, enables global HDM decoding. Warning: don't call this outside
+ * of probe. Once probe is complete, the port driver owns all access to the HDM
+ * decoder registers.
+ *
+ * Returns: false if DVSEC Ranges are being used instead of HDM
+ * decoders, or if it can not be determined if DVSEC Ranges are in use.
+ * Otherwise, returns true.
+ */
+__mock bool cxl_dvsec_decode_init(struct cxl_dev_state *cxlds)
+{
+	struct cxl_endpoint_dvsec_info *info = &cxlds->info;
+	struct cxl_register_map map;
+	struct cxl_component_reg_map *cmap = &map.component_map;
+	bool global_enable, do_hdm_init = false;
+	void __iomem *crb;
+	u32 global_ctrl;
+
+	/* map hdm decoder */
+	crb = ioremap(cxlds->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE);
+	if (!crb) {
+		dev_dbg(cxlds->dev, "Failed to map component registers\n");
+		return false;
+	}
+
+	cxl_probe_component_regs(cxlds->dev, crb, cmap);
+	if (!cmap->hdm_decoder.valid) {
+		dev_dbg(cxlds->dev, "Invalid HDM decoder registers\n");
+		goto out;
+	}
+
+	global_ctrl = readl(crb + cmap->hdm_decoder.offset +
+			    CXL_HDM_DECODER_CTRL_OFFSET);
+	global_enable = global_ctrl & CXL_HDM_DECODER_ENABLE;
+	if (!global_enable && info->ranges) {
+		dev_dbg(cxlds->dev,
+			"DVSEC ranges already programmed and HDM decoders not enabled.\n");
+		goto out;
+	}
+
+	do_hdm_init = true;
+
+	/*
+	 * Permanently (for this boot at least) opt the device into HDM
+	 * operation. Individual HDM decoders still need to be enabled after
+	 * this point.
+	 */
+	if (!global_enable) {
+		dev_dbg(cxlds->dev, "Enabling HDM decode\n");
+		writel(global_ctrl | CXL_HDM_DECODER_ENABLE,
+		       crb + cmap->hdm_decoder.offset +
+			       CXL_HDM_DECODER_CTRL_OFFSET);
+	}
+
+out:
+	iounmap(crb);
+	return do_hdm_init;
+}
+
+static int cxl_mem_probe(struct device *dev)
+{
+	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+	struct cxl_dev_state *cxlds = cxlmd->cxlds;
+	struct cxl_port *parent_port;
+	int rc;
+
+	/*
+	 * Someone is trying to reattach this device after it lost its port
+	 * connection (an endpoint port previously registered by this memdev was
+	 * disabled). This racy check is ok because if the port is still gone,
+	 * no harm done, and if the port hierarchy comes back it will re-trigger
+	 * this probe. Port rescan and memdev detach work share the same
+	 * single-threaded workqueue.
+	 */
+	if (work_pending(&cxlmd->detach_work))
+		return -EBUSY;
+
+	rc = wait_for_media(cxlmd);
+	if (rc) {
+		dev_err(dev, "Media not active (%d)\n", rc);
+		return rc;
+	}
+
+	/*
+	 * If DVSEC ranges are being used instead of HDM decoder registers there
+	 * is no use in trying to manage those.
+	 */
+	if (!cxl_dvsec_decode_init(cxlds)) {
+		struct cxl_endpoint_dvsec_info *info = &cxlds->info;
+		int i;
+
+		/* */
+		for (i = 0; i < 2; i++) {
+			u64 base, size;
+
+			/*
+			 * Give a nice warning to the user that BIOS has really
+			 * botched things for them if it didn't place DVSEC
+			 * ranges in the memory map.
+			 */
+			base = info->dvsec_range[i].start;
+			size = range_len(&info->dvsec_range[i]);
+			if (size && !region_intersects(base, size,
+						       IORESOURCE_SYSTEM_RAM,
+						       IORES_DESC_NONE)) {
+				dev_err(dev,
+					"DVSEC range %#llx-%#llx must be reserved by BIOS, but isn't\n",
+					base, base + size - 1);
+			}
+		}
+		dev_err(dev,
+			"Active DVSEC range registers in use. Will not bind.\n");
+		return -EBUSY;
+	}
+
+	rc = devm_cxl_enumerate_ports(cxlmd);
+	if (rc)
+		return rc;
+
+	parent_port = cxl_mem_find_port(cxlmd);
+	if (!parent_port) {
+		dev_err(dev, "CXL port topology not found\n");
+		return -ENXIO;
+	}
+
+	cxl_device_lock(&parent_port->dev);
+	if (!parent_port->dev.driver) {
+		dev_err(dev, "CXL port topology %s not enabled\n",
+			dev_name(&parent_port->dev));
+		rc = -ENXIO;
+		goto out;
+	}
+
+	rc = create_endpoint(cxlmd, parent_port);
+out:
+	cxl_device_unlock(&parent_port->dev);
+	put_device(&parent_port->dev);
+	return rc;
+}
+
+static struct cxl_driver cxl_mem_driver = {
+	.name = "cxl_mem",
+	.probe = cxl_mem_probe,
+	.id = CXL_DEVICE_MEMORY_EXPANDER,
+};
+
+module_cxl_driver(cxl_mem_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_IMPORT_NS(CXL);
+MODULE_ALIAS_CXL(CXL_DEVICE_MEMORY_EXPANDER);
+/*
+ * create_endpoint() wants to validate port driver attach immediately after
+ * endpoint registration.
+ */
+MODULE_SOFTDEP("pre: cxl_port");
diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
index 5a1aec28dc46..4d4e23b9adff 100644
--- a/drivers/cxl/port.c
+++ b/drivers/cxl/port.c
@@ -25,12 +25,24 @@
  * PCIe topology.
  */
 
+static void schedule_detach(void *cxlmd)
+{
+	schedule_cxl_memdev_detach(cxlmd);
+}
+
 static int cxl_port_probe(struct device *dev)
 {
 	struct cxl_port *port = to_cxl_port(dev);
 	struct cxl_hdm *cxlhdm;
 	int rc;
 
+	if (is_cxl_endpoint(port)) {
+		struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport);
+
+		get_device(&cxlmd->dev);
+		return devm_add_action_or_reset(dev, schedule_detach, cxlmd);
+	}
+
 	rc = devm_cxl_port_enumerate_dports(port);
 	if (rc < 0)
 		return rc;
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
index 27ae13e23e79..82e49ab0937d 100644
--- a/tools/testing/cxl/Kbuild
+++ b/tools/testing/cxl/Kbuild
@@ -31,6 +31,12 @@ obj-m += cxl_port.o
 cxl_port-y := $(CXL_SRC)/port.o
 cxl_port-y += config_check.o
 
+obj-m += cxl_mem.o
+
+cxl_mem-y := $(CXL_SRC)/mem.o
+cxl_mem-y += mock_mem.o
+cxl_mem-y += config_check.o
+
 obj-m += cxl_core.o
 
 cxl_core-y := $(CXL_CORE_SRC)/port.o
diff --git a/tools/testing/cxl/mock_mem.c b/tools/testing/cxl/mock_mem.c
new file mode 100644
index 000000000000..d1dec5845139
--- /dev/null
+++ b/tools/testing/cxl/mock_mem.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
+
+#include <linux/types.h>
+
+struct cxl_dev_state;
+bool cxl_dvsec_decode_init(struct cxl_dev_state *cxlds)
+{
+	return true;
+}
-- 
cgit 


From f246abd67ff0dcb471ce2361a913b935d4c8ac11 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sun, 23 Jan 2022 16:31:51 -0800
Subject: tools/testing/cxl: Mock dvsec_ranges()

For test purposes, pretend that that CXL DVSEC ranges are not in active
use and the device is ready CXL.mem operation.

Link: https://lore.kernel.org/r/164298431119.3018233.17175518196764977542.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 tools/testing/cxl/test/mem.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
index 36ef337c775c..b6b726eff3e2 100644
--- a/tools/testing/cxl/test/mem.c
+++ b/tools/testing/cxl/test/mem.c
@@ -248,6 +248,14 @@ static void label_area_release(void *lsa)
 	vfree(lsa);
 }
 
+static void mock_validate_dvsec_ranges(struct cxl_dev_state *cxlds)
+{
+	struct cxl_endpoint_dvsec_info *info;
+
+	info = &cxlds->info;
+	info->mem_enabled = true;
+}
+
 static int cxl_mock_mem_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -285,6 +293,8 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
 	if (rc)
 		return rc;
 
+	mock_validate_dvsec_ranges(cxlds);
+
 	cxlmd = devm_cxl_add_memdev(cxlds);
 	if (IS_ERR(cxlmd))
 		return PTR_ERR(cxlmd);
-- 
cgit 


From a4a0ce242fcd7022349212c4e2f795762e6ff050 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sun, 23 Jan 2022 16:31:56 -0800
Subject: tools/testing/cxl: Fix root port to host bridge assignment

Mocked root-ports are meant to be round-robin assigned to host-bridges.

Fixes: 67dcdd4d3b83 ("tools/testing/cxl: Introduce a mocked-up CXL port hierarchy")
Link: https://lore.kernel.org/r/164298431629.3018233.14004377108116384485.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 tools/testing/cxl/test/cxl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index cd2f20f2707f..7e4a0b1ee436 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -558,7 +558,7 @@ static __init int cxl_test_init(void)
 
 	for (i = 0; i < ARRAY_SIZE(cxl_root_port); i++) {
 		struct platform_device *bridge =
-			cxl_host_bridge[i / NR_CXL_ROOT_PORTS];
+			cxl_host_bridge[i % ARRAY_SIZE(cxl_host_bridge)];
 		struct platform_device *pdev;
 
 		pdev = platform_device_alloc("cxl_root_port", i);
-- 
cgit 


From c1915142e8c1168498c8b08cd4e02728d1c33563 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sun, 23 Jan 2022 16:32:01 -0800
Subject: tools/testing/cxl: Mock one level of switches

The CXL port enumeration process adds intermediate CXL ports that are
discovered between "root" CXL ports enumerated by 'cxl_acpi' and
endpoints enumerated by 'cxl_pci + cxl_mem'. Test the dynamic discovery
of intermediate switch ports in a CXL topology.

Link: https://lore.kernel.org/r/164298432189.3018233.13142151550113000967.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 tools/testing/cxl/test/cxl.c | 138 ++++++++++++++++++++++++++++++-------------
 1 file changed, 97 insertions(+), 41 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index 7e4a0b1ee436..ea88fabc3198 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -11,14 +11,21 @@
 #include <cxlmem.h>
 #include "mock.h"
 
-#define NR_CXL_HOST_BRIDGES 4
+#define NR_CXL_HOST_BRIDGES 2
 #define NR_CXL_ROOT_PORTS 2
+#define NR_CXL_SWITCH_PORTS 2
 
 static struct platform_device *cxl_acpi;
 static struct platform_device *cxl_host_bridge[NR_CXL_HOST_BRIDGES];
 static struct platform_device
 	*cxl_root_port[NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS];
-struct platform_device *cxl_mem[NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS];
+static struct platform_device
+	*cxl_switch_uport[NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS];
+static struct platform_device
+	*cxl_switch_dport[NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS *
+			  NR_CXL_SWITCH_PORTS];
+struct platform_device
+	*cxl_mem[NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS * NR_CXL_SWITCH_PORTS];
 
 static struct acpi_device acpi0017_mock;
 static struct acpi_device host_bridge[NR_CXL_HOST_BRIDGES] = {
@@ -28,12 +35,6 @@ static struct acpi_device host_bridge[NR_CXL_HOST_BRIDGES] = {
 	[1] = {
 		.handle = &host_bridge[1],
 	},
-	[2] = {
-		.handle = &host_bridge[2],
-	},
-	[3] = {
-		.handle = &host_bridge[3],
-	},
 };
 
 static bool is_mock_dev(struct device *dev)
@@ -71,7 +72,7 @@ static struct {
 	} cfmws0;
 	struct {
 		struct acpi_cedt_cfmws cfmws;
-		u32 target[4];
+		u32 target[2];
 	} cfmws1;
 	struct {
 		struct acpi_cedt_cfmws cfmws;
@@ -79,7 +80,7 @@ static struct {
 	} cfmws2;
 	struct {
 		struct acpi_cedt_cfmws cfmws;
-		u32 target[4];
+		u32 target[2];
 	} cfmws3;
 } __packed mock_cedt = {
 	.cedt = {
@@ -105,22 +106,6 @@ static struct {
 		.uid = 1,
 		.cxl_version = ACPI_CEDT_CHBS_VERSION_CXL20,
 	},
-	.chbs[2] = {
-		.header = {
-			.type = ACPI_CEDT_TYPE_CHBS,
-			.length = sizeof(mock_cedt.chbs[0]),
-		},
-		.uid = 2,
-		.cxl_version = ACPI_CEDT_CHBS_VERSION_CXL20,
-	},
-	.chbs[3] = {
-		.header = {
-			.type = ACPI_CEDT_TYPE_CHBS,
-			.length = sizeof(mock_cedt.chbs[0]),
-		},
-		.uid = 3,
-		.cxl_version = ACPI_CEDT_CHBS_VERSION_CXL20,
-	},
 	.cfmws0 = {
 		.cfmws = {
 			.header = {
@@ -142,14 +127,14 @@ static struct {
 				.type = ACPI_CEDT_TYPE_CFMWS,
 				.length = sizeof(mock_cedt.cfmws1),
 			},
-			.interleave_ways = 2,
+			.interleave_ways = 1,
 			.granularity = 4,
 			.restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
 					ACPI_CEDT_CFMWS_RESTRICT_VOLATILE,
 			.qtg_id = 1,
-			.window_size = SZ_256M * 4,
+			.window_size = SZ_256M * 2,
 		},
-		.target = { 0, 1, 2, 3 },
+		.target = { 0, 1, },
 	},
 	.cfmws2 = {
 		.cfmws = {
@@ -172,14 +157,14 @@ static struct {
 				.type = ACPI_CEDT_TYPE_CFMWS,
 				.length = sizeof(mock_cedt.cfmws3),
 			},
-			.interleave_ways = 2,
+			.interleave_ways = 1,
 			.granularity = 4,
 			.restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
 					ACPI_CEDT_CFMWS_RESTRICT_PMEM,
 			.qtg_id = 3,
-			.window_size = SZ_256M * 4,
+			.window_size = SZ_256M * 2,
 		},
-		.target = { 0, 1, 2, 3 },
+		.target = { 0, 1, },
 	},
 };
 
@@ -332,6 +317,17 @@ static bool is_mock_port(struct device *dev)
 		if (dev == &cxl_root_port[i]->dev)
 			return true;
 
+	for (i = 0; i < ARRAY_SIZE(cxl_switch_uport); i++)
+		if (dev == &cxl_switch_uport[i]->dev)
+			return true;
+
+	for (i = 0; i < ARRAY_SIZE(cxl_switch_dport); i++)
+		if (dev == &cxl_switch_dport[i]->dev)
+			return true;
+
+	if (is_cxl_memdev(dev))
+		return is_mock_dev(dev->parent);
+
 	return false;
 }
 
@@ -372,12 +368,6 @@ static struct acpi_pci_root mock_pci_root[NR_CXL_HOST_BRIDGES] = {
 	[1] = {
 		.bus = &mock_pci_bus[1],
 	},
-	[2] = {
-		.bus = &mock_pci_bus[2],
-	},
-	[3] = {
-		.bus = &mock_pci_bus[3],
-	},
 };
 
 static bool is_mock_bus(struct pci_bus *bus)
@@ -446,6 +436,26 @@ static int mock_cxl_port_enumerate_dports(struct cxl_port *port)
 			dev_name(&pdev->dev));
 	}
 
+	for (i = 0; i < ARRAY_SIZE(cxl_switch_dport); i++) {
+		struct platform_device *pdev = cxl_switch_dport[i];
+		struct cxl_dport *dport;
+
+		if (pdev->dev.parent != port->uport)
+			continue;
+
+		dport = devm_cxl_add_dport(port, &pdev->dev, pdev->id,
+					   CXL_RESOURCE_NONE);
+
+		if (IS_ERR(dport)) {
+			dev_err(dev, "failed to add dport: %s (%ld)\n",
+				dev_name(&pdev->dev), PTR_ERR(dport));
+			return PTR_ERR(dport);
+		}
+
+		dev_dbg(dev, "add dport%d: %s\n", pdev->id,
+			dev_name(&pdev->dev));
+	}
+
 	return 0;
 }
 
@@ -574,15 +584,51 @@ static __init int cxl_test_init(void)
 		cxl_root_port[i] = pdev;
 	}
 
-	BUILD_BUG_ON(ARRAY_SIZE(cxl_mem) != ARRAY_SIZE(cxl_root_port));
+	BUILD_BUG_ON(ARRAY_SIZE(cxl_switch_uport) != ARRAY_SIZE(cxl_root_port));
+	for (i = 0; i < ARRAY_SIZE(cxl_switch_uport); i++) {
+		struct platform_device *root_port = cxl_root_port[i];
+		struct platform_device *pdev;
+
+		pdev = platform_device_alloc("cxl_switch_uport", i);
+		if (!pdev)
+			goto err_port;
+		pdev->dev.parent = &root_port->dev;
+
+		rc = platform_device_add(pdev);
+		if (rc) {
+			platform_device_put(pdev);
+			goto err_uport;
+		}
+		cxl_switch_uport[i] = pdev;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(cxl_switch_dport); i++) {
+		struct platform_device *uport =
+			cxl_switch_uport[i % ARRAY_SIZE(cxl_switch_uport)];
+		struct platform_device *pdev;
+
+		pdev = platform_device_alloc("cxl_switch_dport", i);
+		if (!pdev)
+			goto err_port;
+		pdev->dev.parent = &uport->dev;
+
+		rc = platform_device_add(pdev);
+		if (rc) {
+			platform_device_put(pdev);
+			goto err_dport;
+		}
+		cxl_switch_dport[i] = pdev;
+	}
+
+	BUILD_BUG_ON(ARRAY_SIZE(cxl_mem) != ARRAY_SIZE(cxl_switch_dport));
 	for (i = 0; i < ARRAY_SIZE(cxl_mem); i++) {
-		struct platform_device *port = cxl_root_port[i];
+		struct platform_device *dport = cxl_switch_dport[i];
 		struct platform_device *pdev;
 
 		pdev = alloc_memdev(i);
 		if (!pdev)
 			goto err_mem;
-		pdev->dev.parent = &port->dev;
+		pdev->dev.parent = &dport->dev;
 		set_dev_node(&pdev->dev, i % 2);
 
 		rc = platform_device_add(pdev);
@@ -611,6 +657,12 @@ err_add:
 err_mem:
 	for (i = ARRAY_SIZE(cxl_mem) - 1; i >= 0; i--)
 		platform_device_unregister(cxl_mem[i]);
+err_dport:
+	for (i = ARRAY_SIZE(cxl_switch_dport) - 1; i >= 0; i--)
+		platform_device_unregister(cxl_switch_dport[i]);
+err_uport:
+	for (i = ARRAY_SIZE(cxl_switch_uport) - 1; i >= 0; i--)
+		platform_device_unregister(cxl_switch_uport[i]);
 err_port:
 	for (i = ARRAY_SIZE(cxl_root_port) - 1; i >= 0; i--)
 		platform_device_unregister(cxl_root_port[i]);
@@ -633,6 +685,10 @@ static __exit void cxl_test_exit(void)
 	platform_device_unregister(cxl_acpi);
 	for (i = ARRAY_SIZE(cxl_mem) - 1; i >= 0; i--)
 		platform_device_unregister(cxl_mem[i]);
+	for (i = ARRAY_SIZE(cxl_switch_dport) - 1; i >= 0; i--)
+		platform_device_unregister(cxl_switch_dport[i]);
+	for (i = ARRAY_SIZE(cxl_switch_uport) - 1; i >= 0; i--)
+		platform_device_unregister(cxl_switch_uport[i]);
 	for (i = ARRAY_SIZE(cxl_root_port) - 1; i >= 0; i--)
 		platform_device_unregister(cxl_root_port[i]);
 	for (i = ARRAY_SIZE(cxl_host_bridge) - 1; i >= 0; i--)
-- 
cgit 


From 7c7d68db0254e1b50d2d80b47774fc605846d49c Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sun, 23 Jan 2022 16:32:07 -0800
Subject: tools/testing/cxl: Enumerate mock decoders

Enumerate 2-decoders per switch port and endpoint in the cxl_test
topology.

Link: https://lore.kernel.org/r/164298432699.3018233.12131068635065601541.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 tools/testing/cxl/test/cxl.c | 118 +++++++++++++++++++++++++++++++++++--------
 1 file changed, 98 insertions(+), 20 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index ea88fabc3198..1b36e67dcd7e 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -14,6 +14,7 @@
 #define NR_CXL_HOST_BRIDGES 2
 #define NR_CXL_ROOT_PORTS 2
 #define NR_CXL_SWITCH_PORTS 2
+#define NR_CXL_PORT_DECODERS 2
 
 static struct platform_device *cxl_acpi;
 static struct platform_device *cxl_host_bridge[NR_CXL_HOST_BRIDGES];
@@ -406,38 +407,115 @@ static int mock_cxl_add_passthrough_decoder(struct cxl_port *port)
 	return -EOPNOTSUPP;
 }
 
-static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm)
+
+struct target_map_ctx {
+	int *target_map;
+	int index;
+	int target_count;
+};
+
+static int map_targets(struct device *dev, void *data)
 {
+	struct platform_device *pdev = to_platform_device(dev);
+	struct target_map_ctx *ctx = data;
+
+	ctx->target_map[ctx->index++] = pdev->id;
+
+	if (ctx->index > ctx->target_count) {
+		dev_WARN_ONCE(dev, 1, "too many targets found?\n");
+		return -ENXIO;
+	}
+
 	return 0;
 }
 
-static int mock_cxl_port_enumerate_dports(struct cxl_port *port)
+static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm)
 {
-	struct device *dev = &port->dev;
-	int i;
+	struct cxl_port *port = cxlhdm->port;
+	struct cxl_port *parent_port = to_cxl_port(port->dev.parent);
+	int target_count, i;
+
+	if (is_cxl_endpoint(port))
+		target_count = 0;
+	else if (is_cxl_root(parent_port))
+		target_count = NR_CXL_ROOT_PORTS;
+	else
+		target_count = NR_CXL_SWITCH_PORTS;
+
+	for (i = 0; i < NR_CXL_PORT_DECODERS; i++) {
+		int target_map[CXL_DECODER_MAX_INTERLEAVE] = { 0 };
+		struct target_map_ctx ctx = {
+			.target_map = target_map,
+			.target_count = target_count,
+		};
+		struct cxl_decoder *cxld;
+		int rc;
+
+		if (target_count)
+			cxld = cxl_switch_decoder_alloc(port, target_count);
+		else
+			cxld = cxl_endpoint_decoder_alloc(port);
+		if (IS_ERR(cxld)) {
+			dev_warn(&port->dev,
+				 "Failed to allocate the decoder\n");
+			return PTR_ERR(cxld);
+		}
 
-	for (i = 0; i < ARRAY_SIZE(cxl_root_port); i++) {
-		struct platform_device *pdev = cxl_root_port[i];
-		struct cxl_dport *dport;
+		cxld->decoder_range = (struct range) {
+			.start = 0,
+			.end = -1,
+		};
+
+		cxld->flags = CXL_DECODER_F_ENABLE;
+		cxld->interleave_ways = min_not_zero(target_count, 1);
+		cxld->interleave_granularity = SZ_4K;
+		cxld->target_type = CXL_DECODER_EXPANDER;
+
+		if (target_count) {
+			rc = device_for_each_child(port->uport, &ctx,
+						   map_targets);
+			if (rc) {
+				put_device(&cxld->dev);
+				return rc;
+			}
+		}
 
-		if (pdev->dev.parent != port->uport)
-			continue;
+		rc = cxl_decoder_add_locked(cxld, target_map);
+		if (rc) {
+			put_device(&cxld->dev);
+			dev_err(&port->dev, "Failed to add decoder\n");
+			return rc;
+		}
 
-		dport = devm_cxl_add_dport(port, &pdev->dev, pdev->id,
-					   CXL_RESOURCE_NONE);
+		rc = cxl_decoder_autoremove(&port->dev, cxld);
+		if (rc)
+			return rc;
+		dev_dbg(&cxld->dev, "Added to port %s\n", dev_name(&port->dev));
+	}
 
-		if (IS_ERR(dport)) {
-			dev_err(dev, "failed to add dport: %s (%ld)\n",
-				dev_name(&pdev->dev), PTR_ERR(dport));
-			return PTR_ERR(dport);
-		}
+	return 0;
+}
 
-		dev_dbg(dev, "add dport%d: %s\n", pdev->id,
-			dev_name(&pdev->dev));
+static int mock_cxl_port_enumerate_dports(struct cxl_port *port)
+{
+	struct device *dev = &port->dev;
+	struct platform_device **array;
+	int i, array_size;
+
+	if (port->depth == 1) {
+		array_size = ARRAY_SIZE(cxl_root_port);
+		array = cxl_root_port;
+	} else if (port->depth == 2) {
+		array_size = ARRAY_SIZE(cxl_switch_dport);
+		array = cxl_switch_dport;
+	} else {
+		dev_WARN_ONCE(&port->dev, 1, "unexpected depth %d\n",
+			      port->depth);
+		return -ENXIO;
 	}
 
-	for (i = 0; i < ARRAY_SIZE(cxl_switch_dport); i++) {
-		struct platform_device *pdev = cxl_switch_dport[i];
+	for (i = 0; i < array_size; i++) {
+		struct platform_device *pdev = array[i];
 		struct cxl_dport *dport;
 
 		if (pdev->dev.parent != port->uport)
-- 
cgit 


From 64cda3ae6bc785960cd1b4f78c19f7ca53e0130b Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sun, 23 Jan 2022 16:32:12 -0800
Subject: tools/testing/cxl: Add a physical_node link

Emulate what ACPI does to link a host bridge platform firmware device to
device node on the PCI bus. In this case it's just self referencing
link, but it otherwise lets the tooling test out its lookup code.

Link: https://lore.kernel.org/r/164298433209.3018233.18101085948127163720.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 tools/testing/cxl/test/cxl.c | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index 1b36e67dcd7e..431f2bddf6c8 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -641,7 +641,12 @@ static __init int cxl_test_init(void)
 			platform_device_put(pdev);
 			goto err_bridge;
 		}
+
 		cxl_host_bridge[i] = pdev;
+		rc = sysfs_create_link(&pdev->dev.kobj, &pdev->dev.kobj,
+				       "physical_node");
+		if (rc)
+			goto err_bridge;
 	}
 
 	for (i = 0; i < ARRAY_SIZE(cxl_root_port); i++) {
@@ -745,8 +750,14 @@ err_port:
 	for (i = ARRAY_SIZE(cxl_root_port) - 1; i >= 0; i--)
 		platform_device_unregister(cxl_root_port[i]);
 err_bridge:
-	for (i = ARRAY_SIZE(cxl_host_bridge) - 1; i >= 0; i--)
+	for (i = ARRAY_SIZE(cxl_host_bridge) - 1; i >= 0; i--) {
+		struct platform_device *pdev = cxl_host_bridge[i];
+
+		if (!pdev)
+			continue;
+		sysfs_remove_link(&pdev->dev.kobj, "physical_node");
 		platform_device_unregister(cxl_host_bridge[i]);
+	}
 err_populate:
 	depopulate_all_mock_resources();
 err_gen_pool_add:
@@ -769,8 +780,14 @@ static __exit void cxl_test_exit(void)
 		platform_device_unregister(cxl_switch_uport[i]);
 	for (i = ARRAY_SIZE(cxl_root_port) - 1; i >= 0; i--)
 		platform_device_unregister(cxl_root_port[i]);
-	for (i = ARRAY_SIZE(cxl_host_bridge) - 1; i >= 0; i--)
+	for (i = ARRAY_SIZE(cxl_host_bridge) - 1; i >= 0; i--) {
+		struct platform_device *pdev = cxl_host_bridge[i];
+
+		if (!pdev)
+			continue;
+		sysfs_remove_link(&pdev->dev.kobj, "physical_node");
 		platform_device_unregister(cxl_host_bridge[i]);
+	}
 	depopulate_all_mock_resources();
 	gen_pool_destroy(cxl_mock_pool);
 	unregister_cxl_mock_ops(&cxl_mock_ops);
-- 
cgit 


From 2ed0dc5937d38f282841d22c5a5354ec264c7b8d Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Wed, 9 Feb 2022 19:43:33 +0100
Subject: selftests/bpf: Cover 4-byte load from remote_port in bpf_sk_lookup

Extend the context access tests for sk_lookup prog to cover the surprising
case of a 4-byte load from the remote_port field, where the expected value
is actually shifted by 16 bits.

Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20220209184333.654927-3-jakub@cloudflare.com
---
 tools/include/uapi/linux/bpf.h                     | 3 ++-
 tools/testing/selftests/bpf/progs/test_sk_lookup.c | 6 ++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index a7f0ddedac1f..afe3d0d7f5f2 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -6453,7 +6453,8 @@ struct bpf_sk_lookup {
 	__u32 protocol;		/* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */
 	__u32 remote_ip4;	/* Network byte order */
 	__u32 remote_ip6[4];	/* Network byte order */
-	__u32 remote_port;	/* Network byte order */
+	__be16 remote_port;	/* Network byte order */
+	__u16 :16;		/* Zero padding */
 	__u32 local_ip4;	/* Network byte order */
 	__u32 local_ip6[4];	/* Network byte order */
 	__u32 local_port;	/* Host byte order */
diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup.c b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
index 83b0aaa52ef7..bf5b7caefdd0 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_lookup.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
@@ -392,6 +392,7 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx)
 {
 	struct bpf_sock *sk;
 	int err, family;
+	__u32 val_u32;
 	bool v4;
 
 	v4 = (ctx->family == AF_INET);
@@ -418,6 +419,11 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx)
 	if (LSW(ctx->remote_port, 0) != SRC_PORT)
 		return SK_DROP;
 
+	/* Load from remote_port field with zero padding (backward compatibility) */
+	val_u32 = *(__u32 *)&ctx->remote_port;
+	if (val_u32 != bpf_htonl(bpf_ntohs(SRC_PORT) << 16))
+		return SK_DROP;
+
 	/* Narrow loads from local_port field. Expect DST_PORT. */
 	if (LSB(ctx->local_port, 0) != ((DST_PORT >> 0) & 0xff) ||
 	    LSB(ctx->local_port, 1) != ((DST_PORT >> 8) & 0xff) ||
-- 
cgit 


From a086ee24cce21994150789fc83f31ee7f5a9cf2d Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 9 Feb 2022 16:36:42 -0800
Subject: selftests: net: rename cmsg_so_mark

Rename the file in prep for generalization.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/.gitignore      |  2 +-
 tools/testing/selftests/net/Makefile        |  2 +-
 tools/testing/selftests/net/cmsg_sender.c   | 67 +++++++++++++++++++++++++++++
 tools/testing/selftests/net/cmsg_so_mark.c  | 67 -----------------------------
 tools/testing/selftests/net/cmsg_so_mark.sh |  8 ++--
 5 files changed, 73 insertions(+), 73 deletions(-)
 create mode 100644 tools/testing/selftests/net/cmsg_sender.c
 delete mode 100644 tools/testing/selftests/net/cmsg_so_mark.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 7581a7348e1b..21a411b04890 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -35,4 +35,4 @@ test_unix_oob
 gro
 ioam6_parser
 toeplitz
-cmsg_so_mark
+cmsg_sender
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 9897fa9ab953..8f4c1f16655f 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -52,7 +52,7 @@ TEST_GEN_FILES += gro
 TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
 TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
 TEST_GEN_FILES += toeplitz
-TEST_GEN_FILES += cmsg_so_mark
+TEST_GEN_FILES += cmsg_sender
 
 TEST_FILES := settings
 
diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c
new file mode 100644
index 000000000000..27f2804892a7
--- /dev/null
+++ b/tools/testing/selftests/net/cmsg_sender.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <errno.h>
+#include <netdb.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <linux/types.h>
+#include <sys/socket.h>
+
+int main(int argc, const char **argv)
+{
+	char cbuf[CMSG_SPACE(sizeof(__u32))];
+	struct addrinfo hints, *ai;
+	struct cmsghdr *cmsg;
+	struct iovec iov[1];
+	struct msghdr msg;
+	int mark;
+	int err;
+	int fd;
+
+	if (argc != 4) {
+		fprintf(stderr, "Usage: %s <dst_ip> <port> <mark>\n", argv[0]);
+		return 1;
+	}
+	mark = atoi(argv[3]);
+
+	memset(&hints, 0, sizeof(hints));
+	hints.ai_family = AF_UNSPEC;
+	hints.ai_socktype = SOCK_DGRAM;
+
+	ai = NULL;
+	err = getaddrinfo(argv[1], argv[2], &hints, &ai);
+	if (err) {
+		fprintf(stderr, "Can't resolve address: %s\n", strerror(errno));
+		return 1;
+	}
+
+	fd = socket(ai->ai_family, SOCK_DGRAM, IPPROTO_UDP);
+	if (fd < 0) {
+		fprintf(stderr, "Can't open socket: %s\n", strerror(errno));
+		freeaddrinfo(ai);
+		return 1;
+	}
+
+	iov[0].iov_base = "bla";
+	iov[0].iov_len = 4;
+
+	msg.msg_name = ai->ai_addr;
+	msg.msg_namelen = ai->ai_addrlen;
+	msg.msg_iov = iov;
+	msg.msg_iovlen = 1;
+	msg.msg_control = cbuf;
+	msg.msg_controllen = sizeof(cbuf);
+
+	cmsg = CMSG_FIRSTHDR(&msg);
+	cmsg->cmsg_level = SOL_SOCKET;
+	cmsg->cmsg_type = SO_MARK;
+	cmsg->cmsg_len = CMSG_LEN(sizeof(__u32));
+	*(__u32 *)CMSG_DATA(cmsg) = mark;
+
+	err = sendmsg(fd, &msg, 0);
+
+	close(fd);
+	freeaddrinfo(ai);
+	return err != 4;
+}
diff --git a/tools/testing/selftests/net/cmsg_so_mark.c b/tools/testing/selftests/net/cmsg_so_mark.c
deleted file mode 100644
index 27f2804892a7..000000000000
--- a/tools/testing/selftests/net/cmsg_so_mark.c
+++ /dev/null
@@ -1,67 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-#include <errno.h>
-#include <netdb.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <linux/types.h>
-#include <sys/socket.h>
-
-int main(int argc, const char **argv)
-{
-	char cbuf[CMSG_SPACE(sizeof(__u32))];
-	struct addrinfo hints, *ai;
-	struct cmsghdr *cmsg;
-	struct iovec iov[1];
-	struct msghdr msg;
-	int mark;
-	int err;
-	int fd;
-
-	if (argc != 4) {
-		fprintf(stderr, "Usage: %s <dst_ip> <port> <mark>\n", argv[0]);
-		return 1;
-	}
-	mark = atoi(argv[3]);
-
-	memset(&hints, 0, sizeof(hints));
-	hints.ai_family = AF_UNSPEC;
-	hints.ai_socktype = SOCK_DGRAM;
-
-	ai = NULL;
-	err = getaddrinfo(argv[1], argv[2], &hints, &ai);
-	if (err) {
-		fprintf(stderr, "Can't resolve address: %s\n", strerror(errno));
-		return 1;
-	}
-
-	fd = socket(ai->ai_family, SOCK_DGRAM, IPPROTO_UDP);
-	if (fd < 0) {
-		fprintf(stderr, "Can't open socket: %s\n", strerror(errno));
-		freeaddrinfo(ai);
-		return 1;
-	}
-
-	iov[0].iov_base = "bla";
-	iov[0].iov_len = 4;
-
-	msg.msg_name = ai->ai_addr;
-	msg.msg_namelen = ai->ai_addrlen;
-	msg.msg_iov = iov;
-	msg.msg_iovlen = 1;
-	msg.msg_control = cbuf;
-	msg.msg_controllen = sizeof(cbuf);
-
-	cmsg = CMSG_FIRSTHDR(&msg);
-	cmsg->cmsg_level = SOL_SOCKET;
-	cmsg->cmsg_type = SO_MARK;
-	cmsg->cmsg_len = CMSG_LEN(sizeof(__u32));
-	*(__u32 *)CMSG_DATA(cmsg) = mark;
-
-	err = sendmsg(fd, &msg, 0);
-
-	close(fd);
-	freeaddrinfo(ai);
-	return err != 4;
-}
diff --git a/tools/testing/selftests/net/cmsg_so_mark.sh b/tools/testing/selftests/net/cmsg_so_mark.sh
index 19c6aab8d0e9..29a623aac74b 100755
--- a/tools/testing/selftests/net/cmsg_so_mark.sh
+++ b/tools/testing/selftests/net/cmsg_so_mark.sh
@@ -41,14 +41,14 @@ check_result() {
     fi
 }
 
-ip netns exec $NS ./cmsg_so_mark $TGT4 1234 $((MARK + 1))
+ip netns exec $NS ./cmsg_sender $TGT4 1234 $((MARK + 1))
 check_result $? 0 "IPv4 pass"
-ip netns exec $NS ./cmsg_so_mark $TGT6 1234 $((MARK + 1))
+ip netns exec $NS ./cmsg_sender $TGT6 1234 $((MARK + 1))
 check_result $? 0 "IPv6 pass"
 
-ip netns exec $NS ./cmsg_so_mark $TGT4 1234 $MARK
+ip netns exec $NS ./cmsg_sender $TGT4 1234 $MARK
 check_result $? 1 "IPv4 rejection"
-ip netns exec $NS ./cmsg_so_mark $TGT6 1234 $MARK
+ip netns exec $NS ./cmsg_sender $TGT6 1234 $MARK
 check_result $? 1 "IPv6 rejection"
 
 # Summary
-- 
cgit 


From 49b78613029642eec9601af6bc6c716e10929106 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 9 Feb 2022 16:36:43 -0800
Subject: selftests: net: make cmsg_so_mark ready for more options

Parametrize the code so that it can support UDP and ICMP
sockets in the future, and more cmsg types.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/cmsg_sender.c   | 135 +++++++++++++++++++++++-----
 tools/testing/selftests/net/cmsg_so_mark.sh |   8 +-
 2 files changed, 117 insertions(+), 26 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c
index 27f2804892a7..4528ae638aea 100644
--- a/tools/testing/selftests/net/cmsg_sender.c
+++ b/tools/testing/selftests/net/cmsg_sender.c
@@ -1,6 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 #include <errno.h>
+#include <error.h>
 #include <netdb.h>
+#include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -8,60 +10,149 @@
 #include <linux/types.h>
 #include <sys/socket.h>
 
-int main(int argc, const char **argv)
+enum {
+	ERN_SUCCESS = 0,
+	/* Well defined errors, callers may depend on these */
+	ERN_SEND = 1,
+	/* Informational, can reorder */
+	ERN_HELP,
+	ERN_SEND_SHORT,
+	ERN_SOCK_CREATE,
+	ERN_RESOLVE,
+	ERN_CMSG_WR,
+};
+
+struct options {
+	bool silent_send;
+	const char *host;
+	const char *service;
+	struct {
+		unsigned int type;
+	} sock;
+	struct {
+		bool ena;
+		unsigned int val;
+	} mark;
+} opt = {
+	.sock = {
+		.type	= SOCK_DGRAM,
+	},
+};
+
+static void __attribute__((noreturn)) cs_usage(const char *bin)
+{
+	printf("Usage: %s [opts] <dst host> <dst port / service>\n", bin);
+	printf("Options:\n"
+	       "\t\t-s      Silent send() failures\n"
+	       "\t\t-m val  Set SO_MARK with given value\n"
+	       "");
+	exit(ERN_HELP);
+}
+
+static void cs_parse_args(int argc, char *argv[])
+{
+	char o;
+
+	while ((o = getopt(argc, argv, "sm:")) != -1) {
+		switch (o) {
+		case 's':
+			opt.silent_send = true;
+			break;
+		case 'm':
+			opt.mark.ena = true;
+			opt.mark.val = atoi(optarg);
+			break;
+		}
+	}
+
+	if (optind != argc - 2)
+		cs_usage(argv[0]);
+
+	opt.host = argv[optind];
+	opt.service = argv[optind + 1];
+}
+
+static void
+cs_write_cmsg(struct msghdr *msg, char *cbuf, size_t cbuf_sz)
 {
-	char cbuf[CMSG_SPACE(sizeof(__u32))];
-	struct addrinfo hints, *ai;
 	struct cmsghdr *cmsg;
+	size_t cmsg_len;
+
+	msg->msg_control = cbuf;
+	cmsg_len = 0;
+
+	if (opt.mark.ena) {
+		cmsg = (struct cmsghdr *)(cbuf + cmsg_len);
+		cmsg_len += CMSG_SPACE(sizeof(__u32));
+		if (cbuf_sz < cmsg_len)
+			error(ERN_CMSG_WR, EFAULT, "cmsg buffer too small");
+
+		cmsg->cmsg_level = SOL_SOCKET;
+		cmsg->cmsg_type = SO_MARK;
+		cmsg->cmsg_len = CMSG_LEN(sizeof(__u32));
+		*(__u32 *)CMSG_DATA(cmsg) = opt.mark.val;
+	}
+
+	if (cmsg_len)
+		msg->msg_controllen = cmsg_len;
+	else
+		msg->msg_control = NULL;
+}
+
+int main(int argc, char *argv[])
+{
+	struct addrinfo hints, *ai;
 	struct iovec iov[1];
 	struct msghdr msg;
-	int mark;
+	char cbuf[1024];
 	int err;
 	int fd;
 
-	if (argc != 4) {
-		fprintf(stderr, "Usage: %s <dst_ip> <port> <mark>\n", argv[0]);
-		return 1;
-	}
-	mark = atoi(argv[3]);
+	cs_parse_args(argc, argv);
 
 	memset(&hints, 0, sizeof(hints));
 	hints.ai_family = AF_UNSPEC;
-	hints.ai_socktype = SOCK_DGRAM;
+	hints.ai_socktype = opt.sock.type;
 
 	ai = NULL;
-	err = getaddrinfo(argv[1], argv[2], &hints, &ai);
+	err = getaddrinfo(opt.host, opt.service, &hints, &ai);
 	if (err) {
-		fprintf(stderr, "Can't resolve address: %s\n", strerror(errno));
-		return 1;
+		fprintf(stderr, "Can't resolve address [%s]:%s: %s\n",
+			opt.host, opt.service, strerror(errno));
+		return ERN_SOCK_CREATE;
 	}
 
 	fd = socket(ai->ai_family, SOCK_DGRAM, IPPROTO_UDP);
 	if (fd < 0) {
 		fprintf(stderr, "Can't open socket: %s\n", strerror(errno));
 		freeaddrinfo(ai);
-		return 1;
+		return ERN_RESOLVE;
 	}
 
 	iov[0].iov_base = "bla";
 	iov[0].iov_len = 4;
 
+	memset(&msg, 0, sizeof(msg));
 	msg.msg_name = ai->ai_addr;
 	msg.msg_namelen = ai->ai_addrlen;
 	msg.msg_iov = iov;
 	msg.msg_iovlen = 1;
-	msg.msg_control = cbuf;
-	msg.msg_controllen = sizeof(cbuf);
 
-	cmsg = CMSG_FIRSTHDR(&msg);
-	cmsg->cmsg_level = SOL_SOCKET;
-	cmsg->cmsg_type = SO_MARK;
-	cmsg->cmsg_len = CMSG_LEN(sizeof(__u32));
-	*(__u32 *)CMSG_DATA(cmsg) = mark;
+	cs_write_cmsg(&msg, cbuf, sizeof(cbuf));
 
 	err = sendmsg(fd, &msg, 0);
+	if (err < 0) {
+		if (!opt.silent_send)
+			fprintf(stderr, "send failed: %s\n", strerror(errno));
+		err = ERN_SEND;
+	} else if (err != 4) {
+		fprintf(stderr, "short send\n");
+		err = ERN_SEND_SHORT;
+	} else {
+		err = ERN_SUCCESS;
+	}
 
 	close(fd);
 	freeaddrinfo(ai);
-	return err != 4;
+	return err;
 }
diff --git a/tools/testing/selftests/net/cmsg_so_mark.sh b/tools/testing/selftests/net/cmsg_so_mark.sh
index 29a623aac74b..841d706dc91b 100755
--- a/tools/testing/selftests/net/cmsg_so_mark.sh
+++ b/tools/testing/selftests/net/cmsg_so_mark.sh
@@ -41,14 +41,14 @@ check_result() {
     fi
 }
 
-ip netns exec $NS ./cmsg_sender $TGT4 1234 $((MARK + 1))
+ip netns exec $NS ./cmsg_sender -m $((MARK + 1)) $TGT4 1234
 check_result $? 0 "IPv4 pass"
-ip netns exec $NS ./cmsg_sender $TGT6 1234 $((MARK + 1))
+ip netns exec $NS ./cmsg_sender -m $((MARK + 1)) $TGT6 1234
 check_result $? 0 "IPv6 pass"
 
-ip netns exec $NS ./cmsg_sender $TGT4 1234 $MARK
+ip netns exec $NS ./cmsg_sender -s -m $MARK $TGT4 1234
 check_result $? 1 "IPv4 rejection"
-ip netns exec $NS ./cmsg_sender $TGT6 1234 $MARK
+ip netns exec $NS ./cmsg_sender -s -m $MARK $TGT6 1234
 check_result $? 1 "IPv6 rejection"
 
 # Summary
-- 
cgit 


From de17e305a81012120c23380a94ed22c7e8bf324f Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 9 Feb 2022 16:36:44 -0800
Subject: selftests: net: cmsg_sender: support icmp and raw sockets

Support sending fake ICMP(v6) messages and UDP via RAW sockets.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/cmsg_sender.c | 64 ++++++++++++++++++++++++++-----
 1 file changed, 55 insertions(+), 9 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c
index 4528ae638aea..edb8c427c7cb 100644
--- a/tools/testing/selftests/net/cmsg_sender.c
+++ b/tools/testing/selftests/net/cmsg_sender.c
@@ -7,7 +7,10 @@
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
 #include <linux/types.h>
+#include <linux/udp.h>
 #include <sys/socket.h>
 
 enum {
@@ -27,7 +30,9 @@ struct options {
 	const char *host;
 	const char *service;
 	struct {
+		unsigned int family;
 		unsigned int type;
+		unsigned int proto;
 	} sock;
 	struct {
 		bool ena;
@@ -35,7 +40,9 @@ struct options {
 	} mark;
 } opt = {
 	.sock = {
+		.family	= AF_UNSPEC,
 		.type	= SOCK_DGRAM,
+		.proto	= IPPROTO_UDP,
 	},
 };
 
@@ -44,6 +51,10 @@ static void __attribute__((noreturn)) cs_usage(const char *bin)
 	printf("Usage: %s [opts] <dst host> <dst port / service>\n", bin);
 	printf("Options:\n"
 	       "\t\t-s      Silent send() failures\n"
+	       "\t\t-4/-6   Force IPv4 / IPv6 only\n"
+	       "\t\t-p prot Socket protocol\n"
+	       "\t\t        (u = UDP (default); i = ICMP; r = RAW)\n"
+	       "\n"
 	       "\t\t-m val  Set SO_MARK with given value\n"
 	       "");
 	exit(ERN_HELP);
@@ -53,11 +64,29 @@ static void cs_parse_args(int argc, char *argv[])
 {
 	char o;
 
-	while ((o = getopt(argc, argv, "sm:")) != -1) {
+	while ((o = getopt(argc, argv, "46sp:m:")) != -1) {
 		switch (o) {
 		case 's':
 			opt.silent_send = true;
 			break;
+		case '4':
+			opt.sock.family = AF_INET;
+			break;
+		case '6':
+			opt.sock.family = AF_INET6;
+			break;
+		case 'p':
+			if (*optarg == 'u' || *optarg == 'U') {
+				opt.sock.proto = IPPROTO_UDP;
+			} else if (*optarg == 'i' || *optarg == 'I') {
+				opt.sock.proto = IPPROTO_ICMP;
+			} else if (*optarg == 'r') {
+				opt.sock.type = SOCK_RAW;
+			} else {
+				printf("Error: unknown protocol: %s\n", optarg);
+				cs_usage(argv[0]);
+			}
+			break;
 		case 'm':
 			opt.mark.ena = true;
 			opt.mark.val = atoi(optarg);
@@ -101,6 +130,7 @@ cs_write_cmsg(struct msghdr *msg, char *cbuf, size_t cbuf_sz)
 
 int main(int argc, char *argv[])
 {
+	char buf[] = "blablablabla";
 	struct addrinfo hints, *ai;
 	struct iovec iov[1];
 	struct msghdr msg;
@@ -111,26 +141,42 @@ int main(int argc, char *argv[])
 	cs_parse_args(argc, argv);
 
 	memset(&hints, 0, sizeof(hints));
-	hints.ai_family = AF_UNSPEC;
-	hints.ai_socktype = opt.sock.type;
+	hints.ai_family = opt.sock.family;
 
 	ai = NULL;
 	err = getaddrinfo(opt.host, opt.service, &hints, &ai);
 	if (err) {
-		fprintf(stderr, "Can't resolve address [%s]:%s: %s\n",
-			opt.host, opt.service, strerror(errno));
+		fprintf(stderr, "Can't resolve address [%s]:%s\n",
+			opt.host, opt.service);
 		return ERN_SOCK_CREATE;
 	}
 
-	fd = socket(ai->ai_family, SOCK_DGRAM, IPPROTO_UDP);
+	if (ai->ai_family == AF_INET6 && opt.sock.proto == IPPROTO_ICMP)
+		opt.sock.proto = IPPROTO_ICMPV6;
+
+	fd = socket(ai->ai_family, opt.sock.type, opt.sock.proto);
 	if (fd < 0) {
 		fprintf(stderr, "Can't open socket: %s\n", strerror(errno));
 		freeaddrinfo(ai);
 		return ERN_RESOLVE;
 	}
 
-	iov[0].iov_base = "bla";
-	iov[0].iov_len = 4;
+	if (opt.sock.proto == IPPROTO_ICMP) {
+		buf[0] = ICMP_ECHO;
+		buf[1] = 0;
+	} else if (opt.sock.proto == IPPROTO_ICMPV6) {
+		buf[0] = ICMPV6_ECHO_REQUEST;
+		buf[1] = 0;
+	} else if (opt.sock.type == SOCK_RAW) {
+		struct udphdr hdr = { 1, 2, htons(sizeof(buf)), 0 };
+		struct sockaddr_in6 *sin6 = (void *)ai->ai_addr;;
+
+		memcpy(buf, &hdr, sizeof(hdr));
+		sin6->sin6_port = htons(opt.sock.proto);
+	}
+
+	iov[0].iov_base = buf;
+	iov[0].iov_len = sizeof(buf);
 
 	memset(&msg, 0, sizeof(msg));
 	msg.msg_name = ai->ai_addr;
@@ -145,7 +191,7 @@ int main(int argc, char *argv[])
 		if (!opt.silent_send)
 			fprintf(stderr, "send failed: %s\n", strerror(errno));
 		err = ERN_SEND;
-	} else if (err != 4) {
+	} else if (err != sizeof(buf)) {
 		fprintf(stderr, "short send\n");
 		err = ERN_SEND_SHORT;
 	} else {
-- 
cgit 


From 0344488e11cab982d2b2402a1689ced1815680fc Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 9 Feb 2022 16:36:45 -0800
Subject: selftests: net: cmsg_so_mark: test ICMP and RAW sockets

Use new capabilities of cmsg_sender to test ICMP and RAW sockets,
previously only UDP was tested.

Before SO_MARK support was added to ICMPv6:

  # ./cmsg_so_mark.sh
   Case ICMP rejection returned 0, expected 1
  FAIL - 1/12 cases failed

After:

  # ./cmsg_so_mark.sh
  OK

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/cmsg_so_mark.sh | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/cmsg_so_mark.sh b/tools/testing/selftests/net/cmsg_so_mark.sh
index 841d706dc91b..925f6b9deee2 100755
--- a/tools/testing/selftests/net/cmsg_so_mark.sh
+++ b/tools/testing/selftests/net/cmsg_so_mark.sh
@@ -18,6 +18,8 @@ trap cleanup EXIT
 # Namespaces
 ip netns add $NS
 
+ip netns exec $NS sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null
+
 # Connectivity
 ip -netns $NS link add type dummy
 ip -netns $NS link set dev dummy0 up
@@ -41,15 +43,21 @@ check_result() {
     fi
 }
 
-ip netns exec $NS ./cmsg_sender -m $((MARK + 1)) $TGT4 1234
-check_result $? 0 "IPv4 pass"
-ip netns exec $NS ./cmsg_sender -m $((MARK + 1)) $TGT6 1234
-check_result $? 0 "IPv6 pass"
+for i in 4 6; do
+    [ $i == 4 ] && TGT=$TGT4 || TGT=$TGT6
+
+    for p in u i r; do
+	[ $p == "u" ] && prot=UDP
+	[ $p == "i" ] && prot=ICMP
+	[ $p == "r" ] && prot=RAW
+
+	ip netns exec $NS ./cmsg_sender -$i -p $p -m $((MARK + 1)) $TGT 1234
+	check_result $? 0 "$prot pass"
 
-ip netns exec $NS ./cmsg_sender -s -m $MARK $TGT4 1234
-check_result $? 1 "IPv4 rejection"
-ip netns exec $NS ./cmsg_sender -s -m $MARK $TGT6 1234
-check_result $? 1 "IPv6 rejection"
+	ip netns exec $NS ./cmsg_sender -$i -p $p -m $MARK -s $TGT 1234
+	check_result $? 1 "$prot rejection"
+    done
+done
 
 # Summary
 if [ $BAD -ne 0 ]; then
-- 
cgit 


From 9bbfbc92c64a9f4d5ac4205071c5fc02a8201039 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 9 Feb 2022 16:36:46 -0800
Subject: selftests: net: cmsg_so_mark: test with SO_MARK set by setsockopt

Test if setting SO_MARK with setsockopt works and if cmsg
takes precedence over it.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/cmsg_sender.c   | 14 +++++++++++++-
 tools/testing/selftests/net/cmsg_so_mark.sh | 28 ++++++++++++++++++----------
 2 files changed, 31 insertions(+), 11 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c
index edb8c427c7cb..c7586a4b0361 100644
--- a/tools/testing/selftests/net/cmsg_sender.c
+++ b/tools/testing/selftests/net/cmsg_sender.c
@@ -29,6 +29,9 @@ struct options {
 	bool silent_send;
 	const char *host;
 	const char *service;
+	struct {
+		unsigned int mark;
+	} sockopt;
 	struct {
 		unsigned int family;
 		unsigned int type;
@@ -56,6 +59,7 @@ static void __attribute__((noreturn)) cs_usage(const char *bin)
 	       "\t\t        (u = UDP (default); i = ICMP; r = RAW)\n"
 	       "\n"
 	       "\t\t-m val  Set SO_MARK with given value\n"
+	       "\t\t-M val  Set SO_MARK via setsockopt\n"
 	       "");
 	exit(ERN_HELP);
 }
@@ -64,7 +68,7 @@ static void cs_parse_args(int argc, char *argv[])
 {
 	char o;
 
-	while ((o = getopt(argc, argv, "46sp:m:")) != -1) {
+	while ((o = getopt(argc, argv, "46sp:m:M:")) != -1) {
 		switch (o) {
 		case 's':
 			opt.silent_send = true;
@@ -91,6 +95,9 @@ static void cs_parse_args(int argc, char *argv[])
 			opt.mark.ena = true;
 			opt.mark.val = atoi(optarg);
 			break;
+		case 'M':
+			opt.sockopt.mark = atoi(optarg);
+			break;
 		}
 	}
 
@@ -175,6 +182,11 @@ int main(int argc, char *argv[])
 		sin6->sin6_port = htons(opt.sock.proto);
 	}
 
+	if (opt.sockopt.mark &&
+	    setsockopt(fd, SOL_SOCKET, SO_MARK,
+		       &opt.sockopt.mark, sizeof(opt.sockopt.mark)))
+		error(ERN_SOCKOPT, errno, "setsockopt SO_MARK");
+
 	iov[0].iov_base = buf;
 	iov[0].iov_len = sizeof(buf);
 
diff --git a/tools/testing/selftests/net/cmsg_so_mark.sh b/tools/testing/selftests/net/cmsg_so_mark.sh
index 925f6b9deee2..1650b8622f2f 100755
--- a/tools/testing/selftests/net/cmsg_so_mark.sh
+++ b/tools/testing/selftests/net/cmsg_so_mark.sh
@@ -43,19 +43,27 @@ check_result() {
     fi
 }
 
-for i in 4 6; do
-    [ $i == 4 ] && TGT=$TGT4 || TGT=$TGT6
+for ovr in setsock cmsg both; do
+    for i in 4 6; do
+	[ $i == 4 ] && TGT=$TGT4 || TGT=$TGT6
 
-    for p in u i r; do
-	[ $p == "u" ] && prot=UDP
-	[ $p == "i" ] && prot=ICMP
-	[ $p == "r" ] && prot=RAW
+	for p in u i r; do
+	    [ $p == "u" ] && prot=UDP
+	    [ $p == "i" ] && prot=ICMP
+	    [ $p == "r" ] && prot=RAW
 
-	ip netns exec $NS ./cmsg_sender -$i -p $p -m $((MARK + 1)) $TGT 1234
-	check_result $? 0 "$prot pass"
+	    [ $ovr == "setsock" ] && m="-M"
+	    [ $ovr == "cmsg" ]    && m="-m"
+	    [ $ovr == "both" ]    && m="-M $MARK -m"
 
-	ip netns exec $NS ./cmsg_sender -$i -p $p -m $MARK -s $TGT 1234
-	check_result $? 1 "$prot rejection"
+	    ip netns exec $NS ./cmsg_sender -$i -p $p $m $((MARK + 1)) $TGT 1234
+	    check_result $? 0 "$prot $ovr - pass"
+
+	    [ $ovr == "diff" ] && m="-M $((MARK + 1)) -m"
+
+	    ip netns exec $NS ./cmsg_sender -$i -p $p $m $MARK -s $TGT 1234
+	    check_result $? 1 "$prot $ovr - rejection"
+	done
     done
 done
 
-- 
cgit 


From 4d397424a5e0e130d5e8d0023776f0aa2e791f51 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 9 Feb 2022 16:36:47 -0800
Subject: selftests: net: cmsg_sender: support setting SO_TXTIME

Add ability to send delayed packets.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/cmsg_sender.c | 49 +++++++++++++++++++++++++++++--
 1 file changed, 46 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c
index c7586a4b0361..5a722baa108b 100644
--- a/tools/testing/selftests/net/cmsg_sender.c
+++ b/tools/testing/selftests/net/cmsg_sender.c
@@ -6,9 +6,11 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <time.h>
 #include <unistd.h>
 #include <linux/icmp.h>
 #include <linux/icmpv6.h>
+#include <linux/net_tstamp.h>
 #include <linux/types.h>
 #include <linux/udp.h>
 #include <sys/socket.h>
@@ -23,6 +25,8 @@ enum {
 	ERN_SOCK_CREATE,
 	ERN_RESOLVE,
 	ERN_CMSG_WR,
+	ERN_SOCKOPT,
+	ERN_GETTIME,
 };
 
 struct options {
@@ -41,6 +45,10 @@ struct options {
 		bool ena;
 		unsigned int val;
 	} mark;
+	struct {
+		bool ena;
+		unsigned int delay;
+	} txtime;
 } opt = {
 	.sock = {
 		.family	= AF_UNSPEC,
@@ -49,6 +57,8 @@ struct options {
 	},
 };
 
+static struct timespec time_start_mono;
+
 static void __attribute__((noreturn)) cs_usage(const char *bin)
 {
 	printf("Usage: %s [opts] <dst host> <dst port / service>\n", bin);
@@ -60,6 +70,7 @@ static void __attribute__((noreturn)) cs_usage(const char *bin)
 	       "\n"
 	       "\t\t-m val  Set SO_MARK with given value\n"
 	       "\t\t-M val  Set SO_MARK via setsockopt\n"
+	       "\t\t-d val  Set SO_TXTIME with given delay (usec)\n"
 	       "");
 	exit(ERN_HELP);
 }
@@ -68,7 +79,7 @@ static void cs_parse_args(int argc, char *argv[])
 {
 	char o;
 
-	while ((o = getopt(argc, argv, "46sp:m:M:")) != -1) {
+	while ((o = getopt(argc, argv, "46sp:m:M:d:")) != -1) {
 		switch (o) {
 		case 's':
 			opt.silent_send = true;
@@ -91,6 +102,7 @@ static void cs_parse_args(int argc, char *argv[])
 				cs_usage(argv[0]);
 			}
 			break;
+
 		case 'm':
 			opt.mark.ena = true;
 			opt.mark.val = atoi(optarg);
@@ -98,6 +110,10 @@ static void cs_parse_args(int argc, char *argv[])
 		case 'M':
 			opt.sockopt.mark = atoi(optarg);
 			break;
+		case 'd':
+			opt.txtime.ena = true;
+			opt.txtime.delay = atoi(optarg);
+			break;
 		}
 	}
 
@@ -109,7 +125,7 @@ static void cs_parse_args(int argc, char *argv[])
 }
 
 static void
-cs_write_cmsg(struct msghdr *msg, char *cbuf, size_t cbuf_sz)
+cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz)
 {
 	struct cmsghdr *cmsg;
 	size_t cmsg_len;
@@ -128,6 +144,30 @@ cs_write_cmsg(struct msghdr *msg, char *cbuf, size_t cbuf_sz)
 		cmsg->cmsg_len = CMSG_LEN(sizeof(__u32));
 		*(__u32 *)CMSG_DATA(cmsg) = opt.mark.val;
 	}
+	if (opt.txtime.ena) {
+		struct sock_txtime so_txtime = {
+			.clockid = CLOCK_MONOTONIC,
+		};
+		__u64 txtime;
+
+		if (setsockopt(fd, SOL_SOCKET, SO_TXTIME,
+			       &so_txtime, sizeof(so_txtime)))
+			error(ERN_SOCKOPT, errno, "setsockopt TXTIME");
+
+		txtime = time_start_mono.tv_sec * (1000ULL * 1000 * 1000) +
+			 time_start_mono.tv_nsec +
+			 opt.txtime.delay * 1000;
+
+		cmsg = (struct cmsghdr *)(cbuf + cmsg_len);
+		cmsg_len += CMSG_SPACE(sizeof(txtime));
+		if (cbuf_sz < cmsg_len)
+			error(ERN_CMSG_WR, EFAULT, "cmsg buffer too small");
+
+		cmsg->cmsg_level = SOL_SOCKET;
+		cmsg->cmsg_type = SCM_TXTIME;
+		cmsg->cmsg_len = CMSG_LEN(sizeof(txtime));
+		memcpy(CMSG_DATA(cmsg), &txtime, sizeof(txtime));
+	}
 
 	if (cmsg_len)
 		msg->msg_controllen = cmsg_len;
@@ -187,6 +227,9 @@ int main(int argc, char *argv[])
 		       &opt.sockopt.mark, sizeof(opt.sockopt.mark)))
 		error(ERN_SOCKOPT, errno, "setsockopt SO_MARK");
 
+	if (clock_gettime(CLOCK_MONOTONIC, &time_start_mono))
+		error(ERN_GETTIME, errno, "gettime MONOTINIC");
+
 	iov[0].iov_base = buf;
 	iov[0].iov_len = sizeof(buf);
 
@@ -196,7 +239,7 @@ int main(int argc, char *argv[])
 	msg.msg_iov = iov;
 	msg.msg_iovlen = 1;
 
-	cs_write_cmsg(&msg, cbuf, sizeof(cbuf));
+	cs_write_cmsg(fd, &msg, cbuf, sizeof(cbuf));
 
 	err = sendmsg(fd, &msg, 0);
 	if (err < 0) {
-- 
cgit 


From eb8f3116fb3f51062ed8df9db3c4ca730eec3743 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 9 Feb 2022 16:36:48 -0800
Subject: selftests: net: cmsg_sender: support Tx timestamping

Support requesting Tx timestamps:

 $ ./cmsg_sender -p i -t -4 $tgt 123 -d 1000
 SCHED ts0 61us
   SND ts0 1071us

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/cmsg_sender.c | 123 +++++++++++++++++++++++++++++-
 1 file changed, 122 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c
index 5a722baa108b..24444dc72543 100644
--- a/tools/testing/selftests/net/cmsg_sender.c
+++ b/tools/testing/selftests/net/cmsg_sender.c
@@ -8,6 +8,7 @@
 #include <string.h>
 #include <time.h>
 #include <unistd.h>
+#include <linux/errqueue.h>
 #include <linux/icmp.h>
 #include <linux/icmpv6.h>
 #include <linux/net_tstamp.h>
@@ -27,6 +28,9 @@ enum {
 	ERN_CMSG_WR,
 	ERN_SOCKOPT,
 	ERN_GETTIME,
+	ERN_RECVERR,
+	ERN_CMSG_RD,
+	ERN_CMSG_RCV,
 };
 
 struct options {
@@ -49,6 +53,9 @@ struct options {
 		bool ena;
 		unsigned int delay;
 	} txtime;
+	struct {
+		bool ena;
+	} ts;
 } opt = {
 	.sock = {
 		.family	= AF_UNSPEC,
@@ -57,6 +64,7 @@ struct options {
 	},
 };
 
+static struct timespec time_start_real;
 static struct timespec time_start_mono;
 
 static void __attribute__((noreturn)) cs_usage(const char *bin)
@@ -71,6 +79,7 @@ static void __attribute__((noreturn)) cs_usage(const char *bin)
 	       "\t\t-m val  Set SO_MARK with given value\n"
 	       "\t\t-M val  Set SO_MARK via setsockopt\n"
 	       "\t\t-d val  Set SO_TXTIME with given delay (usec)\n"
+	       "\t\t-t      Enable time stamp reporting\n"
 	       "");
 	exit(ERN_HELP);
 }
@@ -79,7 +88,7 @@ static void cs_parse_args(int argc, char *argv[])
 {
 	char o;
 
-	while ((o = getopt(argc, argv, "46sp:m:M:d:")) != -1) {
+	while ((o = getopt(argc, argv, "46sp:m:M:d:t")) != -1) {
 		switch (o) {
 		case 's':
 			opt.silent_send = true;
@@ -114,6 +123,9 @@ static void cs_parse_args(int argc, char *argv[])
 			opt.txtime.ena = true;
 			opt.txtime.delay = atoi(optarg);
 			break;
+		case 't':
+			opt.ts.ena = true;
+			break;
 		}
 	}
 
@@ -168,6 +180,25 @@ cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz)
 		cmsg->cmsg_len = CMSG_LEN(sizeof(txtime));
 		memcpy(CMSG_DATA(cmsg), &txtime, sizeof(txtime));
 	}
+	if (opt.ts.ena) {
+		__u32 val = SOF_TIMESTAMPING_SOFTWARE |
+			    SOF_TIMESTAMPING_OPT_TSONLY;
+
+		if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
+			       &val, sizeof(val)))
+			error(ERN_SOCKOPT, errno, "setsockopt TIMESTAMPING");
+
+		cmsg = (struct cmsghdr *)(cbuf + cmsg_len);
+		cmsg_len += CMSG_SPACE(sizeof(__u32));
+		if (cbuf_sz < cmsg_len)
+			error(ERN_CMSG_WR, EFAULT, "cmsg buffer too small");
+
+		cmsg->cmsg_level = SOL_SOCKET;
+		cmsg->cmsg_type = SO_TIMESTAMPING;
+		cmsg->cmsg_len = CMSG_LEN(sizeof(__u32));
+		*(__u32 *)CMSG_DATA(cmsg) = SOF_TIMESTAMPING_TX_SCHED |
+					    SOF_TIMESTAMPING_TX_SOFTWARE;
+	}
 
 	if (cmsg_len)
 		msg->msg_controllen = cmsg_len;
@@ -175,6 +206,86 @@ cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz)
 		msg->msg_control = NULL;
 }
 
+static const char *cs_ts_info2str(unsigned int info)
+{
+	static const char *names[] = {
+		[SCM_TSTAMP_SND]	= "SND",
+		[SCM_TSTAMP_SCHED]	= "SCHED",
+		[SCM_TSTAMP_ACK]	= "ACK",
+	};
+
+	if (info < sizeof(names) / sizeof(names[0]))
+		return names[info];
+	return "unknown";
+}
+
+static void
+cs_read_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz)
+{
+	struct sock_extended_err *see;
+	struct scm_timestamping *ts;
+	struct cmsghdr *cmsg;
+	int i, err;
+
+	if (!opt.ts.ena)
+		return;
+	msg->msg_control = cbuf;
+	msg->msg_controllen = cbuf_sz;
+
+	while (true) {
+		ts = NULL;
+		see = NULL;
+		memset(cbuf, 0, cbuf_sz);
+
+		err = recvmsg(fd, msg, MSG_ERRQUEUE);
+		if (err < 0) {
+			if (errno == EAGAIN)
+				break;
+			error(ERN_RECVERR, errno, "recvmsg ERRQ");
+		}
+
+		for (cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL;
+		     cmsg = CMSG_NXTHDR(msg, cmsg)) {
+			if (cmsg->cmsg_level == SOL_SOCKET &&
+			    cmsg->cmsg_type == SO_TIMESTAMPING_OLD) {
+				if (cmsg->cmsg_len < sizeof(*ts))
+					error(ERN_CMSG_RD, EINVAL, "TS cmsg");
+
+				ts = (void *)CMSG_DATA(cmsg);
+			}
+			if ((cmsg->cmsg_level == SOL_IP &&
+			     cmsg->cmsg_type == IP_RECVERR) ||
+			    (cmsg->cmsg_level == SOL_IPV6 &&
+			     cmsg->cmsg_type == IPV6_RECVERR)) {
+				if (cmsg->cmsg_len < sizeof(*see))
+					error(ERN_CMSG_RD, EINVAL, "sock_err cmsg");
+
+				see = (void *)CMSG_DATA(cmsg);
+			}
+		}
+
+		if (!ts)
+			error(ERN_CMSG_RCV, ENOENT, "TS cmsg not found");
+		if (!see)
+			error(ERN_CMSG_RCV, ENOENT, "sock_err cmsg not found");
+
+		for (i = 0; i < 3; i++) {
+			unsigned long long rel_time;
+
+			if (!ts->ts[i].tv_sec && !ts->ts[i].tv_nsec)
+				continue;
+
+			rel_time = (ts->ts[i].tv_sec - time_start_real.tv_sec) *
+				(1000ULL * 1000) +
+				(ts->ts[i].tv_nsec - time_start_real.tv_nsec) /
+				1000;
+			printf(" %5s ts%d %lluus\n",
+			       cs_ts_info2str(see->ee_info),
+			       i, rel_time);
+		}
+	}
+}
+
 int main(int argc, char *argv[])
 {
 	char buf[] = "blablablabla";
@@ -227,6 +338,8 @@ int main(int argc, char *argv[])
 		       &opt.sockopt.mark, sizeof(opt.sockopt.mark)))
 		error(ERN_SOCKOPT, errno, "setsockopt SO_MARK");
 
+	if (clock_gettime(CLOCK_REALTIME, &time_start_real))
+		error(ERN_GETTIME, errno, "gettime REALTIME");
 	if (clock_gettime(CLOCK_MONOTONIC, &time_start_mono))
 		error(ERN_GETTIME, errno, "gettime MONOTINIC");
 
@@ -246,13 +359,21 @@ int main(int argc, char *argv[])
 		if (!opt.silent_send)
 			fprintf(stderr, "send failed: %s\n", strerror(errno));
 		err = ERN_SEND;
+		goto err_out;
 	} else if (err != sizeof(buf)) {
 		fprintf(stderr, "short send\n");
 		err = ERN_SEND_SHORT;
+		goto err_out;
 	} else {
 		err = ERN_SUCCESS;
 	}
 
+	/* Make sure all timestamps have time to loop back */
+	usleep(opt.txtime.delay);
+
+	cs_read_cmsg(fd, &msg, cbuf, sizeof(cbuf));
+
+err_out:
 	close(fd);
 	freeaddrinfo(ai);
 	return err;
-- 
cgit 


From af6ca20591efce42535e1c870b7ae6eae98df1b3 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 9 Feb 2022 16:36:49 -0800
Subject: selftests: net: test standard socket cmsgs across UDP and ICMP
 sockets

Test TIMESTAMPING and TXTIME across UDP / ICMP and IP versions.

Before ICMPv6 support:
  # ./tools/testing/selftests/net/cmsg_time.sh
    Case ICMPv6  - ts cnt returned '0', expected '2'
    Case ICMPv6  - ts0 SCHED returned '', expected 'OK'
    Case ICMPv6  - ts0 SND returned '', expected 'OK'
    Case ICMPv6  - TXTIME abs returned '', expected 'OK'
    Case ICMPv6  - TXTIME rel returned '', expected 'OK'
  FAIL - 5/36 cases failed

After:
  # ./tools/testing/selftests/net/cmsg_time.sh
  OK

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/Makefile     |  1 +
 tools/testing/selftests/net/cmsg_time.sh | 83 ++++++++++++++++++++++++++++++++
 2 files changed, 84 insertions(+)
 create mode 100755 tools/testing/selftests/net/cmsg_time.sh

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 8f4c1f16655f..3bfeaf06b960 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -30,6 +30,7 @@ TEST_PROGS += ioam6.sh
 TEST_PROGS += gro.sh
 TEST_PROGS += gre_gso.sh
 TEST_PROGS += cmsg_so_mark.sh
+TEST_PROGS += cmsg_time.sh
 TEST_PROGS += srv6_end_dt46_l3vpn_test.sh
 TEST_PROGS += srv6_end_dt4_l3vpn_test.sh
 TEST_PROGS += srv6_end_dt6_l3vpn_test.sh
diff --git a/tools/testing/selftests/net/cmsg_time.sh b/tools/testing/selftests/net/cmsg_time.sh
new file mode 100755
index 000000000000..91161e1da734
--- /dev/null
+++ b/tools/testing/selftests/net/cmsg_time.sh
@@ -0,0 +1,83 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NS=ns
+IP4=172.16.0.1/24
+TGT4=172.16.0.2
+IP6=2001:db8:1::1/64
+TGT6=2001:db8:1::2
+
+cleanup()
+{
+    ip netns del $NS
+}
+
+trap cleanup EXIT
+
+# Namespaces
+ip netns add $NS
+
+ip netns exec $NS sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null
+
+# Connectivity
+ip -netns $NS link add type dummy
+ip -netns $NS link set dev dummy0 up
+ip -netns $NS addr add $IP4 dev dummy0
+ip -netns $NS addr add $IP6 dev dummy0
+
+# Need FQ for TXTIME
+ip netns exec $NS tc qdisc replace dev dummy0 root fq
+
+# Test
+BAD=0
+TOTAL=0
+
+check_result() {
+    ((TOTAL++))
+    if [ $1 -ne 0 ]; then
+	echo "  Case $4 returned $1, expected 0"
+	((BAD++))
+    elif [ "$2" != "$3" ]; then
+	echo "  Case $4 returned '$2', expected '$3'"
+	((BAD++))
+    fi
+}
+
+for i in "-4 $TGT4" "-6 $TGT6"; do
+    for p in u i r; do
+	[ $p == "u" ] && prot=UDPv${i:1:2}
+	[ $p == "i" ] && prot=ICMPv${i:1:2}
+	[ $p == "r" ] && prot=RAWv${i:1:2}
+
+	ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234)
+	check_result $? "$ts" "" "$prot - no options"
+
+	ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t | wc -l)
+	check_result $? "$ts" "2" "$prot - ts cnt"
+	ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t |
+		 sed -n "s/.*SCHED ts0 [0-9].*/OK/p")
+	check_result $? "$ts" "OK" "$prot - ts0 SCHED"
+	ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t |
+		 sed -n "s/.*SND ts0 [0-9].*/OK/p")
+	check_result $? "$ts" "OK" "$prot - ts0 SND"
+
+	ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t -d 1000 |
+		 awk '/SND/ { if ($3 > 1000) print "OK"; }')
+	check_result $? "$ts" "OK" "$prot - TXTIME abs"
+
+	ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t -d 1000 |
+		 awk '/SND/ {snd=$3}
+		      /SCHED/ {sch=$3}
+		      END { if (snd - sch > 500) print "OK"; }')
+	check_result $? "$ts" "OK" "$prot - TXTIME rel"
+    done
+done
+
+# Summary
+if [ $BAD -ne 0 ]; then
+    echo "FAIL - $BAD/$TOTAL cases failed"
+    exit 1
+else
+    echo "OK"
+    exit 0
+fi
-- 
cgit 


From 951cb0a3b5f2acc61ed84c752fb0bcef6806496f Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Wed, 19 Jan 2022 23:07:39 +0000
Subject: KVM: selftests: Add an option to disable MANUAL_PROTECT_ENABLE and
 INITIALLY_SET

Add an option to dirty_log_perf_test.c to disable
KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE and KVM_DIRTY_LOG_INITIALLY_SET so
the legacy dirty logging code path can be tested.

Reviewed-by: Peter Xu <peterx@redhat.com>
Signed-off-by: David Matlack <dmatlack@google.com>
Message-Id: <20220119230739.2234394-19-dmatlack@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/dirty_log_perf_test.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index 1954b964d1cf..101759ac93a4 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -298,12 +298,18 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 static void help(char *name)
 {
 	puts("");
-	printf("usage: %s [-h] [-i iterations] [-p offset] "
+	printf("usage: %s [-h] [-i iterations] [-p offset] [-g]"
 	       "[-m mode] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]"
 	       "[-x memslots]\n", name);
 	puts("");
 	printf(" -i: specify iteration counts (default: %"PRIu64")\n",
 	       TEST_HOST_LOOP_N);
+	printf(" -g: Do not enable KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2. This\n"
+	       "     makes KVM_GET_DIRTY_LOG clear the dirty log (i.e.\n"
+	       "     KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE is not enabled)\n"
+	       "     and writes will be tracked as soon as dirty logging is\n"
+	       "     enabled on the memslot (i.e. KVM_DIRTY_LOG_INITIALLY_SET\n"
+	       "     is not enabled).\n");
 	printf(" -p: specify guest physical test memory offset\n"
 	       "     Warning: a low offset can conflict with the loaded test code.\n");
 	guest_modes_help();
@@ -343,8 +349,11 @@ int main(int argc, char *argv[])
 
 	guest_modes_append_default();
 
-	while ((opt = getopt(argc, argv, "hi:p:m:b:f:v:os:x:")) != -1) {
+	while ((opt = getopt(argc, argv, "ghi:p:m:b:f:v:os:x:")) != -1) {
 		switch (opt) {
+		case 'g':
+			dirty_log_manual_caps = 0;
+			break;
 		case 'i':
 			p.iterations = atoi(optarg);
 			break;
-- 
cgit 


From 6081f9c764263d642fc187675623fb47accb3649 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Thu, 3 Feb 2022 11:46:15 +0100
Subject: KVM: selftests: Adapt hyperv_cpuid test to the newly introduced
 Enlightened MSR-Bitmap

CPUID 0x40000000.EAX is now always present as it has Enlightened
MSR-Bitmap feature bit set. Adapt the test accordingly. Opportunistically
add a check for the supported eVMCS version range.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20220203104620.277031-2-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c | 29 +++++++++++++----------
 1 file changed, 17 insertions(+), 12 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
index 7e2d2d17d2ed..8c245ab2d98a 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
@@ -49,16 +49,13 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
 			  bool evmcs_expected)
 {
 	int i;
-	int nent = 9;
+	int nent_expected = 10;
 	u32 test_val;
 
-	if (evmcs_expected)
-		nent += 1; /* 0x4000000A */
-
-	TEST_ASSERT(hv_cpuid_entries->nent == nent,
+	TEST_ASSERT(hv_cpuid_entries->nent == nent_expected,
 		    "KVM_GET_SUPPORTED_HV_CPUID should return %d entries"
-		    " with evmcs=%d (returned %d)",
-		    nent, evmcs_expected, hv_cpuid_entries->nent);
+		    " (returned %d)",
+		    nent_expected, hv_cpuid_entries->nent);
 
 	for (i = 0; i < hv_cpuid_entries->nent; i++) {
 		struct kvm_cpuid_entry2 *entry = &hv_cpuid_entries->entries[i];
@@ -68,9 +65,6 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
 			    "function %x is our of supported range",
 			    entry->function);
 
-		TEST_ASSERT(evmcs_expected || (entry->function != 0x4000000A),
-			    "0x4000000A leaf should not be reported");
-
 		TEST_ASSERT(entry->index == 0,
 			    ".index field should be zero");
 
@@ -97,8 +91,20 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
 				    "NoNonArchitecturalCoreSharing bit"
 				    " doesn't reflect SMT setting");
 			break;
-		}
+		case 0x4000000A:
+			TEST_ASSERT(entry->eax & (1UL << 19),
+				    "Enlightened MSR-Bitmap should always be supported"
+				    " 0x40000000.EAX: %x", entry->eax);
+			if (evmcs_expected)
+				TEST_ASSERT((entry->eax & 0xffff) == 0x101,
+				    "Supported Enlightened VMCS version range is supposed to be 1:1"
+				    " 0x40000000.EAX: %x", entry->eax);
+
+			break;
+		default:
+			break;
 
+		}
 		/*
 		 * If needed for debug:
 		 * fprintf(stdout,
@@ -107,7 +113,6 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
 		 *	entry->edx);
 		 */
 	}
-
 }
 
 void test_hv_cpuid_e2big(struct kvm_vm *vm, bool system)
-- 
cgit 


From 761b5ebaa12095ade05788951d8d8c489338cb75 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Thu, 3 Feb 2022 11:46:16 +0100
Subject: KVM: selftests: nVMX: Properly deal with 'hv_clean_fields'

Instead of just resetting 'hv_clean_fields' to 0 on every enlightened
vmresume, do the expected cleaning of the corresponding bit on enlightened
vmwrite. Avoid direct access to 'current_evmcs' from evmcs_test to support
the change.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20220203104620.277031-3-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/include/x86_64/evmcs.h | 150 ++++++++++++++++++++-
 tools/testing/selftests/kvm/x86_64/evmcs_test.c    |   5 +-
 2 files changed, 152 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/include/x86_64/evmcs.h b/tools/testing/selftests/kvm/include/x86_64/evmcs.h
index c9af97abd622..cc5d14a45702 100644
--- a/tools/testing/selftests/kvm/include/x86_64/evmcs.h
+++ b/tools/testing/selftests/kvm/include/x86_64/evmcs.h
@@ -213,6 +213,25 @@ struct hv_enlightened_vmcs {
 	u64 padding64_6[7];
 };
 
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE                     0
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP                BIT(0)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP               BIT(1)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2             BIT(2)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1             BIT(3)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC             BIT(4)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT            BIT(5)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY            BIT(6)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN            BIT(7)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR                     BIT(8)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT             BIT(9)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC              BIT(10)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1               BIT(11)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2               BIT(12)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER             BIT(13)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1                BIT(14)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ENLIGHTENMENTSCONTROL    BIT(15)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL                      0xFFFF
+
 #define HV_X64_MSR_VP_ASSIST_PAGE		0x40000073
 #define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE	0x00000001
 #define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT	12
@@ -648,381 +667,507 @@ static inline int evmcs_vmwrite(uint64_t encoding, uint64_t value)
 	switch (encoding) {
 	case GUEST_RIP:
 		current_evmcs->guest_rip = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
 		break;
 	case GUEST_RSP:
 		current_evmcs->guest_rsp = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC;
 		break;
 	case GUEST_RFLAGS:
 		current_evmcs->guest_rflags = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC;
 		break;
 	case HOST_IA32_PAT:
 		current_evmcs->host_ia32_pat = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
 		break;
 	case HOST_IA32_EFER:
 		current_evmcs->host_ia32_efer = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
 		break;
 	case HOST_CR0:
 		current_evmcs->host_cr0 = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
 		break;
 	case HOST_CR3:
 		current_evmcs->host_cr3 = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
 		break;
 	case HOST_CR4:
 		current_evmcs->host_cr4 = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
 		break;
 	case HOST_IA32_SYSENTER_ESP:
 		current_evmcs->host_ia32_sysenter_esp = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
 		break;
 	case HOST_IA32_SYSENTER_EIP:
 		current_evmcs->host_ia32_sysenter_eip = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
 		break;
 	case HOST_RIP:
 		current_evmcs->host_rip = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
 		break;
 	case IO_BITMAP_A:
 		current_evmcs->io_bitmap_a = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP;
 		break;
 	case IO_BITMAP_B:
 		current_evmcs->io_bitmap_b = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP;
 		break;
 	case MSR_BITMAP:
 		current_evmcs->msr_bitmap = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
 		break;
 	case GUEST_ES_BASE:
 		current_evmcs->guest_es_base = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
 		break;
 	case GUEST_CS_BASE:
 		current_evmcs->guest_cs_base = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
 		break;
 	case GUEST_SS_BASE:
 		current_evmcs->guest_ss_base = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
 		break;
 	case GUEST_DS_BASE:
 		current_evmcs->guest_ds_base = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
 		break;
 	case GUEST_FS_BASE:
 		current_evmcs->guest_fs_base = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
 		break;
 	case GUEST_GS_BASE:
 		current_evmcs->guest_gs_base = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
 		break;
 	case GUEST_LDTR_BASE:
 		current_evmcs->guest_ldtr_base = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
 		break;
 	case GUEST_TR_BASE:
 		current_evmcs->guest_tr_base = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
 		break;
 	case GUEST_GDTR_BASE:
 		current_evmcs->guest_gdtr_base = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
 		break;
 	case GUEST_IDTR_BASE:
 		current_evmcs->guest_idtr_base = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
 		break;
 	case TSC_OFFSET:
 		current_evmcs->tsc_offset = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
 		break;
 	case VIRTUAL_APIC_PAGE_ADDR:
 		current_evmcs->virtual_apic_page_addr = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
 		break;
 	case VMCS_LINK_POINTER:
 		current_evmcs->vmcs_link_pointer = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
 		break;
 	case GUEST_IA32_DEBUGCTL:
 		current_evmcs->guest_ia32_debugctl = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
 		break;
 	case GUEST_IA32_PAT:
 		current_evmcs->guest_ia32_pat = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
 		break;
 	case GUEST_IA32_EFER:
 		current_evmcs->guest_ia32_efer = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
 		break;
 	case GUEST_PDPTR0:
 		current_evmcs->guest_pdptr0 = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
 		break;
 	case GUEST_PDPTR1:
 		current_evmcs->guest_pdptr1 = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
 		break;
 	case GUEST_PDPTR2:
 		current_evmcs->guest_pdptr2 = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
 		break;
 	case GUEST_PDPTR3:
 		current_evmcs->guest_pdptr3 = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
 		break;
 	case GUEST_PENDING_DBG_EXCEPTIONS:
 		current_evmcs->guest_pending_dbg_exceptions = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
 		break;
 	case GUEST_SYSENTER_ESP:
 		current_evmcs->guest_sysenter_esp = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
 		break;
 	case GUEST_SYSENTER_EIP:
 		current_evmcs->guest_sysenter_eip = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
 		break;
 	case CR0_GUEST_HOST_MASK:
 		current_evmcs->cr0_guest_host_mask = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
 		break;
 	case CR4_GUEST_HOST_MASK:
 		current_evmcs->cr4_guest_host_mask = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
 		break;
 	case CR0_READ_SHADOW:
 		current_evmcs->cr0_read_shadow = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
 		break;
 	case CR4_READ_SHADOW:
 		current_evmcs->cr4_read_shadow = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
 		break;
 	case GUEST_CR0:
 		current_evmcs->guest_cr0 = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
 		break;
 	case GUEST_CR3:
 		current_evmcs->guest_cr3 = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
 		break;
 	case GUEST_CR4:
 		current_evmcs->guest_cr4 = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
 		break;
 	case GUEST_DR7:
 		current_evmcs->guest_dr7 = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
 		break;
 	case HOST_FS_BASE:
 		current_evmcs->host_fs_base = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
 		break;
 	case HOST_GS_BASE:
 		current_evmcs->host_gs_base = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
 		break;
 	case HOST_TR_BASE:
 		current_evmcs->host_tr_base = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
 		break;
 	case HOST_GDTR_BASE:
 		current_evmcs->host_gdtr_base = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
 		break;
 	case HOST_IDTR_BASE:
 		current_evmcs->host_idtr_base = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
 		break;
 	case HOST_RSP:
 		current_evmcs->host_rsp = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
 		break;
 	case EPT_POINTER:
 		current_evmcs->ept_pointer = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT;
 		break;
 	case GUEST_BNDCFGS:
 		current_evmcs->guest_bndcfgs = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
 		break;
 	case XSS_EXIT_BITMAP:
 		current_evmcs->xss_exit_bitmap = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
 		break;
 	case GUEST_PHYSICAL_ADDRESS:
 		current_evmcs->guest_physical_address = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
 		break;
 	case EXIT_QUALIFICATION:
 		current_evmcs->exit_qualification = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
 		break;
 	case GUEST_LINEAR_ADDRESS:
 		current_evmcs->guest_linear_address = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
 		break;
 	case VM_EXIT_MSR_STORE_ADDR:
 		current_evmcs->vm_exit_msr_store_addr = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
 		break;
 	case VM_EXIT_MSR_LOAD_ADDR:
 		current_evmcs->vm_exit_msr_load_addr = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
 		break;
 	case VM_ENTRY_MSR_LOAD_ADDR:
 		current_evmcs->vm_entry_msr_load_addr = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
 		break;
 	case CR3_TARGET_VALUE0:
 		current_evmcs->cr3_target_value0 = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
 		break;
 	case CR3_TARGET_VALUE1:
 		current_evmcs->cr3_target_value1 = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
 		break;
 	case CR3_TARGET_VALUE2:
 		current_evmcs->cr3_target_value2 = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
 		break;
 	case CR3_TARGET_VALUE3:
 		current_evmcs->cr3_target_value3 = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
 		break;
 	case TPR_THRESHOLD:
 		current_evmcs->tpr_threshold = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
 		break;
 	case GUEST_INTERRUPTIBILITY_INFO:
 		current_evmcs->guest_interruptibility_info = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC;
 		break;
 	case CPU_BASED_VM_EXEC_CONTROL:
 		current_evmcs->cpu_based_vm_exec_control = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC;
 		break;
 	case EXCEPTION_BITMAP:
 		current_evmcs->exception_bitmap = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN;
 		break;
 	case VM_ENTRY_CONTROLS:
 		current_evmcs->vm_entry_controls = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY;
 		break;
 	case VM_ENTRY_INTR_INFO_FIELD:
 		current_evmcs->vm_entry_intr_info_field = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT;
 		break;
 	case VM_ENTRY_EXCEPTION_ERROR_CODE:
 		current_evmcs->vm_entry_exception_error_code = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT;
 		break;
 	case VM_ENTRY_INSTRUCTION_LEN:
 		current_evmcs->vm_entry_instruction_len = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT;
 		break;
 	case HOST_IA32_SYSENTER_CS:
 		current_evmcs->host_ia32_sysenter_cs = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
 		break;
 	case PIN_BASED_VM_EXEC_CONTROL:
 		current_evmcs->pin_based_vm_exec_control = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1;
 		break;
 	case VM_EXIT_CONTROLS:
 		current_evmcs->vm_exit_controls = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1;
 		break;
 	case SECONDARY_VM_EXEC_CONTROL:
 		current_evmcs->secondary_vm_exec_control = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1;
 		break;
 	case GUEST_ES_LIMIT:
 		current_evmcs->guest_es_limit = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
 		break;
 	case GUEST_CS_LIMIT:
 		current_evmcs->guest_cs_limit = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
 		break;
 	case GUEST_SS_LIMIT:
 		current_evmcs->guest_ss_limit = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
 		break;
 	case GUEST_DS_LIMIT:
 		current_evmcs->guest_ds_limit = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
 		break;
 	case GUEST_FS_LIMIT:
 		current_evmcs->guest_fs_limit = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
 		break;
 	case GUEST_GS_LIMIT:
 		current_evmcs->guest_gs_limit = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
 		break;
 	case GUEST_LDTR_LIMIT:
 		current_evmcs->guest_ldtr_limit = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
 		break;
 	case GUEST_TR_LIMIT:
 		current_evmcs->guest_tr_limit = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
 		break;
 	case GUEST_GDTR_LIMIT:
 		current_evmcs->guest_gdtr_limit = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
 		break;
 	case GUEST_IDTR_LIMIT:
 		current_evmcs->guest_idtr_limit = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
 		break;
 	case GUEST_ES_AR_BYTES:
 		current_evmcs->guest_es_ar_bytes = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
 		break;
 	case GUEST_CS_AR_BYTES:
 		current_evmcs->guest_cs_ar_bytes = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
 		break;
 	case GUEST_SS_AR_BYTES:
 		current_evmcs->guest_ss_ar_bytes = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
 		break;
 	case GUEST_DS_AR_BYTES:
 		current_evmcs->guest_ds_ar_bytes = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
 		break;
 	case GUEST_FS_AR_BYTES:
 		current_evmcs->guest_fs_ar_bytes = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
 		break;
 	case GUEST_GS_AR_BYTES:
 		current_evmcs->guest_gs_ar_bytes = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
 		break;
 	case GUEST_LDTR_AR_BYTES:
 		current_evmcs->guest_ldtr_ar_bytes = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
 		break;
 	case GUEST_TR_AR_BYTES:
 		current_evmcs->guest_tr_ar_bytes = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
 		break;
 	case GUEST_ACTIVITY_STATE:
 		current_evmcs->guest_activity_state = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
 		break;
 	case GUEST_SYSENTER_CS:
 		current_evmcs->guest_sysenter_cs = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
 		break;
 	case VM_INSTRUCTION_ERROR:
 		current_evmcs->vm_instruction_error = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
 		break;
 	case VM_EXIT_REASON:
 		current_evmcs->vm_exit_reason = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
 		break;
 	case VM_EXIT_INTR_INFO:
 		current_evmcs->vm_exit_intr_info = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
 		break;
 	case VM_EXIT_INTR_ERROR_CODE:
 		current_evmcs->vm_exit_intr_error_code = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
 		break;
 	case IDT_VECTORING_INFO_FIELD:
 		current_evmcs->idt_vectoring_info_field = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
 		break;
 	case IDT_VECTORING_ERROR_CODE:
 		current_evmcs->idt_vectoring_error_code = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
 		break;
 	case VM_EXIT_INSTRUCTION_LEN:
 		current_evmcs->vm_exit_instruction_len = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
 		break;
 	case VMX_INSTRUCTION_INFO:
 		current_evmcs->vmx_instruction_info = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
 		break;
 	case PAGE_FAULT_ERROR_CODE_MASK:
 		current_evmcs->page_fault_error_code_mask = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
 		break;
 	case PAGE_FAULT_ERROR_CODE_MATCH:
 		current_evmcs->page_fault_error_code_match = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
 		break;
 	case CR3_TARGET_COUNT:
 		current_evmcs->cr3_target_count = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
 		break;
 	case VM_EXIT_MSR_STORE_COUNT:
 		current_evmcs->vm_exit_msr_store_count = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
 		break;
 	case VM_EXIT_MSR_LOAD_COUNT:
 		current_evmcs->vm_exit_msr_load_count = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
 		break;
 	case VM_ENTRY_MSR_LOAD_COUNT:
 		current_evmcs->vm_entry_msr_load_count = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
 		break;
 	case HOST_ES_SELECTOR:
 		current_evmcs->host_es_selector = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
 		break;
 	case HOST_CS_SELECTOR:
 		current_evmcs->host_cs_selector = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
 		break;
 	case HOST_SS_SELECTOR:
 		current_evmcs->host_ss_selector = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
 		break;
 	case HOST_DS_SELECTOR:
 		current_evmcs->host_ds_selector = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
 		break;
 	case HOST_FS_SELECTOR:
 		current_evmcs->host_fs_selector = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
 		break;
 	case HOST_GS_SELECTOR:
 		current_evmcs->host_gs_selector = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
 		break;
 	case HOST_TR_SELECTOR:
 		current_evmcs->host_tr_selector = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
 		break;
 	case GUEST_ES_SELECTOR:
 		current_evmcs->guest_es_selector = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
 		break;
 	case GUEST_CS_SELECTOR:
 		current_evmcs->guest_cs_selector = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
 		break;
 	case GUEST_SS_SELECTOR:
 		current_evmcs->guest_ss_selector = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
 		break;
 	case GUEST_DS_SELECTOR:
 		current_evmcs->guest_ds_selector = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
 		break;
 	case GUEST_FS_SELECTOR:
 		current_evmcs->guest_fs_selector = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
 		break;
 	case GUEST_GS_SELECTOR:
 		current_evmcs->guest_gs_selector = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
 		break;
 	case GUEST_LDTR_SELECTOR:
 		current_evmcs->guest_ldtr_selector = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
 		break;
 	case GUEST_TR_SELECTOR:
 		current_evmcs->guest_tr_selector = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
 		break;
 	case VIRTUAL_PROCESSOR_ID:
 		current_evmcs->virtual_processor_id = value;
+		current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT;
 		break;
 	default: return 1;
 	}
@@ -1070,7 +1215,10 @@ static inline int evmcs_vmresume(void)
 {
 	int ret;
 
-	current_evmcs->hv_clean_fields = 0;
+	/* HOST_RIP */
+	current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+	/* HOST_RSP */
+	current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
 
 	__asm__ __volatile__("push %%rbp;"
 			     "push %%rcx;"
diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
index 4c7841dfd481..655104051819 100644
--- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -76,8 +76,9 @@ void guest_code(struct vmx_pages *vmx_pages)
 	current_evmcs->revision_id = EVMCS_VERSION;
 	GUEST_SYNC(6);
 
-	current_evmcs->pin_based_vm_exec_control |=
-		PIN_BASED_NMI_EXITING;
+	vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmreadz(PIN_BASED_VM_EXEC_CONTROL) |
+		PIN_BASED_NMI_EXITING);
+
 	GUEST_ASSERT(!vmlaunch());
 	GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
 
-- 
cgit 


From 70e477d996c88d3915d0a870bada29ecf55e45fc Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Thu, 3 Feb 2022 11:46:17 +0100
Subject: KVM: selftests: nVMX: Add enlightened MSR-Bitmap selftest

Introduce a test for enlightened MSR-Bitmap feature (Hyper-V on KVM):
- Intercept access to MSR_FS_BASE in L1 and check that this works
 with enlightened MSR-Bitmap disabled.
- Enabled enlightened MSR-Bitmap and check that the intercept still works
as expected.
- Intercept access to MSR_GS_BASE but don't clear the corresponding bit
from 'hv_clean_fields', KVM is supposed to skip updating MSR-Bitmap02 and
thus the consequent access to the MSR from L2 will not get intercepted.
- Finally, clear the corresponding bit from 'hv_clean_fields' and check
that access to MSR_GS_BASE is now intercepted.

The test works with the assumption, that access to MSR_FS_BASE/MSR_GS_BASE
is not intercepted for L1. If this ever becomes not true the test will
fail as nested_vmx_exit_handled_msr() always checks L1's MSR-Bitmap for
L2 irrespective of 'hv_clean_fields'. The behavior is correct as
enlightened MSR-Bitmap feature is just an optimization, KVM is not obliged
to ignore updates when the corresponding bit in 'hv_clean_fields' stays
clear.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20220203104620.277031-4-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/x86_64/evmcs_test.c | 59 +++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
index 655104051819..d12e043aa2ee 100644
--- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -10,6 +10,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <sys/ioctl.h>
+#include <linux/bitmap.h>
 
 #include "test_util.h"
 
@@ -32,6 +33,22 @@ static void guest_nmi_handler(struct ex_regs *regs)
 {
 }
 
+/* Exits to L1 destroy GRPs! */
+static inline void rdmsr_fs_base(void)
+{
+	__asm__ __volatile__ ("mov $0xc0000100, %%rcx; rdmsr" : : :
+			      "rax", "rbx", "rcx", "rdx",
+			      "rsi", "rdi", "r8", "r9", "r10", "r11", "r12",
+			      "r13", "r14", "r15");
+}
+static inline void rdmsr_gs_base(void)
+{
+	__asm__ __volatile__ ("mov $0xc0000101, %%rcx; rdmsr" : : :
+			      "rax", "rbx", "rcx", "rdx",
+			      "rsi", "rdi", "r8", "r9", "r10", "r11", "r12",
+			      "r13", "r14", "r15");
+}
+
 void l2_guest_code(void)
 {
 	GUEST_SYNC(7);
@@ -41,6 +58,15 @@ void l2_guest_code(void)
 	/* Forced exit to L1 upon restore */
 	GUEST_SYNC(9);
 
+	vmcall();
+
+	/* MSR-Bitmap tests */
+	rdmsr_fs_base(); /* intercepted */
+	rdmsr_fs_base(); /* intercepted */
+	rdmsr_gs_base(); /* not intercepted */
+	vmcall();
+	rdmsr_gs_base(); /* intercepted */
+
 	/* Done, exit to L1 and never come back.  */
 	vmcall();
 }
@@ -91,6 +117,39 @@ void guest_code(struct vmx_pages *vmx_pages)
 
 	GUEST_SYNC(10);
 
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+	current_evmcs->guest_rip += 3; /* vmcall */
+
+	/* Intercept RDMSR 0xc0000100 */
+	vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmreadz(CPU_BASED_VM_EXEC_CONTROL) |
+		CPU_BASED_USE_MSR_BITMAPS);
+	set_bit(MSR_FS_BASE & 0x1fff, vmx_pages->msr + 0x400);
+	GUEST_ASSERT(!vmresume());
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
+	current_evmcs->guest_rip += 2; /* rdmsr */
+
+	/* Enable enlightened MSR bitmap */
+	current_evmcs->hv_enlightenments_control.msr_bitmap = 1;
+	GUEST_ASSERT(!vmresume());
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
+	current_evmcs->guest_rip += 2; /* rdmsr */
+
+	/* Intercept RDMSR 0xc0000101 without telling KVM about it */
+	set_bit(MSR_GS_BASE & 0x1fff, vmx_pages->msr + 0x400);
+	/* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */
+	current_evmcs->hv_clean_fields |= HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
+	GUEST_ASSERT(!vmresume());
+	/* Make sure we don't see EXIT_REASON_MSR_READ here so eMSR bitmap works */
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+	current_evmcs->guest_rip += 3; /* vmcall */
+
+	/* Now tell KVM we've changed MSR-Bitmap */
+	current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
+	GUEST_ASSERT(!vmresume());
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
+	current_evmcs->guest_rip += 2; /* rdmsr */
+
+	GUEST_ASSERT(!vmresume());
 	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
 	GUEST_SYNC(11);
 
-- 
cgit 


From 0b815117da63ba71e06faffc7b9b43e49b08bfea Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Thu, 3 Feb 2022 11:46:18 +0100
Subject: KVM: selftests: nSVM: Set up MSR-Bitmap for SVM guests

Similar to VMX, allocate memory for MSR-Bitmap and fill in 'msrpm_base_pa'
in VMCB. To use it, tests will need to set INTERCEPT_MSR_PROT interception
along with the required bits in the MSR-Bitmap.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20220203104620.277031-5-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/include/x86_64/svm_util.h | 5 +++++
 tools/testing/selftests/kvm/lib/x86_64/svm.c          | 6 ++++++
 2 files changed, 11 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/include/x86_64/svm_util.h b/tools/testing/selftests/kvm/include/x86_64/svm_util.h
index 587fbe408b99..e23c8a3e8a66 100644
--- a/tools/testing/selftests/kvm/include/x86_64/svm_util.h
+++ b/tools/testing/selftests/kvm/include/x86_64/svm_util.h
@@ -28,6 +28,11 @@ struct svm_test_data {
 	struct vmcb_save_area *save_area; /* gva */
 	void *save_area_hva;
 	uint64_t save_area_gpa;
+
+	/* MSR-Bitmap */
+	void *msr; /* gva */
+	void *msr_hva;
+	uint64_t msr_gpa;
 };
 
 struct svm_test_data *vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva);
diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86_64/svm.c
index 0ebc03ce079c..736ee4a23df6 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/svm.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/svm.c
@@ -43,6 +43,11 @@ vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva)
 	svm->save_area_hva = addr_gva2hva(vm, (uintptr_t)svm->save_area);
 	svm->save_area_gpa = addr_gva2gpa(vm, (uintptr_t)svm->save_area);
 
+	svm->msr = (void *)vm_vaddr_alloc_page(vm);
+	svm->msr_hva = addr_gva2hva(vm, (uintptr_t)svm->msr);
+	svm->msr_gpa = addr_gva2gpa(vm, (uintptr_t)svm->msr);
+	memset(svm->msr_hva, 0, getpagesize());
+
 	*p_svm_gva = svm_gva;
 	return svm;
 }
@@ -106,6 +111,7 @@ void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_r
 	save->dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
 	ctrl->intercept = (1ULL << INTERCEPT_VMRUN) |
 				(1ULL << INTERCEPT_VMMCALL);
+	ctrl->msrpm_base_pa = svm->msr_gpa;
 
 	vmcb->save.rip = (u64)guest_rip;
 	vmcb->save.rsp = (u64)guest_rsp;
-- 
cgit 


From 29f557d553494136a46096addbe1c14e9805b73e Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Thu, 3 Feb 2022 11:46:19 +0100
Subject: KVM: selftests: nSVM: Update 'struct vmcb_control_area' definition

There's a copy of 'struct vmcb_control_area' definition in KVM selftests,
update it to allow testing of the newly introduced features.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20220203104620.277031-6-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/include/x86_64/svm.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/include/x86_64/svm.h b/tools/testing/selftests/kvm/include/x86_64/svm.h
index f4ea2355dbc2..2225e5077350 100644
--- a/tools/testing/selftests/kvm/include/x86_64/svm.h
+++ b/tools/testing/selftests/kvm/include/x86_64/svm.h
@@ -99,7 +99,14 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
 	u8 reserved_6[8];	/* Offset 0xe8 */
 	u64 avic_logical_id;	/* Offset 0xf0 */
 	u64 avic_physical_id;	/* Offset 0xf8 */
-	u8 reserved_7[768];
+	u8 reserved_7[8];
+	u64 vmsa_pa;		/* Used for an SEV-ES guest */
+	u8 reserved_8[720];
+	/*
+	 * Offset 0x3e0, 32 bytes reserved
+	 * for use by hypervisor/software.
+	 */
+	u8 reserved_sw[32];
 };
 
 
-- 
cgit 


From e67bd7df28a0f2193512184e24a2de4bfd77c69e Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Thu, 3 Feb 2022 11:46:20 +0100
Subject: KVM: selftests: nSVM: Add enlightened MSR-Bitmap selftest

Introduce a new test for Hyper-V nSVM extensions (Hyper-V on KVM) and add
a test for enlightened MSR-Bitmap feature:

- Intercept access to MSR_FS_BASE in L1 and check that this works
  with enlightened MSR-Bitmap disabled.
- Enabled enlightened MSR-Bitmap and check that the intercept still works
  as expected.
- Intercept access to MSR_GS_BASE but don't clear the corresponding bit
  from clean fields mask, KVM is supposed to skip updating MSR-Bitmap02 and
  thus the consequent access to the MSR from L2 will not get intercepted.
- Finally, clear the corresponding bit from clean fields mask and check
  that access to MSR_GS_BASE is now intercepted.

The test works with the assumption, that access to MSR_FS_BASE/MSR_GS_BASE
is not intercepted for L1. If this ever becomes not true the test will
fail as nested_svm_exit_handled_msr() always checks L1's MSR-Bitmap for
L2 irrespective of clean fields. The behavior is correct as enlightened
MSR-Bitmap feature is just an optimization, KVM is not obliged to ignore
updates when the corresponding bit in clean fields stays clear.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20220203104620.277031-7-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/Makefile               |   1 +
 .../selftests/kvm/include/x86_64/svm_util.h        |   1 +
 .../testing/selftests/kvm/x86_64/hyperv_svm_test.c | 175 +++++++++++++++++++++
 3 files changed, 177 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 0e4926bc9a58..a1ce7dd40e3e 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -51,6 +51,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/emulator_error_test
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features
+TEST_GEN_PROGS_x86_64 += x86_64/hyperv_svm_test
 TEST_GEN_PROGS_x86_64 += x86_64/kvm_clock_test
 TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
 TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
diff --git a/tools/testing/selftests/kvm/include/x86_64/svm_util.h b/tools/testing/selftests/kvm/include/x86_64/svm_util.h
index e23c8a3e8a66..a25aabd8f5e7 100644
--- a/tools/testing/selftests/kvm/include/x86_64/svm_util.h
+++ b/tools/testing/selftests/kvm/include/x86_64/svm_util.h
@@ -16,6 +16,7 @@
 #define CPUID_SVM_BIT		2
 #define CPUID_SVM		BIT_ULL(CPUID_SVM_BIT)
 
+#define SVM_EXIT_MSR		0x07c
 #define SVM_EXIT_VMMCALL	0x081
 
 struct svm_test_data {
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
new file mode 100644
index 000000000000..21f5ca9197da
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KVM_GET/SET_* tests
+ *
+ * Copyright (C) 2022, Red Hat, Inc.
+ *
+ * Tests for Hyper-V extensions to SVM.
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <linux/bitmap.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "hyperv.h"
+
+#define VCPU_ID		1
+#define L2_GUEST_STACK_SIZE 256
+
+struct hv_enlightenments {
+	struct __packed hv_enlightenments_control {
+		u32 nested_flush_hypercall:1;
+		u32 msr_bitmap:1;
+		u32 enlightened_npt_tlb: 1;
+		u32 reserved:29;
+	} __packed hv_enlightenments_control;
+	u32 hv_vp_id;
+	u64 hv_vm_id;
+	u64 partition_assist_page;
+	u64 reserved;
+} __packed;
+
+/*
+ * Hyper-V uses the software reserved clean bit in VMCB
+ */
+#define VMCB_HV_NESTED_ENLIGHTENMENTS (1U << 31)
+
+static inline void vmmcall(void)
+{
+	__asm__ __volatile__("vmmcall");
+}
+
+void l2_guest_code(void)
+{
+	GUEST_SYNC(3);
+	/* Exit to L1 */
+	vmmcall();
+
+	/* MSR-Bitmap tests */
+	rdmsr(MSR_FS_BASE); /* intercepted */
+	rdmsr(MSR_FS_BASE); /* intercepted */
+	rdmsr(MSR_GS_BASE); /* not intercepted */
+	vmmcall();
+	rdmsr(MSR_GS_BASE); /* intercepted */
+
+	GUEST_SYNC(5);
+
+	/* Done, exit to L1 and never come back.  */
+	vmmcall();
+}
+
+static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm)
+{
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+	struct vmcb *vmcb = svm->vmcb;
+	struct hv_enlightenments *hve =
+		(struct hv_enlightenments *)vmcb->control.reserved_sw;
+
+	GUEST_SYNC(1);
+
+	wrmsr(HV_X64_MSR_GUEST_OS_ID, (u64)0x8100 << 48);
+
+	GUEST_ASSERT(svm->vmcb_gpa);
+	/* Prepare for L2 execution. */
+	generic_svm_setup(svm, l2_guest_code,
+			  &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+	GUEST_SYNC(2);
+	run_guest(vmcb, svm->vmcb_gpa);
+	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+	GUEST_SYNC(4);
+	vmcb->save.rip += 3;
+
+	/* Intercept RDMSR 0xc0000100 */
+	vmcb->control.intercept |= 1ULL << INTERCEPT_MSR_PROT;
+	set_bit(2 * (MSR_FS_BASE & 0x1fff), svm->msr + 0x800);
+	run_guest(vmcb, svm->vmcb_gpa);
+	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
+	vmcb->save.rip += 2; /* rdmsr */
+
+	/* Enable enlightened MSR bitmap */
+	hve->hv_enlightenments_control.msr_bitmap = 1;
+	run_guest(vmcb, svm->vmcb_gpa);
+	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
+	vmcb->save.rip += 2; /* rdmsr */
+
+	/* Intercept RDMSR 0xc0000101 without telling KVM about it */
+	set_bit(2 * (MSR_GS_BASE & 0x1fff), svm->msr + 0x800);
+	/* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */
+	vmcb->control.clean |= VMCB_HV_NESTED_ENLIGHTENMENTS;
+	run_guest(vmcb, svm->vmcb_gpa);
+	/* Make sure we don't see SVM_EXIT_MSR here so eMSR bitmap works */
+	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+	vmcb->save.rip += 3; /* vmcall */
+
+	/* Now tell KVM we've changed MSR-Bitmap */
+	vmcb->control.clean &= ~VMCB_HV_NESTED_ENLIGHTENMENTS;
+	run_guest(vmcb, svm->vmcb_gpa);
+	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
+	vmcb->save.rip += 2; /* rdmsr */
+
+	run_guest(vmcb, svm->vmcb_gpa);
+	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+	GUEST_SYNC(6);
+
+	GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+	vm_vaddr_t nested_gva = 0;
+
+	struct kvm_vm *vm;
+	struct kvm_run *run;
+	struct ucall uc;
+	int stage;
+
+	if (!nested_svm_supported()) {
+		print_skip("Nested SVM not supported");
+		exit(KSFT_SKIP);
+	}
+	/* Create VM */
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
+	vcpu_set_hv_cpuid(vm, VCPU_ID);
+	run = vcpu_state(vm, VCPU_ID);
+	vcpu_alloc_svm(vm, &nested_gva);
+	vcpu_args_set(vm, VCPU_ID, 1, nested_gva);
+
+	for (stage = 1;; stage++) {
+		_vcpu_run(vm, VCPU_ID);
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "Stage %d: unexpected exit reason: %u (%s),\n",
+			    stage, run->exit_reason,
+			    exit_reason_str(run->exit_reason));
+
+		switch (get_ucall(vm, VCPU_ID, &uc)) {
+		case UCALL_ABORT:
+			TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
+				  __FILE__, uc.args[1]);
+			/* NOT REACHED */
+		case UCALL_SYNC:
+			break;
+		case UCALL_DONE:
+			goto done;
+		default:
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
+		}
+
+		/* UCALL_SYNC is handled here.  */
+		TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+			    uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
+			    stage, (ulong)uc.args[1]);
+
+	}
+
+done:
+	kvm_vm_free(vm);
+}
-- 
cgit 


From a5a358abbc392c7ad7b12a23c99e8602460b5804 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Wed, 9 Feb 2022 01:35:12 +0100
Subject: selftest/bpf: Check invalid length in test_xdp_update_frags

Update test_xdp_update_frags adding a test for a buffer size
set to (MAX_SKB_FRAGS + 2) * PAGE_SIZE. The kernel is supposed
to return -ENOMEM.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/3e4afa0ee4976854b2f0296998fe6754a80b62e5.1644366736.git.lorenzo@kernel.org
---
 .../selftests/bpf/prog_tests/xdp_adjust_frags.c    | 38 +++++++++++++++++++++-
 1 file changed, 37 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c
index d18e6f343c48..2f033da4cd45 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c
@@ -5,11 +5,12 @@
 static void test_xdp_update_frags(void)
 {
 	const char *file = "./test_xdp_update_frags.o";
+	int err, prog_fd, max_skb_frags, buf_size, num;
 	struct bpf_program *prog;
 	struct bpf_object *obj;
-	int err, prog_fd;
 	__u32 *offset;
 	__u8 *buf;
+	FILE *f;
 	LIBBPF_OPTS(bpf_test_run_opts, topts);
 
 	obj = bpf_object__open(file);
@@ -99,6 +100,41 @@ static void test_xdp_update_frags(void)
 	ASSERT_EQ(buf[7621], 0xbb, "xdp_update_frag buf[7621]");
 
 	free(buf);
+
+	/* test_xdp_update_frags: unsupported buffer size */
+	f = fopen("/proc/sys/net/core/max_skb_frags", "r");
+	if (!ASSERT_OK_PTR(f, "max_skb_frag file pointer"))
+		goto out;
+
+	num = fscanf(f, "%d", &max_skb_frags);
+	fclose(f);
+
+	if (!ASSERT_EQ(num, 1, "max_skb_frags read failed"))
+		goto out;
+
+	/* xdp_buff linear area size is always set to 4096 in the
+	 * bpf_prog_test_run_xdp routine.
+	 */
+	buf_size = 4096 + (max_skb_frags + 1) * sysconf(_SC_PAGE_SIZE);
+	buf = malloc(buf_size);
+	if (!ASSERT_OK_PTR(buf, "alloc buf"))
+		goto out;
+
+	memset(buf, 0, buf_size);
+	offset = (__u32 *)buf;
+	*offset = 16;
+	buf[*offset] = 0xaa;
+	buf[*offset + 15] = 0xaa;
+
+	topts.data_in = buf;
+	topts.data_out = buf;
+	topts.data_size_in = buf_size;
+	topts.data_size_out = buf_size;
+
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
+	ASSERT_EQ(err, -ENOMEM,
+		  "unsupported buf size, possible non-default /proc/sys/net/core/max_skb_flags?");
+	free(buf);
 out:
 	bpf_object__close(obj);
 }
-- 
cgit 


From 446640e49ec00458655e29c31e55aabd6702985d Mon Sep 17 00:00:00 2001
From: Beau Belgrave <beaub@linux.microsoft.com>
Date: Tue, 18 Jan 2022 12:43:21 -0800
Subject: user_events: Add self-test for ftrace integration

Tests basic functionality of registering/deregistering, status and
writing data out via ftrace mechanisms within user_events.

Link: https://lkml.kernel.org/r/20220118204326.2169-8-beaub@linux.microsoft.com

Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Beau Belgrave <beaub@linux.microsoft.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 tools/testing/selftests/user_events/Makefile      |   9 +
 tools/testing/selftests/user_events/ftrace_test.c | 387 ++++++++++++++++++++++
 tools/testing/selftests/user_events/settings      |   1 +
 3 files changed, 397 insertions(+)
 create mode 100644 tools/testing/selftests/user_events/Makefile
 create mode 100644 tools/testing/selftests/user_events/ftrace_test.c
 create mode 100644 tools/testing/selftests/user_events/settings

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/user_events/Makefile b/tools/testing/selftests/user_events/Makefile
new file mode 100644
index 000000000000..d66c551a6fe3
--- /dev/null
+++ b/tools/testing/selftests/user_events/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -Wl,-no-as-needed -Wall -I../../../../usr/include
+LDLIBS += -lrt -lpthread -lm
+
+TEST_GEN_PROGS = ftrace_test
+
+TEST_FILES := settings
+
+include ../lib.mk
diff --git a/tools/testing/selftests/user_events/ftrace_test.c b/tools/testing/selftests/user_events/ftrace_test.c
new file mode 100644
index 000000000000..68010fd7b719
--- /dev/null
+++ b/tools/testing/selftests/user_events/ftrace_test.c
@@ -0,0 +1,387 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * User Events FTrace Test Program
+ *
+ * Copyright (c) 2021 Beau Belgrave <beaub@linux.microsoft.com>
+ */
+
+#include <errno.h>
+#include <linux/user_events.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "../kselftest_harness.h"
+
+const char *data_file = "/sys/kernel/debug/tracing/user_events_data";
+const char *status_file = "/sys/kernel/debug/tracing/user_events_status";
+const char *enable_file = "/sys/kernel/debug/tracing/events/user_events/__test_event/enable";
+const char *trace_file = "/sys/kernel/debug/tracing/trace";
+const char *fmt_file = "/sys/kernel/debug/tracing/events/user_events/__test_event/format";
+
+static int trace_bytes(void)
+{
+	int fd = open(trace_file, O_RDONLY);
+	char buf[256];
+	int bytes = 0, got;
+
+	if (fd == -1)
+		return -1;
+
+	while (true) {
+		got = read(fd, buf, sizeof(buf));
+
+		if (got == -1)
+			return -1;
+
+		if (got == 0)
+			break;
+
+		bytes += got;
+	}
+
+	close(fd);
+
+	return bytes;
+}
+
+static int skip_until_empty_line(FILE *fp)
+{
+	int c, last = 0;
+
+	while (true) {
+		c = getc(fp);
+
+		if (c == EOF)
+			break;
+
+		if (last == '\n' && c == '\n')
+			return 0;
+
+		last = c;
+	}
+
+	return -1;
+}
+
+static int get_print_fmt(char *buffer, int len)
+{
+	FILE *fp = fopen(fmt_file, "r");
+	char *newline;
+
+	if (!fp)
+		return -1;
+
+	/* Read until empty line (Skip Common) */
+	if (skip_until_empty_line(fp) < 0)
+		goto err;
+
+	/* Read until empty line (Skip Properties) */
+	if (skip_until_empty_line(fp) < 0)
+		goto err;
+
+	/* Read in print_fmt: */
+	if (fgets(buffer, len, fp) == NULL)
+		goto err;
+
+	newline = strchr(buffer, '\n');
+
+	if (newline)
+		*newline = '\0';
+
+	fclose(fp);
+
+	return 0;
+err:
+	fclose(fp);
+
+	return -1;
+}
+
+static int clear(void)
+{
+	int fd = open(data_file, O_RDWR);
+
+	if (fd == -1)
+		return -1;
+
+	if (ioctl(fd, DIAG_IOCSDEL, "__test_event") == -1)
+		if (errno != ENOENT)
+			return -1;
+
+	close(fd);
+
+	return 0;
+}
+
+static int check_print_fmt(const char *event, const char *expected)
+{
+	struct user_reg reg = {0};
+	char print_fmt[256];
+	int ret;
+	int fd;
+
+	/* Ensure cleared */
+	ret = clear();
+
+	if (ret != 0)
+		return ret;
+
+	fd = open(data_file, O_RDWR);
+
+	if (fd == -1)
+		return fd;
+
+	reg.size = sizeof(reg);
+	reg.name_args = (__u64)event;
+
+	/* Register should work */
+	ret = ioctl(fd, DIAG_IOCSREG, &reg);
+
+	close(fd);
+
+	if (ret != 0)
+		return ret;
+
+	/* Ensure correct print_fmt */
+	ret = get_print_fmt(print_fmt, sizeof(print_fmt));
+
+	if (ret != 0)
+		return ret;
+
+	return strcmp(print_fmt, expected);
+}
+
+FIXTURE(user) {
+	int status_fd;
+	int data_fd;
+	int enable_fd;
+};
+
+FIXTURE_SETUP(user) {
+	self->status_fd = open(status_file, O_RDONLY);
+	ASSERT_NE(-1, self->status_fd);
+
+	self->data_fd = open(data_file, O_RDWR);
+	ASSERT_NE(-1, self->data_fd);
+
+	self->enable_fd = -1;
+}
+
+FIXTURE_TEARDOWN(user) {
+	close(self->status_fd);
+	close(self->data_fd);
+
+	if (self->enable_fd != -1) {
+		write(self->enable_fd, "0", sizeof("0"));
+		close(self->enable_fd);
+	}
+
+	ASSERT_EQ(0, clear());
+}
+
+TEST_F(user, register_events) {
+	struct user_reg reg = {0};
+	int page_size = sysconf(_SC_PAGESIZE);
+	char *status_page;
+
+	reg.size = sizeof(reg);
+	reg.name_args = (__u64)"__test_event u32 field1; u32 field2";
+
+	status_page = mmap(NULL, page_size, PROT_READ, MAP_SHARED,
+			   self->status_fd, 0);
+
+	/* Register should work */
+	ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
+	ASSERT_EQ(0, reg.write_index);
+	ASSERT_NE(0, reg.status_index);
+
+	/* Multiple registers should result in same index */
+	ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
+	ASSERT_EQ(0, reg.write_index);
+	ASSERT_NE(0, reg.status_index);
+
+	/* Ensure disabled */
+	self->enable_fd = open(enable_file, O_RDWR);
+	ASSERT_NE(-1, self->enable_fd);
+	ASSERT_NE(-1, write(self->enable_fd, "0", sizeof("0")))
+
+	/* MMAP should work and be zero'd */
+	ASSERT_NE(MAP_FAILED, status_page);
+	ASSERT_NE(NULL, status_page);
+	ASSERT_EQ(0, status_page[reg.status_index]);
+
+	/* Enable event and ensure bits updated in status */
+	ASSERT_NE(-1, write(self->enable_fd, "1", sizeof("1")))
+	ASSERT_EQ(EVENT_STATUS_FTRACE, status_page[reg.status_index]);
+
+	/* Disable event and ensure bits updated in status */
+	ASSERT_NE(-1, write(self->enable_fd, "0", sizeof("0")))
+	ASSERT_EQ(0, status_page[reg.status_index]);
+
+	/* File still open should return -EBUSY for delete */
+	ASSERT_EQ(-1, ioctl(self->data_fd, DIAG_IOCSDEL, "__test_event"));
+	ASSERT_EQ(EBUSY, errno);
+
+	/* Delete should work only after close */
+	close(self->data_fd);
+	self->data_fd = open(data_file, O_RDWR);
+	ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSDEL, "__test_event"));
+
+	/* Unmap should work */
+	ASSERT_EQ(0, munmap(status_page, page_size));
+}
+
+TEST_F(user, write_events) {
+	struct user_reg reg = {0};
+	struct iovec io[3];
+	__u32 field1, field2;
+	int before = 0, after = 0;
+
+	reg.size = sizeof(reg);
+	reg.name_args = (__u64)"__test_event u32 field1; u32 field2";
+
+	field1 = 1;
+	field2 = 2;
+
+	io[0].iov_base = &reg.write_index;
+	io[0].iov_len = sizeof(reg.write_index);
+	io[1].iov_base = &field1;
+	io[1].iov_len = sizeof(field1);
+	io[2].iov_base = &field2;
+	io[2].iov_len = sizeof(field2);
+
+	/* Register should work */
+	ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
+	ASSERT_EQ(0, reg.write_index);
+	ASSERT_NE(0, reg.status_index);
+
+	/* Write should fail on invalid slot with ENOENT */
+	io[0].iov_base = &field2;
+	io[0].iov_len = sizeof(field2);
+	ASSERT_EQ(-1, writev(self->data_fd, (const struct iovec *)io, 3));
+	ASSERT_EQ(ENOENT, errno);
+	io[0].iov_base = &reg.write_index;
+	io[0].iov_len = sizeof(reg.write_index);
+
+	/* Enable event */
+	self->enable_fd = open(enable_file, O_RDWR);
+	ASSERT_NE(-1, write(self->enable_fd, "1", sizeof("1")))
+
+	/* Write should make it out to ftrace buffers */
+	before = trace_bytes();
+	ASSERT_NE(-1, writev(self->data_fd, (const struct iovec *)io, 3));
+	after = trace_bytes();
+	ASSERT_GT(after, before);
+}
+
+TEST_F(user, write_fault) {
+	struct user_reg reg = {0};
+	struct iovec io[2];
+	int l = sizeof(__u64);
+	void *anon;
+
+	reg.size = sizeof(reg);
+	reg.name_args = (__u64)"__test_event u64 anon";
+
+	anon = mmap(NULL, l, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	ASSERT_NE(MAP_FAILED, anon);
+
+	io[0].iov_base = &reg.write_index;
+	io[0].iov_len = sizeof(reg.write_index);
+	io[1].iov_base = anon;
+	io[1].iov_len = l;
+
+	/* Register should work */
+	ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
+	ASSERT_EQ(0, reg.write_index);
+	ASSERT_NE(0, reg.status_index);
+
+	/* Write should work normally */
+	ASSERT_NE(-1, writev(self->data_fd, (const struct iovec *)io, 2));
+
+	/* Faulted data should zero fill and work */
+	ASSERT_EQ(0, madvise(anon, l, MADV_DONTNEED));
+	ASSERT_NE(-1, writev(self->data_fd, (const struct iovec *)io, 2));
+	ASSERT_EQ(0, munmap(anon, l));
+}
+
+TEST_F(user, print_fmt) {
+	int ret;
+
+	ret = check_print_fmt("__test_event __rel_loc char[] data",
+			      "print fmt: \"data=%s\", __get_rel_str(data)");
+	ASSERT_EQ(0, ret);
+
+	ret = check_print_fmt("__test_event __data_loc char[] data",
+			      "print fmt: \"data=%s\", __get_str(data)");
+	ASSERT_EQ(0, ret);
+
+	ret = check_print_fmt("__test_event s64 data",
+			      "print fmt: \"data=%lld\", REC->data");
+	ASSERT_EQ(0, ret);
+
+	ret = check_print_fmt("__test_event u64 data",
+			      "print fmt: \"data=%llu\", REC->data");
+	ASSERT_EQ(0, ret);
+
+	ret = check_print_fmt("__test_event s32 data",
+			      "print fmt: \"data=%d\", REC->data");
+	ASSERT_EQ(0, ret);
+
+	ret = check_print_fmt("__test_event u32 data",
+			      "print fmt: \"data=%u\", REC->data");
+	ASSERT_EQ(0, ret);
+
+	ret = check_print_fmt("__test_event int data",
+			      "print fmt: \"data=%d\", REC->data");
+	ASSERT_EQ(0, ret);
+
+	ret = check_print_fmt("__test_event unsigned int data",
+			      "print fmt: \"data=%u\", REC->data");
+	ASSERT_EQ(0, ret);
+
+	ret = check_print_fmt("__test_event s16 data",
+			      "print fmt: \"data=%d\", REC->data");
+	ASSERT_EQ(0, ret);
+
+	ret = check_print_fmt("__test_event u16 data",
+			      "print fmt: \"data=%u\", REC->data");
+	ASSERT_EQ(0, ret);
+
+	ret = check_print_fmt("__test_event short data",
+			      "print fmt: \"data=%d\", REC->data");
+	ASSERT_EQ(0, ret);
+
+	ret = check_print_fmt("__test_event unsigned short data",
+			      "print fmt: \"data=%u\", REC->data");
+	ASSERT_EQ(0, ret);
+
+	ret = check_print_fmt("__test_event s8 data",
+			      "print fmt: \"data=%d\", REC->data");
+	ASSERT_EQ(0, ret);
+
+	ret = check_print_fmt("__test_event u8 data",
+			      "print fmt: \"data=%u\", REC->data");
+	ASSERT_EQ(0, ret);
+
+	ret = check_print_fmt("__test_event char data",
+			      "print fmt: \"data=%d\", REC->data");
+	ASSERT_EQ(0, ret);
+
+	ret = check_print_fmt("__test_event unsigned char data",
+			      "print fmt: \"data=%u\", REC->data");
+	ASSERT_EQ(0, ret);
+
+	ret = check_print_fmt("__test_event char[4] data",
+			      "print fmt: \"data=%s\", REC->data");
+	ASSERT_EQ(0, ret);
+}
+
+int main(int argc, char **argv)
+{
+	return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/user_events/settings b/tools/testing/selftests/user_events/settings
new file mode 100644
index 000000000000..ba4d85f74cd6
--- /dev/null
+++ b/tools/testing/selftests/user_events/settings
@@ -0,0 +1 @@
+timeout=90
-- 
cgit 


From 745bb7e683ff00e7b52e54b422fd05c58ae94e1a Mon Sep 17 00:00:00 2001
From: Beau Belgrave <beaub@linux.microsoft.com>
Date: Tue, 18 Jan 2022 12:43:22 -0800
Subject: user_events: Add self-test for dynamic_events integration

Tests matching deletes, creation of basic and complex types. Ensures
common patterns work correctly when interacting with dynamic_events
file.

Link: https://lkml.kernel.org/r/20220118204326.2169-9-beaub@linux.microsoft.com

Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Beau Belgrave <beaub@linux.microsoft.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 tools/testing/selftests/user_events/Makefile   |   2 +-
 tools/testing/selftests/user_events/dyn_test.c | 130 +++++++++++++++++++++++++
 2 files changed, 131 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/user_events/dyn_test.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/user_events/Makefile b/tools/testing/selftests/user_events/Makefile
index d66c551a6fe3..e824b9c2cae7 100644
--- a/tools/testing/selftests/user_events/Makefile
+++ b/tools/testing/selftests/user_events/Makefile
@@ -2,7 +2,7 @@
 CFLAGS += -Wl,-no-as-needed -Wall -I../../../../usr/include
 LDLIBS += -lrt -lpthread -lm
 
-TEST_GEN_PROGS = ftrace_test
+TEST_GEN_PROGS = ftrace_test dyn_test
 
 TEST_FILES := settings
 
diff --git a/tools/testing/selftests/user_events/dyn_test.c b/tools/testing/selftests/user_events/dyn_test.c
new file mode 100644
index 000000000000..d6265d14cd51
--- /dev/null
+++ b/tools/testing/selftests/user_events/dyn_test.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * User Events Dyn Events Test Program
+ *
+ * Copyright (c) 2021 Beau Belgrave <beaub@linux.microsoft.com>
+ */
+
+#include <errno.h>
+#include <linux/user_events.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "../kselftest_harness.h"
+
+const char *dyn_file = "/sys/kernel/debug/tracing/dynamic_events";
+const char *clear = "!u:__test_event";
+
+static int Append(const char *value)
+{
+	int fd = open(dyn_file, O_RDWR | O_APPEND);
+	int ret = write(fd, value, strlen(value));
+
+	close(fd);
+	return ret;
+}
+
+#define CLEAR() \
+do { \
+	int ret = Append(clear); \
+	if (ret == -1) \
+		ASSERT_EQ(ENOENT, errno); \
+} while (0)
+
+#define TEST_PARSE(x) \
+do { \
+	ASSERT_NE(-1, Append(x)); \
+	CLEAR(); \
+} while (0)
+
+#define TEST_NPARSE(x) ASSERT_EQ(-1, Append(x))
+
+FIXTURE(user) {
+};
+
+FIXTURE_SETUP(user) {
+	CLEAR();
+}
+
+FIXTURE_TEARDOWN(user) {
+	CLEAR();
+}
+
+TEST_F(user, basic_types) {
+	/* All should work */
+	TEST_PARSE("u:__test_event u64 a");
+	TEST_PARSE("u:__test_event u32 a");
+	TEST_PARSE("u:__test_event u16 a");
+	TEST_PARSE("u:__test_event u8 a");
+	TEST_PARSE("u:__test_event char a");
+	TEST_PARSE("u:__test_event unsigned char a");
+	TEST_PARSE("u:__test_event int a");
+	TEST_PARSE("u:__test_event unsigned int a");
+	TEST_PARSE("u:__test_event short a");
+	TEST_PARSE("u:__test_event unsigned short a");
+	TEST_PARSE("u:__test_event char[20] a");
+	TEST_PARSE("u:__test_event unsigned char[20] a");
+	TEST_PARSE("u:__test_event char[0x14] a");
+	TEST_PARSE("u:__test_event unsigned char[0x14] a");
+	/* Bad size format should fail */
+	TEST_NPARSE("u:__test_event char[aa] a");
+	/* Large size should fail */
+	TEST_NPARSE("u:__test_event char[9999] a");
+	/* Long size string should fail */
+	TEST_NPARSE("u:__test_event char[0x0000000000001] a");
+}
+
+TEST_F(user, loc_types) {
+	/* All should work */
+	TEST_PARSE("u:__test_event __data_loc char[] a");
+	TEST_PARSE("u:__test_event __data_loc unsigned char[] a");
+	TEST_PARSE("u:__test_event __rel_loc char[] a");
+	TEST_PARSE("u:__test_event __rel_loc unsigned char[] a");
+}
+
+TEST_F(user, size_types) {
+	/* Should work */
+	TEST_PARSE("u:__test_event struct custom a 20");
+	/* Size not specified on struct should fail */
+	TEST_NPARSE("u:__test_event struct custom a");
+	/* Size specified on non-struct should fail */
+	TEST_NPARSE("u:__test_event char a 20");
+}
+
+TEST_F(user, flags) {
+	/* Should work */
+	TEST_PARSE("u:__test_event:BPF_ITER u32 a");
+	/* Forward compat */
+	TEST_PARSE("u:__test_event:BPF_ITER,FLAG_FUTURE u32 a");
+}
+
+TEST_F(user, matching) {
+	/* Register */
+	ASSERT_NE(-1, Append("u:__test_event struct custom a 20"));
+	/* Should not match */
+	TEST_NPARSE("!u:__test_event struct custom b");
+	/* Should match */
+	TEST_PARSE("!u:__test_event struct custom a");
+	/* Multi field reg */
+	ASSERT_NE(-1, Append("u:__test_event u32 a; u32 b"));
+	/* Non matching cases */
+	TEST_NPARSE("!u:__test_event u32 a");
+	TEST_NPARSE("!u:__test_event u32 b");
+	TEST_NPARSE("!u:__test_event u32 a; u32 ");
+	TEST_NPARSE("!u:__test_event u32 a; u32 a");
+	/* Matching case */
+	TEST_PARSE("!u:__test_event u32 a; u32 b");
+	/* Register */
+	ASSERT_NE(-1, Append("u:__test_event u32 a; u32 b"));
+	/* Ensure trailing semi-colon case */
+	TEST_PARSE("!u:__test_event u32 a; u32 b;");
+}
+
+int main(int argc, char **argv)
+{
+	return test_harness_run(argc, argv);
+}
-- 
cgit 


From 3a6163893a9a6a90e086234ee205c1f74eeb5f84 Mon Sep 17 00:00:00 2001
From: Beau Belgrave <beaub@linux.microsoft.com>
Date: Tue, 18 Jan 2022 12:43:23 -0800
Subject: user_events: Add self-test for perf_event integration

Tests perf can be attached to and written out correctly. Ensures attach
updates status bits in user programs.

Link: https://lkml.kernel.org/r/20220118204326.2169-10-beaub@linux.microsoft.com

Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Beau Belgrave <beaub@linux.microsoft.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 tools/testing/selftests/user_events/Makefile    |   2 +-
 tools/testing/selftests/user_events/perf_test.c | 168 ++++++++++++++++++++++++
 2 files changed, 169 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/user_events/perf_test.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/user_events/Makefile b/tools/testing/selftests/user_events/Makefile
index e824b9c2cae7..c765d8635d9a 100644
--- a/tools/testing/selftests/user_events/Makefile
+++ b/tools/testing/selftests/user_events/Makefile
@@ -2,7 +2,7 @@
 CFLAGS += -Wl,-no-as-needed -Wall -I../../../../usr/include
 LDLIBS += -lrt -lpthread -lm
 
-TEST_GEN_PROGS = ftrace_test dyn_test
+TEST_GEN_PROGS = ftrace_test dyn_test perf_test
 
 TEST_FILES := settings
 
diff --git a/tools/testing/selftests/user_events/perf_test.c b/tools/testing/selftests/user_events/perf_test.c
new file mode 100644
index 000000000000..26851d51d6bb
--- /dev/null
+++ b/tools/testing/selftests/user_events/perf_test.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * User Events Perf Events Test Program
+ *
+ * Copyright (c) 2021 Beau Belgrave <beaub@linux.microsoft.com>
+ */
+
+#include <errno.h>
+#include <linux/user_events.h>
+#include <linux/perf_event.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <asm/unistd.h>
+
+#include "../kselftest_harness.h"
+
+const char *data_file = "/sys/kernel/debug/tracing/user_events_data";
+const char *status_file = "/sys/kernel/debug/tracing/user_events_status";
+const char *id_file = "/sys/kernel/debug/tracing/events/user_events/__test_event/id";
+const char *fmt_file = "/sys/kernel/debug/tracing/events/user_events/__test_event/format";
+
+struct event {
+	__u32 index;
+	__u32 field1;
+	__u32 field2;
+};
+
+static long perf_event_open(struct perf_event_attr *pe, pid_t pid,
+			    int cpu, int group_fd, unsigned long flags)
+{
+	return syscall(__NR_perf_event_open, pe, pid, cpu, group_fd, flags);
+}
+
+static int get_id(void)
+{
+	FILE *fp = fopen(id_file, "r");
+	int ret, id = 0;
+
+	if (!fp)
+		return -1;
+
+	ret = fscanf(fp, "%d", &id);
+	fclose(fp);
+
+	if (ret != 1)
+		return -1;
+
+	return id;
+}
+
+static int get_offset(void)
+{
+	FILE *fp = fopen(fmt_file, "r");
+	int ret, c, last = 0, offset = 0;
+
+	if (!fp)
+		return -1;
+
+	/* Read until empty line */
+	while (true) {
+		c = getc(fp);
+
+		if (c == EOF)
+			break;
+
+		if (last == '\n' && c == '\n')
+			break;
+
+		last = c;
+	}
+
+	ret = fscanf(fp, "\tfield:u32 field1;\toffset:%d;", &offset);
+	fclose(fp);
+
+	if (ret != 1)
+		return -1;
+
+	return offset;
+}
+
+FIXTURE(user) {
+	int status_fd;
+	int data_fd;
+};
+
+FIXTURE_SETUP(user) {
+	self->status_fd = open(status_file, O_RDONLY);
+	ASSERT_NE(-1, self->status_fd);
+
+	self->data_fd = open(data_file, O_RDWR);
+	ASSERT_NE(-1, self->data_fd);
+}
+
+FIXTURE_TEARDOWN(user) {
+	close(self->status_fd);
+	close(self->data_fd);
+}
+
+TEST_F(user, perf_write) {
+	struct perf_event_attr pe = {0};
+	struct user_reg reg = {0};
+	int page_size = sysconf(_SC_PAGESIZE);
+	char *status_page;
+	struct event event;
+	struct perf_event_mmap_page *perf_page;
+	int id, fd, offset;
+	__u32 *val;
+
+	reg.size = sizeof(reg);
+	reg.name_args = (__u64)"__test_event u32 field1; u32 field2";
+
+	status_page = mmap(NULL, page_size, PROT_READ, MAP_SHARED,
+			   self->status_fd, 0);
+	ASSERT_NE(MAP_FAILED, status_page);
+
+	/* Register should work */
+	ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
+	ASSERT_EQ(0, reg.write_index);
+	ASSERT_NE(0, reg.status_index);
+	ASSERT_EQ(0, status_page[reg.status_index]);
+
+	/* Id should be there */
+	id = get_id();
+	ASSERT_NE(-1, id);
+	offset = get_offset();
+	ASSERT_NE(-1, offset);
+
+	pe.type = PERF_TYPE_TRACEPOINT;
+	pe.size = sizeof(pe);
+	pe.config = id;
+	pe.sample_type = PERF_SAMPLE_RAW;
+	pe.sample_period = 1;
+	pe.wakeup_events = 1;
+
+	/* Tracepoint attach should work */
+	fd = perf_event_open(&pe, 0, -1, -1, 0);
+	ASSERT_NE(-1, fd);
+
+	perf_page = mmap(NULL, page_size * 2, PROT_READ, MAP_SHARED, fd, 0);
+	ASSERT_NE(MAP_FAILED, perf_page);
+
+	/* Status should be updated */
+	ASSERT_EQ(EVENT_STATUS_PERF, status_page[reg.status_index]);
+
+	event.index = reg.write_index;
+	event.field1 = 0xc001;
+	event.field2 = 0xc01a;
+
+	/* Ensure write shows up at correct offset */
+	ASSERT_NE(-1, write(self->data_fd, &event, sizeof(event)));
+	val = (void *)(((char *)perf_page) + perf_page->data_offset);
+	ASSERT_EQ(PERF_RECORD_SAMPLE, *val);
+	/* Skip over header and size, move to offset */
+	val += 3;
+	val = (void *)((char *)val) + offset;
+	/* Ensure correct */
+	ASSERT_EQ(event.field1, *val++);
+	ASSERT_EQ(event.field2, *val++);
+}
+
+int main(int argc, char **argv)
+{
+	return test_harness_run(argc, argv);
+}
-- 
cgit 


From 7640e77035aadcd7d50f9c7583ca25a4e1aa2874 Mon Sep 17 00:00:00 2001
From: Beau Belgrave <beaub@linux.microsoft.com>
Date: Tue, 18 Jan 2022 12:43:24 -0800
Subject: user_events: Add self-test for validator boundaries

Tests to ensure validator boundary cases are working correctly within
close and far bounds. Ensures __data_loc and __rel_loc strings are
null terminated and within range. Ensures min size checks work as
expected.

Link: https://lkml.kernel.org/r/20220118204326.2169-11-beaub@linux.microsoft.com

Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Beau Belgrave <beaub@linux.microsoft.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 tools/testing/selftests/user_events/ftrace_test.c | 65 +++++++++++++++++++++++
 1 file changed, 65 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/user_events/ftrace_test.c b/tools/testing/selftests/user_events/ftrace_test.c
index 68010fd7b719..a80fb5ef61d5 100644
--- a/tools/testing/selftests/user_events/ftrace_test.c
+++ b/tools/testing/selftests/user_events/ftrace_test.c
@@ -309,6 +309,71 @@ TEST_F(user, write_fault) {
 	ASSERT_EQ(0, munmap(anon, l));
 }
 
+TEST_F(user, write_validator) {
+	struct user_reg reg = {0};
+	struct iovec io[3];
+	int loc, bytes;
+	char data[8];
+	int before = 0, after = 0;
+
+	reg.size = sizeof(reg);
+	reg.name_args = (__u64)"__test_event __rel_loc char[] data";
+
+	/* Register should work */
+	ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
+	ASSERT_EQ(0, reg.write_index);
+	ASSERT_NE(0, reg.status_index);
+
+	io[0].iov_base = &reg.write_index;
+	io[0].iov_len = sizeof(reg.write_index);
+	io[1].iov_base = &loc;
+	io[1].iov_len = sizeof(loc);
+	io[2].iov_base = data;
+	bytes = snprintf(data, sizeof(data), "Test") + 1;
+	io[2].iov_len = bytes;
+
+	/* Undersized write should fail */
+	ASSERT_EQ(-1, writev(self->data_fd, (const struct iovec *)io, 1));
+	ASSERT_EQ(EINVAL, errno);
+
+	/* Enable event */
+	self->enable_fd = open(enable_file, O_RDWR);
+	ASSERT_NE(-1, write(self->enable_fd, "1", sizeof("1")))
+
+	/* Full in-bounds write should work */
+	before = trace_bytes();
+	loc = DYN_LOC(0, bytes);
+	ASSERT_NE(-1, writev(self->data_fd, (const struct iovec *)io, 3));
+	after = trace_bytes();
+	ASSERT_GT(after, before);
+
+	/* Out of bounds write should fault (offset way out) */
+	loc = DYN_LOC(1024, bytes);
+	ASSERT_EQ(-1, writev(self->data_fd, (const struct iovec *)io, 3));
+	ASSERT_EQ(EFAULT, errno);
+
+	/* Out of bounds write should fault (offset 1 byte out) */
+	loc = DYN_LOC(1, bytes);
+	ASSERT_EQ(-1, writev(self->data_fd, (const struct iovec *)io, 3));
+	ASSERT_EQ(EFAULT, errno);
+
+	/* Out of bounds write should fault (size way out) */
+	loc = DYN_LOC(0, bytes + 1024);
+	ASSERT_EQ(-1, writev(self->data_fd, (const struct iovec *)io, 3));
+	ASSERT_EQ(EFAULT, errno);
+
+	/* Out of bounds write should fault (size 1 byte out) */
+	loc = DYN_LOC(0, bytes + 1);
+	ASSERT_EQ(-1, writev(self->data_fd, (const struct iovec *)io, 3));
+	ASSERT_EQ(EFAULT, errno);
+
+	/* Non-Null should fault */
+	memset(data, 'A', sizeof(data));
+	loc = DYN_LOC(0, bytes);
+	ASSERT_EQ(-1, writev(self->data_fd, (const struct iovec *)io, 3));
+	ASSERT_EQ(EFAULT, errno);
+}
+
 TEST_F(user, print_fmt) {
 	int ret;
 
-- 
cgit 


From b9605161e7be40fdd0fa0685b5c534e6201ac04b Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Thu, 10 Feb 2022 16:08:08 +0100
Subject: ipv6: Reject routes configurations that specify dsfield (tos)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The ->rtm_tos option is normally used to route packets based on both
the destination address and the DS field. However it's ignored for
IPv6 routes. Setting ->rtm_tos for IPv6 is thus invalid as the route
is going to work only on the destination address anyway, so it won't
behave as specified.

Suggested-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Guillaume Nault <gnault@redhat.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c                         |  6 ++++++
 tools/testing/selftests/net/fib_tests.sh | 13 +++++++++++++
 2 files changed, 19 insertions(+)

(limited to 'tools/testing')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index f4884cda13b9..dd98a11fbdb6 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -5009,6 +5009,12 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
 	err = -EINVAL;
 	rtm = nlmsg_data(nlh);
 
+	if (rtm->rtm_tos) {
+		NL_SET_ERR_MSG(extack,
+			       "Invalid dsfield (tos): option not available for IPv6");
+		goto errout;
+	}
+
 	*cfg = (struct fib6_config){
 		.fc_table = rtm->rtm_table,
 		.fc_dst_len = rtm->rtm_dst_len,
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index bb73235976b3..e2690cc42da3 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -988,12 +988,25 @@ ipv6_rt_replace()
 	ipv6_rt_replace_mpath
 }
 
+ipv6_rt_dsfield()
+{
+	echo
+	echo "IPv6 route with dsfield tests"
+
+	run_cmd "$IP -6 route flush 2001:db8:102::/64"
+
+	# IPv6 doesn't support routing based on dsfield
+	run_cmd "$IP -6 route add 2001:db8:102::/64 dsfield 0x04 via 2001:db8:101::2"
+	log_test $? 2 "Reject route with dsfield"
+}
+
 ipv6_route_test()
 {
 	route_setup
 
 	ipv6_rt_add
 	ipv6_rt_replace
+	ipv6_rt_dsfield
 
 	route_cleanup
 }
-- 
cgit 


From 2d03861e0d1d1ee81efc59338101cdd86a7474f6 Mon Sep 17 00:00:00 2001
From: Reinette Chatre <reinette.chatre@intel.com>
Date: Tue, 8 Feb 2022 13:48:39 -0800
Subject: selftests/sgx: Fix NULL-pointer-dereference upon early test failure

== Background ==

The SGX selftests track parts of the enclave binaries in an array:
encl->segment_tbl[]. That array is dynamically allocated early
(but not first) in the test's lifetime. The array is referenced
at the end of the test in encl_delete().

== Problem ==

encl->segment_tbl[] can be NULL if the test fails before its
allocation. That leads to a NULL-pointer-dereference in encl_delete().
This is triggered during early failures of the selftest like if the
enclave binary ("test_encl.elf") is deleted.

== Solution ==

Ensure encl->segment_tbl[] is valid before attempting to access
its members. The offset with which it is accessed, encl->nr_segments,
is initialized before encl->segment_tbl[] and thus considered valid
to use after the encl->segment_tbl[] check succeeds.

Fixes: 3200505d4de6 ("selftests/sgx: Create a heap for the test enclave")
Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lkml.kernel.org/r/90a31dfd640ea756fa324712e7cbab4a90fa7518.1644355600.git.reinette.chatre@intel.com
---
 tools/testing/selftests/sgx/load.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/sgx/load.c b/tools/testing/selftests/sgx/load.c
index 9d4322c946e2..006b464c8fc9 100644
--- a/tools/testing/selftests/sgx/load.c
+++ b/tools/testing/selftests/sgx/load.c
@@ -21,7 +21,7 @@
 
 void encl_delete(struct encl *encl)
 {
-	struct encl_segment *heap_seg = &encl->segment_tbl[encl->nr_segments - 1];
+	struct encl_segment *heap_seg;
 
 	if (encl->encl_base)
 		munmap((void *)encl->encl_base, encl->encl_size);
@@ -32,10 +32,11 @@ void encl_delete(struct encl *encl)
 	if (encl->fd)
 		close(encl->fd);
 
-	munmap(heap_seg->src, heap_seg->size);
-
-	if (encl->segment_tbl)
+	if (encl->segment_tbl) {
+		heap_seg = &encl->segment_tbl[encl->nr_segments - 1];
+		munmap(heap_seg->src, heap_seg->size);
 		free(encl->segment_tbl);
+	}
 
 	memset(encl, 0, sizeof(*encl));
 }
-- 
cgit 


From fff36bcbfde1126f6b81cb8ee12a58aada17ca29 Mon Sep 17 00:00:00 2001
From: Reinette Chatre <reinette.chatre@intel.com>
Date: Tue, 8 Feb 2022 13:48:40 -0800
Subject: selftests/sgx: Do not attempt enclave build without valid enclave

It is not possible to build an enclave if it was not possible to load
the binary from which it should be constructed. Do not attempt
to make further progress but instead return with failure. A
"return false" from setup_test_encl() is expected to trip an
ASSERT_TRUE() and abort the rest of the test.

Fixes: 1b35eb719549 ("selftests/sgx: Encpsulate the test enclave creation")
Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lkml.kernel.org/r/e3778c77f95e6dca348c732b12f155051d2899b4.1644355600.git.reinette.chatre@intel.com
---
 tools/testing/selftests/sgx/main.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c
index 370c4995f7c4..a7cd2c3e6f7e 100644
--- a/tools/testing/selftests/sgx/main.c
+++ b/tools/testing/selftests/sgx/main.c
@@ -147,6 +147,7 @@ static bool setup_test_encl(unsigned long heap_size, struct encl *encl,
 	if (!encl_load("test_encl.elf", encl, heap_size)) {
 		encl_delete(encl);
 		TH_LOG("Failed to load the test enclave.\n");
+		return false;
 	}
 
 	if (!encl_measure(encl))
-- 
cgit 


From 2db703fc3b15e7ef68c82eca613a3c00d43d70af Mon Sep 17 00:00:00 2001
From: Reinette Chatre <reinette.chatre@intel.com>
Date: Tue, 8 Feb 2022 13:48:41 -0800
Subject: selftests/sgx: Ensure enclave data available during debug print

In support of debugging the SGX tests print details from
the enclave and its memory mappings if any failure is encountered
during enclave loading.

When a failure is encountered no data is printed because the
printing of the data is preceded by cleanup of the data.

Move the data cleanup after the data print.

Fixes: 147172148909 ("selftests/sgx: Dump segments and /proc/self/maps only on failure")
Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lkml.kernel.org/r/dab672f771e9b99e50c17ae2a75dc0b020cb0ce9.1644355600.git.reinette.chatre@intel.com
---
 tools/testing/selftests/sgx/main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c
index a7cd2c3e6f7e..b0bd95a4730d 100644
--- a/tools/testing/selftests/sgx/main.c
+++ b/tools/testing/selftests/sgx/main.c
@@ -186,8 +186,6 @@ static bool setup_test_encl(unsigned long heap_size, struct encl *encl,
 	return true;
 
 err:
-	encl_delete(encl);
-
 	for (i = 0; i < encl->nr_segments; i++) {
 		seg = &encl->segment_tbl[i];
 
@@ -208,6 +206,8 @@ err:
 
 	TH_LOG("Failed to initialize the test enclave.\n");
 
+	encl_delete(encl);
+
 	return false;
 }
 
-- 
cgit 


From 5626de65f97ae152e6dafdc528a36c1cbb7146ee Mon Sep 17 00:00:00 2001
From: Reinette Chatre <reinette.chatre@intel.com>
Date: Tue, 8 Feb 2022 13:48:42 -0800
Subject: selftests/sgx: Remove extra newlines in test output

The TH_LOG() macro is an optional debug logging function made
available by kselftest itself. When TH_LOG_ENABLED is set it
prints the provided message with additional information and
formatting that already includes a newline.

Providing a newline to the message printed by TH_LOG() results
in a double newline that produces irregular test output.

Remove the unnecessary newlines from the text provided to
TH_LOG().

Fixes: 1b35eb719549 ("selftests/sgx: Encpsulate the test enclave creation")
Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lkml.kernel.org/r/6fd171ba622aed172a7c5b129d34d50bd0482f24.1644355600.git.reinette.chatre@intel.com
---
 tools/testing/selftests/sgx/main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c
index b0bd95a4730d..dd74fa42302e 100644
--- a/tools/testing/selftests/sgx/main.c
+++ b/tools/testing/selftests/sgx/main.c
@@ -146,7 +146,7 @@ static bool setup_test_encl(unsigned long heap_size, struct encl *encl,
 
 	if (!encl_load("test_encl.elf", encl, heap_size)) {
 		encl_delete(encl);
-		TH_LOG("Failed to load the test enclave.\n");
+		TH_LOG("Failed to load the test enclave.");
 		return false;
 	}
 
@@ -204,7 +204,7 @@ err:
 		fclose(maps_file);
 	}
 
-	TH_LOG("Failed to initialize the test enclave.\n");
+	TH_LOG("Failed to initialize the test enclave.");
 
 	encl_delete(encl);
 
-- 
cgit 


From 889c5d60fbcf332c8b6ab7054d45f2768914a375 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Thu, 3 Feb 2022 10:05:32 -0500
Subject: selftests/rseq: Change type of rseq_offset to ptrdiff_t

Just before the 2.35 release of glibc, the __rseq_offset userspace ABI
was changed from int to ptrdiff_t.

Adapt to this change in the kernel selftests.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://sourceware.org/pipermail/libc-alpha/2022-February/136024.html
---
 tools/testing/selftests/rseq/rseq-x86.h | 14 +++++++-------
 tools/testing/selftests/rseq/rseq.c     |  5 +++--
 tools/testing/selftests/rseq/rseq.h     |  3 ++-
 3 files changed, 12 insertions(+), 10 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h
index f704d3664327..bd01dc41ca13 100644
--- a/tools/testing/selftests/rseq/rseq-x86.h
+++ b/tools/testing/selftests/rseq/rseq-x86.h
@@ -143,7 +143,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
 		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [rseq_offset]		"r" ((long)rseq_offset),
+		  [rseq_offset]		"r" (rseq_offset),
 		  [v]			"m" (*v),
 		  [expect]		"r" (expect),
 		  [newv]		"r" (newv)
@@ -214,7 +214,7 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
 		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [rseq_offset]		"r" ((long)rseq_offset),
+		  [rseq_offset]		"r" (rseq_offset),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [expectnot]		"r" (expectnot),
@@ -270,7 +270,7 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
 		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [rseq_offset]		"r" ((long)rseq_offset),
+		  [rseq_offset]		"r" (rseq_offset),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [count]		"er" (count)
@@ -329,7 +329,7 @@ int rseq_offset_deref_addv(intptr_t *ptr, long off, intptr_t inc, int cpu)
 		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [rseq_offset]		"r" ((long)rseq_offset),
+		  [rseq_offset]		"r" (rseq_offset),
 		  /* final store input */
 		  [ptr]			"m" (*ptr),
 		  [off]			"er" (off),
@@ -387,7 +387,7 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
 		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [rseq_offset]		"r" ((long)rseq_offset),
+		  [rseq_offset]		"r" (rseq_offset),
 		  /* try store input */
 		  [v2]			"m" (*v2),
 		  [newv2]		"r" (newv2),
@@ -469,7 +469,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
 		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [rseq_offset]		"r" ((long)rseq_offset),
+		  [rseq_offset]		"r" (rseq_offset),
 		  /* cmp2 input */
 		  [v2]			"m" (*v2),
 		  [expect2]		"r" (expect2),
@@ -581,7 +581,7 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
 #endif
 		: /* gcc asm goto does not allow outputs */
 		: [cpu_id]		"r" (cpu),
-		  [rseq_offset]		"r" ((long)rseq_offset),
+		  [rseq_offset]		"r" (rseq_offset),
 		  /* final store input */
 		  [v]			"m" (*v),
 		  [expect]		"r" (expect),
diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c
index 07ba0d463a96..986b9458efb2 100644
--- a/tools/testing/selftests/rseq/rseq.c
+++ b/tools/testing/selftests/rseq/rseq.c
@@ -27,16 +27,17 @@
 #include <signal.h>
 #include <limits.h>
 #include <dlfcn.h>
+#include <stddef.h>
 
 #include "../kselftest.h"
 #include "rseq.h"
 
-static const int *libc_rseq_offset_p;
+static const ptrdiff_t *libc_rseq_offset_p;
 static const unsigned int *libc_rseq_size_p;
 static const unsigned int *libc_rseq_flags_p;
 
 /* Offset from the thread pointer to the rseq area.  */
-int rseq_offset;
+ptrdiff_t rseq_offset;
 
 /* Size of the registered rseq area.  0 if the registration was
    unsuccessful.  */
diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h
index 6bd0ac466b4a..9d850b290c2e 100644
--- a/tools/testing/selftests/rseq/rseq.h
+++ b/tools/testing/selftests/rseq/rseq.h
@@ -16,6 +16,7 @@
 #include <errno.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <stddef.h>
 #include "rseq-abi.h"
 #include "compiler.h"
 
@@ -47,7 +48,7 @@
 #include "rseq-thread-pointer.h"
 
 /* Offset from the thread pointer to the rseq area.  */
-extern int rseq_offset;
+extern ptrdiff_t rseq_offset;
 /* Size of the registered rseq area.  0 if the registration was
    unsuccessful.  */
 extern unsigned int rseq_size;
-- 
cgit 


From 2504e5b9827f7fc76ed0e4593adc852ac7a19851 Mon Sep 17 00:00:00 2001
From: Ritesh Harjani <riteshh@linux.ibm.com>
Date: Mon, 13 Sep 2021 11:47:20 +0530
Subject: selftests/powerpc/copyloops: Add memmove_64 test

While debugging an issue, we wanted to check whether the arch specific
kernel memmove implementation is correct.
This selftest could help test that.

Suggested-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Suggested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
Signed-off-by: Ritesh Harjani <riteshh@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/57242c1fe7aba6b7f0fcd0490303bfd5f222ee00.1631512686.git.riteshh@linux.ibm.com
---
 .../testing/selftests/powerpc/copyloops/.gitignore |  1 +
 tools/testing/selftests/powerpc/copyloops/Makefile |  9 +++-
 .../selftests/powerpc/copyloops/asm/ppc_asm.h      |  1 +
 tools/testing/selftests/powerpc/copyloops/mem_64.S |  1 +
 .../selftests/powerpc/copyloops/memcpy_stubs.S     |  8 +++
 .../selftests/powerpc/copyloops/memmove_validate.c | 58 ++++++++++++++++++++++
 6 files changed, 77 insertions(+), 1 deletion(-)
 create mode 120000 tools/testing/selftests/powerpc/copyloops/mem_64.S
 create mode 100644 tools/testing/selftests/powerpc/copyloops/memcpy_stubs.S
 create mode 100644 tools/testing/selftests/powerpc/copyloops/memmove_validate.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/powerpc/copyloops/.gitignore b/tools/testing/selftests/powerpc/copyloops/.gitignore
index 994b11af765c..7283e8b07b75 100644
--- a/tools/testing/selftests/powerpc/copyloops/.gitignore
+++ b/tools/testing/selftests/powerpc/copyloops/.gitignore
@@ -13,3 +13,4 @@ copyuser_64_exc_t0
 copyuser_64_exc_t1
 copyuser_64_exc_t2
 copy_mc_64
+memmove_64
diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile
index 3095b1f1c02b..77594e697f2f 100644
--- a/tools/testing/selftests/powerpc/copyloops/Makefile
+++ b/tools/testing/selftests/powerpc/copyloops/Makefile
@@ -13,7 +13,8 @@ TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \
 		copyuser_p7_t0 copyuser_p7_t1 \
 		memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \
 		memcpy_p7_t0 memcpy_p7_t1 copy_mc_64 \
-		copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2
+		copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2 \
+		memmove_64
 
 EXTRA_SOURCES := validate.c ../harness.c stubs.S
 
@@ -56,3 +57,9 @@ $(OUTPUT)/copyuser_64_exc_t%: copyuser_64.S exc_validate.c ../harness.c \
 		-D COPY_LOOP=test___copy_tofrom_user_base \
 		-D SELFTEST_CASE=$(subst copyuser_64_exc_t,,$(notdir $@)) \
 		-o $@ $^
+
+$(OUTPUT)/memmove_64: mem_64.S memcpy_64.S memmove_validate.c ../harness.c \
+		memcpy_stubs.S
+	$(CC) $(CPPFLAGS) $(CFLAGS) \
+		-D TEST_MEMMOVE=test_memmove \
+		-o $@ $^
diff --git a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h
index 58c1cef3e399..003e1b3d9300 100644
--- a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h
+++ b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h
@@ -26,6 +26,7 @@
 #define _GLOBAL(A) FUNC_START(test_ ## A)
 #define _GLOBAL_TOC(A) _GLOBAL(A)
 #define _GLOBAL_TOC_KASAN(A) _GLOBAL(A)
+#define _GLOBAL_KASAN(A) _GLOBAL(A)
 
 #define PPC_MTOCRF(A, B)	mtocrf A, B
 
diff --git a/tools/testing/selftests/powerpc/copyloops/mem_64.S b/tools/testing/selftests/powerpc/copyloops/mem_64.S
new file mode 120000
index 000000000000..db254c9a5f5c
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/mem_64.S
@@ -0,0 +1 @@
+../../../../../arch/powerpc/lib/mem_64.S
\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/copyloops/memcpy_stubs.S b/tools/testing/selftests/powerpc/copyloops/memcpy_stubs.S
new file mode 100644
index 000000000000..d9baa832fa49
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/memcpy_stubs.S
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm/ppc_asm.h>
+
+FUNC_START(memcpy)
+	b test_memcpy
+
+FUNC_START(backwards_memcpy)
+	b test_backwards_memcpy
diff --git a/tools/testing/selftests/powerpc/copyloops/memmove_validate.c b/tools/testing/selftests/powerpc/copyloops/memmove_validate.c
new file mode 100644
index 000000000000..1a23218b5757
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/memmove_validate.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <malloc.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include "utils.h"
+
+void *TEST_MEMMOVE(const void *s1, const void *s2, size_t n);
+
+#define BUF_LEN 65536
+#define MAX_OFFSET 512
+
+size_t max(size_t a, size_t b)
+{
+	if (a >= b)
+		return a;
+	return b;
+}
+
+static int testcase_run(void)
+{
+	size_t i, src_off, dst_off, len;
+
+	char *usermap = memalign(BUF_LEN, BUF_LEN);
+	char *kernelmap = memalign(BUF_LEN, BUF_LEN);
+
+	assert(usermap != NULL);
+	assert(kernelmap != NULL);
+
+	memset(usermap, 0, BUF_LEN);
+	memset(kernelmap, 0, BUF_LEN);
+
+	for (i = 0; i < BUF_LEN; i++) {
+		usermap[i] = i & 0xff;
+		kernelmap[i] = i & 0xff;
+	}
+
+	for (src_off = 0; src_off < MAX_OFFSET; src_off++) {
+		for (dst_off = 0; dst_off < MAX_OFFSET; dst_off++) {
+			for (len = 1; len < MAX_OFFSET - max(src_off, dst_off); len++) {
+
+				memmove(usermap + dst_off, usermap + src_off, len);
+				TEST_MEMMOVE(kernelmap + dst_off, kernelmap + src_off, len);
+				if (memcmp(usermap, kernelmap, MAX_OFFSET) != 0) {
+					printf("memmove failed at %ld %ld %ld\n",
+							src_off, dst_off, len);
+					abort();
+				}
+			}
+		}
+	}
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(testcase_run, "memmove");
+}
-- 
cgit 


From 4ddc844eb81da59bfb816d8d52089aba4e59e269 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Thu, 10 Feb 2022 18:56:08 +0100
Subject: net/sched: act_police: more accurate MTU policing

in current Linux, MTU policing does not take into account that packets at
the TC ingress have the L2 header pulled. Thus, the same TC police action
(with the same value of tcfp_mtu) behaves differently for ingress/egress.
In addition, the full GSO size is compared to tcfp_mtu: as a consequence,
the policer drops GSO packets even when individual segments have the L2 +
L3 + L4 + payload length below the configured valued of tcfp_mtu.

Improve the accuracy of MTU policing as follows:
 - account for mac_len for non-GSO packets at TC ingress.
 - compare MTU threshold with the segmented size for GSO packets.
Also, add a kselftest that verifies the correct behavior.

Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_police.c                             | 16 ++++++-
 .../testing/selftests/net/forwarding/tc_police.sh  | 52 ++++++++++++++++++++++
 2 files changed, 67 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 0923aa2b8f8a..899fe025df77 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -239,6 +239,20 @@ release_idr:
 	return err;
 }
 
+static bool tcf_police_mtu_check(struct sk_buff *skb, u32 limit)
+{
+	u32 len;
+
+	if (skb_is_gso(skb))
+		return skb_gso_validate_mac_len(skb, limit);
+
+	len = qdisc_pkt_len(skb);
+	if (skb_at_tc_ingress(skb))
+		len += skb->mac_len;
+
+	return len <= limit;
+}
+
 static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a,
 			  struct tcf_result *res)
 {
@@ -261,7 +275,7 @@ static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a,
 			goto inc_overlimits;
 	}
 
-	if (qdisc_pkt_len(skb) <= p->tcfp_mtu) {
+	if (tcf_police_mtu_check(skb, p->tcfp_mtu)) {
 		if (!p->rate_present && !p->pps_present) {
 			ret = p->tcfp_result;
 			goto end;
diff --git a/tools/testing/selftests/net/forwarding/tc_police.sh b/tools/testing/selftests/net/forwarding/tc_police.sh
index 4f9f17cb45d6..0a51eef21b9e 100755
--- a/tools/testing/selftests/net/forwarding/tc_police.sh
+++ b/tools/testing/selftests/net/forwarding/tc_police.sh
@@ -37,6 +37,8 @@ ALL_TESTS="
 	police_tx_mirror_test
 	police_pps_rx_test
 	police_pps_tx_test
+	police_mtu_rx_test
+	police_mtu_tx_test
 "
 NUM_NETIFS=6
 source tc_common.sh
@@ -346,6 +348,56 @@ police_pps_tx_test()
 	tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower
 }
 
+police_mtu_common_test() {
+	RET=0
+
+	local test_name=$1; shift
+	local dev=$1; shift
+	local direction=$1; shift
+
+	tc filter add dev $dev $direction protocol ip pref 1 handle 101 flower \
+		dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+		action police mtu 1042 conform-exceed drop/ok
+
+	# to count "conform" packets
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+		action drop
+
+	mausezahn $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \
+		-t udp sp=12345,dp=54321 -p 1001 -c 10 -q
+
+	mausezahn $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \
+		-t udp sp=12345,dp=54321 -p 1000 -c 3 -q
+
+	tc_check_packets "dev $dev $direction" 101 13
+	check_err $? "wrong packet counter"
+
+	# "exceed" packets
+	local overlimits_t0=$(tc_rule_stats_get ${dev} 1 ${direction} .overlimits)
+	test ${overlimits_t0} = 10
+	check_err $? "wrong overlimits, expected 10 got ${overlimits_t0}"
+
+	# "conform" packets
+	tc_check_packets "dev $h2 ingress" 101 3
+	check_err $? "forwarding error"
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+	tc filter del dev $dev $direction protocol ip pref 1 handle 101 flower
+
+	log_test "$test_name"
+}
+
+police_mtu_rx_test()
+{
+	police_mtu_common_test "police mtu (rx)" $rp1 ingress
+}
+
+police_mtu_tx_test()
+{
+	police_mtu_common_test "police mtu (tx)" $rp2 egress
+}
+
 setup_prepare()
 {
 	h1=${NETIFS[p1]}
-- 
cgit 


From 12d8c11198af191d06d02b01624d84c669ef7231 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Mon, 14 Feb 2022 09:38:10 +0000
Subject: selftests: net: cmsg_sender: Fix spelling mistake "MONOTINIC" ->
 "MONOTONIC"

There is a spelling mistake in an error message. Fix it.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/cmsg_sender.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c
index 24444dc72543..efa617bd34e2 100644
--- a/tools/testing/selftests/net/cmsg_sender.c
+++ b/tools/testing/selftests/net/cmsg_sender.c
@@ -341,7 +341,7 @@ int main(int argc, char *argv[])
 	if (clock_gettime(CLOCK_REALTIME, &time_start_real))
 		error(ERN_GETTIME, errno, "gettime REALTIME");
 	if (clock_gettime(CLOCK_MONOTONIC, &time_start_mono))
-		error(ERN_GETTIME, errno, "gettime MONOTINIC");
+		error(ERN_GETTIME, errno, "gettime MONOTONIC");
 
 	iov[0].iov_base = buf;
 	iov[0].iov_len = sizeof(buf);
-- 
cgit 


From c7ef9ebbed20b860f70cc7bece65622b570a8a93 Mon Sep 17 00:00:00 2001
From: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
Date: Fri, 11 Feb 2022 19:22:09 +0100
Subject: KVM: s390: selftests: Test TEST PROTECTION emulation

Test the emulation of TEST PROTECTION in the presence of storage keys.
Emulation only occurs under certain conditions, one of which is the host
page being protected.
Trigger this by protecting the test pages via mprotect.

Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Link: https://lore.kernel.org/r/20220211182215.2730017-5-scgl@linux.ibm.com
Signed-off-by: Christian Borntraeger <borntraeger@linux.ibm.com>
---
 tools/testing/selftests/kvm/.gitignore    |   1 +
 tools/testing/selftests/kvm/Makefile      |   1 +
 tools/testing/selftests/kvm/s390x/tprot.c | 227 ++++++++++++++++++++++++++++++
 3 files changed, 229 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/s390x/tprot.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index dce7de7755e6..7903580a48ac 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -8,6 +8,7 @@
 /s390x/memop
 /s390x/resets
 /s390x/sync_regs_test
+/s390x/tprot
 /x86_64/amx_test
 /x86_64/cpuid_test
 /x86_64/cr4_cpuid_sync_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 0e4926bc9a58..086f490e808d 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -121,6 +121,7 @@ TEST_GEN_PROGS_aarch64 += kvm_binary_stats_test
 TEST_GEN_PROGS_s390x = s390x/memop
 TEST_GEN_PROGS_s390x += s390x/resets
 TEST_GEN_PROGS_s390x += s390x/sync_regs_test
+TEST_GEN_PROGS_s390x += s390x/tprot
 TEST_GEN_PROGS_s390x += demand_paging_test
 TEST_GEN_PROGS_s390x += dirty_log_test
 TEST_GEN_PROGS_s390x += kvm_create_max_vcpus
diff --git a/tools/testing/selftests/kvm/s390x/tprot.c b/tools/testing/selftests/kvm/s390x/tprot.c
new file mode 100644
index 000000000000..c097b9db495e
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390x/tprot.c
@@ -0,0 +1,227 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test TEST PROTECTION emulation.
+ *
+ * Copyright IBM Corp. 2021
+ */
+
+#include <sys/mman.h>
+#include "test_util.h"
+#include "kvm_util.h"
+
+#define PAGE_SHIFT 12
+#define PAGE_SIZE (1 << PAGE_SHIFT)
+#define CR0_FETCH_PROTECTION_OVERRIDE	(1UL << (63 - 38))
+#define CR0_STORAGE_PROTECTION_OVERRIDE	(1UL << (63 - 39))
+
+#define VCPU_ID 1
+
+static __aligned(PAGE_SIZE) uint8_t pages[2][PAGE_SIZE];
+static uint8_t *const page_store_prot = pages[0];
+static uint8_t *const page_fetch_prot = pages[1];
+
+/* Nonzero return value indicates that address not mapped */
+static int set_storage_key(void *addr, uint8_t key)
+{
+	int not_mapped = 0;
+
+	asm volatile (
+		       "lra	%[addr], 0(0,%[addr])\n"
+		"	jz	0f\n"
+		"	llill	%[not_mapped],1\n"
+		"	j	1f\n"
+		"0:	sske	%[key], %[addr]\n"
+		"1:"
+		: [addr] "+&a" (addr), [not_mapped] "+r" (not_mapped)
+		: [key] "r" (key)
+		: "cc"
+	);
+	return -not_mapped;
+}
+
+enum permission {
+	READ_WRITE = 0,
+	READ = 1,
+	RW_PROTECTED = 2,
+	TRANSL_UNAVAIL = 3,
+};
+
+static enum permission test_protection(void *addr, uint8_t key)
+{
+	uint64_t mask;
+
+	asm volatile (
+		       "tprot	%[addr], 0(%[key])\n"
+		"	ipm	%[mask]\n"
+		: [mask] "=r" (mask)
+		: [addr] "Q" (*(char *)addr),
+		  [key] "a" (key)
+		: "cc"
+	);
+
+	return (enum permission)(mask >> 28);
+}
+
+enum stage {
+	STAGE_END,
+	STAGE_INIT_SIMPLE,
+	TEST_SIMPLE,
+	STAGE_INIT_FETCH_PROT_OVERRIDE,
+	TEST_FETCH_PROT_OVERRIDE,
+	TEST_STORAGE_PROT_OVERRIDE,
+};
+
+struct test {
+	enum stage stage;
+	void *addr;
+	uint8_t key;
+	enum permission expected;
+} tests[] = {
+	/*
+	 * We perform each test in the array by executing TEST PROTECTION on
+	 * the specified addr with the specified key and checking if the returned
+	 * permissions match the expected value.
+	 * Both guest and host cooperate to set up the required test conditions.
+	 * A central condition is that the page targeted by addr has to be DAT
+	 * protected in the host mappings, in order for KVM to emulate the
+	 * TEST PROTECTION instruction.
+	 * Since the page tables are shared, the host uses mprotect to achieve
+	 * this.
+	 *
+	 * Test resulting in RW_PROTECTED/TRANSL_UNAVAIL will be interpreted
+	 * by SIE, not KVM, but there is no harm in testing them also.
+	 * See Enhanced Suppression-on-Protection Facilities in the
+	 * Interpretive-Execution Mode
+	 */
+	/*
+	 * guest: set storage key of page_store_prot to 1
+	 *        storage key of page_fetch_prot to 9 and enable
+	 *        protection for it
+	 * STAGE_INIT_SIMPLE
+	 * host: write protect both via mprotect
+	 */
+	/* access key 0 matches any storage key -> RW */
+	{ TEST_SIMPLE, page_store_prot, 0x00, READ_WRITE },
+	/* access key matches storage key -> RW */
+	{ TEST_SIMPLE, page_store_prot, 0x10, READ_WRITE },
+	/* mismatched keys, but no fetch protection -> RO */
+	{ TEST_SIMPLE, page_store_prot, 0x20, READ },
+	/* access key 0 matches any storage key -> RW */
+	{ TEST_SIMPLE, page_fetch_prot, 0x00, READ_WRITE },
+	/* access key matches storage key -> RW */
+	{ TEST_SIMPLE, page_fetch_prot, 0x90, READ_WRITE },
+	/* mismatched keys, fetch protection -> inaccessible */
+	{ TEST_SIMPLE, page_fetch_prot, 0x10, RW_PROTECTED },
+	/* page 0 not mapped yet -> translation not available */
+	{ TEST_SIMPLE, (void *)0x00, 0x10, TRANSL_UNAVAIL },
+	/*
+	 * host: try to map page 0
+	 * guest: set storage key of page 0 to 9 and enable fetch protection
+	 * STAGE_INIT_FETCH_PROT_OVERRIDE
+	 * host: write protect page 0
+	 *       enable fetch protection override
+	 */
+	/* mismatched keys, fetch protection, but override applies -> RO */
+	{ TEST_FETCH_PROT_OVERRIDE, (void *)0x00, 0x10, READ },
+	/* mismatched keys, fetch protection, override applies to 0-2048 only -> inaccessible */
+	{ TEST_FETCH_PROT_OVERRIDE, (void *)2049, 0x10, RW_PROTECTED },
+	/*
+	 * host: enable storage protection override
+	 */
+	/* mismatched keys, but override applies (storage key 9) -> RW */
+	{ TEST_STORAGE_PROT_OVERRIDE, page_fetch_prot, 0x10, READ_WRITE },
+	/* mismatched keys, no fetch protection, override doesn't apply -> RO */
+	{ TEST_STORAGE_PROT_OVERRIDE, page_store_prot, 0x20, READ },
+	/* mismatched keys, but override applies (storage key 9) -> RW */
+	{ TEST_STORAGE_PROT_OVERRIDE, (void *)2049, 0x10, READ_WRITE },
+	/* end marker */
+	{ STAGE_END, 0, 0, 0 },
+};
+
+static enum stage perform_next_stage(int *i, bool mapped_0)
+{
+	enum stage stage = tests[*i].stage;
+	enum permission result;
+	bool skip;
+
+	for (; tests[*i].stage == stage; (*i)++) {
+		/*
+		 * Some fetch protection override tests require that page 0
+		 * be mapped, however, when the hosts tries to map that page via
+		 * vm_vaddr_alloc, it may happen that some other page gets mapped
+		 * instead.
+		 * In order to skip these tests we detect this inside the guest
+		 */
+		skip = tests[*i].addr < (void *)4096 &&
+		       tests[*i].expected != TRANSL_UNAVAIL &&
+		       !mapped_0;
+		if (!skip) {
+			result = test_protection(tests[*i].addr, tests[*i].key);
+			GUEST_ASSERT_2(result == tests[*i].expected, *i, result);
+		}
+	}
+	return stage;
+}
+
+static void guest_code(void)
+{
+	bool mapped_0;
+	int i = 0;
+
+	GUEST_ASSERT_EQ(set_storage_key(page_store_prot, 0x10), 0);
+	GUEST_ASSERT_EQ(set_storage_key(page_fetch_prot, 0x98), 0);
+	GUEST_SYNC(STAGE_INIT_SIMPLE);
+	GUEST_SYNC(perform_next_stage(&i, false));
+
+	/* Fetch-protection override */
+	mapped_0 = !set_storage_key((void *)0, 0x98);
+	GUEST_SYNC(STAGE_INIT_FETCH_PROT_OVERRIDE);
+	GUEST_SYNC(perform_next_stage(&i, mapped_0));
+
+	/* Storage-protection override */
+	GUEST_SYNC(perform_next_stage(&i, mapped_0));
+}
+
+#define HOST_SYNC(vmp, stage)							\
+({										\
+	struct kvm_vm *__vm = (vmp);						\
+	struct ucall uc;							\
+	int __stage = (stage);							\
+										\
+	vcpu_run(__vm, VCPU_ID);						\
+	get_ucall(__vm, VCPU_ID, &uc);						\
+	if (uc.cmd == UCALL_ABORT) {						\
+		TEST_FAIL("line %lu: %s, hints: %lu, %lu", uc.args[1],		\
+			  (const char *)uc.args[0], uc.args[2], uc.args[3]);	\
+	}									\
+	ASSERT_EQ(uc.cmd, UCALL_SYNC);						\
+	ASSERT_EQ(uc.args[1], __stage);						\
+})
+
+int main(int argc, char *argv[])
+{
+	struct kvm_vm *vm;
+	struct kvm_run *run;
+	vm_vaddr_t guest_0_page;
+
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
+	run = vcpu_state(vm, VCPU_ID);
+
+	HOST_SYNC(vm, STAGE_INIT_SIMPLE);
+	mprotect(addr_gva2hva(vm, (vm_vaddr_t)pages), PAGE_SIZE * 2, PROT_READ);
+	HOST_SYNC(vm, TEST_SIMPLE);
+
+	guest_0_page = vm_vaddr_alloc(vm, PAGE_SIZE, 0);
+	if (guest_0_page != 0)
+		print_skip("Did not allocate page at 0 for fetch protection override tests");
+	HOST_SYNC(vm, STAGE_INIT_FETCH_PROT_OVERRIDE);
+	if (guest_0_page == 0)
+		mprotect(addr_gva2hva(vm, (vm_vaddr_t)0), PAGE_SIZE, PROT_READ);
+	run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE;
+	run->kvm_dirty_regs = KVM_SYNC_CRS;
+	HOST_SYNC(vm, TEST_FETCH_PROT_OVERRIDE);
+
+	run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE;
+	run->kvm_dirty_regs = KVM_SYNC_CRS;
+	HOST_SYNC(vm, TEST_STORAGE_PROT_OVERRIDE);
+}
-- 
cgit 


From 05515d341fe5c3674ab944fe50c4bde8f9727723 Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Tue, 15 Feb 2022 08:48:24 +0100
Subject: selftests: kvm: Check whether SIDA memop fails for normal guests

Commit 2c212e1baedc ("KVM: s390: Return error on SIDA memop on normal
guest") fixed the behavior of the SIDA memops for normal guests. It
would be nice to have a way to test whether the current kernel has
the fix applied or not. Thus add a check to the KVM selftests for
these two memops.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lore.kernel.org/r/20220215074824.188440-1-thuth@redhat.com
Signed-off-by: Christian Borntraeger <borntraeger@linux.ibm.com>
---
 tools/testing/selftests/kvm/s390x/memop.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c
index 9f49ead380ab..d19c3ffdea3f 100644
--- a/tools/testing/selftests/kvm/s390x/memop.c
+++ b/tools/testing/selftests/kvm/s390x/memop.c
@@ -160,6 +160,21 @@ int main(int argc, char *argv[])
 	run->psw_mask &= ~(3UL << (63 - 17));   /* Disable AR mode */
 	vcpu_run(vm, VCPU_ID);                  /* Run to sync new state */
 
+	/* Check that the SIDA calls are rejected for non-protected guests */
+	ksmo.gaddr = 0;
+	ksmo.flags = 0;
+	ksmo.size = 8;
+	ksmo.op = KVM_S390_MEMOP_SIDA_READ;
+	ksmo.buf = (uintptr_t)mem1;
+	ksmo.sida_offset = 0x1c0;
+	rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+	TEST_ASSERT(rv == -1 && errno == EINVAL,
+		    "ioctl does not reject SIDA_READ in non-protected mode");
+	ksmo.op = KVM_S390_MEMOP_SIDA_WRITE;
+	rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+	TEST_ASSERT(rv == -1 && errno == EINVAL,
+		    "ioctl does not reject SIDA_WRITE in non-protected mode");
+
 	kvm_vm_free(vm);
 
 	return 0;
-- 
cgit 


From 3673d4b9cf68164678c6bb8a380bfb9eebb49432 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Mon, 24 Jan 2022 17:17:48 +0000
Subject: kselftest/arm64: Remove local ARRAY_SIZE() definitions

An ARRAY_SIZE() has been added to kselftest.h so remove the local versions
in some of the arm64 selftests.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220124171748.2195875-1-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/abi/syscall-abi.c | 1 -
 tools/testing/selftests/arm64/fp/sve-ptrace.c   | 2 --
 2 files changed, 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.c b/tools/testing/selftests/arm64/abi/syscall-abi.c
index d8eeeafb50dc..1e13b7523918 100644
--- a/tools/testing/selftests/arm64/abi/syscall-abi.c
+++ b/tools/testing/selftests/arm64/abi/syscall-abi.c
@@ -18,7 +18,6 @@
 
 #include "../../kselftest.h"
 
-#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
 #define NUM_VL ((SVE_VQ_MAX - SVE_VQ_MIN) + 1)
 
 extern void do_syscall(int sve_vl);
diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c
index a3c1e67441f9..4bd333768cc4 100644
--- a/tools/testing/selftests/arm64/fp/sve-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c
@@ -21,8 +21,6 @@
 
 #include "../../kselftest.h"
 
-#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
-
 /* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */
 #ifndef NT_ARM_SVE
 #define NT_ARM_SVE 0x405
-- 
cgit 


From 396520759bd3bf4a557e4edba9a63afc13cc5773 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Wed, 26 Jan 2022 17:44:21 +0000
Subject: kselftest/arm64: Remove local definitions of MTE prctls

The GCR EL1 test unconditionally includes local definitions of the prctls
it tests. Since not only will the kselftest build infrastructure ensure
that the in tree uapi headers are available but the toolchain being used to
build kselftest may ensure that system uapi headers with MTE support are
available this causes the compiler to warn about duplicate definitions.
Remove these duplicate definitions.

Signed-off-by: Mark Brown <broonie@kernel.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lore.kernel.org/r/20220126174421.1712795-1-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c | 11 -----------
 1 file changed, 11 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c b/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c
index a876db1f096a..325bca0de0f6 100644
--- a/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c
+++ b/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c
@@ -19,17 +19,6 @@
 #include "kselftest.h"
 #include "mte_common_util.h"
 
-#define PR_SET_TAGGED_ADDR_CTRL 55
-#define PR_GET_TAGGED_ADDR_CTRL 56
-# define PR_TAGGED_ADDR_ENABLE  (1UL << 0)
-# define PR_MTE_TCF_SHIFT	1
-# define PR_MTE_TCF_NONE	(0UL << PR_MTE_TCF_SHIFT)
-# define PR_MTE_TCF_SYNC	(1UL << PR_MTE_TCF_SHIFT)
-# define PR_MTE_TCF_ASYNC	(2UL << PR_MTE_TCF_SHIFT)
-# define PR_MTE_TCF_MASK	(3UL << PR_MTE_TCF_SHIFT)
-# define PR_MTE_TAG_SHIFT	3
-# define PR_MTE_TAG_MASK	(0xffffUL << PR_MTE_TAG_SHIFT)
-
 #include "mte_def.h"
 
 #define NUM_ITERATIONS		1024
-- 
cgit 


From d53f8f8dbe97e4ed7d52e57581d1a8f6e62a7643 Mon Sep 17 00:00:00 2001
From: Joey Gouly <joey.gouly@arm.com>
Date: Wed, 9 Feb 2022 15:22:35 +0000
Subject: kselftest/arm64: mte: user_mem: introduce tag_offset and tag_len

These can be used to place an MTE tag at an address that is not at a
page size boundary.

The kernel prior to 295cf156231c ("arm64: Avoid premature usercopy failure"),
would infinite loop if an MTE tag was placed not at a PAGE_SIZE boundary.
This is because the kernel checked if the pages were readable by checking the
first byte of each page, but would then fault in the middle of the page due
to the MTE tag.

Signed-off-by: Joey Gouly <joey.gouly@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Brown <broonie@kernel.org>
Cc: Shuah Khan <shuah@kernel.org>
Reviewed-by: Mark Brown <broonie@kernel.org>
Tested-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220209152240.52788-2-joey.gouly@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/mte/check_user_mem.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c
index 1de7a0abd0ae..5a5a7e1f5789 100644
--- a/tools/testing/selftests/arm64/mte/check_user_mem.c
+++ b/tools/testing/selftests/arm64/mte/check_user_mem.c
@@ -19,7 +19,8 @@
 
 static size_t page_sz;
 
-static int check_usermem_access_fault(int mem_type, int mode, int mapping)
+static int check_usermem_access_fault(int mem_type, int mode, int mapping,
+                                      int tag_offset, int tag_len)
 {
 	int fd, i, err;
 	char val = 'A';
@@ -54,10 +55,12 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping)
 	if (i < len)
 		goto usermem_acc_err;
 
-	/* Tag the next half of memory with different value */
-	ptr_next = (void *)((unsigned long)ptr + page_sz);
+	if (!tag_len)
+		tag_len = len - tag_offset;
+	/* Tag a part of memory with different value */
+	ptr_next = (void *)((unsigned long)ptr + tag_offset);
 	ptr_next = mte_insert_new_tag(ptr_next);
-	mte_set_tag_address_range(ptr_next, page_sz);
+	mte_set_tag_address_range(ptr_next, tag_len);
 
 	lseek(fd, 0, 0);
 	/* Copy from file into buffer with invalid tag */
@@ -100,14 +103,14 @@ int main(int argc, char *argv[])
 	/* Set test plan */
 	ksft_set_plan(4);
 
-	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, page_sz, 0),
 		"Check memory access from kernel in sync mode, private mapping and mmap memory\n");
-	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
+	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, page_sz, 0),
 		"Check memory access from kernel in sync mode, shared mapping and mmap memory\n");
 
-	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE),
+	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, page_sz, 0),
 		"Check memory access from kernel in async mode, private mapping and mmap memory\n");
-	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED),
+	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, page_sz, 0),
 		"Check memory access from kernel in async mode, shared mapping and mmap memory\n");
 
 	mte_restore_setup();
-- 
cgit 


From ff0b9aba30aeca68de09b784093f4482108586a9 Mon Sep 17 00:00:00 2001
From: Joey Gouly <joey.gouly@arm.com>
Date: Wed, 9 Feb 2022 15:22:36 +0000
Subject: kselftest/arm64: mte: user_mem: rework error handling

Future commits will have multiple iterations of tests in this function,
so make the error handling assume it will pass and then bail out if there
is an error.

Signed-off-by: Joey Gouly <joey.gouly@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Brown <broonie@kernel.org>
Cc: Shuah Khan <shuah@kernel.org>
Reviewed-by: Mark Brown <broonie@kernel.org>
Tested-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220209152240.52788-3-joey.gouly@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/mte/check_user_mem.c | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c
index 5a5a7e1f5789..2afcc9fb9ae8 100644
--- a/tools/testing/selftests/arm64/mte/check_user_mem.c
+++ b/tools/testing/selftests/arm64/mte/check_user_mem.c
@@ -27,7 +27,7 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping,
 	size_t len, read_len;
 	void *ptr, *ptr_next;
 
-	err = KSFT_FAIL;
+	err = KSFT_PASS;
 	len = 2 * page_sz;
 	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
 	fd = create_temp_file();
@@ -71,14 +71,22 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping,
 	 * mode without fault but may not fail in async mode as per the
 	 * implemented MTE userspace support in Arm64 kernel.
 	 */
-	if (mode == MTE_SYNC_ERR &&
-	    !cur_mte_cxt.fault_valid && read_len < len) {
-		err = KSFT_PASS;
-	} else if (mode == MTE_ASYNC_ERR &&
-		   !cur_mte_cxt.fault_valid && read_len == len) {
-		err = KSFT_PASS;
+	if (cur_mte_cxt.fault_valid)
+		goto usermem_acc_err;
+
+	if (mode == MTE_SYNC_ERR && read_len < len) {
+		/* test passed */
+	} else if (mode == MTE_ASYNC_ERR && read_len == len) {
+		/* test passed */
+	} else {
+		goto usermem_acc_err;
 	}
+
+	goto exit;
+
 usermem_acc_err:
+	err = KSFT_FAIL;
+exit:
 	mte_free_memory((void *)ptr, len, mem_type, true);
 	close(fd);
 	return err;
-- 
cgit 


From 682b064bae871deb213ed2e97fe4a5d4a5132e37 Mon Sep 17 00:00:00 2001
From: Joey Gouly <joey.gouly@arm.com>
Date: Wed, 9 Feb 2022 15:22:37 +0000
Subject: kselftest/arm64: mte: user_mem: check different offsets and sizes

To check there are no assumptions in the kernel about buffer sizes or alignments of
user space pointers, expand the test to cover different sizes and offsets.

Signed-off-by: Joey Gouly <joey.gouly@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Brown <broonie@kernel.org>
Cc: Shuah Khan <shuah@kernel.org>
Reviewed-by: Mark Brown <broonie@kernel.org>
Tested-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220209152240.52788-4-joey.gouly@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/mte/check_user_mem.c | 45 +++++++++++++---------
 1 file changed, 27 insertions(+), 18 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c
index 2afcc9fb9ae8..89c861ee68fa 100644
--- a/tools/testing/selftests/arm64/mte/check_user_mem.c
+++ b/tools/testing/selftests/arm64/mte/check_user_mem.c
@@ -26,6 +26,8 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping,
 	char val = 'A';
 	size_t len, read_len;
 	void *ptr, *ptr_next;
+	int fileoff, ptroff, size;
+	int sizes[] = {1, 2, 3, 8, 16, 32, 4096, page_sz};
 
 	err = KSFT_PASS;
 	len = 2 * page_sz;
@@ -62,24 +64,31 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping,
 	ptr_next = mte_insert_new_tag(ptr_next);
 	mte_set_tag_address_range(ptr_next, tag_len);
 
-	lseek(fd, 0, 0);
-	/* Copy from file into buffer with invalid tag */
-	read_len = read(fd, ptr, len);
-	mte_wait_after_trig();
-	/*
-	 * Accessing user memory in kernel with invalid tag should fail in sync
-	 * mode without fault but may not fail in async mode as per the
-	 * implemented MTE userspace support in Arm64 kernel.
-	 */
-	if (cur_mte_cxt.fault_valid)
-		goto usermem_acc_err;
-
-	if (mode == MTE_SYNC_ERR && read_len < len) {
-		/* test passed */
-	} else if (mode == MTE_ASYNC_ERR && read_len == len) {
-		/* test passed */
-	} else {
-		goto usermem_acc_err;
+	for (fileoff = 0; fileoff < 16; fileoff++) {
+		for (ptroff = 0; ptroff < 16; ptroff++) {
+			for (i = 0; i < ARRAY_SIZE(sizes); i++) {
+				size = sizes[i];
+				lseek(fd, 0, 0);
+				/* Copy from file into buffer with invalid tag */
+				read_len = read(fd, ptr + ptroff, size);
+				mte_wait_after_trig();
+				/*
+				 * Accessing user memory in kernel with invalid tag should fail in sync
+				 * mode without fault but may not fail in async mode as per the
+				 * implemented MTE userspace support in Arm64 kernel.
+				 */
+				if (cur_mte_cxt.fault_valid) {
+					goto usermem_acc_err;
+				}
+				if (mode == MTE_SYNC_ERR && read_len < len) {
+					/* test passed */
+				} else if (mode == MTE_ASYNC_ERR && read_len == size) {
+					/* test passed */
+				} else {
+					goto usermem_acc_err;
+				}
+			}
+		}
 	}
 
 	goto exit;
-- 
cgit 


From b9fc700176f1cc3d9aef7dd51423150cc1567a9a Mon Sep 17 00:00:00 2001
From: Joey Gouly <joey.gouly@arm.com>
Date: Wed, 9 Feb 2022 15:22:38 +0000
Subject: kselftest/arm64: mte: user_mem: add test type enum

The test is currently hardcoded to use the `read` syscall, this commit adds
a test_type enum to support expanding the test coverage to other syscalls.

Signed-off-by: Joey Gouly <joey.gouly@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Brown <broonie@kernel.org>
Cc: Shuah Khan <shuah@kernel.org>
Reviewed-by: Mark Brown <broonie@kernel.org>
Tested-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220209152240.52788-5-joey.gouly@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/mte/check_user_mem.c | 38 +++++++++++++++-------
 1 file changed, 26 insertions(+), 12 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c
index 89c861ee68fa..58b1b272ca80 100644
--- a/tools/testing/selftests/arm64/mte/check_user_mem.c
+++ b/tools/testing/selftests/arm64/mte/check_user_mem.c
@@ -19,12 +19,18 @@
 
 static size_t page_sz;
 
+enum test_type {
+	READ_TEST,
+	LAST_TEST,
+};
+
 static int check_usermem_access_fault(int mem_type, int mode, int mapping,
-                                      int tag_offset, int tag_len)
+                                      int tag_offset, int tag_len,
+                                      enum test_type test_type)
 {
 	int fd, i, err;
 	char val = 'A';
-	size_t len, read_len;
+	ssize_t len, syscall_len;
 	void *ptr, *ptr_next;
 	int fileoff, ptroff, size;
 	int sizes[] = {1, 2, 3, 8, 16, 32, 4096, page_sz};
@@ -46,9 +52,9 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping,
 	}
 	mte_initialize_current_context(mode, (uintptr_t)ptr, len);
 	/* Copy from file into buffer with valid tag */
-	read_len = read(fd, ptr, len);
+	syscall_len = read(fd, ptr, len);
 	mte_wait_after_trig();
-	if (cur_mte_cxt.fault_valid || read_len < len)
+	if (cur_mte_cxt.fault_valid || syscall_len < len)
 		goto usermem_acc_err;
 	/* Verify same pattern is read */
 	for (i = 0; i < len; i++)
@@ -69,8 +75,16 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping,
 			for (i = 0; i < ARRAY_SIZE(sizes); i++) {
 				size = sizes[i];
 				lseek(fd, 0, 0);
-				/* Copy from file into buffer with invalid tag */
-				read_len = read(fd, ptr + ptroff, size);
+
+				/* perform file operation on buffer with invalid tag */
+				switch (test_type) {
+				case READ_TEST:
+					syscall_len = read(fd, ptr + ptroff, size);
+					break;
+				case LAST_TEST:
+					goto usermem_acc_err;
+				}
+
 				mte_wait_after_trig();
 				/*
 				 * Accessing user memory in kernel with invalid tag should fail in sync
@@ -80,9 +94,9 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping,
 				if (cur_mte_cxt.fault_valid) {
 					goto usermem_acc_err;
 				}
-				if (mode == MTE_SYNC_ERR && read_len < len) {
+				if (mode == MTE_SYNC_ERR && syscall_len < len) {
 					/* test passed */
-				} else if (mode == MTE_ASYNC_ERR && read_len == size) {
+				} else if (mode == MTE_ASYNC_ERR && syscall_len == size) {
 					/* test passed */
 				} else {
 					goto usermem_acc_err;
@@ -120,14 +134,14 @@ int main(int argc, char *argv[])
 	/* Set test plan */
 	ksft_set_plan(4);
 
-	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, page_sz, 0),
+	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, page_sz, 0, READ_TEST),
 		"Check memory access from kernel in sync mode, private mapping and mmap memory\n");
-	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, page_sz, 0),
+	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, page_sz, 0, READ_TEST),
 		"Check memory access from kernel in sync mode, shared mapping and mmap memory\n");
 
-	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, page_sz, 0),
+	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, page_sz, 0, READ_TEST),
 		"Check memory access from kernel in async mode, private mapping and mmap memory\n");
-	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, page_sz, 0),
+	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, page_sz, 0, READ_TEST),
 		"Check memory access from kernel in async mode, shared mapping and mmap memory\n");
 
 	mte_restore_setup();
-- 
cgit 


From e8d3974f34fa8ac38915c307677657b4d6acc619 Mon Sep 17 00:00:00 2001
From: Joey Gouly <joey.gouly@arm.com>
Date: Wed, 9 Feb 2022 15:22:39 +0000
Subject: kselftest/arm64: mte: user_mem: add more test types

To expand the test coverage for MTE tags in userspace memory,
also perform the test with `write`, `readv` and `writev` syscalls.

Signed-off-by: Joey Gouly <joey.gouly@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Brown <broonie@kernel.org>
Cc: Shuah Khan <shuah@kernel.org>
Reviewed-by: Mark Brown <broonie@kernel.org>
Tested-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220209152240.52788-6-joey.gouly@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/mte/check_user_mem.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c
index 58b1b272ca80..bb4974c437f8 100644
--- a/tools/testing/selftests/arm64/mte/check_user_mem.c
+++ b/tools/testing/selftests/arm64/mte/check_user_mem.c
@@ -11,6 +11,7 @@
 #include <string.h>
 #include <ucontext.h>
 #include <unistd.h>
+#include <sys/uio.h>
 #include <sys/mman.h>
 
 #include "kselftest.h"
@@ -21,6 +22,9 @@ static size_t page_sz;
 
 enum test_type {
 	READ_TEST,
+	WRITE_TEST,
+	READV_TEST,
+	WRITEV_TEST,
 	LAST_TEST,
 };
 
@@ -81,6 +85,23 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping,
 				case READ_TEST:
 					syscall_len = read(fd, ptr + ptroff, size);
 					break;
+				case WRITE_TEST:
+					syscall_len = write(fd, ptr + ptroff, size);
+					break;
+				case READV_TEST: {
+					struct iovec iov[1];
+					iov[0].iov_base = ptr + ptroff;
+					iov[0].iov_len = size;
+					syscall_len = readv(fd, iov, 1);
+					break;
+				}
+				case WRITEV_TEST: {
+					struct iovec iov[1];
+					iov[0].iov_base = ptr + ptroff;
+					iov[0].iov_len = size;
+					syscall_len = writev(fd, iov, 1);
+					break;
+				}
 				case LAST_TEST:
 					goto usermem_acc_err;
 				}
-- 
cgit 


From 0a775ccb81207413d07214ac6eaed75d0e4376b1 Mon Sep 17 00:00:00 2001
From: Joey Gouly <joey.gouly@arm.com>
Date: Wed, 9 Feb 2022 15:22:40 +0000
Subject: kselftest/arm64: mte: user_mem: test a wider range of values

Instead of hard coding a small amount of tests, generate a wider
range of tests to try catch any corner cases that could show up.

These new tests test different MTE tag lengths and offsets, which
previously would have caused infinite loops in the kernel. This was
fixed by 295cf156231c ("arm64: Avoid premature usercopy failure"),
so these are regressions tests for that corner case.

Signed-off-by: Joey Gouly <joey.gouly@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Brown <broonie@kernel.org>
Cc: Shuah Khan <shuah@kernel.org>
Reviewed-by: Mark Brown <broonie@kernel.org>
Tested-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220209152240.52788-7-joey.gouly@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/mte/check_user_mem.c | 94 +++++++++++++++++++---
 1 file changed, 83 insertions(+), 11 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c
index bb4974c437f8..f4ae5f87a3b7 100644
--- a/tools/testing/selftests/arm64/mte/check_user_mem.c
+++ b/tools/testing/selftests/arm64/mte/check_user_mem.c
@@ -3,6 +3,7 @@
 
 #define _GNU_SOURCE
 
+#include <assert.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <signal.h>
@@ -20,6 +21,8 @@
 
 static size_t page_sz;
 
+#define TEST_NAME_MAX 100
+
 enum test_type {
 	READ_TEST,
 	WRITE_TEST,
@@ -136,9 +139,67 @@ exit:
 	return err;
 }
 
+void format_test_name(char* name, int name_len, int type, int sync, int map, int len, int offset) {
+	const char* test_type;
+	const char* mte_type;
+	const char* map_type;
+
+	switch (type) {
+	case READ_TEST:
+		test_type = "read";
+		break;
+	case WRITE_TEST:
+		test_type = "write";
+		break;
+	case READV_TEST:
+		test_type = "readv";
+		break;
+	case WRITEV_TEST:
+		test_type = "writev";
+		break;
+	default:
+		assert(0);
+		break;
+	}
+
+	switch (sync) {
+	case MTE_SYNC_ERR:
+		mte_type = "MTE_SYNC_ERR";
+		break;
+	case MTE_ASYNC_ERR:
+		mte_type = "MTE_ASYNC_ERR";
+		break;
+	default:
+		assert(0);
+		break;
+	}
+
+	switch (map) {
+	case MAP_SHARED:
+		map_type = "MAP_SHARED";
+		break;
+	case MAP_PRIVATE:
+		map_type = "MAP_PRIVATE";
+		break;
+	default:
+		assert(0);
+		break;
+	}
+
+	snprintf(name, name_len,
+	         "test type: %s, %s, %s, tag len: %d, tag offset: %d\n",
+	         test_type, mte_type, map_type, len, offset);
+}
+
 int main(int argc, char *argv[])
 {
 	int err;
+	int t, s, m, l, o;
+	int mte_sync[] = {MTE_SYNC_ERR, MTE_ASYNC_ERR};
+	int maps[] = {MAP_SHARED, MAP_PRIVATE};
+	int tag_lens[] = {0, MT_GRANULE_SIZE};
+	int tag_offsets[] = {page_sz, MT_GRANULE_SIZE};
+	char test_name[TEST_NAME_MAX];
 
 	page_sz = getpagesize();
 	if (!page_sz) {
@@ -153,17 +214,28 @@ int main(int argc, char *argv[])
 	mte_register_signal(SIGSEGV, mte_default_handler);
 
 	/* Set test plan */
-	ksft_set_plan(4);
-
-	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, page_sz, 0, READ_TEST),
-		"Check memory access from kernel in sync mode, private mapping and mmap memory\n");
-	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, page_sz, 0, READ_TEST),
-		"Check memory access from kernel in sync mode, shared mapping and mmap memory\n");
-
-	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, page_sz, 0, READ_TEST),
-		"Check memory access from kernel in async mode, private mapping and mmap memory\n");
-	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, page_sz, 0, READ_TEST),
-		"Check memory access from kernel in async mode, shared mapping and mmap memory\n");
+	ksft_set_plan(64);
+
+	for (t = 0; t < LAST_TEST; t++) {
+		for (s = 0; s < ARRAY_SIZE(mte_sync); s++) {
+			for (m = 0; m < ARRAY_SIZE(maps); m++) {
+				for (l = 0; l < ARRAY_SIZE(tag_lens); l++) {
+					for (o = 0; o < ARRAY_SIZE(tag_offsets); o++) {
+						int sync = mte_sync[s];
+						int map = maps[m];
+						int offset = tag_offsets[o];
+						int tag_len = tag_lens[l];
+						int res = check_usermem_access_fault(USE_MMAP, sync,
+						                                     map, offset,
+						                                     tag_len, t);
+						format_test_name(test_name, TEST_NAME_MAX,
+						                 t, sync, map, tag_len, offset);
+						evaluate_test(res, test_name);
+					}
+				}
+			}
+		}
+	}
 
 	mte_restore_setup();
 	ksft_print_cnts();
-- 
cgit 


From d3b0b80064e0416850f818184b8f7bba9fdf8c40 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 11 Feb 2022 11:09:27 -0800
Subject: selftests/bpf: Fix GCC11 compiler warnings in -O2 mode

When compiling selftests in -O2 mode with GCC1, we get three new
compilations warnings about potentially uninitialized variables.

Compiler is wrong 2 out of 3 times, but this patch makes GCC11 happy
anyways, as it doesn't cost us anything and makes optimized selftests
build less annoying.

The amazing one is tc_redirect case of token that is malloc()'ed before
ASSERT_OK_PTR() check is done on it. Seems like GCC pessimistically
assumes that libbpf_get_error() will dereference the contents of the
pointer (no it won't), so the only way I found to shut GCC up was to do
zero-initializaing calloc(). This one was new to me.

For linfo case, GCC didn't realize that linfo_size will be initialized
by the function that is returning linfo_size as out parameter.

core_reloc.c case was a real bug, we can goto cleanup before initializing
obj. But we don't need to do any clean up, so just continue iteration
intstead.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220211190927.1434329-1-andrii@kernel.org
---
 tools/testing/selftests/bpf/prog_tests/btf.c         | 2 +-
 tools/testing/selftests/bpf/prog_tests/core_reloc.c  | 2 +-
 tools/testing/selftests/bpf/prog_tests/tc_redirect.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 8b652f5ce423..ec823561b912 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -6556,7 +6556,7 @@ done:
 static void do_test_info_raw(unsigned int test_num)
 {
 	const struct prog_info_raw_test *test = &info_raw_tests[test_num - 1];
-	unsigned int raw_btf_size, linfo_str_off, linfo_size;
+	unsigned int raw_btf_size, linfo_str_off, linfo_size = 0;
 	int btf_fd = -1, prog_fd = -1, err = 0;
 	void *raw_btf, *patched_linfo = NULL;
 	const char *ret_next_str;
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index b8bdd1c3efca..68e4c8dafa00 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -872,7 +872,7 @@ void test_core_reloc(void)
 		if (test_case->btf_src_file) {
 			err = access(test_case->btf_src_file, R_OK);
 			if (!ASSERT_OK(err, "btf_src_file"))
-				goto cleanup;
+				continue;
 		}
 
 		open_opts.btf_custom_path = test_case->btf_src_file;
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index c2426df58e17..647b0a833628 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -140,7 +140,7 @@ static struct nstoken *open_netns(const char *name)
 	int err;
 	struct nstoken *token;
 
-	token = malloc(sizeof(struct nstoken));
+	token = calloc(1, sizeof(struct nstoken));
 	if (!ASSERT_OK_PTR(token, "malloc token"))
 		return NULL;
 
-- 
cgit 


From 189e0ecabc1717db62deab751afa755f07bdbcf8 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 11 Feb 2022 21:57:33 -0800
Subject: selftests/bpf: Add Skeleton templated wrapper as an example

Add an example of how to build C++ template-based BPF skeleton wrapper.
It's an actually runnable valid use of skeleton through more C++-like
interface. Note that skeleton destuction happens implicitly through
Skeleton<T>'s destructor.

Also make test_cpp runnable as it would have crashed on invalid btf
passed into btf_dump__new().

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220212055733.539056-3-andrii@kernel.org
---
 tools/testing/selftests/bpf/test_cpp.cpp | 87 +++++++++++++++++++++++++++++++-
 1 file changed, 85 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/test_cpp.cpp b/tools/testing/selftests/bpf/test_cpp.cpp
index e00201de2890..773f165c4898 100644
--- a/tools/testing/selftests/bpf/test_cpp.cpp
+++ b/tools/testing/selftests/bpf/test_cpp.cpp
@@ -5,18 +5,100 @@
 #include <bpf/btf.h>
 #include "test_core_extern.skel.h"
 
-/* do nothing, just make sure we can link successfully */
+template <typename T>
+class Skeleton {
+private:
+	T *skel;
+public:
+	Skeleton(): skel(nullptr) { }
+
+	~Skeleton() { if (skel) T::destroy(skel); }
+
+	int open(const struct bpf_object_open_opts *opts = nullptr)
+	{
+		int err;
+
+		if (skel)
+			return -EBUSY;
+
+		skel = T::open(opts);
+		err = libbpf_get_error(skel);
+		if (err) {
+			skel = nullptr;
+			return err;
+		}
+
+		return 0;
+	}
+
+	int load() { return T::load(skel); }
+
+	int attach() { return T::attach(skel); }
+
+	void detach() { return T::detach(skel); }
+
+	const T* operator->() const { return skel; }
+
+	T* operator->() { return skel; }
+
+	const T *get() const { return skel; }
+};
 
 static void dump_printf(void *ctx, const char *fmt, va_list args)
 {
 }
 
+static void try_skeleton_template()
+{
+	Skeleton<test_core_extern> skel;
+	std::string prog_name;
+	int err;
+	LIBBPF_OPTS(bpf_object_open_opts, opts);
+
+	err = skel.open(&opts);
+	if (err) {
+		fprintf(stderr, "Skeleton open failed: %d\n", err);
+		return;
+	}
+
+	skel->data->kern_ver = 123;
+	skel->data->int_val = skel->data->ushort_val;
+
+	err = skel.load();
+	if (err) {
+		fprintf(stderr, "Skeleton load failed: %d\n", err);
+		return;
+	}
+
+	if (!skel->kconfig->CONFIG_BPF_SYSCALL)
+		fprintf(stderr, "Seems like CONFIG_BPF_SYSCALL isn't set?!\n");
+
+	err = skel.attach();
+	if (err) {
+		fprintf(stderr, "Skeleton attach failed: %d\n", err);
+		return;
+	}
+
+	prog_name = bpf_program__name(skel->progs.handle_sys_enter);
+	if (prog_name != "handle_sys_enter")
+		fprintf(stderr, "Unexpected program name: %s\n", prog_name.c_str());
+
+	bpf_link__destroy(skel->links.handle_sys_enter);
+	skel->links.handle_sys_enter = bpf_program__attach(skel->progs.handle_sys_enter);
+
+	skel.detach();
+
+	/* destructor will destory underlying skeleton */
+}
+
 int main(int argc, char *argv[])
 {
 	struct btf_dump_opts opts = { };
 	struct test_core_extern *skel;
 	struct btf *btf;
 
+	try_skeleton_template();
+
 	/* libbpf.h */
 	libbpf_set_print(NULL);
 
@@ -25,7 +107,8 @@ int main(int argc, char *argv[])
 
 	/* btf.h */
 	btf = btf__new(NULL, 0);
-	btf_dump__new(btf, dump_printf, nullptr, &opts);
+	if (!libbpf_get_error(btf))
+		btf_dump__new(btf, dump_printf, nullptr, &opts);
 
 	/* BPF skeleton */
 	skel = test_core_extern__open_and_load();
-- 
cgit 


From 5e5a6c5441654d1b9e576ce4ca8a1759e701079e Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Tue, 15 Feb 2022 13:41:08 +0100
Subject: lkdtm: Add a test for function descriptors protection

Add WRITE_OPD to check that you can't modify function
descriptors.

Gives the following result when function descriptors are
not protected:

	lkdtm: Performing direct entry WRITE_OPD
	lkdtm: attempting bad 16 bytes write at c00000000269b358
	lkdtm: FAIL: survived bad write
	lkdtm: do_nothing was hijacked!

Looks like a standard compiler barrier() is not enough to force
GCC to use the modified function descriptor. Had to add a fake empty
inline assembly to force GCC to reload the function descriptor.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/7eeba50d16a35e9d799820e43304150225f20197.1644928018.git.christophe.leroy@csgroup.eu
---
 drivers/misc/lkdtm/core.c               |  1 +
 drivers/misc/lkdtm/lkdtm.h              |  1 +
 drivers/misc/lkdtm/perms.c              | 22 ++++++++++++++++++++++
 tools/testing/selftests/lkdtm/tests.txt |  1 +
 4 files changed, 25 insertions(+)

(limited to 'tools/testing')

diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c
index f69b964b9952..e2228b6fc09b 100644
--- a/drivers/misc/lkdtm/core.c
+++ b/drivers/misc/lkdtm/core.c
@@ -149,6 +149,7 @@ static const struct crashtype crashtypes[] = {
 	CRASHTYPE(WRITE_RO),
 	CRASHTYPE(WRITE_RO_AFTER_INIT),
 	CRASHTYPE(WRITE_KERN),
+	CRASHTYPE(WRITE_OPD),
 	CRASHTYPE(REFCOUNT_INC_OVERFLOW),
 	CRASHTYPE(REFCOUNT_ADD_OVERFLOW),
 	CRASHTYPE(REFCOUNT_INC_NOT_ZERO_OVERFLOW),
diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h
index d6137c70ebbe..305fc2ec3f25 100644
--- a/drivers/misc/lkdtm/lkdtm.h
+++ b/drivers/misc/lkdtm/lkdtm.h
@@ -106,6 +106,7 @@ void __init lkdtm_perms_init(void);
 void lkdtm_WRITE_RO(void);
 void lkdtm_WRITE_RO_AFTER_INIT(void);
 void lkdtm_WRITE_KERN(void);
+void lkdtm_WRITE_OPD(void);
 void lkdtm_EXEC_DATA(void);
 void lkdtm_EXEC_STACK(void);
 void lkdtm_EXEC_KMALLOC(void);
diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c
index 1cf24c4a79e9..2c6aba3ff32b 100644
--- a/drivers/misc/lkdtm/perms.c
+++ b/drivers/misc/lkdtm/perms.c
@@ -44,6 +44,11 @@ static noinline void do_overwritten(void)
 	return;
 }
 
+static noinline void do_almost_nothing(void)
+{
+	pr_info("do_nothing was hijacked!\n");
+}
+
 static void *setup_function_descriptor(func_desc_t *fdesc, void *dst)
 {
 	if (!have_function_descriptors())
@@ -144,6 +149,23 @@ void lkdtm_WRITE_KERN(void)
 	do_overwritten();
 }
 
+void lkdtm_WRITE_OPD(void)
+{
+	size_t size = sizeof(func_desc_t);
+	void (*func)(void) = do_nothing;
+
+	if (!have_function_descriptors()) {
+		pr_info("XFAIL: Platform doesn't use function descriptors.\n");
+		return;
+	}
+	pr_info("attempting bad %zu bytes write at %px\n", size, do_nothing);
+	memcpy(do_nothing, do_almost_nothing, size);
+	pr_err("FAIL: survived bad write\n");
+
+	asm("" : "=m"(func));
+	func();
+}
+
 void lkdtm_EXEC_DATA(void)
 {
 	execute_location(data_area, CODE_WRITE);
diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt
index 6b36b7f5dcf9..243c781f0780 100644
--- a/tools/testing/selftests/lkdtm/tests.txt
+++ b/tools/testing/selftests/lkdtm/tests.txt
@@ -44,6 +44,7 @@ ACCESS_NULL
 WRITE_RO
 WRITE_RO_AFTER_INIT
 WRITE_KERN
+WRITE_OPD
 REFCOUNT_INC_OVERFLOW
 REFCOUNT_ADD_OVERFLOW
 REFCOUNT_INC_NOT_ZERO_OVERFLOW
-- 
cgit 


From 704c91e59fe045708aa3f11d0c2bf9c83e39d24c Mon Sep 17 00:00:00 2001
From: Mauricio Vásquez <mauricio@kinvolk.io>
Date: Tue, 15 Feb 2022 17:58:56 -0500
Subject: selftests/bpf: Test "bpftool gen min_core_btf"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit reuses the core_reloc test to check if the BTF files
generated with "bpftool gen min_core_btf" are correct. This introduces
test_core_btfgen() that runs all the core_reloc tests, but this time
the source BTF files are generated by using "bpftool gen min_core_btf".

The goal of this test is to check that the generated files are usable,
and not to check if the algorithm is creating an optimized BTF file.

Signed-off-by: Mauricio Vásquez <mauricio@kinvolk.io>
Signed-off-by: Rafael David Tinoco <rafael.tinoco@aquasec.com>
Signed-off-by: Lorenzo Fontana <lorenzo.fontana@elastic.co>
Signed-off-by: Leonardo Di Donato <leonardo.didonato@elastic.co>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220215225856.671072-8-mauricio@kinvolk.io
---
 .../testing/selftests/bpf/prog_tests/core_reloc.c  | 50 +++++++++++++++++++++-
 1 file changed, 48 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index 68e4c8dafa00..baf53c23c08d 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -2,6 +2,7 @@
 #include <test_progs.h>
 #include "progs/core_reloc_types.h"
 #include "bpf_testmod/bpf_testmod.h"
+#include <linux/limits.h>
 #include <sys/mman.h>
 #include <sys/syscall.h>
 #include <bpf/btf.h>
@@ -836,13 +837,27 @@ static size_t roundup_page(size_t sz)
 	return (sz + page_size - 1) / page_size * page_size;
 }
 
-void test_core_reloc(void)
+static int run_btfgen(const char *src_btf, const char *dst_btf, const char *objpath)
+{
+	char command[4096];
+	int n;
+
+	n = snprintf(command, sizeof(command),
+		     "./tools/build/bpftool/bpftool gen min_core_btf %s %s %s",
+		     src_btf, dst_btf, objpath);
+	if (n < 0 || n >= sizeof(command))
+		return -1;
+
+	return system(command);
+}
+
+static void run_core_reloc_tests(bool use_btfgen)
 {
 	const size_t mmap_sz = roundup_page(sizeof(struct data));
 	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts);
 	struct core_reloc_test_case *test_case;
 	const char *tp_name, *probe_name;
-	int err, i, equal;
+	int err, i, equal, fd;
 	struct bpf_link *link = NULL;
 	struct bpf_map *data_map;
 	struct bpf_program *prog;
@@ -854,6 +869,7 @@ void test_core_reloc(void)
 	my_pid_tgid = getpid() | ((uint64_t)syscall(SYS_gettid) << 32);
 
 	for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+		char btf_file[] = "/tmp/core_reloc.btf.XXXXXX";
 		test_case = &test_cases[i];
 		if (!test__start_subtest(test_case->case_name))
 			continue;
@@ -863,6 +879,25 @@ void test_core_reloc(void)
 			continue;
 		}
 
+		/* generate a "minimal" BTF file and use it as source */
+		if (use_btfgen) {
+			if (!test_case->btf_src_file || test_case->fails) {
+				test__skip();
+				continue;
+			}
+
+			fd = mkstemp(btf_file);
+			if (!ASSERT_GE(fd, 0, "btf_tmp"))
+				goto cleanup;
+			close(fd); /* we only need the path */
+			err = run_btfgen(test_case->btf_src_file, btf_file,
+					 test_case->bpf_obj_file);
+			if (!ASSERT_OK(err, "run_btfgen"))
+				goto cleanup;
+
+			test_case->btf_src_file = btf_file;
+		}
+
 		if (test_case->setup) {
 			err = test_case->setup(test_case);
 			if (CHECK(err, "test_setup", "test #%d setup failed: %d\n", i, err))
@@ -954,8 +989,19 @@ cleanup:
 			CHECK_FAIL(munmap(mmap_data, mmap_sz));
 			mmap_data = NULL;
 		}
+		remove(btf_file);
 		bpf_link__destroy(link);
 		link = NULL;
 		bpf_object__close(obj);
 	}
 }
+
+void test_core_reloc(void)
+{
+	run_core_reloc_tests(false);
+}
+
+void test_core_btfgen(void)
+{
+	run_core_reloc_tests(true);
+}
-- 
cgit 


From 6f97c7c605d63d8f83f102a768ddfd0ad553fa3f Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 16 Feb 2022 17:21:17 -0800
Subject: selftests: net: test IPV6_DONTFRAG

Test setting IPV6_DONTFRAG via setsockopt and cmsg
across socket types.

Output without the kernel support (this series):

    Case DONTFRAG ICMP setsock returned 0, expected 1
    Case DONTFRAG ICMP cmsg returned 0, expected 1
    Case DONTFRAG ICMP both returned 0, expected 1
    Case DONTFRAG ICMP diff returned 0, expected 1
  FAIL - 4/24 cases failed

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/cmsg_ipv6.sh  |  65 ++++++++++++++++++
 tools/testing/selftests/net/cmsg_sender.c | 105 +++++++++++++++++++++++-------
 2 files changed, 147 insertions(+), 23 deletions(-)
 create mode 100755 tools/testing/selftests/net/cmsg_ipv6.sh

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/cmsg_ipv6.sh b/tools/testing/selftests/net/cmsg_ipv6.sh
new file mode 100755
index 000000000000..fb5a8ab7c909
--- /dev/null
+++ b/tools/testing/selftests/net/cmsg_ipv6.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NS=ns
+IP6=2001:db8:1::1/64
+TGT6=2001:db8:1::2
+
+cleanup()
+{
+    ip netns del $NS
+}
+
+trap cleanup EXIT
+
+NSEXE="ip netns exec $NS"
+
+# Namespaces
+ip netns add $NS
+
+$NSEXE sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null
+
+# Connectivity
+ip -netns $NS link add type dummy
+ip -netns $NS link set dev dummy0 up
+ip -netns $NS addr add $IP6 dev dummy0
+
+# Test
+BAD=0
+TOTAL=0
+
+check_result() {
+    ((TOTAL++))
+    if [ $1 -ne $2 ]; then
+	echo "  Case $3 returned $1, expected $2"
+	((BAD++))
+    fi
+}
+
+# IPV6_DONTFRAG
+for ovr in setsock cmsg both diff; do
+    for df in 0 1; do
+	for p in u i r; do
+	    [ $p == "u" ] && prot=UDP
+	    [ $p == "i" ] && prot=ICMP
+	    [ $p == "r" ] && prot=RAW
+
+	    [ $ovr == "setsock" ] && m="-F $df"
+	    [ $ovr == "cmsg" ]    && m="-f $df"
+	    [ $ovr == "both" ]    && m="-F $df -f $df"
+	    [ $ovr == "diff" ]    && m="-F $((1 - df)) -f $df"
+
+	    $NSEXE ./cmsg_sender -s -S 2000 -6 -p $p $m $TGT6 1234
+	    check_result $? $df "DONTFRAG $prot $ovr"
+	done
+    done
+done
+
+# Summary
+if [ $BAD -ne 0 ]; then
+    echo "FAIL - $BAD/$TOTAL cases failed"
+    exit 1
+else
+    echo "OK"
+    exit 0
+fi
diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c
index efa617bd34e2..844ca6134662 100644
--- a/tools/testing/selftests/net/cmsg_sender.c
+++ b/tools/testing/selftests/net/cmsg_sender.c
@@ -33,22 +33,26 @@ enum {
 	ERN_CMSG_RCV,
 };
 
+struct option_cmsg_u32 {
+	bool ena;
+	unsigned int val;
+};
+
 struct options {
 	bool silent_send;
 	const char *host;
 	const char *service;
+	unsigned int size;
 	struct {
 		unsigned int mark;
+		unsigned int dontfrag;
 	} sockopt;
 	struct {
 		unsigned int family;
 		unsigned int type;
 		unsigned int proto;
 	} sock;
-	struct {
-		bool ena;
-		unsigned int val;
-	} mark;
+	struct option_cmsg_u32 mark;
 	struct {
 		bool ena;
 		unsigned int delay;
@@ -56,7 +60,11 @@ struct options {
 	struct {
 		bool ena;
 	} ts;
+	struct {
+		struct option_cmsg_u32 dontfrag;
+	} v6;
 } opt = {
+	.size = 13,
 	.sock = {
 		.family	= AF_UNSPEC,
 		.type	= SOCK_DGRAM,
@@ -72,6 +80,7 @@ static void __attribute__((noreturn)) cs_usage(const char *bin)
 	printf("Usage: %s [opts] <dst host> <dst port / service>\n", bin);
 	printf("Options:\n"
 	       "\t\t-s      Silent send() failures\n"
+	       "\t\t-S      send() size\n"
 	       "\t\t-4/-6   Force IPv4 / IPv6 only\n"
 	       "\t\t-p prot Socket protocol\n"
 	       "\t\t        (u = UDP (default); i = ICMP; r = RAW)\n"
@@ -80,6 +89,8 @@ static void __attribute__((noreturn)) cs_usage(const char *bin)
 	       "\t\t-M val  Set SO_MARK via setsockopt\n"
 	       "\t\t-d val  Set SO_TXTIME with given delay (usec)\n"
 	       "\t\t-t      Enable time stamp reporting\n"
+	       "\t\t-f val  Set don't fragment via cmsg\n"
+	       "\t\t-F val  Set don't fragment via setsockopt\n"
 	       "");
 	exit(ERN_HELP);
 }
@@ -88,11 +99,14 @@ static void cs_parse_args(int argc, char *argv[])
 {
 	char o;
 
-	while ((o = getopt(argc, argv, "46sp:m:M:d:t")) != -1) {
+	while ((o = getopt(argc, argv, "46sS:p:m:M:d:tf:F:")) != -1) {
 		switch (o) {
 		case 's':
 			opt.silent_send = true;
 			break;
+		case 'S':
+			opt.size = atoi(optarg);
+			break;
 		case '4':
 			opt.sock.family = AF_INET;
 			break;
@@ -126,6 +140,13 @@ static void cs_parse_args(int argc, char *argv[])
 		case 't':
 			opt.ts.ena = true;
 			break;
+		case 'f':
+			opt.v6.dontfrag.ena = true;
+			opt.v6.dontfrag.val = atoi(optarg);
+			break;
+		case 'F':
+			opt.sockopt.dontfrag = atoi(optarg);
+			break;
 		}
 	}
 
@@ -136,6 +157,38 @@ static void cs_parse_args(int argc, char *argv[])
 	opt.service = argv[optind + 1];
 }
 
+static void memrnd(void *s, size_t n)
+{
+	int *dword = s;
+	char *byte;
+
+	for (; n >= 4; n -= 4)
+		*dword++ = rand();
+	byte = (void *)dword;
+	while (n--)
+		*byte++ = rand();
+}
+
+static void
+ca_write_cmsg_u32(char *cbuf, size_t cbuf_sz, size_t *cmsg_len,
+		  int level, int optname, struct option_cmsg_u32 *uopt)
+{
+	struct cmsghdr *cmsg;
+
+	if (!uopt->ena)
+		return;
+
+	cmsg = (struct cmsghdr *)(cbuf + *cmsg_len);
+	*cmsg_len += CMSG_SPACE(sizeof(__u32));
+	if (cbuf_sz < *cmsg_len)
+		error(ERN_CMSG_WR, EFAULT, "cmsg buffer too small");
+
+	cmsg->cmsg_level = level;
+	cmsg->cmsg_type = optname;
+	cmsg->cmsg_len = CMSG_LEN(sizeof(__u32));
+	*(__u32 *)CMSG_DATA(cmsg) = uopt->val;
+}
+
 static void
 cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz)
 {
@@ -145,17 +198,11 @@ cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz)
 	msg->msg_control = cbuf;
 	cmsg_len = 0;
 
-	if (opt.mark.ena) {
-		cmsg = (struct cmsghdr *)(cbuf + cmsg_len);
-		cmsg_len += CMSG_SPACE(sizeof(__u32));
-		if (cbuf_sz < cmsg_len)
-			error(ERN_CMSG_WR, EFAULT, "cmsg buffer too small");
+	ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
+			  SOL_SOCKET, SO_MARK, &opt.mark);
+	ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
+			  SOL_IPV6, IPV6_DONTFRAG, &opt.v6.dontfrag);
 
-		cmsg->cmsg_level = SOL_SOCKET;
-		cmsg->cmsg_type = SO_MARK;
-		cmsg->cmsg_len = CMSG_LEN(sizeof(__u32));
-		*(__u32 *)CMSG_DATA(cmsg) = opt.mark.val;
-	}
 	if (opt.txtime.ena) {
 		struct sock_txtime so_txtime = {
 			.clockid = CLOCK_MONOTONIC,
@@ -286,18 +333,33 @@ cs_read_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz)
 	}
 }
 
+static void ca_set_sockopts(int fd)
+{
+	if (opt.sockopt.mark &&
+	    setsockopt(fd, SOL_SOCKET, SO_MARK,
+		       &opt.sockopt.mark, sizeof(opt.sockopt.mark)))
+		error(ERN_SOCKOPT, errno, "setsockopt SO_MARK");
+	if (opt.sockopt.dontfrag &&
+	    setsockopt(fd, SOL_IPV6, IPV6_DONTFRAG,
+		       &opt.sockopt.dontfrag, sizeof(opt.sockopt.dontfrag)))
+		error(ERN_SOCKOPT, errno, "setsockopt IPV6_DONTFRAG");
+}
+
 int main(int argc, char *argv[])
 {
-	char buf[] = "blablablabla";
 	struct addrinfo hints, *ai;
 	struct iovec iov[1];
 	struct msghdr msg;
 	char cbuf[1024];
+	char *buf;
 	int err;
 	int fd;
 
 	cs_parse_args(argc, argv);
 
+	buf = malloc(opt.size);
+	memrnd(buf, opt.size);
+
 	memset(&hints, 0, sizeof(hints));
 	hints.ai_family = opt.sock.family;
 
@@ -326,17 +388,14 @@ int main(int argc, char *argv[])
 		buf[0] = ICMPV6_ECHO_REQUEST;
 		buf[1] = 0;
 	} else if (opt.sock.type == SOCK_RAW) {
-		struct udphdr hdr = { 1, 2, htons(sizeof(buf)), 0 };
+		struct udphdr hdr = { 1, 2, htons(opt.size), 0 };
 		struct sockaddr_in6 *sin6 = (void *)ai->ai_addr;;
 
 		memcpy(buf, &hdr, sizeof(hdr));
 		sin6->sin6_port = htons(opt.sock.proto);
 	}
 
-	if (opt.sockopt.mark &&
-	    setsockopt(fd, SOL_SOCKET, SO_MARK,
-		       &opt.sockopt.mark, sizeof(opt.sockopt.mark)))
-		error(ERN_SOCKOPT, errno, "setsockopt SO_MARK");
+	ca_set_sockopts(fd);
 
 	if (clock_gettime(CLOCK_REALTIME, &time_start_real))
 		error(ERN_GETTIME, errno, "gettime REALTIME");
@@ -344,7 +403,7 @@ int main(int argc, char *argv[])
 		error(ERN_GETTIME, errno, "gettime MONOTONIC");
 
 	iov[0].iov_base = buf;
-	iov[0].iov_len = sizeof(buf);
+	iov[0].iov_len = opt.size;
 
 	memset(&msg, 0, sizeof(msg));
 	msg.msg_name = ai->ai_addr;
@@ -360,7 +419,7 @@ int main(int argc, char *argv[])
 			fprintf(stderr, "send failed: %s\n", strerror(errno));
 		err = ERN_SEND;
 		goto err_out;
-	} else if (err != sizeof(buf)) {
+	} else if (err != (int)opt.size) {
 		fprintf(stderr, "short send\n");
 		err = ERN_SEND_SHORT;
 		goto err_out;
-- 
cgit 


From 9657ad09e1fa04911034d8aac28d4377dc991eb5 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 16 Feb 2022 17:21:18 -0800
Subject: selftests: net: test IPV6_TCLASS

Test setting IPV6_TCLASS via setsockopt and cmsg
across socket types.

Output without the kernel support (this series):

  Case TCLASS ICMP cmsg - packet data returned 1, expected 0
  Case TCLASS ICMP cmsg - rejection returned 0, expected 1
  Case TCLASS ICMP diff - pass returned 1, expected 0
  Case TCLASS ICMP diff - packet data returned 1, expected 0
  Case TCLASS ICMP diff - rejection returned 0, expected 1

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/cmsg_ipv6.sh  | 51 +++++++++++++++++++++++++++++++
 tools/testing/selftests/net/cmsg_sender.c | 19 +++++++++++-
 2 files changed, 69 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/cmsg_ipv6.sh b/tools/testing/selftests/net/cmsg_ipv6.sh
index fb5a8ab7c909..f7bb6ce68c88 100755
--- a/tools/testing/selftests/net/cmsg_ipv6.sh
+++ b/tools/testing/selftests/net/cmsg_ipv6.sh
@@ -1,12 +1,16 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+ksft_skip=4
+
 NS=ns
 IP6=2001:db8:1::1/64
 TGT6=2001:db8:1::2
+TMPF=`mktemp`
 
 cleanup()
 {
+    rm -f $TMPF
     ip netns del $NS
 }
 
@@ -14,6 +18,12 @@ trap cleanup EXIT
 
 NSEXE="ip netns exec $NS"
 
+tcpdump -h | grep immediate-mode >> /dev/null
+if [ $? -ne 0 ]; then
+    echo "SKIP - tcpdump with --immediate-mode option required"
+    exit $ksft_skip
+fi
+
 # Namespaces
 ip netns add $NS
 
@@ -55,6 +65,47 @@ for ovr in setsock cmsg both diff; do
     done
 done
 
+# IPV6_TCLASS
+TOS=0x10
+TOS2=0x20
+
+ip -6 -netns $NS rule add tos $TOS lookup 300
+ip -6 -netns $NS route add table 300 prohibit any
+
+for ovr in setsock cmsg both diff; do
+    for p in u i r; do
+	[ $p == "u" ] && prot=UDP
+	[ $p == "i" ] && prot=ICMP
+	[ $p == "r" ] && prot=RAW
+
+	[ $ovr == "setsock" ] && m="-C"
+	[ $ovr == "cmsg" ]    && m="-c"
+	[ $ovr == "both" ]    && m="-C $((TOS2)) -c"
+	[ $ovr == "diff" ]    && m="-C $((TOS )) -c"
+
+	$NSEXE nohup tcpdump --immediate-mode -p -ni dummy0 -w $TMPF -c 4 2> /dev/null &
+	BG=$!
+	sleep 0.05
+
+	$NSEXE ./cmsg_sender -6 -p $p $m $((TOS2)) $TGT6 1234
+	check_result $? 0 "TCLASS $prot $ovr - pass"
+
+	while [ -d /proc/$BG ]; do
+	    $NSEXE ./cmsg_sender -6 -p u $TGT6 1234
+	done
+
+	tcpdump -r $TMPF -v 2>&1 | grep "class $TOS2" >> /dev/null
+	check_result $? 0 "TCLASS $prot $ovr - packet data"
+	rm $TMPF
+
+	[ $ovr == "both" ]    && m="-C $((TOS )) -c"
+	[ $ovr == "diff" ]    && m="-C $((TOS2)) -c"
+
+	$NSEXE ./cmsg_sender -6 -p $p $m $((TOS)) -s $TGT6 1234
+	check_result $? 1 "TCLASS $prot $ovr - rejection"
+    done
+done
+
 # Summary
 if [ $BAD -ne 0 ]; then
     echo "FAIL - $BAD/$TOTAL cases failed"
diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c
index 844ca6134662..4033cf93eabf 100644
--- a/tools/testing/selftests/net/cmsg_sender.c
+++ b/tools/testing/selftests/net/cmsg_sender.c
@@ -46,6 +46,7 @@ struct options {
 	struct {
 		unsigned int mark;
 		unsigned int dontfrag;
+		unsigned int tclass;
 	} sockopt;
 	struct {
 		unsigned int family;
@@ -62,6 +63,7 @@ struct options {
 	} ts;
 	struct {
 		struct option_cmsg_u32 dontfrag;
+		struct option_cmsg_u32 tclass;
 	} v6;
 } opt = {
 	.size = 13,
@@ -91,6 +93,8 @@ static void __attribute__((noreturn)) cs_usage(const char *bin)
 	       "\t\t-t      Enable time stamp reporting\n"
 	       "\t\t-f val  Set don't fragment via cmsg\n"
 	       "\t\t-F val  Set don't fragment via setsockopt\n"
+	       "\t\t-c val  Set TCLASS via cmsg\n"
+	       "\t\t-C val  Set TCLASS via setsockopt\n"
 	       "");
 	exit(ERN_HELP);
 }
@@ -99,7 +103,7 @@ static void cs_parse_args(int argc, char *argv[])
 {
 	char o;
 
-	while ((o = getopt(argc, argv, "46sS:p:m:M:d:tf:F:")) != -1) {
+	while ((o = getopt(argc, argv, "46sS:p:m:M:d:tf:F:c:C:")) != -1) {
 		switch (o) {
 		case 's':
 			opt.silent_send = true;
@@ -147,6 +151,13 @@ static void cs_parse_args(int argc, char *argv[])
 		case 'F':
 			opt.sockopt.dontfrag = atoi(optarg);
 			break;
+		case 'c':
+			opt.v6.tclass.ena = true;
+			opt.v6.tclass.val = atoi(optarg);
+			break;
+		case 'C':
+			opt.sockopt.tclass = atoi(optarg);
+			break;
 		}
 	}
 
@@ -202,6 +213,8 @@ cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz)
 			  SOL_SOCKET, SO_MARK, &opt.mark);
 	ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
 			  SOL_IPV6, IPV6_DONTFRAG, &opt.v6.dontfrag);
+	ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
+			  SOL_IPV6, IPV6_TCLASS, &opt.v6.tclass);
 
 	if (opt.txtime.ena) {
 		struct sock_txtime so_txtime = {
@@ -343,6 +356,10 @@ static void ca_set_sockopts(int fd)
 	    setsockopt(fd, SOL_IPV6, IPV6_DONTFRAG,
 		       &opt.sockopt.dontfrag, sizeof(opt.sockopt.dontfrag)))
 		error(ERN_SOCKOPT, errno, "setsockopt IPV6_DONTFRAG");
+	if (opt.sockopt.tclass &&
+	    setsockopt(fd, SOL_IPV6, IPV6_TCLASS,
+		       &opt.sockopt.tclass, sizeof(opt.sockopt.tclass)))
+		error(ERN_SOCKOPT, errno, "setsockopt IPV6_TCLASS");
 }
 
 int main(int argc, char *argv[])
-- 
cgit 


From 05ae83d5a4a23f7323dc0341b675c0a2002d94dd Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 16 Feb 2022 17:21:19 -0800
Subject: selftests: net: test IPV6_HOPLIMIT

Test setting IPV6_HOPLIMIT via setsockopt and cmsg
across socket types.

Output without the kernel support (this series):

  Case HOPLIMIT ICMP cmsg - packet data returned 1, expected 0
  Case HOPLIMIT ICMP diff - packet data returned 1, expected 0

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/cmsg_ipv6.sh  | 31 +++++++++++++++++++++++++++++++
 tools/testing/selftests/net/cmsg_sender.c | 19 ++++++++++++++++++-
 2 files changed, 49 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/cmsg_ipv6.sh b/tools/testing/selftests/net/cmsg_ipv6.sh
index f7bb6ce68c88..e42c36e0d741 100755
--- a/tools/testing/selftests/net/cmsg_ipv6.sh
+++ b/tools/testing/selftests/net/cmsg_ipv6.sh
@@ -106,6 +106,37 @@ for ovr in setsock cmsg both diff; do
     done
 done
 
+# IPV6_HOPLIMIT
+LIM=4
+
+for ovr in setsock cmsg both diff; do
+    for p in u i r; do
+	[ $p == "u" ] && prot=UDP
+	[ $p == "i" ] && prot=ICMP
+	[ $p == "r" ] && prot=RAW
+
+	[ $ovr == "setsock" ] && m="-L"
+	[ $ovr == "cmsg" ]    && m="-l"
+	[ $ovr == "both" ]    && m="-L $LIM -l"
+	[ $ovr == "diff" ]    && m="-L $((LIM + 1)) -l"
+
+	$NSEXE nohup tcpdump --immediate-mode -p -ni dummy0 -w $TMPF -c 4 2> /dev/null &
+	BG=$!
+	sleep 0.05
+
+	$NSEXE ./cmsg_sender -6 -p $p $m $LIM $TGT6 1234
+	check_result $? 0 "HOPLIMIT $prot $ovr - pass"
+
+	while [ -d /proc/$BG ]; do
+	    $NSEXE ./cmsg_sender -6 -p u $TGT6 1234
+	done
+
+	tcpdump -r $TMPF -v 2>&1 | grep "hlim $LIM[^0-9]" >> /dev/null
+	check_result $? 0 "HOPLIMIT $prot $ovr - packet data"
+	rm $TMPF
+    done
+done
+
 # Summary
 if [ $BAD -ne 0 ]; then
     echo "FAIL - $BAD/$TOTAL cases failed"
diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c
index 4033cf93eabf..6136aa7df1c4 100644
--- a/tools/testing/selftests/net/cmsg_sender.c
+++ b/tools/testing/selftests/net/cmsg_sender.c
@@ -47,6 +47,7 @@ struct options {
 		unsigned int mark;
 		unsigned int dontfrag;
 		unsigned int tclass;
+		unsigned int hlimit;
 	} sockopt;
 	struct {
 		unsigned int family;
@@ -64,6 +65,7 @@ struct options {
 	struct {
 		struct option_cmsg_u32 dontfrag;
 		struct option_cmsg_u32 tclass;
+		struct option_cmsg_u32 hlimit;
 	} v6;
 } opt = {
 	.size = 13,
@@ -95,6 +97,8 @@ static void __attribute__((noreturn)) cs_usage(const char *bin)
 	       "\t\t-F val  Set don't fragment via setsockopt\n"
 	       "\t\t-c val  Set TCLASS via cmsg\n"
 	       "\t\t-C val  Set TCLASS via setsockopt\n"
+	       "\t\t-l val  Set HOPLIMIT via cmsg\n"
+	       "\t\t-L val  Set HOPLIMIT via setsockopt\n"
 	       "");
 	exit(ERN_HELP);
 }
@@ -103,7 +107,7 @@ static void cs_parse_args(int argc, char *argv[])
 {
 	char o;
 
-	while ((o = getopt(argc, argv, "46sS:p:m:M:d:tf:F:c:C:")) != -1) {
+	while ((o = getopt(argc, argv, "46sS:p:m:M:d:tf:F:c:C:l:L:")) != -1) {
 		switch (o) {
 		case 's':
 			opt.silent_send = true;
@@ -158,6 +162,13 @@ static void cs_parse_args(int argc, char *argv[])
 		case 'C':
 			opt.sockopt.tclass = atoi(optarg);
 			break;
+		case 'l':
+			opt.v6.hlimit.ena = true;
+			opt.v6.hlimit.val = atoi(optarg);
+			break;
+		case 'L':
+			opt.sockopt.hlimit = atoi(optarg);
+			break;
 		}
 	}
 
@@ -215,6 +226,8 @@ cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz)
 			  SOL_IPV6, IPV6_DONTFRAG, &opt.v6.dontfrag);
 	ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
 			  SOL_IPV6, IPV6_TCLASS, &opt.v6.tclass);
+	ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
+			  SOL_IPV6, IPV6_HOPLIMIT, &opt.v6.hlimit);
 
 	if (opt.txtime.ena) {
 		struct sock_txtime so_txtime = {
@@ -360,6 +373,10 @@ static void ca_set_sockopts(int fd)
 	    setsockopt(fd, SOL_IPV6, IPV6_TCLASS,
 		       &opt.sockopt.tclass, sizeof(opt.sockopt.tclass)))
 		error(ERN_SOCKOPT, errno, "setsockopt IPV6_TCLASS");
+	if (opt.sockopt.hlimit &&
+	    setsockopt(fd, SOL_IPV6, IPV6_UNICAST_HOPS,
+		       &opt.sockopt.hlimit, sizeof(opt.sockopt.hlimit)))
+		error(ERN_SOCKOPT, errno, "setsockopt IPV6_HOPLIMIT");
 }
 
 int main(int argc, char *argv[])
-- 
cgit 


From a22982c39eb1d68914c8b541e7ef10044c5f1b1e Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 16 Feb 2022 17:21:20 -0800
Subject: selftests: net: basic test for IPV6_2292*

Add a basic test to make sure ping sockets don't crash
with IPV6_2292* options.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/cmsg_ipv6.sh  |  9 +++++++++
 tools/testing/selftests/net/cmsg_sender.c | 33 ++++++++++++++++++++++++++++++-
 2 files changed, 41 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/cmsg_ipv6.sh b/tools/testing/selftests/net/cmsg_ipv6.sh
index e42c36e0d741..2d89cb0ad288 100755
--- a/tools/testing/selftests/net/cmsg_ipv6.sh
+++ b/tools/testing/selftests/net/cmsg_ipv6.sh
@@ -137,6 +137,15 @@ for ovr in setsock cmsg both diff; do
     done
 done
 
+# IPV6 exthdr
+for p in u i r; do
+    # Very basic "does it crash" test
+    for h in h d r; do
+	$NSEXE ./cmsg_sender -p $p -6 -H $h $TGT6 1234
+	check_result $? 0 "ExtHdr $prot $ovr - pass"
+    done
+done
+
 # Summary
 if [ $BAD -ne 0 ]; then
     echo "FAIL - $BAD/$TOTAL cases failed"
diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c
index 6136aa7df1c4..aed7845c08a8 100644
--- a/tools/testing/selftests/net/cmsg_sender.c
+++ b/tools/testing/selftests/net/cmsg_sender.c
@@ -66,6 +66,7 @@ struct options {
 		struct option_cmsg_u32 dontfrag;
 		struct option_cmsg_u32 tclass;
 		struct option_cmsg_u32 hlimit;
+		struct option_cmsg_u32 exthdr;
 	} v6;
 } opt = {
 	.size = 13,
@@ -99,6 +100,8 @@ static void __attribute__((noreturn)) cs_usage(const char *bin)
 	       "\t\t-C val  Set TCLASS via setsockopt\n"
 	       "\t\t-l val  Set HOPLIMIT via cmsg\n"
 	       "\t\t-L val  Set HOPLIMIT via setsockopt\n"
+	       "\t\t-H type Add an IPv6 header option\n"
+	       "\t\t        (h = HOP; d = DST; r = RTDST)"
 	       "");
 	exit(ERN_HELP);
 }
@@ -107,7 +110,7 @@ static void cs_parse_args(int argc, char *argv[])
 {
 	char o;
 
-	while ((o = getopt(argc, argv, "46sS:p:m:M:d:tf:F:c:C:l:L:")) != -1) {
+	while ((o = getopt(argc, argv, "46sS:p:m:M:d:tf:F:c:C:l:L:H:")) != -1) {
 		switch (o) {
 		case 's':
 			opt.silent_send = true;
@@ -169,6 +172,23 @@ static void cs_parse_args(int argc, char *argv[])
 		case 'L':
 			opt.sockopt.hlimit = atoi(optarg);
 			break;
+		case 'H':
+			opt.v6.exthdr.ena = true;
+			switch (optarg[0]) {
+			case 'h':
+				opt.v6.exthdr.val = IPV6_HOPOPTS;
+				break;
+			case 'd':
+				opt.v6.exthdr.val = IPV6_DSTOPTS;
+				break;
+			case 'r':
+				opt.v6.exthdr.val = IPV6_RTHDRDSTOPTS;
+				break;
+			default:
+				printf("Error: hdr type: %s\n", optarg);
+				break;
+			}
+			break;
 		}
 	}
 
@@ -272,6 +292,17 @@ cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz)
 		*(__u32 *)CMSG_DATA(cmsg) = SOF_TIMESTAMPING_TX_SCHED |
 					    SOF_TIMESTAMPING_TX_SOFTWARE;
 	}
+	if (opt.v6.exthdr.ena) {
+		cmsg = (struct cmsghdr *)(cbuf + cmsg_len);
+		cmsg_len += CMSG_SPACE(8);
+		if (cbuf_sz < cmsg_len)
+			error(ERN_CMSG_WR, EFAULT, "cmsg buffer too small");
+
+		cmsg->cmsg_level = SOL_IPV6;
+		cmsg->cmsg_type = opt.v6.exthdr.val;
+		cmsg->cmsg_len = CMSG_LEN(8);
+		*(__u64 *)CMSG_DATA(cmsg) = 0;
+	}
 
 	if (cmsg_len)
 		msg->msg_controllen = cmsg_len;
-- 
cgit 


From b38101c57acf9543ed7c4b0f43fd65ea27240772 Mon Sep 17 00:00:00 2001
From: Yucong Sun <fallentree@fb.com>
Date: Thu, 17 Feb 2022 07:52:12 -0800
Subject: selftests/bpf: Fix vmtest.sh to launch smp vm.

Fix typo in vmtest.sh to make sure it launch proper vm with 8 cpus.

Signed-off-by: Yucong Sun <fallentree@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220217155212.2309672-1-fallentree@fb.com
---
 tools/testing/selftests/bpf/vmtest.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh
index b3afd43549fa..e0bb04a97e10 100755
--- a/tools/testing/selftests/bpf/vmtest.sh
+++ b/tools/testing/selftests/bpf/vmtest.sh
@@ -241,7 +241,7 @@ EOF
 		-nodefaults \
 		-display none \
 		-serial mon:stdio \
-		"${qemu_flags[@]}" \
+		"${QEMU_FLAGS[@]}" \
 		-enable-kvm \
 		-m 4G \
 		-drive file="${rootfs_img}",format=raw,index=1,media=disk,if=virtio,cache=none \
-- 
cgit 


From b75dacaac4650478ed5a9d33975b91b99016daff Mon Sep 17 00:00:00 2001
From: Yucong Sun <fallentree@fb.com>
Date: Thu, 17 Feb 2022 10:02:10 -0800
Subject: selftests/bpf: Fix crash in core_reloc when bpftool btfgen fails

Avoid unnecessary goto cleanup, as there is nothing to clean up.

Signed-off-by: Yucong Sun <fallentree@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220217180210.2981502-1-fallentree@fb.com
---
 tools/testing/selftests/bpf/prog_tests/core_reloc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index baf53c23c08d..8fbb40a832d5 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -888,12 +888,12 @@ static void run_core_reloc_tests(bool use_btfgen)
 
 			fd = mkstemp(btf_file);
 			if (!ASSERT_GE(fd, 0, "btf_tmp"))
-				goto cleanup;
+				continue;
 			close(fd); /* we only need the path */
 			err = run_btfgen(test_case->btf_src_file, btf_file,
 					 test_case->bpf_obj_file);
 			if (!ASSERT_OK(err, "run_btfgen"))
-				goto cleanup;
+				continue;
 
 			test_case->btf_src_file = btf_file;
 		}
-- 
cgit 


From b06e15ebd5bfb670f93c7f11a29b8299c1178bc6 Mon Sep 17 00:00:00 2001
From: Muhammad Usama Anjum <usama.anjum@collabora.com>
Date: Mon, 14 Feb 2022 23:41:08 +0500
Subject: selftests/x86: Add validity check and allow field splitting

Add check to test if CC has a string. CC can have multiple sub-strings
like "ccache gcc". Erorr pops up if it is treated as single string and
double quotes are used around it. This can be fixed by removing the
quotes and not treating CC as a single string.

Fixes: e9886ace222e ("selftests, x86: Rework x86 target architecture detection")
Reported-by: "kernelci.org bot" <bot@kernelci.org>
Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Link: https://lkml.kernel.org/r/20220214184109.3739179-2-usama.anjum@collabora.com
---
 tools/testing/selftests/x86/check_cc.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/x86/check_cc.sh b/tools/testing/selftests/x86/check_cc.sh
index 3e2089c8cf54..8c669c0d662e 100755
--- a/tools/testing/selftests/x86/check_cc.sh
+++ b/tools/testing/selftests/x86/check_cc.sh
@@ -7,7 +7,7 @@ CC="$1"
 TESTPROG="$2"
 shift 2
 
-if "$CC" -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then
+if [ -n "$CC" ] && $CC -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then
     echo 1
 else
     echo 0
-- 
cgit 


From 6170abb21e2380477080b25145da9747ad467d3d Mon Sep 17 00:00:00 2001
From: Muhammad Usama Anjum <usama.anjum@collabora.com>
Date: Mon, 14 Feb 2022 23:41:09 +0500
Subject: selftests/sgx: Treat CC as one argument

CC can have multiple sub-strings like "ccache gcc". For check_cc.sh,
CC needs to be treated like one argument. Put double quotes around it to
make CC one string and hence one argument.

Fixes: 2adcba79e69d ("selftests/x86: Add a selftest for SGX")
Reported-by: "kernelci.org bot" <bot@kernelci.org>
Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Link: https://lkml.kernel.org/r/20220214184109.3739179-3-usama.anjum@collabora.com
---
 tools/testing/selftests/sgx/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/sgx/Makefile b/tools/testing/selftests/sgx/Makefile
index 2956584e1e37..75af864e07b6 100644
--- a/tools/testing/selftests/sgx/Makefile
+++ b/tools/testing/selftests/sgx/Makefile
@@ -4,7 +4,7 @@ include ../lib.mk
 
 .PHONY: all clean
 
-CAN_BUILD_X86_64 := $(shell ../x86/check_cc.sh $(CC) \
+CAN_BUILD_X86_64 := $(shell ../x86/check_cc.sh "$(CC)" \
 			    ../x86/trivial_64bit_program.c)
 
 ifndef OBJCOPY
-- 
cgit 


From d17b968b98761e30b94177f0f9a2f5d9aae15da2 Mon Sep 17 00:00:00 2001
From: Matthieu Baerts <matthieu.baerts@tessares.net>
Date: Thu, 17 Feb 2022 19:03:05 -0800
Subject: selftests: mptcp: increase timeout to 20 minutes

With the increase number of tests, one CI instance, using a debug kernel
config and not recent hardware, takes around 10 minutes to execute the
slowest MPTCP test: mptcp_join.sh.

Even if most CIs don't take that long to execute these tests --
typically max 10 minutes to run all selftests -- it will help some of
them if the timeout is increased.

The timeout could be disabled but it is always good to have an extra
safeguard, just in case.

Please note that on slow public CIs with kernel debug settings, it has
been observed it can easily take up to 45 minutes to execute all tests
in this very slow environment with other jobs running in parallel.
The slowest test, mptcp_join.sh takes ~30 minutes in this case.

In such environments, the selftests timeout set in the 'settings' file
is disabled because this environment is known as being exceptionnally
slow. It has been decided not to take such exceptional environments into
account and set the timeout to 20min.

Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/settings | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/settings b/tools/testing/selftests/net/mptcp/settings
index a62d2fa1275c..79b65bdf05db 100644
--- a/tools/testing/selftests/net/mptcp/settings
+++ b/tools/testing/selftests/net/mptcp/settings
@@ -1 +1 @@
-timeout=600
+timeout=1200
-- 
cgit 


From bccefb7624395183e5602d168f4343b9ddbb72b9 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Thu, 17 Feb 2022 19:03:06 -0800
Subject: selftests: mptcp: simplify pm_nl_change_endpoint

This patch simplified pm_nl_change_endpoint(), using id-based address
lookups only. And dropped the fragile way of parsing 'addr' and 'id'
from the output of pm_nl_show_endpoints().

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 37 ++++++-------------------
 1 file changed, 8 insertions(+), 29 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 18bb0d0cf4bd..bbcacaaf81ce 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -240,16 +240,6 @@ is_v6()
 	[ -z "${1##*:*}" ]
 }
 
-is_addr()
-{
-	[ -z "${1##*[.:]*}" ]
-}
-
-is_number()
-{
-	[[ $1 == ?(-)+([0-9]) ]]
-}
-
 # $1: ns, $2: port
 wait_local_port_listen()
 {
@@ -379,16 +369,13 @@ pm_nl_show_endpoints()
 pm_nl_change_endpoint()
 {
 	local ns=$1
-	local flags=$2
-	local id=$3
-	local addr=$4
-	local port=""
+	local id=$2
+	local flags=$3
 
 	if [ $ip_mptcp -eq 1 ]; then
 		ip -n $ns mptcp endpoint change id $id ${flags//","/" "}
 	else
-		if [ $5 -ne 0 ]; then port="port $5"; fi
-		ip netns exec $ns ./pm_nl_ctl set $addr flags $flags $port
+		ip netns exec $ns ./pm_nl_ctl set id $id flags $flags
 	fi
 }
 
@@ -591,24 +578,16 @@ do_transfer()
 		for netns in "$ns1" "$ns2"; do
 			pm_nl_show_endpoints $netns | while read line; do
 				local arr=($line)
-				local addr
-				local port=0
+				local nr=0
 				local id
 
 				for i in ${arr[@]}; do
-					if is_addr $i; then
-						addr=$i
-					elif is_number $i; then
-						# The minimum expected port number is 10000
-						if [ $i -gt 10000 ]; then
-							port=$i
-						# The maximum id number is 255
-						elif [ $i -lt 255 ]; then
-							id=$i
-						fi
+					if [ $i = "id" ]; then
+						id=${arr[$nr+1]}
 					fi
+					let nr+=1
 				done
-				pm_nl_change_endpoint $netns $sflags $id $addr $port
+				pm_nl_change_endpoint $netns $id $sflags
 			done
 		done
 	fi
-- 
cgit 


From 22514d52962b58771ac3eb61f8c4573617d1d73d Mon Sep 17 00:00:00 2001
From: Matthieu Baerts <matthieu.baerts@tessares.net>
Date: Thu, 17 Feb 2022 19:03:07 -0800
Subject: selftests: mptcp: join: exit after usage()

With an error if it is an unknown option.

Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index bbcacaaf81ce..1a881a21e7ef 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -2077,8 +2077,14 @@ all_tests()
 	fullmesh_tests
 }
 
+# [$1: error message]
 usage()
 {
+	if [ -n "${1}" ]; then
+		echo "${1}"
+		ret=1
+	fi
+
 	echo "mptcp_join usage:"
 	echo "  -f subflows_tests"
 	echo "  -e subflows_error_tests"
@@ -2099,6 +2105,8 @@ usage()
 	echo "  -C enable data checksum"
 	echo "  -i use ip mptcp"
 	echo "  -h help"
+
+	exit ${ret}
 }
 
 sin=$(mktemp)
@@ -2187,9 +2195,12 @@ while getopts 'fesltra64bpkdmchCSi' opt; do
 			;;
 		i)
 			;;
-		h | *)
+		h)
 			usage
 			;;
+		*)
+			usage "Unknown option: -${opt}"
+			;;
 	esac
 done
 
-- 
cgit 


From 0a40e273be0416a9a00ecea89b7f61c841382b3e Mon Sep 17 00:00:00 2001
From: Matthieu Baerts <matthieu.baerts@tessares.net>
Date: Thu, 17 Feb 2022 19:03:08 -0800
Subject: selftests: mptcp: join: remove unused vars

Shellcheck found that these variables were set but never used.

Note that rndh is no longer prefixed with '0-' but it doesn't change
anything.

Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 1a881a21e7ef..c6379093f38a 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -42,7 +42,7 @@ init()
 {
 	capout=$(mktemp)
 
-	rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
+	rndh=$(mktemp -u XXXXXX)
 
 	ns1="ns1-$rndh"
 	ns2="ns2-$rndh"
@@ -665,8 +665,6 @@ run_tests()
 	addr_nr_ns2="${6:-0}"
 	speed="${7:-fast}"
 	sflags="${8:-""}"
-	lret=0
-	oldin=""
 
 	# create the input file for the failure test when
 	# the first failure test run
@@ -694,7 +692,6 @@ run_tests()
 
 	do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} \
 		${test_linkfail} ${addr_nr_ns1} ${addr_nr_ns2} ${speed} ${sflags}
-	lret=$?
 }
 
 dump_stats()
-- 
cgit 


From 93827ad58f6296bf15fa72265e55fb0f335fcf84 Mon Sep 17 00:00:00 2001
From: Matthieu Baerts <matthieu.baerts@tessares.net>
Date: Thu, 17 Feb 2022 19:03:09 -0800
Subject: selftests: mptcp: join: create tmp files only if needed

These tmp files will only be created when a test will be launched.

This avoid 'dd' output when '-h' is used for example.

While at it, also avoid creating netns that will be removed when
starting the first test.

Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 37 ++++++++++++++++---------
 1 file changed, 24 insertions(+), 13 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index c6379093f38a..63340bb76920 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -17,6 +17,7 @@ capture=0
 checksum=0
 ip_mptcp=0
 do_all_tests=1
+init=0
 
 TEST_COUNT=0
 
@@ -38,7 +39,7 @@ CBPF_MPTCP_SUBOPTION_ADD_ADDR="14,
 			       6 0 0 65535,
 			       6 0 0 0"
 
-init()
+init_partial()
 {
 	capout=$(mktemp)
 
@@ -98,6 +99,21 @@ cleanup_partial()
 	done
 }
 
+init() {
+	init=1
+
+	sin=$(mktemp)
+	sout=$(mktemp)
+	cin=$(mktemp)
+	cinsent=$(mktemp)
+	cout=$(mktemp)
+
+	trap cleanup EXIT
+
+	make_file "$cin" "client" 1
+	make_file "$sin" "server" 1
+}
+
 cleanup()
 {
 	rm -f "$cin" "$cout" "$sinfail"
@@ -107,8 +123,13 @@ cleanup()
 
 reset()
 {
-	cleanup_partial
-	init
+	if [ "${init}" != "1" ]; then
+		init
+	else
+		cleanup_partial
+	fi
+
+	init_partial
 }
 
 reset_with_cookies()
@@ -2106,16 +2127,6 @@ usage()
 	exit ${ret}
 }
 
-sin=$(mktemp)
-sout=$(mktemp)
-cin=$(mktemp)
-cinsent=$(mktemp)
-cout=$(mktemp)
-init
-make_file "$cin" "client" 1
-make_file "$sin" "server" 1
-trap cleanup EXIT
-
 for arg in "$@"; do
 	# check for "capture/checksum" args before launching tests
 	if [[ "${arg}" =~ ^"-"[0-9a-zA-Z]*"c"[0-9a-zA-Z]*$ ]]; then
-- 
cgit 


From 87154755d90ed60919cc5709e322b397701e4f58 Mon Sep 17 00:00:00 2001
From: Matthieu Baerts <matthieu.baerts@tessares.net>
Date: Thu, 17 Feb 2022 19:03:10 -0800
Subject: selftests: mptcp: join: check for tools only if needed

To allow showing the 'help' menu even if these tools are not available.

While at it, also avoid launching the command then checking $?. Instead,
the check is directly done in the 'if'.

Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 38 +++++++++++++------------
 1 file changed, 20 insertions(+), 18 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 63340bb76920..725924012b41 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -99,9 +99,29 @@ cleanup_partial()
 	done
 }
 
+check_tools()
+{
+	if ! ip -Version &> /dev/null; then
+		echo "SKIP: Could not run test without ip tool"
+		exit $ksft_skip
+	fi
+
+	if ! iptables -V &> /dev/null; then
+		echo "SKIP: Could not run all tests without iptables tool"
+		exit $ksft_skip
+	fi
+
+	if ! ip6tables -V &> /dev/null; then
+		echo "SKIP: Could not run all tests without ip6tables tool"
+		exit $ksft_skip
+	fi
+}
+
 init() {
 	init=1
 
+	check_tools
+
 	sin=$(mktemp)
 	sout=$(mktemp)
 	cin=$(mktemp)
@@ -183,24 +203,6 @@ reset_with_allow_join_id0()
 	ip netns exec $ns2 sysctl -q net.mptcp.allow_join_initial_addr_port=$ns2_enable
 }
 
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
-	echo "SKIP: Could not run test without ip tool"
-	exit $ksft_skip
-fi
-
-iptables -V > /dev/null 2>&1
-if [ $? -ne 0 ];then
-	echo "SKIP: Could not run all tests without iptables tool"
-	exit $ksft_skip
-fi
-
-ip6tables -V > /dev/null 2>&1
-if [ $? -ne 0 ];then
-	echo "SKIP: Could not run all tests without ip6tables tool"
-	exit $ksft_skip
-fi
-
 print_file_err()
 {
 	ls -l "$1" 1>&2
-- 
cgit 


From 24720d7452df2dff2e539d9dff28904e25bb1c6d Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Thu, 17 Feb 2022 19:03:11 -0800
Subject: selftests: mptcp: add csum mib check for mptcp_connect

This patch added the data checksum error mib counters check for the
script mptcp_connect.sh when the data checksum is enabled.

In do_transfer(), got the mib counters twice, before and after running
the mptcp_connect commands. The latter minus the former is the actual
number of the data checksum mib counter.

The output looks like this:

ns1 MPTCP -> ns2 (dead:beef:1::2:10007) MPTCP   (duration    86ms) [ OK ]
ns1 MPTCP -> ns2 (10.0.2.1:10008      ) MPTCP   (duration    66ms) [ FAIL ]
server got 1 data checksum error[s]

Fixes: 94d66ba1d8e48 ("selftests: mptcp: enable checksum in mptcp_connect.sh")
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/255
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_connect.sh | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index cb5809b89081..5b7a40d73253 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -432,6 +432,8 @@ do_transfer()
 	local stat_ackrx_last_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
 	local stat_cookietx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent")
 	local stat_cookierx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv")
+	local stat_csum_err_s=$(get_mib_counter "${listener_ns}" "MPTcpExtDataCsumErr")
+	local stat_csum_err_c=$(get_mib_counter "${connector_ns}" "MPTcpExtDataCsumErr")
 
 	timeout ${timeout_test} \
 		ip netns exec ${listener_ns} \
@@ -524,6 +526,23 @@ do_transfer()
 		fi
 	fi
 
+	if $checksum; then
+		local csum_err_s=$(get_mib_counter "${listener_ns}" "MPTcpExtDataCsumErr")
+		local csum_err_c=$(get_mib_counter "${connector_ns}" "MPTcpExtDataCsumErr")
+
+		local csum_err_s_nr=$((csum_err_s - stat_csum_err_s))
+		if [ $csum_err_s_nr -gt 0 ]; then
+			printf "[ FAIL ]\nserver got $csum_err_s_nr data checksum error[s]"
+			rets=1
+		fi
+
+		local csum_err_c_nr=$((csum_err_c - stat_csum_err_c))
+		if [ $csum_err_c_nr -gt 0 ]; then
+			printf "[ FAIL ]\nclient got $csum_err_c_nr data checksum error[s]"
+			retc=1
+		fi
+	fi
+
 	if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then
 		printf "[ OK ]"
 	fi
-- 
cgit 


From b2125513dfc0dd0ec5a9605138a3c356592cfb73 Mon Sep 17 00:00:00 2001
From: Peter Gonda <pgonda@google.com>
Date: Fri, 11 Feb 2022 11:36:34 -0800
Subject: KVM: SEV: Allow SEV intra-host migration of VM with mirrors

For SEV-ES VMs with mirrors to be intra-host migrated they need to be
able to migrate with the mirror. This is due to that fact that all VMSAs
need to be added into the VM with LAUNCH_UPDATE_VMSA before
lAUNCH_FINISH. Allowing migration with mirrors allows users of SEV-ES to
keep the mirror VMs VMSAs during migration.

Adds a list of mirror VMs for the original VM iterate through during its
migration. During the iteration the owner pointers can be updated from
the source to the destination. This fixes the ASID leaking issue which
caused the blocking of migration of VMs with mirrors.

Signed-off-by: Peter Gonda <pgonda@google.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Marc Orr <marcorr@google.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: kvm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Message-Id: <20220211193634.3183388-1-pgonda@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/sev.c                             | 57 ++++++++++++++--------
 arch/x86/kvm/svm/svm.h                             |  3 +-
 .../selftests/kvm/x86_64/sev_migrate_tests.c       | 47 ++++++++++++------
 3 files changed, 73 insertions(+), 34 deletions(-)

(limited to 'tools/testing')

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index f4d88292f337..789b69294d28 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -258,6 +258,7 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
 		goto e_free;
 
 	INIT_LIST_HEAD(&sev->regions_list);
+	INIT_LIST_HEAD(&sev->mirror_vms);
 
 	return 0;
 
@@ -1623,9 +1624,12 @@ static void sev_unlock_vcpus_for_migration(struct kvm *kvm)
 	}
 }
 
-static void sev_migrate_from(struct kvm_sev_info *dst,
-			      struct kvm_sev_info *src)
+static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm)
 {
+	struct kvm_sev_info *dst = &to_kvm_svm(dst_kvm)->sev_info;
+	struct kvm_sev_info *src = &to_kvm_svm(src_kvm)->sev_info;
+	struct kvm_sev_info *mirror;
+
 	dst->active = true;
 	dst->asid = src->asid;
 	dst->handle = src->handle;
@@ -1639,6 +1643,30 @@ static void sev_migrate_from(struct kvm_sev_info *dst,
 	src->enc_context_owner = NULL;
 
 	list_cut_before(&dst->regions_list, &src->regions_list, &src->regions_list);
+
+	/*
+	 * If this VM has mirrors, "transfer" each mirror's refcount of the
+	 * source to the destination (this KVM).  The caller holds a reference
+	 * to the source, so there's no danger of use-after-free.
+	 */
+	list_cut_before(&dst->mirror_vms, &src->mirror_vms, &src->mirror_vms);
+	list_for_each_entry(mirror, &dst->mirror_vms, mirror_entry) {
+		kvm_get_kvm(dst_kvm);
+		kvm_put_kvm(src_kvm);
+		mirror->enc_context_owner = dst_kvm;
+	}
+
+	/*
+	 * If this VM is a mirror, remove the old mirror from the owners list
+	 * and add the new mirror to the list.
+	 */
+	if (is_mirroring_enc_context(dst_kvm)) {
+		struct kvm_sev_info *owner_sev_info =
+			&to_kvm_svm(dst->enc_context_owner)->sev_info;
+
+		list_del(&src->mirror_entry);
+		list_add_tail(&dst->mirror_entry, &owner_sev_info->mirror_vms);
+	}
 }
 
 static int sev_es_migrate_from(struct kvm *dst, struct kvm *src)
@@ -1708,15 +1736,6 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
 
 	src_sev = &to_kvm_svm(source_kvm)->sev_info;
 
-	/*
-	 * VMs mirroring src's encryption context rely on it to keep the
-	 * ASID allocated, but below we are clearing src_sev->asid.
-	 */
-	if (src_sev->num_mirrored_vms) {
-		ret = -EBUSY;
-		goto out_unlock;
-	}
-
 	dst_sev->misc_cg = get_current_misc_cg();
 	cg_cleanup_sev = dst_sev;
 	if (dst_sev->misc_cg != src_sev->misc_cg) {
@@ -1738,7 +1757,8 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
 		if (ret)
 			goto out_source_vcpu;
 	}
-	sev_migrate_from(dst_sev, src_sev);
+
+	sev_migrate_from(kvm, source_kvm);
 	kvm_vm_dead(source_kvm);
 	cg_cleanup_sev = src_sev;
 	ret = 0;
@@ -2008,10 +2028,10 @@ int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd)
 	 */
 	source_sev = &to_kvm_svm(source_kvm)->sev_info;
 	kvm_get_kvm(source_kvm);
-	source_sev->num_mirrored_vms++;
+	mirror_sev = &to_kvm_svm(kvm)->sev_info;
+	list_add_tail(&mirror_sev->mirror_entry, &source_sev->mirror_vms);
 
 	/* Set enc_context_owner and copy its encryption context over */
-	mirror_sev = &to_kvm_svm(kvm)->sev_info;
 	mirror_sev->enc_context_owner = source_kvm;
 	mirror_sev->active = true;
 	mirror_sev->asid = source_sev->asid;
@@ -2019,6 +2039,7 @@ int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd)
 	mirror_sev->es_active = source_sev->es_active;
 	mirror_sev->handle = source_sev->handle;
 	INIT_LIST_HEAD(&mirror_sev->regions_list);
+	INIT_LIST_HEAD(&mirror_sev->mirror_vms);
 	ret = 0;
 
 	/*
@@ -2041,19 +2062,17 @@ void sev_vm_destroy(struct kvm *kvm)
 	struct list_head *head = &sev->regions_list;
 	struct list_head *pos, *q;
 
-	WARN_ON(sev->num_mirrored_vms);
-
 	if (!sev_guest(kvm))
 		return;
 
+	WARN_ON(!list_empty(&sev->mirror_vms));
+
 	/* If this is a mirror_kvm release the enc_context_owner and skip sev cleanup */
 	if (is_mirroring_enc_context(kvm)) {
 		struct kvm *owner_kvm = sev->enc_context_owner;
-		struct kvm_sev_info *owner_sev = &to_kvm_svm(owner_kvm)->sev_info;
 
 		mutex_lock(&owner_kvm->lock);
-		if (!WARN_ON(!owner_sev->num_mirrored_vms))
-			owner_sev->num_mirrored_vms--;
+		list_del(&sev->mirror_entry);
 		mutex_unlock(&owner_kvm->lock);
 		kvm_put_kvm(owner_kvm);
 		return;
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index c1f0cc4a9369..dddcaa827c5f 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -79,7 +79,8 @@ struct kvm_sev_info {
 	struct list_head regions_list;  /* List of registered regions */
 	u64 ap_jump_table;	/* SEV-ES AP Jump Table address */
 	struct kvm *enc_context_owner; /* Owner of copied encryption context */
-	unsigned long num_mirrored_vms; /* Number of VMs sharing this ASID */
+	struct list_head mirror_vms; /* List of VMs mirroring */
+	struct list_head mirror_entry; /* Use as a list entry of mirrors */
 	struct misc_cg *misc_cg; /* For misc cgroup accounting */
 	atomic_t migration_in_progress;
 };
diff --git a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
index 80056bbbb003..2e5a42cb470b 100644
--- a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
+++ b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
@@ -341,35 +341,54 @@ static void test_sev_mirror_parameters(void)
 
 static void test_sev_move_copy(void)
 {
-	struct kvm_vm *dst_vm, *sev_vm, *mirror_vm, *dst_mirror_vm;
-	int ret;
+	struct kvm_vm *dst_vm, *dst2_vm, *dst3_vm, *sev_vm, *mirror_vm,
+		      *dst_mirror_vm, *dst2_mirror_vm, *dst3_mirror_vm;
 
 	sev_vm = sev_vm_create(/* es= */ false);
 	dst_vm = aux_vm_create(true);
+	dst2_vm = aux_vm_create(true);
+	dst3_vm = aux_vm_create(true);
 	mirror_vm = aux_vm_create(false);
 	dst_mirror_vm = aux_vm_create(false);
+	dst2_mirror_vm = aux_vm_create(false);
+	dst3_mirror_vm = aux_vm_create(false);
 
 	sev_mirror_create(mirror_vm->fd, sev_vm->fd);
-	ret = __sev_migrate_from(dst_vm->fd, sev_vm->fd);
-	TEST_ASSERT(ret == -1 && errno == EBUSY,
-		    "Cannot migrate VM that has mirrors. ret %d, errno: %d\n", ret,
-		    errno);
 
-	/* The mirror itself can be migrated.  */
 	sev_migrate_from(dst_mirror_vm->fd, mirror_vm->fd);
-	ret = __sev_migrate_from(dst_vm->fd, sev_vm->fd);
-	TEST_ASSERT(ret == -1 && errno == EBUSY,
-		    "Cannot migrate VM that has mirrors. ret %d, errno: %d\n", ret,
-		    errno);
+	sev_migrate_from(dst_vm->fd, sev_vm->fd);
+
+	sev_migrate_from(dst2_vm->fd, dst_vm->fd);
+	sev_migrate_from(dst2_mirror_vm->fd, dst_mirror_vm->fd);
+
+	sev_migrate_from(dst3_mirror_vm->fd, dst2_mirror_vm->fd);
+	sev_migrate_from(dst3_vm->fd, dst2_vm->fd);
+
+	kvm_vm_free(dst_vm);
+	kvm_vm_free(sev_vm);
+	kvm_vm_free(dst2_vm);
+	kvm_vm_free(dst3_vm);
+	kvm_vm_free(mirror_vm);
+	kvm_vm_free(dst_mirror_vm);
+	kvm_vm_free(dst2_mirror_vm);
+	kvm_vm_free(dst3_mirror_vm);
 
 	/*
-	 * mirror_vm is not a mirror anymore, dst_mirror_vm is.  Thus,
-	 * the owner can be copied as soon as dst_mirror_vm is gone.
+	 * Run similar test be destroy mirrors before mirrored VMs to ensure
+	 * destruction is done safely.
 	 */
-	kvm_vm_free(dst_mirror_vm);
+	sev_vm = sev_vm_create(/* es= */ false);
+	dst_vm = aux_vm_create(true);
+	mirror_vm = aux_vm_create(false);
+	dst_mirror_vm = aux_vm_create(false);
+
+	sev_mirror_create(mirror_vm->fd, sev_vm->fd);
+
+	sev_migrate_from(dst_mirror_vm->fd, mirror_vm->fd);
 	sev_migrate_from(dst_vm->fd, sev_vm->fd);
 
 	kvm_vm_free(mirror_vm);
+	kvm_vm_free(dst_mirror_vm);
 	kvm_vm_free(dst_vm);
 	kvm_vm_free(sev_vm);
 }
-- 
cgit 


From 1e8ff29fbbde01e18d97f7b68d1e90260844ca19 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Feb 2022 05:07:09 -0500
Subject: selftests: KVM: allow sev_migrate_tests on machines without SEV-ES

I managed to get hold of a machine that has SEV but not SEV-ES, and
sev_migrate_tests fails because sev_vm_create(true) returns ENOTTY.
Fix this, and while at it also return KSFT_SKIP on machines that do
not have SEV at all, instead of returning 0.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../selftests/kvm/x86_64/sev_migrate_tests.c       | 78 ++++++++++++++++------
 1 file changed, 57 insertions(+), 21 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
index 2e5a42cb470b..d1dc1acf997c 100644
--- a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
+++ b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
@@ -21,6 +21,8 @@
 #define NR_LOCK_TESTING_THREADS 3
 #define NR_LOCK_TESTING_ITERATIONS 10000
 
+bool have_sev_es;
+
 static int __sev_ioctl(int vm_fd, int cmd_id, void *data, __u32 *fw_error)
 {
 	struct kvm_sev_cmd cmd = {
@@ -172,10 +174,18 @@ static void test_sev_migrate_parameters(void)
 		*sev_es_vm_no_vmsa;
 	int ret;
 
-	sev_vm = sev_vm_create(/* es= */ false);
-	sev_es_vm = sev_vm_create(/* es= */ true);
 	vm_no_vcpu = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
 	vm_no_sev = aux_vm_create(true);
+	ret = __sev_migrate_from(vm_no_vcpu->fd, vm_no_sev->fd);
+	TEST_ASSERT(ret == -1 && errno == EINVAL,
+		    "Migrations require SEV enabled. ret %d, errno: %d\n", ret,
+		    errno);
+
+	if (!have_sev_es)
+		goto out;
+
+	sev_vm = sev_vm_create(/* es= */ false);
+	sev_es_vm = sev_vm_create(/* es= */ true);
 	sev_es_vm_no_vmsa = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
 	sev_ioctl(sev_es_vm_no_vmsa->fd, KVM_SEV_ES_INIT, NULL);
 	vm_vcpu_add(sev_es_vm_no_vmsa, 1);
@@ -204,14 +214,10 @@ static void test_sev_migrate_parameters(void)
 		"SEV-ES migrations require UPDATE_VMSA. ret %d, errno: %d\n",
 		ret, errno);
 
-	ret = __sev_migrate_from(vm_no_vcpu->fd, vm_no_sev->fd);
-	TEST_ASSERT(ret == -1 && errno == EINVAL,
-		    "Migrations require SEV enabled. ret %d, errno: %d\n", ret,
-		    errno);
-
 	kvm_vm_free(sev_vm);
 	kvm_vm_free(sev_es_vm);
 	kvm_vm_free(sev_es_vm_no_vmsa);
+out:
 	kvm_vm_free(vm_no_vcpu);
 	kvm_vm_free(vm_no_sev);
 }
@@ -300,7 +306,6 @@ static void test_sev_mirror_parameters(void)
 	int ret;
 
 	sev_vm = sev_vm_create(/* es= */ false);
-	sev_es_vm = sev_vm_create(/* es= */ true);
 	vm_with_vcpu = aux_vm_create(true);
 	vm_no_vcpu = aux_vm_create(false);
 
@@ -310,6 +315,21 @@ static void test_sev_mirror_parameters(void)
 		"Should not be able copy context to self. ret: %d, errno: %d\n",
 		ret, errno);
 
+	ret = __sev_mirror_create(vm_no_vcpu->fd, vm_with_vcpu->fd);
+	TEST_ASSERT(ret == -1 && errno == EINVAL,
+		    "Copy context requires SEV enabled. ret %d, errno: %d\n", ret,
+		    errno);
+
+	ret = __sev_mirror_create(vm_with_vcpu->fd, sev_vm->fd);
+	TEST_ASSERT(
+		ret == -1 && errno == EINVAL,
+		"SEV copy context requires no vCPUS on the destination. ret: %d, errno: %d\n",
+		ret, errno);
+
+	if (!have_sev_es)
+		goto out;
+
+	sev_es_vm = sev_vm_create(/* es= */ true);
 	ret = __sev_mirror_create(sev_vm->fd, sev_es_vm->fd);
 	TEST_ASSERT(
 		ret == -1 && errno == EINVAL,
@@ -322,19 +342,10 @@ static void test_sev_mirror_parameters(void)
 		"Should not be able copy context to SEV-ES enabled VM. ret: %d, errno: %d\n",
 		ret, errno);
 
-	ret = __sev_mirror_create(vm_no_vcpu->fd, vm_with_vcpu->fd);
-	TEST_ASSERT(ret == -1 && errno == EINVAL,
-		    "Copy context requires SEV enabled. ret %d, errno: %d\n", ret,
-		    errno);
-
-	ret = __sev_mirror_create(vm_with_vcpu->fd, sev_vm->fd);
-	TEST_ASSERT(
-		ret == -1 && errno == EINVAL,
-		"SEV copy context requires no vCPUS on the destination. ret: %d, errno: %d\n",
-		ret, errno);
+	kvm_vm_free(sev_es_vm);
 
+out:
 	kvm_vm_free(sev_vm);
-	kvm_vm_free(sev_es_vm);
 	kvm_vm_free(vm_with_vcpu);
 	kvm_vm_free(vm_no_vcpu);
 }
@@ -393,11 +404,35 @@ static void test_sev_move_copy(void)
 	kvm_vm_free(sev_vm);
 }
 
+#define X86_FEATURE_SEV (1 << 1)
+#define X86_FEATURE_SEV_ES (1 << 3)
+
 int main(int argc, char *argv[])
 {
+	struct kvm_cpuid_entry2 *cpuid;
+
+	if (!kvm_check_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM) &&
+	    !kvm_check_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM)) {
+		print_skip("Capabilities not available");
+		exit(KSFT_SKIP);
+	}
+
+	cpuid = kvm_get_supported_cpuid_entry(0x80000000);
+	if (cpuid->eax < 0x8000001f) {
+		print_skip("AMD memory encryption not available");
+		exit(KSFT_SKIP);
+	}
+	cpuid = kvm_get_supported_cpuid_entry(0x8000001f);
+	if (!(cpuid->eax & X86_FEATURE_SEV)) {
+		print_skip("AMD SEV not available");
+		exit(KSFT_SKIP);
+	}
+	have_sev_es = !!(cpuid->eax & X86_FEATURE_SEV_ES);
+
 	if (kvm_check_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM)) {
 		test_sev_migrate_from(/* es= */ false);
-		test_sev_migrate_from(/* es= */ true);
+		if (have_sev_es)
+			test_sev_migrate_from(/* es= */ true);
 		test_sev_migrate_locking();
 		test_sev_migrate_parameters();
 		if (kvm_check_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM))
@@ -405,7 +440,8 @@ int main(int argc, char *argv[])
 	}
 	if (kvm_check_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM)) {
 		test_sev_mirror(/* es= */ false);
-		test_sev_mirror(/* es= */ true);
+		if (have_sev_es)
+			test_sev_mirror(/* es= */ true);
 		test_sev_mirror_parameters();
 	}
 	return 0;
-- 
cgit 


From a33c0c792d0aa2140d79799ca41d68428d2206cc Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Thu, 17 Feb 2022 11:40:05 -0800
Subject: selftests/bpf: Fix a clang deprecated-declarations compilation error

Build the kernel and selftest with clang compiler with LLVM=1,
  make -j LLVM=1
  make -C tools/testing/selftests/bpf -j LLVM=1

I hit the following selftests/bpf compilation error:
  In file included from test_cpp.cpp:3:
  /.../tools/testing/selftests/bpf/tools/include/bpf/libbpf.h:73:8:
    error: 'relaxed_core_relocs' is deprecated: libbpf v0.6+: field has no effect [-Werror,-Wdeprecated-declarations]
  struct bpf_object_open_opts {
         ^
  test_cpp.cpp:56:2: note: in implicit move constructor for 'bpf_object_open_opts' first required here
          LIBBPF_OPTS(bpf_object_open_opts, opts);
          ^
  /.../tools/testing/selftests/bpf/tools/include/bpf/libbpf_common.h:77:3: note: expanded from macro 'LIBBPF_OPTS'
                  (struct TYPE) {                                             \
                  ^
  /.../tools/testing/selftests/bpf/tools/include/bpf/libbpf.h:90:2: note: 'relaxed_core_relocs' has been explicitly marked deprecated here
          LIBBPF_DEPRECATED_SINCE(0, 6, "field has no effect")
          ^
  /.../tools/testing/selftests/bpf/tools/include/bpf/libbpf_common.h:24:4: note: expanded from macro 'LIBBPF_DEPRECATED_SINCE'
                  (LIBBPF_DEPRECATED("libbpf v" # major "." # minor "+: " msg))
                   ^
  /.../tools/testing/selftests/bpf/tools/include/bpf/libbpf_common.h:19:47: note: expanded from macro 'LIBBPF_DEPRECATED'
  #define LIBBPF_DEPRECATED(msg) __attribute__((deprecated(msg)))

There are two ways to fix the issue, one is to use GCC diagnostic ignore pragma, and the
other is to open code bpf_object_open_opts instead of using LIBBPF_OPTS.
Since in general LIBBPF_OPTS is preferred, the patch fixed the issue by
adding proper GCC diagnostic ignore pragmas.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220217194005.2765348-1-yhs@fb.com
---
 tools/testing/selftests/bpf/test_cpp.cpp | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/test_cpp.cpp b/tools/testing/selftests/bpf/test_cpp.cpp
index 773f165c4898..19ad172036da 100644
--- a/tools/testing/selftests/bpf/test_cpp.cpp
+++ b/tools/testing/selftests/bpf/test_cpp.cpp
@@ -1,6 +1,9 @@
 /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
 #include <iostream>
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
 #include <bpf/libbpf.h>
+#pragma GCC diagnostic pop
 #include <bpf/bpf.h>
 #include <bpf/btf.h>
 #include "test_core_extern.skel.h"
-- 
cgit 


From aa0eab8639ff0fb1edf18b8616e6ae2c38ca5854 Mon Sep 17 00:00:00 2001
From: Karolina Drobnik <karolinadrobnik@gmail.com>
Date: Wed, 2 Feb 2022 12:03:00 +0100
Subject: tools: Move gfp.h and slab.h from radix-tree to lib

Merge radix-tree definitions from gfp.h and slab.h with these
in tools/lib, so they can be used in other test suites.
Fix style issues in slab.h. Update radix-tree test files.

Signed-off-by: Karolina Drobnik <karolinadrobnik@gmail.com>
Signed-off-by: Mike Rapoport <rppt@kernel.org>
Link: https://lore.kernel.org/r/b76ddb8a12fdf9870b55c1401213e44f5e0d0da3.1643796665.git.karolinadrobnik@gmail.com
---
 tools/include/linux/gfp.h             | 28 ++++++++++++++++++++++++++
 tools/include/linux/slab.h            | 28 ++++++++++++++++++++++++++
 tools/lib/slab.c                      | 38 +++++++++++++++++++++++++++++++++++
 tools/testing/radix-tree/Makefile     |  3 ++-
 tools/testing/radix-tree/linux.c      | 27 -------------------------
 tools/testing/radix-tree/linux/gfp.h  | 33 ------------------------------
 tools/testing/radix-tree/linux/slab.h | 27 -------------------------
 7 files changed, 96 insertions(+), 88 deletions(-)
 create mode 100644 tools/include/linux/slab.h
 create mode 100644 tools/lib/slab.c
 delete mode 100644 tools/testing/radix-tree/linux/gfp.h
 delete mode 100644 tools/testing/radix-tree/linux/slab.h

(limited to 'tools/testing')

diff --git a/tools/include/linux/gfp.h b/tools/include/linux/gfp.h
index 22030756fbc0..b238dbc9eb85 100644
--- a/tools/include/linux/gfp.h
+++ b/tools/include/linux/gfp.h
@@ -1,4 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _TOOLS_INCLUDE_LINUX_GFP_H
 #define _TOOLS_INCLUDE_LINUX_GFP_H
 
+#include <linux/types.h>
+
+#define __GFP_BITS_SHIFT 26
+#define __GFP_BITS_MASK ((gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
+
+#define __GFP_HIGH		0x20u
+#define __GFP_IO		0x40u
+#define __GFP_FS		0x80u
+#define __GFP_NOWARN		0x200u
+#define __GFP_ZERO		0x8000u
+#define __GFP_ATOMIC		0x80000u
+#define __GFP_ACCOUNT		0x100000u
+#define __GFP_DIRECT_RECLAIM	0x400000u
+#define __GFP_KSWAPD_RECLAIM	0x2000000u
+
+#define __GFP_RECLAIM	(__GFP_DIRECT_RECLAIM | __GFP_KSWAPD_RECLAIM)
+
+#define GFP_ZONEMASK	0x0fu
+#define GFP_ATOMIC	(__GFP_HIGH | __GFP_ATOMIC | __GFP_KSWAPD_RECLAIM)
+#define GFP_KERNEL	(__GFP_RECLAIM | __GFP_IO | __GFP_FS)
+#define GFP_NOWAIT	(__GFP_KSWAPD_RECLAIM)
+
+static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
+{
+	return !!(gfp_flags & __GFP_DIRECT_RECLAIM);
+}
+
 #endif /* _TOOLS_INCLUDE_LINUX_GFP_H */
diff --git a/tools/include/linux/slab.h b/tools/include/linux/slab.h
new file mode 100644
index 000000000000..07d7930d4003
--- /dev/null
+++ b/tools/include/linux/slab.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_SLAB_H
+#define _TOOLS_SLAB_H
+
+#include <linux/types.h>
+#include <linux/gfp.h>
+
+#define SLAB_PANIC 2
+#define SLAB_RECLAIM_ACCOUNT    0x00020000UL            /* Objects are reclaimable */
+
+#define kzalloc_node(size, flags, node) kmalloc(size, flags)
+
+void *kmalloc(size_t size, gfp_t gfp);
+void kfree(void *p);
+
+static inline void *kzalloc(size_t size, gfp_t gfp)
+{
+	return kmalloc(size, gfp | __GFP_ZERO);
+}
+
+void *kmem_cache_alloc(struct kmem_cache *cachep, int flags);
+void kmem_cache_free(struct kmem_cache *cachep, void *objp);
+
+struct kmem_cache *kmem_cache_create(const char *name, unsigned int size,
+			unsigned int align, unsigned int flags,
+			void (*ctor)(void *));
+
+#endif		/* _TOOLS_SLAB_H */
diff --git a/tools/lib/slab.c b/tools/lib/slab.c
new file mode 100644
index 000000000000..959997fb0652
--- /dev/null
+++ b/tools/lib/slab.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <string.h>
+
+#include <urcu/uatomic.h>
+#include <linux/slab.h>
+#include <malloc.h>
+#include <linux/gfp.h>
+
+int kmalloc_nr_allocated;
+int kmalloc_verbose;
+
+void *kmalloc(size_t size, gfp_t gfp)
+{
+	void *ret;
+
+	if (!(gfp & __GFP_DIRECT_RECLAIM))
+		return NULL;
+
+	ret = malloc(size);
+	uatomic_inc(&kmalloc_nr_allocated);
+	if (kmalloc_verbose)
+		printf("Allocating %p from malloc\n", ret);
+	if (gfp & __GFP_ZERO)
+		memset(ret, 0, size);
+	return ret;
+}
+
+void kfree(void *p)
+{
+	if (!p)
+		return;
+	uatomic_dec(&kmalloc_nr_allocated);
+	if (kmalloc_verbose)
+		printf("Freeing %p to malloc\n", p);
+	free(p);
+}
diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index aa6abfe0749c..c4ea4fbb0bfc 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -5,7 +5,8 @@ CFLAGS += -I. -I../../include -g -Og -Wall -D_LGPL_SOURCE -fsanitize=address \
 LDFLAGS += -fsanitize=address -fsanitize=undefined
 LDLIBS+= -lpthread -lurcu
 TARGETS = main idr-test multiorder xarray
-CORE_OFILES := xarray.o radix-tree.o idr.o linux.o test.o find_bit.o bitmap.o
+CORE_OFILES := xarray.o radix-tree.o idr.o linux.o test.o find_bit.o bitmap.o \
+			 slab.o
 OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \
 	 regression4.o tag_check.o multiorder.o idr-test.o iteration_check.o \
 	 iteration_check_2.o benchmark.o
diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c
index 2d9c59df60de..81539f543954 100644
--- a/tools/testing/radix-tree/linux.c
+++ b/tools/testing/radix-tree/linux.c
@@ -14,7 +14,6 @@
 
 int nr_allocated;
 int preempt_count;
-int kmalloc_verbose;
 int test_verbose;
 
 struct kmem_cache {
@@ -78,32 +77,6 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
 	pthread_mutex_unlock(&cachep->lock);
 }
 
-void *kmalloc(size_t size, gfp_t gfp)
-{
-	void *ret;
-
-	if (!(gfp & __GFP_DIRECT_RECLAIM))
-		return NULL;
-
-	ret = malloc(size);
-	uatomic_inc(&nr_allocated);
-	if (kmalloc_verbose)
-		printf("Allocating %p from malloc\n", ret);
-	if (gfp & __GFP_ZERO)
-		memset(ret, 0, size);
-	return ret;
-}
-
-void kfree(void *p)
-{
-	if (!p)
-		return;
-	uatomic_dec(&nr_allocated);
-	if (kmalloc_verbose)
-		printf("Freeing %p to malloc\n", p);
-	free(p);
-}
-
 struct kmem_cache *
 kmem_cache_create(const char *name, unsigned int size, unsigned int align,
 		unsigned int flags, void (*ctor)(void *))
diff --git a/tools/testing/radix-tree/linux/gfp.h b/tools/testing/radix-tree/linux/gfp.h
deleted file mode 100644
index 32159c08a52e..000000000000
--- a/tools/testing/radix-tree/linux/gfp.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _GFP_H
-#define _GFP_H
-
-#include <linux/types.h>
-
-#define __GFP_BITS_SHIFT 26
-#define __GFP_BITS_MASK ((gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
-
-#define __GFP_HIGH		0x20u
-#define __GFP_IO		0x40u
-#define __GFP_FS		0x80u
-#define __GFP_NOWARN		0x200u
-#define __GFP_ZERO		0x8000u
-#define __GFP_ATOMIC		0x80000u
-#define __GFP_ACCOUNT		0x100000u
-#define __GFP_DIRECT_RECLAIM	0x400000u
-#define __GFP_KSWAPD_RECLAIM	0x2000000u
-
-#define __GFP_RECLAIM	(__GFP_DIRECT_RECLAIM|__GFP_KSWAPD_RECLAIM)
-
-#define GFP_ZONEMASK	0x0fu
-#define GFP_ATOMIC	(__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM)
-#define GFP_KERNEL	(__GFP_RECLAIM | __GFP_IO | __GFP_FS)
-#define GFP_NOWAIT	(__GFP_KSWAPD_RECLAIM)
-
-
-static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
-{
-	return !!(gfp_flags & __GFP_DIRECT_RECLAIM);
-}
-
-#endif
diff --git a/tools/testing/radix-tree/linux/slab.h b/tools/testing/radix-tree/linux/slab.h
deleted file mode 100644
index 2958830ce4d7..000000000000
--- a/tools/testing/radix-tree/linux/slab.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef SLAB_H
-#define SLAB_H
-
-#include <linux/types.h>
-#include <linux/gfp.h>
-
-#define SLAB_HWCACHE_ALIGN 1
-#define SLAB_PANIC 2
-#define SLAB_RECLAIM_ACCOUNT    0x00020000UL            /* Objects are reclaimable */
-
-void *kmalloc(size_t size, gfp_t);
-void kfree(void *);
-
-static inline void *kzalloc(size_t size, gfp_t gfp)
-{
-        return kmalloc(size, gfp | __GFP_ZERO);
-}
-
-void *kmem_cache_alloc(struct kmem_cache *cachep, int flags);
-void kmem_cache_free(struct kmem_cache *cachep, void *objp);
-
-struct kmem_cache *kmem_cache_create(const char *name, unsigned int size,
-			unsigned int align, unsigned int flags,
-			void (*ctor)(void *));
-
-#endif		/* SLAB_H */
-- 
cgit 


From 16802e55dea9534c18a30bd8eeefea8a06337916 Mon Sep 17 00:00:00 2001
From: Karolina Drobnik <karolinadrobnik@gmail.com>
Date: Wed, 2 Feb 2022 12:03:09 +0100
Subject: memblock tests: Add skeleton of the memblock simulator

Add basic project files, together with local stubs of required headers.
Update tools/include/slab.h to include definitions used by memblock.

Signed-off-by: Karolina Drobnik <karolinadrobnik@gmail.com>
Signed-off-by: Mike Rapoport <rppt@kernel.org>
Link: https://lore.kernel.org/r/d296fceb023a04b316a31fbff9acf1e76ac684e4.1643796665.git.karolinadrobnik@gmail.com
---
 MAINTAINERS                                     |  1 +
 tools/include/linux/slab.h                      | 10 +++++
 tools/testing/memblock/.gitignore               |  4 ++
 tools/testing/memblock/Makefile                 | 52 +++++++++++++++++++++++++
 tools/testing/memblock/asm/dma.h                |  5 +++
 tools/testing/memblock/internal.h               | 12 ++++++
 tools/testing/memblock/lib/slab.c               |  9 +++++
 tools/testing/memblock/linux/init.h             | 34 ++++++++++++++++
 tools/testing/memblock/linux/kernel.h           | 12 ++++++
 tools/testing/memblock/linux/kmemleak.h         | 18 +++++++++
 tools/testing/memblock/linux/memory_hotplug.h   | 19 +++++++++
 tools/testing/memblock/linux/mmzone.h           | 35 +++++++++++++++++
 tools/testing/memblock/linux/printk.h           | 25 ++++++++++++
 tools/testing/memblock/main.c                   |  6 +++
 tools/testing/memblock/mmzone.c                 | 20 ++++++++++
 tools/testing/memblock/scripts/Makefile.include | 17 ++++++++
 16 files changed, 279 insertions(+)
 create mode 100644 tools/testing/memblock/.gitignore
 create mode 100644 tools/testing/memblock/Makefile
 create mode 100644 tools/testing/memblock/asm/dma.h
 create mode 100644 tools/testing/memblock/internal.h
 create mode 100644 tools/testing/memblock/lib/slab.c
 create mode 100644 tools/testing/memblock/linux/init.h
 create mode 100644 tools/testing/memblock/linux/kernel.h
 create mode 100644 tools/testing/memblock/linux/kmemleak.h
 create mode 100644 tools/testing/memblock/linux/memory_hotplug.h
 create mode 100644 tools/testing/memblock/linux/mmzone.h
 create mode 100644 tools/testing/memblock/linux/printk.h
 create mode 100644 tools/testing/memblock/main.c
 create mode 100644 tools/testing/memblock/mmzone.c
 create mode 100644 tools/testing/memblock/scripts/Makefile.include

(limited to 'tools/testing')

diff --git a/MAINTAINERS b/MAINTAINERS
index fca970a46e77..38502bf0231a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12423,6 +12423,7 @@ S:	Maintained
 F:	Documentation/core-api/boot-time-mm.rst
 F:	include/linux/memblock.h
 F:	mm/memblock.c
+F:	tools/testing/memblock/
 
 MEMORY CONTROLLER DRIVERS
 M:	Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
diff --git a/tools/include/linux/slab.h b/tools/include/linux/slab.h
index 07d7930d4003..f41d8a0eb1a4 100644
--- a/tools/include/linux/slab.h
+++ b/tools/include/linux/slab.h
@@ -13,6 +13,16 @@
 void *kmalloc(size_t size, gfp_t gfp);
 void kfree(void *p);
 
+bool slab_is_available(void);
+
+enum slab_state {
+	DOWN,
+	PARTIAL,
+	PARTIAL_NODE,
+	UP,
+	FULL
+};
+
 static inline void *kzalloc(size_t size, gfp_t gfp)
 {
 	return kmalloc(size, gfp | __GFP_ZERO);
diff --git a/tools/testing/memblock/.gitignore b/tools/testing/memblock/.gitignore
new file mode 100644
index 000000000000..654338e0be52
--- /dev/null
+++ b/tools/testing/memblock/.gitignore
@@ -0,0 +1,4 @@
+main
+memblock.c
+linux/memblock.h
+asm/cmpxchg.h
diff --git a/tools/testing/memblock/Makefile b/tools/testing/memblock/Makefile
new file mode 100644
index 000000000000..e43ed9de9bcf
--- /dev/null
+++ b/tools/testing/memblock/Makefile
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# Memblock simulator requires AddressSanitizer (libasan) and liburcu development
+# packages installed
+CFLAGS += -I. -I../../include -Wall -O2 -fsanitize=address \
+	  -fsanitize=undefined -D CONFIG_PHYS_ADDR_T_64BIT
+LDFLAGS += -fsanitize=address -fsanitize=undefined
+TARGETS = main
+OFILES = main.o memblock.o lib/slab.o mmzone.o slab.o
+EXTR_SRC = ../../../mm/memblock.c
+
+ifeq ($(BUILD), 32)
+	CFLAGS += -m32
+	LDFLAGS += -m32
+endif
+
+# Process user parameters
+include scripts/Makefile.include
+
+main: $(OFILES)
+
+$(OFILES): include
+
+include: ../../../include/linux/memblock.h ../../include/linux/*.h \
+	../../include/asm/*.h
+
+	@mkdir -p linux
+	test -L linux/memblock.h || ln -s ../../../../include/linux/memblock.h linux/memblock.h
+	test -L asm/cmpxchg.h || ln -s ../../../arch/x86/include/asm/cmpxchg.h asm/cmpxchg.h
+
+memblock.c: $(EXTR_SRC)
+	test -L memblock.c || ln -s $(EXTR_SRC) memblock.c
+
+clean:
+	$(RM) $(TARGETS) $(OFILES) linux/memblock.h memblock.c asm/cmpxchg.h
+
+help:
+	@echo  'Memblock simulator'
+	@echo  ''
+	@echo  'Available targets:'
+	@echo  '  main		  - Build the memblock simulator'
+	@echo  '  clean		  - Remove generated files and symlinks in the directory'
+	@echo  ''
+	@echo  'Configuration:'
+	@echo  '  make NUMA=1               - simulate enabled NUMA'
+	@echo  '  make MOVABLE_NODE=1       - override `movable_node_is_enabled`'
+	@echo  '                              definition to simulate movable NUMA nodes'
+	@echo  '  make 32BIT_PHYS_ADDR_T=1  - Use 32 bit physical addresses'
+
+vpath %.c ../../lib
+
+.PHONY: clean include help
diff --git a/tools/testing/memblock/asm/dma.h b/tools/testing/memblock/asm/dma.h
new file mode 100644
index 000000000000..13ff8e5d22ef
--- /dev/null
+++ b/tools/testing/memblock/asm/dma.h
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_DMA_H
+#define _TOOLS_DMA_H
+
+#endif
diff --git a/tools/testing/memblock/internal.h b/tools/testing/memblock/internal.h
new file mode 100644
index 000000000000..94b52a8718b5
--- /dev/null
+++ b/tools/testing/memblock/internal.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _MM_INTERNAL_H
+#define _MM_INTERNAL_H
+
+struct page {};
+
+void memblock_free_pages(struct page *page, unsigned long pfn,
+			 unsigned int order)
+{
+}
+
+#endif
diff --git a/tools/testing/memblock/lib/slab.c b/tools/testing/memblock/lib/slab.c
new file mode 100644
index 000000000000..6be6020328fb
--- /dev/null
+++ b/tools/testing/memblock/lib/slab.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/slab.h>
+
+enum slab_state slab_state;
+
+bool slab_is_available(void)
+{
+	return slab_state >= UP;
+}
diff --git a/tools/testing/memblock/linux/init.h b/tools/testing/memblock/linux/init.h
new file mode 100644
index 000000000000..828e0ee0bc6c
--- /dev/null
+++ b/tools/testing/memblock/linux/init.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_INIT_H
+#define _LINUX_INIT_H
+
+#include <linux/compiler.h>
+#include <asm/export.h>
+#include <linux/memory_hotplug.h>
+
+#define __section(section)              __attribute__((__section__(section)))
+
+#define __initconst
+#define __meminit
+#define __meminitdata
+#define __refdata
+#define __initdata
+
+struct obs_kernel_param {
+	const char *str;
+	int (*setup_func)(char *st);
+	int early;
+};
+
+#define __setup_param(str, unique_id, fn, early)			\
+	static const char __setup_str_##unique_id[] __initconst		\
+		__aligned(1) = str;					\
+	static struct obs_kernel_param __setup_##unique_id		\
+		__used __section(".init.setup")				\
+		__aligned(__alignof__(struct obs_kernel_param)) =	\
+		{ __setup_str_##unique_id, fn, early }
+
+#define early_param(str, fn)						\
+	__setup_param(str, fn, fn, 1)
+
+#endif
diff --git a/tools/testing/memblock/linux/kernel.h b/tools/testing/memblock/linux/kernel.h
new file mode 100644
index 000000000000..d2f148bd8902
--- /dev/null
+++ b/tools/testing/memblock/linux/kernel.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _MEMBLOCK_LINUX_KERNEL_H
+#define _MEMBLOCK_LINUX_KERNEL_H
+
+#include <../../include/linux/kernel.h>
+#include <linux/errno.h>
+#include <string.h>
+#include <linux/printk.h>
+#include <linux/linkage.h>
+#include <linux/kconfig.h>
+
+#endif
diff --git a/tools/testing/memblock/linux/kmemleak.h b/tools/testing/memblock/linux/kmemleak.h
new file mode 100644
index 000000000000..462f8c5e8aa0
--- /dev/null
+++ b/tools/testing/memblock/linux/kmemleak.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _KMEMLEAK_H
+#define _KMEMLEAK_H
+
+static inline void kmemleak_free_part_phys(phys_addr_t phys, size_t size)
+{
+}
+
+static inline void kmemleak_alloc_phys(phys_addr_t phys, size_t size,
+				       int min_count, gfp_t gfp)
+{
+}
+
+static inline void dump_stack(void)
+{
+}
+
+#endif
diff --git a/tools/testing/memblock/linux/memory_hotplug.h b/tools/testing/memblock/linux/memory_hotplug.h
new file mode 100644
index 000000000000..47988765a219
--- /dev/null
+++ b/tools/testing/memblock/linux/memory_hotplug.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_MEMORY_HOTPLUG_H
+#define _LINUX_MEMORY_HOTPLUG_H
+
+#include <linux/numa.h>
+#include <linux/pfn.h>
+#include <linux/cache.h>
+#include <linux/types.h>
+
+static inline bool movable_node_is_enabled(void)
+{
+#ifdef MOVABLE_NODE
+	return true;
+#else
+	return false;
+#endif
+}
+
+#endif
diff --git a/tools/testing/memblock/linux/mmzone.h b/tools/testing/memblock/linux/mmzone.h
new file mode 100644
index 000000000000..7c2eb5c9bb54
--- /dev/null
+++ b/tools/testing/memblock/linux/mmzone.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_MMZONE_H
+#define _TOOLS_MMZONE_H
+
+#include <linux/atomic.h>
+
+struct pglist_data *first_online_pgdat(void);
+struct pglist_data *next_online_pgdat(struct pglist_data *pgdat);
+
+#define for_each_online_pgdat(pgdat)			\
+	for (pgdat = first_online_pgdat();		\
+	     pgdat;					\
+	     pgdat = next_online_pgdat(pgdat))
+
+enum zone_type {
+	__MAX_NR_ZONES
+};
+
+#define MAX_NR_ZONES __MAX_NR_ZONES
+#define MAX_ORDER 11
+#define MAX_ORDER_NR_PAGES (1 << (MAX_ORDER - 1))
+
+#define pageblock_order		(MAX_ORDER - 1)
+#define pageblock_nr_pages	BIT(pageblock_order)
+
+struct zone {
+	atomic_long_t		managed_pages;
+};
+
+typedef struct pglist_data {
+	struct zone node_zones[MAX_NR_ZONES];
+
+} pg_data_t;
+
+#endif
diff --git a/tools/testing/memblock/linux/printk.h b/tools/testing/memblock/linux/printk.h
new file mode 100644
index 000000000000..61af424d8c6c
--- /dev/null
+++ b/tools/testing/memblock/linux/printk.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PRINTK_H
+#define _PRINTK_H
+
+#include <stdio.h>
+#include <asm/bug.h>
+
+/*
+ * memblock_dbg is called with u64 arguments that don't match the "%llu"
+ * specifier in printf. This results in warnings that cannot be fixed without
+ * modifying memblock.c, which we wish to avoid. As these messaged are not used
+ * in testing anyway, the mismatch can be ignored.
+ */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wformat"
+#define printk printf
+#pragma GCC diagnostic push
+
+#define pr_info printk
+#define pr_debug printk
+#define pr_cont printk
+#define pr_err printk
+#define pr_warn printk
+
+#endif
diff --git a/tools/testing/memblock/main.c b/tools/testing/memblock/main.c
new file mode 100644
index 000000000000..62958da35d0f
--- /dev/null
+++ b/tools/testing/memblock/main.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+int main(int argc, char **argv)
+{
+	return 0;
+}
diff --git a/tools/testing/memblock/mmzone.c b/tools/testing/memblock/mmzone.c
new file mode 100644
index 000000000000..7b0909e8b759
--- /dev/null
+++ b/tools/testing/memblock/mmzone.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/mmzone.h>
+
+struct pglist_data *first_online_pgdat(void)
+{
+	return NULL;
+}
+
+struct pglist_data *next_online_pgdat(struct pglist_data *pgdat)
+{
+	return NULL;
+}
+
+void reserve_bootmem_region(phys_addr_t start, phys_addr_t end)
+{
+}
+
+void atomic_long_set(atomic_long_t *v, long i)
+{
+}
diff --git a/tools/testing/memblock/scripts/Makefile.include b/tools/testing/memblock/scripts/Makefile.include
new file mode 100644
index 000000000000..699b0d6cda07
--- /dev/null
+++ b/tools/testing/memblock/scripts/Makefile.include
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+# Definitions for user-provided arguments
+
+# Simulate CONFIG_NUMA=y
+ifeq ($(NUMA), 1)
+	CFLAGS += -D CONFIG_NUMA
+endif
+
+# Simulate movable NUMA memory regions
+ifeq ($(MOVABLE_NODE), 1)
+	CFLAGS += -D MOVABLE_NODE
+endif
+
+# Use 32 bit physical addresses
+ifeq ($(32BIT_PHYS_ADDR_T), 1)
+	CFLAGS += -U CONFIG_PHYS_ADDR_T_64BIT
+endif
-- 
cgit 


From f3252a22d1f59d89cca769431efa1c95d6343929 Mon Sep 17 00:00:00 2001
From: Karolina Drobnik <karolinadrobnik@gmail.com>
Date: Wed, 2 Feb 2022 12:03:10 +0100
Subject: memblock tests: Add memblock reset function

Memblock simulator needs to be able to reset memblock data structures
between different test cases. Add a function that sets all fields to
their default values.

Add a test checking if memblock is being initialized to expected values.

Signed-off-by: Karolina Drobnik <karolinadrobnik@gmail.com>
Signed-off-by: Mike Rapoport <rppt@kernel.org>
Link: https://lore.kernel.org/r/8c185aa7e0dd68c2c7e937c9a06c90ae413e240f.1643796665.git.karolinadrobnik@gmail.com
---
 tools/testing/memblock/Makefile          |  4 +++-
 tools/testing/memblock/main.c            |  2 ++
 tools/testing/memblock/tests/basic_api.c | 32 ++++++++++++++++++++++++++++++++
 tools/testing/memblock/tests/basic_api.h | 10 ++++++++++
 tools/testing/memblock/tests/common.c    | 27 +++++++++++++++++++++++++++
 tools/testing/memblock/tests/common.h    | 15 +++++++++++++++
 6 files changed, 89 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/memblock/tests/basic_api.c
 create mode 100644 tools/testing/memblock/tests/basic_api.h
 create mode 100644 tools/testing/memblock/tests/common.c
 create mode 100644 tools/testing/memblock/tests/common.h

(limited to 'tools/testing')

diff --git a/tools/testing/memblock/Makefile b/tools/testing/memblock/Makefile
index e43ed9de9bcf..29715327a2d3 100644
--- a/tools/testing/memblock/Makefile
+++ b/tools/testing/memblock/Makefile
@@ -6,7 +6,9 @@ CFLAGS += -I. -I../../include -Wall -O2 -fsanitize=address \
 	  -fsanitize=undefined -D CONFIG_PHYS_ADDR_T_64BIT
 LDFLAGS += -fsanitize=address -fsanitize=undefined
 TARGETS = main
-OFILES = main.o memblock.o lib/slab.o mmzone.o slab.o
+TEST_OFILES = tests/basic_api.o tests/common.o
+DEP_OFILES = memblock.o lib/slab.o mmzone.o slab.o
+OFILES = main.o $(DEP_OFILES) $(TEST_OFILES)
 EXTR_SRC = ../../../mm/memblock.c
 
 ifeq ($(BUILD), 32)
diff --git a/tools/testing/memblock/main.c b/tools/testing/memblock/main.c
index 62958da35d0f..da65b0adee91 100644
--- a/tools/testing/memblock/main.c
+++ b/tools/testing/memblock/main.c
@@ -1,6 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
+#include "tests/basic_api.h"
 
 int main(int argc, char **argv)
 {
+	memblock_basic_checks();
 	return 0;
 }
diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c
new file mode 100644
index 000000000000..7f2597b3dd4d
--- /dev/null
+++ b/tools/testing/memblock/tests/basic_api.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <string.h>
+#include <linux/memblock.h>
+#include "basic_api.h"
+
+#define EXPECTED_MEMBLOCK_REGIONS			128
+
+static int memblock_initialization_check(void)
+{
+	reset_memblock();
+
+	assert(memblock.memory.regions);
+	assert(memblock.memory.cnt == 1);
+	assert(memblock.memory.max == EXPECTED_MEMBLOCK_REGIONS);
+	assert(strcmp(memblock.memory.name, "memory") == 0);
+
+	assert(memblock.reserved.regions);
+	assert(memblock.reserved.cnt == 1);
+	assert(memblock.memory.max == EXPECTED_MEMBLOCK_REGIONS);
+	assert(strcmp(memblock.reserved.name, "reserved") == 0);
+
+	assert(!memblock.bottom_up);
+	assert(memblock.current_limit == MEMBLOCK_ALLOC_ANYWHERE);
+
+	return 0;
+}
+
+int memblock_basic_checks(void)
+{
+	memblock_initialization_check();
+	return 0;
+}
diff --git a/tools/testing/memblock/tests/basic_api.h b/tools/testing/memblock/tests/basic_api.h
new file mode 100644
index 000000000000..1ceecfca1f47
--- /dev/null
+++ b/tools/testing/memblock/tests/basic_api.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _MEMBLOCK_BASIC_H
+#define _MEMBLOCK_BASIC_H
+
+#include <assert.h>
+#include "common.h"
+
+int memblock_basic_checks(void);
+
+#endif
diff --git a/tools/testing/memblock/tests/common.c b/tools/testing/memblock/tests/common.c
new file mode 100644
index 000000000000..03de6eab0c3c
--- /dev/null
+++ b/tools/testing/memblock/tests/common.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include "tests/common.h"
+#include <string.h>
+
+#define INIT_MEMBLOCK_REGIONS			128
+#define INIT_MEMBLOCK_RESERVED_REGIONS		INIT_MEMBLOCK_REGIONS
+
+void reset_memblock(void)
+{
+	memset(memblock.memory.regions, 0,
+	       memblock.memory.cnt * sizeof(struct memblock_region));
+	memset(memblock.reserved.regions, 0,
+	       memblock.reserved.cnt * sizeof(struct memblock_region));
+
+	memblock.memory.cnt	= 1;
+	memblock.memory.max	= INIT_MEMBLOCK_REGIONS;
+	memblock.memory.name	= "memory";
+	memblock.memory.total_size = 0;
+
+	memblock.reserved.cnt	= 1;
+	memblock.reserved.max	= INIT_MEMBLOCK_RESERVED_REGIONS;
+	memblock.reserved.name	= "reserved";
+	memblock.reserved.total_size = 0;
+
+	memblock.bottom_up	= false;
+	memblock.current_limit	= MEMBLOCK_ALLOC_ANYWHERE;
+}
diff --git a/tools/testing/memblock/tests/common.h b/tools/testing/memblock/tests/common.h
new file mode 100644
index 000000000000..48efc4270ea1
--- /dev/null
+++ b/tools/testing/memblock/tests/common.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _MEMBLOCK_TEST_H
+#define _MEMBLOCK_TEST_H
+
+#include <linux/types.h>
+#include <linux/memblock.h>
+
+struct region {
+	phys_addr_t base;
+	phys_addr_t size;
+};
+
+void reset_memblock(void);
+
+#endif
-- 
cgit 


From b03e19465b972bd06104207380e0e42e7f03ab29 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Sat, 19 Feb 2022 20:27:20 -0800
Subject: selftests/bpf: Fix btfgen tests

There turned out to be a few problems with btfgen selftests.

First, core_btfgen tests are failing in BPF CI due to the use of
full-featured bpftool, which has extra dependencies on libbfd, libcap,
etc, which are present in BPF CI's build environment, but those shared
libraries are missing in QEMU image in which test_progs is running.

To fix this problem, use minimal bootstrap version of bpftool instead.
It only depend on libelf and libz, same as libbpf, so doesn't add any
new requirements (and bootstrap bpftool still implementes entire
`bpftool gen` functionality, which is quite convenient).

Second problem is even more interesting. Both core_btfgen and core_reloc
reuse the same set of struct core_reloc_test_case array of test case
definitions. That in itself is not a problem, but btfgen test replaces
test_case->btf_src_file property with the path to temporary file into
which minimized BTF is output by bpftool. This interferes with original
core_reloc tests, depending on order of tests execution (core_btfgen is
run first in sequential mode and skrews up subsequent core_reloc run by
pointing to already deleted temporary file, instead of the original BTF
files) and whether those two runs share the same process (in parallel
mode the chances are high for them to run in two separate processes and
so not interfere with each other).

To prevent this interference, create and use local copy of a test
definition. Mark original array as constant to catch accidental
modifcations. Note that setup_type_id_case_success() and
setup_type_id_case_success() still modify common test_case->output
memory area, but it is ok as each setup function has to re-initialize it
completely anyways. In sequential mode it leads to deterministic and
correct initialization. In parallel mode they will either each have
their own process, or if core_reloc and core_btfgen happen to be run by
the same worker process, they will still do that sequentially within the
worker process. If they are sharded across multiple processes, they
don't really share anything anyways.

Also, rename core_btfgen into core_reloc_btfgen, as it is indeed just
a "flavor" of core_reloc test, not an independent set of tests. So make
it more obvious.

Last problem that needed solving was that location of bpftool differs
between test_progs and test_progs' flavors (e.g., test_progs-no_alu32).
To keep it simple, create a symlink to bpftool both inside
selftests/bpf/ directory and selftests/bpf/<flavor> subdirectory. That
way, from inside core_reloc test, location to bpftool is just "./bpftool".

v2->v3:
  - fix bpftool location relative the test_progs-no_alu32;
v1->v2:
  - fix corruption of core_reloc_test_case.

Fixes: 704c91e59fe0 ("selftests/bpf: Test "bpftool gen min_core_btf")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yucong Sun <sunyucong@gmail.com>
Link: https://lore.kernel.org/bpf/20220220042720.3336684-1-andrii@kernel.org
---
 tools/testing/selftests/bpf/.gitignore              |  1 +
 tools/testing/selftests/bpf/Makefile                |  3 ++-
 tools/testing/selftests/bpf/prog_tests/core_reloc.c | 17 +++++++++++------
 3 files changed, 14 insertions(+), 7 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 1dad8d617da8..a7eead8820a0 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
+bpftool
 bpf-helpers*
 bpf-syscall*
 test_verifier
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 91ea729990da..fe12b4f5fe20 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -470,6 +470,7 @@ $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS)			\
 	$$(call msg,BINARY,,$$@)
 	$(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@
 	$(Q)$(RESOLVE_BTFIDS) --btf $(TRUNNER_OUTPUT)/btf_data.o $$@
+	$(Q)ln -sf $(if $2,..,.)/tools/build/bpftool/bootstrap/bpftool $(if $2,$2/)bpftool
 
 endef
 
@@ -555,7 +556,7 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \
 
 EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR)	\
 	prog_tests/tests.h map_tests/tests.h verifier/tests.h		\
-	feature								\
+	feature bpftool							\
 	$(addprefix $(OUTPUT)/,*.o *.skel.h *.lskel.h no_alu32 bpf_gcc bpf_testmod.ko)
 
 .PHONY: docs docs-clean
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index 8fbb40a832d5..f28f75aa9154 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -512,7 +512,7 @@ static int __trigger_module_test_read(const struct core_reloc_test_case *test)
 }
 
 
-static struct core_reloc_test_case test_cases[] = {
+static const struct core_reloc_test_case test_cases[] = {
 	/* validate we can find kernel image and use its BTF for relocs */
 	{
 		.case_name = "kernel",
@@ -843,7 +843,7 @@ static int run_btfgen(const char *src_btf, const char *dst_btf, const char *objp
 	int n;
 
 	n = snprintf(command, sizeof(command),
-		     "./tools/build/bpftool/bpftool gen min_core_btf %s %s %s",
+		     "./bpftool gen min_core_btf %s %s %s",
 		     src_btf, dst_btf, objpath);
 	if (n < 0 || n >= sizeof(command))
 		return -1;
@@ -855,7 +855,7 @@ static void run_core_reloc_tests(bool use_btfgen)
 {
 	const size_t mmap_sz = roundup_page(sizeof(struct data));
 	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts);
-	struct core_reloc_test_case *test_case;
+	struct core_reloc_test_case *test_case, test_case_copy;
 	const char *tp_name, *probe_name;
 	int err, i, equal, fd;
 	struct bpf_link *link = NULL;
@@ -870,7 +870,10 @@ static void run_core_reloc_tests(bool use_btfgen)
 
 	for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
 		char btf_file[] = "/tmp/core_reloc.btf.XXXXXX";
-		test_case = &test_cases[i];
+
+		test_case_copy = test_cases[i];
+		test_case = &test_case_copy;
+
 		if (!test__start_subtest(test_case->case_name))
 			continue;
 
@@ -881,6 +884,7 @@ static void run_core_reloc_tests(bool use_btfgen)
 
 		/* generate a "minimal" BTF file and use it as source */
 		if (use_btfgen) {
+
 			if (!test_case->btf_src_file || test_case->fails) {
 				test__skip();
 				continue;
@@ -989,7 +993,8 @@ cleanup:
 			CHECK_FAIL(munmap(mmap_data, mmap_sz));
 			mmap_data = NULL;
 		}
-		remove(btf_file);
+		if (use_btfgen)
+			remove(test_case->btf_src_file);
 		bpf_link__destroy(link);
 		link = NULL;
 		bpf_object__close(obj);
@@ -1001,7 +1006,7 @@ void test_core_reloc(void)
 	run_core_reloc_tests(false);
 }
 
-void test_core_btfgen(void)
+void test_core_reloc_btfgen(void)
 {
 	run_core_reloc_tests(true);
 }
-- 
cgit 


From 1f1180d46d21506325b7217da5a546235a2263a2 Mon Sep 17 00:00:00 2001
From: Karolina Drobnik <karolinadrobnik@gmail.com>
Date: Wed, 2 Feb 2022 12:03:11 +0100
Subject: memblock tests: Add memblock_add tests

Add checks for adding a new region in different scenarios:
 - The region does not overlap with existing entries
 - The region overlaps with one of the previous entries: from the top
  (its end address is bigger than the base of the existing region) or
  from the bottom (its base address is smaller than the end address of
  one of the regions)
 - The region is within an already defined region
 - The same region is added twice to the collection of available memory
   regions

Add checks for memblock initialization to verify it sets memblock data
structures to expected values.

Signed-off-by: Karolina Drobnik <karolinadrobnik@gmail.com>
Signed-off-by: Mike Rapoport <rppt@kernel.org>
Link: https://lore.kernel.org/r/b6c26525025bccec0bf7419473d4d1293eb82b3b.1643796665.git.karolinadrobnik@gmail.com
---
 tools/testing/memblock/tests/basic_api.c | 215 +++++++++++++++++++++++++++++++
 1 file changed, 215 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c
index 7f2597b3dd4d..01dd64b9a41b 100644
--- a/tools/testing/memblock/tests/basic_api.c
+++ b/tools/testing/memblock/tests/basic_api.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 #include <string.h>
 #include <linux/memblock.h>
+#include <linux/sizes.h>
 #include "basic_api.h"
 
 #define EXPECTED_MEMBLOCK_REGIONS			128
@@ -25,8 +26,222 @@ static int memblock_initialization_check(void)
 	return 0;
 }
 
+/*
+ * A simple test that adds a memory block of a specified base address
+ * and size to the collection of available memory regions (memblock.memory).
+ * It checks if a new entry was created and if region counter and total memory
+ * were correctly updated.
+ */
+static int memblock_add_simple_check(void)
+{
+	struct memblock_region *rgn;
+
+	rgn = &memblock.memory.regions[0];
+
+	struct region r = {
+		.base = SZ_1G,
+		.size = SZ_4M
+	};
+
+	reset_memblock();
+	memblock_add(r.base, r.size);
+
+	assert(rgn->base == r.base);
+	assert(rgn->size == r.size);
+
+	assert(memblock.memory.cnt == 1);
+	assert(memblock.memory.total_size == r.size);
+
+	return 0;
+}
+
+/*
+ * A test that tries to add two memory blocks that don't overlap with one
+ * another. It checks if two correctly initialized entries were added to the
+ * collection of available memory regions (memblock.memory) and if this
+ * change was reflected in memblock.memory's total size and region counter.
+ */
+static int memblock_add_disjoint_check(void)
+{
+	struct memblock_region *rgn1, *rgn2;
+
+	rgn1 = &memblock.memory.regions[0];
+	rgn2 = &memblock.memory.regions[1];
+
+	struct region r1 = {
+		.base = SZ_1G,
+		.size = SZ_8K
+	};
+	struct region r2 = {
+		.base = SZ_1G + SZ_16K,
+		.size = SZ_8K
+	};
+
+	reset_memblock();
+	memblock_add(r1.base, r1.size);
+	memblock_add(r2.base, r2.size);
+
+	assert(rgn1->base == r1.base);
+	assert(rgn1->size == r1.size);
+
+	assert(rgn2->base == r2.base);
+	assert(rgn2->size == r2.size);
+
+	assert(memblock.memory.cnt == 2);
+	assert(memblock.memory.total_size == r1.size + r2.size);
+
+	return 0;
+}
+
+/*
+ * A test that tries to add two memory blocks, where the second one overlaps
+ * with the beginning of the first entry (that is r1.base < r2.base + r2.size).
+ * After this, it checks if two entries are merged into one region that starts
+ * at r2.base and has size of two regions minus their intersection. It also
+ * verifies the reported total size of the available memory and region counter.
+ */
+static int memblock_add_overlap_top_check(void)
+{
+	struct memblock_region *rgn;
+	phys_addr_t total_size;
+
+	rgn = &memblock.memory.regions[0];
+
+	struct region r1 = {
+		.base = SZ_512M,
+		.size = SZ_1G
+	};
+	struct region r2 = {
+		.base = SZ_256M,
+		.size = SZ_512M
+	};
+
+	total_size = (r1.base - r2.base) + r1.size;
+
+	reset_memblock();
+	memblock_add(r1.base, r1.size);
+	memblock_add(r2.base, r2.size);
+
+	assert(rgn->base == r2.base);
+	assert(rgn->size == total_size);
+
+	assert(memblock.memory.cnt == 1);
+	assert(memblock.memory.total_size == total_size);
+
+	return 0;
+}
+
+/*
+ * A test that tries to add two memory blocks, where the second one overlaps
+ * with the end of the first entry (that is r2.base < r1.base + r1.size).
+ * After this, it checks if two entries are merged into one region that starts
+ * at r1.base and has size of two regions minus their intersection. It verifies
+ * that memblock can still see only one entry and has a correct total size of
+ * the available memory.
+ */
+static int memblock_add_overlap_bottom_check(void)
+{
+	struct memblock_region *rgn;
+	phys_addr_t total_size;
+
+	rgn = &memblock.memory.regions[0];
+
+	struct region r1 = {
+		.base = SZ_128M,
+		.size = SZ_512M
+	};
+	struct region r2 = {
+		.base = SZ_256M,
+		.size = SZ_1G
+	};
+
+	total_size = (r2.base - r1.base) + r2.size;
+
+	reset_memblock();
+	memblock_add(r1.base, r1.size);
+	memblock_add(r2.base, r2.size);
+
+	assert(rgn->base == r1.base);
+	assert(rgn->size == total_size);
+
+	assert(memblock.memory.cnt == 1);
+	assert(memblock.memory.total_size == total_size);
+
+	return 0;
+}
+
+/*
+ * A test that tries to add two memory blocks, where the second one is
+ * within the range of the first entry (that is r1.base < r2.base &&
+ * r2.base + r2.size < r1.base + r1.size). It checks if two entries are merged
+ * into one region that stays the same. The counter and total size of available
+ * memory are expected to not be updated.
+ */
+static int memblock_add_within_check(void)
+{
+	struct memblock_region *rgn;
+
+	rgn = &memblock.memory.regions[0];
+
+	struct region r1 = {
+		.base = SZ_8M,
+		.size = SZ_32M
+	};
+	struct region r2 = {
+		.base = SZ_16M,
+		.size = SZ_1M
+	};
+
+	reset_memblock();
+	memblock_add(r1.base, r1.size);
+	memblock_add(r2.base, r2.size);
+
+	assert(rgn->base == r1.base);
+	assert(rgn->size == r1.size);
+
+	assert(memblock.memory.cnt == 1);
+	assert(memblock.memory.total_size == r1.size);
+
+	return 0;
+}
+
+/*
+ * A simple test that tries to add the same memory block twice. The counter
+ * and total size of available memory are expected to not be updated.
+ */
+static int memblock_add_twice_check(void)
+{
+	struct region r = {
+		.base = SZ_16K,
+		.size = SZ_2M
+	};
+
+	reset_memblock();
+
+	memblock_add(r.base, r.size);
+	memblock_add(r.base, r.size);
+
+	assert(memblock.memory.cnt == 1);
+	assert(memblock.memory.total_size == r.size);
+
+	return 0;
+}
+
+static int memblock_add_checks(void)
+{
+	memblock_add_simple_check();
+	memblock_add_disjoint_check();
+	memblock_add_overlap_top_check();
+	memblock_add_overlap_bottom_check();
+	memblock_add_within_check();
+	memblock_add_twice_check();
+
+	return 0;
+}
+
 int memblock_basic_checks(void)
 {
 	memblock_initialization_check();
+	memblock_add_checks();
 	return 0;
 }
-- 
cgit 


From 83787a803793881a2e6cf47ea0706cc8fe4573d7 Mon Sep 17 00:00:00 2001
From: Karolina Drobnik <karolinadrobnik@gmail.com>
Date: Wed, 2 Feb 2022 12:03:12 +0100
Subject: memblock tests: Add memblock_reserve tests

Add checks for marking a region as reserved in different scenarios:
 - The region does not overlap with existing entries
 - The region overlaps with one of the previous entries: from the top
   (its end address is bigger than the base of the existing region) or
   from the bottom (its base address is smaller than the end address of
   one of the regions)
 - The region is within an already defined region
 - The same region is marked as reserved twice

Signed-off-by: Karolina Drobnik <karolinadrobnik@gmail.com>
Signed-off-by: Mike Rapoport <rppt@kernel.org>
Link: https://lore.kernel.org/r/cac867d2b6c17e53d9e977b5d6cd88cc4e9453b6.1643796665.git.karolinadrobnik@gmail.com
---
 tools/testing/memblock/tests/basic_api.c | 217 +++++++++++++++++++++++++++++++
 1 file changed, 217 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c
index 01dd64b9a41b..70f62aae9df9 100644
--- a/tools/testing/memblock/tests/basic_api.c
+++ b/tools/testing/memblock/tests/basic_api.c
@@ -239,9 +239,226 @@ static int memblock_add_checks(void)
 	return 0;
 }
 
+ /*
+  * A simple test that marks a memory block of a specified base address
+  * and size as reserved and to the collection of reserved memory regions
+  * (memblock.reserved). It checks if a new entry was created and if region
+  * counter and total memory size were correctly updated.
+  */
+static int memblock_reserve_simple_check(void)
+{
+	struct memblock_region *rgn;
+
+	rgn =  &memblock.reserved.regions[0];
+
+	struct region r = {
+		.base = SZ_2G,
+		.size = SZ_128M
+	};
+
+	reset_memblock();
+	memblock_reserve(r.base, r.size);
+
+	assert(rgn->base == r.base);
+	assert(rgn->size == r.size);
+
+	return 0;
+}
+
+/*
+ * A test that tries to mark two memory blocks that don't overlap as reserved
+ * and checks if two entries were correctly added to the collection of reserved
+ * memory regions (memblock.reserved) and if this change was reflected in
+ * memblock.reserved's total size and region counter.
+ */
+static int memblock_reserve_disjoint_check(void)
+{
+	struct memblock_region *rgn1, *rgn2;
+
+	rgn1 = &memblock.reserved.regions[0];
+	rgn2 = &memblock.reserved.regions[1];
+
+	struct region r1 = {
+		.base = SZ_256M,
+		.size = SZ_16M
+	};
+	struct region r2 = {
+		.base = SZ_512M,
+		.size = SZ_512M
+	};
+
+	reset_memblock();
+	memblock_reserve(r1.base, r1.size);
+	memblock_reserve(r2.base, r2.size);
+
+	assert(rgn1->base == r1.base);
+	assert(rgn1->size == r1.size);
+
+	assert(rgn2->base == r2.base);
+	assert(rgn2->size == r2.size);
+
+	assert(memblock.reserved.cnt == 2);
+	assert(memblock.reserved.total_size == r1.size + r2.size);
+
+	return 0;
+}
+
+/*
+ * A test that tries to mark two memory blocks as reserved, where the
+ * second one overlaps with the beginning of the first (that is
+ * r1.base < r2.base + r2.size).
+ * It checks if two entries are merged into one region that starts at r2.base
+ * and has size of two regions minus their intersection. The test also verifies
+ * that memblock can still see only one entry and has a correct total size of
+ * the reserved memory.
+ */
+static int memblock_reserve_overlap_top_check(void)
+{
+	struct memblock_region *rgn;
+	phys_addr_t total_size;
+
+	rgn = &memblock.reserved.regions[0];
+
+	struct region r1 = {
+		.base = SZ_1G,
+		.size = SZ_1G
+	};
+	struct region r2 = {
+		.base = SZ_128M,
+		.size = SZ_1G
+	};
+
+	total_size = (r1.base - r2.base) + r1.size;
+
+	reset_memblock();
+	memblock_reserve(r1.base, r1.size);
+	memblock_reserve(r2.base, r2.size);
+
+	assert(rgn->base == r2.base);
+	assert(rgn->size == total_size);
+
+	assert(memblock.reserved.cnt == 1);
+	assert(memblock.reserved.total_size == total_size);
+
+	return 0;
+}
+
+/*
+ * A test that tries to mark two memory blocks as reserved, where the
+ * second one overlaps with the end of the first entry (that is
+ * r2.base < r1.base + r1.size).
+ * It checks if two entries are merged into one region that starts at r1.base
+ * and has size of two regions minus their intersection. It verifies that
+ * memblock can still see only one entry and has a correct total size of the
+ * reserved memory.
+ */
+static int memblock_reserve_overlap_bottom_check(void)
+{
+	struct memblock_region *rgn;
+	phys_addr_t total_size;
+
+	rgn = &memblock.reserved.regions[0];
+
+	struct region r1 = {
+		.base = SZ_2K,
+		.size = SZ_128K
+	};
+	struct region r2 = {
+		.base = SZ_128K,
+		.size = SZ_128K
+	};
+
+	total_size = (r2.base - r1.base) + r2.size;
+
+	reset_memblock();
+	memblock_reserve(r1.base, r1.size);
+	memblock_reserve(r2.base, r2.size);
+
+	assert(rgn->base == r1.base);
+	assert(rgn->size == total_size);
+
+	assert(memblock.reserved.cnt == 1);
+	assert(memblock.reserved.total_size == total_size);
+
+	return 0;
+}
+
+/*
+ * A test that tries to mark two memory blocks as reserved, where the second
+ * one is within the range of the first entry (that is
+ * (r1.base < r2.base) && (r2.base + r2.size < r1.base + r1.size)).
+ * It checks if two entries are merged into one region that stays the
+ * same. The counter and total size of available memory are expected to not be
+ * updated.
+ */
+static int memblock_reserve_within_check(void)
+{
+	struct memblock_region *rgn;
+
+	rgn = &memblock.reserved.regions[0];
+
+	struct region r1 = {
+		.base = SZ_1M,
+		.size = SZ_8M
+	};
+	struct region r2 = {
+		.base = SZ_2M,
+		.size = SZ_64K
+	};
+
+	reset_memblock();
+	memblock_reserve(r1.base, r1.size);
+	memblock_reserve(r2.base, r2.size);
+
+	assert(rgn->base == r1.base);
+	assert(rgn->size == r1.size);
+
+	assert(memblock.reserved.cnt == 1);
+	assert(memblock.reserved.total_size == r1.size);
+
+	return 0;
+}
+
+/*
+ * A simple test that tries to reserve the same memory block twice.
+ * The region counter and total size of reserved memory are expected to not
+ * be updated.
+ */
+static int memblock_reserve_twice_check(void)
+{
+	struct region r = {
+		.base = SZ_16K,
+		.size = SZ_2M
+	};
+
+	reset_memblock();
+
+	memblock_reserve(r.base, r.size);
+	memblock_reserve(r.base, r.size);
+
+	assert(memblock.reserved.cnt == 1);
+	assert(memblock.reserved.total_size == r.size);
+
+	return 0;
+}
+
+static int memblock_reserve_checks(void)
+{
+	memblock_reserve_simple_check();
+	memblock_reserve_disjoint_check();
+	memblock_reserve_overlap_top_check();
+	memblock_reserve_overlap_bottom_check();
+	memblock_reserve_within_check();
+	memblock_reserve_twice_check();
+
+	return 0;
+}
+
 int memblock_basic_checks(void)
 {
 	memblock_initialization_check();
 	memblock_add_checks();
+	memblock_reserve_checks();
+
 	return 0;
 }
-- 
cgit 


From b4d968931e5e07cdff262bfd9b60291d072ad0d7 Mon Sep 17 00:00:00 2001
From: Karolina Drobnik <karolinadrobnik@gmail.com>
Date: Wed, 2 Feb 2022 12:03:13 +0100
Subject: memblock tests: Add memblock_remove tests

Add checks for removing a region from available memory in different
scenarios:
 - The requested region matches one in the collection of available
   memory regions
 - The requested region does not exist in memblock.memory
 - The region overlaps with one of the entries: from the top (its end
   address is bigger than the base of the existing region) or from the
   bottom (its base address is smaller than the end address of one of
   the regions)
 - The region is within an already defined region

Signed-off-by: Karolina Drobnik <karolinadrobnik@gmail.com>
Signed-off-by: Mike Rapoport <rppt@kernel.org>
Link: https://lore.kernel.org/r/8e6aa005407bbe1a75b75e85ac04ebb51318a52a.1643796665.git.karolinadrobnik@gmail.com
---
 tools/testing/memblock/tests/basic_api.c | 205 +++++++++++++++++++++++++++++++
 1 file changed, 205 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c
index 70f62aae9df9..8a0d2ffa4e21 100644
--- a/tools/testing/memblock/tests/basic_api.c
+++ b/tools/testing/memblock/tests/basic_api.c
@@ -454,11 +454,216 @@ static int memblock_reserve_checks(void)
 	return 0;
 }
 
+ /*
+  * A simple test that tries to remove the first entry of the array of
+  * available memory regions. By "removing" a region we mean overwriting it
+  * with the next region in memblock.memory. To check this is the case, the
+  * test adds two memory blocks and verifies that the value of the latter
+  * was used to erase r1 region.  It also checks if the region counter and
+  * total size were updated to expected values.
+  */
+static int memblock_remove_simple_check(void)
+{
+	struct memblock_region *rgn;
+
+	rgn = &memblock.memory.regions[0];
+
+	struct region r1 = {
+		.base = SZ_2K,
+		.size = SZ_4K
+	};
+	struct region r2 = {
+		.base = SZ_128K,
+		.size = SZ_4M
+	};
+
+	reset_memblock();
+	memblock_add(r1.base, r1.size);
+	memblock_add(r2.base, r2.size);
+	memblock_remove(r1.base, r1.size);
+
+	assert(rgn->base == r2.base);
+	assert(rgn->size == r2.size);
+
+	assert(memblock.memory.cnt == 1);
+	assert(memblock.memory.total_size == r2.size);
+
+	return 0;
+}
+
+ /*
+  * A test that tries to remove a region that was not registered as available
+  * memory (i.e. has no corresponding entry in memblock.memory). It verifies
+  * that array, regions counter and total size were not modified.
+  */
+static int memblock_remove_absent_check(void)
+{
+	struct memblock_region *rgn;
+
+	rgn = &memblock.memory.regions[0];
+
+	struct region r1 = {
+		.base = SZ_512K,
+		.size = SZ_4M
+	};
+	struct region r2 = {
+		.base = SZ_64M,
+		.size = SZ_1G
+	};
+
+	reset_memblock();
+	memblock_add(r1.base, r1.size);
+	memblock_remove(r2.base, r2.size);
+
+	assert(rgn->base == r1.base);
+	assert(rgn->size == r1.size);
+
+	assert(memblock.memory.cnt == 1);
+	assert(memblock.memory.total_size == r1.size);
+
+	return 0;
+}
+
+/*
+ * A test that tries to remove a region which overlaps with the beginning of
+ * the already existing entry r1 (that is r1.base < r2.base + r2.size). It
+ * checks if only the intersection of both regions is removed from the available
+ * memory pool. The test also checks if the regions counter and total size are
+ * updated to expected values.
+ */
+static int memblock_remove_overlap_top_check(void)
+{
+	struct memblock_region *rgn;
+	phys_addr_t r1_end, r2_end, total_size;
+
+	rgn = &memblock.memory.regions[0];
+
+	struct region r1 = {
+		.base = SZ_32M,
+		.size = SZ_32M
+	};
+	struct region r2 = {
+		.base = SZ_16M,
+		.size = SZ_32M
+	};
+
+	r1_end = r1.base + r1.size;
+	r2_end = r2.base + r2.size;
+	total_size = r1_end - r2_end;
+
+	reset_memblock();
+	memblock_add(r1.base, r1.size);
+	memblock_remove(r2.base, r2.size);
+
+	assert(rgn->base == r1.base + r2.base);
+	assert(rgn->size == total_size);
+
+	assert(memblock.memory.cnt == 1);
+	assert(memblock.memory.total_size == total_size);
+
+	return 0;
+}
+
+/*
+ * A test that tries to remove a region which overlaps with the end of the
+ * first entry (that is r2.base < r1.base + r1.size). It checks if only the
+ * intersection of both regions is removed from the available memory pool.
+ * The test also checks if the regions counter and total size are updated to
+ * expected values.
+ */
+static int memblock_remove_overlap_bottom_check(void)
+{
+	struct memblock_region *rgn;
+	phys_addr_t total_size;
+
+	rgn = &memblock.memory.regions[0];
+
+	struct region r1 = {
+		.base = SZ_2M,
+		.size = SZ_64M
+	};
+	struct region r2 = {
+		.base = SZ_32M,
+		.size = SZ_256M
+	};
+
+	total_size = r2.base - r1.base;
+
+	reset_memblock();
+	memblock_add(r1.base, r1.size);
+	memblock_remove(r2.base, r2.size);
+
+	assert(rgn->base == r1.base);
+	assert(rgn->size == total_size);
+
+	assert(memblock.memory.cnt == 1);
+	assert(memblock.memory.total_size == total_size);
+	return 0;
+}
+
+/*
+ * A test that tries to remove a region which is within the range of the
+ * already existing entry (that is
+ * (r1.base < r2.base) && (r2.base + r2.size < r1.base + r1.size)).
+ * It checks if the region is split into two - one that ends at r2.base and
+ * second that starts at r2.base + size, with appropriate sizes. The test
+ * also checks if the region counter and total size were updated to
+ * expected values.
+ */
+static int memblock_remove_within_check(void)
+{
+	struct memblock_region *rgn1, *rgn2;
+	phys_addr_t r1_size, r2_size, total_size;
+
+	rgn1 = &memblock.memory.regions[0];
+	rgn2 = &memblock.memory.regions[1];
+
+	struct region r1 = {
+		.base = SZ_1M,
+		.size = SZ_32M
+	};
+	struct region r2 = {
+		.base = SZ_16M,
+		.size = SZ_1M
+	};
+
+	r1_size = r2.base - r1.base;
+	r2_size = (r1.base + r1.size) - (r2.base + r2.size);
+	total_size = r1_size + r2_size;
+
+	reset_memblock();
+	memblock_add(r1.base, r1.size);
+	memblock_remove(r2.base, r2.size);
+
+	assert(rgn1->base == r1.base);
+	assert(rgn1->size == r1_size);
+
+	assert(rgn2->base == r2.base + r2.size);
+	assert(rgn2->size == r2_size);
+
+	assert(memblock.memory.cnt == 2);
+	assert(memblock.memory.total_size == total_size);
+
+	return 0;
+}
+
+static int memblock_remove_checks(void)
+{
+	memblock_remove_simple_check();
+	memblock_remove_absent_check();
+	memblock_remove_overlap_top_check();
+	memblock_remove_overlap_bottom_check();
+	memblock_remove_within_check();
+
+	return 0;
+}
+
 int memblock_basic_checks(void)
 {
 	memblock_initialization_check();
 	memblock_add_checks();
 	memblock_reserve_checks();
+	memblock_remove_checks();
 
 	return 0;
 }
-- 
cgit 


From e393c093ec7640dc42ad63fb414a9680b69f26f1 Mon Sep 17 00:00:00 2001
From: Karolina Drobnik <karolinadrobnik@gmail.com>
Date: Wed, 2 Feb 2022 12:03:14 +0100
Subject: memblock tests: Add memblock_add_node test

Add a simple test for NUMA-aware variant of memblock_add function.

Signed-off-by: Karolina Drobnik <karolinadrobnik@gmail.com>
Signed-off-by: Mike Rapoport <rppt@kernel.org>
Link: https://lore.kernel.org/r/e2d0e6dd264c8c169242b556f7c5b12153f3dee5.1643796665.git.karolinadrobnik@gmail.com
---
 tools/testing/memblock/tests/basic_api.c | 34 ++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c
index 8a0d2ffa4e21..2194a141cfe6 100644
--- a/tools/testing/memblock/tests/basic_api.c
+++ b/tools/testing/memblock/tests/basic_api.c
@@ -55,6 +55,39 @@ static int memblock_add_simple_check(void)
 	return 0;
 }
 
+/*
+ * A simple test that adds a memory block of a specified base address, size
+ * NUMA node and memory flags to the collection of available memory regions.
+ * It checks if the new entry, region counter and total memory size have
+ * expected values.
+ */
+static int memblock_add_node_simple_check(void)
+{
+	struct memblock_region *rgn;
+
+	rgn = &memblock.memory.regions[0];
+
+	struct region r = {
+		.base = SZ_1M,
+		.size = SZ_16M
+	};
+
+	reset_memblock();
+	memblock_add_node(r.base, r.size, 1, MEMBLOCK_HOTPLUG);
+
+	assert(rgn->base == r.base);
+	assert(rgn->size == r.size);
+#ifdef CONFIG_NUMA
+	assert(rgn->nid == 1);
+#endif
+	assert(rgn->flags == MEMBLOCK_HOTPLUG);
+
+	assert(memblock.memory.cnt == 1);
+	assert(memblock.memory.total_size == r.size);
+
+	return 0;
+}
+
 /*
  * A test that tries to add two memory blocks that don't overlap with one
  * another. It checks if two correctly initialized entries were added to the
@@ -230,6 +263,7 @@ static int memblock_add_twice_check(void)
 static int memblock_add_checks(void)
 {
 	memblock_add_simple_check();
+	memblock_add_node_simple_check();
 	memblock_add_disjoint_check();
 	memblock_add_overlap_top_check();
 	memblock_add_overlap_bottom_check();
-- 
cgit 


From dd45dc071101a45619cbaff0f77d07abdf25caf0 Mon Sep 17 00:00:00 2001
From: Karolina Drobnik <karolinadrobnik@gmail.com>
Date: Wed, 2 Feb 2022 12:03:15 +0100
Subject: memblock tests: Add memblock_free tests

Add checks for removing a region from reserved memory in different
scenarios:
 - The requested region matches one in the collection of reserved
   memory regions
 - The requested region does not exist in memblock.reserved
 - The region overlaps with one of the entries: from the top (its
   end address is bigger than the base of the existing region) or
   from the bottom (its base address is smaller than the end address
   of one of the regions)
 - The region is within an already defined region

Signed-off-by: Karolina Drobnik <karolinadrobnik@gmail.com>
Signed-off-by: Mike Rapoport <rppt@kernel.org>
Link: https://lore.kernel.org/r/30af95c82754ad8029404c3b528a5ef1c05d1ed6.1643796665.git.karolinadrobnik@gmail.com
---
 tools/testing/memblock/tests/basic_api.c | 203 +++++++++++++++++++++++++++++++
 1 file changed, 203 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c
index 2194a141cfe6..fbb989f6ddbf 100644
--- a/tools/testing/memblock/tests/basic_api.c
+++ b/tools/testing/memblock/tests/basic_api.c
@@ -692,12 +692,215 @@ static int memblock_remove_checks(void)
 	return 0;
 }
 
+/*
+ * A simple test that tries to free a memory block that was marked earlier
+ * as reserved. By "freeing" a region we mean overwriting it with the next
+ * entry in memblock.reserved. To check this is the case, the test reserves
+ * two memory regions and verifies that the value of the latter was used to
+ * erase r1 region.
+ * The test also checks if the region counter and total size were updated.
+ */
+static int memblock_free_simple_check(void)
+{
+	struct memblock_region *rgn;
+
+	rgn = &memblock.reserved.regions[0];
+
+	struct region r1 = {
+		.base = SZ_4M,
+		.size = SZ_1M
+	};
+	struct region r2 = {
+		.base = SZ_8M,
+		.size = SZ_1M
+	};
+
+	reset_memblock();
+	memblock_reserve(r1.base, r1.size);
+	memblock_reserve(r2.base, r2.size);
+	memblock_free((void *)r1.base, r1.size);
+
+	assert(rgn->base == r2.base);
+	assert(rgn->size == r2.size);
+
+	assert(memblock.reserved.cnt == 1);
+	assert(memblock.reserved.total_size == r2.size);
+
+	return 0;
+}
+
+ /*
+  * A test that tries to free a region that was not marked as reserved
+  * (i.e. has no corresponding entry in memblock.reserved). It verifies
+  * that array, regions counter and total size were not modified.
+  */
+static int memblock_free_absent_check(void)
+{
+	struct memblock_region *rgn;
+
+	rgn = &memblock.reserved.regions[0];
+
+	struct region r1 = {
+		.base = SZ_2M,
+		.size = SZ_8K
+	};
+	struct region r2 = {
+		.base = SZ_16M,
+		.size = SZ_128M
+	};
+
+	reset_memblock();
+	memblock_reserve(r1.base, r1.size);
+	memblock_free((void *)r2.base, r2.size);
+
+	assert(rgn->base == r1.base);
+	assert(rgn->size == r1.size);
+
+	assert(memblock.reserved.cnt == 1);
+	assert(memblock.reserved.total_size == r1.size);
+
+	return 0;
+}
+
+/*
+ * A test that tries to free a region which overlaps with the beginning of
+ * the already existing entry r1 (that is r1.base < r2.base + r2.size). It
+ * checks if only the intersection of both regions is freed. The test also
+ * checks if the regions counter and total size are updated to expected
+ * values.
+ */
+static int memblock_free_overlap_top_check(void)
+{
+	struct memblock_region *rgn;
+	phys_addr_t total_size;
+
+	rgn = &memblock.reserved.regions[0];
+
+	struct region r1 = {
+		.base = SZ_8M,
+		.size = SZ_32M
+	};
+	struct region r2 = {
+		.base = SZ_1M,
+		.size = SZ_8M
+	};
+
+	total_size = (r1.size + r1.base) - (r2.base + r2.size);
+
+	reset_memblock();
+	memblock_reserve(r1.base, r1.size);
+	memblock_free((void *)r2.base, r2.size);
+
+	assert(rgn->base == r2.base + r2.size);
+	assert(rgn->size == total_size);
+
+	assert(memblock.reserved.cnt == 1);
+	assert(memblock.reserved.total_size == total_size);
+
+	return 0;
+}
+
+/*
+ * A test that tries to free a region which overlaps with the end of the
+ * first entry (that is r2.base < r1.base + r1.size). It checks if only the
+ * intersection of both regions is freed. The test also checks if the
+ * regions counter and total size are updated to expected values.
+ */
+static int memblock_free_overlap_bottom_check(void)
+{
+	struct memblock_region *rgn;
+	phys_addr_t total_size;
+
+	rgn = &memblock.reserved.regions[0];
+
+	struct region r1 = {
+		.base = SZ_8M,
+		.size = SZ_32M
+	};
+	struct region r2 = {
+		.base = SZ_32M,
+		.size = SZ_32M
+	};
+
+	total_size = r2.base - r1.base;
+
+	reset_memblock();
+	memblock_reserve(r1.base, r1.size);
+	memblock_free((void *)r2.base, r2.size);
+
+	assert(rgn->base == r1.base);
+	assert(rgn->size == total_size);
+
+	assert(memblock.reserved.cnt == 1);
+	assert(memblock.reserved.total_size == total_size);
+
+	return 0;
+}
+
+/*
+ * A test that tries to free a region which is within the range of the
+ * already existing entry (that is
+ * (r1.base < r2.base) && (r2.base + r2.size < r1.base + r1.size)).
+ * It checks if the region is split into two - one that ends at r2.base and
+ * second that starts at r2.base + size, with appropriate sizes. It is
+ * expected that the region counter and total size fields were updated t
+ * reflect that change.
+ */
+static int memblock_free_within_check(void)
+{
+	struct memblock_region *rgn1, *rgn2;
+	phys_addr_t r1_size, r2_size, total_size;
+
+	rgn1 = &memblock.reserved.regions[0];
+	rgn2 = &memblock.reserved.regions[1];
+
+	struct region r1 = {
+		.base = SZ_1M,
+		.size = SZ_8M
+	};
+	struct region r2 = {
+		.base = SZ_4M,
+		.size = SZ_1M
+	};
+
+	r1_size = r2.base - r1.base;
+	r2_size = (r1.base + r1.size) - (r2.base + r2.size);
+	total_size = r1_size + r2_size;
+
+	reset_memblock();
+	memblock_reserve(r1.base, r1.size);
+	memblock_free((void *)r2.base, r2.size);
+
+	assert(rgn1->base == r1.base);
+	assert(rgn1->size == r1_size);
+
+	assert(rgn2->base == r2.base + r2.size);
+	assert(rgn2->size == r2_size);
+
+	assert(memblock.reserved.cnt == 2);
+	assert(memblock.reserved.total_size == total_size);
+
+	return 0;
+}
+
+static int memblock_free_checks(void)
+{
+	memblock_free_simple_check();
+	memblock_free_absent_check();
+	memblock_free_overlap_top_check();
+	memblock_free_overlap_bottom_check();
+	memblock_free_within_check();
+
+	return 0;
+}
+
 int memblock_basic_checks(void)
 {
 	memblock_initialization_check();
 	memblock_add_checks();
 	memblock_reserve_checks();
 	memblock_remove_checks();
+	memblock_free_checks();
 
 	return 0;
 }
-- 
cgit 


From 25bd462fa42f58ca43c881b486726bb81be5aa2b Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Sat, 19 Feb 2022 17:45:20 +0200
Subject: selftests: fib_test: Add a test case for IPv4 broadcast neighbours

Test that resolved neighbours for IPv4 broadcast addresses are
unaffected by the configuration of matching broadcast routes, whereas
unresolved neighbours are invalidated.

Without previous patch:

 # ./fib_tests.sh -t ipv4_bcast_neigh

 IPv4 broadcast neighbour tests
     TEST: Resolved neighbour for broadcast address                      [ OK ]
     TEST: Resolved neighbour for network broadcast address              [ OK ]
     TEST: Unresolved neighbour for broadcast address                    [FAIL]
     TEST: Unresolved neighbour for network broadcast address            [FAIL]

 Tests passed:   2
 Tests failed:   2

With previous patch:

 # ./fib_tests.sh -t ipv4_bcast_neigh

 IPv4 broadcast neighbour tests
     TEST: Resolved neighbour for broadcast address                      [ OK ]
     TEST: Resolved neighbour for network broadcast address              [ OK ]
     TEST: Unresolved neighbour for broadcast address                    [ OK ]
     TEST: Unresolved neighbour for network broadcast address            [ OK ]

 Tests passed:   4
 Tests failed:   0

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/fib_tests.sh | 58 +++++++++++++++++++++++++++++++-
 1 file changed, 57 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index e2690cc42da3..2271a8727f62 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -9,7 +9,7 @@ ret=0
 ksft_skip=4
 
 # all tests in this script. Can be overridden with -t option
-TESTS="unregister down carrier nexthop suppress ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr ipv4_mangle ipv6_mangle"
+TESTS="unregister down carrier nexthop suppress ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh"
 
 VERBOSE=0
 PAUSE_ON_FAIL=no
@@ -1954,6 +1954,61 @@ ipv6_mangle_test()
 	route_cleanup
 }
 
+ip_neigh_get_check()
+{
+	ip neigh help 2>&1 | grep -q 'ip neigh get'
+	if [ $? -ne 0 ]; then
+		echo "iproute2 command does not support neigh get. Skipping test"
+		return 1
+	fi
+
+	return 0
+}
+
+ipv4_bcast_neigh_test()
+{
+	local rc
+
+	echo
+	echo "IPv4 broadcast neighbour tests"
+
+	ip_neigh_get_check || return 1
+
+	setup
+
+	set -e
+	run_cmd "$IP neigh add 192.0.2.111 lladdr 00:11:22:33:44:55 nud perm dev dummy0"
+	run_cmd "$IP neigh add 192.0.2.255 lladdr 00:11:22:33:44:55 nud perm dev dummy0"
+
+	run_cmd "$IP neigh get 192.0.2.111 dev dummy0"
+	run_cmd "$IP neigh get 192.0.2.255 dev dummy0"
+
+	run_cmd "$IP address add 192.0.2.1/24 broadcast 192.0.2.111 dev dummy0"
+
+	run_cmd "$IP neigh add 203.0.113.111 nud failed dev dummy0"
+	run_cmd "$IP neigh add 203.0.113.255 nud failed dev dummy0"
+
+	run_cmd "$IP neigh get 203.0.113.111 dev dummy0"
+	run_cmd "$IP neigh get 203.0.113.255 dev dummy0"
+
+	run_cmd "$IP address add 203.0.113.1/24 broadcast 203.0.113.111 dev dummy0"
+	set +e
+
+	run_cmd "$IP neigh get 192.0.2.111 dev dummy0"
+	log_test $? 0 "Resolved neighbour for broadcast address"
+
+	run_cmd "$IP neigh get 192.0.2.255 dev dummy0"
+	log_test $? 0 "Resolved neighbour for network broadcast address"
+
+	run_cmd "$IP neigh get 203.0.113.111 dev dummy0"
+	log_test $? 2 "Unresolved neighbour for broadcast address"
+
+	run_cmd "$IP neigh get 203.0.113.255 dev dummy0"
+	log_test $? 2 "Unresolved neighbour for network broadcast address"
+
+	cleanup
+}
+
 ################################################################################
 # usage
 
@@ -2028,6 +2083,7 @@ do
 	ipv4_route_v6_gw)		ipv4_route_v6_gw_test;;
 	ipv4_mangle)			ipv4_mangle_test;;
 	ipv6_mangle)			ipv6_mangle_test;;
+	ipv4_bcast_neigh)		ipv4_bcast_neigh_test;;
 
 	help) echo "Test names: $TESTS"; exit 0;;
 	esac
-- 
cgit 


From 13c6a37d409db9abc9c0bfc6d0a2f07bf0fff60e Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Sun, 20 Feb 2022 08:01:38 +0530
Subject: selftests/bpf: Add test for reg2btf_ids out of bounds access

This test tries to pass a PTR_TO_BTF_ID_OR_NULL to the release function,
which would trigger a out of bounds access without the fix in commit
45ce4b4f9009 ("bpf: Fix crash due to out of bounds access into reg2btf_ids.")
but after the fix, it should only index using base_type(reg->type),
which should be less than __BPF_REG_TYPE_MAX, and also not permit any
type flags to be set for the reg->type.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220220023138.2224652-1-memxor@gmail.com
---
 tools/testing/selftests/bpf/verifier/calls.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 829be2b9e08e..0a8ea60c2a80 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -96,6 +96,25 @@
 		{ "bpf_kfunc_call_test_mem_len_fail1", 2 },
 	},
 },
+{
+	"calls: trigger reg2btf_ids[reg->type] for reg->type > __BPF_REG_TYPE_MAX",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+	BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.result = REJECT,
+	.errstr = "arg#0 pointer type STRUCT prog_test_ref_kfunc must point",
+	.fixup_kfunc_btf_id = {
+		{ "bpf_kfunc_call_test_acquire", 3 },
+		{ "bpf_kfunc_call_test_release", 5 },
+	},
+},
 {
 	"calls: basic sanity",
 	.insns = {
-- 
cgit 


From f64ae40de5efaa33c36f4e2226b33824ba1b42a7 Mon Sep 17 00:00:00 2001
From: Maciek Machnikowski <maciek@machnikowski.net>
Date: Mon, 21 Feb 2022 21:06:37 +0100
Subject: testptp: add option to shift clock by nanoseconds

Add option to shift the clock by a specified number of nanoseconds.

The new argument -n will specify the number of nanoseconds to add to the
ptp clock. Since the API doesn't support negative shifts those needs to
be calculated by subtracting full seconds and adding a nanosecond offset.

Signed-off-by: Maciek Machnikowski <maciek@machnikowski.net>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Link: https://lore.kernel.org/r/20220221200637.125595-1-maciek@machnikowski.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/ptp/testptp.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c
index c0f6a062364d..198ad5f32187 100644
--- a/tools/testing/selftests/ptp/testptp.c
+++ b/tools/testing/selftests/ptp/testptp.c
@@ -133,6 +133,7 @@ static void usage(char *progname)
 		"            0 - none\n"
 		"            1 - external time stamp\n"
 		"            2 - periodic output\n"
+		" -n val     shift the ptp clock time by 'val' nanoseconds\n"
 		" -p val     enable output with a period of 'val' nanoseconds\n"
 		" -H val     set output phase to 'val' nanoseconds (requires -p)\n"
 		" -w val     set output pulse width to 'val' nanoseconds (requires -p)\n"
@@ -165,6 +166,7 @@ int main(int argc, char *argv[])
 	clockid_t clkid;
 	int adjfreq = 0x7fffffff;
 	int adjtime = 0;
+	int adjns = 0;
 	int capabilities = 0;
 	int extts = 0;
 	int flagtest = 0;
@@ -186,7 +188,7 @@ int main(int argc, char *argv[])
 
 	progname = strrchr(argv[0], '/');
 	progname = progname ? 1+progname : argv[0];
-	while (EOF != (c = getopt(argc, argv, "cd:e:f:ghH:i:k:lL:p:P:sSt:T:w:z"))) {
+	while (EOF != (c = getopt(argc, argv, "cd:e:f:ghH:i:k:lL:n:p:P:sSt:T:w:z"))) {
 		switch (c) {
 		case 'c':
 			capabilities = 1;
@@ -223,6 +225,9 @@ int main(int argc, char *argv[])
 				return -1;
 			}
 			break;
+		case 'n':
+			adjns = atoi(optarg);
+			break;
 		case 'p':
 			perout = atoll(optarg);
 			break;
@@ -305,11 +310,16 @@ int main(int argc, char *argv[])
 		}
 	}
 
-	if (adjtime) {
+	if (adjtime || adjns) {
 		memset(&tx, 0, sizeof(tx));
-		tx.modes = ADJ_SETOFFSET;
+		tx.modes = ADJ_SETOFFSET | ADJ_NANO;
 		tx.time.tv_sec = adjtime;
-		tx.time.tv_usec = 0;
+		tx.time.tv_usec = adjns;
+		while (tx.time.tv_usec < 0) {
+			tx.time.tv_sec  -= 1;
+			tx.time.tv_usec += 1000000000;
+		}
+
 		if (clock_adjtime(clkid, &tx) < 0) {
 			perror("clock_adjtime");
 		} else {
-- 
cgit 


From b2b681a412517bf477238de62b1d227361fa04fe Mon Sep 17 00:00:00 2001
From: Hans Schultz <schultz.hans@gmail.com>
Date: Wed, 23 Feb 2022 11:16:50 +0100
Subject: selftests: forwarding: tests of locked port feature

These tests check that the basic locked port feature works, so that
no 'host' can communicate (ping) through a locked port unless the
MAC address of the 'host' interface is in the forwarding database of
the bridge.

Signed-off-by: Hans Schultz <schultz.hans+netdev@gmail.com>
Acked-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/Makefile    |   1 +
 .../selftests/net/forwarding/bridge_locked_port.sh | 180 +++++++++++++++++++++
 tools/testing/selftests/net/forwarding/lib.sh      |   8 +
 3 files changed, 189 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/bridge_locked_port.sh

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index 72ee644d47bf..8fa97ae9af9e 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0+ OR MIT
 
 TEST_PROGS = bridge_igmp.sh \
+	bridge_locked_port.sh \
 	bridge_port_isolation.sh \
 	bridge_sticky_fdb.sh \
 	bridge_vlan_aware.sh \
diff --git a/tools/testing/selftests/net/forwarding/bridge_locked_port.sh b/tools/testing/selftests/net/forwarding/bridge_locked_port.sh
new file mode 100755
index 000000000000..6e98efa6d371
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_locked_port.sh
@@ -0,0 +1,180 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="locked_port_ipv4 locked_port_ipv6 locked_port_vlan"
+NUM_NETIFS=4
+CHECK_TC="no"
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+	vrf_create "vrf-vlan-h1"
+	ip link set dev vrf-vlan-h1 up
+	vlan_create $h1 100 vrf-vlan-h1 198.51.100.1/24
+}
+
+h1_destroy()
+{
+	vlan_destroy $h1 100
+	simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64
+	vrf_create "vrf-vlan-h2"
+	ip link set dev vrf-vlan-h2 up
+	vlan_create $h2 100 vrf-vlan-h2 198.51.100.2/24
+}
+
+h2_destroy()
+{
+	vlan_destroy $h2 100
+	simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+switch_create()
+{
+	ip link add dev br0 type bridge vlan_filtering 1
+
+	ip link set dev $swp1 master br0
+	ip link set dev $swp2 master br0
+
+	ip link set dev br0 up
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+
+	bridge link set dev $swp1 learning off
+}
+
+switch_destroy()
+{
+	ip link set dev $swp2 down
+	ip link set dev $swp1 down
+
+	ip link del dev br0
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+locked_port_ipv4()
+{
+	RET=0
+
+	check_locked_port_support || return 0
+
+	ping_do $h1 192.0.2.2
+	check_err $? "Ping did not work before locking port"
+
+	bridge link set dev $swp1 locked on
+
+	ping_do $h1 192.0.2.2
+	check_fail $? "Ping worked after locking port, but before adding FDB entry"
+
+	bridge fdb add `mac_get $h1` dev $swp1 master static
+
+	ping_do $h1 192.0.2.2
+	check_err $? "Ping did not work after locking port and adding FDB entry"
+
+	bridge link set dev $swp1 locked off
+	bridge fdb del `mac_get $h1` dev $swp1 master static
+
+	ping_do $h1 192.0.2.2
+	check_err $? "Ping did not work after unlocking port and removing FDB entry."
+
+	log_test "Locked port ipv4"
+}
+
+locked_port_vlan()
+{
+	RET=0
+
+	check_locked_port_support || return 0
+
+	bridge vlan add vid 100 dev $swp1
+	bridge vlan add vid 100 dev $swp2
+
+	ping_do $h1.100 198.51.100.2
+	check_err $? "Ping through vlan did not work before locking port"
+
+	bridge link set dev $swp1 locked on
+	ping_do $h1.100 198.51.100.2
+	check_fail $? "Ping through vlan worked after locking port, but before adding FDB entry"
+
+	bridge fdb add `mac_get $h1` dev $swp1 vlan 100 master static
+
+	ping_do $h1.100 198.51.100.2
+	check_err $? "Ping through vlan did not work after locking port and adding FDB entry"
+
+	bridge link set dev $swp1 locked off
+	bridge fdb del `mac_get $h1` dev $swp1 vlan 100 master static
+
+	ping_do $h1.100 198.51.100.2
+	check_err $? "Ping through vlan did not work after unlocking port and removing FDB entry"
+
+	bridge vlan del vid 100 dev $swp1
+	bridge vlan del vid 100 dev $swp2
+	log_test "Locked port vlan"
+}
+
+locked_port_ipv6()
+{
+	RET=0
+	check_locked_port_support || return 0
+
+	ping6_do $h1 2001:db8:1::2
+	check_err $? "Ping6 did not work before locking port"
+
+	bridge link set dev $swp1 locked on
+
+	ping6_do $h1 2001:db8:1::2
+	check_fail $? "Ping6 worked after locking port, but before adding FDB entry"
+
+	bridge fdb add `mac_get $h1` dev $swp1 master static
+	ping6_do $h1 2001:db8:1::2
+	check_err $? "Ping6 did not work after locking port and adding FDB entry"
+
+	bridge link set dev $swp1 locked off
+	bridge fdb del `mac_get $h1` dev $swp1 master static
+
+	ping6_do $h1 2001:db8:1::2
+	check_err $? "Ping6 did not work after unlocking port and removing FDB entry"
+
+	log_test "Locked port ipv6"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index e7e434a4758b..159afc7f0979 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -126,6 +126,14 @@ check_ethtool_lanes_support()
 	fi
 }
 
+check_locked_port_support()
+{
+	if ! bridge -d link show | grep -q " locked"; then
+		echo "SKIP: iproute2 too old; Locked port feature not supported."
+		return $ksft_skip
+	fi
+}
+
 if [[ "$(id -u)" -ne 0 ]]; then
 	echo "SKIP: need root privileges"
 	exit $ksft_skip
-- 
cgit 


From 4327b9eaf8a4ddd2c534e4f4ed7c949cf3d2be1e Mon Sep 17 00:00:00 2001
From: David Vernet <void@manifault.com>
Date: Wed, 16 Feb 2022 08:11:01 -0800
Subject: livepatch: Skip livepatch tests if ftrace cannot be configured

livepatch has a set of selftests that are used to validate the behavior of
the livepatching subsystem.  One of the testcases in the livepatch
testsuite is test-ftrace.sh, which among other things, validates that
livepatching gracefully fails when ftrace is disabled.  In the event that
ftrace cannot be disabled using 'sysctl kernel.ftrace_enabled=0', the test
will fail later due to it unexpectedly successfully loading the
test_klp_livepatch module.

While the livepatch selftests are careful to remove any of the livepatch
test modules between testcases to avoid this situation, ftrace may still
fail to be disabled if another trace is active on the system that was
enabled with FTRACE_OPS_FL_PERMANENT.  For example, any active BPF programs
that use trampolines will cause this test to fail due to the trampoline
being implemented with register_ftrace_direct().  The following is an
example of such a trace:

tcp_drop (1) R I D      tramp: ftrace_regs_caller+0x0/0x58
(call_direct_funcs+0x0/0x30)
        direct-->bpf_trampoline_6442550536_0+0x0/0x1000

In order to make the test more resilient to system state that is out of its
control, this patch updates set_ftrace_enabled() to detect sysctl failures,
and skip the testrun when appropriate.

Suggested-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: David Vernet <void@manifault.com>
Acked-by: Miroslav Benes <mbenes@suse.cz>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Tested-by: Petr Mladek <pmladek@suse.com>
Acked-by: Joe Lawrence <joe.lawrence@redhat.com>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20220216161100.3243100-1-void@manifault.com
---
 tools/testing/selftests/livepatch/functions.sh   | 22 +++++++++++++++++++---
 tools/testing/selftests/livepatch/test-ftrace.sh |  3 ++-
 2 files changed, 21 insertions(+), 4 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh
index 846c7ed71556..9230b869371d 100644
--- a/tools/testing/selftests/livepatch/functions.sh
+++ b/tools/testing/selftests/livepatch/functions.sh
@@ -75,9 +75,25 @@ function set_dynamic_debug() {
 }
 
 function set_ftrace_enabled() {
-	result=$(sysctl -q kernel.ftrace_enabled="$1" 2>&1 && \
-		 sysctl kernel.ftrace_enabled 2>&1)
-	echo "livepatch: $result" > /dev/kmsg
+	local can_fail=0
+	if [[ "$1" == "--fail" ]] ; then
+		can_fail=1
+		shift
+	fi
+
+	local err=$(sysctl -q kernel.ftrace_enabled="$1" 2>&1)
+	local result=$(sysctl --values kernel.ftrace_enabled)
+
+	if [[ "$result" != "$1" ]] ; then
+		if [[ $can_fail -eq 1 ]] ; then
+			echo "livepatch: $err" > /dev/kmsg
+			return
+		fi
+
+		skip "failed to set kernel.ftrace_enabled = $1"
+	fi
+
+	echo "livepatch: kernel.ftrace_enabled = $result" > /dev/kmsg
 }
 
 function cleanup() {
diff --git a/tools/testing/selftests/livepatch/test-ftrace.sh b/tools/testing/selftests/livepatch/test-ftrace.sh
index 552e165512f4..825540a5194d 100755
--- a/tools/testing/selftests/livepatch/test-ftrace.sh
+++ b/tools/testing/selftests/livepatch/test-ftrace.sh
@@ -25,7 +25,8 @@ if [[ "$(cat /proc/cmdline)" != "$MOD_LIVEPATCH: this has been live patched" ]]
 	die "livepatch kselftest(s) failed"
 fi
 
-set_ftrace_enabled 0
+# Check that ftrace could not get disabled when a livepatch is enabled
+set_ftrace_enabled --fail 0
 if [[ "$(cat /proc/cmdline)" != "$MOD_LIVEPATCH: this has been live patched" ]] ; then
 	echo -e "FAIL\n\n"
 	die "livepatch kselftest(s) failed"
-- 
cgit 


From e005ff01bfdb653ac2f03614625cb9aefa86fa9c Mon Sep 17 00:00:00 2001
From: Cristian Marussi <cristian.marussi@arm.com>
Date: Wed, 26 Jan 2022 10:32:30 +0000
Subject: selftests/kselftest/runner.sh: Pass optional command parameters in
 environment

Some testcases allow for optional commandline parameters but as of now
there is now way to provide such arguments to the runner script.

Add support to retrieve such optional command parameters fron environment
variables named so as to include the all-uppercase test executable name,
sanitized substituting any non-acceptable varname characters with "_",
following the pattern:

	KSELFTEST_<UPPERCASE_SANITIZED_TEST_NAME>_ARGS="options"

Optional command parameters support is not available if 'tr' is not
installed on the test system.

Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: Cristian Marussi <cristian.marussi@arm.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/kselftest/runner.sh | 30 ++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh
index a9ba782d8ca0..294619ade49f 100644
--- a/tools/testing/selftests/kselftest/runner.sh
+++ b/tools/testing/selftests/kselftest/runner.sh
@@ -18,6 +18,8 @@ if [ -z "$BASE_DIR" ]; then
 	exit 1
 fi
 
+TR_CMD=$(command -v tr)
+
 # If Perl is unavailable, we must fall back to line-at-a-time prefixing
 # with sed instead of unbuffered output.
 tap_prefix()
@@ -49,6 +51,31 @@ run_one()
 
 	# Reset any "settings"-file variables.
 	export kselftest_timeout="$kselftest_default_timeout"
+
+	# Safe default if tr not available
+	kselftest_cmd_args_ref="KSELFTEST_ARGS"
+
+	# Optional arguments for this command, possibly defined as an
+	# environment variable built using the test executable in all
+	# uppercase and sanitized substituting non acceptable shell
+	# variable name characters with "_" as in:
+	#
+	# 	KSELFTEST_<UPPERCASE_SANITIZED_TESTNAME>_ARGS="<options>"
+	#
+	# e.g.
+	#
+	# 	rtctest --> KSELFTEST_RTCTEST_ARGS="/dev/rtc1"
+	#
+	# 	cpu-on-off-test.sh --> KSELFTEST_CPU_ON_OFF_TEST_SH_ARGS="-a -p 10"
+	#
+	if [ -n "$TR_CMD" ]; then
+		BASENAME_SANITIZED=$(echo "$BASENAME_TEST" | \
+					$TR_CMD -d "[:blank:][:cntrl:]" | \
+					$TR_CMD -c "[:alnum:]_" "_" | \
+					$TR_CMD [:lower:] [:upper:])
+		kselftest_cmd_args_ref="KSELFTEST_${BASENAME_SANITIZED}_ARGS"
+	fi
+
 	# Load per-test-directory kselftest "settings" file.
 	settings="$BASE_DIR/$DIR/settings"
 	if [ -r "$settings" ] ; then
@@ -69,7 +96,8 @@ run_one()
 		echo "# Warning: file $TEST is missing!"
 		echo "not ok $test_num $TEST_HDR_MSG"
 	else
-		cmd="./$BASENAME_TEST"
+		eval kselftest_cmd_args="\$${kselftest_cmd_args_ref:-}"
+		cmd="./$BASENAME_TEST $kselftest_cmd_args"
 		if [ ! -x "$TEST" ]; then
 			echo "# Warning: file $TEST is not executable"
 
-- 
cgit 


From cef757808666de8400d09ac11521418d8122edd5 Mon Sep 17 00:00:00 2001
From: Muhammad Usama Anjum <usama.anjum@collabora.com>
Date: Wed, 19 Jan 2022 15:33:32 +0500
Subject: selftests: futex: set DEFAULT_INSTALL_HDR_PATH

If only futex selftest is compiled, uapi header files are copied to the
selftests/futex/functional directory. This copy isn't needed. Set the
DEFAULT_INSTALL_HDR_PATH variable to 1 to use the default header install
path only. This removes extra copy of header file.

Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/futex/functional/Makefile | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile
index 5cc38de9d8ea..9a8c3700d773 100644
--- a/tools/testing/selftests/futex/functional/Makefile
+++ b/tools/testing/selftests/futex/functional/Makefile
@@ -24,6 +24,7 @@ TEST_PROGS := run.sh
 
 top_srcdir = ../../../../..
 KSFT_KHDR_INSTALL := 1
+DEFAULT_INSTALL_HDR_PATH := 1
 include ../../lib.mk
 
 $(TEST_GEN_FILES): $(HEADERS)
-- 
cgit 


From 5ad51ab618de5d05f4e692ebabeb6fe6289aaa57 Mon Sep 17 00:00:00 2001
From: Muhammad Usama Anjum <usama.anjum@collabora.com>
Date: Wed, 19 Jan 2022 15:15:22 +0500
Subject: selftests: set the BUILD variable to absolute path

The build of kselftests fails if relative path is specified through
KBUILD_OUTPUT or O=<path> method. BUILD variable is used to determine
the path of the output objects. When make is run from other directories
with relative paths, the exact path of the build objects is ambiguous
and build fails.

	make[1]: Entering directory '/home/usama/repos/kernel/linux_mainline2/tools/testing/selftests/alsa'
	gcc     mixer-test.c -L/usr/lib/x86_64-linux-gnu -lasound  -o build/kselftest/alsa/mixer-test
	/usr/bin/ld: cannot open output file build/kselftest/alsa/mixer-test

Set the BUILD variable to the absolute path of the output directory.
Make the logic readable and easy to follow. Use spaces instead of tabs
for indentation as if with tab indentation is considered recipe in make.

Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/Makefile | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index d08fe4cfe811..a7b63860b7bc 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -114,19 +114,27 @@ ifdef building_out_of_srctree
 override LDFLAGS =
 endif
 
-ifneq ($(O),)
-	BUILD := $(O)/kselftest
+top_srcdir ?= ../../..
+
+ifeq ("$(origin O)", "command line")
+  KBUILD_OUTPUT := $(O)
+endif
+
+ifneq ($(KBUILD_OUTPUT),)
+  # Make's built-in functions such as $(abspath ...), $(realpath ...) cannot
+  # expand a shell special character '~'. We use a somewhat tedious way here.
+  abs_objtree := $(shell cd $(top_srcdir) && mkdir -p $(KBUILD_OUTPUT) && cd $(KBUILD_OUTPUT) && pwd)
+  $(if $(abs_objtree),, \
+    $(error failed to create output directory "$(KBUILD_OUTPUT)"))
+  # $(realpath ...) resolves symlinks
+  abs_objtree := $(realpath $(abs_objtree))
+  BUILD := $(abs_objtree)/kselftest
 else
-	ifneq ($(KBUILD_OUTPUT),)
-		BUILD := $(KBUILD_OUTPUT)/kselftest
-	else
-		BUILD := $(shell pwd)
-		DEFAULT_INSTALL_HDR_PATH := 1
-	endif
+  BUILD := $(CURDIR)
+  DEFAULT_INSTALL_HDR_PATH := 1
 endif
 
 # Prepare for headers install
-top_srcdir ?= ../../..
 include $(top_srcdir)/scripts/subarch.include
 ARCH           ?= $(SUBARCH)
 export KSFT_KHDR_INSTALL_DONE := 1
-- 
cgit 


From 250f8c1137578e6210becaed63ec6e9a8eea430e Mon Sep 17 00:00:00 2001
From: Muhammad Usama Anjum <usama.anjum@collabora.com>
Date: Wed, 19 Jan 2022 15:15:23 +0500
Subject: selftests: Add and export a kernel uapi headers path

Kernel uapi headers can be present at different paths depending upon
how the build was invoked. It becomes impossible for the tests to
include the correct headers directory. Set and export KHDR_INCLUDES
variable to make it possible for sub make files to include the header
files.

Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/Makefile | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index a7b63860b7bc..21f983dfd047 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -129,8 +129,11 @@ ifneq ($(KBUILD_OUTPUT),)
   # $(realpath ...) resolves symlinks
   abs_objtree := $(realpath $(abs_objtree))
   BUILD := $(abs_objtree)/kselftest
+  KHDR_INCLUDES := -I${abs_objtree}/usr/include
 else
   BUILD := $(CURDIR)
+  abs_srctree := $(shell cd $(top_srcdir) && pwd)
+  KHDR_INCLUDES := -I${abs_srctree}/usr/include
   DEFAULT_INSTALL_HDR_PATH := 1
 endif
 
@@ -139,6 +142,7 @@ include $(top_srcdir)/scripts/subarch.include
 ARCH           ?= $(SUBARCH)
 export KSFT_KHDR_INSTALL_DONE := 1
 export BUILD
+export KHDR_INCLUDES
 
 # set default goal to all, so make without a target runs all, even when
 # all isn't the first target in the file.
-- 
cgit 


From afe5fba8d10b40fdc517b46a2b59d2172686221e Mon Sep 17 00:00:00 2001
From: Muhammad Usama Anjum <usama.anjum@collabora.com>
Date: Wed, 19 Jan 2022 15:15:24 +0500
Subject: selftests: Correct the headers install path

uapi headers should be installed at the top of the object tree,
"<obj_tree>/usr/include". There is no need for kernel headers to
be present at kselftest build directory, "<obj_tree>/kselftest/usr/
include" as well. This duplication can be avoided by correctly
specifying the INSTALL_HDR_PATH.

Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 21f983dfd047..80e5498eab92 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -167,7 +167,7 @@ khdr:
 ifeq (1,$(DEFAULT_INSTALL_HDR_PATH))
 	$(MAKE) --no-builtin-rules ARCH=$(ARCH) -C $(top_srcdir) headers_install
 else
-	$(MAKE) --no-builtin-rules INSTALL_HDR_PATH=$$BUILD/usr \
+	$(MAKE) --no-builtin-rules INSTALL_HDR_PATH=$(abs_objtree)/usr \
 		ARCH=$(ARCH) -C $(top_srcdir) headers_install
 endif
 
-- 
cgit 


From bd7d481c37716b21b251e6f8248ed6d8f2183445 Mon Sep 17 00:00:00 2001
From: Muhammad Usama Anjum <usama.anjum@collabora.com>
Date: Wed, 19 Jan 2022 15:15:25 +0500
Subject: selftests: futex: Add the uapi headers include variable

Out of tree build of this test fails if relative path of the output
directory is specified. KBUILD_OUTPUT also doesn't point to the correct
directory when relative path is used. Thus out of tree builds fail.
Remove the un-needed include paths and use KHDR_INCLUDES to correctly
reach the headers.

Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/futex/functional/Makefile | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile
index 9a8c3700d773..b8152c573e8a 100644
--- a/tools/testing/selftests/futex/functional/Makefile
+++ b/tools/testing/selftests/futex/functional/Makefile
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
-INCLUDES := -I../include -I../../ -I../../../../../usr/include/ \
-	-I$(KBUILD_OUTPUT)/kselftest/usr/include
-CFLAGS := $(CFLAGS) -g -O2 -Wall -D_GNU_SOURCE -pthread $(INCLUDES)
+INCLUDES := -I../include -I../../ -I../../../../../usr/include/
+CFLAGS := $(CFLAGS) -g -O2 -Wall -D_GNU_SOURCE -pthread $(INCLUDES) $(KHDR_INCLUDES)
 LDLIBS := -lpthread -lrt
 
 HEADERS := \
-- 
cgit 


From 0cc5963b4cc348fc37586ae8cbd91db1398037d1 Mon Sep 17 00:00:00 2001
From: Muhammad Usama Anjum <usama.anjum@collabora.com>
Date: Wed, 19 Jan 2022 15:15:26 +0500
Subject: selftests: kvm: Add the uapi headers include variable

Out of tree build of this test fails if relative path of the output
directory is specified. Add KHDR_INCLUDES to correctly reach the
headers.

Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/kvm/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 17c3f0749f05..b970397f725c 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -149,7 +149,7 @@ endif
 CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
 	-fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \
 	-I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \
-	-I$(<D) -Iinclude/$(UNAME_M) -I.. $(EXTRA_CFLAGS)
+	-I$(<D) -Iinclude/$(UNAME_M) -I.. $(EXTRA_CFLAGS) $(KHDR_INCLUDES)
 
 no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \
         $(CC) -Werror -no-pie -x c - -o "$$TMP", -no-pie)
-- 
cgit 


From cb542c66f304c85c19eb07a88dded326e5bfcfe5 Mon Sep 17 00:00:00 2001
From: Muhammad Usama Anjum <usama.anjum@collabora.com>
Date: Wed, 19 Jan 2022 15:15:27 +0500
Subject: selftests: landlock: Add the uapi headers include variable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Out of tree build of this test fails if relative path of the output
directory is specified. Add the KHDR_INCLUDES to correctly reach the
headers.

Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Reviewed-by: Mickaël Salaün <mic@linux.microsoft.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/landlock/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/landlock/Makefile b/tools/testing/selftests/landlock/Makefile
index a99596ca9882..0b0049e133bb 100644
--- a/tools/testing/selftests/landlock/Makefile
+++ b/tools/testing/selftests/landlock/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 
-CFLAGS += -Wall -O2
+CFLAGS += -Wall -O2 $(KHDR_INCLUDES)
 
 src_test := $(wildcard *_test.c)
 
-- 
cgit 


From 50f4143df0a63220f2108dc62164c432e07e410b Mon Sep 17 00:00:00 2001
From: Muhammad Usama Anjum <usama.anjum@collabora.com>
Date: Wed, 19 Jan 2022 15:15:28 +0500
Subject: selftests: net: Add the uapi headers include variable

Out of tree build of this test fails if relative path of the output
directory is specified. Add the KHDR_INCLUDES to correctly reach the
headers.

Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/net/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 9897fa9ab953..0b1488616c55 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -2,7 +2,7 @@
 # Makefile for net selftests
 
 CFLAGS =  -Wall -Wl,--no-as-needed -O2 -g
-CFLAGS += -I../../../../usr/include/
+CFLAGS += -I../../../../usr/include/ $(KHDR_INCLUDES)
 
 TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh \
 	      rtnetlink.sh xfrm_policy.sh test_blackhole_dev.sh
-- 
cgit 


From 5faa35d0b8cc1403acb6760e9e95905cfd691872 Mon Sep 17 00:00:00 2001
From: Muhammad Usama Anjum <usama.anjum@collabora.com>
Date: Wed, 19 Jan 2022 15:15:29 +0500
Subject: selftests: mptcp: Add the uapi headers include variable

Out of tree build of this test fails if relative path of the output
directory is specified. Add the KHDR_INCLUDES to correctly reach the
headers.

Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/net/mptcp/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index 0356c4501c99..f905d5358e68 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -3,7 +3,7 @@
 top_srcdir = ../../../../..
 KSFT_KHDR_INSTALL := 1
 
-CFLAGS =  -Wall -Wl,--no-as-needed -O2 -g  -I$(top_srcdir)/usr/include
+CFLAGS =  -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
 
 TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \
 	      simult_flows.sh mptcp_sockopt.sh
-- 
cgit 


From 4a8900207abdc629933f07329bd884c60b50f074 Mon Sep 17 00:00:00 2001
From: Muhammad Usama Anjum <usama.anjum@collabora.com>
Date: Wed, 19 Jan 2022 15:15:30 +0500
Subject: selftests: vm: Add the uapi headers include variable

Out of tree build of this test fails if relative path of the output
directory is specified. Add the KHDR_INCLUDES to correctly reach the
headers.

Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Tested-by: Alistair Popple <apopple@nvidia.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/vm/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 1607322a112c..033e9e11e2da 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -23,7 +23,7 @@ MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/' -e 's/ppc64.*/p
 # LDLIBS.
 MAKEFLAGS += --no-builtin-rules
 
-CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS)
+CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS) $(KHDR_INCLUDES)
 LDLIBS = -lrt -lpthread
 TEST_GEN_FILES = compaction_test
 TEST_GEN_FILES += gup_test
-- 
cgit 


From 681696862bc1823595c05960a83766d1aa965c17 Mon Sep 17 00:00:00 2001
From: Muhammad Usama Anjum <usama.anjum@collabora.com>
Date: Wed, 19 Jan 2022 15:15:31 +0500
Subject: selftests: vm: remove dependecy from internal kernel macros
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The defination of swap() is used from kernel's internal header when this
test is built in source tree. The build fails when this test is built
out of source tree as defination of swap() isn't found. Selftests
shouldn't depend on kernel's internal header files. They can only depend
on uapi header files. Add the defination of swap() to fix the build
error:

	gcc -Wall  -I/linux_mainline2/build/usr/include -no-pie    userfaultfd.c -lrt -lpthread -o /linux_mainline2/build/kselftest/vm/userfaultfd
	userfaultfd.c: In function ‘userfaultfd_stress’:
	userfaultfd.c:1530:3: warning: implicit declaration of function ‘swap’; did you mean ‘swab’? [-Wimplicit-function-declaration]
	 1530 |   swap(area_src, area_dst);
	      |   ^~~~
	      |   swab
	/usr/bin/ld: /tmp/cclUUH7V.o: in function `userfaultfd_stress':
	userfaultfd.c:(.text+0x4d64): undefined reference to `swap'
	/usr/bin/ld: userfaultfd.c:(.text+0x4d82): undefined reference to `swap'
	collect2: error: ld returned 1 exit status

Fixes: 2c769ed7137a ("tools/testing/selftests/vm/userfaultfd.c: use swap() to make code cleaner")
Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Reviewed-by: Alistair Popple <apopple@nvidia.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/vm/userfaultfd.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 2f49c9af1b58..50476ec6c070 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -119,6 +119,9 @@ struct uffd_stats {
 				 ~(unsigned long)(sizeof(unsigned long long) \
 						  -  1)))
 
+#define swap(a, b) \
+	do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
+
 const char *examples =
     "# Run anonymous memory test on 100MiB region with 99999 bounces:\n"
     "./userfaultfd anon 100 99999\n\n"
-- 
cgit 


From 46e50459ea10cbf8a534fc1e6e752408dda191ef Mon Sep 17 00:00:00 2001
From: Muhammad Usama Anjum <usama.anjum@collabora.com>
Date: Mon, 14 Feb 2022 21:07:56 +0500
Subject: selftests: Use -isystem instead of -I to include headers

Selftests need kernel headers and glibc for compilation. In compilation
of selftests, uapi headers from kernel source are used instead of
default ones while glibc has already been compiled with different header
files installed in the operating system. So there can be redefination
warnings from compiler. These warnings can be suppressed by using
-isystem to include the uapi headers.

Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 80e5498eab92..5d9d4ddccccb 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -129,11 +129,11 @@ ifneq ($(KBUILD_OUTPUT),)
   # $(realpath ...) resolves symlinks
   abs_objtree := $(realpath $(abs_objtree))
   BUILD := $(abs_objtree)/kselftest
-  KHDR_INCLUDES := -I${abs_objtree}/usr/include
+  KHDR_INCLUDES := -isystem ${abs_objtree}/usr/include
 else
   BUILD := $(CURDIR)
   abs_srctree := $(shell cd $(top_srcdir) && pwd)
-  KHDR_INCLUDES := -I${abs_srctree}/usr/include
+  KHDR_INCLUDES := -isystem ${abs_srctree}/usr/include
   DEFAULT_INSTALL_HDR_PATH := 1
 endif
 
-- 
cgit 


From 4893992b6de1be5e0c9f6df747912761e96b4271 Mon Sep 17 00:00:00 2001
From: Muhammad Usama Anjum <usama.anjum@collabora.com>
Date: Fri, 11 Feb 2022 03:23:19 +0500
Subject: selftests/exec: Rename file binfmt_script to binfmt_script.py

Rename file for readability purpose. Update its usage and references.

Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/exec/Makefile         |   2 +-
 tools/testing/selftests/exec/binfmt_script    | 171 --------------------------
 tools/testing/selftests/exec/binfmt_script.py | 171 ++++++++++++++++++++++++++
 3 files changed, 172 insertions(+), 172 deletions(-)
 delete mode 100755 tools/testing/selftests/exec/binfmt_script
 create mode 100755 tools/testing/selftests/exec/binfmt_script.py

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile
index 2d7fca446c7f..7f0412a26ba9 100644
--- a/tools/testing/selftests/exec/Makefile
+++ b/tools/testing/selftests/exec/Makefile
@@ -3,7 +3,7 @@ CFLAGS = -Wall
 CFLAGS += -Wno-nonnull
 CFLAGS += -D_GNU_SOURCE
 
-TEST_PROGS := binfmt_script
+TEST_PROGS := binfmt_script.py
 TEST_GEN_PROGS := execveat load_address_4096 load_address_2097152 load_address_16777216 non-regular
 TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir
 # Makefile is a run-time dependency, since it's accessed by the execveat test
diff --git a/tools/testing/selftests/exec/binfmt_script b/tools/testing/selftests/exec/binfmt_script
deleted file mode 100755
index 05f94a741c7a..000000000000
--- a/tools/testing/selftests/exec/binfmt_script
+++ /dev/null
@@ -1,171 +0,0 @@
-#!/usr/bin/env python3
-# SPDX-License-Identifier: GPL-2.0
-#
-# Test that truncation of bprm->buf doesn't cause unexpected execs paths, along
-# with various other pathological cases.
-import os, subprocess
-
-# Relevant commits
-#
-# b5372fe5dc84 ("exec: load_script: Do not exec truncated interpreter path")
-# 6eb3c3d0a52d ("exec: increase BINPRM_BUF_SIZE to 256")
-
-# BINPRM_BUF_SIZE
-SIZE=256
-
-NAME_MAX=int(subprocess.check_output(["getconf", "NAME_MAX", "."]))
-
-test_num=0
-
-code='''#!/usr/bin/perl
-print "Executed interpreter! Args:\n";
-print "0 : '$0'\n";
-$counter = 1;
-foreach my $a (@ARGV) {
-    print "$counter : '$a'\n";
-    $counter++;
-}
-'''
-
-##
-# test - produce a binfmt_script hashbang line for testing
-#
-# @size:     bytes for bprm->buf line, including hashbang but not newline
-# @good:     whether this script is expected to execute correctly
-# @hashbang: the special 2 bytes for running binfmt_script
-# @leading:  any leading whitespace before the executable path
-# @root:     start of executable pathname
-# @target:   end of executable pathname
-# @arg:      bytes following the executable pathname
-# @fill:     character to fill between @root and @target to reach @size bytes
-# @newline:  character to use as newline, not counted towards @size
-# ...
-def test(name, size, good=True, leading="", root="./", target="/perl",
-                     fill="A", arg="", newline="\n", hashbang="#!"):
-    global test_num, tests, NAME_MAX
-    test_num += 1
-    if test_num > tests:
-        raise ValueError("more binfmt_script tests than expected! (want %d, expected %d)"
-                         % (test_num, tests))
-
-    middle = ""
-    remaining = size - len(hashbang) - len(leading) - len(root) - len(target) - len(arg)
-    # The middle of the pathname must not exceed NAME_MAX
-    while remaining >= NAME_MAX:
-        middle += fill * (NAME_MAX - 1)
-        middle += '/'
-        remaining -= NAME_MAX
-    middle += fill * remaining
-
-    dirpath = root + middle
-    binary = dirpath + target
-    if len(target):
-        os.makedirs(dirpath, mode=0o755, exist_ok=True)
-        open(binary, "w").write(code)
-        os.chmod(binary, 0o755)
-
-    buf=hashbang + leading + root + middle + target + arg + newline
-    if len(newline) > 0:
-        buf += 'echo this is not really perl\n'
-
-    script = "binfmt_script-%s" % (name)
-    open(script, "w").write(buf)
-    os.chmod(script, 0o755)
-
-    proc = subprocess.Popen(["./%s" % (script)], shell=True,
-                            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
-    stdout = proc.communicate()[0]
-
-    if proc.returncode == 0 and b'Executed interpreter' in stdout:
-        if good:
-            print("ok %d - binfmt_script %s (successful good exec)"
-                  % (test_num, name))
-        else:
-            print("not ok %d - binfmt_script %s succeeded when it should have failed"
-                  % (test_num, name))
-    else:
-        if good:
-            print("not ok %d - binfmt_script %s failed when it should have succeeded (rc:%d)"
-                  % (test_num, name, proc.returncode))
-        else:
-            print("ok %d - binfmt_script %s (correctly failed bad exec)"
-                  % (test_num, name))
-
-    # Clean up crazy binaries
-    os.unlink(script)
-    if len(target):
-        elements = binary.split('/')
-        os.unlink(binary)
-        elements.pop()
-        while len(elements) > 1:
-            os.rmdir("/".join(elements))
-            elements.pop()
-
-tests=27
-print("TAP version 1.3")
-print("1..%d" % (tests))
-
-### FAIL (8 tests)
-
-# Entire path is well past the BINFMT_BUF_SIZE.
-test(name="too-big",        size=SIZE+80, good=False)
-# Path is right at max size, making it impossible to tell if it was truncated.
-test(name="exact",          size=SIZE,    good=False)
-# Same as above, but with leading whitespace.
-test(name="exact-space",    size=SIZE,    good=False, leading=" ")
-# Huge buffer of only whitespace.
-test(name="whitespace-too-big", size=SIZE+71, good=False, root="",
-                                              fill=" ", target="")
-# A good path, but it gets truncated due to leading whitespace.
-test(name="truncated",      size=SIZE+17, good=False, leading=" " * 19)
-# Entirely empty except for #!
-test(name="empty",          size=2,       good=False, root="",
-                                          fill="", target="", newline="")
-# Within size, but entirely spaces
-test(name="spaces",         size=SIZE-1,  good=False, root="", fill=" ",
-                                          target="", newline="")
-# Newline before binary.
-test(name="newline-prefix", size=SIZE-1,  good=False, leading="\n",
-                                          root="", fill=" ", target="")
-
-### ok (19 tests)
-
-# The original test case that was broken by commit:
-# 8099b047ecc4 ("exec: load_script: don't blindly truncate shebang string")
-test(name="test.pl",        size=439, leading=" ",
-     root="./nix/store/bwav8kz8b3y471wjsybgzw84mrh4js9-perl-5.28.1/bin",
-     arg=" -I/nix/store/x6yyav38jgr924nkna62q3pkp0dgmzlx-perl5.28.1-File-Slurp-9999.25/lib/perl5/site_perl -I/nix/store/ha8v67sl8dac92r9z07vzr4gv1y9nwqz-perl5.28.1-Net-DBus-1.1.0/lib/perl5/site_perl -I/nix/store/dcrkvnjmwh69ljsvpbdjjdnqgwx90a9d-perl5.28.1-XML-Parser-2.44/lib/perl5/site_perl -I/nix/store/rmji88k2zz7h4zg97385bygcydrf2q8h-perl5.28.1-XML-Twig-3.52/lib/perl5/site_perl")
-# One byte under size, leaving newline visible.
-test(name="one-under",           size=SIZE-1)
-# Two bytes under size, leaving newline visible.
-test(name="two-under",           size=SIZE-2)
-# Exact size, but trailing whitespace visible instead of newline
-test(name="exact-trunc-whitespace", size=SIZE, arg=" ")
-# Exact size, but trailing space and first arg char visible instead of newline.
-test(name="exact-trunc-arg",     size=SIZE, arg=" f")
-# One bute under, with confirmed non-truncated arg since newline now visible.
-test(name="one-under-full-arg",  size=SIZE-1, arg=" f")
-# Short read buffer by one byte.
-test(name="one-under-no-nl",     size=SIZE-1, newline="")
-# Short read buffer by half buffer size.
-test(name="half-under-no-nl",    size=int(SIZE/2), newline="")
-# One byte under with whitespace arg. leaving wenline visible.
-test(name="one-under-trunc-arg", size=SIZE-1, arg=" ")
-# One byte under with whitespace leading. leaving wenline visible.
-test(name="one-under-leading",   size=SIZE-1, leading=" ")
-# One byte under with whitespace leading and as arg. leaving newline visible.
-test(name="one-under-leading-trunc-arg",  size=SIZE-1, leading=" ", arg=" ")
-# Same as above, but with 2 bytes under
-test(name="two-under-no-nl",     size=SIZE-2, newline="")
-test(name="two-under-trunc-arg", size=SIZE-2, arg=" ")
-test(name="two-under-leading",   size=SIZE-2, leading=" ")
-test(name="two-under-leading-trunc-arg",   size=SIZE-2, leading=" ", arg=" ")
-# Same as above, but with buffer half filled
-test(name="two-under-no-nl",     size=int(SIZE/2), newline="")
-test(name="two-under-trunc-arg", size=int(SIZE/2), arg=" ")
-test(name="two-under-leading",   size=int(SIZE/2), leading=" ")
-test(name="two-under-lead-trunc-arg", size=int(SIZE/2), leading=" ", arg=" ")
-
-if test_num != tests:
-    raise ValueError("fewer binfmt_script tests than expected! (ran %d, expected %d"
-                     % (test_num, tests))
diff --git a/tools/testing/selftests/exec/binfmt_script.py b/tools/testing/selftests/exec/binfmt_script.py
new file mode 100755
index 000000000000..05f94a741c7a
--- /dev/null
+++ b/tools/testing/selftests/exec/binfmt_script.py
@@ -0,0 +1,171 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test that truncation of bprm->buf doesn't cause unexpected execs paths, along
+# with various other pathological cases.
+import os, subprocess
+
+# Relevant commits
+#
+# b5372fe5dc84 ("exec: load_script: Do not exec truncated interpreter path")
+# 6eb3c3d0a52d ("exec: increase BINPRM_BUF_SIZE to 256")
+
+# BINPRM_BUF_SIZE
+SIZE=256
+
+NAME_MAX=int(subprocess.check_output(["getconf", "NAME_MAX", "."]))
+
+test_num=0
+
+code='''#!/usr/bin/perl
+print "Executed interpreter! Args:\n";
+print "0 : '$0'\n";
+$counter = 1;
+foreach my $a (@ARGV) {
+    print "$counter : '$a'\n";
+    $counter++;
+}
+'''
+
+##
+# test - produce a binfmt_script hashbang line for testing
+#
+# @size:     bytes for bprm->buf line, including hashbang but not newline
+# @good:     whether this script is expected to execute correctly
+# @hashbang: the special 2 bytes for running binfmt_script
+# @leading:  any leading whitespace before the executable path
+# @root:     start of executable pathname
+# @target:   end of executable pathname
+# @arg:      bytes following the executable pathname
+# @fill:     character to fill between @root and @target to reach @size bytes
+# @newline:  character to use as newline, not counted towards @size
+# ...
+def test(name, size, good=True, leading="", root="./", target="/perl",
+                     fill="A", arg="", newline="\n", hashbang="#!"):
+    global test_num, tests, NAME_MAX
+    test_num += 1
+    if test_num > tests:
+        raise ValueError("more binfmt_script tests than expected! (want %d, expected %d)"
+                         % (test_num, tests))
+
+    middle = ""
+    remaining = size - len(hashbang) - len(leading) - len(root) - len(target) - len(arg)
+    # The middle of the pathname must not exceed NAME_MAX
+    while remaining >= NAME_MAX:
+        middle += fill * (NAME_MAX - 1)
+        middle += '/'
+        remaining -= NAME_MAX
+    middle += fill * remaining
+
+    dirpath = root + middle
+    binary = dirpath + target
+    if len(target):
+        os.makedirs(dirpath, mode=0o755, exist_ok=True)
+        open(binary, "w").write(code)
+        os.chmod(binary, 0o755)
+
+    buf=hashbang + leading + root + middle + target + arg + newline
+    if len(newline) > 0:
+        buf += 'echo this is not really perl\n'
+
+    script = "binfmt_script-%s" % (name)
+    open(script, "w").write(buf)
+    os.chmod(script, 0o755)
+
+    proc = subprocess.Popen(["./%s" % (script)], shell=True,
+                            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+    stdout = proc.communicate()[0]
+
+    if proc.returncode == 0 and b'Executed interpreter' in stdout:
+        if good:
+            print("ok %d - binfmt_script %s (successful good exec)"
+                  % (test_num, name))
+        else:
+            print("not ok %d - binfmt_script %s succeeded when it should have failed"
+                  % (test_num, name))
+    else:
+        if good:
+            print("not ok %d - binfmt_script %s failed when it should have succeeded (rc:%d)"
+                  % (test_num, name, proc.returncode))
+        else:
+            print("ok %d - binfmt_script %s (correctly failed bad exec)"
+                  % (test_num, name))
+
+    # Clean up crazy binaries
+    os.unlink(script)
+    if len(target):
+        elements = binary.split('/')
+        os.unlink(binary)
+        elements.pop()
+        while len(elements) > 1:
+            os.rmdir("/".join(elements))
+            elements.pop()
+
+tests=27
+print("TAP version 1.3")
+print("1..%d" % (tests))
+
+### FAIL (8 tests)
+
+# Entire path is well past the BINFMT_BUF_SIZE.
+test(name="too-big",        size=SIZE+80, good=False)
+# Path is right at max size, making it impossible to tell if it was truncated.
+test(name="exact",          size=SIZE,    good=False)
+# Same as above, but with leading whitespace.
+test(name="exact-space",    size=SIZE,    good=False, leading=" ")
+# Huge buffer of only whitespace.
+test(name="whitespace-too-big", size=SIZE+71, good=False, root="",
+                                              fill=" ", target="")
+# A good path, but it gets truncated due to leading whitespace.
+test(name="truncated",      size=SIZE+17, good=False, leading=" " * 19)
+# Entirely empty except for #!
+test(name="empty",          size=2,       good=False, root="",
+                                          fill="", target="", newline="")
+# Within size, but entirely spaces
+test(name="spaces",         size=SIZE-1,  good=False, root="", fill=" ",
+                                          target="", newline="")
+# Newline before binary.
+test(name="newline-prefix", size=SIZE-1,  good=False, leading="\n",
+                                          root="", fill=" ", target="")
+
+### ok (19 tests)
+
+# The original test case that was broken by commit:
+# 8099b047ecc4 ("exec: load_script: don't blindly truncate shebang string")
+test(name="test.pl",        size=439, leading=" ",
+     root="./nix/store/bwav8kz8b3y471wjsybgzw84mrh4js9-perl-5.28.1/bin",
+     arg=" -I/nix/store/x6yyav38jgr924nkna62q3pkp0dgmzlx-perl5.28.1-File-Slurp-9999.25/lib/perl5/site_perl -I/nix/store/ha8v67sl8dac92r9z07vzr4gv1y9nwqz-perl5.28.1-Net-DBus-1.1.0/lib/perl5/site_perl -I/nix/store/dcrkvnjmwh69ljsvpbdjjdnqgwx90a9d-perl5.28.1-XML-Parser-2.44/lib/perl5/site_perl -I/nix/store/rmji88k2zz7h4zg97385bygcydrf2q8h-perl5.28.1-XML-Twig-3.52/lib/perl5/site_perl")
+# One byte under size, leaving newline visible.
+test(name="one-under",           size=SIZE-1)
+# Two bytes under size, leaving newline visible.
+test(name="two-under",           size=SIZE-2)
+# Exact size, but trailing whitespace visible instead of newline
+test(name="exact-trunc-whitespace", size=SIZE, arg=" ")
+# Exact size, but trailing space and first arg char visible instead of newline.
+test(name="exact-trunc-arg",     size=SIZE, arg=" f")
+# One bute under, with confirmed non-truncated arg since newline now visible.
+test(name="one-under-full-arg",  size=SIZE-1, arg=" f")
+# Short read buffer by one byte.
+test(name="one-under-no-nl",     size=SIZE-1, newline="")
+# Short read buffer by half buffer size.
+test(name="half-under-no-nl",    size=int(SIZE/2), newline="")
+# One byte under with whitespace arg. leaving wenline visible.
+test(name="one-under-trunc-arg", size=SIZE-1, arg=" ")
+# One byte under with whitespace leading. leaving wenline visible.
+test(name="one-under-leading",   size=SIZE-1, leading=" ")
+# One byte under with whitespace leading and as arg. leaving newline visible.
+test(name="one-under-leading-trunc-arg",  size=SIZE-1, leading=" ", arg=" ")
+# Same as above, but with 2 bytes under
+test(name="two-under-no-nl",     size=SIZE-2, newline="")
+test(name="two-under-trunc-arg", size=SIZE-2, arg=" ")
+test(name="two-under-leading",   size=SIZE-2, leading=" ")
+test(name="two-under-leading-trunc-arg",   size=SIZE-2, leading=" ", arg=" ")
+# Same as above, but with buffer half filled
+test(name="two-under-no-nl",     size=int(SIZE/2), newline="")
+test(name="two-under-trunc-arg", size=int(SIZE/2), arg=" ")
+test(name="two-under-leading",   size=int(SIZE/2), leading=" ")
+test(name="two-under-lead-trunc-arg", size=int(SIZE/2), leading=" ", arg=" ")
+
+if test_num != tests:
+    raise ValueError("fewer binfmt_script tests than expected! (ran %d, expected %d"
+                     % (test_num, tests))
-- 
cgit 


From b22dfec72c377e350c4cafce4078977d3e5bee03 Mon Sep 17 00:00:00 2001
From: Muhammad Usama Anjum <usama.anjum@collabora.com>
Date: Fri, 21 Jan 2022 19:51:52 +0500
Subject: selftests/lkdtm: Remove dead config option

CONFIG_HARDENED_USERCOPY_FALLBACK config option has been removed in
commit 53944f171a89 ("mm: remove HARDENED_USERCOPY_FALLBACK"). Remove it
from the lkdtm selftest config.

Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/lkdtm/config | 1 -
 1 file changed, 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/lkdtm/config b/tools/testing/selftests/lkdtm/config
index a26a3fa9e925..a7a58f885f52 100644
--- a/tools/testing/selftests/lkdtm/config
+++ b/tools/testing/selftests/lkdtm/config
@@ -3,7 +3,6 @@ CONFIG_DEBUG_LIST=y
 CONFIG_SLAB_FREELIST_HARDENED=y
 CONFIG_FORTIFY_SOURCE=y
 CONFIG_HARDENED_USERCOPY=y
-# CONFIG_HARDENED_USERCOPY_FALLBACK is not set
 CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT=y
 CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y
 CONFIG_UBSAN_BOUNDS=y
-- 
cgit 


From 1900be289b598b2c553b3add13e491c0bb8a8550 Mon Sep 17 00:00:00 2001
From: Muhammad Usama Anjum <usama.anjum@collabora.com>
Date: Fri, 21 Jan 2022 19:51:53 +0500
Subject: selftests/lkdtm: Add UBSAN config

UBSAN_BOUNDS and UBSAN_TRAP depend on UBSAN config option.
merge_config.sh script generates following warnings if parent config
doesn't have UBSAN config already enabled and UBSAN_BOUNDS/UBSAN_TRAP
config options don't get added to the parent config.

Value requested for CONFIG_UBSAN_BOUNDS not in final .config
Requested value:  CONFIG_UBSAN_BOUNDS=y
Actual value:

Value requested for CONFIG_UBSAN_TRAP not in final .config
Requested value:  CONFIG_UBSAN_TRAP=y
Actual value:

Fix this by including UBSAN config.

Fixes: c75be56e35b2 ("lkdtm/bugs: Add ARRAY_BOUNDS to selftests")
Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/lkdtm/config | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/lkdtm/config b/tools/testing/selftests/lkdtm/config
index a7a58f885f52..46f39ee76208 100644
--- a/tools/testing/selftests/lkdtm/config
+++ b/tools/testing/selftests/lkdtm/config
@@ -5,6 +5,7 @@ CONFIG_FORTIFY_SOURCE=y
 CONFIG_HARDENED_USERCOPY=y
 CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT=y
 CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y
+CONFIG_UBSAN=y
 CONFIG_UBSAN_BOUNDS=y
 CONFIG_UBSAN_TRAP=y
 CONFIG_STACKPROTECTOR_STRONG=y
-- 
cgit 


From f49b8138e62377d1c41ea7ffd55afb093382ee34 Mon Sep 17 00:00:00 2001
From: David Dunn <daviddunn@google.com>
Date: Wed, 23 Feb 2022 22:57:42 +0000
Subject: KVM: selftests: Carve out helper to create "default" VM without vCPUs

Carve out portion of vm_create_default so that selftests can modify
a "default" VM prior to creating vcpus.

Signed-off-by: David Dunn <daviddunn@google.com>
Message-Id: <20220223225743.2703915-3-daviddunn@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/include/kvm_util_base.h |  3 +++
 tools/testing/selftests/kvm/lib/kvm_util.c          | 21 +++++++++++++++------
 2 files changed, 18 insertions(+), 6 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
index 4ed6aa049a91..f987cf7c0d2e 100644
--- a/tools/testing/selftests/kvm/include/kvm_util_base.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -336,6 +336,9 @@ struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
 				    uint32_t num_percpu_pages, void *guest_code,
 				    uint32_t vcpuids[]);
 
+/* Create a default VM without any vcpus. */
+struct kvm_vm *vm_create_without_vcpus(enum vm_guest_mode mode, uint64_t pages);
+
 /*
  * Adds a vCPU with reasonable defaults (e.g. a stack)
  *
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index d8cf851ab119..64618032aa58 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -362,6 +362,20 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
 	return vm;
 }
 
+struct kvm_vm *vm_create_without_vcpus(enum vm_guest_mode mode, uint64_t pages)
+{
+	struct kvm_vm *vm;
+
+	vm = vm_create(mode, pages, O_RDWR);
+
+	kvm_vm_elf_load(vm, program_invocation_name);
+
+#ifdef __x86_64__
+	vm_create_irqchip(vm);
+#endif
+	return vm;
+}
+
 /*
  * VM Create with customized parameters
  *
@@ -412,13 +426,8 @@ struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
 		    nr_vcpus, kvm_check_cap(KVM_CAP_MAX_VCPUS));
 
 	pages = vm_adjust_num_guest_pages(mode, pages);
-	vm = vm_create(mode, pages, O_RDWR);
 
-	kvm_vm_elf_load(vm, program_invocation_name);
-
-#ifdef __x86_64__
-	vm_create_irqchip(vm);
-#endif
+	vm = vm_create_without_vcpus(mode, pages);
 
 	for (i = 0; i < nr_vcpus; ++i) {
 		uint32_t vcpuid = vcpuids ? vcpuids[i] : i;
-- 
cgit 


From 20e416720e7448425c421f1a59b07ff574f8b1e1 Mon Sep 17 00:00:00 2001
From: David Dunn <daviddunn@google.com>
Date: Wed, 23 Feb 2022 22:57:43 +0000
Subject: KVM: selftests: Verify disabling PMU virtualization via
 KVM_CAP_CONFIG_PMU

On a VM with PMU disabled via KVM_CAP_PMU_CONFIG, the PMU should not be
usable by the guest.

Signed-off-by: David Dunn <daviddunn@google.com>
Message-Id: <20220223225743.2703915-4-daviddunn@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../selftests/kvm/x86_64/pmu_event_filter_test.c   | 33 ++++++++++++++++++++++
 1 file changed, 33 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
index c715adcbd487..0d06ffa95d9d 100644
--- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
+++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
@@ -325,6 +325,37 @@ static void test_not_member_allow_list(struct kvm_vm *vm)
 	TEST_ASSERT(!count, "Disallowed PMU Event is counting");
 }
 
+/*
+ * Verify that setting KVM_PMU_CAP_DISABLE prevents the use of the PMU.
+ *
+ * Note that KVM_CAP_PMU_CAPABILITY must be invoked prior to creating VCPUs.
+ */
+static void test_pmu_config_disable(void (*guest_code)(void))
+{
+	int r;
+	struct kvm_vm *vm;
+	struct kvm_enable_cap cap = { 0 };
+
+	r = kvm_check_cap(KVM_CAP_PMU_CAPABILITY);
+	if (!(r & KVM_PMU_CAP_DISABLE))
+		return;
+
+	vm = vm_create_without_vcpus(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES);
+
+	cap.cap = KVM_CAP_PMU_CAPABILITY;
+	cap.args[0] = KVM_PMU_CAP_DISABLE;
+	TEST_ASSERT(!vm_enable_cap(vm, &cap), "Failed to set KVM_PMU_CAP_DISABLE.");
+
+	vm_vcpu_add_default(vm, VCPU_ID, guest_code);
+	vm_init_descriptor_tables(vm);
+	vcpu_init_descriptor_tables(vm, VCPU_ID);
+
+	TEST_ASSERT(!sanity_check_pmu(vm),
+		    "Guest should not be able to use disabled PMU.");
+
+	kvm_vm_free(vm);
+}
+
 /*
  * Check for a non-zero PMU version, at least one general-purpose
  * counter per logical processor, an EBX bit vector of length greater
@@ -430,5 +461,7 @@ int main(int argc, char *argv[])
 
 	kvm_vm_free(vm);
 
+	test_pmu_config_disable(guest_code);
+
 	return 0;
 }
-- 
cgit 


From 32de73e89099c3f243032a733d3a64d417327a70 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Mon, 7 Feb 2022 15:20:34 +0000
Subject: kselftest/arm64: signal: Allow tests to be incompatible with features

Some features may invalidate some tests, for example by supporting an
operation which would trap otherwise. Allow tests to list features that
they are incompatible with so we can cover the case where a signal will
be generated without disruption on systems where that won't happen.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220207152109.197566-6-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 .../testing/selftests/arm64/signal/test_signals.h  |  1 +
 .../selftests/arm64/signal/test_signals_utils.c    | 34 ++++++++++++++++------
 .../selftests/arm64/signal/test_signals_utils.h    |  2 ++
 3 files changed, 28 insertions(+), 9 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/signal/test_signals.h b/tools/testing/selftests/arm64/signal/test_signals.h
index ebe8694dbef0..f909b70d9e98 100644
--- a/tools/testing/selftests/arm64/signal/test_signals.h
+++ b/tools/testing/selftests/arm64/signal/test_signals.h
@@ -53,6 +53,7 @@ struct tdescr {
 	char			*name;
 	char			*descr;
 	unsigned long		feats_required;
+	unsigned long		feats_incompatible;
 	/* bitmask of effectively supported feats: populated at run-time */
 	unsigned long		feats_supported;
 	bool			initialized;
diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c
index 2f8c23af3b5e..5743897984b0 100644
--- a/tools/testing/selftests/arm64/signal/test_signals_utils.c
+++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c
@@ -36,6 +36,8 @@ static inline char *feats_to_string(unsigned long feats)
 {
 	size_t flen = MAX_FEATS_SZ - 1;
 
+	feats_string[0] = '\0';
+
 	for (int i = 0; i < FMAX_END; i++) {
 		if (feats & (1UL << i)) {
 			size_t tlen = strlen(feats_names[i]);
@@ -256,7 +258,7 @@ int test_init(struct tdescr *td)
 		td->minsigstksz = MINSIGSTKSZ;
 	fprintf(stderr, "Detected MINSTKSIGSZ:%d\n", td->minsigstksz);
 
-	if (td->feats_required) {
+	if (td->feats_required || td->feats_incompatible) {
 		td->feats_supported = 0;
 		/*
 		 * Checking for CPU required features using both the
@@ -267,15 +269,29 @@ int test_init(struct tdescr *td)
 		if (getauxval(AT_HWCAP) & HWCAP_SVE)
 			td->feats_supported |= FEAT_SVE;
 		if (feats_ok(td)) {
-			fprintf(stderr,
-				"Required Features: [%s] supported\n",
-				feats_to_string(td->feats_required &
-						td->feats_supported));
+			if (td->feats_required & td->feats_supported)
+				fprintf(stderr,
+					"Required Features: [%s] supported\n",
+					feats_to_string(td->feats_required &
+							td->feats_supported));
+			if (!(td->feats_incompatible & td->feats_supported))
+				fprintf(stderr,
+					"Incompatible Features: [%s] absent\n",
+					feats_to_string(td->feats_incompatible));
 		} else {
-			fprintf(stderr,
-				"Required Features: [%s] NOT supported\n",
-				feats_to_string(td->feats_required &
-						~td->feats_supported));
+			if ((td->feats_required & td->feats_supported) !=
+			    td->feats_supported)
+				fprintf(stderr,
+					"Required Features: [%s] NOT supported\n",
+					feats_to_string(td->feats_required &
+							~td->feats_supported));
+			if (td->feats_incompatible & td->feats_supported)
+				fprintf(stderr,
+					"Incompatible Features: [%s] supported\n",
+					feats_to_string(td->feats_incompatible &
+							~td->feats_supported));
+
+
 			td->result = KSFT_SKIP;
 			return 0;
 		}
diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.h b/tools/testing/selftests/arm64/signal/test_signals_utils.h
index 6772b5c8d274..f3aa99ba67bb 100644
--- a/tools/testing/selftests/arm64/signal/test_signals_utils.h
+++ b/tools/testing/selftests/arm64/signal/test_signals_utils.h
@@ -18,6 +18,8 @@ void test_result(struct tdescr *td);
 
 static inline bool feats_ok(struct tdescr *td)
 {
+	if (td->feats_incompatible & td->feats_supported)
+		return false;
 	return (td->feats_required & td->feats_supported) == td->feats_required;
 }
 
-- 
cgit 


From 2aaa36e95ea586ad23edfcc1d474e8b735a4d1c3 Mon Sep 17 00:00:00 2001
From: Mateusz Jończyk <mat.jonczyk@o2.pl>
Date: Sat, 19 Feb 2022 08:27:13 +0100
Subject: selftests/rtc: continuously read RTC in a loop for 30s
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some problems with reading the RTC time may happen rarely, for example
while the RTC is updating. So read the RTC many times to catch these
problems. For example, a previous attempt for my
commit ea6fa4961aab ("rtc: mc146818-lib: fix RTC presence check")
was incorrect and would have triggered this selftest.

To avoid the risk of damaging the hardware, wait 11ms before consecutive
reads.

In rtc_time_to_timestamp I copied values manually instead of casting -
just to be on the safe side. The 11ms wait period was chosen so that it is
not a divisor of 1000ms.

Signed-off-by: Mateusz Jończyk <mat.jonczyk@o2.pl>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Cc: Alexandre Belloni <alexandre.belloni@bootlin.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/rtc/rtctest.c | 66 +++++++++++++++++++++++++++++++++++
 tools/testing/selftests/rtc/settings  |  2 +-
 2 files changed, 67 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rtc/rtctest.c b/tools/testing/selftests/rtc/rtctest.c
index 66af608fb4c6..2b9d929a24ed 100644
--- a/tools/testing/selftests/rtc/rtctest.c
+++ b/tools/testing/selftests/rtc/rtctest.c
@@ -20,6 +20,8 @@
 
 #define NUM_UIE 3
 #define ALARM_DELTA 3
+#define READ_LOOP_DURATION_SEC 30
+#define READ_LOOP_SLEEP_MS 11
 
 static char *rtc_file = "/dev/rtc0";
 
@@ -49,6 +51,70 @@ TEST_F(rtc, date_read) {
 	       rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec);
 }
 
+static time_t rtc_time_to_timestamp(struct rtc_time *rtc_time)
+{
+	struct tm tm_time = {
+	       .tm_sec = rtc_time->tm_sec,
+	       .tm_min = rtc_time->tm_min,
+	       .tm_hour = rtc_time->tm_hour,
+	       .tm_mday = rtc_time->tm_mday,
+	       .tm_mon = rtc_time->tm_mon,
+	       .tm_year = rtc_time->tm_year,
+	};
+
+	return mktime(&tm_time);
+}
+
+static void nanosleep_with_retries(long ns)
+{
+	struct timespec req = {
+		.tv_sec = 0,
+		.tv_nsec = ns,
+	};
+	struct timespec rem;
+
+	while (nanosleep(&req, &rem) != 0) {
+		req.tv_sec = rem.tv_sec;
+		req.tv_nsec = rem.tv_nsec;
+	}
+}
+
+TEST_F_TIMEOUT(rtc, date_read_loop, READ_LOOP_DURATION_SEC + 2) {
+	int rc;
+	long iter_count = 0;
+	struct rtc_time rtc_tm;
+	time_t start_rtc_read, prev_rtc_read;
+
+	TH_LOG("Continuously reading RTC time for %ds (with %dms breaks after every read).",
+	       READ_LOOP_DURATION_SEC, READ_LOOP_SLEEP_MS);
+
+	rc = ioctl(self->fd, RTC_RD_TIME, &rtc_tm);
+	ASSERT_NE(-1, rc);
+	start_rtc_read = rtc_time_to_timestamp(&rtc_tm);
+	prev_rtc_read = start_rtc_read;
+
+	do  {
+		time_t rtc_read;
+
+		rc = ioctl(self->fd, RTC_RD_TIME, &rtc_tm);
+		ASSERT_NE(-1, rc);
+
+		rtc_read = rtc_time_to_timestamp(&rtc_tm);
+		/* Time should not go backwards */
+		ASSERT_LE(prev_rtc_read, rtc_read);
+		/* Time should not increase more then 1s at a time */
+		ASSERT_GE(prev_rtc_read + 1, rtc_read);
+
+		/* Sleep 11ms to avoid killing / overheating the RTC */
+		nanosleep_with_retries(READ_LOOP_SLEEP_MS * 1000000);
+
+		prev_rtc_read = rtc_read;
+		iter_count++;
+	} while (prev_rtc_read <= start_rtc_read + READ_LOOP_DURATION_SEC);
+
+	TH_LOG("Performed %ld RTC time reads.", iter_count);
+}
+
 TEST_F_TIMEOUT(rtc, uie_read, NUM_UIE + 2) {
 	int i, rc, irq = 0;
 	unsigned long data;
diff --git a/tools/testing/selftests/rtc/settings b/tools/testing/selftests/rtc/settings
index a953c96aa16e..0c1a2075d5f3 100644
--- a/tools/testing/selftests/rtc/settings
+++ b/tools/testing/selftests/rtc/settings
@@ -1 +1 @@
-timeout=180
+timeout=210
-- 
cgit 


From f961e20f15ed35e9ca154a099897d600b78b0311 Mon Sep 17 00:00:00 2001
From: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Date: Thu, 27 Jan 2022 12:49:53 +0530
Subject: selftests/powerpc/pmu: Include mmap_buffer field as part of struct
 event

To enable the capturing of samples as part of perf event, add a new
field "mmap_buffer" to "struct event". This field is a place-holder for
sample collection

Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220127072012.662451-2-kjain@linux.ibm.com
---
 tools/testing/selftests/powerpc/pmu/event.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/powerpc/pmu/event.h b/tools/testing/selftests/powerpc/pmu/event.h
index 302eaab51706..23d20340a160 100644
--- a/tools/testing/selftests/powerpc/pmu/event.h
+++ b/tools/testing/selftests/powerpc/pmu/event.h
@@ -22,6 +22,11 @@ struct event {
 		u64 running;
 		u64 enabled;
 	} result;
+	/*
+	 * mmap buffer used while recording sample.
+	 * Accessed as "struct perf_event_mmap_page"
+	 */
+	void *mmap_buffer;
 };
 
 void event_init(struct event *e, u64 config);
-- 
cgit 


From 07609c193a0cfd1e3532a7dd81383c9d458f485c Mon Sep 17 00:00:00 2001
From: Hou Tao <houtao1@huawei.com>
Date: Thu, 17 Feb 2022 15:22:32 +0800
Subject: bpf, selftests: Use raw_tp program for atomic test

Now atomic tests will attach fentry program and run it through
bpf_prog_test_run_opts(), but attaching fentry program depends on BPF
trampoline which is only available under x86-64. Considering many archs
have atomic support, using raw_tp program instead.

Signed-off-by: Hou Tao <houtao1@huawei.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220217072232.1186625-5-houtao1@huawei.com
---
 tools/testing/selftests/bpf/prog_tests/atomics.c | 91 ++++++------------------
 tools/testing/selftests/bpf/progs/atomics.c      | 28 ++++----
 2 files changed, 36 insertions(+), 83 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c b/tools/testing/selftests/bpf/prog_tests/atomics.c
index ab62aba10e2b..13e101f370a1 100644
--- a/tools/testing/selftests/bpf/prog_tests/atomics.c
+++ b/tools/testing/selftests/bpf/prog_tests/atomics.c
@@ -7,19 +7,15 @@
 static void test_add(struct atomics_lskel *skel)
 {
 	int err, prog_fd;
-	int link_fd;
 	LIBBPF_OPTS(bpf_test_run_opts, topts);
 
-	link_fd = atomics_lskel__add__attach(skel);
-	if (!ASSERT_GT(link_fd, 0, "attach(add)"))
-		return;
-
+	/* No need to attach it, just run it directly */
 	prog_fd = skel->progs.add.prog_fd;
 	err = bpf_prog_test_run_opts(prog_fd, &topts);
 	if (!ASSERT_OK(err, "test_run_opts err"))
-		goto cleanup;
+		return;
 	if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
-		goto cleanup;
+		return;
 
 	ASSERT_EQ(skel->data->add64_value, 3, "add64_value");
 	ASSERT_EQ(skel->bss->add64_result, 1, "add64_result");
@@ -31,27 +27,20 @@ static void test_add(struct atomics_lskel *skel)
 	ASSERT_EQ(skel->bss->add_stack_result, 1, "add_stack_result");
 
 	ASSERT_EQ(skel->data->add_noreturn_value, 3, "add_noreturn_value");
-
-cleanup:
-	close(link_fd);
 }
 
 static void test_sub(struct atomics_lskel *skel)
 {
 	int err, prog_fd;
-	int link_fd;
 	LIBBPF_OPTS(bpf_test_run_opts, topts);
 
-	link_fd = atomics_lskel__sub__attach(skel);
-	if (!ASSERT_GT(link_fd, 0, "attach(sub)"))
-		return;
-
+	/* No need to attach it, just run it directly */
 	prog_fd = skel->progs.sub.prog_fd;
 	err = bpf_prog_test_run_opts(prog_fd, &topts);
 	if (!ASSERT_OK(err, "test_run_opts err"))
-		goto cleanup;
+		return;
 	if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
-		goto cleanup;
+		return;
 
 	ASSERT_EQ(skel->data->sub64_value, -1, "sub64_value");
 	ASSERT_EQ(skel->bss->sub64_result, 1, "sub64_result");
@@ -63,27 +52,20 @@ static void test_sub(struct atomics_lskel *skel)
 	ASSERT_EQ(skel->bss->sub_stack_result, 1, "sub_stack_result");
 
 	ASSERT_EQ(skel->data->sub_noreturn_value, -1, "sub_noreturn_value");
-
-cleanup:
-	close(link_fd);
 }
 
 static void test_and(struct atomics_lskel *skel)
 {
 	int err, prog_fd;
-	int link_fd;
 	LIBBPF_OPTS(bpf_test_run_opts, topts);
 
-	link_fd = atomics_lskel__and__attach(skel);
-	if (!ASSERT_GT(link_fd, 0, "attach(and)"))
-		return;
-
+	/* No need to attach it, just run it directly */
 	prog_fd = skel->progs.and.prog_fd;
 	err = bpf_prog_test_run_opts(prog_fd, &topts);
 	if (!ASSERT_OK(err, "test_run_opts err"))
-		goto cleanup;
+		return;
 	if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
-		goto cleanup;
+		return;
 
 	ASSERT_EQ(skel->data->and64_value, 0x010ull << 32, "and64_value");
 	ASSERT_EQ(skel->bss->and64_result, 0x110ull << 32, "and64_result");
@@ -92,26 +74,20 @@ static void test_and(struct atomics_lskel *skel)
 	ASSERT_EQ(skel->bss->and32_result, 0x110, "and32_result");
 
 	ASSERT_EQ(skel->data->and_noreturn_value, 0x010ull << 32, "and_noreturn_value");
-cleanup:
-	close(link_fd);
 }
 
 static void test_or(struct atomics_lskel *skel)
 {
 	int err, prog_fd;
-	int link_fd;
 	LIBBPF_OPTS(bpf_test_run_opts, topts);
 
-	link_fd = atomics_lskel__or__attach(skel);
-	if (!ASSERT_GT(link_fd, 0, "attach(or)"))
-		return;
-
+	/* No need to attach it, just run it directly */
 	prog_fd = skel->progs.or.prog_fd;
 	err = bpf_prog_test_run_opts(prog_fd, &topts);
 	if (!ASSERT_OK(err, "test_run_opts err"))
-		goto cleanup;
+		return;
 	if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
-		goto cleanup;
+		return;
 
 	ASSERT_EQ(skel->data->or64_value, 0x111ull << 32, "or64_value");
 	ASSERT_EQ(skel->bss->or64_result, 0x110ull << 32, "or64_result");
@@ -120,26 +96,20 @@ static void test_or(struct atomics_lskel *skel)
 	ASSERT_EQ(skel->bss->or32_result, 0x110, "or32_result");
 
 	ASSERT_EQ(skel->data->or_noreturn_value, 0x111ull << 32, "or_noreturn_value");
-cleanup:
-	close(link_fd);
 }
 
 static void test_xor(struct atomics_lskel *skel)
 {
 	int err, prog_fd;
-	int link_fd;
 	LIBBPF_OPTS(bpf_test_run_opts, topts);
 
-	link_fd = atomics_lskel__xor__attach(skel);
-	if (!ASSERT_GT(link_fd, 0, "attach(xor)"))
-		return;
-
+	/* No need to attach it, just run it directly */
 	prog_fd = skel->progs.xor.prog_fd;
 	err = bpf_prog_test_run_opts(prog_fd, &topts);
 	if (!ASSERT_OK(err, "test_run_opts err"))
-		goto cleanup;
+		return;
 	if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
-		goto cleanup;
+		return;
 
 	ASSERT_EQ(skel->data->xor64_value, 0x101ull << 32, "xor64_value");
 	ASSERT_EQ(skel->bss->xor64_result, 0x110ull << 32, "xor64_result");
@@ -148,26 +118,20 @@ static void test_xor(struct atomics_lskel *skel)
 	ASSERT_EQ(skel->bss->xor32_result, 0x110, "xor32_result");
 
 	ASSERT_EQ(skel->data->xor_noreturn_value, 0x101ull << 32, "xor_nxoreturn_value");
-cleanup:
-	close(link_fd);
 }
 
 static void test_cmpxchg(struct atomics_lskel *skel)
 {
 	int err, prog_fd;
-	int link_fd;
 	LIBBPF_OPTS(bpf_test_run_opts, topts);
 
-	link_fd = atomics_lskel__cmpxchg__attach(skel);
-	if (!ASSERT_GT(link_fd, 0, "attach(cmpxchg)"))
-		return;
-
+	/* No need to attach it, just run it directly */
 	prog_fd = skel->progs.cmpxchg.prog_fd;
 	err = bpf_prog_test_run_opts(prog_fd, &topts);
 	if (!ASSERT_OK(err, "test_run_opts err"))
-		goto cleanup;
+		return;
 	if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
-		goto cleanup;
+		return;
 
 	ASSERT_EQ(skel->data->cmpxchg64_value, 2, "cmpxchg64_value");
 	ASSERT_EQ(skel->bss->cmpxchg64_result_fail, 1, "cmpxchg_result_fail");
@@ -176,45 +140,34 @@ static void test_cmpxchg(struct atomics_lskel *skel)
 	ASSERT_EQ(skel->data->cmpxchg32_value, 2, "lcmpxchg32_value");
 	ASSERT_EQ(skel->bss->cmpxchg32_result_fail, 1, "cmpxchg_result_fail");
 	ASSERT_EQ(skel->bss->cmpxchg32_result_succeed, 1, "cmpxchg_result_succeed");
-
-cleanup:
-	close(link_fd);
 }
 
 static void test_xchg(struct atomics_lskel *skel)
 {
 	int err, prog_fd;
-	int link_fd;
 	LIBBPF_OPTS(bpf_test_run_opts, topts);
 
-	link_fd = atomics_lskel__xchg__attach(skel);
-	if (!ASSERT_GT(link_fd, 0, "attach(xchg)"))
-		return;
-
+	/* No need to attach it, just run it directly */
 	prog_fd = skel->progs.xchg.prog_fd;
 	err = bpf_prog_test_run_opts(prog_fd, &topts);
 	if (!ASSERT_OK(err, "test_run_opts err"))
-		goto cleanup;
+		return;
 	if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
-		goto cleanup;
+		return;
 
 	ASSERT_EQ(skel->data->xchg64_value, 2, "xchg64_value");
 	ASSERT_EQ(skel->bss->xchg64_result, 1, "xchg64_result");
 
 	ASSERT_EQ(skel->data->xchg32_value, 2, "xchg32_value");
 	ASSERT_EQ(skel->bss->xchg32_result, 1, "xchg32_result");
-
-cleanup:
-	close(link_fd);
 }
 
 void test_atomics(void)
 {
 	struct atomics_lskel *skel;
-	__u32 duration = 0;
 
 	skel = atomics_lskel__open_and_load();
-	if (CHECK(!skel, "skel_load", "atomics skeleton failed\n"))
+	if (!ASSERT_OK_PTR(skel, "atomics skeleton load"))
 		return;
 
 	if (skel->data->skip_tests) {
diff --git a/tools/testing/selftests/bpf/progs/atomics.c b/tools/testing/selftests/bpf/progs/atomics.c
index 16e57313204a..f89c7f0cc53b 100644
--- a/tools/testing/selftests/bpf/progs/atomics.c
+++ b/tools/testing/selftests/bpf/progs/atomics.c
@@ -20,8 +20,8 @@ __u64 add_stack_value_copy = 0;
 __u64 add_stack_result = 0;
 __u64 add_noreturn_value = 1;
 
-SEC("fentry/bpf_fentry_test1")
-int BPF_PROG(add, int a)
+SEC("raw_tp/sys_enter")
+int add(const void *ctx)
 {
 	if (pid != (bpf_get_current_pid_tgid() >> 32))
 		return 0;
@@ -46,8 +46,8 @@ __s64 sub_stack_value_copy = 0;
 __s64 sub_stack_result = 0;
 __s64 sub_noreturn_value = 1;
 
-SEC("fentry/bpf_fentry_test1")
-int BPF_PROG(sub, int a)
+SEC("raw_tp/sys_enter")
+int sub(const void *ctx)
 {
 	if (pid != (bpf_get_current_pid_tgid() >> 32))
 		return 0;
@@ -70,8 +70,8 @@ __u32 and32_value = 0x110;
 __u32 and32_result = 0;
 __u64 and_noreturn_value = (0x110ull << 32);
 
-SEC("fentry/bpf_fentry_test1")
-int BPF_PROG(and, int a)
+SEC("raw_tp/sys_enter")
+int and(const void *ctx)
 {
 	if (pid != (bpf_get_current_pid_tgid() >> 32))
 		return 0;
@@ -91,8 +91,8 @@ __u32 or32_value = 0x110;
 __u32 or32_result = 0;
 __u64 or_noreturn_value = (0x110ull << 32);
 
-SEC("fentry/bpf_fentry_test1")
-int BPF_PROG(or, int a)
+SEC("raw_tp/sys_enter")
+int or(const void *ctx)
 {
 	if (pid != (bpf_get_current_pid_tgid() >> 32))
 		return 0;
@@ -111,8 +111,8 @@ __u32 xor32_value = 0x110;
 __u32 xor32_result = 0;
 __u64 xor_noreturn_value = (0x110ull << 32);
 
-SEC("fentry/bpf_fentry_test1")
-int BPF_PROG(xor, int a)
+SEC("raw_tp/sys_enter")
+int xor(const void *ctx)
 {
 	if (pid != (bpf_get_current_pid_tgid() >> 32))
 		return 0;
@@ -132,8 +132,8 @@ __u32 cmpxchg32_value = 1;
 __u32 cmpxchg32_result_fail = 0;
 __u32 cmpxchg32_result_succeed = 0;
 
-SEC("fentry/bpf_fentry_test1")
-int BPF_PROG(cmpxchg, int a)
+SEC("raw_tp/sys_enter")
+int cmpxchg(const void *ctx)
 {
 	if (pid != (bpf_get_current_pid_tgid() >> 32))
 		return 0;
@@ -153,8 +153,8 @@ __u64 xchg64_result = 0;
 __u32 xchg32_value = 1;
 __u32 xchg32_result = 0;
 
-SEC("fentry/bpf_fentry_test1")
-int BPF_PROG(xchg, int a)
+SEC("raw_tp/sys_enter")
+int xchg(const void *ctx)
 {
 	if (pid != (bpf_get_current_pid_tgid() >> 32))
 		return 0;
-- 
cgit 


From 3edf5f66c12aa0b318b05ad2c04cf363b0f51f99 Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@nvidia.com>
Date: Tue, 1 Mar 2022 05:04:37 +0000
Subject: selftests: add new tests for vxlan vnifiltering

This patch adds a new test script test_vxlan_vnifiltering.sh
with tests for vni filtering api, various datapath tests.
Also has a test with a mix of traditional, metadata and vni
filtering devices inuse at the same time.

Signed-off-by: Roopa Prabhu <roopa@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/net/test_vxlan_vnifiltering.sh       | 579 +++++++++++++++++++++
 1 file changed, 579 insertions(+)
 create mode 100755 tools/testing/selftests/net/test_vxlan_vnifiltering.sh

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh
new file mode 100755
index 000000000000..704997ffc244
--- /dev/null
+++ b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh
@@ -0,0 +1,579 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for checking the VXLAN vni filtering api and
+# datapath.
+# It simulates two hypervisors running two VMs each using four network
+# six namespaces: two for the HVs, four for the VMs. Each VM is
+# connected to a separate bridge. The VM's use overlapping vlans and
+# hence the separate bridge domain. Each vxlan device is a collect
+# metadata device with vni filtering and hence has the ability to
+# terminate configured vni's only.
+
+#  +--------------------------------+     +------------------------------------+
+#  |  vm-11 netns                   |     |  vm-21 netns                       |
+#  |                                |     |                                    |
+#  |+------------+  +-------------+ |     |+-------------+ +----------------+  |
+#  ||veth-11.10  |  |veth-11.20   | |     ||veth-21.10   | | veth-21.20     |  |
+#  ||10.0.10.11/24  |10.0.20.11/24| |     ||10.0.10.21/24| | 10.0.20.21/24  |  |
+#  |+------|-----+  +|------------+ |     |+-----------|-+ +---|------------+  |
+#  |       |         |              |     |            |       |               |
+#  |       |         |              |     |         +------------+             |
+#  |      +------------+            |     |         | veth-21    |             |
+#  |      | veth-11    |            |     |         |            |             |
+#  |      |            |            |     |         +-----|------+             |
+#  |      +-----|------+            |     |               |                    |
+#  |            |                   |     |               |                    |
+#  +------------|-------------------+     +---------------|--------------------+
+#  +------------|-----------------------------------------|-------------------+
+#  |      +-----|------+                            +-----|------+            |
+#  |      |vethhv-11   |                            |vethhv-21   |            |
+#  |      +----|-------+                            +-----|------+            |
+#  |       +---|---+                                  +---|--+                |
+#  |       |  br1  |                                  | br2  |                |
+#  |       +---|---+                                  +---|--+                |
+#  |       +---|----+                                 +---|--+                |
+#  |       |  vxlan1|                                 |vxlan2|                |
+#  |       +--|-----+                                 +--|---+                |
+#  |          |                                          |                    |
+#  |          |         +---------------------+          |                    |
+#  |          |         |veth0                |          |                    |
+#  |          +---------|172.16.0.1/24        -----------+                    |
+#  |                    |2002:fee1::1/64      |                               |
+#  | hv-1 netns         +--------|------------+                               |
+#  +-----------------------------|--------------------------------------------+
+#                                |
+#  +-----------------------------|--------------------------------------------+
+#  | hv-2 netns         +--------|-------------+                              |
+#  |                    | veth0                |                              |
+#  |             +------| 172.16.0.2/24        |---+                          |
+#  |             |      | 2002:fee1::2/64      |   |                          |
+#  |             |      |                      |   |                          |
+#  |             |      +----------------------+   |         -                |
+#  |             |                                 |                          |
+#  |           +-|-------+                +--------|-+                        |
+#  |           | vxlan1  |                |  vxlan2  |                        |
+#  |           +----|----+                +---|------+                        |
+#  |             +--|--+                    +-|---+                           |
+#  |             | br1 |                    | br2 |                           |
+#  |             +--|--+                    +--|--+                           |
+#  |          +-----|-------+             +----|-------+                      |
+#  |          | vethhv-12   |             |vethhv-22   |                      |
+#  |          +------|------+             +-------|----+                      |
+#  +-----------------|----------------------------|---------------------------+
+#                    |                            |
+#  +-----------------|-----------------+ +--------|---------------------------+
+#  |         +-------|---+             | |     +--|---------+                 |
+#  |         | veth-12   |             | |     |veth-22     |                 |
+#  |         +-|--------|+             | |     +--|--------|+                 |
+#  |           |        |              | |        |        |                  |
+#  |+----------|--+ +---|-----------+  | |+-------|-----+ +|---------------+  |
+#  ||veth-12.10   | |veth-12.20     |  | ||veth-22.10   | |veth-22.20      |  |
+#  ||10.0.10.12/24| |10.0.20.12/24  |  | ||10.0.10.22/24| |10.0.20.22/24   |  |
+#  |+-------------+ +---------------+  | |+-------------+ +----------------+  |
+#  |                                   | |                                    |
+#  |                                   | |                                    |
+#  | vm-12 netns                       | |vm-22 netns                         |
+#  +-----------------------------------+ +------------------------------------+
+#
+#
+# This test tests the new vxlan vnifiltering api
+
+ret=0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# all tests in this script. Can be overridden with -t option
+TESTS="
+	vxlan_vnifilter_api
+	vxlan_vnifilter_datapath
+	vxlan_vnifilter_datapath_pervni
+	vxlan_vnifilter_datapath_mgroup
+	vxlan_vnifilter_datapath_mgroup_pervni
+	vxlan_vnifilter_metadata_and_traditional_mix
+"
+VERBOSE=0
+PAUSE_ON_FAIL=no
+PAUSE=no
+
+which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
+
+log_test()
+{
+	local rc=$1
+	local expected=$2
+	local msg="$3"
+
+	if [ ${rc} -eq ${expected} ]; then
+		printf "    TEST: %-60s  [ OK ]\n" "${msg}"
+		nsuccess=$((nsuccess+1))
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "    TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+		echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+
+	if [ "${PAUSE}" = "yes" ]; then
+		echo
+		echo "hit enter to continue, 'q' to quit"
+		read a
+		[ "$a" = "q" ] && exit 1
+	fi
+}
+
+run_cmd()
+{
+	local cmd="$1"
+	local out
+	local stderr="2>/dev/null"
+
+	if [ "$VERBOSE" = "1" ]; then
+		printf "COMMAND: $cmd\n"
+		stderr=
+	fi
+
+	out=$(eval $cmd $stderr)
+	rc=$?
+	if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+		echo "    $out"
+	fi
+
+	return $rc
+}
+
+check_hv_connectivity() {
+	ip netns exec hv-1 ping -c 1 -W 1 $1 &>/dev/null
+	sleep 1
+	ip netns exec hv-1 ping -c 1 -W 1 $2 &>/dev/null
+
+	return $?
+}
+
+check_vm_connectivity() {
+	run_cmd "ip netns exec vm-11 ping -c 1 -W 1 10.0.10.12"
+	log_test $? 0 "VM connectivity over $1 (ipv4 default rdst)"
+
+	run_cmd "ip netns exec vm-21 ping -c 1 -W 1 10.0.10.22"
+	log_test $? 0 "VM connectivity over $1 (ipv6 default rdst)"
+}
+
+cleanup() {
+	ip link del veth-hv-1 2>/dev/null || true
+	ip link del vethhv-11 vethhv-12 vethhv-21 vethhv-22 2>/dev/null || true
+
+	for ns in hv-1 hv-2 vm-11 vm-21 vm-12 vm-22 vm-31 vm-32; do
+		ip netns del $ns 2>/dev/null || true
+	done
+}
+
+trap cleanup EXIT
+
+setup-hv-networking() {
+	hv=$1
+	local1=$2
+	mask1=$3
+	local2=$4
+	mask2=$5
+
+	ip netns add hv-$hv
+	ip link set veth-hv-$hv netns hv-$hv
+	ip -netns hv-$hv link set veth-hv-$hv name veth0
+	ip -netns hv-$hv addr add $local1/$mask1 dev veth0
+	ip -netns hv-$hv addr add $local2/$mask2 dev veth0
+	ip -netns hv-$hv link set veth0 up
+}
+
+# Setups a "VM" simulated by a netns an a veth pair
+# example: setup-vm <hvid> <vmid> <brid> <VATTRS> <mcast_for_bum>
+# VATTRS = comma separated "<vlan>-<v[46]>-<localip>-<remoteip>-<VTYPE>-<vxlandstport>"
+# VTYPE = vxlan device type. "default = traditional device, metadata = metadata device
+#         vnifilter = vnifiltering device,
+#         vnifilterg = vnifiltering device with per vni group/remote"
+# example:
+#     setup-vm 1 11 1 \
+#         10-v4-172.16.0.1-239.1.1.100-vnifilterg,20-v4-172.16.0.1-239.1.1.100-vnifilterg 1
+#
+setup-vm() {
+	hvid=$1
+	vmid=$2
+	brid=$3
+	vattrs=$4
+	mcast=$5
+	lastvxlandev=""
+
+	# create bridge
+	ip -netns hv-$hvid link add br$brid type bridge vlan_filtering 1 vlan_default_pvid 0 \
+		mcast_snooping 0
+	ip -netns hv-$hvid link set br$brid up
+
+	# create vm namespace and interfaces and connect to hypervisor
+	# namespace
+	ip netns add vm-$vmid
+	hvvethif="vethhv-$vmid"
+	vmvethif="veth-$vmid"
+	ip link add $hvvethif type veth peer name $vmvethif
+	ip link set $hvvethif netns hv-$hvid
+	ip link set $vmvethif netns vm-$vmid
+	ip -netns hv-$hvid link set $hvvethif up
+	ip -netns vm-$vmid link set $vmvethif up
+	ip -netns hv-$hvid link set $hvvethif master br$brid
+
+	# configure VM vlan/vni filtering on hypervisor
+	for vmap in $(echo $vattrs | cut -d "," -f1- --output-delimiter=' ')
+	do
+	local vid=$(echo $vmap | awk -F'-' '{print ($1)}')
+	local family=$(echo $vmap | awk -F'-' '{print ($2)}')
+	local localip=$(echo $vmap | awk -F'-' '{print ($3)}')
+	local group=$(echo $vmap | awk -F'-' '{print ($4)}')
+	local vtype=$(echo $vmap | awk -F'-' '{print ($5)}')
+	local port=$(echo $vmap | awk -F'-' '{print ($6)}')
+
+	ip -netns vm-$vmid link add name $vmvethif.$vid link $vmvethif type vlan id $vid
+	ip -netns vm-$vmid addr add 10.0.$vid.$vmid/24 dev $vmvethif.$vid
+	ip -netns vm-$vmid link set $vmvethif.$vid up
+
+	tid=$vid
+	vxlandev="vxlan$brid"
+	vxlandevflags=""
+
+	if [[ -n $vtype && $vtype == "metadata" ]]; then
+	   vxlandevflags="$vxlandevflags external"
+	elif [[ -n $vtype && $vtype == "vnifilter" || $vtype == "vnifilterg" ]]; then
+	   vxlandevflags="$vxlandevflags external vnifilter"
+	   tid=$((vid+brid))
+	else
+	   vxlandevflags="$vxlandevflags id $tid"
+	   vxlandev="vxlan$tid"
+	fi
+
+	if [[ -n $vtype && $vtype != "vnifilterg" ]]; then
+	   if [[ -n "$group" && "$group" != "null" ]]; then
+	      if [ $mcast -eq 1 ]; then
+		 vxlandevflags="$vxlandevflags group $group"
+	      else
+		 vxlandevflags="$vxlandevflags remote $group"
+	      fi
+	   fi
+	fi
+
+	if [[ -n "$port" && "$port" != "default" ]]; then
+	      vxlandevflags="$vxlandevflags dstport $port"
+	fi
+
+	# create vxlan device
+	if [ "$vxlandev" != "$lastvxlandev" ]; then
+	     ip -netns hv-$hvid link add $vxlandev type vxlan local $localip $vxlandevflags dev veth0 2>/dev/null
+	     ip -netns hv-$hvid link set $vxlandev master br$brid
+	     ip -netns hv-$hvid link set $vxlandev up
+	     lastvxlandev=$vxlandev
+	fi
+
+	# add vlan
+	bridge -netns hv-$hvid vlan add vid $vid dev $hvvethif
+	bridge -netns hv-$hvid vlan add vid $vid pvid dev $vxlandev
+
+	# Add bridge vni filter for tx
+	if [[ -n $vtype && $vtype == "metadata" || $vtype == "vnifilter" || $vtype == "vnifilterg" ]]; then
+	   bridge -netns hv-$hvid link set dev $vxlandev vlan_tunnel on
+	   bridge -netns hv-$hvid vlan add dev $vxlandev vid $vid tunnel_info id $tid
+	fi
+
+	if [[ -n $vtype && $vtype == "metadata" ]]; then
+	   bridge -netns hv-$hvid fdb add 00:00:00:00:00:00 dev $vxlandev \
+								src_vni $tid vni $tid dst $group self
+	elif [[ -n $vtype && $vtype == "vnifilter" ]]; then
+	   # Add per vni rx filter with 'bridge vni' api
+	   bridge -netns hv-$hvid vni add dev $vxlandev vni $tid
+	elif [[ -n $vtype && $vtype == "vnifilterg" ]]; then
+	   # Add per vni group config with 'bridge vni' api
+	   if [ -n "$group" ]; then
+	      if [ "$family" == "v4" ]; then
+		 if [ $mcast -eq 1 ]; then
+		    bridge -netns hv-$hvid vni add dev $vxlandev vni $tid group $group
+		 else
+		    bridge -netns hv-$hvid vni add dev $vxlandev vni $tid remote $group
+		 fi
+	      else
+		 if [ $mcast -eq 1 ]; then
+		    bridge -netns hv-$hvid vni add dev $vxlandev vni $tid group6 $group
+		 else
+		    bridge -netns hv-$hvid vni add dev $vxlandev vni $tid remote6 $group
+		 fi
+	      fi
+	   fi
+	fi
+	done
+}
+
+setup_vnifilter_api()
+{
+	ip link add veth-host type veth peer name veth-testns
+	ip netns add testns
+	ip link set veth-testns netns testns
+}
+
+cleanup_vnifilter_api()
+{
+	ip link del veth-host 2>/dev/null || true
+	ip netns del testns 2>/dev/null || true
+}
+
+# tests vxlan filtering api
+vxlan_vnifilter_api()
+{
+	hv1addr1="172.16.0.1"
+	hv2addr1="172.16.0.2"
+	hv1addr2="2002:fee1::1"
+	hv2addr2="2002:fee1::2"
+	localip="172.16.0.1"
+	group="239.1.1.101"
+
+	cleanup_vnifilter_api &>/dev/null
+	setup_vnifilter_api
+
+	# Duplicate vni test
+	# create non-vnifiltering traditional vni device
+	run_cmd "ip -netns testns link add vxlan100 type vxlan id 100 local $localip dev veth-testns dstport 4789"
+	log_test $? 0 "Create traditional vxlan device"
+
+	# create vni filtering device
+	run_cmd "ip -netns testns link add vxlan-ext1 type vxlan vnifilter local $localip dev veth-testns dstport 4789"
+	log_test $? 1 "Cannot create vnifilter device without external flag"
+
+	run_cmd "ip -netns testns link add vxlan-ext1 type vxlan external vnifilter local $localip dev veth-testns dstport 4789"
+	log_test $? 0 "Creating external vxlan device with vnifilter flag"
+
+	run_cmd "bridge -netns testns vni add dev vxlan-ext1 vni 100"
+	log_test $? 0 "Cannot set in-use vni id on vnifiltering device"
+
+	run_cmd "bridge -netns testns vni add dev vxlan-ext1 vni 200"
+	log_test $? 0 "Set new vni id on vnifiltering device"
+
+	run_cmd "ip -netns testns link add vxlan-ext2 type vxlan external vnifilter local $localip dev veth-testns dstport 4789"
+	log_test $? 0 "Create second external vxlan device with vnifilter flag"
+
+	run_cmd "bridge -netns testns vni add dev vxlan-ext2 vni 200"
+	log_test $? 255 "Cannot set in-use vni id on vnifiltering device"
+
+	run_cmd "bridge -netns testns vni add dev vxlan-ext2 vni 300"
+	log_test $? 0 "Set new vni id on vnifiltering device"
+
+	# check in bridge vni show
+	run_cmd "bridge -netns testns vni add dev vxlan-ext2 vni 300"
+	log_test $? 0 "Update vni id on vnifiltering device"
+
+	run_cmd "bridge -netns testns vni add dev vxlan-ext2 vni 400"
+	log_test $? 0 "Add new vni id on vnifiltering device"
+
+	# add multicast group per vni
+	run_cmd "bridge -netns testns vni add dev vxlan-ext1 vni 200 group $group"
+	log_test $? 0 "Set multicast group on existing vni"
+
+	# add multicast group per vni
+	run_cmd "bridge -netns testns vni add dev vxlan-ext2 vni 300 group $group"
+	log_test $? 0 "Set multicast group on existing vni"
+
+	# set vnifilter on an existing external vxlan device
+	run_cmd "ip -netns testns link set dev vxlan-ext1 type vxlan external vnifilter"
+	log_test $? 2 "Cannot set vnifilter flag on a device"
+
+	# change vxlan vnifilter flag
+	run_cmd "ip -netns testns link set dev vxlan-ext1 type vxlan external novnifilter"
+	log_test $? 2 "Cannot unset vnifilter flag on a device"
+}
+
+# Sanity test vnifilter datapath
+# vnifilter vnis inherit BUM group from
+# vxlan device
+vxlan_vnifilter_datapath()
+{
+	hv1addr1="172.16.0.1"
+	hv2addr1="172.16.0.2"
+	hv1addr2="2002:fee1::1"
+	hv2addr2="2002:fee1::2"
+
+	ip link add veth-hv-1 type veth peer name veth-hv-2
+	setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64 $hv2addr1 $hv2addr2
+	setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64 $hv1addr1 $hv1addr2
+
+        check_hv_connectivity hv2addr1 hv2addr2
+
+	setup-vm 1 11 1 10-v4-$hv1addr1-$hv2addr1-vnifilter,20-v4-$hv1addr1-$hv2addr1-vnifilter 0
+	setup-vm 1 21 2 10-v6-$hv1addr2-$hv2addr2-vnifilter,20-v6-$hv1addr2-$hv2addr2-vnifilter 0
+
+	setup-vm 2 12 1 10-v4-$hv2addr1-$hv1addr1-vnifilter,20-v4-$hv2addr1-$hv1addr1-vnifilter 0
+	setup-vm 2 22 2 10-v6-$hv2addr2-$hv1addr2-vnifilter,20-v6-$hv2addr2-$hv1addr2-vnifilter 0
+
+        check_vm_connectivity "vnifiltering vxlan"
+}
+
+# Sanity test vnifilter datapath
+# with vnifilter per vni configured BUM
+# group/remote
+vxlan_vnifilter_datapath_pervni()
+{
+	hv1addr1="172.16.0.1"
+	hv2addr1="172.16.0.2"
+	hv1addr2="2002:fee1::1"
+	hv2addr2="2002:fee1::2"
+
+	ip link add veth-hv-1 type veth peer name veth-hv-2
+	setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64
+	setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64
+
+        check_hv_connectivity hv2addr1 hv2addr2
+
+	setup-vm 1 11 1 10-v4-$hv1addr1-$hv2addr1-vnifilterg,20-v4-$hv1addr1-$hv2addr1-vnifilterg 0
+	setup-vm 1 21 2 10-v6-$hv1addr2-$hv2addr2-vnifilterg,20-v6-$hv1addr2-$hv2addr2-vnifilterg 0
+
+	setup-vm 2 12 1 10-v4-$hv2addr1-$hv1addr1-vnifilterg,20-v4-$hv2addr1-$hv1addr1-vnifilterg 0
+	setup-vm 2 22 2 10-v6-$hv2addr2-$hv1addr2-vnifilterg,20-v6-$hv2addr2-$hv1addr2-vnifilterg 0
+
+        check_vm_connectivity "vnifiltering vxlan pervni remote"
+}
+
+
+vxlan_vnifilter_datapath_mgroup()
+{
+	hv1addr1="172.16.0.1"
+	hv2addr1="172.16.0.2"
+	hv1addr2="2002:fee1::1"
+	hv2addr2="2002:fee1::2"
+        group="239.1.1.100"
+        group6="ff07::1"
+
+	ip link add veth-hv-1 type veth peer name veth-hv-2
+	setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64
+	setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64
+
+        check_hv_connectivity hv2addr1 hv2addr2
+
+	setup-vm 1 11 1 10-v4-$hv1addr1-$group-vnifilter,20-v4-$hv1addr1-$group-vnifilter 1
+	setup-vm 1 21 2 "10-v6-$hv1addr2-$group6-vnifilter,20-v6-$hv1addr2-$group6-vnifilter" 1
+
+        setup-vm 2 12 1 10-v4-$hv2addr1-$group-vnifilter,20-v4-$hv2addr1-$group-vnifilter 1
+        setup-vm 2 22 2 10-v6-$hv2addr2-$group6-vnifilter,20-v6-$hv2addr2-$group6-vnifilter 1
+
+        check_vm_connectivity "vnifiltering vxlan mgroup"
+}
+
+vxlan_vnifilter_datapath_mgroup_pervni()
+{
+	hv1addr1="172.16.0.1"
+	hv2addr1="172.16.0.2"
+	hv1addr2="2002:fee1::1"
+	hv2addr2="2002:fee1::2"
+        group="239.1.1.100"
+        group6="ff07::1"
+
+	ip link add veth-hv-1 type veth peer name veth-hv-2
+	setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64
+	setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64
+
+        check_hv_connectivity hv2addr1 hv2addr2
+
+	setup-vm 1 11 1 10-v4-$hv1addr1-$group-vnifilterg,20-v4-$hv1addr1-$group-vnifilterg 1
+	setup-vm 1 21 2 10-v6-$hv1addr2-$group6-vnifilterg,20-v6-$hv1addr2-$group6-vnifilterg 1
+
+        setup-vm 2 12 1 10-v4-$hv2addr1-$group-vnifilterg,20-v4-$hv2addr1-$group-vnifilterg 1
+        setup-vm 2 22 2 10-v6-$hv2addr2-$group6-vnifilterg,20-v6-$hv2addr2-$group6-vnifilterg 1
+
+        check_vm_connectivity "vnifiltering vxlan pervni mgroup"
+}
+
+vxlan_vnifilter_metadata_and_traditional_mix()
+{
+	hv1addr1="172.16.0.1"
+	hv2addr1="172.16.0.2"
+	hv1addr2="2002:fee1::1"
+	hv2addr2="2002:fee1::2"
+
+	ip link add veth-hv-1 type veth peer name veth-hv-2
+	setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64
+	setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64
+
+        check_hv_connectivity hv2addr1 hv2addr2
+
+	setup-vm 1 11 1 10-v4-$hv1addr1-$hv2addr1-vnifilter,20-v4-$hv1addr1-$hv2addr1-vnifilter 0
+	setup-vm 1 21 2 10-v6-$hv1addr2-$hv2addr2-vnifilter,20-v6-$hv1addr2-$hv2addr2-vnifilter 0
+	setup-vm 1 31 3 30-v4-$hv1addr1-$hv2addr1-default-4790,40-v6-$hv1addr2-$hv2addr2-default-4790,50-v4-$hv1addr1-$hv2addr1-metadata-4791 0
+
+
+	setup-vm 2 12 1 10-v4-$hv2addr1-$hv1addr1-vnifilter,20-v4-$hv2addr1-$hv1addr1-vnifilter 0
+	setup-vm 2 22 2 10-v6-$hv2addr2-$hv1addr2-vnifilter,20-v6-$hv2addr2-$hv1addr2-vnifilter 0
+	setup-vm 2 32 3 30-v4-$hv2addr1-$hv1addr1-default-4790,40-v6-$hv2addr2-$hv1addr2-default-4790,50-v4-$hv2addr1-$hv1addr1-metadata-4791 0
+
+        check_vm_connectivity "vnifiltering vxlan pervni remote mix"
+
+	# check VM connectivity over traditional/non-vxlan filtering vxlan devices
+	run_cmd "ip netns exec vm-31 ping -c 1 -W 1 10.0.30.32"
+        log_test $? 0 "VM connectivity over traditional vxlan (ipv4 default rdst)"
+
+	run_cmd "ip netns exec vm-31 ping -c 1 -W 1 10.0.40.32"
+        log_test $? 0 "VM connectivity over traditional vxlan (ipv6 default rdst)"
+
+	run_cmd "ip netns exec vm-31 ping -c 1 -W 1 10.0.50.32"
+        log_test $? 0 "VM connectivity over metadata nonfiltering vxlan (ipv4 default rdst)"
+}
+
+while getopts :t:pP46hv o
+do
+	case $o in
+		t) TESTS=$OPTARG;;
+		p) PAUSE_ON_FAIL=yes;;
+		P) PAUSE=yes;;
+		v) VERBOSE=$(($VERBOSE + 1));;
+		h) usage; exit 0;;
+		*) usage; exit 1;;
+	esac
+done
+
+# make sure we don't pause twice
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+ip link help vxlan 2>&1 | grep -q "vnifilter"
+if [ $? -ne 0 ]; then
+   echo "SKIP: iproute2 too old, missing vxlan dev vnifilter setting"
+   sync
+   exit $ksft_skip
+fi
+
+bridge vni help 2>&1 | grep -q "Usage: bridge vni"
+if [ $? -ne 0 ]; then
+   echo "SKIP: iproute2 bridge lacks vxlan vnifiltering support"
+   exit $ksft_skip
+fi
+
+# start clean
+cleanup &> /dev/null
+
+for t in $TESTS
+do
+	case $t in
+	none) setup; exit 0;;
+	*) $t; cleanup;;
+	esac
+done
+
+if [ "$TESTS" != "none" ]; then
+	printf "\nTests passed: %3d\n" ${nsuccess}
+	printf "Tests failed: %3d\n"   ${nfail}
+fi
+
+exit $ret
-- 
cgit 


From c315669e2fbd71bb9387066f60f0d91b0ceb28f3 Mon Sep 17 00:00:00 2001
From: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Date: Thu, 27 Jan 2022 12:49:54 +0530
Subject: selftests/powerpc/pmu: Add support for perf sampling tests

Add support functions for enabling perf sampling test in a new folder
"sampling_tests" under "selftests/powerpc/pmu". This includes support
functions for allocating and processing the mmap buffer. These functions
are added/defined in "sampling_tests/misc.*" files.

Also updates the corresponding Makefiles in "selftests/powerpc" and
"sampling_tests" folder.

Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
[mpe: Drop unneeded bits from the Makefile]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220127072012.662451-3-kjain@linux.ibm.com
---
 tools/testing/selftests/powerpc/pmu/Makefile       |  11 ++-
 .../selftests/powerpc/pmu/sampling_tests/Makefile  |   7 ++
 .../selftests/powerpc/pmu/sampling_tests/misc.c    | 105 +++++++++++++++++++++
 .../selftests/powerpc/pmu/sampling_tests/misc.h    |   9 ++
 4 files changed, 130 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
 create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c
 create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile
index 904672fb78dd..edbd96d3b2ab 100644
--- a/tools/testing/selftests/powerpc/pmu/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/Makefile
@@ -8,7 +8,7 @@ EXTRA_SOURCES := ../harness.c event.c lib.c ../utils.c
 top_srcdir = ../../../../..
 include ../../lib.mk
 
-all: $(TEST_GEN_PROGS) ebb
+all: $(TEST_GEN_PROGS) ebb sampling_tests
 
 $(TEST_GEN_PROGS): $(EXTRA_SOURCES)
 
@@ -26,25 +26,32 @@ DEFAULT_RUN_TESTS := $(RUN_TESTS)
 override define RUN_TESTS
 	$(DEFAULT_RUN_TESTS)
 	TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests
+	TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests
 endef
 
 DEFAULT_EMIT_TESTS := $(EMIT_TESTS)
 override define EMIT_TESTS
 	$(DEFAULT_EMIT_TESTS)
 	TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests
+	TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests
 endef
 
 DEFAULT_INSTALL_RULE := $(INSTALL_RULE)
 override define INSTALL_RULE
 	$(DEFAULT_INSTALL_RULE)
 	TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install
+	TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install
 endef
 
 clean:
 	$(RM) $(TEST_GEN_PROGS) $(OUTPUT)/loop.o
 	TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean
+	TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean
 
 ebb:
 	TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all
 
-.PHONY: all run_tests clean ebb
+sampling_tests:
+	TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all
+
+.PHONY: all run_tests clean ebb sampling_tests
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
new file mode 100644
index 000000000000..ac3d03ffc428
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -m64
+
+top_srcdir = ../../../../../..
+include ../../../lib.mk
+
+$(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c misc.c misc.h
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c
new file mode 100644
index 000000000000..4779b107f43b
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Athira Rajeev, IBM Corp.
+ */
+
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+#include "misc.h"
+
+#define PAGE_SIZE               sysconf(_SC_PAGESIZE)
+
+/*
+ * Allocate mmap buffer of "mmap_pages" number of
+ * pages.
+ */
+void *event_sample_buf_mmap(int fd, int mmap_pages)
+{
+	size_t page_size = sysconf(_SC_PAGESIZE);
+	size_t mmap_size;
+	void *buff;
+
+	if (mmap_pages <= 0)
+		return NULL;
+
+	if (fd <= 0)
+		return NULL;
+
+	mmap_size =  page_size * (1 + mmap_pages);
+	buff = mmap(NULL, mmap_size,
+		PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+
+	if (buff == MAP_FAILED) {
+		perror("mmap() failed.");
+		return NULL;
+	}
+	return buff;
+}
+
+/*
+ * Post process the mmap buffer.
+ * - If sample_count != NULL then return count of total
+ *   number of samples present in the mmap buffer.
+ * - If sample_count == NULL then return the address
+ *   of first sample from the mmap buffer
+ */
+void *__event_read_samples(void *sample_buff, size_t *size, u64 *sample_count)
+{
+	size_t page_size = sysconf(_SC_PAGESIZE);
+	struct perf_event_header *header = sample_buff + page_size;
+	struct perf_event_mmap_page *metadata_page = sample_buff;
+	unsigned long data_head, data_tail;
+
+	/*
+	 * PERF_RECORD_SAMPLE:
+	 * struct {
+	 *     struct perf_event_header hdr;
+	 *     u64 data[];
+	 * };
+	 */
+
+	data_head = metadata_page->data_head;
+	/* sync memory before reading sample */
+	mb();
+	data_tail = metadata_page->data_tail;
+
+	/* Check for sample_count */
+	if (sample_count)
+		*sample_count = 0;
+
+	while (1) {
+		/*
+		 * Reads the mmap data buffer by moving
+		 * the data_tail to know the last read data.
+		 * data_head points to head in data buffer.
+		 * refer "struct perf_event_mmap_page" in
+		 * "include/uapi/linux/perf_event.h".
+		 */
+		if (data_head - data_tail < sizeof(header))
+			return NULL;
+
+		data_tail += sizeof(header);
+		if (header->type == PERF_RECORD_SAMPLE) {
+			*size = (header->size - sizeof(header));
+			if (!sample_count)
+				return sample_buff + page_size + data_tail;
+			data_tail += *size;
+			*sample_count += 1;
+		} else {
+			*size = (header->size - sizeof(header));
+			if ((metadata_page->data_tail + *size) > metadata_page->data_head)
+				data_tail = metadata_page->data_head;
+			else
+				data_tail += *size;
+		}
+		header = (struct perf_event_header *)((void *)header + header->size);
+	}
+	return NULL;
+}
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h
new file mode 100644
index 000000000000..291f9adba817
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2022, Athira Rajeev, IBM Corp.
+ */
+
+#include "../event.h"
+
+void *event_sample_buf_mmap(int fd, int mmap_pages);
+void *__event_read_samples(void *sample_buff, size_t *size, u64 *sample_count);
-- 
cgit 


From 6523dce86222451e5ca2df8a46597a217084bfdf Mon Sep 17 00:00:00 2001
From: Madhavan Srinivasan <maddy@linux.ibm.com>
Date: Thu, 27 Jan 2022 12:49:55 +0530
Subject: selftests/powerpc/pmu: Add macros to parse event codes

Each platform has raw event encoding format which specifies the bit
positions for different fields. The fields from event code gets
translated into performance monitoring mode control register (MMCRx)
settings. Patch add macros to extract individual fields from the event
code.

Add functions for sanity checks, since testcases currently are only
supported in power9 and power10.

Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
[mpe: Read PVR directly rather than using /proc/cpuinfo]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220127072012.662451-4-kjain@linux.ibm.com
---
 tools/testing/selftests/powerpc/include/reg.h      |   4 +
 .../selftests/powerpc/pmu/sampling_tests/misc.c    | 132 +++++++++++++++++++++
 .../selftests/powerpc/pmu/sampling_tests/misc.h    |  36 ++++++
 .../selftests/powerpc/security/spectre_v2.c        |   2 -
 4 files changed, 172 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/powerpc/include/reg.h b/tools/testing/selftests/powerpc/include/reg.h
index c0f2742a3a59..c422be8a42b2 100644
--- a/tools/testing/selftests/powerpc/include/reg.h
+++ b/tools/testing/selftests/powerpc/include/reg.h
@@ -52,6 +52,9 @@
 #define SPRN_TFHAR      0x80    /* Transaction Failure Handler Addr */
 #define SPRN_TAR        0x32f	/* Target Address Register */
 
+#define PVR_VER(pvr)	(((pvr) >>  16) & 0xFFFF)
+#define SPRN_PVR	0x11F
+
 #define SPRN_DSCR_PRIV 0x11	/* Privilege State DSCR */
 #define SPRN_DSCR      0x03	/* Data Stream Control Register */
 #define SPRN_PPR       896	/* Program Priority Register */
@@ -84,6 +87,7 @@
 #define TEXASR_ROT	0x0000000002000000
 
 /* MSR register bits */
+#define MSR_HV 		(1ul << 60)	/* Hypervisor state */
 #define MSR_TS_S_LG     33              /* Trans Mem state: Suspended */
 #define MSR_TS_T_LG	34              /* Trans Mem state: Active */
 
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c
index 4779b107f43b..a86d7d125955 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright 2022, Athira Rajeev, IBM Corp.
+ * Copyright 2022, Madhavan Srinivasan, IBM Corp.
  */
 
 #include <unistd.h>
@@ -16,6 +17,137 @@
 
 #define PAGE_SIZE               sysconf(_SC_PAGESIZE)
 
+/* Storage for platform version */
+int pvr;
+u64 platform_extended_mask;
+
+/* Mask and Shift for Event code fields */
+int ev_mask_pmcxsel, ev_shift_pmcxsel;		//pmcxsel field
+int ev_mask_marked, ev_shift_marked;		//marked filed
+int ev_mask_comb, ev_shift_comb;		//combine field
+int ev_mask_unit, ev_shift_unit;		//unit field
+int ev_mask_pmc, ev_shift_pmc;			//pmc field
+int ev_mask_cache, ev_shift_cache;		//Cache sel field
+int ev_mask_sample, ev_shift_sample;		//Random sampling field
+int ev_mask_thd_sel, ev_shift_thd_sel;		//thresh_sel field
+int ev_mask_thd_start, ev_shift_thd_start;	//thresh_start field
+int ev_mask_thd_stop, ev_shift_thd_stop;	//thresh_stop field
+int ev_mask_thd_cmp, ev_shift_thd_cmp;		//thresh cmp field
+int ev_mask_sm, ev_shift_sm;			//SDAR mode field
+int ev_mask_rsq, ev_shift_rsq;			//radix scope qual field
+int ev_mask_l2l3, ev_shift_l2l3;		//l2l3 sel field
+int ev_mask_mmcr3_src, ev_shift_mmcr3_src;	//mmcr3 field
+
+static void init_ev_encodes(void)
+{
+	ev_mask_pmcxsel = 0xff;
+	ev_shift_pmcxsel = 0;
+	ev_mask_marked = 1;
+	ev_shift_marked = 8;
+	ev_mask_unit = 0xf;
+	ev_shift_unit = 12;
+	ev_mask_pmc = 0xf;
+	ev_shift_pmc = 16;
+	ev_mask_sample	= 0x1f;
+	ev_shift_sample = 24;
+	ev_mask_thd_sel = 0x7;
+	ev_shift_thd_sel = 29;
+	ev_mask_thd_start = 0xf;
+	ev_shift_thd_start = 36;
+	ev_mask_thd_stop = 0xf;
+	ev_shift_thd_stop = 32;
+
+	switch (pvr) {
+	case POWER10:
+		ev_mask_rsq = 1;
+		ev_shift_rsq = 9;
+		ev_mask_comb = 3;
+		ev_shift_comb = 10;
+		ev_mask_cache = 3;
+		ev_shift_cache = 20;
+		ev_mask_sm = 0x3;
+		ev_shift_sm = 22;
+		ev_mask_l2l3 = 0x1f;
+		ev_shift_l2l3 = 40;
+		ev_mask_mmcr3_src = 0x7fff;
+		ev_shift_mmcr3_src = 45;
+		break;
+	case POWER9:
+		ev_mask_comb = 3;
+		ev_shift_comb = 10;
+		ev_mask_cache = 0xf;
+		ev_shift_cache = 20;
+		ev_mask_thd_cmp = 0x3ff;
+		ev_shift_thd_cmp = 40;
+		ev_mask_sm = 0x3;
+		ev_shift_sm = 50;
+		break;
+	default:
+		FAIL_IF_EXIT(1);
+	}
+}
+
+/* Return the extended regs mask value */
+static u64 perf_get_platform_reg_mask(void)
+{
+	if (have_hwcap2(PPC_FEATURE2_ARCH_3_1))
+		return PERF_POWER10_MASK;
+	if (have_hwcap2(PPC_FEATURE2_ARCH_3_00))
+		return PERF_POWER9_MASK;
+
+	return -1;
+}
+
+int check_extended_regs_support(void)
+{
+	int fd;
+	struct event event;
+
+	event_init(&event, 0x1001e);
+
+	event.attr.type = 4;
+	event.attr.sample_period = 1;
+	event.attr.disabled = 1;
+	event.attr.sample_type = PERF_SAMPLE_REGS_INTR;
+	event.attr.sample_regs_intr = platform_extended_mask;
+
+	fd = event_open(&event);
+	if (fd != -1)
+		return 0;
+
+	return -1;
+}
+
+int check_pvr_for_sampling_tests(void)
+{
+	pvr = PVR_VER(mfspr(SPRN_PVR));
+
+	platform_extended_mask = perf_get_platform_reg_mask();
+
+	/*
+	 * Check for supported platforms
+	 * for sampling test
+	 */
+	if ((pvr != POWER10) && (pvr != POWER9))
+		goto out;
+
+	/*
+	 * Check PMU driver registered by looking for
+	 * PPC_FEATURE2_EBB bit in AT_HWCAP2
+	 */
+	if (!have_hwcap2(PPC_FEATURE2_EBB))
+		goto out;
+
+	/* check if platform supports extended regs */
+	if (check_extended_regs_support())
+		goto out;
+
+	init_ev_encodes();
+	return 0;
+out:
+	printf("%s: Sampling tests un-supported\n", __func__);
+	return -1;
+}
 /*
  * Allocate mmap buffer of "mmap_pages" number of
  * pages.
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h
index 291f9adba817..c8f64e8e749c 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h
@@ -1,9 +1,45 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright 2022, Athira Rajeev, IBM Corp.
+ * Copyright 2022, Madhavan Srinivasan, IBM Corp.
  */
 
 #include "../event.h"
 
+#define POWER10 0x80
+#define POWER9  0x4e
+#define PERF_POWER9_MASK        0x7f8ffffffffffff
+#define PERF_POWER10_MASK       0x7ffffffffffffff
+
+extern int ev_mask_pmcxsel, ev_shift_pmcxsel;
+extern int ev_mask_marked, ev_shift_marked;
+extern int ev_mask_comb, ev_shift_comb;
+extern int ev_mask_unit, ev_shift_unit;
+extern int ev_mask_pmc, ev_shift_pmc;
+extern int ev_mask_cache, ev_shift_cache;
+extern int ev_mask_sample, ev_shift_sample;
+extern int ev_mask_thd_sel, ev_shift_thd_sel;
+extern int ev_mask_thd_start, ev_shift_thd_start;
+extern int ev_mask_thd_stop, ev_shift_thd_stop;
+extern int ev_mask_thd_cmp, ev_shift_thd_cmp;
+extern int ev_mask_sm, ev_shift_sm;
+extern int ev_mask_rsq, ev_shift_rsq;
+extern int ev_mask_l2l3, ev_shift_l2l3;
+extern int ev_mask_mmcr3_src, ev_shift_mmcr3_src;
+extern int pvr;
+extern u64 platform_extended_mask;
+extern int check_pvr_for_sampling_tests(void);
+
+/*
+ * Event code field extraction macro.
+ * Raw event code is combination of multiple
+ * fields. Macro to extract individual fields
+ *
+ * x - Raw event code value
+ * y - Field to extract
+ */
+#define EV_CODE_EXTRACT(x, y)   \
+	((x >> ev_shift_##y) & ev_mask_##y)
+
 void *event_sample_buf_mmap(int fd, int mmap_pages);
 void *__event_read_samples(void *sample_buff, size_t *size, u64 *sample_count);
diff --git a/tools/testing/selftests/powerpc/security/spectre_v2.c b/tools/testing/selftests/powerpc/security/spectre_v2.c
index 83647b8277e7..d42ca8c676c3 100644
--- a/tools/testing/selftests/powerpc/security/spectre_v2.c
+++ b/tools/testing/selftests/powerpc/security/spectre_v2.c
@@ -125,8 +125,6 @@ static enum spectre_v2_state get_sysfs_state(void)
 #define PM_BR_PRED_PCACHE	0x048a0	// P9 only
 #define PM_BR_MPRED_PCACHE	0x048b0	// P9 only
 
-#define SPRN_PVR 287
-
 int spectre_v2_test(void)
 {
 	enum spectre_v2_state state;
-- 
cgit 


From 5f6c3061af7ca3b0f9f8a20ec7a445671f7e6b5a Mon Sep 17 00:00:00 2001
From: Kajol Jain <kjain@linux.ibm.com>
Date: Thu, 27 Jan 2022 12:49:56 +0530
Subject: selftests/powerpc/pmu: Add utility functions to post process the mmap
 buffer

Add couple of basic utility functions to post process the mmap buffer.
It includes function to read the total number of samples present in the
mmap buffer and function to get the address of the first sample.

Add function "get_intr_regs" which will return pointer to interrupt
registers present in the sample, incase sample type
PERF_SAMPLE_REGS_INTR is set.

Add functions "get_reg_value" which can be used to read any interrupt
register value from a given sample.

Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220127072012.662451-5-kjain@linux.ibm.com
---
 .../selftests/powerpc/pmu/sampling_tests/misc.c    | 175 +++++++++++++++++++++
 .../selftests/powerpc/pmu/sampling_tests/misc.h    |   4 +
 2 files changed, 179 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c
index a86d7d125955..fca054bbc094 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c
@@ -2,6 +2,7 @@
 /*
  * Copyright 2022, Athira Rajeev, IBM Corp.
  * Copyright 2022, Madhavan Srinivasan, IBM Corp.
+ * Copyright 2022, Kajol Jain, IBM Corp.
  */
 
 #include <unistd.h>
@@ -235,3 +236,177 @@ void *__event_read_samples(void *sample_buff, size_t *size, u64 *sample_count)
 	}
 	return NULL;
 }
+
+int collect_samples(void *sample_buff)
+{
+	u64 sample_count;
+	size_t size = 0;
+
+	__event_read_samples(sample_buff, &size, &sample_count);
+	return sample_count;
+}
+
+static void *perf_read_first_sample(void *sample_buff, size_t *size)
+{
+	return __event_read_samples(sample_buff, size, NULL);
+}
+
+u64 *get_intr_regs(struct event *event, void *sample_buff)
+{
+	u64 type = event->attr.sample_type;
+	u64 *intr_regs;
+	size_t size = 0;
+
+	if ((type ^ PERF_SAMPLE_REGS_INTR))
+		return NULL;
+
+	intr_regs = (u64 *)perf_read_first_sample(sample_buff, &size);
+	if (!intr_regs)
+		return NULL;
+
+	/*
+	 * First entry in the sample buffer used to specify
+	 * PERF_SAMPLE_REGS_ABI_64, skip perf regs abi to access
+	 * interrupt registers.
+	 */
+	++intr_regs;
+
+	return intr_regs;
+}
+
+static const unsigned int __perf_reg_mask(const char *register_name)
+{
+	if (!strcmp(register_name, "R0"))
+		return 0;
+	else if (!strcmp(register_name, "R1"))
+		return 1;
+	else if (!strcmp(register_name, "R2"))
+		return 2;
+	else if (!strcmp(register_name, "R3"))
+		return 3;
+	else if (!strcmp(register_name, "R4"))
+		return 4;
+	else if (!strcmp(register_name, "R5"))
+		return 5;
+	else if (!strcmp(register_name, "R6"))
+		return 6;
+	else if (!strcmp(register_name, "R7"))
+		return 7;
+	else if (!strcmp(register_name, "R8"))
+		return 8;
+	else if (!strcmp(register_name, "R9"))
+		return 9;
+	else if (!strcmp(register_name, "R10"))
+		return 10;
+	else if (!strcmp(register_name, "R11"))
+		return 11;
+	else if (!strcmp(register_name, "R12"))
+		return 12;
+	else if (!strcmp(register_name, "R13"))
+		return 13;
+	else if (!strcmp(register_name, "R14"))
+		return 14;
+	else if (!strcmp(register_name, "R15"))
+		return 15;
+	else if (!strcmp(register_name, "R16"))
+		return 16;
+	else if (!strcmp(register_name, "R17"))
+		return 17;
+	else if (!strcmp(register_name, "R18"))
+		return 18;
+	else if (!strcmp(register_name, "R19"))
+		return 19;
+	else if (!strcmp(register_name, "R20"))
+		return 20;
+	else if (!strcmp(register_name, "R21"))
+		return 21;
+	else if (!strcmp(register_name, "R22"))
+		return 22;
+	else if (!strcmp(register_name, "R23"))
+		return 23;
+	else if (!strcmp(register_name, "R24"))
+		return 24;
+	else if (!strcmp(register_name, "R25"))
+		return 25;
+	else if (!strcmp(register_name, "R26"))
+		return 26;
+	else if (!strcmp(register_name, "R27"))
+		return 27;
+	else if (!strcmp(register_name, "R28"))
+		return 28;
+	else if (!strcmp(register_name, "R29"))
+		return 29;
+	else if (!strcmp(register_name, "R30"))
+		return 30;
+	else if (!strcmp(register_name, "R31"))
+		return 31;
+	else if (!strcmp(register_name, "NIP"))
+		return 32;
+	else if (!strcmp(register_name, "MSR"))
+		return 33;
+	else if (!strcmp(register_name, "ORIG_R3"))
+		return 34;
+	else if (!strcmp(register_name, "CTR"))
+		return 35;
+	else if (!strcmp(register_name, "LINK"))
+		return 36;
+	else if (!strcmp(register_name, "XER"))
+		return 37;
+	else if (!strcmp(register_name, "CCR"))
+		return 38;
+	else if (!strcmp(register_name, "SOFTE"))
+		return 39;
+	else if (!strcmp(register_name, "TRAP"))
+		return 40;
+	else if (!strcmp(register_name, "DAR"))
+		return 41;
+	else if (!strcmp(register_name, "DSISR"))
+		return 42;
+	else if (!strcmp(register_name, "SIER"))
+		return 43;
+	else if (!strcmp(register_name, "MMCRA"))
+		return 44;
+	else if (!strcmp(register_name, "MMCR0"))
+		return 45;
+	else if (!strcmp(register_name, "MMCR1"))
+		return 46;
+	else if (!strcmp(register_name, "MMCR2"))
+		return 47;
+	else if (!strcmp(register_name, "MMCR3"))
+		return 48;
+	else if (!strcmp(register_name, "SIER2"))
+		return 49;
+	else if (!strcmp(register_name, "SIER3"))
+		return 50;
+	else if (!strcmp(register_name, "PMC1"))
+		return 51;
+	else if (!strcmp(register_name, "PMC2"))
+		return 52;
+	else if (!strcmp(register_name, "PMC3"))
+		return 53;
+	else if (!strcmp(register_name, "PMC4"))
+		return 54;
+	else if (!strcmp(register_name, "PMC5"))
+		return 55;
+	else if (!strcmp(register_name, "PMC6"))
+		return 56;
+	else if (!strcmp(register_name, "SDAR"))
+		return 57;
+	else if (!strcmp(register_name, "SIAR"))
+		return 58;
+	else
+		return -1;
+}
+
+u64 get_reg_value(u64 *intr_regs, char *register_name)
+{
+	int register_bit_position;
+
+	register_bit_position = __perf_reg_mask(register_name);
+
+	if (register_bit_position < 0 || (!((platform_extended_mask >>
+			(register_bit_position - 1)) & 1)))
+		return -1;
+
+	return *(intr_regs + register_bit_position);
+}
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h
index c8f64e8e749c..a8d67fcad9ae 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h
@@ -2,6 +2,7 @@
 /*
  * Copyright 2022, Athira Rajeev, IBM Corp.
  * Copyright 2022, Madhavan Srinivasan, IBM Corp.
+ * Copyright 2022, Kajol Jain, IBM Corp.
  */
 
 #include "../event.h"
@@ -43,3 +44,6 @@ extern int check_pvr_for_sampling_tests(void);
 
 void *event_sample_buf_mmap(int fd, int mmap_pages);
 void *__event_read_samples(void *sample_buff, size_t *size, u64 *sample_count);
+int collect_samples(void *sample_buff);
+u64 *get_intr_regs(struct event *event, void *sample_buff);
+u64 get_reg_value(u64 *intr_regs, char *register_name);
-- 
cgit 


From 54d4ba7f22d1ed5bfbc915771cf2e3e147cf03b2 Mon Sep 17 00:00:00 2001
From: Madhavan Srinivasan <maddy@linux.ibm.com>
Date: Thu, 27 Jan 2022 12:49:57 +0530
Subject: selftests/powerpc/pmu: Add event_init_sampling function

Extended event_init_opts() to include initialization of sampling
testcases. Patch adds an event_init_sampling() wrapper to initialize
event attribute fields for sampling events. This includes initializing
sample period, sample type and event type.

Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220127072012.662451-6-kjain@linux.ibm.com
---
 tools/testing/selftests/powerpc/pmu/event.c | 19 ++++++++++++++++++-
 tools/testing/selftests/powerpc/pmu/event.h |  1 +
 2 files changed, 19 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/powerpc/pmu/event.c b/tools/testing/selftests/powerpc/pmu/event.c
index 48e3a413b15d..0c1c1bdba081 100644
--- a/tools/testing/selftests/powerpc/pmu/event.c
+++ b/tools/testing/selftests/powerpc/pmu/event.c
@@ -8,6 +8,7 @@
 #include <sys/syscall.h>
 #include <string.h>
 #include <stdio.h>
+#include <stdbool.h>
 #include <sys/ioctl.h>
 
 #include "event.h"
@@ -20,7 +21,8 @@ int perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu,
 			   group_fd, flags);
 }
 
-void event_init_opts(struct event *e, u64 config, int type, char *name)
+static void  __event_init_opts(struct event *e, u64 config,
+			       int type, char *name, bool sampling)
 {
 	memset(e, 0, sizeof(*e));
 
@@ -32,6 +34,16 @@ void event_init_opts(struct event *e, u64 config, int type, char *name)
 	/* This has to match the structure layout in the header */
 	e->attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | \
 				  PERF_FORMAT_TOTAL_TIME_RUNNING;
+	if (sampling) {
+		e->attr.sample_period = 1000;
+		e->attr.sample_type = PERF_SAMPLE_REGS_INTR;
+		e->attr.disabled = 1;
+	}
+}
+
+void event_init_opts(struct event *e, u64 config, int type, char *name)
+{
+	__event_init_opts(e, config, type, name, false);
 }
 
 void event_init_named(struct event *e, u64 config, char *name)
@@ -44,6 +56,11 @@ void event_init(struct event *e, u64 config)
 	event_init_opts(e, config, PERF_TYPE_RAW, "event");
 }
 
+void event_init_sampling(struct event *e, u64 config)
+{
+	__event_init_opts(e, config, PERF_TYPE_RAW, "event", true);
+}
+
 #define PERF_CURRENT_PID	0
 #define PERF_NO_PID		-1
 #define PERF_NO_CPU		-1
diff --git a/tools/testing/selftests/powerpc/pmu/event.h b/tools/testing/selftests/powerpc/pmu/event.h
index 23d20340a160..51aad0b6d9ad 100644
--- a/tools/testing/selftests/powerpc/pmu/event.h
+++ b/tools/testing/selftests/powerpc/pmu/event.h
@@ -32,6 +32,7 @@ struct event {
 void event_init(struct event *e, u64 config);
 void event_init_named(struct event *e, u64 config, char *name);
 void event_init_opts(struct event *e, u64 config, int type, char *name);
+void event_init_sampling(struct event *e, u64 config);
 int event_open_with_options(struct event *e, pid_t pid, int cpu, int group_fd);
 int event_open_with_group(struct event *e, int group_fd);
 int event_open_with_pid(struct event *e, pid_t pid);
-- 
cgit 


From 79c4e6aba8dfc9206acc68884498080f451121f7 Mon Sep 17 00:00:00 2001
From: Madhavan Srinivasan <maddy@linux.ibm.com>
Date: Thu, 27 Jan 2022 12:49:58 +0530
Subject: selftests/powerpc/pmu: Add macros to extract mmcr fields

Along with it, Add macros and utility functions to fetch individual
fields from Monitor Mode Control Register 2(MMCR2) register.

Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220127072012.662451-7-kjain@linux.ibm.com
---
 .../selftests/powerpc/pmu/sampling_tests/misc.h    | 52 ++++++++++++++++++++++
 1 file changed, 52 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h
index a8d67fcad9ae..bac447e92cf6 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h
@@ -47,3 +47,55 @@ void *__event_read_samples(void *sample_buff, size_t *size, u64 *sample_count);
 int collect_samples(void *sample_buff);
 u64 *get_intr_regs(struct event *event, void *sample_buff);
 u64 get_reg_value(u64 *intr_regs, char *register_name);
+
+static inline int get_mmcr2_fcs(u64 mmcr2, int pmc)
+{
+	return ((mmcr2 & (1ull << (63 - (((pmc) - 1) * 9)))) >> (63 - (((pmc) - 1) * 9)));
+}
+
+static inline int get_mmcr2_fcp(u64 mmcr2, int pmc)
+{
+	return ((mmcr2 & (1ull << (62 - (((pmc) - 1) * 9)))) >> (62 - (((pmc) - 1) * 9)));
+}
+
+static inline int get_mmcr2_fcpc(u64 mmcr2, int pmc)
+{
+	return ((mmcr2 & (1ull << (61 - (((pmc) - 1) * 9)))) >> (61 - (((pmc) - 1) * 9)));
+}
+
+static inline int get_mmcr2_fcm1(u64 mmcr2, int pmc)
+{
+	return ((mmcr2 & (1ull << (60 - (((pmc) - 1) * 9)))) >> (60 - (((pmc) - 1) * 9)));
+}
+
+static inline int get_mmcr2_fcm0(u64 mmcr2, int pmc)
+{
+	return ((mmcr2 & (1ull << (59 - (((pmc) - 1) * 9)))) >> (59 - (((pmc) - 1) * 9)));
+}
+
+static inline int get_mmcr2_fcwait(u64 mmcr2, int pmc)
+{
+	return ((mmcr2 & (1ull << (58 - (((pmc) - 1) * 9)))) >> (58 - (((pmc) - 1) * 9)));
+}
+
+static inline int get_mmcr2_fch(u64 mmcr2, int pmc)
+{
+	return ((mmcr2 & (1ull << (57 - (((pmc) - 1) * 9)))) >> (57 - (((pmc) - 1) * 9)));
+}
+
+static inline int get_mmcr2_fcti(u64 mmcr2, int pmc)
+{
+	return ((mmcr2 & (1ull << (56 - (((pmc) - 1) * 9)))) >> (56 - (((pmc) - 1) * 9)));
+}
+
+static inline int get_mmcr2_fcta(u64 mmcr2, int pmc)
+{
+	return ((mmcr2 & (1ull << (55 - (((pmc) - 1) * 9)))) >> (55 - (((pmc) - 1) * 9)));
+}
+
+static inline int get_mmcr2_l2l3(u64 mmcr2, int pmc)
+{
+	if (pvr == POWER10)
+		return ((mmcr2 & 0xf8) >> 3);
+	return 0;
+}
-- 
cgit 


From 2b49e641063e7569493371ef433b9c4ce8c8dd8b Mon Sep 17 00:00:00 2001
From: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Date: Thu, 27 Jan 2022 12:49:59 +0530
Subject: selftests/powerpc/pmu: Add macro to extract mmcr0/mmcr1 fields

Add macro and utility functions to fetch individual fields from Monitor
Mode Control Register 0(MMCR0) and Monitor Mode Control Register
1(MMCR1) PMU register.

Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220127072012.662451-8-kjain@linux.ibm.com
---
 .../selftests/powerpc/pmu/sampling_tests/misc.h    | 64 ++++++++++++++++++++++
 1 file changed, 64 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h
index bac447e92cf6..77690ca1dfc1 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h
@@ -12,6 +12,10 @@
 #define PERF_POWER9_MASK        0x7f8ffffffffffff
 #define PERF_POWER10_MASK       0x7ffffffffffffff
 
+#define MMCR0_FC56      0x00000010UL /* freeze counters 5 and 6 */
+#define MMCR0_PMCCEXT   0x00000200UL /* PMCCEXT control */
+#define MMCR1_RSQ       0x200000000000ULL /* radix scope qual field */
+
 extern int ev_mask_pmcxsel, ev_shift_pmcxsel;
 extern int ev_mask_marked, ev_shift_marked;
 extern int ev_mask_comb, ev_shift_comb;
@@ -48,6 +52,66 @@ int collect_samples(void *sample_buff);
 u64 *get_intr_regs(struct event *event, void *sample_buff);
 u64 get_reg_value(u64 *intr_regs, char *register_name);
 
+static inline int get_mmcr0_fc56(u64 mmcr0, int pmc)
+{
+	return (mmcr0 & MMCR0_FC56);
+}
+
+static inline int get_mmcr0_pmccext(u64 mmcr0, int pmc)
+{
+	return (mmcr0 & MMCR0_PMCCEXT);
+}
+
+static inline int get_mmcr0_pmao(u64 mmcr0, int pmc)
+{
+	return ((mmcr0 >> 7) & 0x1);
+}
+
+static inline int get_mmcr0_cc56run(u64 mmcr0, int pmc)
+{
+	return ((mmcr0 >> 8) & 0x1);
+}
+
+static inline int get_mmcr0_pmcjce(u64 mmcr0, int pmc)
+{
+	return ((mmcr0 >> 14) & 0x1);
+}
+
+static inline int get_mmcr0_pmc1ce(u64 mmcr0, int pmc)
+{
+	return ((mmcr0 >> 15) & 0x1);
+}
+
+static inline int get_mmcr0_pmae(u64 mmcr0, int pmc)
+{
+	return ((mmcr0 >> 27) & 0x1);
+}
+
+static inline int get_mmcr1_pmcxsel(u64 mmcr1, int pmc)
+{
+	return ((mmcr1 >> ((24 - (((pmc) - 1) * 8))) & 0xff));
+}
+
+static inline int get_mmcr1_unit(u64 mmcr1, int pmc)
+{
+	return ((mmcr1 >> ((60 - (4 * ((pmc) - 1))))) & 0xf);
+}
+
+static inline int get_mmcr1_comb(u64 mmcr1, int pmc)
+{
+	return ((mmcr1 >> (38 - ((pmc - 1) * 2))) & 0x3);
+}
+
+static inline int get_mmcr1_cache(u64 mmcr1, int pmc)
+{
+	return ((mmcr1 >> 46) & 0x3);
+}
+
+static inline int get_mmcr1_rsq(u64 mmcr1, int pmc)
+{
+	return mmcr1 & MMCR1_RSQ;
+}
+
 static inline int get_mmcr2_fcs(u64 mmcr2, int pmc)
 {
 	return ((mmcr2 & (1ull << (63 - (((pmc) - 1) * 9)))) >> (63 - (((pmc) - 1) * 9)));
-- 
cgit 


From 13307f9584ea9408d9959302074dc4e8308b6cab Mon Sep 17 00:00:00 2001
From: Kajol Jain <kjain@linux.ibm.com>
Date: Thu, 27 Jan 2022 12:50:00 +0530
Subject: selftests/powerpc/pmu: Add macro to extract mmcr3 and mmcra fields

Add macro and utility functions to fetch individual fields from Monitor
Mode Control Register 3(MMCR3)and Monitor Mode Control Register A(MMCRA)
PMU registers

Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220127072012.662451-9-kjain@linux.ibm.com
---
 .../selftests/powerpc/pmu/sampling_tests/misc.h    | 62 ++++++++++++++++++++++
 1 file changed, 62 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h
index 77690ca1dfc1..7675f3177725 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h
@@ -15,6 +15,7 @@
 #define MMCR0_FC56      0x00000010UL /* freeze counters 5 and 6 */
 #define MMCR0_PMCCEXT   0x00000200UL /* PMCCEXT control */
 #define MMCR1_RSQ       0x200000000000ULL /* radix scope qual field */
+#define BHRB_DISABLE    0x2000000000ULL /* MMCRA BHRB DISABLE bit */
 
 extern int ev_mask_pmcxsel, ev_shift_pmcxsel;
 extern int ev_mask_marked, ev_shift_marked;
@@ -163,3 +164,64 @@ static inline int get_mmcr2_l2l3(u64 mmcr2, int pmc)
 		return ((mmcr2 & 0xf8) >> 3);
 	return 0;
 }
+
+static inline int get_mmcr3_src(u64 mmcr3, int pmc)
+{
+	if (pvr != POWER10)
+		return 0;
+	return ((mmcr3 >> ((49 - (15 * ((pmc) - 1))))) & 0x7fff);
+}
+
+static inline int get_mmcra_thd_cmp(u64 mmcra, int pmc)
+{
+	if (pvr == POWER10)
+		return ((mmcra >> 45) & 0x7ff);
+	return ((mmcra >> 45) & 0x3ff);
+}
+
+static inline int get_mmcra_sm(u64 mmcra, int pmc)
+{
+	return ((mmcra >> 42) & 0x3);
+}
+
+static inline int get_mmcra_bhrb_disable(u64 mmcra, int pmc)
+{
+	if (pvr == POWER10)
+		return mmcra & BHRB_DISABLE;
+	return 0;
+}
+
+static inline int get_mmcra_ifm(u64 mmcra, int pmc)
+{
+	return ((mmcra >> 30) & 0x3);
+}
+
+static inline int get_mmcra_thd_sel(u64 mmcra, int pmc)
+{
+	return ((mmcra >> 16) & 0x7);
+}
+
+static inline int get_mmcra_thd_start(u64 mmcra, int pmc)
+{
+	return ((mmcra >> 12) & 0xf);
+}
+
+static inline int get_mmcra_thd_stop(u64 mmcra, int pmc)
+{
+	return ((mmcra >> 8) & 0xf);
+}
+
+static inline int get_mmcra_rand_samp_elig(u64 mmcra, int pmc)
+{
+	return ((mmcra >> 4) & 0x7);
+}
+
+static inline int get_mmcra_sample_mode(u64 mmcra, int pmc)
+{
+	return ((mmcra >> 1) & 0x3);
+}
+
+static inline int get_mmcra_marked(u64 mmcra, int pmc)
+{
+	return mmcra & 0x1;
+}
-- 
cgit 


From eb7aa044df18c6f7a88bc17fc4c9f4524652a290 Mon Sep 17 00:00:00 2001
From: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Date: Thu, 27 Jan 2022 12:50:01 +0530
Subject: selftests/powerpc/pmu/: Add interface test for mmcr0 exception bits

The testcase uses "instructions" event to verify two bits(PMAE and PMAO)
in Monitor Mode Control Register 0 (MMCR0). At the time of interrupt,
pmae bit ( which enables performance monitor exception ) is expected to
be cleared and pmao (which indicates performance monitor alert) bit is
expected to be set in MMCR0. And testcases handles these checks.

Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
[mpe: Add error checking, drop GET_MMCR_FIELD, add to .gitignore]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220127072012.662451-10-kjain@linux.ibm.com
---
 .../powerpc/pmu/sampling_tests/.gitignore          |  1 +
 .../selftests/powerpc/pmu/sampling_tests/Makefile  |  4 +-
 .../pmu/sampling_tests/mmcr0_exceptionbits_test.c  | 59 ++++++++++++++++++++++
 3 files changed, 63 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
 create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_exceptionbits_test.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
new file mode 100644
index 000000000000..067b9f3a7f84
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
@@ -0,0 +1 @@
+mmcr0_exceptionbits_test
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
index ac3d03ffc428..c81db8b553f6 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
@@ -1,7 +1,9 @@
 # SPDX-License-Identifier: GPL-2.0
 CFLAGS += -m64
 
+TEST_GEN_PROGS := mmcr0_exceptionbits_test
+
 top_srcdir = ../../../../../..
 include ../../../lib.mk
 
-$(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c misc.c misc.h
+$(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c misc.c misc.h ../loop.S
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_exceptionbits_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_exceptionbits_test.c
new file mode 100644
index 000000000000..982aa56d2171
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_exceptionbits_test.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Athira Rajeev, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+extern void thirty_two_instruction_loop(int loops);
+
+/*
+ * A perf sampling test for mmcr0
+ * fields : pmae, pmao.
+ */
+static int mmcr0_exceptionbits(void)
+{
+	struct event event;
+	u64 *intr_regs;
+
+	/* Check for platform support for the test */
+	SKIP_IF(check_pvr_for_sampling_tests());
+
+	/* Init the event for the sampling test */
+	event_init_sampling(&event, 0x500fa);
+	event.attr.sample_regs_intr = platform_extended_mask;
+	FAIL_IF(event_open(&event));
+	event.mmap_buffer = event_sample_buf_mmap(event.fd, 1);
+
+	FAIL_IF(event_enable(&event));
+
+	/* workload to make the event overflow */
+	thirty_two_instruction_loop(10000);
+
+	FAIL_IF(event_disable(&event));
+
+	/* Check for sample count */
+	FAIL_IF(!collect_samples(event.mmap_buffer));
+
+	intr_regs = get_intr_regs(&event, event.mmap_buffer);
+
+	/* Check for intr_regs */
+	FAIL_IF(!intr_regs);
+
+	/* Verify that pmae is cleared and pmao is set in MMCR0 */
+	FAIL_IF(get_mmcr0_pmae(get_reg_value(intr_regs, "MMCR0"), 5));
+	FAIL_IF(!get_mmcr0_pmao(get_reg_value(intr_regs, "MMCR0"), 5));
+
+	event_close(&event);
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(mmcr0_exceptionbits, "mmcr0_exceptionbits");
+}
-- 
cgit 


From a7c0ab2e61484c0844eae2f208a06cc940338d83 Mon Sep 17 00:00:00 2001
From: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Date: Thu, 27 Jan 2022 12:50:02 +0530
Subject: selftests/powerpc/pmu/: Add interface test for mmcr0_cc56run field

The testcase uses event code 0x500fa ("instructions") to check the
CC56RUN bit setting in Monitor Mode Control Register 0(MMCR0). In ISA
v3.1 platform, this bit is expected to be set in MMCR0 when using
Performance Monitor Counter 5 and 6 (PMC5 and PMC6). Verify this is done
correctly by perf interface.

CC56RUN bit makes PMC5 and PMC6 count regardless of the run latch state.
This bit is set in power10 since PMC5 and PMC6 is used in power10 for
counting instructions and cycles. Hence added a check to skip this test
in other platforms

Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
[mpe: Add error checking, drop GET_MMCR_FIELD, add to .gitignore]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220127072012.662451-11-kjain@linux.ibm.com
---
 .../powerpc/pmu/sampling_tests/.gitignore          |  1 +
 .../selftests/powerpc/pmu/sampling_tests/Makefile  |  2 +-
 .../pmu/sampling_tests/mmcr0_cc56run_test.c        | 59 ++++++++++++++++++++++
 3 files changed, 61 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_cc56run_test.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
index 067b9f3a7f84..641634e3bace 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
@@ -1 +1,2 @@
 mmcr0_exceptionbits_test
+mmcr0_cc56run_test
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
index c81db8b553f6..fc0b39957522 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 CFLAGS += -m64
 
-TEST_GEN_PROGS := mmcr0_exceptionbits_test
+TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test
 
 top_srcdir = ../../../../../..
 include ../../../lib.mk
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_cc56run_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_cc56run_test.c
new file mode 100644
index 000000000000..ae4172f83817
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_cc56run_test.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Athira Rajeev, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+extern void thirty_two_instruction_loop(int loops);
+
+/*
+ * A perf sampling test for mmcr0
+ * field: cc56run.
+ */
+static int mmcr0_cc56run(void)
+{
+	struct event event;
+	u64 *intr_regs;
+
+	/* Check for platform support for the test */
+	SKIP_IF(check_pvr_for_sampling_tests());
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
+
+	 /* Init the event for the sampling test */
+	event_init_sampling(&event, 0x500fa);
+	event.attr.sample_regs_intr = platform_extended_mask;
+	FAIL_IF(event_open(&event));
+	event.mmap_buffer = event_sample_buf_mmap(event.fd, 1);
+
+	FAIL_IF(event_enable(&event));
+
+	/* workload to make the event overflow */
+	thirty_two_instruction_loop(10000);
+
+	FAIL_IF(event_disable(&event));
+
+	/* Check for sample count */
+	FAIL_IF(!collect_samples(event.mmap_buffer));
+
+	intr_regs = get_intr_regs(&event, event.mmap_buffer);
+
+	/* Check for intr_regs */
+	FAIL_IF(!intr_regs);
+
+	/* Verify that cc56run bit is set in MMCR0 */
+	FAIL_IF(!get_mmcr0_cc56run(get_reg_value(intr_regs, "MMCR0"), 5));
+
+	event_close(&event);
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(mmcr0_cc56run, "mmcr0_cc56run");
+}
-- 
cgit 


From b24142b9d2401468bcd8df157013306d5b4f6626 Mon Sep 17 00:00:00 2001
From: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Date: Thu, 27 Jan 2022 12:50:03 +0530
Subject: selftests/powerpc/pmu/: Add interface test for mmcr0_pmccext bit

The testcase uses cycles event to check the PMCCEXT bit setting in
Monitor Mode Control Register 0 (MMCR0). Check if perf interface sets
this control bit in MMCR0 on ISA v3.1 platform.

Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
[mpe: Add error checking, drop GET_MMCR_FIELD, add to .gitignore]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220127072012.662451-12-kjain@linux.ibm.com
---
 .../powerpc/pmu/sampling_tests/.gitignore          |  1 +
 .../selftests/powerpc/pmu/sampling_tests/Makefile  |  2 +-
 .../pmu/sampling_tests/mmcr0_pmccext_test.c        | 59 ++++++++++++++++++++++
 3 files changed, 61 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmccext_test.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
index 641634e3bace..991ed33eda20 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
@@ -1,2 +1,3 @@
 mmcr0_exceptionbits_test
 mmcr0_cc56run_test
+mmcr0_pmccext_test
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
index fc0b39957522..7feaf5d387c2 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 CFLAGS += -m64
 
-TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test
+TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test
 
 top_srcdir = ../../../../../..
 include ../../../lib.mk
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmccext_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmccext_test.c
new file mode 100644
index 000000000000..dfd186cd8eec
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmccext_test.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Athira Rajeev, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+extern void thirty_two_instruction_loop(int loops);
+
+/*
+ * A perf sampling test for mmcr0
+ * field: pmccext
+ */
+static int mmcr0_pmccext(void)
+{
+	struct event event;
+	u64 *intr_regs;
+
+	/* Check for platform support for the test */
+	SKIP_IF(check_pvr_for_sampling_tests());
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
+
+	/* Init the event for the sampling test */
+	event_init_sampling(&event, 0x4001e);
+	event.attr.sample_regs_intr = platform_extended_mask;
+	FAIL_IF(event_open(&event));
+	event.mmap_buffer = event_sample_buf_mmap(event.fd, 1);
+
+	FAIL_IF(event_enable(&event));
+
+	/* workload to make the event overflow */
+	thirty_two_instruction_loop(10000);
+
+	FAIL_IF(event_disable(&event));
+
+	/* Check for sample count */
+	FAIL_IF(!collect_samples(event.mmap_buffer));
+
+	intr_regs = get_intr_regs(&event, event.mmap_buffer);
+
+	/* Check for intr_regs */
+	FAIL_IF(!intr_regs);
+
+	/* Verify that pmccext field is set in MMCR0 */
+	FAIL_IF(!get_mmcr0_pmccext(get_reg_value(intr_regs, "MMCR0"), 4));
+
+	event_close(&event);
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(mmcr0_pmccext, "mmcr0_pmccext");
+}
-- 
cgit 


From 9ac7c6d5e4b570f416d849b263a6f67a617b4fa5 Mon Sep 17 00:00:00 2001
From: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Date: Thu, 27 Jan 2022 12:50:04 +0530
Subject: selftests/powerpc/pmu/: Add interface test for mmcr0_pmcjce field

The testcase uses event code 0x500fa ("instructions") to verify the
PMCjCE bit setting in Monitor Mode Control Register 0 (MMCR0). This bit
is expected to be set in MMCR0 when using Performance Monitor Counter
5 (PMC5). Checks if perf interface sets this bit correctly.

Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
[mpe: Add error checking, drop GET_MMCR_FIELD, add to .gitignore]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220127072012.662451-13-kjain@linux.ibm.com
---
 .../powerpc/pmu/sampling_tests/.gitignore          |  1 +
 .../selftests/powerpc/pmu/sampling_tests/Makefile  |  3 +-
 .../powerpc/pmu/sampling_tests/mmcr0_pmcjce_test.c | 58 ++++++++++++++++++++++
 3 files changed, 61 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmcjce_test.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
index 991ed33eda20..0bc68fed074d 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
@@ -1,3 +1,4 @@
 mmcr0_exceptionbits_test
 mmcr0_cc56run_test
 mmcr0_pmccext_test
+mmcr0_pmcjce_test
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
index 7feaf5d387c2..28029c0f1399 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
@@ -1,7 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 CFLAGS += -m64
 
-TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test
+TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test \
+		   mmcr0_pmcjce_test
 
 top_srcdir = ../../../../../..
 include ../../../lib.mk
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmcjce_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmcjce_test.c
new file mode 100644
index 000000000000..fdd8ed9bf725
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmcjce_test.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Athira Rajeev, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+extern void thirty_two_instruction_loop(int loops);
+
+/*
+ * A perf sampling test for mmcr0
+ * field: pmcjce
+ */
+static int mmcr0_pmcjce(void)
+{
+	struct event event;
+	u64 *intr_regs;
+
+	/* Check for platform support for the test */
+	SKIP_IF(check_pvr_for_sampling_tests());
+
+	/* Init the event for the sampling test */
+	event_init_sampling(&event, 0x500fa);
+	event.attr.sample_regs_intr = platform_extended_mask;
+	FAIL_IF(event_open(&event));
+	event.mmap_buffer = event_sample_buf_mmap(event.fd, 1);
+
+	FAIL_IF(event_enable(&event));
+
+	/* workload to make the event overflow */
+	thirty_two_instruction_loop(10000);
+
+	FAIL_IF(event_disable(&event));
+
+	/* Check for sample count */
+	FAIL_IF(!collect_samples(event.mmap_buffer));
+
+	intr_regs = get_intr_regs(&event, event.mmap_buffer);
+
+	/* Check for intr_regs */
+	FAIL_IF(!intr_regs);
+
+	/* Verify that pmcjce field is set in MMCR0 */
+	FAIL_IF(!get_mmcr0_pmcjce(get_reg_value(intr_regs, "MMCR0"), 5));
+
+	event_close(&event);
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(mmcr0_pmcjce, "mmcr0_pmcjce");
+}
-- 
cgit 


From d5172f2585cd0fc9788aa9b25d3dce6483321792 Mon Sep 17 00:00:00 2001
From: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Date: Thu, 27 Jan 2022 12:50:05 +0530
Subject: selftests/powerpc/pmu/: Add interface test for mmcr0_fc56 field using
 pmc1

The testcase uses event code 0x1001e to verify two bit settings (FC5-6
and PMC1CE) in Monitor Mode Control Register 0 (MMCR0). Check if FC5-6
bit to be set in MMCR0 when not using Performance Monitor Counter 5 and
6 (PMC5 and PMC6). And also PMC1CE is expected to be set when using
PMC1. Test if these fields are programmed correctly via perf interface.

Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
[mpe: Add error checking, drop GET_MMCR_FIELD, add to .gitignore]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220127072012.662451-14-kjain@linux.ibm.com
---
 .../powerpc/pmu/sampling_tests/.gitignore          |  1 +
 .../selftests/powerpc/pmu/sampling_tests/Makefile  |  2 +-
 .../pmu/sampling_tests/mmcr0_fc56_pmc1ce_test.c    | 59 ++++++++++++++++++++++
 3 files changed, 61 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc1ce_test.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
index 0bc68fed074d..3229f088f542 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
@@ -2,3 +2,4 @@ mmcr0_exceptionbits_test
 mmcr0_cc56run_test
 mmcr0_pmccext_test
 mmcr0_pmcjce_test
+mmcr0_fc56_pmc1ce_test
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
index 28029c0f1399..b6bc066e5047 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
@@ -2,7 +2,7 @@
 CFLAGS += -m64
 
 TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test \
-		   mmcr0_pmcjce_test
+		   mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test
 
 top_srcdir = ../../../../../..
 include ../../../lib.mk
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc1ce_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc1ce_test.c
new file mode 100644
index 000000000000..1c1813c182c0
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc1ce_test.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Athira Rajeev, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+extern void thirty_two_instruction_loop(int loops);
+
+/*
+ * A perf sampling test for mmcr0
+ * fields: fc56, pmc1ce.
+ */
+static int mmcr0_fc56_pmc1ce(void)
+{
+	struct event event;
+	u64 *intr_regs;
+
+	/* Check for platform support for the test */
+	SKIP_IF(check_pvr_for_sampling_tests());
+
+	/* Init the event for the sampling test */
+	event_init_sampling(&event, 0x1001e);
+	event.attr.sample_regs_intr = platform_extended_mask;
+	FAIL_IF(event_open(&event));
+	event.mmap_buffer = event_sample_buf_mmap(event.fd, 1);
+
+	FAIL_IF(event_enable(&event));
+
+	/* workload to make the event overflow */
+	thirty_two_instruction_loop(10000);
+
+	FAIL_IF(event_disable(&event));
+
+	/* Check for sample count */
+	FAIL_IF(!collect_samples(event.mmap_buffer));
+
+	intr_regs = get_intr_regs(&event, event.mmap_buffer);
+
+	/* Check for intr_regs */
+	FAIL_IF(!intr_regs);
+
+	/* Verify that fc56, pmc1ce fields are set in MMCR0 */
+	FAIL_IF(!get_mmcr0_fc56(get_reg_value(intr_regs, "MMCR0"), 1));
+	FAIL_IF(!get_mmcr0_pmc1ce(get_reg_value(intr_regs, "MMCR0"), 1));
+
+	event_close(&event);
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(mmcr0_fc56_pmc1ce, "mmcr0_fc56_pmc1ce");
+}
-- 
cgit 


From 6e11374b08723b2c43ae83bd5d48000d695f13a0 Mon Sep 17 00:00:00 2001
From: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Date: Thu, 27 Jan 2022 12:50:06 +0530
Subject: selftests/powerpc/pmu/: Add interface test for mmcr0_pmc56 using pmc5

The testcase uses event code 0x500fa to verify the FC5-6 bit setting in
Monitor Mode Control Register 0 (MMCR0). Check if FC5-6 bit is not set
in MMCR0 when using Performance Monitor Counter 5 and 6 (PMC5 and PMC6).

Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
[mpe: Add error checking, drop GET_MMCR_FIELD, add to .gitignore]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220127072012.662451-15-kjain@linux.ibm.com
---
 .../powerpc/pmu/sampling_tests/.gitignore          |  1 +
 .../selftests/powerpc/pmu/sampling_tests/Makefile  |  2 +-
 .../pmu/sampling_tests/mmcr0_fc56_pmc56_test.c     | 58 ++++++++++++++++++++++
 3 files changed, 60 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc56_test.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
index 3229f088f542..1a3b7323acd1 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
@@ -3,3 +3,4 @@ mmcr0_cc56run_test
 mmcr0_pmccext_test
 mmcr0_pmcjce_test
 mmcr0_fc56_pmc1ce_test
+mmcr0_fc56_pmc56_test
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
index b6bc066e5047..790a7ff21a90 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
@@ -2,7 +2,7 @@
 CFLAGS += -m64
 
 TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test \
-		   mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test
+		   mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test
 
 top_srcdir = ../../../../../..
 include ../../../lib.mk
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc56_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc56_test.c
new file mode 100644
index 000000000000..332d24b5ab9c
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc56_test.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Athira Rajeev, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+extern void thirty_two_instruction_loop(int loops);
+
+/*
+ * A perf sampling test for mmcr0
+ * fields: fc56_pmc56
+ */
+static int mmcr0_fc56_pmc56(void)
+{
+	struct event event;
+	u64 *intr_regs;
+
+	 /* Check for platform support for the test */
+	SKIP_IF(check_pvr_for_sampling_tests());
+
+	/* Init the event for the sampling test */
+	event_init_sampling(&event, 0x500fa);
+	event.attr.sample_regs_intr = platform_extended_mask;
+	FAIL_IF(event_open(&event));
+	event.mmap_buffer = event_sample_buf_mmap(event.fd, 1);
+
+	FAIL_IF(event_enable(&event));
+
+	/* workload to make the event overflow */
+	thirty_two_instruction_loop(10000);
+
+	FAIL_IF(event_disable(&event));
+
+	/* Check for sample count */
+	FAIL_IF(!collect_samples(event.mmap_buffer));
+
+	intr_regs = get_intr_regs(&event, event.mmap_buffer);
+
+	/* Check for intr_regs */
+	FAIL_IF(!intr_regs);
+
+	/* Verify that fc56 is not set in MMCR0 when using PMC5 */
+	FAIL_IF(get_mmcr0_fc56(get_reg_value(intr_regs, "MMCR0"), 5));
+
+	event_close(&event);
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(mmcr0_fc56_pmc56, "mmcr0_fc56_pmc56");
+}
-- 
cgit 


From 2becea3b6acf308642d6c0e9bd41caf7820753f5 Mon Sep 17 00:00:00 2001
From: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Date: Thu, 27 Jan 2022 12:50:07 +0530
Subject: selftests/powerpc/pmu/: Add interface test for mmcr1_comb field

The testcase uses event code "0x26880" to verify the settings for
different fields in Monitor Mode Control Register 1 (MMCR1). The field
include PMCxCOMB. Checks if this field are translated correctly via perf
interface to MMCR1

Add selftest for mmcr1 comb field.

Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
[mpe: Add error checking, drop GET_MMCR_FIELD, add to .gitignore]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220127072012.662451-16-kjain@linux.ibm.com
---
 .../powerpc/pmu/sampling_tests/.gitignore          |  1 +
 .../selftests/powerpc/pmu/sampling_tests/Makefile  |  3 +-
 .../powerpc/pmu/sampling_tests/mmcr1_comb_test.c   | 66 ++++++++++++++++++++++
 3 files changed, 69 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr1_comb_test.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
index 1a3b7323acd1..f0ad66d78ee0 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
@@ -4,3 +4,4 @@ mmcr0_pmccext_test
 mmcr0_pmcjce_test
 mmcr0_fc56_pmc1ce_test
 mmcr0_fc56_pmc56_test
+mmcr1_comb_test
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
index 790a7ff21a90..da87ffddc568 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
@@ -2,7 +2,8 @@
 CFLAGS += -m64
 
 TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test \
-		   mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test
+		   mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test \
+		   mmcr1_comb_test
 
 top_srcdir = ../../../../../..
 include ../../../lib.mk
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr1_comb_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr1_comb_test.c
new file mode 100644
index 000000000000..5aea6499ee9a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr1_comb_test.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Athira Rajeev, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+/* All successful D-side store dispatches for this thread that were L2 Miss */
+#define EventCode 0x46880
+
+extern void thirty_two_instruction_loop_with_ll_sc(u64 loops, u64 *ll_sc_target);
+
+/*
+ * A perf sampling test for mmcr1
+ * fields : comb.
+ */
+static int mmcr1_comb(void)
+{
+	struct event event;
+	u64 *intr_regs;
+	u64 dummy;
+
+	/* Check for platform support for the test */
+	SKIP_IF(check_pvr_for_sampling_tests());
+
+	/* Init the event for the sampling test */
+	event_init_sampling(&event, EventCode);
+	event.attr.sample_regs_intr = platform_extended_mask;
+	FAIL_IF(event_open(&event));
+	event.mmap_buffer = event_sample_buf_mmap(event.fd, 1);
+
+	FAIL_IF(event_enable(&event));
+
+	/* workload to make the event overflow */
+	thirty_two_instruction_loop_with_ll_sc(10000000, &dummy);
+
+	FAIL_IF(event_disable(&event));
+
+	/* Check for sample count */
+	FAIL_IF(!collect_samples(event.mmap_buffer));
+
+	intr_regs = get_intr_regs(&event, event.mmap_buffer);
+
+	/* Check for intr_regs */
+	FAIL_IF(!intr_regs);
+
+	/*
+	 * Verify that comb field match with
+	 * corresponding event code fields
+	 */
+	FAIL_IF(EV_CODE_EXTRACT(event.attr.config, comb) !=
+		get_mmcr1_comb(get_reg_value(intr_regs, "MMCR1"), 4));
+
+	event_close(&event);
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(mmcr1_comb, "mmcr1_comb");
+}
-- 
cgit 


From ac575b2606bf99a0d01a054196e24e1ad82c194d Mon Sep 17 00:00:00 2001
From: Madhavan Srinivasan <maddy@linux.ibm.com>
Date: Thu, 27 Jan 2022 12:50:09 +0530
Subject: selftests/powerpc/pmu/: Add interface test for mmcr2_l2l3 field

The testcases uses event code 0x010000046080 to verify the l2l3 bit
setting for Monitor Mode Control Register 2 (MMCR2). check if this bit
is set correctly via perf interface in ISA v3.1 platform.

Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
[mpe: Add error checking, drop GET_MMCR_FIELD, add to .gitignore]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220127072012.662451-18-kjain@linux.ibm.com
---
 .../powerpc/pmu/sampling_tests/.gitignore          |  1 +
 .../selftests/powerpc/pmu/sampling_tests/Makefile  |  2 +-
 .../powerpc/pmu/sampling_tests/mmcr2_l2l3_test.c   | 74 ++++++++++++++++++++++
 3 files changed, 76 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_l2l3_test.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
index f0ad66d78ee0..84bfc4d0e51c 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
@@ -5,3 +5,4 @@ mmcr0_pmcjce_test
 mmcr0_fc56_pmc1ce_test
 mmcr0_fc56_pmc56_test
 mmcr1_comb_test
+mmcr2_l2l3_test
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
index da87ffddc568..ec4c758e91a9 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
@@ -3,7 +3,7 @@ CFLAGS += -m64
 
 TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test \
 		   mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test \
-		   mmcr1_comb_test
+		   mmcr1_comb_test mmcr2_l2l3_test
 
 top_srcdir = ../../../../../..
 include ../../../lib.mk
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_l2l3_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_l2l3_test.c
new file mode 100644
index 000000000000..ceca597016b2
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_l2l3_test.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Madhavan Srinivasan, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+/* All successful D-side store dispatches for this thread */
+#define EventCode 0x010000046080
+
+#define MALLOC_SIZE     (0x10000 * 10)  /* Ought to be enough .. */
+
+/*
+ * A perf sampling test for mmcr2
+ * fields : l2l3
+ */
+static int mmcr2_l2l3(void)
+{
+	struct event event;
+	u64 *intr_regs;
+	char *p;
+	int i;
+
+	/* Check for platform support for the test */
+	SKIP_IF(check_pvr_for_sampling_tests());
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
+
+	/* Init the event for the sampling test */
+	event_init_sampling(&event, EventCode);
+	event.attr.sample_regs_intr = platform_extended_mask;
+	FAIL_IF(event_open(&event));
+	event.mmap_buffer = event_sample_buf_mmap(event.fd, 1);
+
+	FAIL_IF(event_enable(&event));
+
+	/* workload to make the event overflow */
+	p = malloc(MALLOC_SIZE);
+	FAIL_IF(!p);
+
+	for (i = 0; i < MALLOC_SIZE; i += 0x10000)
+		p[i] = i;
+
+	FAIL_IF(event_disable(&event));
+
+	/* Check for sample count */
+	FAIL_IF(!collect_samples(event.mmap_buffer));
+
+	intr_regs = get_intr_regs(&event, event.mmap_buffer);
+
+	/* Check for intr_regs */
+	FAIL_IF(!intr_regs);
+
+	/*
+	 * Verify that l2l3 field of MMCR2 match with
+	 * corresponding event code field
+	 */
+	FAIL_IF(EV_CODE_EXTRACT(event.attr.config, l2l3) !=
+		get_mmcr2_l2l3(get_reg_value(intr_regs, "MMCR2"), 4));
+
+	event_close(&event);
+	free(p);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(mmcr2_l2l3, "mmcr2_l2l3");
+}
-- 
cgit 


From 9ee241f1b1447c7e8ca90902ab1888aa9e7a3c00 Mon Sep 17 00:00:00 2001
From: Madhavan Srinivasan <maddy@linux.ibm.com>
Date: Thu, 27 Jan 2022 12:50:10 +0530
Subject: selftests/powerpc/pmu/: Add interface test for mmcr2_fcs_fch fields

The testcases uses cycles event to verify the freeze counter settings in
Monitor Mode Control Register 2 (MMCR2). Event modifier (exclude_kernel)
setting is used for the event attribute to check the FCxS and FCxH (
Freeze counter in privileged and hypervisor state ) settings via perf
interface.

Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
[mpe: Add error checking, check MSR for MSR_HV, drop GET_MMCR_FIELD, add to .gitignore]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220127072012.662451-19-kjain@linux.ibm.com
---
 .../powerpc/pmu/sampling_tests/.gitignore          |  1 +
 .../selftests/powerpc/pmu/sampling_tests/Makefile  |  2 +-
 .../pmu/sampling_tests/mmcr2_fcs_fch_test.c        | 85 ++++++++++++++++++++++
 3 files changed, 87 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_fcs_fch_test.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
index 84bfc4d0e51c..58d7551f4c2a 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
@@ -6,3 +6,4 @@ mmcr0_fc56_pmc1ce_test
 mmcr0_fc56_pmc56_test
 mmcr1_comb_test
 mmcr2_l2l3_test
+mmcr2_fcs_fch_test
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
index ec4c758e91a9..b4271509fe70 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
@@ -3,7 +3,7 @@ CFLAGS += -m64
 
 TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test \
 		   mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test \
-		   mmcr1_comb_test mmcr2_l2l3_test
+		   mmcr1_comb_test mmcr2_l2l3_test mmcr2_fcs_fch_test
 
 top_srcdir = ../../../../../..
 include ../../../lib.mk
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_fcs_fch_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_fcs_fch_test.c
new file mode 100644
index 000000000000..4e242fd61b25
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_fcs_fch_test.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Madhavan Srinivasan, IBM Corp.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+extern void thirty_two_instruction_loop(int loops);
+
+static bool is_hv;
+
+static void sig_usr2_handler(int signum, siginfo_t *info, void *data)
+{
+	ucontext_t *uctx = data;
+
+	is_hv = !!(uctx->uc_mcontext.gp_regs[PT_MSR] & MSR_HV);
+}
+
+/*
+ * A perf sampling test for mmcr2
+ * fields : fcs, fch.
+ */
+static int mmcr2_fcs_fch(void)
+{
+	struct sigaction sigact = {
+		.sa_sigaction = sig_usr2_handler,
+		.sa_flags = SA_SIGINFO
+	};
+	struct event event;
+	u64 *intr_regs;
+
+	FAIL_IF(sigaction(SIGUSR2, &sigact, NULL));
+	FAIL_IF(kill(getpid(), SIGUSR2));
+
+	/* Check for platform support for the test */
+	SKIP_IF(check_pvr_for_sampling_tests());
+
+	/* Init the event for the sampling test */
+	event_init_sampling(&event, 0x1001e);
+	event.attr.sample_regs_intr = platform_extended_mask;
+	event.attr.exclude_kernel = 1;
+	FAIL_IF(event_open(&event));
+	event.mmap_buffer = event_sample_buf_mmap(event.fd, 1);
+
+	FAIL_IF(event_enable(&event));
+
+	/* workload to make the event overflow */
+	thirty_two_instruction_loop(10000);
+
+	FAIL_IF(event_disable(&event));
+
+	/* Check for sample count */
+	FAIL_IF(!collect_samples(event.mmap_buffer));
+
+	intr_regs = get_intr_regs(&event, event.mmap_buffer);
+
+	/* Check for intr_regs */
+	FAIL_IF(!intr_regs);
+
+	/*
+	 * Verify that fcs and fch field of MMCR2 match
+	 * with corresponding modifier fields.
+	 */
+	if (is_hv)
+		FAIL_IF(event.attr.exclude_kernel !=
+			get_mmcr2_fch(get_reg_value(intr_regs, "MMCR2"), 1));
+	else
+		FAIL_IF(event.attr.exclude_kernel !=
+			get_mmcr2_fcs(get_reg_value(intr_regs, "MMCR2"), 1));
+
+	event_close(&event);
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(mmcr2_fcs_fch, "mmcr2_fcs_fch");
+}
-- 
cgit 


From 02f02feb6b50c67171fd56bc3fd0fd96118c5c12 Mon Sep 17 00:00:00 2001
From: Kajol Jain <kjain@linux.ibm.com>
Date: Thu, 27 Jan 2022 12:50:11 +0530
Subject: selftests/powerpc/pmu/: Add interface test for mmcr3_src fields

The testcase uses event code 0x1340000001c040 to verify the settings for
different src fields in Monitor Mode Control Register 3 (MMCR3). Checks
if these fields are translated correctly via perf interface to MMCR3 on
ISA v3.1 platform.

Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
[mpe: Add error checking, drop GET_MMCR_FIELD, add to .gitignore]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220127072012.662451-20-kjain@linux.ibm.com
---
 .../powerpc/pmu/sampling_tests/.gitignore          |  1 +
 .../selftests/powerpc/pmu/sampling_tests/Makefile  |  3 +-
 .../powerpc/pmu/sampling_tests/mmcr3_src_test.c    | 67 ++++++++++++++++++++++
 3 files changed, 70 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr3_src_test.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
index 58d7551f4c2a..2969a9e9ba72 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
@@ -7,3 +7,4 @@ mmcr0_fc56_pmc56_test
 mmcr1_comb_test
 mmcr2_l2l3_test
 mmcr2_fcs_fch_test
+mmcr3_src_test
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
index b4271509fe70..cd2704b173b3 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
@@ -3,7 +3,8 @@ CFLAGS += -m64
 
 TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test \
 		   mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test \
-		   mmcr1_comb_test mmcr2_l2l3_test mmcr2_fcs_fch_test
+		   mmcr1_comb_test mmcr2_l2l3_test mmcr2_fcs_fch_test \
+		   mmcr3_src_test
 
 top_srcdir = ../../../../../..
 include ../../../lib.mk
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr3_src_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr3_src_test.c
new file mode 100644
index 000000000000..e154e2a4cc3a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr3_src_test.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Kajol Jain, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+extern void thirty_two_instruction_loop_with_ll_sc(u64 loops, u64 *ll_sc_target);
+
+/* The data cache was reloaded from local core's L3 due to a demand load */
+#define EventCode 0x1340000001c040
+
+/*
+ * A perf sampling test for mmcr3
+ * fields.
+ */
+static int mmcr3_src(void)
+{
+	struct event event;
+	u64 *intr_regs;
+	u64 dummy;
+
+	/* Check for platform support for the test */
+	SKIP_IF(check_pvr_for_sampling_tests());
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
+
+	/* Init the event for the sampling test */
+	event_init_sampling(&event, EventCode);
+	event.attr.sample_regs_intr = platform_extended_mask;
+	FAIL_IF(event_open(&event));
+	event.mmap_buffer = event_sample_buf_mmap(event.fd, 1);
+
+	FAIL_IF(event_enable(&event));
+
+	/* workload to make event overflow */
+	thirty_two_instruction_loop_with_ll_sc(1000000, &dummy);
+
+	FAIL_IF(event_disable(&event));
+
+	/* Check for sample count */
+	FAIL_IF(!collect_samples(event.mmap_buffer));
+
+	intr_regs = get_intr_regs(&event, event.mmap_buffer);
+
+	/* Check for intr_regs */
+	FAIL_IF(!intr_regs);
+
+	/*
+	 * Verify that src field of MMCR3 match with
+	 * corresponding event code field
+	 */
+	FAIL_IF(EV_CODE_EXTRACT(event.attr.config, mmcr3_src) !=
+		get_mmcr3_src(get_reg_value(intr_regs, "MMCR3"), 1));
+
+	event_close(&event);
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(mmcr3_src, "mmcr3_src");
+}
-- 
cgit 


From 29cf373c5766e6bd1b97056d2d678a41777669aa Mon Sep 17 00:00:00 2001
From: Kajol Jain <kjain@linux.ibm.com>
Date: Thu, 27 Jan 2022 12:50:12 +0530
Subject: selftests/powerpc/pmu: Add interface test for mmcra register fields

The testcase uses event code 0x35340401e0 to verify the settings for
different fields in Monitor Mode Control Register A (MMCRA). The fields
include thresh_start, thresh_stop thresh_select, sdar mode, sample and
marked bit. Checks if these fields are translated correctly via perf
interface to MMCRA.

Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
[mpe: Add error checking, drop GET_MMCR_FIELD, add to .gitignore]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220127072012.662451-21-kjain@linux.ibm.com
---
 .../powerpc/pmu/sampling_tests/.gitignore          |  1 +
 .../selftests/powerpc/pmu/sampling_tests/Makefile  |  2 +-
 .../mmcra_thresh_marked_sample_test.c              | 80 ++++++++++++++++++++++
 3 files changed, 82 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_thresh_marked_sample_test.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
index 2969a9e9ba72..0fce5a694684 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
@@ -8,3 +8,4 @@ mmcr1_comb_test
 mmcr2_l2l3_test
 mmcr2_fcs_fch_test
 mmcr3_src_test
+mmcra_thresh_marked_sample_test
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
index cd2704b173b3..a785c6a173b9 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
@@ -4,7 +4,7 @@ CFLAGS += -m64
 TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test \
 		   mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test \
 		   mmcr1_comb_test mmcr2_l2l3_test mmcr2_fcs_fch_test \
-		   mmcr3_src_test
+		   mmcr3_src_test mmcra_thresh_marked_sample_test
 
 top_srcdir = ../../../../../..
 include ../../../lib.mk
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_thresh_marked_sample_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_thresh_marked_sample_test.c
new file mode 100644
index 000000000000..022cc1655eb5
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_thresh_marked_sample_test.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Kajol Jain, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+/*
+ * Primary PMU event used here is PM_MRK_INST_CMPL (0x401e0)
+ * Threshold event selection used is issue to complete for cycles
+ * Sampling criteria is Load only sampling
+ */
+#define EventCode 0x35340401e0
+
+extern void thirty_two_instruction_loop_with_ll_sc(u64 loops, u64 *ll_sc_target);
+
+/* A perf sampling test to test mmcra fields */
+static int mmcra_thresh_marked_sample(void)
+{
+	struct event event;
+	u64 *intr_regs;
+	u64 dummy;
+
+	/* Check for platform support for the test */
+	SKIP_IF(check_pvr_for_sampling_tests());
+
+	/* Init the event for the sampling test */
+	event_init_sampling(&event, EventCode);
+	event.attr.sample_regs_intr = platform_extended_mask;
+	FAIL_IF(event_open(&event));
+	event.mmap_buffer = event_sample_buf_mmap(event.fd, 1);
+
+	FAIL_IF(event_enable(&event));
+
+	/* workload to make the event overflow */
+	thirty_two_instruction_loop_with_ll_sc(1000000, &dummy);
+
+	FAIL_IF(event_disable(&event));
+
+	/* Check for sample count */
+	FAIL_IF(!collect_samples(event.mmap_buffer));
+
+	intr_regs = get_intr_regs(&event, event.mmap_buffer);
+
+	/* Check for intr_regs */
+	FAIL_IF(!intr_regs);
+
+	/*
+	 * Verify that thresh sel/start/stop, marked, random sample
+	 * eligibility, sdar mode and sample mode fields match with
+	 * the corresponding event code fields
+	 */
+	FAIL_IF(EV_CODE_EXTRACT(event.attr.config, thd_sel) !=
+			get_mmcra_thd_sel(get_reg_value(intr_regs, "MMCRA"), 4));
+	FAIL_IF(EV_CODE_EXTRACT(event.attr.config, thd_start) !=
+			get_mmcra_thd_start(get_reg_value(intr_regs, "MMCRA"), 4));
+	FAIL_IF(EV_CODE_EXTRACT(event.attr.config, thd_stop) !=
+			get_mmcra_thd_stop(get_reg_value(intr_regs, "MMCRA"), 4));
+	FAIL_IF(EV_CODE_EXTRACT(event.attr.config, marked) !=
+			get_mmcra_marked(get_reg_value(intr_regs, "MMCRA"), 4));
+	FAIL_IF(EV_CODE_EXTRACT(event.attr.config, sample >> 2) !=
+			get_mmcra_rand_samp_elig(get_reg_value(intr_regs, "MMCRA"), 4));
+	FAIL_IF(EV_CODE_EXTRACT(event.attr.config, sample & 0x3) !=
+			get_mmcra_sample_mode(get_reg_value(intr_regs, "MMCRA"), 4));
+	FAIL_IF(EV_CODE_EXTRACT(event.attr.config, sm) !=
+			get_mmcra_sm(get_reg_value(intr_regs, "MMCRA"), 4));
+
+	event_close(&event);
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(mmcra_thresh_marked_sample, "mmcra_thresh_marked_sample");
+}
-- 
cgit 


From bd004cad78c04d762a99127af2f8208b9579af21 Mon Sep 17 00:00:00 2001
From: Xu Kuohai <xukuohai@huawei.com>
Date: Tue, 1 Mar 2022 00:32:50 -0500
Subject: selftests/bpf: Update btf_dump case for conflicting names

Update btf_dump case for conflicting names caused by forward declaration.

Signed-off-by: Xu Kuohai <xukuohai@huawei.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20220301053250.1464204-3-xukuohai@huawei.com
---
 tools/testing/selftests/bpf/prog_tests/btf_dump.c | 54 +++++++++++++++++------
 1 file changed, 41 insertions(+), 13 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index 9e26903f9170..5fce7008d1ff 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -148,22 +148,38 @@ static void test_btf_dump_incremental(void)
 
 	/* First, generate BTF corresponding to the following C code:
 	 *
-	 * enum { VAL = 1 };
+	 * enum x;
+	 *
+	 * enum x { X = 1 };
+	 *
+	 * enum { Y = 1 };
+	 *
+	 * struct s;
 	 *
 	 * struct s { int x; };
 	 *
 	 */
+	id = btf__add_enum(btf, "x", 4);
+	ASSERT_EQ(id, 1, "enum_declaration_id");
+	id = btf__add_enum(btf, "x", 4);
+	ASSERT_EQ(id, 2, "named_enum_id");
+	err = btf__add_enum_value(btf, "X", 1);
+	ASSERT_OK(err, "named_enum_val_ok");
+
 	id = btf__add_enum(btf, NULL, 4);
-	ASSERT_EQ(id, 1, "enum_id");
-	err = btf__add_enum_value(btf, "VAL", 1);
-	ASSERT_OK(err, "enum_val_ok");
+	ASSERT_EQ(id, 3, "anon_enum_id");
+	err = btf__add_enum_value(btf, "Y", 1);
+	ASSERT_OK(err, "anon_enum_val_ok");
 
 	id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED);
-	ASSERT_EQ(id, 2, "int_id");
+	ASSERT_EQ(id, 4, "int_id");
+
+	id = btf__add_fwd(btf, "s", BTF_FWD_STRUCT);
+	ASSERT_EQ(id, 5, "fwd_id");
 
 	id = btf__add_struct(btf, "s", 4);
-	ASSERT_EQ(id, 3, "struct_id");
-	err = btf__add_field(btf, "x", 2, 0, 0);
+	ASSERT_EQ(id, 6, "struct_id");
+	err = btf__add_field(btf, "x", 4, 0, 0);
 	ASSERT_OK(err, "field_ok");
 
 	for (i = 1; i < btf__type_cnt(btf); i++) {
@@ -173,11 +189,20 @@ static void test_btf_dump_incremental(void)
 
 	fflush(dump_buf_file);
 	dump_buf[dump_buf_sz] = 0; /* some libc implementations don't do this */
+
 	ASSERT_STREQ(dump_buf,
+"enum x;\n"
+"\n"
+"enum x {\n"
+"	X = 1,\n"
+"};\n"
+"\n"
 "enum {\n"
-"	VAL = 1,\n"
+"	Y = 1,\n"
 "};\n"
 "\n"
+"struct s;\n"
+"\n"
 "struct s {\n"
 "	int x;\n"
 "};\n\n", "c_dump1");
@@ -199,10 +224,12 @@ static void test_btf_dump_incremental(void)
 	fseek(dump_buf_file, 0, SEEK_SET);
 
 	id = btf__add_struct(btf, "s", 4);
-	ASSERT_EQ(id, 4, "struct_id");
-	err = btf__add_field(btf, "x", 1, 0, 0);
+	ASSERT_EQ(id, 7, "struct_id");
+	err = btf__add_field(btf, "x", 2, 0, 0);
+	ASSERT_OK(err, "field_ok");
+	err = btf__add_field(btf, "y", 3, 32, 0);
 	ASSERT_OK(err, "field_ok");
-	err = btf__add_field(btf, "s", 3, 32, 0);
+	err = btf__add_field(btf, "s", 6, 64, 0);
 	ASSERT_OK(err, "field_ok");
 
 	for (i = 1; i < btf__type_cnt(btf); i++) {
@@ -214,9 +241,10 @@ static void test_btf_dump_incremental(void)
 	dump_buf[dump_buf_sz] = 0; /* some libc implementations don't do this */
 	ASSERT_STREQ(dump_buf,
 "struct s___2 {\n"
+"	enum x x;\n"
 "	enum {\n"
-"		VAL___2 = 1,\n"
-"	} x;\n"
+"		Y___2 = 1,\n"
+"	} y;\n"
 "	struct s s;\n"
 "};\n\n" , "c_dump1");
 
-- 
cgit 


From 85c68eb429f7fc136b9bbb3646eb38a9e0e30fa2 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Fri, 4 Feb 2022 21:42:05 +0000
Subject: KVM: selftests: Add test to verify KVM handling of ICR

The main thing that the selftest verifies is that KVM copies x2APIC's
ICR[63:32] to/from ICR2 when userspace accesses the vAPIC page via
KVM_{G,S}ET_LAPIC.  KVM previously split x2APIC ICR to ICR+ICR2 at the
time of write (from the guest), and so KVM must preserve that behavior
for backwards compatibility between different versions of KVM.

It will also test other invariants, e.g. that KVM clears the BUSY
flag on ICR writes, that the reserved bits in ICR2 are dropped on writes
from the guest, etc...

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20220204214205.3306634-12-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/.gitignore             |   1 +
 tools/testing/selftests/kvm/Makefile               |   1 +
 tools/testing/selftests/kvm/include/x86_64/apic.h  |   1 +
 .../selftests/kvm/x86_64/xapic_state_test.c        | 150 +++++++++++++++++++++
 4 files changed, 153 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/x86_64/xapic_state_test.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 7903580a48ac..052ddfe4b23a 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -46,6 +46,7 @@
 /x86_64/vmx_tsc_adjust_test
 /x86_64/vmx_nested_tsc_scaling_test
 /x86_64/xapic_ipi_test
+/x86_64/xapic_state_test
 /x86_64/xen_shinfo_test
 /x86_64/xen_vmcall_test
 /x86_64/xss_msr_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index f9943b96ab3f..f7fa5655e535 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -77,6 +77,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_nested_tsc_scaling_test
 TEST_GEN_PROGS_x86_64 += x86_64/xapic_ipi_test
+TEST_GEN_PROGS_x86_64 += x86_64/xapic_state_test
 TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
 TEST_GEN_PROGS_x86_64 += x86_64/debug_regs
 TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test
diff --git a/tools/testing/selftests/kvm/include/x86_64/apic.h b/tools/testing/selftests/kvm/include/x86_64/apic.h
index 0be4757f1f20..ac88557dcc9a 100644
--- a/tools/testing/selftests/kvm/include/x86_64/apic.h
+++ b/tools/testing/selftests/kvm/include/x86_64/apic.h
@@ -33,6 +33,7 @@
 #define	APIC_SPIV	0xF0
 #define		APIC_SPIV_FOCUS_DISABLED	(1 << 9)
 #define		APIC_SPIV_APIC_ENABLED		(1 << 8)
+#define APIC_IRR	0x200
 #define	APIC_ICR	0x300
 #define		APIC_DEST_SELF		0x40000
 #define		APIC_DEST_ALLINC	0x80000
diff --git a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c
new file mode 100644
index 000000000000..0792334ba243
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "apic.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+struct kvm_vcpu {
+	uint32_t id;
+	bool is_x2apic;
+};
+
+static void xapic_guest_code(void)
+{
+	asm volatile("cli");
+
+	xapic_enable();
+
+	while (1) {
+		uint64_t val = (u64)xapic_read_reg(APIC_IRR) |
+			       (u64)xapic_read_reg(APIC_IRR + 0x10) << 32;
+
+		xapic_write_reg(APIC_ICR2, val >> 32);
+		xapic_write_reg(APIC_ICR, val);
+		GUEST_SYNC(val);
+	}
+}
+
+static void x2apic_guest_code(void)
+{
+	asm volatile("cli");
+
+	x2apic_enable();
+
+	do {
+		uint64_t val = x2apic_read_reg(APIC_IRR) |
+			       x2apic_read_reg(APIC_IRR + 0x10) << 32;
+
+		x2apic_write_reg(APIC_ICR, val);
+		GUEST_SYNC(val);
+	} while (1);
+}
+
+static void ____test_icr(struct kvm_vm *vm, struct kvm_vcpu *vcpu, uint64_t val)
+{
+	struct kvm_lapic_state xapic;
+	struct ucall uc;
+	uint64_t icr;
+
+	/*
+	 * Tell the guest what ICR value to write.  Use the IRR to pass info,
+	 * all bits are valid and should not be modified by KVM (ignoring the
+	 * fact that vectors 0-15 are technically illegal).
+	 */
+	vcpu_ioctl(vm, vcpu->id, KVM_GET_LAPIC, &xapic);
+	*((u32 *)&xapic.regs[APIC_IRR]) = val;
+	*((u32 *)&xapic.regs[APIC_IRR + 0x10]) = val >> 32;
+	vcpu_ioctl(vm, vcpu->id, KVM_SET_LAPIC, &xapic);
+
+	vcpu_run(vm, vcpu->id);
+	ASSERT_EQ(get_ucall(vm, vcpu->id, &uc), UCALL_SYNC);
+	ASSERT_EQ(uc.args[1], val);
+
+	vcpu_ioctl(vm, vcpu->id, KVM_GET_LAPIC, &xapic);
+	icr = (u64)(*((u32 *)&xapic.regs[APIC_ICR])) |
+	      (u64)(*((u32 *)&xapic.regs[APIC_ICR2])) << 32;
+	if (!vcpu->is_x2apic)
+		val &= (-1u | (0xffull << (32 + 24)));
+	ASSERT_EQ(icr, val & ~APIC_ICR_BUSY);
+}
+
+static void __test_icr(struct kvm_vm *vm, struct kvm_vcpu *vcpu, uint64_t val)
+{
+	____test_icr(vm, vcpu, val | APIC_ICR_BUSY);
+	____test_icr(vm, vcpu, val & ~(u64)APIC_ICR_BUSY);
+}
+
+static void test_icr(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+	uint64_t icr, i, j;
+
+	icr = APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_FIXED;
+	for (i = 0; i <= 0xff; i++)
+		__test_icr(vm, vcpu, icr | i);
+
+	icr = APIC_INT_ASSERT | APIC_DM_FIXED;
+	for (i = 0; i <= 0xff; i++)
+		__test_icr(vm, vcpu, icr | i);
+
+	/*
+	 * Send all flavors of IPIs to non-existent vCPUs.  TODO: use number of
+	 * vCPUs, not vcpu.id + 1.  Arbitrarily use vector 0xff.
+	 */
+	icr = APIC_INT_ASSERT | 0xff;
+	for (i = vcpu->id + 1; i < 0xff; i++) {
+		for (j = 0; j < 8; j++)
+			__test_icr(vm, vcpu, i << (32 + 24) | APIC_INT_ASSERT | (j << 8));
+	}
+
+	/* And again with a shorthand destination for all types of IPIs. */
+	icr = APIC_DEST_ALLBUT | APIC_INT_ASSERT;
+	for (i = 0; i < 8; i++)
+		__test_icr(vm, vcpu, icr | (i << 8));
+
+	/* And a few garbage value, just make sure it's an IRQ (blocked). */
+	__test_icr(vm, vcpu, 0xa5a5a5a5a5a5a5a5 & ~APIC_DM_FIXED_MASK);
+	__test_icr(vm, vcpu, 0x5a5a5a5a5a5a5a5a & ~APIC_DM_FIXED_MASK);
+	__test_icr(vm, vcpu, -1ull & ~APIC_DM_FIXED_MASK);
+}
+
+int main(int argc, char *argv[])
+{
+	struct kvm_vcpu vcpu = {
+		.id = 0,
+		.is_x2apic = true,
+	};
+	struct kvm_cpuid2 *cpuid;
+	struct kvm_vm *vm;
+	int i;
+
+	vm = vm_create_default(vcpu.id, 0, x2apic_guest_code);
+	test_icr(vm, &vcpu);
+	kvm_vm_free(vm);
+
+	/*
+	 * Use a second VM for the xAPIC test so that x2APIC can be hidden from
+	 * the guest in order to test AVIC.  KVM disallows changing CPUID after
+	 * KVM_RUN and AVIC is disabled if _any_ vCPU is allowed to use x2APIC.
+	 */
+	vm = vm_create_default(vcpu.id, 0, xapic_guest_code);
+	vcpu.is_x2apic = false;
+
+	cpuid = vcpu_get_cpuid(vm, vcpu.id);
+	for (i = 0; i < cpuid->nent; i++) {
+		if (cpuid->entries[i].function == 1)
+			break;
+	}
+	cpuid->entries[i].ecx &= ~BIT(21);
+	vcpu_set_cpuid(vm, vcpu.id, cpuid);
+
+	virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+	test_icr(vm, &vcpu);
+	kvm_vm_free(vm);
+}
-- 
cgit 


From 9132c3947b09a6c67372424ff69f867f2cee82f8 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 31 Jan 2022 17:16:37 -0800
Subject: selftests/exec: Test for empty string on NULL argv

Test for the NULL argv argument producing a single empty string on exec.

Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Yang Yingliang <yangyingliang@huawei.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-kselftest@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/lkml/20220201011637.2457646-1-keescook@chromium.org
---
 tools/testing/selftests/exec/Makefile    |  1 +
 tools/testing/selftests/exec/null-argv.c | 78 ++++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+)
 create mode 100644 tools/testing/selftests/exec/null-argv.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile
index 12c5e27d32c1..551affb437fe 100644
--- a/tools/testing/selftests/exec/Makefile
+++ b/tools/testing/selftests/exec/Makefile
@@ -10,6 +10,7 @@ TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir
 TEST_FILES := Makefile
 
 TEST_GEN_PROGS += recursion-depth
+TEST_GEN_PROGS += null-argv
 
 EXTRA_CLEAN := $(OUTPUT)/subdir.moved $(OUTPUT)/execveat.moved $(OUTPUT)/xxxxx*	\
 	       $(OUTPUT)/S_I*.test
diff --git a/tools/testing/selftests/exec/null-argv.c b/tools/testing/selftests/exec/null-argv.c
new file mode 100644
index 000000000000..c19726e710d1
--- /dev/null
+++ b/tools/testing/selftests/exec/null-argv.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Test that empty argvs are swapped out for a single empty string. */
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "../kselftest.h"
+
+#define FORK(exec)				\
+do {						\
+	pid = fork();				\
+	if (pid == 0) {				\
+		/* Child */			\
+		exec; /* Some kind of exec */	\
+		perror("# " #exec);		\
+		return 1;			\
+	}					\
+	check_result(pid, #exec);		\
+} while (0)
+
+void check_result(pid_t pid, const char *msg)
+{
+	int wstatus;
+
+	if (pid == (pid_t)-1) {
+		perror("# fork");
+		ksft_test_result_fail("fork failed: %s\n", msg);
+		return;
+	}
+	if (waitpid(pid, &wstatus, 0) < 0) {
+		perror("# waitpid");
+		ksft_test_result_fail("waitpid failed: %s\n", msg);
+		return;
+	}
+	if (!WIFEXITED(wstatus)) {
+		ksft_test_result_fail("child did not exit: %s\n", msg);
+		return;
+	}
+	if (WEXITSTATUS(wstatus) != 0) {
+		ksft_test_result_fail("non-zero exit: %s\n", msg);
+		return;
+	}
+	ksft_test_result_pass("%s\n", msg);
+}
+
+int main(int argc, char *argv[], char *envp[])
+{
+	pid_t pid;
+	static char * const args[] = { NULL };
+	static char * const str[] = { "", NULL };
+
+	/* argc counting checks */
+	if (argc < 1) {
+		fprintf(stderr, "# FAIL: saw argc == 0 (old kernel?)\n");
+		return 1;
+	}
+	if (argc != 1) {
+		fprintf(stderr, "# FAIL: unknown argc (%d)\n", argc);
+		return 1;
+	}
+	if (argv[0][0] == '\0') {
+		/* Good, we found a NULL terminated string at argv[0]! */
+		return 0;
+	}
+
+	/* Test runner. */
+	ksft_print_header();
+	ksft_set_plan(5);
+
+	FORK(execve(argv[0], str, NULL));
+	FORK(execve(argv[0], NULL, NULL));
+	FORK(execve(argv[0], NULL, envp));
+	FORK(execve(argv[0], args, NULL));
+	FORK(execve(argv[0], args, envp));
+
+	ksft_exit(ksft_cnt.ksft_pass == ksft_plan);
+}
-- 
cgit 


From a3d38af35d61a1e2045b73b4e43fa5ffb9d71008 Mon Sep 17 00:00:00 2001
From: "David E. Box" <david.e.box@linux.intel.com>
Date: Thu, 24 Feb 2022 17:24:57 -0800
Subject: selftests: sdsi: test sysfs setup

Tests file configuration and error handling of the Intel Software
Defined Silicon sysfs ABI.

Signed-off-by: David E. Box <david.e.box@linux.intel.com>
Link: https://lore.kernel.org/r/20220225012457.1661574-2-david.e.box@linux.intel.com
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 MAINTAINERS                                       |   1 +
 tools/testing/selftests/drivers/sdsi/sdsi.sh      |  25 +++
 tools/testing/selftests/drivers/sdsi/sdsi_test.py | 226 ++++++++++++++++++++++
 3 files changed, 252 insertions(+)
 create mode 100755 tools/testing/selftests/drivers/sdsi/sdsi.sh
 create mode 100644 tools/testing/selftests/drivers/sdsi/sdsi_test.py

(limited to 'tools/testing')

diff --git a/MAINTAINERS b/MAINTAINERS
index b1281f44a5a2..a09e383d9d11 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9882,6 +9882,7 @@ M:	David E. Box <david.e.box@linux.intel.com>
 S:	Supported
 F:	drivers/platform/x86/intel/sdsi.c
 F:	tools/arch/x86/intel_sdsi/
+F:	tools/testing/selftests/drivers/sdsi/
 
 INTEL SKYLAKE INT3472 ACPI DEVICE DRIVER
 M:	Daniel Scally <djrscally@gmail.com>
diff --git a/tools/testing/selftests/drivers/sdsi/sdsi.sh b/tools/testing/selftests/drivers/sdsi/sdsi.sh
new file mode 100755
index 000000000000..9b84b9b82b49
--- /dev/null
+++ b/tools/testing/selftests/drivers/sdsi/sdsi.sh
@@ -0,0 +1,25 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs tests for the intel_sdsi driver
+
+if ! command -v python3 > /dev/null 2>&1; then
+	echo "drivers/sdsi: [SKIP] python3 not installed"
+	exit 77
+fi
+
+if ! python3 -c "import pytest" > /dev/null 2>&1; then
+	echo "drivers/sdsi: [SKIP] pytest module not installed"
+	exit 77
+fi
+
+if ! /sbin/modprobe -q -r intel_sdsi; then
+	echo "drivers/sdsi: [SKIP]"
+	exit 77
+fi
+
+if /sbin/modprobe -q intel_sdsi && python3 -m pytest sdsi_test.py; then
+	echo "drivers/sdsi: [OK]"
+else
+	echo "drivers/sdsi: [FAIL]"
+	exit 1
+fi
diff --git a/tools/testing/selftests/drivers/sdsi/sdsi_test.py b/tools/testing/selftests/drivers/sdsi/sdsi_test.py
new file mode 100644
index 000000000000..5efb29feee70
--- /dev/null
+++ b/tools/testing/selftests/drivers/sdsi/sdsi_test.py
@@ -0,0 +1,226 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+from struct import pack
+from time import sleep
+
+import errno
+import glob
+import os
+import subprocess
+
+try:
+    import pytest
+except ImportError:
+    print("Unable to import pytest python module.")
+    print("\nIf not already installed, you may do so with:")
+    print("\t\tpip3 install pytest")
+    exit(1)
+
+SOCKETS = glob.glob('/sys/bus/auxiliary/devices/intel_vsec.sdsi.*')
+NUM_SOCKETS = len(SOCKETS)
+
+MODULE_NAME = 'intel_sdsi'
+DEV_PREFIX = 'intel_vsec.sdsi'
+CLASS_DIR = '/sys/bus/auxiliary/devices'
+GUID = "0x6dd191"
+
+def read_bin_file(file):
+    with open(file, mode='rb') as f:
+        content = f.read()
+    return content
+
+def get_dev_file_path(socket, file):
+    return CLASS_DIR + '/' + DEV_PREFIX + '.' + str(socket) + '/' + file
+
+def kmemleak_enabled():
+    kmemleak = "/sys/kernel/debug/kmemleak"
+    return os.path.isfile(kmemleak)
+
+class TestSDSiDriver:
+    def test_driver_loaded(self):
+        lsmod_p = subprocess.Popen(('lsmod'), stdout=subprocess.PIPE)
+        result = subprocess.check_output(('grep', '-q', MODULE_NAME), stdin=lsmod_p.stdout)
+
+@pytest.mark.parametrize('socket', range(0, NUM_SOCKETS))
+class TestSDSiFilesClass:
+
+    def read_value(self, file):
+        f = open(file, "r")
+        value = f.read().strip("\n")
+        return value
+
+    def get_dev_folder(self, socket):
+        return CLASS_DIR + '/' + DEV_PREFIX + '.' + str(socket) + '/'
+
+    def test_sysfs_files_exist(self, socket):
+        folder = self.get_dev_folder(socket)
+        print (folder)
+        assert os.path.isfile(folder + "guid") == True
+        assert os.path.isfile(folder + "provision_akc") == True
+        assert os.path.isfile(folder + "provision_cap") == True
+        assert os.path.isfile(folder + "state_certificate") == True
+        assert os.path.isfile(folder + "registers") == True
+
+    def test_sysfs_file_permissions(self, socket):
+        folder = self.get_dev_folder(socket)
+        mode = os.stat(folder + "guid").st_mode & 0o777
+        assert mode == 0o444    # Read all
+        mode = os.stat(folder + "registers").st_mode & 0o777
+        assert mode == 0o400    # Read owner
+        mode = os.stat(folder + "provision_akc").st_mode & 0o777
+        assert mode == 0o200    # Read owner
+        mode = os.stat(folder + "provision_cap").st_mode & 0o777
+        assert mode == 0o200    # Read owner
+        mode = os.stat(folder + "state_certificate").st_mode & 0o777
+        assert mode == 0o400    # Read owner
+
+    def test_sysfs_file_ownership(self, socket):
+        folder = self.get_dev_folder(socket)
+
+        st = os.stat(folder + "guid")
+        assert st.st_uid == 0
+        assert st.st_gid == 0
+
+        st = os.stat(folder + "registers")
+        assert st.st_uid == 0
+        assert st.st_gid == 0
+
+        st = os.stat(folder + "provision_akc")
+        assert st.st_uid == 0
+        assert st.st_gid == 0
+
+        st = os.stat(folder + "provision_cap")
+        assert st.st_uid == 0
+        assert st.st_gid == 0
+
+        st = os.stat(folder + "state_certificate")
+        assert st.st_uid == 0
+        assert st.st_gid == 0
+
+    def test_sysfs_file_sizes(self, socket):
+        folder = self.get_dev_folder(socket)
+
+        if self.read_value(folder + "guid") == GUID:
+            st = os.stat(folder + "registers")
+            assert st.st_size == 72
+
+        st = os.stat(folder + "provision_akc")
+        assert st.st_size == 1024
+
+        st = os.stat(folder + "provision_cap")
+        assert st.st_size == 1024
+
+        st = os.stat(folder + "state_certificate")
+        assert st.st_size == 4096
+
+    def test_no_seek_allowed(self, socket):
+        folder = self.get_dev_folder(socket)
+        rand_file = bytes(os.urandom(8))
+
+        f = open(folder + "provision_cap", "wb", 0)
+        f.seek(1)
+        with pytest.raises(OSError) as error:
+            f.write(rand_file)
+        assert error.value.errno == errno.ESPIPE
+        f.close()
+
+        f = open(folder + "provision_akc", "wb", 0)
+        f.seek(1)
+        with pytest.raises(OSError) as error:
+            f.write(rand_file)
+        assert error.value.errno == errno.ESPIPE
+        f.close()
+
+    def test_registers_seek(self, socket):
+        folder = self.get_dev_folder(socket)
+
+        # Check that the value read from an offset of the entire
+        # file is none-zero and the same as the value read
+        # from seeking to the same location
+        f = open(folder + "registers", "rb")
+        data = f.read()
+        f.seek(64)
+        id = f.read()
+        assert id != bytes(0)
+        assert data[64:] == id
+        f.close()
+
+@pytest.mark.parametrize('socket', range(0, NUM_SOCKETS))
+class TestSDSiMailboxCmdsClass:
+    def test_provision_akc_eoverflow_1017_bytes(self, socket):
+
+        # The buffer for writes is 1k, of with 8 bytes must be
+        # reserved for the command, leaving 1016 bytes max.
+        # Check that we get an overflow error for 1017 bytes.
+        node = get_dev_file_path(socket, "provision_akc")
+        rand_file = bytes(os.urandom(1017))
+
+        f = open(node, 'wb', 0)
+        with pytest.raises(OSError) as error:
+            f.write(rand_file)
+        assert error.value.errno == errno.EOVERFLOW
+        f.close()
+
+@pytest.mark.parametrize('socket', range(0, NUM_SOCKETS))
+class TestSdsiDriverLocksClass:
+    def test_enodev_when_pci_device_removed(self, socket):
+        node = get_dev_file_path(socket, "provision_akc")
+        dev_name = DEV_PREFIX + '.' + str(socket)
+        driver_dir = CLASS_DIR + '/' + dev_name + "/driver/"
+        rand_file = bytes(os.urandom(8))
+
+        f = open(node, 'wb', 0)
+        g = open(node, 'wb', 0)
+
+        with open(driver_dir + 'unbind', 'w') as k:
+            print(dev_name, file = k)
+
+        with pytest.raises(OSError) as error:
+            f.write(rand_file)
+        assert error.value.errno == errno.ENODEV
+
+        with pytest.raises(OSError) as error:
+            g.write(rand_file)
+        assert error.value.errno == errno.ENODEV
+
+        f.close()
+        g.close()
+
+        # Short wait needed to allow file to close before pulling driver
+        sleep(1)
+
+        p = subprocess.Popen(('modprobe', '-r', 'intel_sdsi'))
+        p.wait()
+        p = subprocess.Popen(('modprobe', '-r', 'intel_vsec'))
+        p.wait()
+        p = subprocess.Popen(('modprobe', 'intel_vsec'))
+        p.wait()
+
+        # Short wait needed to allow driver time to get inserted
+        # before continuing tests
+        sleep(1)
+
+    def test_memory_leak(self, socket):
+        if not kmemleak_enabled():
+            pytest.skip("kmemleak not enabled in kernel")
+
+        dev_name = DEV_PREFIX + '.' + str(socket)
+        driver_dir = CLASS_DIR + '/' + dev_name + "/driver/"
+
+        with open(driver_dir + 'unbind', 'w') as k:
+            print(dev_name, file = k)
+
+        sleep(1)
+
+        subprocess.check_output(('modprobe', '-r', 'intel_sdsi'))
+        subprocess.check_output(('modprobe', '-r', 'intel_vsec'))
+
+        with open('/sys/kernel/debug/kmemleak', 'w') as f:
+            print('scan', file = f)
+        sleep(5)
+
+        assert os.stat('/sys/kernel/debug/kmemleak').st_size == 0
+
+        subprocess.check_output(('modprobe', 'intel_vsec'))
+        sleep(1)
-- 
cgit 


From ba95e7930957e853ffae2d4528e057abe486a005 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Wed, 2 Mar 2022 18:31:28 +0200
Subject: selftests: forwarding: hw_stats_l3: Add a new test

Add a test that verifies operation of L3 HW statistics.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/net/forwarding/hw_stats_l3.sh        | 332 +++++++++++++++++++++
 1 file changed, 332 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/hw_stats_l3.sh

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/forwarding/hw_stats_l3.sh b/tools/testing/selftests/net/forwarding/hw_stats_l3.sh
new file mode 100755
index 000000000000..1c11c4256d06
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/hw_stats_l3.sh
@@ -0,0 +1,332 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +--------------------+                     +----------------------+
+# | H1                 |                     |                   H2 |
+# |                    |                     |                      |
+# |          $h1.200 + |                     | + $h2.200            |
+# |     192.0.2.1/28 | |                     | | 192.0.2.18/28      |
+# | 2001:db8:1::1/64 | |                     | | 2001:db8:2::1/64   |
+# |                  | |                     | |                    |
+# |              $h1 + |                     | + $h2                |
+# |                  | |                     | |                    |
+# +------------------|-+                     +-|--------------------+
+#                    |                         |
+# +------------------|-------------------------|--------------------+
+# | SW               |                         |                    |
+# |                  |                         |                    |
+# |             $rp1 +                         + $rp2               |
+# |                  |                         |                    |
+# |         $rp1.200 +                         + $rp2.200           |
+# |     192.0.2.2/28                             192.0.2.17/28      |
+# | 2001:db8:1::2/64                             2001:db8:2::2/64   |
+# |                                                                 |
+# +-----------------------------------------------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+	test_stats_rx_ipv4
+	test_stats_tx_ipv4
+	test_stats_rx_ipv6
+	test_stats_tx_ipv6
+	respin_enablement
+	test_stats_rx_ipv4
+	test_stats_tx_ipv4
+	test_stats_rx_ipv6
+	test_stats_tx_ipv6
+	reapply_config
+	ping_ipv4
+	ping_ipv6
+	test_stats_rx_ipv4
+	test_stats_tx_ipv4
+	test_stats_rx_ipv6
+	test_stats_tx_ipv6
+	test_stats_report_rx
+	test_stats_report_tx
+	test_destroy_enabled
+	test_double_enable
+"
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1
+	vlan_create $h1 200 v$h1 192.0.2.1/28 2001:db8:1::1/64
+	ip route add 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+	ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+	ip -6 route del 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
+	ip route del 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+	vlan_destroy $h1 200
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2
+	vlan_create $h2 200 v$h2 192.0.2.18/28 2001:db8:2::1/64
+	ip route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.17
+	ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::2
+}
+
+h2_destroy()
+{
+	ip -6 route del 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::2
+	ip route del 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.17
+	vlan_destroy $h2 200
+	simple_if_fini $h2
+}
+
+router_rp1_200_create()
+{
+	ip link add name $rp1.200 up \
+		link $rp1 addrgenmode eui64 type vlan id 200
+	ip address add dev $rp1.200 192.0.2.2/28
+	ip address add dev $rp1.200 2001:db8:1::2/64
+	ip stats set dev $rp1.200 l3_stats on
+}
+
+router_rp1_200_destroy()
+{
+	ip stats set dev $rp1.200 l3_stats off
+	ip address del dev $rp1.200 2001:db8:1::2/64
+	ip address del dev $rp1.200 192.0.2.2/28
+	ip link del dev $rp1.200
+}
+
+router_create()
+{
+	ip link set dev $rp1 up
+	router_rp1_200_create
+
+	ip link set dev $rp2 up
+	vlan_create $rp2 200 "" 192.0.2.17/28 2001:db8:2::2/64
+}
+
+router_destroy()
+{
+	vlan_destroy $rp2 200
+	ip link set dev $rp2 down
+
+	router_rp1_200_destroy
+	ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp1=${NETIFS[p2]}
+
+	rp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	rp1mac=$(mac_get $rp1)
+	rp2mac=$(mac_get $rp2)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	router_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	router_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1.200 192.0.2.18 " IPv4"
+}
+
+ping_ipv6()
+{
+	ping_test $h1.200 2001:db8:2::1 " IPv6"
+}
+
+get_l3_stat()
+{
+	local selector=$1; shift
+
+	ip -j stats show dev $rp1.200 group offload subgroup l3_stats |
+		  jq '.[0].stats64.'$selector
+}
+
+send_packets_rx_ipv4()
+{
+	# Send 21 packets instead of 20, because the first one might trap and go
+	# through the SW datapath, which might not bump the HW counter.
+	$MZ $h1.200 -c 21 -d 20msec -p 100 \
+	    -a own -b $rp1mac -A 192.0.2.1 -B 192.0.2.18 \
+	    -q -t udp sp=54321,dp=12345
+}
+
+send_packets_rx_ipv6()
+{
+	$MZ $h1.200 -6 -c 21 -d 20msec -p 100 \
+	    -a own -b $rp1mac -A 2001:db8:1::1 -B 2001:db8:2::1 \
+	    -q -t udp sp=54321,dp=12345
+}
+
+send_packets_tx_ipv4()
+{
+	$MZ $h2.200 -c 21 -d 20msec -p 100 \
+	    -a own -b $rp2mac -A 192.0.2.18 -B 192.0.2.1 \
+	    -q -t udp sp=54321,dp=12345
+}
+
+send_packets_tx_ipv6()
+{
+	$MZ $h2.200 -6 -c 21 -d 20msec -p 100 \
+	    -a own -b $rp2mac -A 2001:db8:2::1 -B 2001:db8:1::1 \
+	    -q -t udp sp=54321,dp=12345
+}
+
+___test_stats()
+{
+	local dir=$1; shift
+	local prot=$1; shift
+
+	local a
+	local b
+
+	a=$(get_l3_stat ${dir}.packets)
+	send_packets_${dir}_${prot}
+	"$@"
+	b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \
+		       get_l3_stat ${dir}.packets)
+	check_err $? "Traffic not reflected in the counter: $a -> $b"
+}
+
+__test_stats()
+{
+	local dir=$1; shift
+	local prot=$1; shift
+
+	RET=0
+	___test_stats "$dir" "$prot"
+	log_test "Test $dir packets: $prot"
+}
+
+test_stats_rx_ipv4()
+{
+	__test_stats rx ipv4
+}
+
+test_stats_tx_ipv4()
+{
+	__test_stats tx ipv4
+}
+
+test_stats_rx_ipv6()
+{
+	__test_stats rx ipv6
+}
+
+test_stats_tx_ipv6()
+{
+	__test_stats tx ipv6
+}
+
+# Make sure everything works well even after stats have been disabled and
+# reenabled on the same device without touching the L3 configuration.
+respin_enablement()
+{
+	log_info "Turning stats off and on again"
+	ip stats set dev $rp1.200 l3_stats off
+	ip stats set dev $rp1.200 l3_stats on
+}
+
+# For the initial run, l3_stats is enabled on a completely set up netdevice. Now
+# do it the other way around: enabling the L3 stats on an L2 netdevice, and only
+# then apply the L3 configuration.
+reapply_config()
+{
+	log_info "Reapplying configuration"
+
+	router_rp1_200_destroy
+
+	ip link add name $rp1.200 link $rp1 addrgenmode none type vlan id 200
+	ip stats set dev $rp1.200 l3_stats on
+	ip link set dev $rp1.200 up addrgenmode eui64
+	ip address add dev $rp1.200 192.0.2.2/28
+	ip address add dev $rp1.200 2001:db8:1::2/64
+}
+
+__test_stats_report()
+{
+	local dir=$1; shift
+	local prot=$1; shift
+
+	local a
+	local b
+
+	RET=0
+
+	a=$(get_l3_stat ${dir}.packets)
+	send_packets_${dir}_${prot}
+	ip address flush dev $rp1.200
+	b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \
+		       get_l3_stat ${dir}.packets)
+	check_err $? "Traffic not reflected in the counter: $a -> $b"
+	log_test "Test ${dir} packets: stats pushed on loss of L3"
+
+	ip stats set dev $rp1.200 l3_stats off
+	ip link del dev $rp1.200
+	router_rp1_200_create
+}
+
+test_stats_report_rx()
+{
+	__test_stats_report rx ipv4
+}
+
+test_stats_report_tx()
+{
+	__test_stats_report tx ipv4
+}
+
+test_destroy_enabled()
+{
+	RET=0
+
+	ip link del dev $rp1.200
+	router_rp1_200_create
+
+	log_test "Destroy l3_stats-enabled netdev"
+}
+
+test_double_enable()
+{
+	RET=0
+	___test_stats rx ipv4 \
+		ip stats set dev $rp1.200 l3_stats on
+	log_test "Test stat retention across a spurious enablement"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit 


From c803475fd8ddc9996abd446fdccf5479b3c7b609 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 2 Mar 2022 11:56:41 -0800
Subject: bpf: selftests: test skb->tstamp in redirect_neigh

This patch adds tests on forwarding the delivery_time for
the following cases
- tcp/udp + ip4/ip6 + bpf_redirect_neigh
- tcp/udp + ip4/ip6 + ip[6]_forward
- bpf_skb_set_delivery_time
- The old rcv timestamp expectation on tc-bpf@ingress

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../testing/selftests/bpf/prog_tests/tc_redirect.c | 434 +++++++++++++++++++++
 tools/testing/selftests/bpf/progs/test_tc_dtime.c  | 349 +++++++++++++++++
 2 files changed, 783 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/test_tc_dtime.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index 647b0a833628..2b255e28ed26 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -17,6 +17,8 @@
 #include <linux/if_tun.h>
 #include <linux/limits.h>
 #include <linux/sysctl.h>
+#include <linux/time_types.h>
+#include <linux/net_tstamp.h>
 #include <sched.h>
 #include <stdbool.h>
 #include <stdio.h>
@@ -29,6 +31,11 @@
 #include "test_tc_neigh_fib.skel.h"
 #include "test_tc_neigh.skel.h"
 #include "test_tc_peer.skel.h"
+#include "test_tc_dtime.skel.h"
+
+#ifndef TCP_TX_DELAY
+#define TCP_TX_DELAY 37
+#endif
 
 #define NS_SRC "ns_src"
 #define NS_FWD "ns_fwd"
@@ -61,6 +68,7 @@
 #define CHK_PROG_PIN_FILE "/sys/fs/bpf/test_tc_chk"
 
 #define TIMEOUT_MILLIS 10000
+#define NSEC_PER_SEC 1000000000ULL
 
 #define log_err(MSG, ...) \
 	fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
@@ -440,6 +448,431 @@ static int set_forwarding(bool enable)
 	return 0;
 }
 
+static void rcv_tstamp(int fd, const char *expected, size_t s)
+{
+	struct __kernel_timespec pkt_ts = {};
+	char ctl[CMSG_SPACE(sizeof(pkt_ts))];
+	struct timespec now_ts;
+	struct msghdr msg = {};
+	__u64 now_ns, pkt_ns;
+	struct cmsghdr *cmsg;
+	struct iovec iov;
+	char data[32];
+	int ret;
+
+	iov.iov_base = data;
+	iov.iov_len = sizeof(data);
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+	msg.msg_control = &ctl;
+	msg.msg_controllen = sizeof(ctl);
+
+	ret = recvmsg(fd, &msg, 0);
+	if (!ASSERT_EQ(ret, s, "recvmsg"))
+		return;
+	ASSERT_STRNEQ(data, expected, s, "expected rcv data");
+
+	cmsg = CMSG_FIRSTHDR(&msg);
+	if (cmsg && cmsg->cmsg_level == SOL_SOCKET &&
+	    cmsg->cmsg_type == SO_TIMESTAMPNS_NEW)
+		memcpy(&pkt_ts, CMSG_DATA(cmsg), sizeof(pkt_ts));
+
+	pkt_ns = pkt_ts.tv_sec * NSEC_PER_SEC + pkt_ts.tv_nsec;
+	ASSERT_NEQ(pkt_ns, 0, "pkt rcv tstamp");
+
+	ret = clock_gettime(CLOCK_REALTIME, &now_ts);
+	ASSERT_OK(ret, "clock_gettime");
+	now_ns = now_ts.tv_sec * NSEC_PER_SEC + now_ts.tv_nsec;
+
+	if (ASSERT_GE(now_ns, pkt_ns, "check rcv tstamp"))
+		ASSERT_LT(now_ns - pkt_ns, 5 * NSEC_PER_SEC,
+			  "check rcv tstamp");
+}
+
+static void snd_tstamp(int fd, char *b, size_t s)
+{
+	struct sock_txtime opt = { .clockid = CLOCK_TAI };
+	char ctl[CMSG_SPACE(sizeof(__u64))];
+	struct timespec now_ts;
+	struct msghdr msg = {};
+	struct cmsghdr *cmsg;
+	struct iovec iov;
+	__u64 now_ns;
+	int ret;
+
+	ret = clock_gettime(CLOCK_TAI, &now_ts);
+	ASSERT_OK(ret, "clock_get_time(CLOCK_TAI)");
+	now_ns = now_ts.tv_sec * NSEC_PER_SEC + now_ts.tv_nsec;
+
+	iov.iov_base = b;
+	iov.iov_len = s;
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+	msg.msg_control = &ctl;
+	msg.msg_controllen = sizeof(ctl);
+
+	cmsg = CMSG_FIRSTHDR(&msg);
+	cmsg->cmsg_level = SOL_SOCKET;
+	cmsg->cmsg_type = SCM_TXTIME;
+	cmsg->cmsg_len = CMSG_LEN(sizeof(now_ns));
+	*(__u64 *)CMSG_DATA(cmsg) = now_ns;
+
+	ret = setsockopt(fd, SOL_SOCKET, SO_TXTIME, &opt, sizeof(opt));
+	ASSERT_OK(ret, "setsockopt(SO_TXTIME)");
+
+	ret = sendmsg(fd, &msg, 0);
+	ASSERT_EQ(ret, s, "sendmsg");
+}
+
+static void test_inet_dtime(int family, int type, const char *addr, __u16 port)
+{
+	int opt = 1, accept_fd = -1, client_fd = -1, listen_fd, err;
+	char buf[] = "testing testing";
+	struct nstoken *nstoken;
+
+	nstoken = open_netns(NS_DST);
+	if (!ASSERT_OK_PTR(nstoken, "setns dst"))
+		return;
+	listen_fd = start_server(family, type, addr, port, 0);
+	close_netns(nstoken);
+
+	if (!ASSERT_GE(listen_fd, 0, "listen"))
+		return;
+
+	/* Ensure the kernel puts the (rcv) timestamp for all skb */
+	err = setsockopt(listen_fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
+			 &opt, sizeof(opt));
+	if (!ASSERT_OK(err, "setsockopt(SO_TIMESTAMPNS_NEW)"))
+		goto done;
+
+	if (type == SOCK_STREAM) {
+		/* Ensure the kernel set EDT when sending out rst/ack
+		 * from the kernel's ctl_sk.
+		 */
+		err = setsockopt(listen_fd, SOL_TCP, TCP_TX_DELAY, &opt,
+				 sizeof(opt));
+		if (!ASSERT_OK(err, "setsockopt(TCP_TX_DELAY)"))
+			goto done;
+	}
+
+	nstoken = open_netns(NS_SRC);
+	if (!ASSERT_OK_PTR(nstoken, "setns src"))
+		goto done;
+	client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS);
+	close_netns(nstoken);
+
+	if (!ASSERT_GE(client_fd, 0, "connect_to_fd"))
+		goto done;
+
+	if (type == SOCK_STREAM) {
+		int n;
+
+		accept_fd = accept(listen_fd, NULL, NULL);
+		if (!ASSERT_GE(accept_fd, 0, "accept"))
+			goto done;
+
+		n = write(client_fd, buf, sizeof(buf));
+		if (!ASSERT_EQ(n, sizeof(buf), "send to server"))
+			goto done;
+		rcv_tstamp(accept_fd, buf, sizeof(buf));
+	} else {
+		snd_tstamp(client_fd, buf, sizeof(buf));
+		rcv_tstamp(listen_fd, buf, sizeof(buf));
+	}
+
+done:
+	close(listen_fd);
+	if (accept_fd != -1)
+		close(accept_fd);
+	if (client_fd != -1)
+		close(client_fd);
+}
+
+static int netns_load_dtime_bpf(struct test_tc_dtime *skel)
+{
+	struct nstoken *nstoken;
+
+#define PIN_FNAME(__file) "/sys/fs/bpf/" #__file
+#define PIN(__prog) ({							\
+		int err = bpf_program__pin(skel->progs.__prog, PIN_FNAME(__prog)); \
+		if (!ASSERT_OK(err, "pin " #__prog))		\
+			goto fail;					\
+		})
+
+	/* setup ns_src tc progs */
+	nstoken = open_netns(NS_SRC);
+	if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
+		return -1;
+	PIN(egress_host);
+	PIN(ingress_host);
+	SYS("tc qdisc add dev veth_src clsact");
+	SYS("tc filter add dev veth_src ingress bpf da object-pinned "
+	    PIN_FNAME(ingress_host));
+	SYS("tc filter add dev veth_src egress bpf da object-pinned "
+	    PIN_FNAME(egress_host));
+	close_netns(nstoken);
+
+	/* setup ns_dst tc progs */
+	nstoken = open_netns(NS_DST);
+	if (!ASSERT_OK_PTR(nstoken, "setns " NS_DST))
+		return -1;
+	PIN(egress_host);
+	PIN(ingress_host);
+	SYS("tc qdisc add dev veth_dst clsact");
+	SYS("tc filter add dev veth_dst ingress bpf da object-pinned "
+	    PIN_FNAME(ingress_host));
+	SYS("tc filter add dev veth_dst egress bpf da object-pinned "
+	    PIN_FNAME(egress_host));
+	close_netns(nstoken);
+
+	/* setup ns_fwd tc progs */
+	nstoken = open_netns(NS_FWD);
+	if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
+		return -1;
+	PIN(ingress_fwdns_prio100);
+	PIN(egress_fwdns_prio100);
+	PIN(ingress_fwdns_prio101);
+	PIN(egress_fwdns_prio101);
+	SYS("tc qdisc add dev veth_dst_fwd clsact");
+	SYS("tc filter add dev veth_dst_fwd ingress prio 100 bpf da object-pinned "
+	    PIN_FNAME(ingress_fwdns_prio100));
+	SYS("tc filter add dev veth_dst_fwd ingress prio 101 bpf da object-pinned "
+	    PIN_FNAME(ingress_fwdns_prio101));
+	SYS("tc filter add dev veth_dst_fwd egress prio 100 bpf da object-pinned "
+	    PIN_FNAME(egress_fwdns_prio100));
+	SYS("tc filter add dev veth_dst_fwd egress prio 101 bpf da object-pinned "
+	    PIN_FNAME(egress_fwdns_prio101));
+	SYS("tc qdisc add dev veth_src_fwd clsact");
+	SYS("tc filter add dev veth_src_fwd ingress prio 100 bpf da object-pinned "
+	    PIN_FNAME(ingress_fwdns_prio100));
+	SYS("tc filter add dev veth_src_fwd ingress prio 101 bpf da object-pinned "
+	    PIN_FNAME(ingress_fwdns_prio101));
+	SYS("tc filter add dev veth_src_fwd egress prio 100 bpf da object-pinned "
+	    PIN_FNAME(egress_fwdns_prio100));
+	SYS("tc filter add dev veth_src_fwd egress prio 101 bpf da object-pinned "
+	    PIN_FNAME(egress_fwdns_prio101));
+	close_netns(nstoken);
+
+#undef PIN
+
+	return 0;
+
+fail:
+	close_netns(nstoken);
+	return -1;
+}
+
+enum {
+	INGRESS_FWDNS_P100,
+	INGRESS_FWDNS_P101,
+	EGRESS_FWDNS_P100,
+	EGRESS_FWDNS_P101,
+	INGRESS_ENDHOST,
+	EGRESS_ENDHOST,
+	SET_DTIME,
+	__MAX_CNT,
+};
+
+const char *cnt_names[] = {
+	"ingress_fwdns_p100",
+	"ingress_fwdns_p101",
+	"egress_fwdns_p100",
+	"egress_fwdns_p101",
+	"ingress_endhost",
+	"egress_endhost",
+	"set_dtime",
+};
+
+enum {
+	TCP_IP6_CLEAR_DTIME,
+	TCP_IP4,
+	TCP_IP6,
+	UDP_IP4,
+	UDP_IP6,
+	TCP_IP4_RT_FWD,
+	TCP_IP6_RT_FWD,
+	UDP_IP4_RT_FWD,
+	UDP_IP6_RT_FWD,
+	UKN_TEST,
+	__NR_TESTS,
+};
+
+const char *test_names[] = {
+	"tcp ip6 clear dtime",
+	"tcp ip4",
+	"tcp ip6",
+	"udp ip4",
+	"udp ip6",
+	"tcp ip4 rt fwd",
+	"tcp ip6 rt fwd",
+	"udp ip4 rt fwd",
+	"udp ip6 rt fwd",
+};
+
+static const char *dtime_cnt_str(int test, int cnt)
+{
+	static char name[64];
+
+	snprintf(name, sizeof(name), "%s %s", test_names[test], cnt_names[cnt]);
+
+	return name;
+}
+
+static const char *dtime_err_str(int test, int cnt)
+{
+	static char name[64];
+
+	snprintf(name, sizeof(name), "%s %s errs", test_names[test],
+		 cnt_names[cnt]);
+
+	return name;
+}
+
+static void test_tcp_clear_dtime(struct test_tc_dtime *skel)
+{
+	int i, t = TCP_IP6_CLEAR_DTIME;
+	__u32 *dtimes = skel->bss->dtimes[t];
+	__u32 *errs = skel->bss->errs[t];
+
+	skel->bss->test = t;
+	test_inet_dtime(AF_INET6, SOCK_STREAM, IP6_DST, 0);
+
+	ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
+		  dtime_cnt_str(t, INGRESS_FWDNS_P100));
+	ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0,
+		  dtime_cnt_str(t, INGRESS_FWDNS_P101));
+	ASSERT_GT(dtimes[EGRESS_FWDNS_P100], 0,
+		  dtime_cnt_str(t, EGRESS_FWDNS_P100));
+	ASSERT_EQ(dtimes[EGRESS_FWDNS_P101], 0,
+		  dtime_cnt_str(t, EGRESS_FWDNS_P101));
+	ASSERT_GT(dtimes[EGRESS_ENDHOST], 0,
+		  dtime_cnt_str(t, EGRESS_ENDHOST));
+	ASSERT_GT(dtimes[INGRESS_ENDHOST], 0,
+		  dtime_cnt_str(t, INGRESS_ENDHOST));
+
+	for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
+		ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
+}
+
+static void test_tcp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd)
+{
+	__u32 *dtimes, *errs;
+	const char *addr;
+	int i, t;
+
+	if (family == AF_INET) {
+		t = bpf_fwd ? TCP_IP4 : TCP_IP4_RT_FWD;
+		addr = IP4_DST;
+	} else {
+		t = bpf_fwd ? TCP_IP6 : TCP_IP6_RT_FWD;
+		addr = IP6_DST;
+	}
+
+	dtimes = skel->bss->dtimes[t];
+	errs = skel->bss->errs[t];
+
+	skel->bss->test = t;
+	test_inet_dtime(family, SOCK_STREAM, addr, 0);
+
+	/* fwdns_prio100 prog does not read delivery_time_type, so
+	 * kernel puts the (rcv) timetamp in __sk_buff->tstamp
+	 */
+	ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
+		  dtime_cnt_str(t, INGRESS_FWDNS_P100));
+	for (i = INGRESS_FWDNS_P101; i < SET_DTIME; i++)
+		ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i));
+
+	for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
+		ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
+}
+
+static void test_udp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd)
+{
+	__u32 *dtimes, *errs;
+	const char *addr;
+	int i, t;
+
+	if (family == AF_INET) {
+		t = bpf_fwd ? UDP_IP4 : UDP_IP4_RT_FWD;
+		addr = IP4_DST;
+	} else {
+		t = bpf_fwd ? UDP_IP6 : UDP_IP6_RT_FWD;
+		addr = IP6_DST;
+	}
+
+	dtimes = skel->bss->dtimes[t];
+	errs = skel->bss->errs[t];
+
+	skel->bss->test = t;
+	test_inet_dtime(family, SOCK_DGRAM, addr, 0);
+
+	ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
+		  dtime_cnt_str(t, INGRESS_FWDNS_P100));
+	/* non mono delivery time is not forwarded */
+	ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0,
+		  dtime_cnt_str(t, INGRESS_FWDNS_P100));
+	for (i = EGRESS_FWDNS_P100; i < SET_DTIME; i++)
+		ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i));
+
+	for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
+		ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
+}
+
+static void test_tc_redirect_dtime(struct netns_setup_result *setup_result)
+{
+	struct test_tc_dtime *skel;
+	struct nstoken *nstoken;
+	int err;
+
+	skel = test_tc_dtime__open();
+	if (!ASSERT_OK_PTR(skel, "test_tc_dtime__open"))
+		return;
+
+	skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
+	skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
+
+	err = test_tc_dtime__load(skel);
+	if (!ASSERT_OK(err, "test_tc_dtime__load"))
+		goto done;
+
+	if (netns_load_dtime_bpf(skel))
+		goto done;
+
+	nstoken = open_netns(NS_FWD);
+	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+		goto done;
+	err = set_forwarding(false);
+	close_netns(nstoken);
+	if (!ASSERT_OK(err, "disable forwarding"))
+		goto done;
+
+	test_tcp_clear_dtime(skel);
+
+	test_tcp_dtime(skel, AF_INET, true);
+	test_tcp_dtime(skel, AF_INET6, true);
+	test_udp_dtime(skel, AF_INET, true);
+	test_udp_dtime(skel, AF_INET6, true);
+
+	/* Test the kernel ip[6]_forward path instead
+	 * of bpf_redirect_neigh().
+	 */
+	nstoken = open_netns(NS_FWD);
+	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+		goto done;
+	err = set_forwarding(true);
+	close_netns(nstoken);
+	if (!ASSERT_OK(err, "enable forwarding"))
+		goto done;
+
+	test_tcp_dtime(skel, AF_INET, false);
+	test_tcp_dtime(skel, AF_INET6, false);
+	test_udp_dtime(skel, AF_INET, false);
+	test_udp_dtime(skel, AF_INET6, false);
+
+done:
+	test_tc_dtime__destroy(skel);
+}
+
 static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
 {
 	struct nstoken *nstoken = NULL;
@@ -787,6 +1220,7 @@ static void *test_tc_redirect_run_tests(void *arg)
 	RUN_TEST(tc_redirect_peer_l3);
 	RUN_TEST(tc_redirect_neigh);
 	RUN_TEST(tc_redirect_neigh_fib);
+	RUN_TEST(tc_redirect_dtime);
 	return NULL;
 }
 
diff --git a/tools/testing/selftests/bpf/progs/test_tc_dtime.c b/tools/testing/selftests/bpf/progs/test_tc_dtime.c
new file mode 100644
index 000000000000..9d9e8e17b8a0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tc_dtime.c
@@ -0,0 +1,349 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2022 Meta
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <linux/stddef.h>
+#include <linux/pkt_cls.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include <sys/socket.h>
+
+/* veth_src --- veth_src_fwd --- veth_det_fwd --- veth_dst
+ *           |                                 |
+ *  ns_src   |              ns_fwd             |   ns_dst
+ *
+ * ns_src and ns_dst: ENDHOST namespace
+ *            ns_fwd: Fowarding namespace
+ */
+
+#define ctx_ptr(field)		(void *)(long)(field)
+
+#define ip4_src			__bpf_htonl(0xac100164) /* 172.16.1.100 */
+#define ip4_dst			__bpf_htonl(0xac100264) /* 172.16.2.100 */
+
+#define ip6_src			{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+				  0x00, 0x01, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe }
+#define ip6_dst			{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+				  0x00, 0x02, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe }
+
+#define v6_equal(a, b)		(a.s6_addr32[0] == b.s6_addr32[0] && \
+				 a.s6_addr32[1] == b.s6_addr32[1] && \
+				 a.s6_addr32[2] == b.s6_addr32[2] && \
+				 a.s6_addr32[3] == b.s6_addr32[3])
+
+volatile const __u32 IFINDEX_SRC;
+volatile const __u32 IFINDEX_DST;
+
+#define EGRESS_ENDHOST_MAGIC	0x0b9fbeef
+#define INGRESS_FWDNS_MAGIC	0x1b9fbeef
+#define EGRESS_FWDNS_MAGIC	0x2b9fbeef
+
+enum {
+	INGRESS_FWDNS_P100,
+	INGRESS_FWDNS_P101,
+	EGRESS_FWDNS_P100,
+	EGRESS_FWDNS_P101,
+	INGRESS_ENDHOST,
+	EGRESS_ENDHOST,
+	SET_DTIME,
+	__MAX_CNT,
+};
+
+enum {
+	TCP_IP6_CLEAR_DTIME,
+	TCP_IP4,
+	TCP_IP6,
+	UDP_IP4,
+	UDP_IP6,
+	TCP_IP4_RT_FWD,
+	TCP_IP6_RT_FWD,
+	UDP_IP4_RT_FWD,
+	UDP_IP6_RT_FWD,
+	UKN_TEST,
+	__NR_TESTS,
+};
+
+enum {
+	SRC_NS = 1,
+	DST_NS,
+};
+
+__u32 dtimes[__NR_TESTS][__MAX_CNT] = {};
+__u32 errs[__NR_TESTS][__MAX_CNT] = {};
+__u32 test = 0;
+
+static void inc_dtimes(__u32 idx)
+{
+	if (test < __NR_TESTS)
+		dtimes[test][idx]++;
+	else
+		dtimes[UKN_TEST][idx]++;
+}
+
+static void inc_errs(__u32 idx)
+{
+	if (test < __NR_TESTS)
+		errs[test][idx]++;
+	else
+		errs[UKN_TEST][idx]++;
+}
+
+static int skb_proto(int type)
+{
+	return type & 0xff;
+}
+
+static int skb_ns(int type)
+{
+	return (type >> 8) & 0xff;
+}
+
+static bool fwdns_clear_dtime(void)
+{
+	return test == TCP_IP6_CLEAR_DTIME;
+}
+
+static bool bpf_fwd(void)
+{
+	return test < TCP_IP4_RT_FWD;
+}
+
+/* -1: parse error: TC_ACT_SHOT
+ *  0: not testing traffic: TC_ACT_OK
+ * >0: first byte is the inet_proto, second byte has the netns
+ *     of the sender
+ */
+static int skb_get_type(struct __sk_buff *skb)
+{
+	void *data_end = ctx_ptr(skb->data_end);
+	void *data = ctx_ptr(skb->data);
+	__u8 inet_proto = 0, ns = 0;
+	struct ipv6hdr *ip6h;
+	struct iphdr *iph;
+
+	switch (skb->protocol) {
+	case __bpf_htons(ETH_P_IP):
+		iph = data + sizeof(struct ethhdr);
+		if (iph + 1 > data_end)
+			return -1;
+		if (iph->saddr == ip4_src)
+			ns = SRC_NS;
+		else if (iph->saddr == ip4_dst)
+			ns = DST_NS;
+		inet_proto = iph->protocol;
+		break;
+	case __bpf_htons(ETH_P_IPV6):
+		ip6h = data + sizeof(struct ethhdr);
+		if (ip6h + 1 > data_end)
+			return -1;
+		if (v6_equal(ip6h->saddr, (struct in6_addr)ip6_src))
+			ns = SRC_NS;
+		else if (v6_equal(ip6h->saddr, (struct in6_addr)ip6_dst))
+			ns = DST_NS;
+		inet_proto = ip6h->nexthdr;
+		break;
+	default:
+		return 0;
+	}
+
+	if ((inet_proto != IPPROTO_TCP && inet_proto != IPPROTO_UDP) || !ns)
+		return 0;
+
+	return (ns << 8 | inet_proto);
+}
+
+/* format: direction@iface@netns
+ * egress@veth_(src|dst)@ns_(src|dst)
+ */
+SEC("tc")
+int egress_host(struct __sk_buff *skb)
+{
+	int skb_type;
+
+	skb_type = skb_get_type(skb);
+	if (skb_type == -1)
+		return TC_ACT_SHOT;
+	if (!skb_type)
+		return TC_ACT_OK;
+
+	if (skb_proto(skb_type) == IPPROTO_TCP) {
+		if (skb->delivery_time_type == BPF_SKB_DELIVERY_TIME_MONO &&
+		    skb->tstamp)
+			inc_dtimes(EGRESS_ENDHOST);
+		else
+			inc_errs(EGRESS_ENDHOST);
+	} else {
+		if (skb->delivery_time_type == BPF_SKB_DELIVERY_TIME_UNSPEC &&
+		    skb->tstamp)
+			inc_dtimes(EGRESS_ENDHOST);
+		else
+			inc_errs(EGRESS_ENDHOST);
+	}
+
+	skb->tstamp = EGRESS_ENDHOST_MAGIC;
+
+	return TC_ACT_OK;
+}
+
+/* ingress@veth_(src|dst)@ns_(src|dst) */
+SEC("tc")
+int ingress_host(struct __sk_buff *skb)
+{
+	int skb_type;
+
+	skb_type = skb_get_type(skb);
+	if (skb_type == -1)
+		return TC_ACT_SHOT;
+	if (!skb_type)
+		return TC_ACT_OK;
+
+	if (skb->delivery_time_type == BPF_SKB_DELIVERY_TIME_MONO &&
+	    skb->tstamp == EGRESS_FWDNS_MAGIC)
+		inc_dtimes(INGRESS_ENDHOST);
+	else
+		inc_errs(INGRESS_ENDHOST);
+
+	return TC_ACT_OK;
+}
+
+/* ingress@veth_(src|dst)_fwd@ns_fwd priority 100 */
+SEC("tc")
+int ingress_fwdns_prio100(struct __sk_buff *skb)
+{
+	int skb_type;
+
+	skb_type = skb_get_type(skb);
+	if (skb_type == -1)
+		return TC_ACT_SHOT;
+	if (!skb_type)
+		return TC_ACT_OK;
+
+	/* delivery_time is only available to the ingress
+	 * if the tc-bpf checks the skb->delivery_time_type.
+	 */
+	if (skb->tstamp == EGRESS_ENDHOST_MAGIC)
+		inc_errs(INGRESS_FWDNS_P100);
+
+	if (fwdns_clear_dtime())
+		skb->tstamp = 0;
+
+	return TC_ACT_UNSPEC;
+}
+
+/* egress@veth_(src|dst)_fwd@ns_fwd priority 100 */
+SEC("tc")
+int egress_fwdns_prio100(struct __sk_buff *skb)
+{
+	int skb_type;
+
+	skb_type = skb_get_type(skb);
+	if (skb_type == -1)
+		return TC_ACT_SHOT;
+	if (!skb_type)
+		return TC_ACT_OK;
+
+	/* delivery_time is always available to egress even
+	 * the tc-bpf did not use the delivery_time_type.
+	 */
+	if (skb->tstamp == INGRESS_FWDNS_MAGIC)
+		inc_dtimes(EGRESS_FWDNS_P100);
+	else
+		inc_errs(EGRESS_FWDNS_P100);
+
+	if (fwdns_clear_dtime())
+		skb->tstamp = 0;
+
+	return TC_ACT_UNSPEC;
+}
+
+/* ingress@veth_(src|dst)_fwd@ns_fwd priority 101 */
+SEC("tc")
+int ingress_fwdns_prio101(struct __sk_buff *skb)
+{
+	__u64 expected_dtime = EGRESS_ENDHOST_MAGIC;
+	int skb_type;
+
+	skb_type = skb_get_type(skb);
+	if (skb_type == -1 || !skb_type)
+		/* Should have handled in prio100 */
+		return TC_ACT_SHOT;
+
+	if (skb_proto(skb_type) == IPPROTO_UDP)
+		expected_dtime = 0;
+
+	if (skb->delivery_time_type) {
+		if (fwdns_clear_dtime() ||
+		    skb->delivery_time_type != BPF_SKB_DELIVERY_TIME_MONO ||
+		    skb->tstamp != expected_dtime)
+			inc_errs(INGRESS_FWDNS_P101);
+		else
+			inc_dtimes(INGRESS_FWDNS_P101);
+	} else {
+		if (!fwdns_clear_dtime() && expected_dtime)
+			inc_errs(INGRESS_FWDNS_P101);
+	}
+
+	if (skb->delivery_time_type == BPF_SKB_DELIVERY_TIME_MONO) {
+		skb->tstamp = INGRESS_FWDNS_MAGIC;
+	} else {
+		if (bpf_skb_set_delivery_time(skb, INGRESS_FWDNS_MAGIC,
+					      BPF_SKB_DELIVERY_TIME_MONO))
+			inc_errs(SET_DTIME);
+		if (!bpf_skb_set_delivery_time(skb, INGRESS_FWDNS_MAGIC,
+					       BPF_SKB_DELIVERY_TIME_UNSPEC))
+			inc_errs(SET_DTIME);
+	}
+
+	if (skb_ns(skb_type) == SRC_NS)
+		return bpf_fwd() ?
+			bpf_redirect_neigh(IFINDEX_DST, NULL, 0, 0) : TC_ACT_OK;
+	else
+		return bpf_fwd() ?
+			bpf_redirect_neigh(IFINDEX_SRC, NULL, 0, 0) : TC_ACT_OK;
+}
+
+/* egress@veth_(src|dst)_fwd@ns_fwd priority 101 */
+SEC("tc")
+int egress_fwdns_prio101(struct __sk_buff *skb)
+{
+	int skb_type;
+
+	skb_type = skb_get_type(skb);
+	if (skb_type == -1 || !skb_type)
+		/* Should have handled in prio100 */
+		return TC_ACT_SHOT;
+
+	if (skb->delivery_time_type) {
+		if (fwdns_clear_dtime() ||
+		    skb->delivery_time_type != BPF_SKB_DELIVERY_TIME_MONO ||
+		    skb->tstamp != INGRESS_FWDNS_MAGIC)
+			inc_errs(EGRESS_FWDNS_P101);
+		else
+			inc_dtimes(EGRESS_FWDNS_P101);
+	} else {
+		if (!fwdns_clear_dtime())
+			inc_errs(EGRESS_FWDNS_P101);
+	}
+
+	if (skb->delivery_time_type == BPF_SKB_DELIVERY_TIME_MONO) {
+		skb->tstamp = EGRESS_FWDNS_MAGIC;
+	} else {
+		if (bpf_skb_set_delivery_time(skb, EGRESS_FWDNS_MAGIC,
+					      BPF_SKB_DELIVERY_TIME_MONO))
+			inc_errs(SET_DTIME);
+		if (!bpf_skb_set_delivery_time(skb, EGRESS_FWDNS_MAGIC,
+					       BPF_SKB_DELIVERY_TIME_UNSPEC))
+			inc_errs(SET_DTIME);
+	}
+
+	return TC_ACT_OK;
+}
+
+char __license[] SEC("license") = "GPL";
-- 
cgit 


From 7df5072cc05fd1aab5823bbc465d033cd292fca8 Mon Sep 17 00:00:00 2001
From: Mykola Lysenko <mykolal@fb.com>
Date: Tue, 1 Mar 2022 14:27:45 -0800
Subject: bpf: Small BPF verifier log improvements

In particular these include:

  1) Remove output of inv for scalars in print_verifier_state
  2) Replace inv with scalar in verifier error messages
  3) Remove _value suffixes for umin/umax/s32_min/etc (except map_value)
  4) Remove output of id=0
  5) Remove output of ref_obj_id=0

Signed-off-by: Mykola Lysenko <mykolal@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220301222745.1667206-1-mykolal@fb.com
---
 kernel/bpf/verifier.c                              |  64 +++---
 tools/testing/selftests/bpf/prog_tests/align.c     | 218 ++++++++++-----------
 tools/testing/selftests/bpf/prog_tests/log_buf.c   |   4 +-
 .../selftests/bpf/verifier/atomic_invalid.c        |   6 +-
 tools/testing/selftests/bpf/verifier/bounds.c      |   4 +-
 tools/testing/selftests/bpf/verifier/calls.c       |   6 +-
 tools/testing/selftests/bpf/verifier/ctx.c         |   4 +-
 .../selftests/bpf/verifier/direct_packet_access.c  |   2 +-
 .../selftests/bpf/verifier/helper_access_var_len.c |   6 +-
 tools/testing/selftests/bpf/verifier/jmp32.c       |  16 +-
 tools/testing/selftests/bpf/verifier/precise.c     |   4 +-
 tools/testing/selftests/bpf/verifier/raw_stack.c   |   4 +-
 .../testing/selftests/bpf/verifier/ref_tracking.c  |   6 +-
 .../selftests/bpf/verifier/search_pruning.c        |   2 +-
 tools/testing/selftests/bpf/verifier/sock.c        |   2 +-
 tools/testing/selftests/bpf/verifier/spill_fill.c  |  38 ++--
 tools/testing/selftests/bpf/verifier/unpriv.c      |   4 +-
 .../selftests/bpf/verifier/value_illegal_alu.c     |   4 +-
 .../selftests/bpf/verifier/value_ptr_arith.c       |   4 +-
 tools/testing/selftests/bpf/verifier/var_off.c     |   2 +-
 20 files changed, 203 insertions(+), 197 deletions(-)

(limited to 'tools/testing')

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index d7473fee247c..a57db4b2803c 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -539,7 +539,7 @@ static const char *reg_type_str(struct bpf_verifier_env *env,
 	char postfix[16] = {0}, prefix[32] = {0};
 	static const char * const str[] = {
 		[NOT_INIT]		= "?",
-		[SCALAR_VALUE]		= "inv",
+		[SCALAR_VALUE]		= "scalar",
 		[PTR_TO_CTX]		= "ctx",
 		[CONST_PTR_TO_MAP]	= "map_ptr",
 		[PTR_TO_MAP_VALUE]	= "map_value",
@@ -685,74 +685,80 @@ static void print_verifier_state(struct bpf_verifier_env *env,
 			continue;
 		verbose(env, " R%d", i);
 		print_liveness(env, reg->live);
-		verbose(env, "=%s", reg_type_str(env, t));
+		verbose(env, "=");
 		if (t == SCALAR_VALUE && reg->precise)
 			verbose(env, "P");
 		if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
 		    tnum_is_const(reg->var_off)) {
 			/* reg->off should be 0 for SCALAR_VALUE */
+			verbose(env, "%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t));
 			verbose(env, "%lld", reg->var_off.value + reg->off);
 		} else {
+			const char *sep = "";
+
+			verbose(env, "%s", reg_type_str(env, t));
 			if (base_type(t) == PTR_TO_BTF_ID ||
 			    base_type(t) == PTR_TO_PERCPU_BTF_ID)
 				verbose(env, "%s", kernel_type_name(reg->btf, reg->btf_id));
-			verbose(env, "(id=%d", reg->id);
-			if (reg_type_may_be_refcounted_or_null(t))
-				verbose(env, ",ref_obj_id=%d", reg->ref_obj_id);
+			verbose(env, "(");
+/*
+ * _a stands for append, was shortened to avoid multiline statements below.
+ * This macro is used to output a comma separated list of attributes.
+ */
+#define verbose_a(fmt, ...) ({ verbose(env, "%s" fmt, sep, __VA_ARGS__); sep = ","; })
+
+			if (reg->id)
+				verbose_a("id=%d", reg->id);
+			if (reg_type_may_be_refcounted_or_null(t) && reg->ref_obj_id)
+				verbose_a("ref_obj_id=%d", reg->ref_obj_id);
 			if (t != SCALAR_VALUE)
-				verbose(env, ",off=%d", reg->off);
+				verbose_a("off=%d", reg->off);
 			if (type_is_pkt_pointer(t))
-				verbose(env, ",r=%d", reg->range);
+				verbose_a("r=%d", reg->range);
 			else if (base_type(t) == CONST_PTR_TO_MAP ||
 				 base_type(t) == PTR_TO_MAP_KEY ||
 				 base_type(t) == PTR_TO_MAP_VALUE)
-				verbose(env, ",ks=%d,vs=%d",
-					reg->map_ptr->key_size,
-					reg->map_ptr->value_size);
+				verbose_a("ks=%d,vs=%d",
+					  reg->map_ptr->key_size,
+					  reg->map_ptr->value_size);
 			if (tnum_is_const(reg->var_off)) {
 				/* Typically an immediate SCALAR_VALUE, but
 				 * could be a pointer whose offset is too big
 				 * for reg->off
 				 */
-				verbose(env, ",imm=%llx", reg->var_off.value);
+				verbose_a("imm=%llx", reg->var_off.value);
 			} else {
 				if (reg->smin_value != reg->umin_value &&
 				    reg->smin_value != S64_MIN)
-					verbose(env, ",smin_value=%lld",
-						(long long)reg->smin_value);
+					verbose_a("smin=%lld", (long long)reg->smin_value);
 				if (reg->smax_value != reg->umax_value &&
 				    reg->smax_value != S64_MAX)
-					verbose(env, ",smax_value=%lld",
-						(long long)reg->smax_value);
+					verbose_a("smax=%lld", (long long)reg->smax_value);
 				if (reg->umin_value != 0)
-					verbose(env, ",umin_value=%llu",
-						(unsigned long long)reg->umin_value);
+					verbose_a("umin=%llu", (unsigned long long)reg->umin_value);
 				if (reg->umax_value != U64_MAX)
-					verbose(env, ",umax_value=%llu",
-						(unsigned long long)reg->umax_value);
+					verbose_a("umax=%llu", (unsigned long long)reg->umax_value);
 				if (!tnum_is_unknown(reg->var_off)) {
 					char tn_buf[48];
 
 					tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
-					verbose(env, ",var_off=%s", tn_buf);
+					verbose_a("var_off=%s", tn_buf);
 				}
 				if (reg->s32_min_value != reg->smin_value &&
 				    reg->s32_min_value != S32_MIN)
-					verbose(env, ",s32_min_value=%d",
-						(int)(reg->s32_min_value));
+					verbose_a("s32_min=%d", (int)(reg->s32_min_value));
 				if (reg->s32_max_value != reg->smax_value &&
 				    reg->s32_max_value != S32_MAX)
-					verbose(env, ",s32_max_value=%d",
-						(int)(reg->s32_max_value));
+					verbose_a("s32_max=%d", (int)(reg->s32_max_value));
 				if (reg->u32_min_value != reg->umin_value &&
 				    reg->u32_min_value != U32_MIN)
-					verbose(env, ",u32_min_value=%d",
-						(int)(reg->u32_min_value));
+					verbose_a("u32_min=%d", (int)(reg->u32_min_value));
 				if (reg->u32_max_value != reg->umax_value &&
 				    reg->u32_max_value != U32_MAX)
-					verbose(env, ",u32_max_value=%d",
-						(int)(reg->u32_max_value));
+					verbose_a("u32_max=%d", (int)(reg->u32_max_value));
 			}
+#undef verbose_a
+
 			verbose(env, ")");
 		}
 	}
@@ -777,7 +783,7 @@ static void print_verifier_state(struct bpf_verifier_env *env,
 		if (is_spilled_reg(&state->stack[i])) {
 			reg = &state->stack[i].spilled_ptr;
 			t = reg->type;
-			verbose(env, "=%s", reg_type_str(env, t));
+			verbose(env, "=%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t));
 			if (t == SCALAR_VALUE && reg->precise)
 				verbose(env, "P");
 			if (t == SCALAR_VALUE && tnum_is_const(reg->var_off))
diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c
index 0ee29e11eaee..970f09156eb4 100644
--- a/tools/testing/selftests/bpf/prog_tests/align.c
+++ b/tools/testing/selftests/bpf/prog_tests/align.c
@@ -39,13 +39,13 @@ static struct bpf_align_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.matches = {
-			{0, "R1=ctx(id=0,off=0,imm=0)"},
+			{0, "R1=ctx(off=0,imm=0)"},
 			{0, "R10=fp0"},
-			{0, "R3_w=inv2"},
-			{1, "R3_w=inv4"},
-			{2, "R3_w=inv8"},
-			{3, "R3_w=inv16"},
-			{4, "R3_w=inv32"},
+			{0, "R3_w=2"},
+			{1, "R3_w=4"},
+			{2, "R3_w=8"},
+			{3, "R3_w=16"},
+			{4, "R3_w=32"},
 		},
 	},
 	{
@@ -67,19 +67,19 @@ static struct bpf_align_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.matches = {
-			{0, "R1=ctx(id=0,off=0,imm=0)"},
+			{0, "R1=ctx(off=0,imm=0)"},
 			{0, "R10=fp0"},
-			{0, "R3_w=inv1"},
-			{1, "R3_w=inv2"},
-			{2, "R3_w=inv4"},
-			{3, "R3_w=inv8"},
-			{4, "R3_w=inv16"},
-			{5, "R3_w=inv1"},
-			{6, "R4_w=inv32"},
-			{7, "R4_w=inv16"},
-			{8, "R4_w=inv8"},
-			{9, "R4_w=inv4"},
-			{10, "R4_w=inv2"},
+			{0, "R3_w=1"},
+			{1, "R3_w=2"},
+			{2, "R3_w=4"},
+			{3, "R3_w=8"},
+			{4, "R3_w=16"},
+			{5, "R3_w=1"},
+			{6, "R4_w=32"},
+			{7, "R4_w=16"},
+			{8, "R4_w=8"},
+			{9, "R4_w=4"},
+			{10, "R4_w=2"},
 		},
 	},
 	{
@@ -96,14 +96,14 @@ static struct bpf_align_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.matches = {
-			{0, "R1=ctx(id=0,off=0,imm=0)"},
+			{0, "R1=ctx(off=0,imm=0)"},
 			{0, "R10=fp0"},
-			{0, "R3_w=inv4"},
-			{1, "R3_w=inv8"},
-			{2, "R3_w=inv10"},
-			{3, "R4_w=inv8"},
-			{4, "R4_w=inv12"},
-			{5, "R4_w=inv14"},
+			{0, "R3_w=4"},
+			{1, "R3_w=8"},
+			{2, "R3_w=10"},
+			{3, "R4_w=8"},
+			{4, "R4_w=12"},
+			{5, "R4_w=14"},
 		},
 	},
 	{
@@ -118,12 +118,12 @@ static struct bpf_align_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.matches = {
-			{0, "R1=ctx(id=0,off=0,imm=0)"},
+			{0, "R1=ctx(off=0,imm=0)"},
 			{0, "R10=fp0"},
-			{0, "R3_w=inv7"},
-			{1, "R3_w=inv7"},
-			{2, "R3_w=inv14"},
-			{3, "R3_w=inv56"},
+			{0, "R3_w=7"},
+			{1, "R3_w=7"},
+			{2, "R3_w=14"},
+			{3, "R3_w=56"},
 		},
 	},
 
@@ -161,19 +161,19 @@ static struct bpf_align_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.matches = {
-			{6, "R0_w=pkt(id=0,off=8,r=8,imm=0)"},
-			{6, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
-			{7, "R3_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
-			{8, "R3_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
-			{9, "R3_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
-			{10, "R3_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
-			{12, "R3_w=pkt_end(id=0,off=0,imm=0)"},
-			{17, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
-			{18, "R4_w=inv(id=0,umax_value=8160,var_off=(0x0; 0x1fe0))"},
-			{19, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
-			{20, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
-			{21, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
-			{22, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
+			{6, "R0_w=pkt(off=8,r=8,imm=0)"},
+			{6, "R3_w=scalar(umax=255,var_off=(0x0; 0xff))"},
+			{7, "R3_w=scalar(umax=510,var_off=(0x0; 0x1fe))"},
+			{8, "R3_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"},
+			{9, "R3_w=scalar(umax=2040,var_off=(0x0; 0x7f8))"},
+			{10, "R3_w=scalar(umax=4080,var_off=(0x0; 0xff0))"},
+			{12, "R3_w=pkt_end(off=0,imm=0)"},
+			{17, "R4_w=scalar(umax=255,var_off=(0x0; 0xff))"},
+			{18, "R4_w=scalar(umax=8160,var_off=(0x0; 0x1fe0))"},
+			{19, "R4_w=scalar(umax=4080,var_off=(0x0; 0xff0))"},
+			{20, "R4_w=scalar(umax=2040,var_off=(0x0; 0x7f8))"},
+			{21, "R4_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"},
+			{22, "R4_w=scalar(umax=510,var_off=(0x0; 0x1fe))"},
 		},
 	},
 	{
@@ -194,16 +194,16 @@ static struct bpf_align_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.matches = {
-			{6, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
-			{7, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"},
-			{8, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
-			{9, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"},
-			{10, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
-			{11, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"},
-			{12, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
-			{13, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"},
-			{14, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
-			{15, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
+			{6, "R3_w=scalar(umax=255,var_off=(0x0; 0xff))"},
+			{7, "R4_w=scalar(id=1,umax=255,var_off=(0x0; 0xff))"},
+			{8, "R4_w=scalar(umax=255,var_off=(0x0; 0xff))"},
+			{9, "R4_w=scalar(id=1,umax=255,var_off=(0x0; 0xff))"},
+			{10, "R4_w=scalar(umax=510,var_off=(0x0; 0x1fe))"},
+			{11, "R4_w=scalar(id=1,umax=255,var_off=(0x0; 0xff))"},
+			{12, "R4_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"},
+			{13, "R4_w=scalar(id=1,umax=255,var_off=(0x0; 0xff))"},
+			{14, "R4_w=scalar(umax=2040,var_off=(0x0; 0x7f8))"},
+			{15, "R4_w=scalar(umax=4080,var_off=(0x0; 0xff0))"},
 		},
 	},
 	{
@@ -234,14 +234,14 @@ static struct bpf_align_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.matches = {
-			{2, "R5_w=pkt(id=0,off=0,r=0,imm=0)"},
-			{4, "R5_w=pkt(id=0,off=14,r=0,imm=0)"},
-			{5, "R4_w=pkt(id=0,off=14,r=0,imm=0)"},
-			{9, "R2=pkt(id=0,off=0,r=18,imm=0)"},
-			{10, "R5=pkt(id=0,off=14,r=18,imm=0)"},
-			{10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
-			{13, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"},
-			{14, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"},
+			{2, "R5_w=pkt(off=0,r=0,imm=0)"},
+			{4, "R5_w=pkt(off=14,r=0,imm=0)"},
+			{5, "R4_w=pkt(off=14,r=0,imm=0)"},
+			{9, "R2=pkt(off=0,r=18,imm=0)"},
+			{10, "R5=pkt(off=14,r=18,imm=0)"},
+			{10, "R4_w=scalar(umax=255,var_off=(0x0; 0xff))"},
+			{13, "R4_w=scalar(umax=65535,var_off=(0x0; 0xffff))"},
+			{14, "R4_w=scalar(umax=65535,var_off=(0x0; 0xffff))"},
 		},
 	},
 	{
@@ -296,59 +296,59 @@ static struct bpf_align_test tests[] = {
 			/* Calculated offset in R6 has unknown value, but known
 			 * alignment of 4.
 			 */
-			{6, "R2_w=pkt(id=0,off=0,r=8,imm=0)"},
-			{7, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			{6, "R2_w=pkt(off=0,r=8,imm=0)"},
+			{7, "R6_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"},
 			/* Offset is added to packet pointer R5, resulting in
 			 * known fixed offset, and variable offset from R6.
 			 */
-			{11, "R5_w=pkt(id=1,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			{11, "R5_w=pkt(id=1,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"},
 			/* At the time the word size load is performed from R5,
 			 * it's total offset is NET_IP_ALIGN + reg->off (0) +
 			 * reg->aux_off (14) which is 16.  Then the variable
 			 * offset is considered using reg->aux_off_align which
 			 * is 4 and meets the load's requirements.
 			 */
-			{15, "R4=pkt(id=1,off=18,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
-			{15, "R5=pkt(id=1,off=14,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			{15, "R4=pkt(id=1,off=18,r=18,umax=1020,var_off=(0x0; 0x3fc))"},
+			{15, "R5=pkt(id=1,off=14,r=18,umax=1020,var_off=(0x0; 0x3fc))"},
 			/* Variable offset is added to R5 packet pointer,
 			 * resulting in auxiliary alignment of 4.
 			 */
-			{17, "R5_w=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			{17, "R5_w=pkt(id=2,off=0,r=0,umax=1020,var_off=(0x0; 0x3fc))"},
 			/* Constant offset is added to R5, resulting in
 			 * reg->off of 14.
 			 */
-			{18, "R5_w=pkt(id=2,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			{18, "R5_w=pkt(id=2,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"},
 			/* At the time the word size load is performed from R5,
 			 * its total fixed offset is NET_IP_ALIGN + reg->off
 			 * (14) which is 16.  Then the variable offset is 4-byte
 			 * aligned, so the total offset is 4-byte aligned and
 			 * meets the load's requirements.
 			 */
-			{23, "R4=pkt(id=2,off=18,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
-			{23, "R5=pkt(id=2,off=14,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			{23, "R4=pkt(id=2,off=18,r=18,umax=1020,var_off=(0x0; 0x3fc))"},
+			{23, "R5=pkt(id=2,off=14,r=18,umax=1020,var_off=(0x0; 0x3fc))"},
 			/* Constant offset is added to R5 packet pointer,
 			 * resulting in reg->off value of 14.
 			 */
-			{25, "R5_w=pkt(id=0,off=14,r=8"},
+			{25, "R5_w=pkt(off=14,r=8"},
 			/* Variable offset is added to R5, resulting in a
 			 * variable offset of (4n).
 			 */
-			{26, "R5_w=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			{26, "R5_w=pkt(id=3,off=14,r=0,umax=1020,var_off=(0x0; 0x3fc))"},
 			/* Constant is added to R5 again, setting reg->off to 18. */
-			{27, "R5_w=pkt(id=3,off=18,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			{27, "R5_w=pkt(id=3,off=18,r=0,umax=1020,var_off=(0x0; 0x3fc))"},
 			/* And once more we add a variable; resulting var_off
 			 * is still (4n), fixed offset is not changed.
 			 * Also, we create a new reg->id.
 			 */
-			{28, "R5_w=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc)"},
+			{28, "R5_w=pkt(id=4,off=18,r=0,umax=2040,var_off=(0x0; 0x7fc)"},
 			/* At the time the word size load is performed from R5,
 			 * its total fixed offset is NET_IP_ALIGN + reg->off (18)
 			 * which is 20.  Then the variable offset is (4n), so
 			 * the total offset is 4-byte aligned and meets the
 			 * load's requirements.
 			 */
-			{33, "R4=pkt(id=4,off=22,r=22,umax_value=2040,var_off=(0x0; 0x7fc)"},
-			{33, "R5=pkt(id=4,off=18,r=22,umax_value=2040,var_off=(0x0; 0x7fc)"},
+			{33, "R4=pkt(id=4,off=22,r=22,umax=2040,var_off=(0x0; 0x7fc)"},
+			{33, "R5=pkt(id=4,off=18,r=22,umax=2040,var_off=(0x0; 0x7fc)"},
 		},
 	},
 	{
@@ -386,36 +386,36 @@ static struct bpf_align_test tests[] = {
 			/* Calculated offset in R6 has unknown value, but known
 			 * alignment of 4.
 			 */
-			{6, "R2_w=pkt(id=0,off=0,r=8,imm=0)"},
-			{7, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			{6, "R2_w=pkt(off=0,r=8,imm=0)"},
+			{7, "R6_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"},
 			/* Adding 14 makes R6 be (4n+2) */
-			{8, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+			{8, "R6_w=scalar(umin=14,umax=1034,var_off=(0x2; 0x7fc))"},
 			/* Packet pointer has (4n+2) offset */
-			{11, "R5_w=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"},
-			{12, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"},
+			{11, "R5_w=pkt(id=1,off=0,r=0,umin=14,umax=1034,var_off=(0x2; 0x7fc)"},
+			{12, "R4=pkt(id=1,off=4,r=0,umin=14,umax=1034,var_off=(0x2; 0x7fc)"},
 			/* At the time the word size load is performed from R5,
 			 * its total fixed offset is NET_IP_ALIGN + reg->off (0)
 			 * which is 2.  Then the variable offset is (4n+2), so
 			 * the total offset is 4-byte aligned and meets the
 			 * load's requirements.
 			 */
-			{15, "R5=pkt(id=1,off=0,r=4,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"},
+			{15, "R5=pkt(id=1,off=0,r=4,umin=14,umax=1034,var_off=(0x2; 0x7fc)"},
 			/* Newly read value in R6 was shifted left by 2, so has
 			 * known alignment of 4.
 			 */
-			{17, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			{17, "R6_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"},
 			/* Added (4n) to packet pointer's (4n+2) var_off, giving
 			 * another (4n+2).
 			 */
-			{19, "R5_w=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"},
-			{20, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"},
+			{19, "R5_w=pkt(id=2,off=0,r=0,umin=14,umax=2054,var_off=(0x2; 0xffc)"},
+			{20, "R4=pkt(id=2,off=4,r=0,umin=14,umax=2054,var_off=(0x2; 0xffc)"},
 			/* At the time the word size load is performed from R5,
 			 * its total fixed offset is NET_IP_ALIGN + reg->off (0)
 			 * which is 2.  Then the variable offset is (4n+2), so
 			 * the total offset is 4-byte aligned and meets the
 			 * load's requirements.
 			 */
-			{23, "R5=pkt(id=2,off=0,r=4,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"},
+			{23, "R5=pkt(id=2,off=0,r=4,umin=14,umax=2054,var_off=(0x2; 0xffc)"},
 		},
 	},
 	{
@@ -448,18 +448,18 @@ static struct bpf_align_test tests[] = {
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.result = REJECT,
 		.matches = {
-			{3, "R5_w=pkt_end(id=0,off=0,imm=0)"},
+			{3, "R5_w=pkt_end(off=0,imm=0)"},
 			/* (ptr - ptr) << 2 == unknown, (4n) */
-			{5, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc)"},
+			{5, "R5_w=scalar(smax=9223372036854775804,umax=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc)"},
 			/* (4n) + 14 == (4n+2).  We blow our bounds, because
 			 * the add could overflow.
 			 */
-			{6, "R5_w=inv(id=0,smin_value=-9223372036854775806,smax_value=9223372036854775806,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"},
+			{6, "R5_w=scalar(smin=-9223372036854775806,smax=9223372036854775806,umin=2,umax=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"},
 			/* Checked s>=0 */
-			{9, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"},
+			{9, "R5=scalar(umin=2,umax=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"},
 			/* packet pointer + nonnegative (4n+2) */
-			{11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"},
-			{12, "R4_w=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"},
+			{11, "R6_w=pkt(id=1,off=0,r=0,umin=2,umax=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"},
+			{12, "R4_w=pkt(id=1,off=4,r=0,umin=2,umax=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"},
 			/* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
 			 * We checked the bounds, but it might have been able
 			 * to overflow if the packet pointer started in the
@@ -467,7 +467,7 @@ static struct bpf_align_test tests[] = {
 			 * So we did not get a 'range' on R6, and the access
 			 * attempt will fail.
 			 */
-			{15, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"},
+			{15, "R6_w=pkt(id=1,off=0,r=0,umin=2,umax=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"},
 		}
 	},
 	{
@@ -502,23 +502,23 @@ static struct bpf_align_test tests[] = {
 			/* Calculated offset in R6 has unknown value, but known
 			 * alignment of 4.
 			 */
-			{6, "R2_w=pkt(id=0,off=0,r=8,imm=0)"},
-			{8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			{6, "R2_w=pkt(off=0,r=8,imm=0)"},
+			{8, "R6_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"},
 			/* Adding 14 makes R6 be (4n+2) */
-			{9, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+			{9, "R6_w=scalar(umin=14,umax=1034,var_off=(0x2; 0x7fc))"},
 			/* New unknown value in R7 is (4n) */
-			{10, "R7_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			{10, "R7_w=scalar(umax=1020,var_off=(0x0; 0x3fc))"},
 			/* Subtracting it from R6 blows our unsigned bounds */
-			{11, "R6=inv(id=0,smin_value=-1006,smax_value=1034,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"},
+			{11, "R6=scalar(smin=-1006,smax=1034,umin=2,umax=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"},
 			/* Checked s>= 0 */
-			{14, "R6=inv(id=0,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc))"},
+			{14, "R6=scalar(umin=2,umax=1034,var_off=(0x2; 0x7fc))"},
 			/* At the time the word size load is performed from R5,
 			 * its total fixed offset is NET_IP_ALIGN + reg->off (0)
 			 * which is 2.  Then the variable offset is (4n+2), so
 			 * the total offset is 4-byte aligned and meets the
 			 * load's requirements.
 			 */
-			{20, "R5=pkt(id=2,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc)"},
+			{20, "R5=pkt(id=2,off=0,r=4,umin=2,umax=1034,var_off=(0x2; 0x7fc)"},
 
 		},
 	},
@@ -556,23 +556,23 @@ static struct bpf_align_test tests[] = {
 			/* Calculated offset in R6 has unknown value, but known
 			 * alignment of 4.
 			 */
-			{6, "R2_w=pkt(id=0,off=0,r=8,imm=0)"},
-			{9, "R6_w=inv(id=0,umax_value=60,var_off=(0x0; 0x3c))"},
+			{6, "R2_w=pkt(off=0,r=8,imm=0)"},
+			{9, "R6_w=scalar(umax=60,var_off=(0x0; 0x3c))"},
 			/* Adding 14 makes R6 be (4n+2) */
-			{10, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"},
+			{10, "R6_w=scalar(umin=14,umax=74,var_off=(0x2; 0x7c))"},
 			/* Subtracting from packet pointer overflows ubounds */
-			{13, "R5_w=pkt(id=2,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c)"},
+			{13, "R5_w=pkt(id=2,off=0,r=8,umin=18446744073709551542,umax=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c)"},
 			/* New unknown value in R7 is (4n), >= 76 */
-			{14, "R7_w=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"},
+			{14, "R7_w=scalar(umin=76,umax=1096,var_off=(0x0; 0x7fc))"},
 			/* Adding it to packet pointer gives nice bounds again */
-			{16, "R5_w=pkt(id=3,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"},
+			{16, "R5_w=pkt(id=3,off=0,r=0,umin=2,umax=1082,var_off=(0x2; 0xfffffffc)"},
 			/* At the time the word size load is performed from R5,
 			 * its total fixed offset is NET_IP_ALIGN + reg->off (0)
 			 * which is 2.  Then the variable offset is (4n+2), so
 			 * the total offset is 4-byte aligned and meets the
 			 * load's requirements.
 			 */
-			{20, "R5=pkt(id=3,off=0,r=4,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"},
+			{20, "R5=pkt(id=3,off=0,r=4,umin=2,umax=1082,var_off=(0x2; 0xfffffffc)"},
 		},
 	},
 };
@@ -648,8 +648,8 @@ static int do_test_single(struct bpf_align_test *test)
 			/* Check the next line as well in case the previous line
 			 * did not have a corresponding bpf insn. Example:
 			 * func#0 @0
-			 * 0: R1=ctx(id=0,off=0,imm=0) R10=fp0
-			 * 0: (b7) r3 = 2                 ; R3_w=inv2
+			 * 0: R1=ctx(off=0,imm=0) R10=fp0
+			 * 0: (b7) r3 = 2                 ; R3_w=2
 			 */
 			if (!strstr(line_ptr, m.match)) {
 				cur_line = -1;
diff --git a/tools/testing/selftests/bpf/prog_tests/log_buf.c b/tools/testing/selftests/bpf/prog_tests/log_buf.c
index 1ef377a7e731..fe9a23e65ef4 100644
--- a/tools/testing/selftests/bpf/prog_tests/log_buf.c
+++ b/tools/testing/selftests/bpf/prog_tests/log_buf.c
@@ -78,7 +78,7 @@ static void obj_load_log_buf(void)
 	ASSERT_OK_PTR(strstr(libbpf_log_buf, "prog 'bad_prog': BPF program load failed"),
 		      "libbpf_log_not_empty");
 	ASSERT_OK_PTR(strstr(obj_log_buf, "DATASEC license"), "obj_log_not_empty");
-	ASSERT_OK_PTR(strstr(good_log_buf, "0: R1=ctx(id=0,off=0,imm=0) R10=fp0"),
+	ASSERT_OK_PTR(strstr(good_log_buf, "0: R1=ctx(off=0,imm=0) R10=fp0"),
 		      "good_log_verbose");
 	ASSERT_OK_PTR(strstr(bad_log_buf, "invalid access to map value, value_size=16 off=16000 size=4"),
 		      "bad_log_not_empty");
@@ -175,7 +175,7 @@ static void bpf_prog_load_log_buf(void)
 	opts.log_level = 2;
 	fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "good_prog", "GPL",
 			   good_prog_insns, good_prog_insn_cnt, &opts);
-	ASSERT_OK_PTR(strstr(log_buf, "0: R1=ctx(id=0,off=0,imm=0) R10=fp0"), "good_log_2");
+	ASSERT_OK_PTR(strstr(log_buf, "0: R1=ctx(off=0,imm=0) R10=fp0"), "good_log_2");
 	ASSERT_GE(fd, 0, "good_fd2");
 	if (fd >= 0)
 		close(fd);
diff --git a/tools/testing/selftests/bpf/verifier/atomic_invalid.c b/tools/testing/selftests/bpf/verifier/atomic_invalid.c
index 39272720b2f6..25f4ac1c69ab 100644
--- a/tools/testing/selftests/bpf/verifier/atomic_invalid.c
+++ b/tools/testing/selftests/bpf/verifier/atomic_invalid.c
@@ -1,6 +1,6 @@
-#define __INVALID_ATOMIC_ACCESS_TEST(op)					\
+#define __INVALID_ATOMIC_ACCESS_TEST(op)				\
 	{								\
-		"atomic " #op " access through non-pointer ",			\
+		"atomic " #op " access through non-pointer ",		\
 		.insns = {						\
 			BPF_MOV64_IMM(BPF_REG_0, 1),			\
 			BPF_MOV64_IMM(BPF_REG_1, 0),			\
@@ -9,7 +9,7 @@
 			BPF_EXIT_INSN(),				\
 		},							\
 		.result = REJECT,					\
-		.errstr = "R1 invalid mem access 'inv'"			\
+		.errstr = "R1 invalid mem access 'scalar'"		\
 	}
 __INVALID_ATOMIC_ACCESS_TEST(BPF_ADD),
 __INVALID_ATOMIC_ACCESS_TEST(BPF_ADD | BPF_FETCH),
diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c
index e061e8799ce2..33125d5f6772 100644
--- a/tools/testing/selftests/bpf/verifier/bounds.c
+++ b/tools/testing/selftests/bpf/verifier/bounds.c
@@ -508,7 +508,7 @@
 	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, -1),
 	BPF_EXIT_INSN(),
 	},
-	.errstr_unpriv = "R0 invalid mem access 'inv'",
+	.errstr_unpriv = "R0 invalid mem access 'scalar'",
 	.result_unpriv = REJECT,
 	.result = ACCEPT
 },
@@ -530,7 +530,7 @@
 	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, -1),
 	BPF_EXIT_INSN(),
 	},
-	.errstr_unpriv = "R0 invalid mem access 'inv'",
+	.errstr_unpriv = "R0 invalid mem access 'scalar'",
 	.result_unpriv = REJECT,
 	.result = ACCEPT
 },
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 0a8ea60c2a80..f890333259ad 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -188,7 +188,7 @@
 	},
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.result = REJECT,
-	.errstr = "R0 invalid mem access 'inv'",
+	.errstr = "R0 invalid mem access 'scalar'",
 },
 {
 	"calls: multiple ret types in subprog 2",
@@ -491,7 +491,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.result = REJECT,
-	.errstr = "R6 invalid mem access 'inv'",
+	.errstr = "R6 invalid mem access 'scalar'",
 	.prog_type = BPF_PROG_TYPE_XDP,
 	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
@@ -1697,7 +1697,7 @@
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.fixup_map_hash_8b = { 12, 22 },
 	.result = REJECT,
-	.errstr = "R0 invalid mem access 'inv'",
+	.errstr = "R0 invalid mem access 'scalar'",
 },
 {
 	"calls: pkt_ptr spill into caller stack",
diff --git a/tools/testing/selftests/bpf/verifier/ctx.c b/tools/testing/selftests/bpf/verifier/ctx.c
index 23080862aafd..60f6fbe03f19 100644
--- a/tools/testing/selftests/bpf/verifier/ctx.c
+++ b/tools/testing/selftests/bpf/verifier/ctx.c
@@ -127,7 +127,7 @@
 	.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
 	.expected_attach_type = BPF_CGROUP_UDP6_SENDMSG,
 	.result = REJECT,
-	.errstr = "R1 type=inv expected=ctx",
+	.errstr = "R1 type=scalar expected=ctx",
 },
 {
 	"pass ctx or null check, 4: ctx - const",
@@ -193,5 +193,5 @@
 	.prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
 	.expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
 	.result = REJECT,
-	.errstr = "R1 type=inv expected=ctx",
+	.errstr = "R1 type=scalar expected=ctx",
 },
diff --git a/tools/testing/selftests/bpf/verifier/direct_packet_access.c b/tools/testing/selftests/bpf/verifier/direct_packet_access.c
index ac1e19d0f520..11acd1855acf 100644
--- a/tools/testing/selftests/bpf/verifier/direct_packet_access.c
+++ b/tools/testing/selftests/bpf/verifier/direct_packet_access.c
@@ -339,7 +339,7 @@
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
-	.errstr = "R2 invalid mem access 'inv'",
+	.errstr = "R2 invalid mem access 'scalar'",
 	.result = REJECT,
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
diff --git a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
index 0ab7f1dfc97a..a6c869a7319c 100644
--- a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
+++ b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
@@ -350,7 +350,7 @@
 	BPF_EMIT_CALL(BPF_FUNC_csum_diff),
 	BPF_EXIT_INSN(),
 	},
-	.errstr = "R1 type=inv expected=fp",
+	.errstr = "R1 type=scalar expected=fp",
 	.result = REJECT,
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 },
@@ -471,7 +471,7 @@
 	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
-	.errstr = "R1 type=inv expected=fp",
+	.errstr = "R1 type=scalar expected=fp",
 	.result = REJECT,
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 },
@@ -484,7 +484,7 @@
 	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
-	.errstr = "R1 type=inv expected=fp",
+	.errstr = "R1 type=scalar expected=fp",
 	.result = REJECT,
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 },
diff --git a/tools/testing/selftests/bpf/verifier/jmp32.c b/tools/testing/selftests/bpf/verifier/jmp32.c
index 1c857b2fbdf0..6ddc418fdfaf 100644
--- a/tools/testing/selftests/bpf/verifier/jmp32.c
+++ b/tools/testing/selftests/bpf/verifier/jmp32.c
@@ -286,7 +286,7 @@
 	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
-	.errstr_unpriv = "R0 invalid mem access 'inv'",
+	.errstr_unpriv = "R0 invalid mem access 'scalar'",
 	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = 2,
@@ -356,7 +356,7 @@
 	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
-	.errstr_unpriv = "R0 invalid mem access 'inv'",
+	.errstr_unpriv = "R0 invalid mem access 'scalar'",
 	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = 2,
@@ -426,7 +426,7 @@
 	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
-	.errstr_unpriv = "R0 invalid mem access 'inv'",
+	.errstr_unpriv = "R0 invalid mem access 'scalar'",
 	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = 2,
@@ -496,7 +496,7 @@
 	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
-	.errstr_unpriv = "R0 invalid mem access 'inv'",
+	.errstr_unpriv = "R0 invalid mem access 'scalar'",
 	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = 2,
@@ -566,7 +566,7 @@
 	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
-	.errstr_unpriv = "R0 invalid mem access 'inv'",
+	.errstr_unpriv = "R0 invalid mem access 'scalar'",
 	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = 2,
@@ -636,7 +636,7 @@
 	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
-	.errstr_unpriv = "R0 invalid mem access 'inv'",
+	.errstr_unpriv = "R0 invalid mem access 'scalar'",
 	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = 2,
@@ -706,7 +706,7 @@
 	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
-	.errstr_unpriv = "R0 invalid mem access 'inv'",
+	.errstr_unpriv = "R0 invalid mem access 'scalar'",
 	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = 2,
@@ -776,7 +776,7 @@
 	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
-	.errstr_unpriv = "R0 invalid mem access 'inv'",
+	.errstr_unpriv = "R0 invalid mem access 'scalar'",
 	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = 2,
diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c
index 6dc8003ffc70..9e754423fa8b 100644
--- a/tools/testing/selftests/bpf/verifier/precise.c
+++ b/tools/testing/selftests/bpf/verifier/precise.c
@@ -27,7 +27,7 @@
 	BPF_JMP_IMM(BPF_JLT, BPF_REG_2, 8, 1),
 	BPF_EXIT_INSN(),
 
-	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), /* R2=inv(umin=1, umax=8) */
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), /* R2=scalar(umin=1, umax=8) */
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_FP),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
@@ -87,7 +87,7 @@
 	BPF_JMP_IMM(BPF_JLT, BPF_REG_2, 8, 1),
 	BPF_EXIT_INSN(),
 
-	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), /* R2=inv(umin=1, umax=8) */
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), /* R2=scalar(umin=1, umax=8) */
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_FP),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
diff --git a/tools/testing/selftests/bpf/verifier/raw_stack.c b/tools/testing/selftests/bpf/verifier/raw_stack.c
index cc8e8c3cdc03..eb5ed936580b 100644
--- a/tools/testing/selftests/bpf/verifier/raw_stack.c
+++ b/tools/testing/selftests/bpf/verifier/raw_stack.c
@@ -132,7 +132,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.result = REJECT,
-	.errstr = "R0 invalid mem access 'inv'",
+	.errstr = "R0 invalid mem access 'scalar'",
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
@@ -162,7 +162,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.result = REJECT,
-	.errstr = "R3 invalid mem access 'inv'",
+	.errstr = "R3 invalid mem access 'scalar'",
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c
index 3b6ee009c00b..fbd682520e47 100644
--- a/tools/testing/selftests/bpf/verifier/ref_tracking.c
+++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c
@@ -162,7 +162,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
-	.errstr = "type=inv expected=sock",
+	.errstr = "type=scalar expected=sock",
 	.result = REJECT,
 },
 {
@@ -178,7 +178,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
-	.errstr = "type=inv expected=sock",
+	.errstr = "type=scalar expected=sock",
 	.result = REJECT,
 },
 {
@@ -274,7 +274,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
-	.errstr = "type=inv expected=sock",
+	.errstr = "type=scalar expected=sock",
 	.result = REJECT,
 },
 {
diff --git a/tools/testing/selftests/bpf/verifier/search_pruning.c b/tools/testing/selftests/bpf/verifier/search_pruning.c
index 682519769fe3..68b14fdfebdb 100644
--- a/tools/testing/selftests/bpf/verifier/search_pruning.c
+++ b/tools/testing/selftests/bpf/verifier/search_pruning.c
@@ -104,7 +104,7 @@
 		BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_8b = { 3 },
-	.errstr = "R6 invalid mem access 'inv'",
+	.errstr = "R6 invalid mem access 'scalar'",
 	.result = REJECT,
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 },
diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c
index 8c224eac93df..86b24cad27a7 100644
--- a/tools/testing/selftests/bpf/verifier/sock.c
+++ b/tools/testing/selftests/bpf/verifier/sock.c
@@ -502,7 +502,7 @@
 	.fixup_sk_storage_map = { 11 },
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.result = REJECT,
-	.errstr = "R3 type=inv expected=fp",
+	.errstr = "R3 type=scalar expected=fp",
 },
 {
 	"sk_storage_get(map, skb->sk, &stack_value, 1): stack_value",
diff --git a/tools/testing/selftests/bpf/verifier/spill_fill.c b/tools/testing/selftests/bpf/verifier/spill_fill.c
index 8cfc5349d2a8..e23f07175e1b 100644
--- a/tools/testing/selftests/bpf/verifier/spill_fill.c
+++ b/tools/testing/selftests/bpf/verifier/spill_fill.c
@@ -102,7 +102,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.errstr_unpriv = "attempt to corrupt spilled",
-	.errstr = "R0 invalid mem access 'inv",
+	.errstr = "R0 invalid mem access 'scalar'",
 	.result = REJECT,
 	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
@@ -147,11 +147,11 @@
 	BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_10, -8),
 	/* r0 = r2 */
 	BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
-	/* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=inv20 */
+	/* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=20 */
 	BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4),
-	/* if (r0 > r3) R0=pkt,off=20 R2=pkt R3=pkt_end R4=inv20 */
+	/* if (r0 > r3) R0=pkt,off=20 R2=pkt R3=pkt_end R4=20 */
 	BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
-	/* r0 = *(u32 *)r2 R0=pkt,off=20,r=20 R2=pkt,r=20 R3=pkt_end R4=inv20 */
+	/* r0 = *(u32 *)r2 R0=pkt,off=20,r=20 R2=pkt,r=20 R3=pkt_end R4=20 */
 	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
@@ -190,11 +190,11 @@
 	BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_10, -8),
 	/* r0 = r2 */
 	BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
-	/* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=inv,umax=65535 */
+	/* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=65535 */
 	BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4),
-	/* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=inv,umax=65535 */
+	/* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=umax=65535 */
 	BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
-	/* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=inv20 */
+	/* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=20 */
 	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
@@ -222,11 +222,11 @@
 	BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_10, -8),
 	/* r0 = r2 */
 	BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
-	/* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=inv,umax=65535 */
+	/* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=65535 */
 	BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4),
-	/* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=inv,umax=65535 */
+	/* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=umax=65535 */
 	BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
-	/* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=inv20 */
+	/* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=20 */
 	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
@@ -250,11 +250,11 @@
 	BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_10, -6),
 	/* r0 = r2 */
 	BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
-	/* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=inv,umax=65535 */
+	/* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=65535 */
 	BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4),
-	/* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=inv,umax=65535 */
+	/* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=umax=65535 */
 	BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
-	/* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=inv20 */
+	/* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=20 */
 	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
@@ -280,11 +280,11 @@
 	BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_10, -4),
 	/* r0 = r2 */
 	BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
-	/* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=inv,umax=U32_MAX */
+	/* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=U32_MAX */
 	BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4),
-	/* if (r0 > r3) R0=pkt,umax=U32_MAX R2=pkt R3=pkt_end R4=inv */
+	/* if (r0 > r3) R0=pkt,umax=U32_MAX R2=pkt R3=pkt_end R4= */
 	BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
-	/* r0 = *(u32 *)r2 R0=pkt,umax=U32_MAX R2=pkt R3=pkt_end R4=inv */
+	/* r0 = *(u32 *)r2 R0=pkt,umax=U32_MAX R2=pkt R3=pkt_end R4= */
 	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
@@ -305,13 +305,13 @@
 	BPF_JMP_IMM(BPF_JLE, BPF_REG_4, 40, 2),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
-	/* *(u32 *)(r10 -8) = r4 R4=inv,umax=40 */
+	/* *(u32 *)(r10 -8) = r4 R4=umax=40 */
 	BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -8),
 	/* r4 = (*u32 *)(r10 - 8) */
 	BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_10, -8),
-	/* r2 += r4 R2=pkt R4=inv,umax=40 */
+	/* r2 += r4 R2=pkt R4=umax=40 */
 	BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_4),
-	/* r0 = r2 R2=pkt,umax=40 R4=inv,umax=40 */
+	/* r0 = r2 R2=pkt,umax=40 R4=umax=40 */
 	BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
 	/* r2 += 20 R0=pkt,umax=40 R2=pkt,umax=40 */
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 20),
diff --git a/tools/testing/selftests/bpf/verifier/unpriv.c b/tools/testing/selftests/bpf/verifier/unpriv.c
index 111801aea5e3..878ca26c3f0a 100644
--- a/tools/testing/selftests/bpf/verifier/unpriv.c
+++ b/tools/testing/selftests/bpf/verifier/unpriv.c
@@ -214,7 +214,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.result = REJECT,
-	.errstr = "R1 type=inv expected=ctx",
+	.errstr = "R1 type=scalar expected=ctx",
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 },
 {
@@ -420,7 +420,7 @@
 	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
 	BPF_EXIT_INSN(),
 	},
-	.errstr_unpriv = "R7 invalid mem access 'inv'",
+	.errstr_unpriv = "R7 invalid mem access 'scalar'",
 	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = 0,
diff --git a/tools/testing/selftests/bpf/verifier/value_illegal_alu.c b/tools/testing/selftests/bpf/verifier/value_illegal_alu.c
index 489062867218..d6f29eb4bd57 100644
--- a/tools/testing/selftests/bpf/verifier/value_illegal_alu.c
+++ b/tools/testing/selftests/bpf/verifier/value_illegal_alu.c
@@ -64,7 +64,7 @@
 	},
 	.fixup_map_hash_48b = { 3 },
 	.errstr_unpriv = "R0 pointer arithmetic prohibited",
-	.errstr = "invalid mem access 'inv'",
+	.errstr = "invalid mem access 'scalar'",
 	.result = REJECT,
 	.result_unpriv = REJECT,
 	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -89,7 +89,7 @@
 	},
 	.fixup_map_hash_48b = { 3 },
 	.errstr_unpriv = "leaking pointer from stack off -8",
-	.errstr = "R0 invalid mem access 'inv'",
+	.errstr = "R0 invalid mem access 'scalar'",
 	.result = REJECT,
 	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
index 359f3e8f8b60..249187d3c530 100644
--- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
+++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
@@ -397,7 +397,7 @@
 	.fixup_map_array_48b = { 1 },
 	.result = ACCEPT,
 	.result_unpriv = REJECT,
-	.errstr_unpriv = "R0 invalid mem access 'inv'",
+	.errstr_unpriv = "R0 invalid mem access 'scalar'",
 	.retval = 0,
 },
 {
@@ -1074,7 +1074,7 @@
 	},
 	.fixup_map_array_48b = { 3 },
 	.result = REJECT,
-	.errstr = "R0 invalid mem access 'inv'",
+	.errstr = "R0 invalid mem access 'scalar'",
 	.errstr_unpriv = "R0 pointer -= pointer prohibited",
 },
 {
diff --git a/tools/testing/selftests/bpf/verifier/var_off.c b/tools/testing/selftests/bpf/verifier/var_off.c
index eab1f7f56e2f..187c6f6e32bc 100644
--- a/tools/testing/selftests/bpf/verifier/var_off.c
+++ b/tools/testing/selftests/bpf/verifier/var_off.c
@@ -131,7 +131,7 @@
 	 * write might have overwritten the spilled pointer (i.e. we lose track
 	 * of the spilled register when we analyze the write).
 	 */
-	.errstr = "R2 invalid mem access 'inv'",
+	.errstr = "R2 invalid mem access 'scalar'",
 	.result = REJECT,
 },
 {
-- 
cgit 


From edcb647b4bfb19674d58bff2e6ef96e0dbcccfdf Mon Sep 17 00:00:00 2001
From: Muhammad Usama Anjum <usama.anjum@collabora.com>
Date: Wed, 2 Mar 2022 23:01:18 +0500
Subject: selftests: add kselftest_install to .gitignore

Add kselftest_install directory to the .gitignore which is created while
creation of tar ball of objects:
make -C tools/testing/selftests gen_tar

Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/.gitignore | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/.gitignore b/tools/testing/selftests/.gitignore
index 055a5019b13c..cb24124ac5b9 100644
--- a/tools/testing/selftests/.gitignore
+++ b/tools/testing/selftests/.gitignore
@@ -3,6 +3,7 @@ gpiogpio-event-mon
 gpiogpio-hammer
 gpioinclude/
 gpiolsgpio
+kselftest_install/
 tpm2/SpaceTest.log
 
 # Python bytecode and cache
-- 
cgit 


From c7b9c68fc01b468983c8f4745f400fa74c831005 Mon Sep 17 00:00:00 2001
From: Muhammad Usama Anjum <usama.anjum@collabora.com>
Date: Wed, 2 Mar 2022 23:01:19 +0500
Subject: selftests/exec: add generated files to .gitignore

Add generated files non-regular and null-argv to .gitignore file.

Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/exec/.gitignore | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/exec/.gitignore b/tools/testing/selftests/exec/.gitignore
index 9e2f00343f15..90c238ba6a4b 100644
--- a/tools/testing/selftests/exec/.gitignore
+++ b/tools/testing/selftests/exec/.gitignore
@@ -7,6 +7,8 @@ execveat.moved
 execveat.path.ephemeral
 execveat.ephemeral
 execveat.denatured
+non-regular
+null-argv
 /load_address_*
 /recursion-depth
 xxxxxxxx*
-- 
cgit 


From 946ad0499d984be13ec02f8257ca0527ec287bf2 Mon Sep 17 00:00:00 2001
From: Muhammad Usama Anjum <usama.anjum@collabora.com>
Date: Wed, 2 Mar 2022 23:01:20 +0500
Subject: selftests: kvm: add generated file to the .gitignore

Add hyperv_svm_test to the .gitignore file.

Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/kvm/.gitignore | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index dce7de7755e6..62f9b781545b 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -20,6 +20,7 @@
 /x86_64/hyperv_clock
 /x86_64/hyperv_cpuid
 /x86_64/hyperv_features
+/x86_64/hyperv_svm_test
 /x86_64/mmio_warning_test
 /x86_64/mmu_role_test
 /x86_64/platform_info_test
-- 
cgit 


From a50a88f026fb28ece512c50e8ef7cd4ef6d0a291 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Wed, 2 Mar 2022 13:29:13 +0800
Subject: selftests: netfilter: fix a build error on openSUSE

This patch fixed the following build error on openSUSE Leap 15.3:

=======================================================================
 gcc     nf-queue.c -lmnl -o tools/testing/selftests/netfilter/nf-queue
 nf-queue.c:13:10: fatal error: libmnl/libmnl.h: No such file or directory
  #include <libmnl/libmnl.h>
           ^~~~~~~~~~~~~~~~~
 compilation terminated.
=======================================================================

It is because libmnl.h is put in the directory of
"/usr/include/libmnl/libmnl/" on openSUSE, not "/usr/include/libmnl/":

 > rpm -ql libmnl-devel
 /usr/include/libmnl
 /usr/include/libmnl/libmnl
 /usr/include/libmnl/libmnl/libmnl.h
 /usr/lib64/libmnl.so
 /usr/lib64/pkgconfig/libmnl.pc

Suggested-by: Kai Liu <kai.liu@suse.com>
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/netfilter/Makefile | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
index e4f845dd942b..8136c1fab7ab 100644
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -8,6 +8,7 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \
 	ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \
 	conntrack_vrf.sh nft_synproxy.sh
 
+CFLAGS += $(shell pkg-config --cflags libmnl 2>/dev/null || echo "-I/usr/include/libmnl")
 LDLIBS = -lmnl
 TEST_GEN_FILES =  nf-queue
 
-- 
cgit 


From f6d344cd5fa6a15e1ec2da350470b35a3f55f74c Mon Sep 17 00:00:00 2001
From: Muhammad Usama Anjum <usama.anjum@collabora.com>
Date: Thu, 17 Feb 2022 03:38:17 +0500
Subject: selftests: Fix build when $(O) points to a relative path

Build of bpf and tc-testing selftests fails when the relative path of
the build directory is specified.

make -C tools/testing/selftests O=build0
make[1]: Entering directory '/linux_mainline/tools/testing/selftests/bpf'
../../../scripts/Makefile.include:4: *** O=build0 does not exist.  Stop.
make[1]: Entering directory '/linux_mainline/tools/testing/selftests/tc-testing'
../../../scripts/Makefile.include:4: *** O=build0 does not exist.  Stop.

Makefiles of bpf and tc-testing include scripts/Makefile.include file.
This file has sanity checking inside it which checks the output path.
The output path is not relative to the bpf or tc-testing. The sanity
check fails. Expand the output path to get rid of this error. The fix is
the same as mentioned in commit 150a27328b68 ("bpf, preload: Fix build
when $(O) points to a relative path").

Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/Makefile | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 5d9d4ddccccb..2319ec87f53d 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -177,6 +177,7 @@ all: khdr
 		BUILD_TARGET=$$BUILD/$$TARGET;			\
 		mkdir $$BUILD_TARGET  -p;			\
 		$(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET	\
+				O=$(abs_objtree)		\
 				$(if $(FORCE_TARGETS),|| exit);	\
 		ret=$$((ret * $$?));				\
 	done; exit $$ret;
@@ -184,7 +185,8 @@ all: khdr
 run_tests: all
 	@for TARGET in $(TARGETS); do \
 		BUILD_TARGET=$$BUILD/$$TARGET;	\
-		$(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests;\
+		$(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests \
+				O=$(abs_objtree);		    \
 	done;
 
 hotplug:
@@ -235,6 +237,7 @@ ifdef INSTALL_PATH
 	for TARGET in $(TARGETS); do \
 		BUILD_TARGET=$$BUILD/$$TARGET;	\
 		$(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install \
+				O=$(abs_objtree)		\
 				$(if $(FORCE_TARGETS),|| exit);	\
 		ret=$$((ret * $$?));		\
 	done; exit $$ret;
-- 
cgit 


From 9a0a93672c14feaf441c7078c00065c5d163d12a Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Fri, 4 Mar 2022 11:36:26 -0800
Subject: selftests: mptcp: adjust output alignment for more tests

The number of self tests in mptcp_join.sh will soon be more than 100, the
output alignment is no longer OK. This patch adjusted it.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 77b359a49a47..88740cfe49dd 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -20,6 +20,7 @@ do_all_tests=1
 init=0
 
 TEST_COUNT=0
+nr_blank=40
 
 # generated using "nfbpf_compile '(ip && (ip[54] & 0xf0) == 0x30) ||
 #				  (ip6 && (ip6[74] & 0xf0) == 0x30)'"
@@ -732,9 +733,9 @@ chk_csum_nr()
 	local dump_stats
 
 	if [ ! -z "$msg" ]; then
-		printf "%02u" "$TEST_COUNT"
+		printf "%03u" "$TEST_COUNT"
 	else
-		echo -n "  "
+		echo -n "   "
 	fi
 	printf " %-36s %s" "$msg" "sum"
 	count=`ip netns exec $ns1 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}'`
@@ -766,7 +767,7 @@ chk_fail_nr()
 	local count
 	local dump_stats
 
-	printf "%-39s %s" " " "ftx"
+	printf "%-${nr_blank}s %s" " " "ftx"
 	count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPFailTx | awk '{print $2}'`
 	[ -z "$count" ] && count=0
 	if [ "$count" != "$mp_fail_nr_tx" ]; then
@@ -801,7 +802,7 @@ chk_join_nr()
 	local dump_stats
 	local with_cookie
 
-	printf "%02u %-36s %s" "$TEST_COUNT" "$msg" "syn"
+	printf "%03u %-36s %s" "$TEST_COUNT" "$msg" "syn"
 	count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinSynRx | awk '{print $2}'`
 	[ -z "$count" ] && count=0
 	if [ "$count" != "$syn_nr" ]; then
@@ -863,7 +864,7 @@ chk_stale_nr()
 	local stale_nr
 	local recover_nr
 
-	printf "%-39s %-18s" " " "stale"
+	printf "%-${nr_blank}s %-18s" " " "stale"
 	stale_nr=`ip netns exec $ns nstat -as | grep MPTcpExtSubflowStale | awk '{print $2}'`
 	[ -z "$stale_nr" ] && stale_nr=0
 	recover_nr=`ip netns exec $ns nstat -as | grep MPTcpExtSubflowRecover | awk '{print $2}'`
@@ -904,7 +905,7 @@ chk_add_nr()
 
 	timeout=`ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout`
 
-	printf "%-39s %s" " " "add"
+	printf "%-${nr_blank}s %s" " " "add"
 	count=`ip netns exec $ns2 nstat -as MPTcpExtAddAddr | grep MPTcpExtAddAddr | awk '{print $2}'`
 	[ -z "$count" ] && count=0
 
@@ -941,7 +942,7 @@ chk_add_nr()
 			echo "[ ok ]"
 		fi
 
-		printf "%-39s %s" " " "syn"
+		printf "%-${nr_blank}s %s" " " "syn"
 		count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinPortSynRx |
 			awk '{print $2}'`
 		[ -z "$count" ] && count=0
@@ -980,7 +981,7 @@ chk_add_nr()
 			echo "[ ok ]"
 		fi
 
-		printf "%-39s %s" " " "syn"
+		printf "%-${nr_blank}s %s" " " "syn"
 		count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMismatchPortSynRx |
 			awk '{print $2}'`
 		[ -z "$count" ] && count=0
@@ -1030,7 +1031,7 @@ chk_rm_nr()
 		subflow_ns=$ns1
 	fi
 
-	printf "%-39s %s" " " "rm "
+	printf "%-${nr_blank}s %s" " " "rm "
 	count=`ip netns exec $addr_ns nstat -as | grep MPTcpExtRmAddr | awk '{print $2}'`
 	[ -z "$count" ] && count=0
 	if [ "$count" != "$rm_addr_nr" ]; then
@@ -1062,7 +1063,7 @@ chk_prio_nr()
 	local count
 	local dump_stats
 
-	printf "%-39s %s" " " "ptx"
+	printf "%-${nr_blank}s %s" " " "ptx"
 	count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPPrioTx | awk '{print $2}'`
 	[ -z "$count" ] && count=0
 	if [ "$count" != "$mp_prio_nr_tx" ]; then
@@ -1098,7 +1099,7 @@ chk_link_usage()
 	local tx_rate=$((tx_link * 100 / $tx_total))
 	local tolerance=5
 
-	printf "%-39s %-18s" " " "link usage"
+	printf "%-${nr_blank}s %-18s" " " "link usage"
 	if [ $tx_rate -lt $((expected_rate - $tolerance)) -o \
 	     $tx_rate -gt $((expected_rate + $tolerance)) ]; then
 		echo "[fail] got $tx_rate% usage, expected $expected_rate%"
-- 
cgit 


From e8e947ef50f6f24710f3557bc327deb185b52f84 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Fri, 4 Mar 2022 11:36:28 -0800
Subject: selftests: mptcp: add the MP_FASTCLOSE mibs check

This patch added a new function chk_fclose_nr() to check the numbers
of the MP_FASTCLOSE sending and receiving mibs.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 32 +++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 88740cfe49dd..10339a796325 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -792,6 +792,38 @@ chk_fail_nr()
 	[ "${dump_stats}" = 1 ] && dump_stats
 }
 
+chk_fclose_nr()
+{
+	local fclose_tx=$1
+	local fclose_rx=$2
+	local count
+	local dump_stats
+
+	printf "%-${nr_blank}s %s" " " "ctx"
+	count=$(ip netns exec $ns2 nstat -as | grep MPTcpExtMPFastcloseTx | awk '{print $2}')
+	[ -z "$count" ] && count=0
+	if [ "$count" != "$fclose_tx" ]; then
+		echo "[fail] got $count MP_FASTCLOSE[s] TX expected $fclose_tx"
+		ret=1
+		dump_stats=1
+	else
+		echo -n "[ ok ]"
+	fi
+
+	echo -n " - fclzrx"
+	count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMPFastcloseRx | awk '{print $2}')
+	[ -z "$count" ] && count=0
+	if [ "$count" != "$fclose_rx" ]; then
+		echo "[fail] got $count MP_FASTCLOSE[s] RX expected $fclose_rx"
+		ret=1
+		dump_stats=1
+	else
+		echo "[ ok ]"
+	fi
+
+	[ "${dump_stats}" = 1 ] && dump_stats
+}
+
 chk_join_nr()
 {
 	local msg="$1"
-- 
cgit 


From 922fd2b39e5a3c0220974687ad2afcf5654819e6 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Fri, 4 Mar 2022 11:36:30 -0800
Subject: selftests: mptcp: add the MP_RST mibs check

This patch added a new function chk_rst_nr() to check the numbers
of the MP_RST sending and receiving mibs.

Showed in the output whether the inverted namespaces check order is used.
Since if we pass -Cz to mptcp_join.sh, the MP_RST information is showed
twice.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 45 +++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 10339a796325..12008640d9f4 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -824,6 +824,50 @@ chk_fclose_nr()
 	[ "${dump_stats}" = 1 ] && dump_stats
 }
 
+chk_rst_nr()
+{
+	local rst_tx=$1
+	local rst_rx=$2
+	local ns_invert=${3:-""}
+	local count
+	local dump_stats
+	local ns_tx=$ns1
+	local ns_rx=$ns2
+	local extra_msg=""
+
+	if [[ $ns_invert = "invert" ]]; then
+		ns_tx=$ns2
+		ns_rx=$ns1
+		extra_msg="   invert"
+	fi
+
+	printf "%-${nr_blank}s %s" " " "rtx"
+	count=$(ip netns exec $ns_tx nstat -as | grep MPTcpExtMPRstTx | awk '{print $2}')
+	[ -z "$count" ] && count=0
+	if [ "$count" != "$rst_tx" ]; then
+		echo "[fail] got $count MP_RST[s] TX expected $rst_tx"
+		ret=1
+		dump_stats=1
+	else
+		echo -n "[ ok ]"
+	fi
+
+	echo -n " - rstrx "
+	count=$(ip netns exec $ns_rx nstat -as | grep MPTcpExtMPRstRx | awk '{print $2}')
+	[ -z "$count" ] && count=0
+	if [ "$count" != "$rst_rx" ]; then
+		echo "[fail] got $count MP_RST[s] RX expected $rst_rx"
+		ret=1
+		dump_stats=1
+	else
+		echo -n "[ ok ]"
+	fi
+
+	[ "${dump_stats}" = 1 ] && dump_stats
+
+	echo "$extra_msg"
+}
+
 chk_join_nr()
 {
 	local msg="$1"
@@ -878,6 +922,7 @@ chk_join_nr()
 	if [ $checksum -eq 1 ]; then
 		chk_csum_nr
 		chk_fail_nr 0 0
+		chk_rst_nr 0 0
 	fi
 }
 
-- 
cgit 


From cbfafac4cf8fa885c7c5d5b3914a9f9ce3b4565b Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Fri, 4 Mar 2022 11:36:31 -0800
Subject: selftests: mptcp: add extra_args in do_transfer

Instead of using a global variable mptcp_connect, this patch added
a new local variable extra_args in do_transfer() to store the extra
arguments passing to the mptcp_connect commands.

This patch also renamed the speed level 'least' to 'speed_*'. This
more flexible way can avoid the need to add new speed levels in the
future.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 30 ++++++++++++-------------
 1 file changed, 15 insertions(+), 15 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 12008640d9f4..84c21282ee46 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -12,7 +12,6 @@ cout=""
 ksft_skip=4
 timeout_poll=30
 timeout_test=$((timeout_poll * 2 + 1))
-mptcp_connect=""
 capture=0
 checksum=0
 ip_mptcp=0
@@ -444,12 +443,13 @@ do_transfer()
 	NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
 		nstat -n
 
+	local extra_args
 	if [ $speed = "fast" ]; then
-		mptcp_connect="./mptcp_connect -j"
+		extra_args="-j"
 	elif [ $speed = "slow" ]; then
-		mptcp_connect="./mptcp_connect -r 50"
-	elif [ $speed = "least" ]; then
-		mptcp_connect="./mptcp_connect -r 10"
+		extra_args="-r 50"
+	elif [[ $speed = "speed_"* ]]; then
+		extra_args="-r ${speed:6}"
 	fi
 
 	local local_addr
@@ -462,13 +462,13 @@ do_transfer()
 	if [ "$test_link_fail" -eq 2 ];then
 		timeout ${timeout_test} \
 			ip netns exec ${listener_ns} \
-				$mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
-					${local_addr} < "$sinfail" > "$sout" &
+				./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
+					$extra_args ${local_addr} < "$sinfail" > "$sout" &
 	else
 		timeout ${timeout_test} \
 			ip netns exec ${listener_ns} \
-				$mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
-					${local_addr} < "$sin" > "$sout" &
+				./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
+					$extra_args ${local_addr} < "$sin" > "$sout" &
 	fi
 	spid=$!
 
@@ -477,15 +477,15 @@ do_transfer()
 	if [ "$test_link_fail" -eq 0 ];then
 		timeout ${timeout_test} \
 			ip netns exec ${connector_ns} \
-				$mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
-					$connect_addr < "$cin" > "$cout" &
+				./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
+					$extra_args $connect_addr < "$cin" > "$cout" &
 	else
 		( cat "$cinfail" ; sleep 2; link_failure $listener_ns ; cat "$cinfail" ) | \
 			tee "$cinsent" | \
 			timeout ${timeout_test} \
 				ip netns exec ${connector_ns} \
-					$mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
-						$connect_addr > "$cout" &
+					./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
+						$extra_args $connect_addr > "$cout" &
 	fi
 	cpid=$!
 
@@ -1507,7 +1507,7 @@ add_addr_timeout_tests()
 	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
 	pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
 	pm_nl_set_limits $ns2 2 2
-	run_tests $ns1 $ns2 10.0.1.1 0 0 0 least
+	run_tests $ns1 $ns2 10.0.1.1 0 0 0 speed_10
 	chk_join_nr "signal addresses, ADD_ADDR timeout" 2 2 2
 	chk_add_nr 8 0
 
@@ -1517,7 +1517,7 @@ add_addr_timeout_tests()
 	pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
 	pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
 	pm_nl_set_limits $ns2 2 2
-	run_tests $ns1 $ns2 10.0.1.1 0 0 0 least
+	run_tests $ns1 $ns2 10.0.1.1 0 0 0 speed_10
 	chk_join_nr "invalid address, ADD_ADDR timeout" 1 1 1
 	chk_add_nr 8 0
 }
-- 
cgit 


From 34b572b76fecbcc4f209f87269a07ccb9872f6f2 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Fri, 4 Mar 2022 11:36:32 -0800
Subject: selftests: mptcp: reuse linkfail to make given size files

This patch reused the test_linkfail values above 2 to make test files with
the given sizes (KB) for both the client side and the server side. It's
useful for the test cases using different file sizes.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 32 +++++++++++++++++++++----
 1 file changed, 27 insertions(+), 5 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 84c21282ee46..1fda29d5d69f 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -459,7 +459,7 @@ do_transfer()
 		local_addr="0.0.0.0"
 	fi
 
-	if [ "$test_link_fail" -eq 2 ];then
+	if [ "$test_link_fail" -gt 1 ];then
 		timeout ${timeout_test} \
 			ip netns exec ${listener_ns} \
 				./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
@@ -479,13 +479,19 @@ do_transfer()
 			ip netns exec ${connector_ns} \
 				./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
 					$extra_args $connect_addr < "$cin" > "$cout" &
-	else
+	elif [ "$test_link_fail" -eq 1 ] || [ "$test_link_fail" -eq 2 ];then
 		( cat "$cinfail" ; sleep 2; link_failure $listener_ns ; cat "$cinfail" ) | \
 			tee "$cinsent" | \
 			timeout ${timeout_test} \
 				ip netns exec ${connector_ns} \
 					./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
 						$extra_args $connect_addr > "$cout" &
+	else
+		cat "$cinfail" | tee "$cinsent" | \
+			timeout ${timeout_test} \
+				ip netns exec ${connector_ns} \
+					./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
+						$extra_args $connect_addr > "$cout" &
 	fi
 	cpid=$!
 
@@ -645,7 +651,7 @@ do_transfer()
 		return 1
 	fi
 
-	if [ "$test_link_fail" -eq 2 ];then
+	if [ "$test_link_fail" -gt 1 ];then
 		check_transfer $sinfail $cout "file received by client"
 	else
 		check_transfer $sin $cout "file received by client"
@@ -690,9 +696,18 @@ run_tests()
 	speed="${7:-fast}"
 	sflags="${8:-""}"
 
+	# The values above 2 are reused to make test files
+	# with the given sizes (KB)
+	if [ "$test_linkfail" -gt 2 ]; then
+		size=$test_linkfail
+
+		if [ -z "$cinfail" ]; then
+			cinfail=$(mktemp)
+		fi
+		make_file "$cinfail" "client" $size
 	# create the input file for the failure test when
 	# the first failure test run
-	if [ "$test_linkfail" -ne 0 -a -z "$cinfail" ]; then
+	elif [ "$test_linkfail" -ne 0 -a -z "$cinfail" ]; then
 		# the client file must be considerably larger
 		# of the maximum expected cwin value, or the
 		# link utilization will be not predicable
@@ -705,7 +720,14 @@ run_tests()
 		make_file "$cinfail" "client" $size
 	fi
 
-	if [ "$test_linkfail" -eq 2 -a -z "$sinfail" ]; then
+	if [ "$test_linkfail" -gt 2 ]; then
+		size=$test_linkfail
+
+		if [ -z "$sinfail" ]; then
+			sinfail=$(mktemp)
+		fi
+		make_file "$sinfail" "server" $size
+	elif [ "$test_linkfail" -eq 2 -a -z "$sinfail" ]; then
 		size=$((RANDOM%16))
 		size=$((size+1))
 		size=$((size*2048))
-- 
cgit 


From 01542c9bf9ab04493e027f55b5e0d5fdfdc1780f Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Fri, 4 Mar 2022 11:36:33 -0800
Subject: selftests: mptcp: add fastclose testcase

This patch added the self test for MP_FASTCLOSE. Reused the argument
addr_nr_ns2 of do_transfer() to pass the extra arguments '-I 2' to
mptcp_connect commands. Then mptcp_connect disconnected the
connections to trigger the MP_FASTCLOSE sending and receiving. Used
chk_fclose_nr to check the MP_FASTCLOSE mibs and used chk_rst_nr to
check the MP_RST mibs. This test used the test_linkfail value to make
1024KB test files.

The output looks like this:

Created /tmp/tmp.XB8sfv1hJ0 (size 1024 KB) containing data sent by client
Created /tmp/tmp.RtTDbzqrXI (size 1024 KB) containing data sent by server
001 fastclose test                syn[ ok ] - synack[ ok ] - ack[ ok ]
                                  ctx[ ok ] - fclzrx[ ok ]
                                  rtx[ ok ] - rstrx [ ok ]   invert

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 1fda29d5d69f..4604dd13a87e 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -452,6 +452,12 @@ do_transfer()
 		extra_args="-r ${speed:6}"
 	fi
 
+	if [[ "${addr_nr_ns2}" = "fastclose_"* ]]; then
+		# disconnect
+		extra_args="$extra_args -I ${addr_nr_ns2:10}"
+		addr_nr_ns2=0
+	fi
+
 	local local_addr
 	if is_v6 "${connect_addr}"; then
 		local_addr="::"
@@ -2196,6 +2202,15 @@ fullmesh_tests()
 	chk_rm_nr 0 1
 }
 
+fastclose_tests()
+{
+	reset
+	run_tests $ns1 $ns2 10.0.1.1 1024 0 fastclose_2
+	chk_join_nr "fastclose test" 0 0 0
+	chk_fclose_nr 1 1
+	chk_rst_nr 1 1 invert
+}
+
 all_tests()
 {
 	subflows_tests
@@ -2213,6 +2228,7 @@ all_tests()
 	checksum_tests
 	deny_join_id0_tests
 	fullmesh_tests
+	fastclose_tests
 }
 
 # [$1: error message]
@@ -2239,6 +2255,7 @@ usage()
 	echo "  -S checksum_tests"
 	echo "  -d deny_join_id0_tests"
 	echo "  -m fullmesh_tests"
+	echo "  -z fastclose_tests"
 	echo "  -c capture pcap files"
 	echo "  -C enable data checksum"
 	echo "  -i use ip mptcp"
@@ -2270,7 +2287,7 @@ if [ $do_all_tests -eq 1 ]; then
 	exit $ret
 fi
 
-while getopts 'fesltra64bpkdmchCSi' opt; do
+while getopts 'fesltra64bpkdmchzCSi' opt; do
 	case $opt in
 		f)
 			subflows_tests
@@ -2317,6 +2334,9 @@ while getopts 'fesltra64bpkdmchCSi' opt; do
 		m)
 			fullmesh_tests
 			;;
+		z)
+			fastclose_tests
+			;;
 		c)
 			;;
 		C)
-- 
cgit 


From 8117dac3e7c31b2bf4e7d24d53b5e63625871e15 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Fri, 4 Mar 2022 11:36:34 -0800
Subject: selftests: mptcp: add invert check in check_transfer

This patch added the invert bytes check for the output data in
check_transfer().

Instead of the file mismatch error:

  [ FAIL ] file received by server does not match (in, out):
  -rw------- 1 root root 45643832 Jan 16 15:04 /tmp/tmp.9xpM6Paivv
  Trailing bytes are:
  MPTCP_TEST_FILE_END_MARKER
  -rw------- 1 root root 45643832 Jan 16 15:04 /tmp/tmp.wnz1Yp4u7Z
  Trailing bytes are:
  MPTCP_TEST_FILE_END_MARKER

Print out the inverted bytes like this:

  file received by server has inverted byte at 7454789
  file received by server has inverted byte at 7454790
  file received by server has inverted byte at 7454791
  file received by server has inverted byte at 7454792

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 4604dd13a87e..f4812e820acf 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -15,6 +15,7 @@ timeout_test=$((timeout_poll * 2 + 1))
 capture=0
 checksum=0
 ip_mptcp=0
+check_invert=0
 do_all_tests=1
 init=0
 
@@ -59,6 +60,8 @@ init_partial()
 		fi
 	done
 
+	check_invert=0
+
 	#  ns1              ns2
 	# ns1eth1    ns2eth1
 	# ns1eth2    ns2eth2
@@ -216,15 +219,21 @@ check_transfer()
 	out=$2
 	what=$3
 
-	cmp "$in" "$out" > /dev/null 2>&1
-	if [ $? -ne 0 ] ;then
-		echo "[ FAIL ] $what does not match (in, out):"
-		print_file_err "$in"
-		print_file_err "$out"
-		ret=1
+	cmp -l "$in" "$out" | while read line; do
+		local arr=($line)
 
-		return 1
-	fi
+		let sum=0${arr[1]}+0${arr[2]}
+		if [ $check_invert -eq 0 ] || [ $sum -ne $((0xff)) ]; then
+			echo "[ FAIL ] $what does not match (in, out):"
+			print_file_err "$in"
+			print_file_err "$out"
+			ret=1
+
+			return 1
+		else
+			echo "$what has inverted byte at ${arr[0]}"
+		fi
+	done
 
 	return 0
 }
-- 
cgit 


From 26516e10c4335286df1cac9f8c874e08750054d2 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Fri, 4 Mar 2022 11:36:35 -0800
Subject: selftests: mptcp: add more arguments for chk_join_nr

This patch added five more arguments for chk_join_nr(). The default
values of them are all zero.

The first two, csum_ns1 and csum_ns1, are passed to chk_csum_nr(), to
check the mib counters of the checksum errors in ns1 and ns2. A '+'
can be added into this two arguments to represent that multiple
checksum errors are allowed when doing this check. For example,

        chk_csum_nr "" +2 +2

indicates that two or more checksum errors are allowed in both ns1 and
ns2.

The remaining two, fail_nr and rst_nr, are passed to chk_fail_nr() and
chk_rst_nr() respectively, to check the sending and receiving mib
counters of MP_FAIL and MP_RST.

Also did some cleanups in chk_fail_nr(), renamed two local variables
and updated the output message.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 47 +++++++++++++++++--------
 1 file changed, 33 insertions(+), 14 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index f4812e820acf..2912289d63f4 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -766,8 +766,21 @@ dump_stats()
 chk_csum_nr()
 {
 	local msg=${1:-""}
+	local csum_ns1=${2:-0}
+	local csum_ns2=${3:-0}
 	local count
 	local dump_stats
+	local allow_multi_errors_ns1=0
+	local allow_multi_errors_ns2=0
+
+	if [[ "${csum_ns1}" = "+"* ]]; then
+		allow_multi_errors_ns1=1
+		csum_ns1=${csum_ns1:1}
+	fi
+	if [[ "${csum_ns2}" = "+"* ]]; then
+		allow_multi_errors_ns2=1
+		csum_ns2=${csum_ns2:1}
+	fi
 
 	if [ ! -z "$msg" ]; then
 		printf "%03u" "$TEST_COUNT"
@@ -777,8 +790,9 @@ chk_csum_nr()
 	printf " %-36s %s" "$msg" "sum"
 	count=`ip netns exec $ns1 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}'`
 	[ -z "$count" ] && count=0
-	if [ "$count" != 0 ]; then
-		echo "[fail] got $count data checksum error[s] expected 0"
+	if [ "$count" != $csum_ns1 -a $allow_multi_errors_ns1 -eq 0 ] ||
+	   [ "$count" -lt $csum_ns1 -a $allow_multi_errors_ns1 -eq 1 ]; then
+		echo "[fail] got $count data checksum error[s] expected $csum_ns1"
 		ret=1
 		dump_stats=1
 	else
@@ -787,8 +801,9 @@ chk_csum_nr()
 	echo -n " - csum  "
 	count=`ip netns exec $ns2 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}'`
 	[ -z "$count" ] && count=0
-	if [ "$count" != 0 ]; then
-		echo "[fail] got $count data checksum error[s] expected 0"
+	if [ "$count" != $csum_ns2 -a $allow_multi_errors_ns2 -eq 0 ] ||
+	   [ "$count" -lt $csum_ns2 -a $allow_multi_errors_ns2 -eq 1 ]; then
+		echo "[fail] got $count data checksum error[s] expected $csum_ns2"
 		ret=1
 		dump_stats=1
 	else
@@ -799,27 +814,27 @@ chk_csum_nr()
 
 chk_fail_nr()
 {
-	local mp_fail_nr_tx=$1
-	local mp_fail_nr_rx=$2
+	local fail_tx=$1
+	local fail_rx=$2
 	local count
 	local dump_stats
 
 	printf "%-${nr_blank}s %s" " " "ftx"
 	count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPFailTx | awk '{print $2}'`
 	[ -z "$count" ] && count=0
-	if [ "$count" != "$mp_fail_nr_tx" ]; then
-		echo "[fail] got $count MP_FAIL[s] TX expected $mp_fail_nr_tx"
+	if [ "$count" != "$fail_tx" ]; then
+		echo "[fail] got $count MP_FAIL[s] TX expected $fail_tx"
 		ret=1
 		dump_stats=1
 	else
 		echo -n "[ ok ]"
 	fi
 
-	echo -n " - frx   "
+	echo -n " - failrx"
 	count=`ip netns exec $ns2 nstat -as | grep MPTcpExtMPFailRx | awk '{print $2}'`
 	[ -z "$count" ] && count=0
-	if [ "$count" != "$mp_fail_nr_rx" ]; then
-		echo "[fail] got $count MP_FAIL[s] RX expected $mp_fail_nr_rx"
+	if [ "$count" != "$fail_rx" ]; then
+		echo "[fail] got $count MP_FAIL[s] RX expected $fail_rx"
 		ret=1
 		dump_stats=1
 	else
@@ -911,6 +926,10 @@ chk_join_nr()
 	local syn_nr=$2
 	local syn_ack_nr=$3
 	local ack_nr=$4
+	local csum_ns1=${5:-0}
+	local csum_ns2=${6:-0}
+	local fail_nr=${7:-0}
+	local rst_nr=${8:-0}
 	local count
 	local dump_stats
 	local with_cookie
@@ -957,9 +976,9 @@ chk_join_nr()
 	fi
 	[ "${dump_stats}" = 1 ] && dump_stats
 	if [ $checksum -eq 1 ]; then
-		chk_csum_nr
-		chk_fail_nr 0 0
-		chk_rst_nr 0 0
+		chk_csum_nr "" $csum_ns1 $csum_ns2
+		chk_fail_nr $fail_nr $fail_nr
+		chk_rst_nr $rst_nr $rst_nr
 	fi
 }
 
-- 
cgit 


From 7d9bf018f907bccd04ada1ad9c613a79b07526cd Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Fri, 4 Mar 2022 11:36:36 -0800
Subject: selftests: mptcp: update output info of chk_rm_nr

This patch updated the output info of chk_rm_nr. Renamed 'sf' to 'rmsf',
which means 'remove subflow'. Added the display of whether the inverted
namespaces has been used to check the mib counters.

The new output looks like this:

 002 remove multiple subflows          syn[ ok ] - synack[ ok ] - ack[ ok ]
                                       rm [ ok ] - rmsf  [ ok ]
 003 remove single address             syn[ ok ] - synack[ ok ] - ack[ ok ]
                                       add[ ok ] - echo  [ ok ]
                                       rm [ ok ] - rmsf  [ ok ]   invert

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 2912289d63f4..45c6e5f06916 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -1153,15 +1153,14 @@ chk_rm_nr()
 	local invert=${3:-""}
 	local count
 	local dump_stats
-	local addr_ns
-	local subflow_ns
+	local addr_ns=$ns1
+	local subflow_ns=$ns2
+	local extra_msg=""
 
-	if [ -z $invert ]; then
-		addr_ns=$ns1
-		subflow_ns=$ns2
-	elif [ $invert = "invert" ]; then
+	if [[ $invert = "invert" ]]; then
 		addr_ns=$ns2
 		subflow_ns=$ns1
+		extra_msg="   invert"
 	fi
 
 	printf "%-${nr_blank}s %s" " " "rm "
@@ -1175,7 +1174,7 @@ chk_rm_nr()
 		echo -n "[ ok ]"
 	fi
 
-	echo -n " - sf    "
+	echo -n " - rmsf  "
 	count=`ip netns exec $subflow_ns nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}'`
 	[ -z "$count" ] && count=0
 	if [ "$count" != "$rm_subflow_nr" ]; then
@@ -1183,10 +1182,12 @@ chk_rm_nr()
 		ret=1
 		dump_stats=1
 	else
-		echo "[ ok ]"
+		echo -n "[ ok ]"
 	fi
 
 	[ "${dump_stats}" = 1 ] && dump_stats
+
+	echo "$extra_msg"
 }
 
 chk_prio_nr()
-- 
cgit 


From aa963bcb0adc1adb79a97260fae55461359d1ed2 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 4 Mar 2022 17:01:29 -0800
Subject: selftests/bpf: Add custom SEC() handling selftest

Add a selftest validating various aspects of libbpf's handling of custom
SEC() handlers. It also demonstrates how libraries can ensure very early
callbacks registration and unregistration using
__attribute__((constructor))/__attribute__((destructor)) functions.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Tested-by: Alan Maguire <alan.maguire@oracle.com>
Reviewed-by: Alan Maguire <alan.maguire@oracle.com>
Link: https://lore.kernel.org/bpf/20220305010129.1549719-4-andrii@kernel.org
---
 .../selftests/bpf/prog_tests/custom_sec_handlers.c | 176 +++++++++++++++++++++
 .../selftests/bpf/progs/test_custom_sec_handlers.c |  63 ++++++++
 2 files changed, 239 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/custom_sec_handlers.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_custom_sec_handlers.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/custom_sec_handlers.c b/tools/testing/selftests/bpf/prog_tests/custom_sec_handlers.c
new file mode 100644
index 000000000000..b2dfc5954aea
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/custom_sec_handlers.c
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Facebook */
+
+#include <test_progs.h>
+#include "test_custom_sec_handlers.skel.h"
+
+#define COOKIE_ABC1 1
+#define COOKIE_ABC2 2
+#define COOKIE_CUSTOM 3
+#define COOKIE_FALLBACK 4
+#define COOKIE_KPROBE 5
+
+static int custom_setup_prog(struct bpf_program *prog, long cookie)
+{
+	if (cookie == COOKIE_ABC1)
+		bpf_program__set_autoload(prog, false);
+
+	return 0;
+}
+
+static int custom_prepare_load_prog(struct bpf_program *prog,
+				    struct bpf_prog_load_opts *opts, long cookie)
+{
+	if (cookie == COOKIE_FALLBACK)
+		opts->prog_flags |= BPF_F_SLEEPABLE;
+	else if (cookie == COOKIE_ABC1)
+		ASSERT_FALSE(true, "unexpected preload for abc");
+
+	return 0;
+}
+
+static int custom_attach_prog(const struct bpf_program *prog, long cookie,
+			      struct bpf_link **link)
+{
+	switch (cookie) {
+	case COOKIE_ABC2:
+		*link = bpf_program__attach_raw_tracepoint(prog, "sys_enter");
+		return libbpf_get_error(*link);
+	case COOKIE_CUSTOM:
+		*link = bpf_program__attach_tracepoint(prog, "syscalls", "sys_enter_nanosleep");
+		return libbpf_get_error(*link);
+	case COOKIE_KPROBE:
+	case COOKIE_FALLBACK:
+		/* no auto-attach for SEC("xyz") and SEC("kprobe") */
+		*link = NULL;
+		return 0;
+	default:
+		ASSERT_FALSE(true, "unexpected cookie");
+		return -EINVAL;
+	}
+}
+
+static int abc1_id;
+static int abc2_id;
+static int custom_id;
+static int fallback_id;
+static int kprobe_id;
+
+__attribute__((constructor))
+static void register_sec_handlers(void)
+{
+	LIBBPF_OPTS(libbpf_prog_handler_opts, abc1_opts,
+		.cookie = COOKIE_ABC1,
+		.prog_setup_fn = custom_setup_prog,
+		.prog_prepare_load_fn = custom_prepare_load_prog,
+		.prog_attach_fn = NULL,
+	);
+	LIBBPF_OPTS(libbpf_prog_handler_opts, abc2_opts,
+		.cookie = COOKIE_ABC2,
+		.prog_setup_fn = custom_setup_prog,
+		.prog_prepare_load_fn = custom_prepare_load_prog,
+		.prog_attach_fn = custom_attach_prog,
+	);
+	LIBBPF_OPTS(libbpf_prog_handler_opts, custom_opts,
+		.cookie = COOKIE_CUSTOM,
+		.prog_setup_fn = NULL,
+		.prog_prepare_load_fn = NULL,
+		.prog_attach_fn = custom_attach_prog,
+	);
+
+	abc1_id = libbpf_register_prog_handler("abc", BPF_PROG_TYPE_RAW_TRACEPOINT, 0, &abc1_opts);
+	abc2_id = libbpf_register_prog_handler("abc/", BPF_PROG_TYPE_RAW_TRACEPOINT, 0, &abc2_opts);
+	custom_id = libbpf_register_prog_handler("custom+", BPF_PROG_TYPE_TRACEPOINT, 0, &custom_opts);
+}
+
+__attribute__((destructor))
+static void unregister_sec_handlers(void)
+{
+	libbpf_unregister_prog_handler(abc1_id);
+	libbpf_unregister_prog_handler(abc2_id);
+	libbpf_unregister_prog_handler(custom_id);
+}
+
+void test_custom_sec_handlers(void)
+{
+	LIBBPF_OPTS(libbpf_prog_handler_opts, opts,
+		.prog_setup_fn = custom_setup_prog,
+		.prog_prepare_load_fn = custom_prepare_load_prog,
+		.prog_attach_fn = custom_attach_prog,
+	);
+	struct test_custom_sec_handlers* skel;
+	int err;
+
+	ASSERT_GT(abc1_id, 0, "abc1_id");
+	ASSERT_GT(abc2_id, 0, "abc2_id");
+	ASSERT_GT(custom_id, 0, "custom_id");
+
+	/* override libbpf's handle of SEC("kprobe/...") but also allow pure
+	 * SEC("kprobe") due to "kprobe+" specifier. Register it as
+	 * TRACEPOINT, just for fun.
+	 */
+	opts.cookie = COOKIE_KPROBE;
+	kprobe_id = libbpf_register_prog_handler("kprobe+", BPF_PROG_TYPE_TRACEPOINT, 0, &opts);
+	/* fallback treats everything as BPF_PROG_TYPE_SYSCALL program to test
+	 * setting custom BPF_F_SLEEPABLE bit in preload handler
+	 */
+	opts.cookie = COOKIE_FALLBACK;
+	fallback_id = libbpf_register_prog_handler(NULL, BPF_PROG_TYPE_SYSCALL, 0, &opts);
+
+	if (!ASSERT_GT(fallback_id, 0, "fallback_id") /* || !ASSERT_GT(kprobe_id, 0, "kprobe_id")*/) {
+		if (fallback_id > 0)
+			libbpf_unregister_prog_handler(fallback_id);
+		if (kprobe_id > 0)
+			libbpf_unregister_prog_handler(kprobe_id);
+		return;
+	}
+
+	/* open skeleton and validate assumptions */
+	skel = test_custom_sec_handlers__open();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		goto cleanup;
+
+	ASSERT_EQ(bpf_program__type(skel->progs.abc1), BPF_PROG_TYPE_RAW_TRACEPOINT, "abc1_type");
+	ASSERT_FALSE(bpf_program__autoload(skel->progs.abc1), "abc1_autoload");
+
+	ASSERT_EQ(bpf_program__type(skel->progs.abc2), BPF_PROG_TYPE_RAW_TRACEPOINT, "abc2_type");
+	ASSERT_EQ(bpf_program__type(skel->progs.custom1), BPF_PROG_TYPE_TRACEPOINT, "custom1_type");
+	ASSERT_EQ(bpf_program__type(skel->progs.custom2), BPF_PROG_TYPE_TRACEPOINT, "custom2_type");
+	ASSERT_EQ(bpf_program__type(skel->progs.kprobe1), BPF_PROG_TYPE_TRACEPOINT, "kprobe1_type");
+	ASSERT_EQ(bpf_program__type(skel->progs.xyz), BPF_PROG_TYPE_SYSCALL, "xyz_type");
+
+	skel->rodata->my_pid = getpid();
+
+	/* now attempt to load everything */
+	err = test_custom_sec_handlers__load(skel);
+	if (!ASSERT_OK(err, "skel_load"))
+		goto cleanup;
+
+	/* now try to auto-attach everything */
+	err = test_custom_sec_handlers__attach(skel);
+	if (!ASSERT_OK(err, "skel_attach"))
+		goto cleanup;
+
+	skel->links.xyz = bpf_program__attach(skel->progs.kprobe1);
+	ASSERT_EQ(errno, EOPNOTSUPP, "xyz_attach_err");
+	ASSERT_ERR_PTR(skel->links.xyz, "xyz_attach");
+
+	/* trigger programs */
+	usleep(1);
+
+	/* SEC("abc") is set to not auto-loaded */
+	ASSERT_FALSE(skel->bss->abc1_called, "abc1_called");
+	ASSERT_TRUE(skel->bss->abc2_called, "abc2_called");
+	ASSERT_TRUE(skel->bss->custom1_called, "custom1_called");
+	ASSERT_TRUE(skel->bss->custom2_called, "custom2_called");
+	/* SEC("kprobe") shouldn't be auto-attached */
+	ASSERT_FALSE(skel->bss->kprobe1_called, "kprobe1_called");
+	/* SEC("xyz") shouldn't be auto-attached */
+	ASSERT_FALSE(skel->bss->xyz_called, "xyz_called");
+
+cleanup:
+	test_custom_sec_handlers__destroy(skel);
+
+	ASSERT_OK(libbpf_unregister_prog_handler(fallback_id), "unregister_fallback");
+	ASSERT_OK(libbpf_unregister_prog_handler(kprobe_id), "unregister_kprobe");
+}
diff --git a/tools/testing/selftests/bpf/progs/test_custom_sec_handlers.c b/tools/testing/selftests/bpf/progs/test_custom_sec_handlers.c
new file mode 100644
index 000000000000..4061f701ca50
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_custom_sec_handlers.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+const volatile int my_pid;
+
+bool abc1_called;
+bool abc2_called;
+bool custom1_called;
+bool custom2_called;
+bool kprobe1_called;
+bool xyz_called;
+
+SEC("abc")
+int abc1(void *ctx)
+{
+	abc1_called = true;
+	return 0;
+}
+
+SEC("abc/whatever")
+int abc2(void *ctx)
+{
+	abc2_called = true;
+	return 0;
+}
+
+SEC("custom")
+int custom1(void *ctx)
+{
+	custom1_called = true;
+	return 0;
+}
+
+SEC("custom/something")
+int custom2(void *ctx)
+{
+	custom2_called = true;
+	return 0;
+}
+
+SEC("kprobe")
+int kprobe1(void *ctx)
+{
+	kprobe1_called = true;
+	return 0;
+}
+
+SEC("xyz/blah")
+int xyz(void *ctx)
+{
+	int whatever;
+
+	/* use sleepable helper, custom handler should set sleepable flag */
+	bpf_copy_from_user(&whatever, sizeof(whatever), NULL);
+	xyz_called = true;
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit 


From 2a7ceac9e58167fadc3496c5f694543d4bbe03ef Mon Sep 17 00:00:00 2001
From: Karolina Drobnik <karolinadrobnik@gmail.com>
Date: Fri, 4 Mar 2022 13:52:49 +0100
Subject: memblock tests: Fix testing with 32-bit physical addresses

Building memblock simulator on x86_64 with 32BIT_PHYS_ADDR_T=1
produces "cast to pointer from integer of different size" warnings.
Fix them by building the binary in 32-bit environment when using
32-bit physical addresses.

Signed-off-by: Karolina Drobnik <karolinadrobnik@gmail.com>
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
---
 tools/testing/memblock/scripts/Makefile.include | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/memblock/scripts/Makefile.include b/tools/testing/memblock/scripts/Makefile.include
index 699b0d6cda07..641569ccb7b0 100644
--- a/tools/testing/memblock/scripts/Makefile.include
+++ b/tools/testing/memblock/scripts/Makefile.include
@@ -11,7 +11,9 @@ ifeq ($(MOVABLE_NODE), 1)
 	CFLAGS += -D MOVABLE_NODE
 endif
 
-# Use 32 bit physical addresses
+# Use 32 bit physical addresses.
+# Remember to install 32-bit version of dependencies.
 ifeq ($(32BIT_PHYS_ADDR_T), 1)
-	CFLAGS += -U CONFIG_PHYS_ADDR_T_64BIT
+	CFLAGS += -m32 -U CONFIG_PHYS_ADDR_T_64BIT
+	LDFLAGS += -m32
 endif
-- 
cgit 


From e1fad0ff46b32819d30cb487f1d39ba24e515843 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Sat, 5 Mar 2022 04:16:40 +0530
Subject: bpf: Disallow negative offset in check_ptr_off_reg

check_ptr_off_reg only allows fixed offset to be set for PTR_TO_BTF_ID,
where reg->off < 0 doesn't make sense. This would shift the pointer
backwards, and fails later in btf_struct_ids_match or btf_struct_walk
due to out of bounds access (since offset is interpreted as unsigned).

Improve the verifier by rejecting this case by using a better error
message for BPF helpers and kfunc, by putting a check inside the
check_func_arg_reg_off function.

Also, update existing verifier selftests to work with new error string.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220304224645.3677453-4-memxor@gmail.com
---
 kernel/bpf/verifier.c                                   | 6 ++++++
 tools/testing/selftests/bpf/verifier/bounds_deduction.c | 2 +-
 tools/testing/selftests/bpf/verifier/ctx.c              | 8 ++++----
 3 files changed, 11 insertions(+), 5 deletions(-)

(limited to 'tools/testing')

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e37eb6020253..455b4ab69e47 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3990,6 +3990,12 @@ static int __check_ptr_off_reg(struct bpf_verifier_env *env,
 	 * is only allowed in its original, unmodified form.
 	 */
 
+	if (reg->off < 0) {
+		verbose(env, "negative offset %s ptr R%d off=%d disallowed\n",
+			reg_type_str(env, reg->type), regno, reg->off);
+		return -EACCES;
+	}
+
 	if (!fixed_off_ok && reg->off) {
 		verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n",
 			reg_type_str(env, reg->type), regno, reg->off);
diff --git a/tools/testing/selftests/bpf/verifier/bounds_deduction.c b/tools/testing/selftests/bpf/verifier/bounds_deduction.c
index 91869aea6d64..3931c481e30c 100644
--- a/tools/testing/selftests/bpf/verifier/bounds_deduction.c
+++ b/tools/testing/selftests/bpf/verifier/bounds_deduction.c
@@ -105,7 +105,7 @@
 		BPF_EXIT_INSN(),
 	},
 	.errstr_unpriv = "R1 has pointer with unsupported alu operation",
-	.errstr = "dereference of modified ctx ptr",
+	.errstr = "negative offset ctx ptr R1 off=-1 disallowed",
 	.result = REJECT,
 	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
diff --git a/tools/testing/selftests/bpf/verifier/ctx.c b/tools/testing/selftests/bpf/verifier/ctx.c
index 60f6fbe03f19..c8eaf0536c24 100644
--- a/tools/testing/selftests/bpf/verifier/ctx.c
+++ b/tools/testing/selftests/bpf/verifier/ctx.c
@@ -58,7 +58,7 @@
 	},
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.result = REJECT,
-	.errstr = "dereference of modified ctx ptr",
+	.errstr = "negative offset ctx ptr R1 off=-612 disallowed",
 },
 {
 	"pass modified ctx pointer to helper, 2",
@@ -71,8 +71,8 @@
 	},
 	.result_unpriv = REJECT,
 	.result = REJECT,
-	.errstr_unpriv = "dereference of modified ctx ptr",
-	.errstr = "dereference of modified ctx ptr",
+	.errstr_unpriv = "negative offset ctx ptr R1 off=-612 disallowed",
+	.errstr = "negative offset ctx ptr R1 off=-612 disallowed",
 },
 {
 	"pass modified ctx pointer to helper, 3",
@@ -141,7 +141,7 @@
 	.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
 	.expected_attach_type = BPF_CGROUP_UDP6_SENDMSG,
 	.result = REJECT,
-	.errstr = "dereference of modified ctx ptr",
+	.errstr = "negative offset ctx ptr R1 off=-612 disallowed",
 },
 {
 	"pass ctx or null check, 5: null (connect)",
-- 
cgit 


From 8218ccb5bd68976ed5d75028ef50c13a857eee25 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Sat, 5 Mar 2022 04:16:45 +0530
Subject: selftests/bpf: Add tests for kfunc register offset checks

Include a few verifier selftests that test against the problems being
fixed by previous commits, i.e. release kfunc always require
PTR_TO_BTF_ID fixed and var_off to be 0, and negative offset is not
permitted and returns a helpful error message.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220304224645.3677453-9-memxor@gmail.com
---
 net/bpf/test_run.c                           | 11 ++++
 tools/testing/selftests/bpf/verifier/calls.c | 83 ++++++++++++++++++++++++++++
 2 files changed, 94 insertions(+)

(limited to 'tools/testing')

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index fcc83017cd03..ba410b069824 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -270,9 +270,14 @@ struct sock * noinline bpf_kfunc_call_test3(struct sock *sk)
 	return sk;
 }
 
+struct prog_test_member {
+	u64 c;
+};
+
 struct prog_test_ref_kfunc {
 	int a;
 	int b;
+	struct prog_test_member memb;
 	struct prog_test_ref_kfunc *next;
 };
 
@@ -295,6 +300,10 @@ noinline void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p)
 {
 }
 
+noinline void bpf_kfunc_call_memb_release(struct prog_test_member *p)
+{
+}
+
 struct prog_test_pass1 {
 	int x0;
 	struct {
@@ -379,6 +388,7 @@ BTF_ID(func, bpf_kfunc_call_test2)
 BTF_ID(func, bpf_kfunc_call_test3)
 BTF_ID(func, bpf_kfunc_call_test_acquire)
 BTF_ID(func, bpf_kfunc_call_test_release)
+BTF_ID(func, bpf_kfunc_call_memb_release)
 BTF_ID(func, bpf_kfunc_call_test_pass_ctx)
 BTF_ID(func, bpf_kfunc_call_test_pass1)
 BTF_ID(func, bpf_kfunc_call_test_pass2)
@@ -396,6 +406,7 @@ BTF_SET_END(test_sk_acquire_kfunc_ids)
 
 BTF_SET_START(test_sk_release_kfunc_ids)
 BTF_ID(func, bpf_kfunc_call_test_release)
+BTF_ID(func, bpf_kfunc_call_memb_release)
 BTF_SET_END(test_sk_release_kfunc_ids)
 
 BTF_SET_START(test_sk_ret_null_kfunc_ids)
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index f890333259ad..2e03decb11b6 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -115,6 +115,89 @@
 		{ "bpf_kfunc_call_test_release", 5 },
 	},
 },
+{
+	"calls: invalid kfunc call: reg->off must be zero when passed to release kfunc",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+	BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.result = REJECT,
+	.errstr = "R1 must have zero offset when passed to release func",
+	.fixup_kfunc_btf_id = {
+		{ "bpf_kfunc_call_test_acquire", 3 },
+		{ "bpf_kfunc_call_memb_release", 8 },
+	},
+},
+{
+	"calls: invalid kfunc call: PTR_TO_BTF_ID with negative offset",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+	BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 16),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -4),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_kfunc_btf_id = {
+		{ "bpf_kfunc_call_test_acquire", 3 },
+		{ "bpf_kfunc_call_test_release", 9 },
+	},
+	.result_unpriv = REJECT,
+	.result = REJECT,
+	.errstr = "negative offset ptr_ ptr R1 off=-4 disallowed",
+},
+{
+	"calls: invalid kfunc call: PTR_TO_BTF_ID with variable offset",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+	BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_0, 4),
+	BPF_JMP_IMM(BPF_JLE, BPF_REG_2, 4, 3),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 3),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_kfunc_btf_id = {
+		{ "bpf_kfunc_call_test_acquire", 3 },
+		{ "bpf_kfunc_call_test_release", 9 },
+		{ "bpf_kfunc_call_test_release", 13 },
+		{ "bpf_kfunc_call_test_release", 17 },
+	},
+	.result_unpriv = REJECT,
+	.result = REJECT,
+	.errstr = "variable ptr_ access var_off=(0x0; 0x7) disallowed",
+},
 {
 	"calls: basic sanity",
 	.insns = {
-- 
cgit 


From 50c6b8a9aea2b8dc6c4ffb0cc502b94f7f57a0dc Mon Sep 17 00:00:00 2001
From: Hao Luo <haoluo@google.com>
Date: Fri, 4 Mar 2022 11:16:57 -0800
Subject: selftests/bpf: Add a test for btf_type_tag "percpu"

Add test for percpu btf_type_tag. Similar to the "user" tag, we test
the following cases:

 1. __percpu struct field.
 2. __percpu as function parameter.
 3. per_cpu_ptr() accepts dynamically allocated __percpu memory.

Because the test for "user" and the test for "percpu" are very similar,
a little bit of refactoring has been done in btf_tag.c. Basically, both
tests share the same function for loading vmlinux and module btf.

Example output from log:

 > ./test_progs -v -t btf_tag

 libbpf: prog 'test_percpu1': BPF program load failed: Permission denied
 libbpf: prog 'test_percpu1': -- BEGIN PROG LOAD LOG --
 ...
 ; g = arg->a;
 1: (61) r1 = *(u32 *)(r1 +0)
 R1 is ptr_bpf_testmod_btf_type_tag_1 access percpu memory: off=0
 ...
 test_btf_type_tag_mod_percpu:PASS:btf_type_tag_percpu 0 nsec
 #26/6 btf_tag/btf_type_tag_percpu_mod1:OK

 libbpf: prog 'test_percpu2': BPF program load failed: Permission denied
 libbpf: prog 'test_percpu2': -- BEGIN PROG LOAD LOG --
 ...
 ; g = arg->p->a;
 2: (61) r1 = *(u32 *)(r1 +0)
 R1 is ptr_bpf_testmod_btf_type_tag_1 access percpu memory: off=0
 ...
 test_btf_type_tag_mod_percpu:PASS:btf_type_tag_percpu 0 nsec
 #26/7 btf_tag/btf_type_tag_percpu_mod2:OK

 libbpf: prog 'test_percpu_load': BPF program load failed: Permission denied
 libbpf: prog 'test_percpu_load': -- BEGIN PROG LOAD LOG --
 ...
 ; g = (__u64)cgrp->rstat_cpu->updated_children;
 2: (79) r1 = *(u64 *)(r1 +48)
 R1 is ptr_cgroup_rstat_cpu access percpu memory: off=48
 ...
 test_btf_type_tag_vmlinux_percpu:PASS:btf_type_tag_percpu_load 0 nsec
 #26/8 btf_tag/btf_type_tag_percpu_vmlinux_load:OK

 load_btfs:PASS:could not load vmlinux BTF 0 nsec
 test_btf_type_tag_vmlinux_percpu:PASS:btf_type_tag_percpu 0 nsec
 test_btf_type_tag_vmlinux_percpu:PASS:btf_type_tag_percpu_helper 0 nsec
 #26/9 btf_tag/btf_type_tag_percpu_vmlinux_helper:OK

Signed-off-by: Hao Luo <haoluo@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20220304191657.981240-5-haoluo@google.com
---
 .../selftests/bpf/bpf_testmod/bpf_testmod.c        |  14 ++
 tools/testing/selftests/bpf/prog_tests/btf_tag.c   | 164 +++++++++++++++++----
 .../selftests/bpf/progs/btf_type_tag_percpu.c      |  66 +++++++++
 3 files changed, 215 insertions(+), 29 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
index 27d63be47b95..e585e1cefc77 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -33,6 +33,10 @@ struct bpf_testmod_btf_type_tag_2 {
 	struct bpf_testmod_btf_type_tag_1 __user *p;
 };
 
+struct bpf_testmod_btf_type_tag_3 {
+	struct bpf_testmod_btf_type_tag_1 __percpu *p;
+};
+
 noinline int
 bpf_testmod_test_btf_type_tag_user_1(struct bpf_testmod_btf_type_tag_1 __user *arg) {
 	BTF_TYPE_EMIT(func_proto_typedef);
@@ -46,6 +50,16 @@ bpf_testmod_test_btf_type_tag_user_2(struct bpf_testmod_btf_type_tag_2 *arg) {
 	return arg->p->a;
 }
 
+noinline int
+bpf_testmod_test_btf_type_tag_percpu_1(struct bpf_testmod_btf_type_tag_1 __percpu *arg) {
+	return arg->a;
+}
+
+noinline int
+bpf_testmod_test_btf_type_tag_percpu_2(struct bpf_testmod_btf_type_tag_3 *arg) {
+	return arg->p->a;
+}
+
 noinline int bpf_testmod_loop_test(int n)
 {
 	int i, sum = 0;
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_tag.c b/tools/testing/selftests/bpf/prog_tests/btf_tag.c
index f7560b54a6bb..071430cd54de 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_tag.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_tag.c
@@ -10,6 +10,7 @@ struct btf_type_tag_test {
 };
 #include "btf_type_tag.skel.h"
 #include "btf_type_tag_user.skel.h"
+#include "btf_type_tag_percpu.skel.h"
 
 static void test_btf_decl_tag(void)
 {
@@ -43,38 +44,81 @@ static void test_btf_type_tag(void)
 	btf_type_tag__destroy(skel);
 }
 
-static void test_btf_type_tag_mod_user(bool load_test_user1)
+/* loads vmlinux_btf as well as module_btf. If the caller passes NULL as
+ * module_btf, it will not load module btf.
+ *
+ * Returns 0 on success.
+ * Return -1 On error. In case of error, the loaded btf will be freed and the
+ * input parameters will be set to pointing to NULL.
+ */
+static int load_btfs(struct btf **vmlinux_btf, struct btf **module_btf,
+		     bool needs_vmlinux_tag)
 {
 	const char *module_name = "bpf_testmod";
-	struct btf *vmlinux_btf, *module_btf;
-	struct btf_type_tag_user *skel;
 	__s32 type_id;
-	int err;
 
 	if (!env.has_testmod) {
 		test__skip();
-		return;
+		return -1;
 	}
 
-	/* skip the test if the module does not have __user tags */
-	vmlinux_btf = btf__load_vmlinux_btf();
-	if (!ASSERT_OK_PTR(vmlinux_btf, "could not load vmlinux BTF"))
-		return;
+	*vmlinux_btf = btf__load_vmlinux_btf();
+	if (!ASSERT_OK_PTR(*vmlinux_btf, "could not load vmlinux BTF"))
+		return -1;
+
+	if (!needs_vmlinux_tag)
+		goto load_module_btf;
 
-	module_btf = btf__load_module_btf(module_name, vmlinux_btf);
-	if (!ASSERT_OK_PTR(module_btf, "could not load module BTF"))
+	/* skip the test if the vmlinux does not have __user tags */
+	type_id = btf__find_by_name_kind(*vmlinux_btf, "user", BTF_KIND_TYPE_TAG);
+	if (type_id <= 0) {
+		printf("%s:SKIP: btf_type_tag attribute not in vmlinux btf", __func__);
+		test__skip();
 		goto free_vmlinux_btf;
+	}
 
-	type_id = btf__find_by_name_kind(module_btf, "user", BTF_KIND_TYPE_TAG);
+load_module_btf:
+	/* skip loading module_btf, if not requested by caller */
+	if (!module_btf)
+		return 0;
+
+	*module_btf = btf__load_module_btf(module_name, *vmlinux_btf);
+	if (!ASSERT_OK_PTR(*module_btf, "could not load module BTF"))
+		goto free_vmlinux_btf;
+
+	/* skip the test if the module does not have __user tags */
+	type_id = btf__find_by_name_kind(*module_btf, "user", BTF_KIND_TYPE_TAG);
 	if (type_id <= 0) {
 		printf("%s:SKIP: btf_type_tag attribute not in %s", __func__, module_name);
 		test__skip();
 		goto free_module_btf;
 	}
 
+	return 0;
+
+free_module_btf:
+	btf__free(*module_btf);
+free_vmlinux_btf:
+	btf__free(*vmlinux_btf);
+
+	*vmlinux_btf = NULL;
+	if (module_btf)
+		*module_btf = NULL;
+	return -1;
+}
+
+static void test_btf_type_tag_mod_user(bool load_test_user1)
+{
+	struct btf *vmlinux_btf = NULL, *module_btf = NULL;
+	struct btf_type_tag_user *skel;
+	int err;
+
+	if (load_btfs(&vmlinux_btf, &module_btf, /*needs_vmlinux_tag=*/false))
+		return;
+
 	skel = btf_type_tag_user__open();
 	if (!ASSERT_OK_PTR(skel, "btf_type_tag_user"))
-		goto free_module_btf;
+		goto cleanup;
 
 	bpf_program__set_autoload(skel->progs.test_sys_getsockname, false);
 	if (load_test_user1)
@@ -87,34 +131,23 @@ static void test_btf_type_tag_mod_user(bool load_test_user1)
 
 	btf_type_tag_user__destroy(skel);
 
-free_module_btf:
+cleanup:
 	btf__free(module_btf);
-free_vmlinux_btf:
 	btf__free(vmlinux_btf);
 }
 
 static void test_btf_type_tag_vmlinux_user(void)
 {
 	struct btf_type_tag_user *skel;
-	struct btf *vmlinux_btf;
-	__s32 type_id;
+	struct btf *vmlinux_btf = NULL;
 	int err;
 
-	/* skip the test if the vmlinux does not have __user tags */
-	vmlinux_btf = btf__load_vmlinux_btf();
-	if (!ASSERT_OK_PTR(vmlinux_btf, "could not load vmlinux BTF"))
+	if (load_btfs(&vmlinux_btf, NULL, /*needs_vmlinux_tag=*/true))
 		return;
 
-	type_id = btf__find_by_name_kind(vmlinux_btf, "user", BTF_KIND_TYPE_TAG);
-	if (type_id <= 0) {
-		printf("%s:SKIP: btf_type_tag attribute not in vmlinux btf", __func__);
-		test__skip();
-		goto free_vmlinux_btf;
-	}
-
 	skel = btf_type_tag_user__open();
 	if (!ASSERT_OK_PTR(skel, "btf_type_tag_user"))
-		goto free_vmlinux_btf;
+		goto cleanup;
 
 	bpf_program__set_autoload(skel->progs.test_user2, false);
 	bpf_program__set_autoload(skel->progs.test_user1, false);
@@ -124,7 +157,70 @@ static void test_btf_type_tag_vmlinux_user(void)
 
 	btf_type_tag_user__destroy(skel);
 
-free_vmlinux_btf:
+cleanup:
+	btf__free(vmlinux_btf);
+}
+
+static void test_btf_type_tag_mod_percpu(bool load_test_percpu1)
+{
+	struct btf *vmlinux_btf, *module_btf;
+	struct btf_type_tag_percpu *skel;
+	int err;
+
+	if (load_btfs(&vmlinux_btf, &module_btf, /*needs_vmlinux_tag=*/false))
+		return;
+
+	skel = btf_type_tag_percpu__open();
+	if (!ASSERT_OK_PTR(skel, "btf_type_tag_percpu"))
+		goto cleanup;
+
+	bpf_program__set_autoload(skel->progs.test_percpu_load, false);
+	bpf_program__set_autoload(skel->progs.test_percpu_helper, false);
+	if (load_test_percpu1)
+		bpf_program__set_autoload(skel->progs.test_percpu2, false);
+	else
+		bpf_program__set_autoload(skel->progs.test_percpu1, false);
+
+	err = btf_type_tag_percpu__load(skel);
+	ASSERT_ERR(err, "btf_type_tag_percpu");
+
+	btf_type_tag_percpu__destroy(skel);
+
+cleanup:
+	btf__free(module_btf);
+	btf__free(vmlinux_btf);
+}
+
+static void test_btf_type_tag_vmlinux_percpu(bool load_test)
+{
+	struct btf_type_tag_percpu *skel;
+	struct btf *vmlinux_btf = NULL;
+	int err;
+
+	if (load_btfs(&vmlinux_btf, NULL, /*needs_vmlinux_tag=*/true))
+		return;
+
+	skel = btf_type_tag_percpu__open();
+	if (!ASSERT_OK_PTR(skel, "btf_type_tag_percpu"))
+		goto cleanup;
+
+	bpf_program__set_autoload(skel->progs.test_percpu2, false);
+	bpf_program__set_autoload(skel->progs.test_percpu1, false);
+	if (load_test) {
+		bpf_program__set_autoload(skel->progs.test_percpu_helper, false);
+
+		err = btf_type_tag_percpu__load(skel);
+		ASSERT_ERR(err, "btf_type_tag_percpu_load");
+	} else {
+		bpf_program__set_autoload(skel->progs.test_percpu_load, false);
+
+		err = btf_type_tag_percpu__load(skel);
+		ASSERT_OK(err, "btf_type_tag_percpu_helper");
+	}
+
+	btf_type_tag_percpu__destroy(skel);
+
+cleanup:
 	btf__free(vmlinux_btf);
 }
 
@@ -134,10 +230,20 @@ void test_btf_tag(void)
 		test_btf_decl_tag();
 	if (test__start_subtest("btf_type_tag"))
 		test_btf_type_tag();
+
 	if (test__start_subtest("btf_type_tag_user_mod1"))
 		test_btf_type_tag_mod_user(true);
 	if (test__start_subtest("btf_type_tag_user_mod2"))
 		test_btf_type_tag_mod_user(false);
 	if (test__start_subtest("btf_type_tag_sys_user_vmlinux"))
 		test_btf_type_tag_vmlinux_user();
+
+	if (test__start_subtest("btf_type_tag_percpu_mod1"))
+		test_btf_type_tag_mod_percpu(true);
+	if (test__start_subtest("btf_type_tag_percpu_mod2"))
+		test_btf_type_tag_mod_percpu(false);
+	if (test__start_subtest("btf_type_tag_percpu_vmlinux_load"))
+		test_btf_type_tag_vmlinux_percpu(true);
+	if (test__start_subtest("btf_type_tag_percpu_vmlinux_helper"))
+		test_btf_type_tag_vmlinux_percpu(false);
 }
diff --git a/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c b/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c
new file mode 100644
index 000000000000..8feddb8289cf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Google */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+struct bpf_testmod_btf_type_tag_1 {
+	int a;
+};
+
+struct bpf_testmod_btf_type_tag_2 {
+	struct bpf_testmod_btf_type_tag_1 *p;
+};
+
+__u64 g;
+
+SEC("fentry/bpf_testmod_test_btf_type_tag_percpu_1")
+int BPF_PROG(test_percpu1, struct bpf_testmod_btf_type_tag_1 *arg)
+{
+	g = arg->a;
+	return 0;
+}
+
+SEC("fentry/bpf_testmod_test_btf_type_tag_percpu_2")
+int BPF_PROG(test_percpu2, struct bpf_testmod_btf_type_tag_2 *arg)
+{
+	g = arg->p->a;
+	return 0;
+}
+
+/* trace_cgroup_mkdir(struct cgroup *cgrp, const char *path)
+ *
+ * struct cgroup_rstat_cpu {
+ *   ...
+ *   struct cgroup *updated_children;
+ *   ...
+ * };
+ *
+ * struct cgroup {
+ *   ...
+ *   struct cgroup_rstat_cpu __percpu *rstat_cpu;
+ *   ...
+ * };
+ */
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(test_percpu_load, struct cgroup *cgrp, const char *path)
+{
+	g = (__u64)cgrp->rstat_cpu->updated_children;
+	return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(test_percpu_helper, struct cgroup *cgrp, const char *path)
+{
+	struct cgroup_rstat_cpu *rstat;
+	__u32 cpu;
+
+	cpu = bpf_get_smp_processor_id();
+	rstat = (struct cgroup_rstat_cpu *)bpf_per_cpu_ptr(cgrp->rstat_cpu, cpu);
+	if (rstat) {
+		/* READ_ONCE */
+		*(volatile int *)rstat;
+	}
+
+	return 0;
+}
-- 
cgit 


From 0273d10182ec4507a43b868dd80fd62860b7e948 Mon Sep 17 00:00:00 2001
From: Guo Zhengkui <guozhengkui@vivo.com>
Date: Sun, 6 Mar 2022 00:18:35 +0800
Subject: selftests: net: fix array_size.cocci warning

Fit the following coccicheck warning:
tools/testing/selftests/net/reuseport_bpf_numa.c:89:28-29:
WARNING: Use ARRAY_SIZE.

It has been tested with gcc (Debian 8.3.0-6) 8.3.0 on x86_64.

Signed-off-by: Guo Zhengkui <guozhengkui@vivo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/reuseport_bpf_numa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/reuseport_bpf_numa.c b/tools/testing/selftests/net/reuseport_bpf_numa.c
index b2eebf669b8c..c9ba36aa688e 100644
--- a/tools/testing/selftests/net/reuseport_bpf_numa.c
+++ b/tools/testing/selftests/net/reuseport_bpf_numa.c
@@ -86,7 +86,7 @@ static void attach_bpf(int fd)
 
 	memset(&attr, 0, sizeof(attr));
 	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
-	attr.insn_cnt = sizeof(prog) / sizeof(prog[0]);
+	attr.insn_cnt = ARRAY_SIZE(prog);
 	attr.insns = (unsigned long) &prog;
 	attr.license = (unsigned long) &bpf_license;
 	attr.log_buf = (unsigned long) &bpf_log_buf;
-- 
cgit 


From aec499c75cf8e0b599be4d559e6922b613085f8f Mon Sep 17 00:00:00 2001
From: Alan Kao <alankao@andestech.com>
Date: Wed, 2 Mar 2022 15:42:45 +0800
Subject: nds32: Remove the architecture

The nds32 architecture, also known as AndeStar V3, is a custom 32-bit
RISC target designed by Andes Technologies. Support was added to the
kernel in 2016 as the replacement RISC-V based V5 processors were
already announced, and maintained by (current or former) Andes
employees.

As explained by Alan Kao, new customers are now all using RISC-V,
and all known nds32 users are already on longterm stable kernels
provided by Andes, with no development work going into mainline
support any more.

While the port is still in a reasonably good shape, it only gets
worse over time without active maintainers, so it seems best
to remove it before it becomes unusable. As always, if it turns
out that there are mainline users after all, and they volunteer
to maintain the port in the future, the removal can be reverted.

Link: https://lore.kernel.org/linux-mm/YhdWNLUhk+x9RAzU@yamatobi.andestech.com/
Link: https://lore.kernel.org/lkml/20220302065213.82702-1-alankao@andestech.com/
Link: https://www.andestech.com/en/products-solutions/andestar-architecture/
Signed-off-by: Alan Kao <alankao@andestech.com>
[arnd: rewrite changelog to provide more background]
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 .../interrupt-controller/andestech,ativic32.txt    |   19 -
 .../devicetree/bindings/nds32/andestech-boards     |   40 -
 Documentation/devicetree/bindings/nds32/atl2c.txt  |   28 -
 Documentation/devicetree/bindings/nds32/cpus.txt   |   38 -
 .../devicetree/bindings/perf/nds32v3-pmu.txt       |   17 -
 .../bindings/timer/andestech,atcpit100-timer.txt   |   33 -
 .../features/core/cBPF-JIT/arch-support.txt        |    1 -
 .../features/core/eBPF-JIT/arch-support.txt        |    1 -
 .../core/generic-idle-thread/arch-support.txt      |    1 -
 .../features/core/jump-labels/arch-support.txt     |    1 -
 .../core/thread-info-in-task/arch-support.txt      |    1 -
 .../features/core/tracehook/arch-support.txt       |    1 -
 .../features/debug/KASAN/arch-support.txt          |    1 -
 .../debug/debug-vm-pgtable/arch-support.txt        |    1 -
 .../debug/gcov-profile-all/arch-support.txt        |    1 -
 Documentation/features/debug/kcov/arch-support.txt |    1 -
 Documentation/features/debug/kgdb/arch-support.txt |    1 -
 .../features/debug/kmemleak/arch-support.txt       |    1 -
 .../debug/kprobes-on-ftrace/arch-support.txt       |    1 -
 .../features/debug/kprobes/arch-support.txt        |    1 -
 .../features/debug/kretprobes/arch-support.txt     |    1 -
 .../features/debug/optprobes/arch-support.txt      |    1 -
 .../features/debug/stackprotector/arch-support.txt |    1 -
 .../features/debug/uprobes/arch-support.txt        |    1 -
 .../debug/user-ret-profiler/arch-support.txt       |    1 -
 .../features/io/dma-contiguous/arch-support.txt    |    1 -
 .../locking/cmpxchg-local/arch-support.txt         |    1 -
 .../features/locking/lockdep/arch-support.txt      |    1 -
 .../locking/queued-rwlocks/arch-support.txt        |    1 -
 .../locking/queued-spinlocks/arch-support.txt      |    1 -
 .../features/perf/kprobes-event/arch-support.txt   |    1 -
 .../features/perf/perf-regs/arch-support.txt       |    1 -
 .../features/perf/perf-stackdump/arch-support.txt  |    1 -
 .../sched/membarrier-sync-core/arch-support.txt    |    1 -
 .../features/sched/numa-balancing/arch-support.txt |    1 -
 .../seccomp/seccomp-filter/arch-support.txt        |    1 -
 .../time/arch-tick-broadcast/arch-support.txt      |    1 -
 .../features/time/clockevents/arch-support.txt     |    1 -
 .../time/context-tracking/arch-support.txt         |    1 -
 .../features/time/irq-time-acct/arch-support.txt   |    1 -
 .../features/time/virt-cpuacct/arch-support.txt    |    1 -
 .../features/vm/ELF-ASLR/arch-support.txt          |    1 -
 .../features/vm/PG_uncached/arch-support.txt       |    1 -
 Documentation/features/vm/THP/arch-support.txt     |    1 -
 Documentation/features/vm/TLB/arch-support.txt     |    1 -
 .../features/vm/huge-vmap/arch-support.txt         |    1 -
 .../features/vm/ioremap_prot/arch-support.txt      |    1 -
 .../features/vm/pte_special/arch-support.txt       |    1 -
 MAINTAINERS                                        |   12 -
 arch/nds32/Kbuild                                  |    4 -
 arch/nds32/Kconfig                                 |  101 --
 arch/nds32/Kconfig.cpu                             |  218 ---
 arch/nds32/Kconfig.debug                           |    2 -
 arch/nds32/Makefile                                |   63 -
 arch/nds32/boot/.gitignore                         |    2 -
 arch/nds32/boot/Makefile                           |   16 -
 arch/nds32/boot/dts/Makefile                       |    2 -
 arch/nds32/boot/dts/ae3xx.dts                      |   90 --
 arch/nds32/configs/defconfig                       |  104 --
 arch/nds32/include/asm/Kbuild                      |    8 -
 arch/nds32/include/asm/assembler.h                 |   39 -
 arch/nds32/include/asm/barrier.h                   |   15 -
 arch/nds32/include/asm/bitfield.h                  |  985 -------------
 arch/nds32/include/asm/cache.h                     |   12 -
 arch/nds32/include/asm/cache_info.h                |   13 -
 arch/nds32/include/asm/cacheflush.h                |   53 -
 arch/nds32/include/asm/current.h                   |   12 -
 arch/nds32/include/asm/delay.h                     |   39 -
 arch/nds32/include/asm/elf.h                       |  180 ---
 arch/nds32/include/asm/fixmap.h                    |   29 -
 arch/nds32/include/asm/fpu.h                       |  126 --
 arch/nds32/include/asm/fpuemu.h                    |   44 -
 arch/nds32/include/asm/ftrace.h                    |   46 -
 arch/nds32/include/asm/futex.h                     |  101 --
 arch/nds32/include/asm/highmem.h                   |   65 -
 arch/nds32/include/asm/io.h                        |   84 --
 arch/nds32/include/asm/irqflags.h                  |   41 -
 arch/nds32/include/asm/l2_cache.h                  |  137 --
 arch/nds32/include/asm/linkage.h                   |   11 -
 arch/nds32/include/asm/memory.h                    |   91 --
 arch/nds32/include/asm/mmu.h                       |   12 -
 arch/nds32/include/asm/mmu_context.h               |   62 -
 arch/nds32/include/asm/nds32.h                     |   82 --
 arch/nds32/include/asm/nds32_fpu_inst.h            |  109 --
 arch/nds32/include/asm/page.h                      |   64 -
 arch/nds32/include/asm/perf_event.h                |   16 -
 arch/nds32/include/asm/pgalloc.h                   |   62 -
 arch/nds32/include/asm/pgtable.h                   |  377 -----
 arch/nds32/include/asm/pmu.h                       |  386 -----
 arch/nds32/include/asm/proc-fns.h                  |   44 -
 arch/nds32/include/asm/processor.h                 |  104 --
 arch/nds32/include/asm/ptrace.h                    |   77 -
 arch/nds32/include/asm/sfp-machine.h               |  158 ---
 arch/nds32/include/asm/shmparam.h                  |   19 -
 arch/nds32/include/asm/stacktrace.h                |   39 -
 arch/nds32/include/asm/string.h                    |   17 -
 arch/nds32/include/asm/suspend.h                   |   11 -
 arch/nds32/include/asm/swab.h                      |   35 -
 arch/nds32/include/asm/syscall.h                   |  142 --
 arch/nds32/include/asm/syscalls.h                  |   14 -
 arch/nds32/include/asm/thread_info.h               |   72 -
 arch/nds32/include/asm/tlb.h                       |   11 -
 arch/nds32/include/asm/tlbflush.h                  |   46 -
 arch/nds32/include/asm/uaccess.h                   |  282 ----
 arch/nds32/include/asm/unistd.h                    |    6 -
 arch/nds32/include/asm/vdso.h                      |   24 -
 arch/nds32/include/asm/vdso_datapage.h             |   37 -
 arch/nds32/include/asm/vdso_timer_info.h           |   14 -
 arch/nds32/include/asm/vermagic.h                  |    9 -
 arch/nds32/include/asm/vmalloc.h                   |    4 -
 arch/nds32/include/uapi/asm/Kbuild                 |    2 -
 arch/nds32/include/uapi/asm/auxvec.h               |   19 -
 arch/nds32/include/uapi/asm/byteorder.h            |   13 -
 arch/nds32/include/uapi/asm/cachectl.h             |   14 -
 arch/nds32/include/uapi/asm/fp_udfiex_crtl.h       |   16 -
 arch/nds32/include/uapi/asm/param.h                |   11 -
 arch/nds32/include/uapi/asm/ptrace.h               |   25 -
 arch/nds32/include/uapi/asm/sigcontext.h           |   84 --
 arch/nds32/include/uapi/asm/unistd.h               |   16 -
 arch/nds32/kernel/.gitignore                       |    2 -
 arch/nds32/kernel/Makefile                         |   33 -
 arch/nds32/kernel/asm-offsets.c                    |   28 -
 arch/nds32/kernel/atl2c.c                          |   65 -
 arch/nds32/kernel/cacheinfo.c                      |   49 -
 arch/nds32/kernel/devtree.c                        |   19 -
 arch/nds32/kernel/dma.c                            |   82 --
 arch/nds32/kernel/ex-entry.S                       |  177 ---
 arch/nds32/kernel/ex-exit.S                        |  193 ---
 arch/nds32/kernel/ex-scall.S                       |  100 --
 arch/nds32/kernel/fpu.c                            |  266 ----
 arch/nds32/kernel/ftrace.c                         |  278 ----
 arch/nds32/kernel/head.S                           |  197 ---
 arch/nds32/kernel/irq.c                            |    9 -
 arch/nds32/kernel/module.c                         |  278 ----
 arch/nds32/kernel/nds32_ksyms.c                    |   25 -
 arch/nds32/kernel/perf_event_cpu.c                 | 1500 --------------------
 arch/nds32/kernel/pm.c                             |   80 --
 arch/nds32/kernel/process.c                        |  256 ----
 arch/nds32/kernel/ptrace.c                         |  118 --
 arch/nds32/kernel/setup.c                          |  369 -----
 arch/nds32/kernel/signal.c                         |  384 -----
 arch/nds32/kernel/sleep.S                          |  131 --
 arch/nds32/kernel/stacktrace.c                     |   53 -
 arch/nds32/kernel/sys_nds32.c                      |   84 --
 arch/nds32/kernel/syscall_table.c                  |   17 -
 arch/nds32/kernel/time.c                           |   11 -
 arch/nds32/kernel/traps.c                          |  354 -----
 arch/nds32/kernel/vdso.c                           |  231 ---
 arch/nds32/kernel/vdso/.gitignore                  |    2 -
 arch/nds32/kernel/vdso/Makefile                    |   79 --
 arch/nds32/kernel/vdso/datapage.S                  |   21 -
 arch/nds32/kernel/vdso/gen_vdso_offsets.sh         |   15 -
 arch/nds32/kernel/vdso/gettimeofday.c              |  269 ----
 arch/nds32/kernel/vdso/note.S                      |   11 -
 arch/nds32/kernel/vdso/sigreturn.S                 |   19 -
 arch/nds32/kernel/vdso/vdso.S                      |   18 -
 arch/nds32/kernel/vdso/vdso.lds.S                  |   75 -
 arch/nds32/kernel/vmlinux.lds.S                    |   70 -
 arch/nds32/lib/Makefile                            |    4 -
 arch/nds32/lib/clear_user.S                        |   42 -
 arch/nds32/lib/copy_from_user.S                    |   45 -
 arch/nds32/lib/copy_page.S                         |   40 -
 arch/nds32/lib/copy_template.S                     |   69 -
 arch/nds32/lib/copy_to_user.S                      |   45 -
 arch/nds32/lib/memcpy.S                            |   30 -
 arch/nds32/lib/memmove.S                           |   70 -
 arch/nds32/lib/memset.S                            |   33 -
 arch/nds32/lib/memzero.S                           |   18 -
 arch/nds32/math-emu/Makefile                       |   10 -
 arch/nds32/math-emu/faddd.c                        |   24 -
 arch/nds32/math-emu/fadds.c                        |   24 -
 arch/nds32/math-emu/fcmpd.c                        |   24 -
 arch/nds32/math-emu/fcmps.c                        |   24 -
 arch/nds32/math-emu/fd2s.c                         |   22 -
 arch/nds32/math-emu/fd2si.c                        |   30 -
 arch/nds32/math-emu/fd2siz.c                       |   30 -
 arch/nds32/math-emu/fd2ui.c                        |   30 -
 arch/nds32/math-emu/fd2uiz.c                       |   30 -
 arch/nds32/math-emu/fdivd.c                        |   27 -
 arch/nds32/math-emu/fdivs.c                        |   26 -
 arch/nds32/math-emu/fmuld.c                        |   23 -
 arch/nds32/math-emu/fmuls.c                        |   23 -
 arch/nds32/math-emu/fnegd.c                        |   21 -
 arch/nds32/math-emu/fnegs.c                        |   21 -
 arch/nds32/math-emu/fpuemu.c                       |  406 ------
 arch/nds32/math-emu/fs2d.c                         |   23 -
 arch/nds32/math-emu/fs2si.c                        |   29 -
 arch/nds32/math-emu/fs2siz.c                       |   29 -
 arch/nds32/math-emu/fs2ui.c                        |   29 -
 arch/nds32/math-emu/fs2uiz.c                       |   30 -
 arch/nds32/math-emu/fsi2d.c                        |   22 -
 arch/nds32/math-emu/fsi2s.c                        |   22 -
 arch/nds32/math-emu/fsqrtd.c                       |   21 -
 arch/nds32/math-emu/fsqrts.c                       |   21 -
 arch/nds32/math-emu/fsubd.c                        |   27 -
 arch/nds32/math-emu/fsubs.c                        |   27 -
 arch/nds32/math-emu/fui2d.c                        |   22 -
 arch/nds32/math-emu/fui2s.c                        |   22 -
 arch/nds32/mm/Makefile                             |   10 -
 arch/nds32/mm/alignment.c                          |  575 --------
 arch/nds32/mm/cacheflush.c                         |  338 -----
 arch/nds32/mm/extable.c                            |   16 -
 arch/nds32/mm/fault.c                              |  396 ------
 arch/nds32/mm/init.c                               |  263 ----
 arch/nds32/mm/mm-nds32.c                           |   96 --
 arch/nds32/mm/mmap.c                               |   73 -
 arch/nds32/mm/proc.c                               |  536 -------
 arch/nds32/mm/tlb.c                                |   50 -
 drivers/clocksource/Kconfig                        |    9 -
 drivers/clocksource/Makefile                       |    1 -
 drivers/clocksource/timer-atcpit100.c              |  266 ----
 drivers/irqchip/Makefile                           |    1 -
 drivers/irqchip/irq-ativic32.c                     |  156 --
 drivers/net/ethernet/faraday/Kconfig               |   12 +-
 drivers/video/console/Kconfig                      |    2 +-
 scripts/recordmcount.pl                            |    3 -
 tools/include/asm/barrier.h                        |    2 -
 tools/perf/arch/nds32/Build                        |    1 -
 tools/perf/arch/nds32/util/Build                   |    1 -
 tools/perf/arch/nds32/util/header.c                |   29 -
 tools/testing/selftests/vDSO/vdso_config.h         |    4 -
 221 files changed, 6 insertions(+), 15814 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/interrupt-controller/andestech,ativic32.txt
 delete mode 100644 Documentation/devicetree/bindings/nds32/andestech-boards
 delete mode 100644 Documentation/devicetree/bindings/nds32/atl2c.txt
 delete mode 100644 Documentation/devicetree/bindings/nds32/cpus.txt
 delete mode 100644 Documentation/devicetree/bindings/perf/nds32v3-pmu.txt
 delete mode 100644 Documentation/devicetree/bindings/timer/andestech,atcpit100-timer.txt
 delete mode 100644 arch/nds32/Kbuild
 delete mode 100644 arch/nds32/Kconfig
 delete mode 100644 arch/nds32/Kconfig.cpu
 delete mode 100644 arch/nds32/Kconfig.debug
 delete mode 100644 arch/nds32/Makefile
 delete mode 100644 arch/nds32/boot/.gitignore
 delete mode 100644 arch/nds32/boot/Makefile
 delete mode 100644 arch/nds32/boot/dts/Makefile
 delete mode 100644 arch/nds32/boot/dts/ae3xx.dts
 delete mode 100644 arch/nds32/configs/defconfig
 delete mode 100644 arch/nds32/include/asm/Kbuild
 delete mode 100644 arch/nds32/include/asm/assembler.h
 delete mode 100644 arch/nds32/include/asm/barrier.h
 delete mode 100644 arch/nds32/include/asm/bitfield.h
 delete mode 100644 arch/nds32/include/asm/cache.h
 delete mode 100644 arch/nds32/include/asm/cache_info.h
 delete mode 100644 arch/nds32/include/asm/cacheflush.h
 delete mode 100644 arch/nds32/include/asm/current.h
 delete mode 100644 arch/nds32/include/asm/delay.h
 delete mode 100644 arch/nds32/include/asm/elf.h
 delete mode 100644 arch/nds32/include/asm/fixmap.h
 delete mode 100644 arch/nds32/include/asm/fpu.h
 delete mode 100644 arch/nds32/include/asm/fpuemu.h
 delete mode 100644 arch/nds32/include/asm/ftrace.h
 delete mode 100644 arch/nds32/include/asm/futex.h
 delete mode 100644 arch/nds32/include/asm/highmem.h
 delete mode 100644 arch/nds32/include/asm/io.h
 delete mode 100644 arch/nds32/include/asm/irqflags.h
 delete mode 100644 arch/nds32/include/asm/l2_cache.h
 delete mode 100644 arch/nds32/include/asm/linkage.h
 delete mode 100644 arch/nds32/include/asm/memory.h
 delete mode 100644 arch/nds32/include/asm/mmu.h
 delete mode 100644 arch/nds32/include/asm/mmu_context.h
 delete mode 100644 arch/nds32/include/asm/nds32.h
 delete mode 100644 arch/nds32/include/asm/nds32_fpu_inst.h
 delete mode 100644 arch/nds32/include/asm/page.h
 delete mode 100644 arch/nds32/include/asm/perf_event.h
 delete mode 100644 arch/nds32/include/asm/pgalloc.h
 delete mode 100644 arch/nds32/include/asm/pgtable.h
 delete mode 100644 arch/nds32/include/asm/pmu.h
 delete mode 100644 arch/nds32/include/asm/proc-fns.h
 delete mode 100644 arch/nds32/include/asm/processor.h
 delete mode 100644 arch/nds32/include/asm/ptrace.h
 delete mode 100644 arch/nds32/include/asm/sfp-machine.h
 delete mode 100644 arch/nds32/include/asm/shmparam.h
 delete mode 100644 arch/nds32/include/asm/stacktrace.h
 delete mode 100644 arch/nds32/include/asm/string.h
 delete mode 100644 arch/nds32/include/asm/suspend.h
 delete mode 100644 arch/nds32/include/asm/swab.h
 delete mode 100644 arch/nds32/include/asm/syscall.h
 delete mode 100644 arch/nds32/include/asm/syscalls.h
 delete mode 100644 arch/nds32/include/asm/thread_info.h
 delete mode 100644 arch/nds32/include/asm/tlb.h
 delete mode 100644 arch/nds32/include/asm/tlbflush.h
 delete mode 100644 arch/nds32/include/asm/uaccess.h
 delete mode 100644 arch/nds32/include/asm/unistd.h
 delete mode 100644 arch/nds32/include/asm/vdso.h
 delete mode 100644 arch/nds32/include/asm/vdso_datapage.h
 delete mode 100644 arch/nds32/include/asm/vdso_timer_info.h
 delete mode 100644 arch/nds32/include/asm/vermagic.h
 delete mode 100644 arch/nds32/include/asm/vmalloc.h
 delete mode 100644 arch/nds32/include/uapi/asm/Kbuild
 delete mode 100644 arch/nds32/include/uapi/asm/auxvec.h
 delete mode 100644 arch/nds32/include/uapi/asm/byteorder.h
 delete mode 100644 arch/nds32/include/uapi/asm/cachectl.h
 delete mode 100644 arch/nds32/include/uapi/asm/fp_udfiex_crtl.h
 delete mode 100644 arch/nds32/include/uapi/asm/param.h
 delete mode 100644 arch/nds32/include/uapi/asm/ptrace.h
 delete mode 100644 arch/nds32/include/uapi/asm/sigcontext.h
 delete mode 100644 arch/nds32/include/uapi/asm/unistd.h
 delete mode 100644 arch/nds32/kernel/.gitignore
 delete mode 100644 arch/nds32/kernel/Makefile
 delete mode 100644 arch/nds32/kernel/asm-offsets.c
 delete mode 100644 arch/nds32/kernel/atl2c.c
 delete mode 100644 arch/nds32/kernel/cacheinfo.c
 delete mode 100644 arch/nds32/kernel/devtree.c
 delete mode 100644 arch/nds32/kernel/dma.c
 delete mode 100644 arch/nds32/kernel/ex-entry.S
 delete mode 100644 arch/nds32/kernel/ex-exit.S
 delete mode 100644 arch/nds32/kernel/ex-scall.S
 delete mode 100644 arch/nds32/kernel/fpu.c
 delete mode 100644 arch/nds32/kernel/ftrace.c
 delete mode 100644 arch/nds32/kernel/head.S
 delete mode 100644 arch/nds32/kernel/irq.c
 delete mode 100644 arch/nds32/kernel/module.c
 delete mode 100644 arch/nds32/kernel/nds32_ksyms.c
 delete mode 100644 arch/nds32/kernel/perf_event_cpu.c
 delete mode 100644 arch/nds32/kernel/pm.c
 delete mode 100644 arch/nds32/kernel/process.c
 delete mode 100644 arch/nds32/kernel/ptrace.c
 delete mode 100644 arch/nds32/kernel/setup.c
 delete mode 100644 arch/nds32/kernel/signal.c
 delete mode 100644 arch/nds32/kernel/sleep.S
 delete mode 100644 arch/nds32/kernel/stacktrace.c
 delete mode 100644 arch/nds32/kernel/sys_nds32.c
 delete mode 100644 arch/nds32/kernel/syscall_table.c
 delete mode 100644 arch/nds32/kernel/time.c
 delete mode 100644 arch/nds32/kernel/traps.c
 delete mode 100644 arch/nds32/kernel/vdso.c
 delete mode 100644 arch/nds32/kernel/vdso/.gitignore
 delete mode 100644 arch/nds32/kernel/vdso/Makefile
 delete mode 100644 arch/nds32/kernel/vdso/datapage.S
 delete mode 100755 arch/nds32/kernel/vdso/gen_vdso_offsets.sh
 delete mode 100644 arch/nds32/kernel/vdso/gettimeofday.c
 delete mode 100644 arch/nds32/kernel/vdso/note.S
 delete mode 100644 arch/nds32/kernel/vdso/sigreturn.S
 delete mode 100644 arch/nds32/kernel/vdso/vdso.S
 delete mode 100644 arch/nds32/kernel/vdso/vdso.lds.S
 delete mode 100644 arch/nds32/kernel/vmlinux.lds.S
 delete mode 100644 arch/nds32/lib/Makefile
 delete mode 100644 arch/nds32/lib/clear_user.S
 delete mode 100644 arch/nds32/lib/copy_from_user.S
 delete mode 100644 arch/nds32/lib/copy_page.S
 delete mode 100644 arch/nds32/lib/copy_template.S
 delete mode 100644 arch/nds32/lib/copy_to_user.S
 delete mode 100644 arch/nds32/lib/memcpy.S
 delete mode 100644 arch/nds32/lib/memmove.S
 delete mode 100644 arch/nds32/lib/memset.S
 delete mode 100644 arch/nds32/lib/memzero.S
 delete mode 100644 arch/nds32/math-emu/Makefile
 delete mode 100644 arch/nds32/math-emu/faddd.c
 delete mode 100644 arch/nds32/math-emu/fadds.c
 delete mode 100644 arch/nds32/math-emu/fcmpd.c
 delete mode 100644 arch/nds32/math-emu/fcmps.c
 delete mode 100644 arch/nds32/math-emu/fd2s.c
 delete mode 100644 arch/nds32/math-emu/fd2si.c
 delete mode 100644 arch/nds32/math-emu/fd2siz.c
 delete mode 100644 arch/nds32/math-emu/fd2ui.c
 delete mode 100644 arch/nds32/math-emu/fd2uiz.c
 delete mode 100644 arch/nds32/math-emu/fdivd.c
 delete mode 100644 arch/nds32/math-emu/fdivs.c
 delete mode 100644 arch/nds32/math-emu/fmuld.c
 delete mode 100644 arch/nds32/math-emu/fmuls.c
 delete mode 100644 arch/nds32/math-emu/fnegd.c
 delete mode 100644 arch/nds32/math-emu/fnegs.c
 delete mode 100644 arch/nds32/math-emu/fpuemu.c
 delete mode 100644 arch/nds32/math-emu/fs2d.c
 delete mode 100644 arch/nds32/math-emu/fs2si.c
 delete mode 100644 arch/nds32/math-emu/fs2siz.c
 delete mode 100644 arch/nds32/math-emu/fs2ui.c
 delete mode 100644 arch/nds32/math-emu/fs2uiz.c
 delete mode 100644 arch/nds32/math-emu/fsi2d.c
 delete mode 100644 arch/nds32/math-emu/fsi2s.c
 delete mode 100644 arch/nds32/math-emu/fsqrtd.c
 delete mode 100644 arch/nds32/math-emu/fsqrts.c
 delete mode 100644 arch/nds32/math-emu/fsubd.c
 delete mode 100644 arch/nds32/math-emu/fsubs.c
 delete mode 100644 arch/nds32/math-emu/fui2d.c
 delete mode 100644 arch/nds32/math-emu/fui2s.c
 delete mode 100644 arch/nds32/mm/Makefile
 delete mode 100644 arch/nds32/mm/alignment.c
 delete mode 100644 arch/nds32/mm/cacheflush.c
 delete mode 100644 arch/nds32/mm/extable.c
 delete mode 100644 arch/nds32/mm/fault.c
 delete mode 100644 arch/nds32/mm/init.c
 delete mode 100644 arch/nds32/mm/mm-nds32.c
 delete mode 100644 arch/nds32/mm/mmap.c
 delete mode 100644 arch/nds32/mm/proc.c
 delete mode 100644 arch/nds32/mm/tlb.c
 delete mode 100644 drivers/clocksource/timer-atcpit100.c
 delete mode 100644 drivers/irqchip/irq-ativic32.c
 delete mode 100644 tools/perf/arch/nds32/Build
 delete mode 100644 tools/perf/arch/nds32/util/Build
 delete mode 100644 tools/perf/arch/nds32/util/header.c

(limited to 'tools/testing')

diff --git a/Documentation/devicetree/bindings/interrupt-controller/andestech,ativic32.txt b/Documentation/devicetree/bindings/interrupt-controller/andestech,ativic32.txt
deleted file mode 100644
index f4b4193d830e..000000000000
--- a/Documentation/devicetree/bindings/interrupt-controller/andestech,ativic32.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-* Andestech Internal Vector Interrupt Controller
-
-The Internal Vector Interrupt Controller (IVIC) is a basic interrupt controller
-suitable for a simpler SoC platform not requiring a more sophisticated and
-bigger External Vector Interrupt Controller.
-
-
-Main node required properties:
-
-- compatible : should at least contain  "andestech,ativic32".
-- interrupt-controller : Identifies the node as an interrupt controller
-- #interrupt-cells: 1 cells and refer to interrupt-controller/interrupts
-
-Examples:
-	intc: interrupt-controller {
-		compatible = "andestech,ativic32";
-		#interrupt-cells = <1>;
-		interrupt-controller;
-	};
diff --git a/Documentation/devicetree/bindings/nds32/andestech-boards b/Documentation/devicetree/bindings/nds32/andestech-boards
deleted file mode 100644
index f5d75693e3c7..000000000000
--- a/Documentation/devicetree/bindings/nds32/andestech-boards
+++ /dev/null
@@ -1,40 +0,0 @@
-Andestech(nds32) AE3XX Platform
------------------------------------------------------------------------------
-The AE3XX prototype demonstrates the AE3XX example platform on the FPGA. It
-is composed of one Andestech(nds32) processor and AE3XX.
-
-Required properties (in root node):
-- compatible = "andestech,ae3xx";
-
-Example:
-/dts-v1/;
-/ {
-	compatible = "andestech,ae3xx";
-	#address-cells = <1>;
-	#size-cells = <1>;
-	interrupt-parent = <&intc>;
-};
-
-Andestech(nds32) AG101P Platform
------------------------------------------------------------------------------
-AG101P is a generic SoC Platform IP that works with any of Andestech(nds32)
-processors to provide a cost-effective and high performance solution for
-majority of embedded systems in variety of application domains. Users may
-simply attach their IP on one of the system buses together with certain glue
-logics to complete a SoC solution for a specific application. With
-comprehensive simulation and design environments, users may evaluate the
-system performance of their applications and track bugs of their designs
-efficiently. The optional hardware development platform further provides real
-system environment for early prototyping and software/hardware co-development.
-
-Required properties (in root node):
-	compatible = "andestech,ag101p";
-
-Example:
-/dts-v1/;
-/ {
-	compatible = "andestech,ag101p";
-	#address-cells = <1>;
-	#size-cells = <1>;
-	interrupt-parent = <&intc>;
-};
diff --git a/Documentation/devicetree/bindings/nds32/atl2c.txt b/Documentation/devicetree/bindings/nds32/atl2c.txt
deleted file mode 100644
index da8ab8e7ae9b..000000000000
--- a/Documentation/devicetree/bindings/nds32/atl2c.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-* Andestech L2 cache Controller
-
-The level-2 cache controller plays an important role in reducing memory latency
-for high performance systems, such as thoese designs with AndesCore processors.
-Level-2 cache controller in general enhances overall system performance
-signigicantly and the system power consumption might be reduced as well by
-reducing DRAM accesses.
-
-This binding specifies what properties must be available in the device tree
-representation of an Andestech L2 cache controller.
-
-Required properties:
-	- compatible:
-		Usage: required
-		Value type: <string>
-		Definition: "andestech,atl2c"
-	- reg : Physical base address and size of cache controller's memory mapped
-	- cache-unified : Specifies the cache is a unified cache.
-	- cache-level : Should be set to 2 for a level 2 cache.
-
-* Example
-
-	cache-controller@e0500000 {
-		compatible = "andestech,atl2c";
-		reg = <0xe0500000 0x1000>;
-		cache-unified;
-		cache-level = <2>;
-	};
diff --git a/Documentation/devicetree/bindings/nds32/cpus.txt b/Documentation/devicetree/bindings/nds32/cpus.txt
deleted file mode 100644
index 6f9e311b6589..000000000000
--- a/Documentation/devicetree/bindings/nds32/cpus.txt
+++ /dev/null
@@ -1,38 +0,0 @@
-* Andestech Processor Binding
-
-This binding specifies what properties must be available in the device tree
-representation of a Andestech Processor Core, which is the root node in the
-tree.
-
-Required properties:
-
-	- compatible:
-		Usage: required
-		Value type: <string>
-		Definition: Should be "andestech,<core_name>", "andestech,nds32v3" as fallback.
-		Must contain "andestech,nds32v3" as the most generic value, in addition to
-		one of the following identifiers for a particular CPU core:
-		"andestech,n13"
-		"andestech,n15"
-		"andestech,d15"
-		"andestech,n10"
-		"andestech,d10"
-	- device_type
-		Usage: required
-		Value type: <string>
-		Definition: must be "cpu"
-	- reg: Contains CPU index.
-	- clock-frequency: Contains the clock frequency for CPU, in Hz.
-
-* Examples
-
-/ {
-	cpus {
-		cpu@0 {
-			device_type = "cpu";
-			compatible = "andestech,n13", "andestech,nds32v3";
-			reg = <0x0>;
-			clock-frequency = <60000000>
-		};
-	};
-};
diff --git a/Documentation/devicetree/bindings/perf/nds32v3-pmu.txt b/Documentation/devicetree/bindings/perf/nds32v3-pmu.txt
deleted file mode 100644
index 1bd15785b4ae..000000000000
--- a/Documentation/devicetree/bindings/perf/nds32v3-pmu.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-* NDS32 Performance Monitor Units
-
-NDS32 core have a PMU for counting cpu and cache events like cache misses.
-The NDS32 PMU representation in the device tree should be done as under:
-
-Required properties:
-
-- compatible :
-	"andestech,nds32v3-pmu"
-
-- interrupts : The interrupt number for NDS32 PMU is 13.
-
-Example:
-pmu{
-	compatible = "andestech,nds32v3-pmu";
-	interrupts = <13>;
-}
diff --git a/Documentation/devicetree/bindings/timer/andestech,atcpit100-timer.txt b/Documentation/devicetree/bindings/timer/andestech,atcpit100-timer.txt
deleted file mode 100644
index 4c9ea5989e35..000000000000
--- a/Documentation/devicetree/bindings/timer/andestech,atcpit100-timer.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-Andestech ATCPIT100 timer
-------------------------------------------------------------------
-ATCPIT100 is a generic IP block from Andes Technology, embedded in
-Andestech AE3XX platforms and other designs.
-
-This timer is a set of compact multi-function timers, which can be
-used as pulse width modulators (PWM) as well as simple timers.
-
-It supports up to 4 PIT channels. Each PIT channel is a
-multi-function timer and provide the following usage scenarios:
-One 32-bit timer
-Two 16-bit timers
-Four 8-bit timers
-One 16-bit PWM
-One 16-bit timer and one 8-bit PWM
-Two 8-bit timer and one 8-bit PWM
-
-Required properties:
-- compatible	: Should be "andestech,atcpit100"
-- reg		: Address and length of the register set
-- interrupts	: Reference to the timer interrupt
-- clocks 	: a clock to provide the tick rate for "andestech,atcpit100"
-- clock-names 	: should be "PCLK" for the peripheral clock source.
-
-Examples:
-
-timer0: timer@f0400000 {
-	compatible = "andestech,atcpit100";
-	reg = <0xf0400000 0x1000>;
-	interrupts = <2>;
-	clocks = <&apb>;
-	clock-names = "PCLK";
-};
diff --git a/Documentation/features/core/cBPF-JIT/arch-support.txt b/Documentation/features/core/cBPF-JIT/arch-support.txt
index e59b5215402d..77bd6974dd6d 100644
--- a/Documentation/features/core/cBPF-JIT/arch-support.txt
+++ b/Documentation/features/core/cBPF-JIT/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
-    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
diff --git a/Documentation/features/core/eBPF-JIT/arch-support.txt b/Documentation/features/core/eBPF-JIT/arch-support.txt
index dcbd8679f514..d5ab547b2be6 100644
--- a/Documentation/features/core/eBPF-JIT/arch-support.txt
+++ b/Documentation/features/core/eBPF-JIT/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
-    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
diff --git a/Documentation/features/core/generic-idle-thread/arch-support.txt b/Documentation/features/core/generic-idle-thread/arch-support.txt
index 4efcba7b5239..ddb3762ca7f5 100644
--- a/Documentation/features/core/generic-idle-thread/arch-support.txt
+++ b/Documentation/features/core/generic-idle-thread/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
-    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: |  ok  |
     |      parisc: |  ok  |
diff --git a/Documentation/features/core/jump-labels/arch-support.txt b/Documentation/features/core/jump-labels/arch-support.txt
index 0c801d1bd2da..d5380ef68713 100644
--- a/Documentation/features/core/jump-labels/arch-support.txt
+++ b/Documentation/features/core/jump-labels/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
-    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: |  ok  |
diff --git a/Documentation/features/core/thread-info-in-task/arch-support.txt b/Documentation/features/core/thread-info-in-task/arch-support.txt
index bc74d8beea72..0654ba3a42ff 100644
--- a/Documentation/features/core/thread-info-in-task/arch-support.txt
+++ b/Documentation/features/core/thread-info-in-task/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: | TODO |
-    |       nds32: |  ok  |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: |  ok  |
diff --git a/Documentation/features/core/tracehook/arch-support.txt b/Documentation/features/core/tracehook/arch-support.txt
index af34308fce7f..8e95dc0c7b90 100644
--- a/Documentation/features/core/tracehook/arch-support.txt
+++ b/Documentation/features/core/tracehook/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
-    |       nds32: |  ok  |
     |       nios2: |  ok  |
     |    openrisc: |  ok  |
     |      parisc: |  ok  |
diff --git a/Documentation/features/debug/KASAN/arch-support.txt b/Documentation/features/debug/KASAN/arch-support.txt
index c244ac7eee26..407ca1d91242 100644
--- a/Documentation/features/debug/KASAN/arch-support.txt
+++ b/Documentation/features/debug/KASAN/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: | TODO |
-    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
diff --git a/Documentation/features/debug/debug-vm-pgtable/arch-support.txt b/Documentation/features/debug/debug-vm-pgtable/arch-support.txt
index fa83403b4aec..83eafe1a7f68 100644
--- a/Documentation/features/debug/debug-vm-pgtable/arch-support.txt
+++ b/Documentation/features/debug/debug-vm-pgtable/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: | TODO |
-    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
diff --git a/Documentation/features/debug/gcov-profile-all/arch-support.txt b/Documentation/features/debug/gcov-profile-all/arch-support.txt
index b39c1a5de3f3..49e0dda11227 100644
--- a/Documentation/features/debug/gcov-profile-all/arch-support.txt
+++ b/Documentation/features/debug/gcov-profile-all/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: | TODO |
     |  microblaze: |  ok  |
     |        mips: |  ok  |
-    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
diff --git a/Documentation/features/debug/kcov/arch-support.txt b/Documentation/features/debug/kcov/arch-support.txt
index 7e44013cc320..3d34b724ddae 100644
--- a/Documentation/features/debug/kcov/arch-support.txt
+++ b/Documentation/features/debug/kcov/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
-    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
diff --git a/Documentation/features/debug/kgdb/arch-support.txt b/Documentation/features/debug/kgdb/arch-support.txt
index 2cb0576f9180..7aa073d908c5 100644
--- a/Documentation/features/debug/kgdb/arch-support.txt
+++ b/Documentation/features/debug/kgdb/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: | TODO |
     |  microblaze: |  ok  |
     |        mips: |  ok  |
-    |       nds32: | TODO |
     |       nios2: |  ok  |
     |    openrisc: | TODO |
     |      parisc: |  ok  |
diff --git a/Documentation/features/debug/kmemleak/arch-support.txt b/Documentation/features/debug/kmemleak/arch-support.txt
index e9ac415f8aec..5c784ff9768a 100644
--- a/Documentation/features/debug/kmemleak/arch-support.txt
+++ b/Documentation/features/debug/kmemleak/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: | TODO |
     |  microblaze: |  ok  |
     |        mips: |  ok  |
-    |       nds32: |  ok  |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
diff --git a/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt b/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt
index 96156e8802a7..a1f6e98c6b1f 100644
--- a/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt
+++ b/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: | TODO |
-    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: |  ok  |
diff --git a/Documentation/features/debug/kprobes/arch-support.txt b/Documentation/features/debug/kprobes/arch-support.txt
index ee95ed61909a..e7e7015175e7 100644
--- a/Documentation/features/debug/kprobes/arch-support.txt
+++ b/Documentation/features/debug/kprobes/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
-    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: |  ok  |
diff --git a/Documentation/features/debug/kretprobes/arch-support.txt b/Documentation/features/debug/kretprobes/arch-support.txt
index 612cb97d47b8..838eb53a6fa5 100644
--- a/Documentation/features/debug/kretprobes/arch-support.txt
+++ b/Documentation/features/debug/kretprobes/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
-    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: |  ok  |
diff --git a/Documentation/features/debug/optprobes/arch-support.txt b/Documentation/features/debug/optprobes/arch-support.txt
index d6ff141a6122..6358b00b1723 100644
--- a/Documentation/features/debug/optprobes/arch-support.txt
+++ b/Documentation/features/debug/optprobes/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: | TODO |
-    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
diff --git a/Documentation/features/debug/stackprotector/arch-support.txt b/Documentation/features/debug/stackprotector/arch-support.txt
index ad4de22a71ab..f6cdf781305f 100644
--- a/Documentation/features/debug/stackprotector/arch-support.txt
+++ b/Documentation/features/debug/stackprotector/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
-    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
diff --git a/Documentation/features/debug/uprobes/arch-support.txt b/Documentation/features/debug/uprobes/arch-support.txt
index 8bd5548a4485..0b871e797d24 100644
--- a/Documentation/features/debug/uprobes/arch-support.txt
+++ b/Documentation/features/debug/uprobes/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
-    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
diff --git a/Documentation/features/debug/user-ret-profiler/arch-support.txt b/Documentation/features/debug/user-ret-profiler/arch-support.txt
index 2a3fe812a5fa..a2feda7adff1 100644
--- a/Documentation/features/debug/user-ret-profiler/arch-support.txt
+++ b/Documentation/features/debug/user-ret-profiler/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: | TODO |
-    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
diff --git a/Documentation/features/io/dma-contiguous/arch-support.txt b/Documentation/features/io/dma-contiguous/arch-support.txt
index bece89586efa..9b407f26d45e 100644
--- a/Documentation/features/io/dma-contiguous/arch-support.txt
+++ b/Documentation/features/io/dma-contiguous/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: | TODO |
     |  microblaze: |  ok  |
     |        mips: |  ok  |
-    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
diff --git a/Documentation/features/locking/cmpxchg-local/arch-support.txt b/Documentation/features/locking/cmpxchg-local/arch-support.txt
index 52bdda004f5c..090520b0e02d 100644
--- a/Documentation/features/locking/cmpxchg-local/arch-support.txt
+++ b/Documentation/features/locking/cmpxchg-local/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: | TODO |
-    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
diff --git a/Documentation/features/locking/lockdep/arch-support.txt b/Documentation/features/locking/lockdep/arch-support.txt
index a8cd163c8b7e..6d840b103537 100644
--- a/Documentation/features/locking/lockdep/arch-support.txt
+++ b/Documentation/features/locking/lockdep/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: | TODO |
     |  microblaze: |  ok  |
     |        mips: |  ok  |
-    |       nds32: |  ok  |
     |       nios2: | TODO |
     |    openrisc: |  ok  |
     |      parisc: | TODO |
diff --git a/Documentation/features/locking/queued-rwlocks/arch-support.txt b/Documentation/features/locking/queued-rwlocks/arch-support.txt
index 8c85949752b3..de586be05d64 100644
--- a/Documentation/features/locking/queued-rwlocks/arch-support.txt
+++ b/Documentation/features/locking/queued-rwlocks/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
-    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: |  ok  |
     |      parisc: | TODO |
diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
index 5f4e1b3841af..29d0c8b7b1c5 100644
--- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
+++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
-    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: |  ok  |
     |      parisc: | TODO |
diff --git a/Documentation/features/perf/kprobes-event/arch-support.txt b/Documentation/features/perf/kprobes-event/arch-support.txt
index 78f3fe080f0e..881a8a50e41e 100644
--- a/Documentation/features/perf/kprobes-event/arch-support.txt
+++ b/Documentation/features/perf/kprobes-event/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
-    |       nds32: |  ok  |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: |  ok  |
diff --git a/Documentation/features/perf/perf-regs/arch-support.txt b/Documentation/features/perf/perf-regs/arch-support.txt
index 5bf3b1854a1f..7639a796a8e1 100644
--- a/Documentation/features/perf/perf-regs/arch-support.txt
+++ b/Documentation/features/perf/perf-regs/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
-    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
diff --git a/Documentation/features/perf/perf-stackdump/arch-support.txt b/Documentation/features/perf/perf-stackdump/arch-support.txt
index d88659bb4fc1..df1a4c679a45 100644
--- a/Documentation/features/perf/perf-stackdump/arch-support.txt
+++ b/Documentation/features/perf/perf-stackdump/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
-    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
diff --git a/Documentation/features/sched/membarrier-sync-core/arch-support.txt b/Documentation/features/sched/membarrier-sync-core/arch-support.txt
index 883d33b265d6..51b7afc937f1 100644
--- a/Documentation/features/sched/membarrier-sync-core/arch-support.txt
+++ b/Documentation/features/sched/membarrier-sync-core/arch-support.txt
@@ -40,7 +40,6 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: | TODO |
-    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
diff --git a/Documentation/features/sched/numa-balancing/arch-support.txt b/Documentation/features/sched/numa-balancing/arch-support.txt
index 9affb7c2c500..d0e308135434 100644
--- a/Documentation/features/sched/numa-balancing/arch-support.txt
+++ b/Documentation/features/sched/numa-balancing/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: |  ..  |
     |  microblaze: |  ..  |
     |        mips: | TODO |
-    |       nds32: | TODO |
     |       nios2: |  ..  |
     |    openrisc: |  ..  |
     |      parisc: |  ..  |
diff --git a/Documentation/features/seccomp/seccomp-filter/arch-support.txt b/Documentation/features/seccomp/seccomp-filter/arch-support.txt
index 26eec58ab819..05613c5ff560 100644
--- a/Documentation/features/seccomp/seccomp-filter/arch-support.txt
+++ b/Documentation/features/seccomp/seccomp-filter/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
-    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: |  ok  |
diff --git a/Documentation/features/time/arch-tick-broadcast/arch-support.txt b/Documentation/features/time/arch-tick-broadcast/arch-support.txt
index 8dcaab070c7b..b4c96ebab7e3 100644
--- a/Documentation/features/time/arch-tick-broadcast/arch-support.txt
+++ b/Documentation/features/time/arch-tick-broadcast/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
-    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
diff --git a/Documentation/features/time/clockevents/arch-support.txt b/Documentation/features/time/clockevents/arch-support.txt
index 9a81cb03b1fd..3f6976052e2a 100644
--- a/Documentation/features/time/clockevents/arch-support.txt
+++ b/Documentation/features/time/clockevents/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: | TODO |
     |  microblaze: |  ok  |
     |        mips: |  ok  |
-    |       nds32: |  ok  |
     |       nios2: |  ok  |
     |    openrisc: |  ok  |
     |      parisc: | TODO |
diff --git a/Documentation/features/time/context-tracking/arch-support.txt b/Documentation/features/time/context-tracking/arch-support.txt
index 4ed116c2ec39..bb1c1801553e 100644
--- a/Documentation/features/time/context-tracking/arch-support.txt
+++ b/Documentation/features/time/context-tracking/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
-    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
diff --git a/Documentation/features/time/irq-time-acct/arch-support.txt b/Documentation/features/time/irq-time-acct/arch-support.txt
index bc30c15557c7..3cea25b80c8c 100644
--- a/Documentation/features/time/irq-time-acct/arch-support.txt
+++ b/Documentation/features/time/irq-time-acct/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
-    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: |  ..  |
diff --git a/Documentation/features/time/virt-cpuacct/arch-support.txt b/Documentation/features/time/virt-cpuacct/arch-support.txt
index 050de43bbbb9..5163a60a1c1e 100644
--- a/Documentation/features/time/virt-cpuacct/arch-support.txt
+++ b/Documentation/features/time/virt-cpuacct/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
-    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: |  ok  |
diff --git a/Documentation/features/vm/ELF-ASLR/arch-support.txt b/Documentation/features/vm/ELF-ASLR/arch-support.txt
index 2949c99fbb2f..73ec761f1e27 100644
--- a/Documentation/features/vm/ELF-ASLR/arch-support.txt
+++ b/Documentation/features/vm/ELF-ASLR/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
-    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: |  ok  |
diff --git a/Documentation/features/vm/PG_uncached/arch-support.txt b/Documentation/features/vm/PG_uncached/arch-support.txt
index 6cde38458596..9066a90b38d9 100644
--- a/Documentation/features/vm/PG_uncached/arch-support.txt
+++ b/Documentation/features/vm/PG_uncached/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: | TODO |
-    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
diff --git a/Documentation/features/vm/THP/arch-support.txt b/Documentation/features/vm/THP/arch-support.txt
index 7dbd6967b37e..f717ab792e5a 100644
--- a/Documentation/features/vm/THP/arch-support.txt
+++ b/Documentation/features/vm/THP/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: |  ..  |
     |  microblaze: |  ..  |
     |        mips: |  ok  |
-    |       nds32: | TODO |
     |       nios2: |  ..  |
     |    openrisc: |  ..  |
     |      parisc: | TODO |
diff --git a/Documentation/features/vm/TLB/arch-support.txt b/Documentation/features/vm/TLB/arch-support.txt
index e1c3a4c4d107..6fa76a37f299 100644
--- a/Documentation/features/vm/TLB/arch-support.txt
+++ b/Documentation/features/vm/TLB/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: |  ..  |
     |  microblaze: |  ..  |
     |        mips: | TODO |
-    |       nds32: | TODO |
     |       nios2: |  ..  |
     |    openrisc: |  ..  |
     |      parisc: | TODO |
diff --git a/Documentation/features/vm/huge-vmap/arch-support.txt b/Documentation/features/vm/huge-vmap/arch-support.txt
index bc53905a0306..6e1792ee37fe 100644
--- a/Documentation/features/vm/huge-vmap/arch-support.txt
+++ b/Documentation/features/vm/huge-vmap/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: | TODO |
-    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
diff --git a/Documentation/features/vm/ioremap_prot/arch-support.txt b/Documentation/features/vm/ioremap_prot/arch-support.txt
index 9a0c8783b84d..a6dcbe5f47b6 100644
--- a/Documentation/features/vm/ioremap_prot/arch-support.txt
+++ b/Documentation/features/vm/ioremap_prot/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
-    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
diff --git a/Documentation/features/vm/pte_special/arch-support.txt b/Documentation/features/vm/pte_special/arch-support.txt
index 40b969f3a6bb..376477749c42 100644
--- a/Documentation/features/vm/pte_special/arch-support.txt
+++ b/Documentation/features/vm/pte_special/arch-support.txt
@@ -17,7 +17,6 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
-    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
diff --git a/MAINTAINERS b/MAINTAINERS
index 69a2935daf6c..d24a03cb0426 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1229,18 +1229,6 @@ S:	Supported
 F:	drivers/clk/analogbits/*
 F:	include/linux/clk/analogbits*
 
-ANDES ARCHITECTURE
-M:	Nick Hu <nickhu@andestech.com>
-M:	Greentime Hu <green.hu@gmail.com>
-M:	Vincent Chen <deanbo422@gmail.com>
-S:	Supported
-T:	git https://git.kernel.org/pub/scm/linux/kernel/git/greentime/linux.git
-F:	Documentation/devicetree/bindings/interrupt-controller/andestech,ativic32.txt
-F:	Documentation/devicetree/bindings/nds32/
-F:	arch/nds32/
-N:	nds32
-K:	nds32
-
 ANDROID CONFIG FRAGMENTS
 M:	Rob Herring <robh@kernel.org>
 S:	Supported
diff --git a/arch/nds32/Kbuild b/arch/nds32/Kbuild
deleted file mode 100644
index 4e39f7abdeb6..000000000000
--- a/arch/nds32/Kbuild
+++ /dev/null
@@ -1,4 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-
-# for cleaning
-subdir- += boot
diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig
deleted file mode 100644
index 013249430fa3..000000000000
--- a/arch/nds32/Kconfig
+++ /dev/null
@@ -1,101 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# For a description of the syntax of this configuration file,
-# see Documentation/kbuild/kconfig-language.rst.
-#
-
-config NDS32
-	def_bool y
-	select ARCH_32BIT_OFF_T
-	select ARCH_HAS_DMA_PREP_COHERENT
-	select ARCH_HAS_SYNC_DMA_FOR_CPU
-	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
-	select ARCH_WANT_FRAME_POINTERS if FTRACE
-	select CLKSRC_MMIO
-	select CLONE_BACKWARDS
-	select COMMON_CLK
-	select DMA_DIRECT_REMAP
-	select GENERIC_ATOMIC64
-	select GENERIC_CPU_DEVICES
-	select GENERIC_IRQ_CHIP
-	select GENERIC_IRQ_SHOW
-	select GENERIC_IOREMAP
-	select GENERIC_LIB_ASHLDI3
-	select GENERIC_LIB_ASHRDI3
-	select GENERIC_LIB_CMPDI2
-	select GENERIC_LIB_LSHRDI3
-	select GENERIC_LIB_MULDI3
-	select GENERIC_LIB_UCMPDI2
-	select GENERIC_TIME_VSYSCALL
-	select HAVE_ARCH_TRACEHOOK
-	select HAVE_DEBUG_KMEMLEAK
-	select HAVE_EXIT_THREAD
-	select HAVE_REGS_AND_STACK_ACCESS_API
-	select HAVE_PERF_EVENTS
-	select IRQ_DOMAIN
-	select LOCKDEP_SUPPORT
-	select MODULES_USE_ELF_RELA
-	select OF
-	select OF_EARLY_FLATTREE
-	select NO_IOPORT_MAP
-	select RTC_LIB
-	select THREAD_INFO_IN_TASK
-	select HAVE_FUNCTION_TRACER
-	select HAVE_FUNCTION_GRAPH_TRACER
-	select HAVE_FTRACE_MCOUNT_RECORD
-	select HAVE_DYNAMIC_FTRACE
-	select TRACE_IRQFLAGS_SUPPORT
-	help
-	  Andes(nds32) Linux support.
-
-config GENERIC_CALIBRATE_DELAY
-	def_bool y
-
-config GENERIC_CSUM
-	def_bool y
-
-config GENERIC_HWEIGHT
-	def_bool y
-
-config GENERIC_LOCKBREAK
-	def_bool y
-	depends on PREEMPTION
-
-config STACKTRACE_SUPPORT
-	def_bool y
-
-config FIX_EARLYCON_MEM
-	def_bool y
-
-config PGTABLE_LEVELS
-	default 2
-
-menu "System Type"
-source "arch/nds32/Kconfig.cpu"
-config NR_CPUS
-	int
-	default 1
-
-config MMU
-	def_bool y
-
-config NDS32_BUILTIN_DTB
-	string "Builtin DTB"
-	default ""
-	help
-	  User can use it to specify the dts of the SoC
-endmenu
-
-menu "Kernel Features"
-source "kernel/Kconfig.hz"
-endmenu
-
-menu "Power management options"
-config SYS_SUPPORTS_APM_EMULATION
-	bool
-
-config ARCH_SUSPEND_POSSIBLE
-	def_bool y
-
-source "kernel/power/Kconfig"
-endmenu
diff --git a/arch/nds32/Kconfig.cpu b/arch/nds32/Kconfig.cpu
deleted file mode 100644
index c10759952485..000000000000
--- a/arch/nds32/Kconfig.cpu
+++ /dev/null
@@ -1,218 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-comment "Processor Features"
-
-config CPU_BIG_ENDIAN
-	def_bool !CPU_LITTLE_ENDIAN
-
-config CPU_LITTLE_ENDIAN
-	bool "Little endian"
-	default y
-
-config FPU
-	bool "FPU support"
-	default n
-	help
-	  If FPU ISA is used in user space, this configuration shall be Y to
-          enable required support in kernel such as fpu context switch and
-          fpu exception handler.
-
-	  If no FPU ISA is used in user space, say N.
-
-config LAZY_FPU
-	bool "lazy FPU support"
-	depends on FPU
-	default y
-	help
-	  Say Y here to enable the lazy FPU scheme. The lazy FPU scheme can
-          enhance system performance by reducing the context switch
-	  frequency of the FPU register.
-
-	  For normal case, say Y.
-
-config SUPPORT_DENORMAL_ARITHMETIC
-	bool "Denormal arithmetic support"
-	depends on FPU
-	default n
-	help
-	  Say Y here to enable arithmetic of denormalized number. Enabling
-	  this feature can enhance the precision for tininess number.
-	  However, performance loss in float point calculations is
-	  possibly significant due to additional FPU exception.
-
-	  If the calculated tolerance for tininess number is not critical,
-	  say N to prevent performance loss.
-
-config HWZOL
-	bool "hardware zero overhead loop support"
-	depends on CPU_D10 || CPU_D15
-	default n
-	help
-	  A set of Zero-Overhead Loop mechanism is provided to reduce the
-	  instruction fetch and execution overhead of loop-control instructions.
-	  It will save 3 registers($LB, $LC, $LE) for context saving if say Y.
-	  You don't need to save these registers if you can make sure your user
-	  program doesn't use these registers.
-
-	  If unsure, say N.
-
-config CPU_CACHE_ALIASING
-	bool "Aliasing cache"
-	depends on CPU_N10 || CPU_D10 || CPU_N13 || CPU_V3
-	default y
-	help
-	  If this CPU is using VIPT data cache and its cache way size is larger
-	  than page size, say Y. If it is using PIPT data cache, say N.
-
-	  If unsure, say Y.
-
-choice
-	prompt "minimum CPU type"
-	default CPU_V3
-	help
-	  The data cache of N15/D15 is implemented as PIPT and it will not cause
-	  the cache aliasing issue. The rest cpus(N13, N10 and D10) are
-	  implemented as VIPT data cache. It may cause the cache aliasing issue
-	  if its cache way size is larger than page size. You can specify the
-	  CPU type directly or choose CPU_V3 if unsure.
-
-          A kernel built for N10 is able to run on N15, D15, N13, N10 or D10.
-          A kernel built for N15 is able to run on N15 or D15.
-          A kernel built for D10 is able to run on D10 or D15.
-          A kernel built for D15 is able to run on D15.
-          A kernel built for N13 is able to run on N15, N13 or D15.
-
-config CPU_N15
-	bool "AndesCore N15"
-config CPU_N13
-	bool "AndesCore N13"
-	select CPU_CACHE_ALIASING if ANDES_PAGE_SIZE_4KB
-config CPU_N10
-	bool "AndesCore N10"
-	select CPU_CACHE_ALIASING
-config CPU_D15
-	bool "AndesCore D15"
-config CPU_D10
-	bool "AndesCore D10"
-	select CPU_CACHE_ALIASING
-config CPU_V3
-	bool "AndesCore v3 compatible"
-	select CPU_CACHE_ALIASING
-endchoice
-choice
-	prompt "Paging -- page size "
-	default ANDES_PAGE_SIZE_4KB
-config  ANDES_PAGE_SIZE_4KB
-	bool "use 4KB page size"
-config  ANDES_PAGE_SIZE_8KB
-	bool "use 8KB page size"
-endchoice
-
-config CPU_ICACHE_DISABLE
-	bool "Disable I-Cache"
-	help
-	  Say Y here to disable the processor instruction cache. Unless
-	  you have a reason not to or are unsure, say N.
-
-config CPU_DCACHE_DISABLE
-	bool "Disable D-Cache"
-	help
-	  Say Y here to disable the processor data cache. Unless
-	  you have a reason not to or are unsure, say N.
-
-config CPU_DCACHE_WRITETHROUGH
-	bool "Force write through D-cache"
-	depends on !CPU_DCACHE_DISABLE
-	help
-	  Say Y here to use the data cache in writethrough mode. Unless you
-	  specifically require this or are unsure, say N.
-
-config WBNA
-	bool "WBNA"
-	default n
-	help
-	  Say Y here to enable write-back memory with no-write-allocation policy.
-
-config ALIGNMENT_TRAP
-	bool "Kernel support unaligned access handling by sw"
-	depends on PROC_FS
-	default n
-	help
-	  Andes processors cannot load/store information which is not
-	  naturally aligned on the bus, i.e., a 4 byte load must start at an
-	  address divisible by 4. On 32-bit Andes processors, these non-aligned
-	  load/store instructions will be emulated in software if you say Y
-	  here, which has a severe performance impact. With an IP-only
-	  configuration it is safe to say N, otherwise say Y.
-
-config HW_SUPPORT_UNALIGNMENT_ACCESS
-	bool "Kernel support unaligned access handling by hw"
-	depends on !ALIGNMENT_TRAP
-	default n
-	help
-	  Andes processors load/store world/half-word instructions can access
-	  unaligned memory locations without generating the Data Alignment
-	  Check exceptions. With an IP-only configuration it is safe to say N,
-	  otherwise say Y.
-
-config HIGHMEM
-	bool "High Memory Support"
-	depends on MMU && !CPU_CACHE_ALIASING
-	select KMAP_LOCAL
-	help
-	  The address space of Andes processors is only 4 Gigabytes large
-	  and it has to accommodate user address space, kernel address
-	  space as well as some memory mapped IO. That means that, if you
-	  have a large amount of physical memory and/or IO, not all of the
-	  memory can be "permanently mapped" by the kernel. The physical
-	  memory that is not permanently mapped is called "high memory".
-
-	  Depending on the selected kernel/user memory split, minimum
-	  vmalloc space and actual amount of RAM, you may not need this
-	  option which should result in a slightly faster kernel.
-
-	  If unsure, say N.
-
-config CACHE_L2
-	bool "Support L2 cache"
-        default y
-	help
-	  Say Y here to enable L2 cache if your SoC are integrated with L2CC.
-	  If unsure, say N.
-
-config HW_PRE
-	bool "Enable hardware prefetcher"
-	default y
-	help
-	  Say Y here to enable hardware prefetcher feature.
-	  Only when CPU_VER.REV >= 0x09 can support.
-
-menu "Memory configuration"
-
-choice
-	prompt "Memory split"
-	depends on MMU
-	default VMSPLIT_3G_OPT
-	help
-	  Select the desired split between kernel and user memory.
-
-	  If you are not absolutely sure what you are doing, leave this
-	  option alone!
-
-	config VMSPLIT_3G
-		bool "3G/1G user/kernel split"
-	config VMSPLIT_3G_OPT
-		bool "3G/1G user/kernel split (for full 1G low memory)"
-	config VMSPLIT_2G
-		bool "2G/2G user/kernel split"
-	config VMSPLIT_1G
-		bool "1G/3G user/kernel split"
-endchoice
-
-config PAGE_OFFSET
-	hex
-	default 0x40000000 if VMSPLIT_1G
-	default 0x80000000 if VMSPLIT_2G
-	default 0xB0000000 if VMSPLIT_3G_OPT
-	default 0xC0000000
-
-endmenu
diff --git a/arch/nds32/Kconfig.debug b/arch/nds32/Kconfig.debug
deleted file mode 100644
index 295942fe3fd5..000000000000
--- a/arch/nds32/Kconfig.debug
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-# dummy file, do not delete
diff --git a/arch/nds32/Makefile b/arch/nds32/Makefile
deleted file mode 100644
index b33d5d81b6ae..000000000000
--- a/arch/nds32/Makefile
+++ /dev/null
@@ -1,63 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-LDFLAGS_vmlinux	:= --no-undefined -X
-OBJCOPYFLAGS	:= -O binary -R .note -R .note.gnu.build-id -R .comment -S
-
-ifdef CONFIG_FUNCTION_TRACER
-arch-y += -malways-save-lp -mno-relax
-endif
-
-# Avoid generating FPU instructions
-arch-y  += -mno-ext-fpu-sp -mno-ext-fpu-dp -mfloat-abi=soft
-
-# Enable <nds32_intrinsic.h>
-KBUILD_CFLAGS	+= -isystem $(shell $(CC) -print-file-name=include)
-KBUILD_CFLAGS	+= $(call cc-option, -mno-sched-prolog-epilog)
-KBUILD_CFLAGS	+= -mcmodel=large
-
-KBUILD_CFLAGS	+=$(arch-y) $(tune-y)
-KBUILD_AFLAGS	+=$(arch-y) $(tune-y)
-
-#Default value
-head-y		 := arch/nds32/kernel/head.o
-textaddr-y	 := $(CONFIG_PAGE_OFFSET)+0xc000
-
-TEXTADDR := $(textaddr-y)
-
-export	TEXTADDR
-
-
-# If we have a machine-specific directory, then include it in the build.
-core-y				+= arch/nds32/kernel/ arch/nds32/mm/
-core-$(CONFIG_FPU)              += arch/nds32/math-emu/
-libs-y				+= arch/nds32/lib/
-
-ifdef CONFIG_CPU_LITTLE_ENDIAN
-KBUILD_CFLAGS   += $(call cc-option, -EL)
-KBUILD_AFLAGS   += $(call cc-option, -EL)
-KBUILD_LDFLAGS  += $(call cc-option, -EL)
-CHECKFLAGS      += -D__NDS32_EL__
-else
-KBUILD_CFLAGS   += $(call cc-option, -EB)
-KBUILD_AFLAGS   += $(call cc-option, -EB)
-KBUILD_LDFLAGS  += $(call cc-option, -EB)
-CHECKFLAGS      += -D__NDS32_EB__
-endif
-
-boot := arch/nds32/boot
-core-y += $(boot)/dts/
-
-Image: vmlinux
-	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
-
-
-PHONY += vdso_install
-vdso_install:
-	$(Q)$(MAKE) $(build)=arch/nds32/kernel/vdso $@
-
-prepare: vdso_prepare
-vdso_prepare: prepare0
-	$(Q)$(MAKE) $(build)=arch/nds32/kernel/vdso include/generated/vdso-offsets.h
-
-define archhelp
-  echo  '  Image         - kernel image (arch/$(ARCH)/boot/Image)'
-endef
diff --git a/arch/nds32/boot/.gitignore b/arch/nds32/boot/.gitignore
deleted file mode 100644
index 9182a3a1ea0a..000000000000
--- a/arch/nds32/boot/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-/Image
diff --git a/arch/nds32/boot/Makefile b/arch/nds32/boot/Makefile
deleted file mode 100644
index c4cc0c2689f7..000000000000
--- a/arch/nds32/boot/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-targets := Image Image.gz
-
-$(obj)/Image: vmlinux FORCE
-	$(call if_changed,objcopy)
-
-$(obj)/Image.gz: $(obj)/Image FORCE
-	$(call if_changed,gzip)
-
-install: $(obj)/Image
-	$(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \
-	$(obj)/Image System.map "$(INSTALL_PATH)"
-
-zinstall: $(obj)/Image.gz
-	$(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \
-	$(obj)/Image.gz System.map "$(INSTALL_PATH)"
diff --git a/arch/nds32/boot/dts/Makefile b/arch/nds32/boot/dts/Makefile
deleted file mode 100644
index 4fc69562eae8..000000000000
--- a/arch/nds32/boot/dts/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_OF) += $(addsuffix .dtb.o, $(CONFIG_NDS32_BUILTIN_DTB))
diff --git a/arch/nds32/boot/dts/ae3xx.dts b/arch/nds32/boot/dts/ae3xx.dts
deleted file mode 100644
index 16a9f54a805e..000000000000
--- a/arch/nds32/boot/dts/ae3xx.dts
+++ /dev/null
@@ -1,90 +0,0 @@
-/dts-v1/;
-/ {
-	compatible = "andestech,ae3xx";
-	#address-cells = <1>;
-	#size-cells = <1>;
-	interrupt-parent = <&intc>;
-
-	chosen {
-		stdout-path = &serial0;
-	};
-
-	memory@0 {
-		device_type = "memory";
-		reg = <0x00000000 0x40000000>;
-	};
-
-	cpus {
-		#address-cells = <1>;
-		#size-cells = <0>;
-		cpu@0 {
-			device_type = "cpu";
-			compatible = "andestech,n13", "andestech,nds32v3";
-			reg = <0>;
-			clock-frequency = <60000000>;
-			next-level-cache = <&L2>;
-		};
-	};
-
-	intc: interrupt-controller {
-		compatible = "andestech,ativic32";
-		#interrupt-cells = <1>;
-		interrupt-controller;
-	};
-
-	clock: clk {
-		#clock-cells = <0>;
-		compatible = "fixed-clock";
-		clock-frequency = <30000000>;
-	};
-
-	apb {
-		compatible = "simple-bus";
-		#address-cells = <1>;
-		#size-cells = <1>;
-		ranges;
-
-		serial0: serial@f0300000 {
-			compatible = "andestech,uart16550", "ns16550a";
-			reg = <0xf0300000 0x1000>;
-			interrupts = <8>;
-			clock-frequency = <14745600>;
-			reg-shift = <2>;
-			reg-offset = <32>;
-			no-loopback-test = <1>;
-		};
-
-		timer0: timer@f0400000 {
-			compatible = "andestech,atcpit100";
-			reg = <0xf0400000 0x1000>;
-			interrupts = <2>;
-			clocks = <&clock>;
-			clock-names = "PCLK";
-		};
-	};
-
-	ahb {
-		compatible = "simple-bus";
-		#address-cells = <1>;
-		#size-cells = <1>;
-		ranges;
-
-		L2: cache-controller@e0500000 {
-			compatible = "andestech,atl2c";
-			reg = <0xe0500000 0x1000>;
-			cache-unified;
-			cache-level = <2>;
-		};
-
-		mac0: ethernet@e0100000 {
-			compatible = "andestech,atmac100";
-			reg = <0xe0100000 0x1000>;
-			interrupts = <18>;
-		};
-	};
-
-	pmu {
-		compatible = "andestech,nds32v3-pmu";
-		interrupts= <13>;
-	};
-};
diff --git a/arch/nds32/configs/defconfig b/arch/nds32/configs/defconfig
deleted file mode 100644
index f9a89cf00aa6..000000000000
--- a/arch/nds32/configs/defconfig
+++ /dev/null
@@ -1,104 +0,0 @@
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_BSD_PROCESS_ACCT=y
-CONFIG_BSD_PROCESS_ACCT_V3=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_USER_NS=y
-CONFIG_RELAY=y
-CONFIG_BLK_DEV_INITRD=y
-CONFIG_KALLSYMS_ALL=y
-CONFIG_PROFILING=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_CACHE_L2 is not set
-CONFIG_PREEMPT=y
-# CONFIG_COMPACTION is not set
-CONFIG_HZ_100=y
-# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_NET_KEY=y
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_DIAG is not set
-# CONFIG_IPV6 is not set
-# CONFIG_BLK_DEV is not set
-CONFIG_NETDEVICES=y
-# CONFIG_NET_CADENCE is not set
-# CONFIG_NET_VENDOR_BROADCOM is not set
-CONFIG_FTMAC100=y
-# CONFIG_NET_VENDOR_INTEL is not set
-# CONFIG_NET_VENDOR_MARVELL is not set
-# CONFIG_NET_VENDOR_MICREL is not set
-# CONFIG_NET_VENDOR_NATSEMI is not set
-# CONFIG_NET_VENDOR_SEEQ is not set
-# CONFIG_NET_VENDOR_STMICRO is not set
-# CONFIG_NET_VENDOR_WIZNET is not set
-CONFIG_INPUT_EVDEV=y
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-CONFIG_INPUT_TOUCHSCREEN=y
-# CONFIG_SERIO is not set
-CONFIG_VT_HW_CONSOLE_BINDING=y
-CONFIG_SERIAL_8250=y
-# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_8250_NR_UARTS=3
-CONFIG_SERIAL_8250_RUNTIME_UARTS=3
-CONFIG_SERIAL_OF_PLATFORM=y
-# CONFIG_HW_RANDOM is not set
-# CONFIG_HWMON is not set
-# CONFIG_HID_A4TECH is not set
-# CONFIG_HID_APPLE is not set
-# CONFIG_HID_BELKIN is not set
-# CONFIG_HID_CHERRY is not set
-# CONFIG_HID_CHICONY is not set
-# CONFIG_HID_CYPRESS is not set
-# CONFIG_HID_EZKEY is not set
-# CONFIG_HID_ITE is not set
-# CONFIG_HID_KENSINGTON is not set
-# CONFIG_HID_LOGITECH is not set
-# CONFIG_HID_MICROSOFT is not set
-# CONFIG_HID_MONTEREY is not set
-# CONFIG_USB_SUPPORT is not set
-CONFIG_GENERIC_PHY=y
-CONFIG_EXT4_FS=y
-CONFIG_EXT4_FS_POSIX_ACL=y
-CONFIG_EXT4_FS_SECURITY=y
-CONFIG_FS_ENCRYPTION=y
-CONFIG_FUSE_FS=y
-CONFIG_MSDOS_FS=y
-CONFIG_VFAT_FS=y
-CONFIG_TMPFS=y
-CONFIG_TMPFS_POSIX_ACL=y
-CONFIG_CONFIGFS_FS=y
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3_ACL=y
-CONFIG_NFS_V4=y
-CONFIG_NFS_V4_1=y
-CONFIG_NFS_USE_LEGACY_DNS=y
-CONFIG_NLS_CODEPAGE_437=y
-CONFIG_NLS_ISO8859_1=y
-CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_INFO_DWARF4=y
-CONFIG_GDB_SCRIPTS=y
-CONFIG_READABLE_ASM=y
-CONFIG_HEADERS_INSTALL=y
-CONFIG_HEADERS_CHECK=y
-CONFIG_DEBUG_SECTION_MISMATCH=y
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
-CONFIG_PANIC_ON_OOPS=y
-# CONFIG_SCHED_DEBUG is not set
-# CONFIG_DEBUG_PREEMPT is not set
-CONFIG_STACKTRACE=y
-CONFIG_RCU_CPU_STALL_TIMEOUT=300
-# CONFIG_CRYPTO_HW is not set
diff --git a/arch/nds32/include/asm/Kbuild b/arch/nds32/include/asm/Kbuild
deleted file mode 100644
index 82a4453c9c2d..000000000000
--- a/arch/nds32/include/asm/Kbuild
+++ /dev/null
@@ -1,8 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-generic-y += asm-offsets.h
-generic-y += cmpxchg.h
-generic-y += export.h
-generic-y += gpio.h
-generic-y += kvm_para.h
-generic-y += parport.h
-generic-y += user.h
diff --git a/arch/nds32/include/asm/assembler.h b/arch/nds32/include/asm/assembler.h
deleted file mode 100644
index 5e7c56926049..000000000000
--- a/arch/nds32/include/asm/assembler.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#ifndef __NDS32_ASSEMBLER_H__
-#define __NDS32_ASSEMBLER_H__
-
-.macro gie_disable
-	setgie.d
-	dsb
-.endm
-
-.macro gie_enable
-	setgie.e
-	dsb
-.endm
-
-.macro gie_save oldpsw
-	mfsr \oldpsw, $ir0
-	setgie.d
-        dsb
-.endm
-
-.macro gie_restore oldpsw
-	andi \oldpsw, \oldpsw, #0x1
-	beqz \oldpsw, 7001f
-	setgie.e
-	dsb
-7001:
-.endm
-
-
-#define USER(insn,  reg, addr, opr)	\
-9999:	insn  reg, addr, opr;		\
-	.section __ex_table,"a";	\
-	.align 3;			\
-	.long	9999b, 9001f;		\
-	.previous
-
-#endif /* __NDS32_ASSEMBLER_H__ */
diff --git a/arch/nds32/include/asm/barrier.h b/arch/nds32/include/asm/barrier.h
deleted file mode 100644
index 16413172fd50..000000000000
--- a/arch/nds32/include/asm/barrier.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#ifndef __NDS32_ASM_BARRIER_H
-#define __NDS32_ASM_BARRIER_H
-
-#ifndef __ASSEMBLY__
-#define mb()		asm volatile("msync all":::"memory")
-#define rmb()		asm volatile("msync all":::"memory")
-#define wmb()		asm volatile("msync store":::"memory")
-#include <asm-generic/barrier.h>
-
-#endif	/* __ASSEMBLY__ */
-
-#endif	/* __NDS32_ASM_BARRIER_H */
diff --git a/arch/nds32/include/asm/bitfield.h b/arch/nds32/include/asm/bitfield.h
deleted file mode 100644
index b02a58e71f80..000000000000
--- a/arch/nds32/include/asm/bitfield.h
+++ /dev/null
@@ -1,985 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#ifndef __NDS32_BITFIELD_H__
-#define __NDS32_BITFIELD_H__
-/******************************************************************************
- * cr0: CPU_VER (CPU Version Register)
- *****************************************************************************/
-#define CPU_VER_offCFGID	0	/* Minor configuration */
-#define CPU_VER_offREV		16	/* Revision of the CPU version */
-#define CPU_VER_offCPUID	24	/* Major CPU versions */
-
-#define CPU_VER_mskCFGID	( 0xFFFF  << CPU_VER_offCFGID )
-#define CPU_VER_mskREV		( 0xFF  << CPU_VER_offREV )
-#define CPU_VER_mskCPUID	( 0xFF  << CPU_VER_offCPUID )
-
-/******************************************************************************
- * cr1: ICM_CFG (Instruction Cache/Memory Configuration Register)
- *****************************************************************************/
-#define ICM_CFG_offISET		0	/* I-cache sets (# of cache lines) per way */
-#define ICM_CFG_offIWAY		3	/* I-cache ways */
-#define ICM_CFG_offISZ		6	/* I-cache line size */
-#define ICM_CFG_offILCK		9	/* I-cache locking support */
-#define ICM_CFG_offILMB		10	/* On-chip ILM banks */
-#define ICM_CFG_offBSAV		13	/* ILM base register alignment version */
-/* bit 15:31 reserved */
-
-#define ICM_CFG_mskISET		( 0x7  << ICM_CFG_offISET )
-#define ICM_CFG_mskIWAY		( 0x7  << ICM_CFG_offIWAY )
-#define ICM_CFG_mskISZ		( 0x7  << ICM_CFG_offISZ )
-#define ICM_CFG_mskILCK		( 0x1  << ICM_CFG_offILCK )
-#define ICM_CFG_mskILMB		( 0x7  << ICM_CFG_offILMB )
-#define ICM_CFG_mskBSAV		( 0x3  << ICM_CFG_offBSAV )
-
-/******************************************************************************
- * cr2: DCM_CFG (Data Cache/Memory Configuration Register)
- *****************************************************************************/
-#define DCM_CFG_offDSET		0	/* D-cache sets (# of cache lines) per way */
-#define DCM_CFG_offDWAY		3	/* D-cache ways */
-#define DCM_CFG_offDSZ		6	/* D-cache line size */
-#define DCM_CFG_offDLCK		9	/* D-cache locking support */
-#define DCM_CFG_offDLMB		10	/* On-chip DLM banks */
-#define DCM_CFG_offBSAV		13	/* DLM base register alignment version */
-/* bit 15:31 reserved */
-
-#define DCM_CFG_mskDSET		( 0x7  << DCM_CFG_offDSET )
-#define DCM_CFG_mskDWAY		( 0x7  << DCM_CFG_offDWAY )
-#define DCM_CFG_mskDSZ		( 0x7  << DCM_CFG_offDSZ )
-#define DCM_CFG_mskDLCK		( 0x1  << DCM_CFG_offDLCK )
-#define DCM_CFG_mskDLMB		( 0x7  << DCM_CFG_offDLMB )
-#define DCM_CFG_mskBSAV		( 0x3  << DCM_CFG_offBSAV )
-
-/******************************************************************************
- * cr3: MMU_CFG (MMU Configuration Register)
- *****************************************************************************/
-#define MMU_CFG_offMMPS		0	/* Memory management protection scheme */
-#define MMU_CFG_offMMPV		2	/* Memory management protection version number */
-#define MMU_CFG_offFATB		7	/* Fully-associative or non-fully-associative TLB */
-
-#define MMU_CFG_offTBW		8	/* TLB ways(non-associative) TBS */
-#define MMU_CFG_offTBS		11	/* TLB sets per way(non-associative) TBS */
-/* bit 14:14 reserved */
-
-#define MMU_CFG_offEP8MIN4	15	/* 8KB page supported while minimum page is 4KB */
-#define MMU_CFG_offfEPSZ	16	/* Extra page size supported */
-#define MMU_CFG_offTLBLCK	24	/* TLB locking support */
-#define MMU_CFG_offHPTWK	25	/* Hardware Page Table Walker implemented */
-#define MMU_CFG_offDE		26	/* Default endian */
-#define MMU_CFG_offNTPT		27	/* Partitions for non-translated attributes */
-#define MMU_CFG_offIVTB		28	/* Invisible TLB */
-#define MMU_CFG_offVLPT		29	/* VLPT for fast TLB fill handling implemented */
-#define MMU_CFG_offNTME		30	/* Non-translated VA to PA mapping */
-/* bit 31 reserved */
-
-#define MMU_CFG_mskMMPS		( 0x3  << MMU_CFG_offMMPS )
-#define MMU_CFG_mskMMPV		( 0x1F  << MMU_CFG_offMMPV )
-#define MMU_CFG_mskFATB		( 0x1  << MMU_CFG_offFATB )
-#define MMU_CFG_mskTBW		( 0x7  << MMU_CFG_offTBW )
-#define MMU_CFG_mskTBS		( 0x7  << MMU_CFG_offTBS )
-#define MMU_CFG_mskEP8MIN4	( 0x1  << MMU_CFG_offEP8MIN4 )
-#define MMU_CFG_mskfEPSZ	( 0xFF  << MMU_CFG_offfEPSZ )
-#define MMU_CFG_mskTLBLCK	( 0x1  << MMU_CFG_offTLBLCK )
-#define MMU_CFG_mskHPTWK	( 0x1  << MMU_CFG_offHPTWK )
-#define MMU_CFG_mskDE		( 0x1  << MMU_CFG_offDE )
-#define MMU_CFG_mskNTPT		( 0x1  << MMU_CFG_offNTPT )
-#define MMU_CFG_mskIVTB		( 0x1  << MMU_CFG_offIVTB )
-#define MMU_CFG_mskVLPT		( 0x1  << MMU_CFG_offVLPT )
-#define MMU_CFG_mskNTME		( 0x1  << MMU_CFG_offNTME )
-
-/******************************************************************************
- * cr4: MSC_CFG (Misc Configuration Register)
- *****************************************************************************/
-#define MSC_CFG_offEDM		0
-#define MSC_CFG_offLMDMA	1
-#define MSC_CFG_offPFM		2
-#define MSC_CFG_offHSMP		3
-#define MSC_CFG_offTRACE	4
-#define MSC_CFG_offDIV		5
-#define MSC_CFG_offMAC		6
-#define MSC_CFG_offAUDIO	7
-#define MSC_CFG_offL2C		9
-#define MSC_CFG_offRDREG	10
-#define MSC_CFG_offADR24	11
-#define MSC_CFG_offINTLC	12
-#define MSC_CFG_offBASEV	13
-#define MSC_CFG_offNOD		16
-/* bit 13:31 reserved */
-
-#define MSC_CFG_mskEDM		( 0x1  << MSC_CFG_offEDM )
-#define MSC_CFG_mskLMDMA	( 0x1  << MSC_CFG_offLMDMA )
-#define MSC_CFG_mskPFM		( 0x1  << MSC_CFG_offPFM )
-#define MSC_CFG_mskHSMP		( 0x1  << MSC_CFG_offHSMP )
-#define MSC_CFG_mskTRACE	( 0x1  << MSC_CFG_offTRACE )
-#define MSC_CFG_mskDIV		( 0x1  << MSC_CFG_offDIV )
-#define MSC_CFG_mskMAC		( 0x1  << MSC_CFG_offMAC )
-#define MSC_CFG_mskAUDIO	( 0x3  << MSC_CFG_offAUDIO )
-#define MSC_CFG_mskL2C		( 0x1  << MSC_CFG_offL2C )
-#define MSC_CFG_mskRDREG	( 0x1  << MSC_CFG_offRDREG )
-#define MSC_CFG_mskADR24	( 0x1  << MSC_CFG_offADR24 )
-#define MSC_CFG_mskINTLC	( 0x1  << MSC_CFG_offINTLC )
-#define MSC_CFG_mskBASEV	( 0x7  << MSC_CFG_offBASEV )
-#define MSC_CFG_mskNOD		( 0x1  << MSC_CFG_offNOD )
-
-/******************************************************************************
- * cr5: CORE_CFG (Core Identification Register)
- *****************************************************************************/
-#define CORE_ID_offCOREID	0
-/* bit 4:31 reserved */
-
-#define CORE_ID_mskCOREID	( 0xF  << CORE_ID_offCOREID )
-
-/******************************************************************************
- * cr6: FUCOP_EXIST (FPU and Coprocessor Existence Configuration Register)
- *****************************************************************************/
-#define FUCOP_EXIST_offCP0EX	0
-#define FUCOP_EXIST_offCP1EX	1
-#define FUCOP_EXIST_offCP2EX	2
-#define FUCOP_EXIST_offCP3EX	3
-#define FUCOP_EXIST_offCP0ISFPU	31
-
-#define FUCOP_EXIST_mskCP0EX	( 0x1  << FUCOP_EXIST_offCP0EX )
-#define FUCOP_EXIST_mskCP1EX	( 0x1  << FUCOP_EXIST_offCP1EX )
-#define FUCOP_EXIST_mskCP2EX	( 0x1  << FUCOP_EXIST_offCP2EX )
-#define FUCOP_EXIST_mskCP3EX	( 0x1  << FUCOP_EXIST_offCP3EX )
-#define FUCOP_EXIST_mskCP0ISFPU	( 0x1  << FUCOP_EXIST_offCP0ISFPU )
-
-/******************************************************************************
- * ir0: PSW (Processor Status Word Register)
- * ir1: IPSW (Interruption PSW Register)
- * ir2: P_IPSW (Previous IPSW Register)
- *****************************************************************************/
-#define PSW_offGIE		0	/* Global Interrupt Enable */
-#define PSW_offINTL		1	/* Interruption Stack Level */
-#define PSW_offPOM		3	/* Processor Operation Mode, User/Superuser */
-#define PSW_offBE		5	/* Endianness for data memory access, 1:MSB, 0:LSB */
-#define PSW_offIT		6	/* Enable instruction address translation */
-#define PSW_offDT		7	/* Enable data address translation */
-#define PSW_offIME		8	/* Instruction Machine Error flag */
-#define PSW_offDME		9	/* Data Machine Error flag */
-#define PSW_offDEX		10	/* Debug Exception */
-#define PSW_offHSS		11	/* Hardware Single Stepping */
-#define PSW_offDRBE		12	/* Device Register Endian Mode */
-#define PSW_offAEN		13	/* Audio ISA special feature */
-#define PSW_offWBNA		14	/* Write Back Non-Allocate */
-#define PSW_offIFCON		15	/* IFC On */
-#define PSW_offCPL		16	/* Current Priority Level */
-/* bit 19:31 reserved */
-
-#define PSW_mskGIE		( 0x1  << PSW_offGIE )
-#define PSW_mskINTL		( 0x3  << PSW_offINTL )
-#define PSW_mskPOM		( 0x3  << PSW_offPOM )
-#define PSW_mskBE		( 0x1  << PSW_offBE )
-#define PSW_mskIT		( 0x1  << PSW_offIT )
-#define PSW_mskDT		( 0x1  << PSW_offDT )
-#define PSW_mskIME		( 0x1  << PSW_offIME )
-#define PSW_mskDME		( 0x1  << PSW_offDME )
-#define PSW_mskDEX		( 0x1  << PSW_offDEX )
-#define PSW_mskHSS		( 0x1  << PSW_offHSS )
-#define PSW_mskDRBE		( 0x1  << PSW_offDRBE )
-#define PSW_mskAEN		( 0x1  << PSW_offAEN )
-#define PSW_mskWBNA		( 0x1  << PSW_offWBNA )
-#define PSW_mskIFCON		( 0x1  << PSW_offIFCON )
-#define PSW_mskCPL		( 0x7  << PSW_offCPL )
-
-#define PSW_SYSTEM		( 1 << PSW_offPOM )
-#define PSW_INTL_1		( 1 << PSW_offINTL )
-#define PSW_CPL_NO		( 0 << PSW_offCPL )
-#define PSW_CPL_ANY		( 7 << PSW_offCPL )
-
-#define PSW_clr			(PSW_mskGIE|PSW_mskINTL|PSW_mskPOM|PSW_mskIT|PSW_mskDT|PSW_mskIME|PSW_mskWBNA)
-#ifdef __NDS32_EB__
-#ifdef CONFIG_WBNA
-#define PSW_init		(PSW_mskWBNA|(1<<PSW_offINTL)|(1<<PSW_offPOM)|PSW_mskIT|PSW_mskDT|PSW_mskBE)
-#else
-#define PSW_init		((1<<PSW_offINTL)|(1<<PSW_offPOM)|PSW_mskIT|PSW_mskDT|PSW_mskBE)
-#endif
-#else
-#ifdef CONFIG_WBNA
-#define PSW_init		(PSW_mskWBNA|(1<<PSW_offINTL)|(1<<PSW_offPOM)|PSW_mskIT|PSW_mskDT)
-#else
-#define PSW_init		((1<<PSW_offINTL)|(1<<PSW_offPOM)|PSW_mskIT|PSW_mskDT)
-#endif
-#endif
-/******************************************************************************
- * ir3: IVB (Interruption Vector Base Register)
- *****************************************************************************/
-/* bit 0:12 reserved */
-#define IVB_offNIVIC		1	/* Number of input for IVIC Controller */
-#define IVB_offIVIC_VER		11	/* IVIC Version */
-#define IVB_offEVIC		13	/* External Vector Interrupt Controller mode */
-#define IVB_offESZ		14	/* Size of each vector entry */
-#define IVB_offIVBASE		16	/* BasePA of interrupt vector table */
-
-#define IVB_mskNIVIC		( 0x7  << IVB_offNIVIC )
-#define IVB_mskIVIC_VER		( 0x3  << IVB_offIVIC_VER )
-#define IVB_mskEVIC		( 0x1  << IVB_offEVIC )
-#define IVB_mskESZ		( 0x3  << IVB_offESZ )
-#define IVB_mskIVBASE		( 0xFFFF  << IVB_offIVBASE )
-
-#define IVB_valESZ4		0
-#define IVB_valESZ16		1
-#define IVB_valESZ64		2
-#define IVB_valESZ256		3
-/******************************************************************************
- * ir4: EVA (Exception Virtual Address Register)
- * ir5: P_EVA (Previous EVA Register)
- *****************************************************************************/
-
-	/* This register contains the VA that causes the exception */
-
-/******************************************************************************
- * ir6: ITYPE (Interruption Type Register)
- * ir7: P_ITYPE (Previous ITYPE Register)
- *****************************************************************************/
-#define ITYPE_offETYPE		0	/* Exception Type */
-#define ITYPE_offINST		4	/* Exception caused by insn fetch or data access */
-/* bit 5:15 reserved */
-#define ITYPE_offVECTOR		5	/* Vector */
-#define ITYPE_offSWID		16	/* SWID of debugging exception */
-/* bit 31:31 reserved */
-
-#define ITYPE_mskETYPE		( 0xF  << ITYPE_offETYPE )
-#define ITYPE_mskINST		( 0x1  << ITYPE_offINST )
-#define ITYPE_mskVECTOR		( 0x7F  << ITYPE_offVECTOR )
-#define ITYPE_mskSWID		( 0x7FFF  << ITYPE_offSWID )
-
-/* Additional definitions for ITYPE register */
-#define ITYPE_offSTYPE          16	/* Arithmetic Sub Type */
-#define ITYPE_offCPID           20	/* Co-Processor ID which generate the exception */
-
-#define ITYPE_mskSTYPE		( 0xF  << ITYPE_offSTYPE )
-#define ITYPE_mskCPID		( 0x3  << ITYPE_offCPID )
-
-/* Additional definitions of ITYPE register for FPU */
-#define FPU_DISABLE_EXCEPTION	(0x1  << ITYPE_offSTYPE)
-#define FPU_EXCEPTION		(0x2  << ITYPE_offSTYPE)
-#define FPU_CPID		0	/* FPU Co-Processor ID is 0 */
-
-#define NDS32_VECTOR_mskNONEXCEPTION	0x78
-#define NDS32_VECTOR_offEXCEPTION	8
-#define NDS32_VECTOR_offINTERRUPT	9
-
-/* Interrupt vector entry */
-#define ENTRY_RESET_NMI			0
-#define ENTRY_TLB_FILL			1
-#define ENTRY_PTE_NOT_PRESENT		2
-#define ENTRY_TLB_MISC			3
-#define ENTRY_TLB_VLPT_MISS		4
-#define ENTRY_MACHINE_ERROR		5
-#define ENTRY_DEBUG_RELATED		6
-#define ENTRY_GENERAL_EXCPETION		7
-#define ENTRY_SYSCALL			8
-
-/* PTE not present exception definition */
-#define ETYPE_NON_LEAF_PTE_NOT_PRESENT	0
-#define ETYPE_LEAF_PTE_NOT_PRESENT	1
-
-/* General exception ETYPE definition */
-#define ETYPE_ALIGNMENT_CHECK		0
-#define ETYPE_RESERVED_INSTRUCTION	1
-#define ETYPE_TRAP			2
-#define ETYPE_ARITHMETIC		3
-#define ETYPE_PRECISE_BUS_ERROR		4
-#define ETYPE_IMPRECISE_BUS_ERROR	5
-#define ETYPE_COPROCESSOR		6
-#define ETYPE_RESERVED_VALUE		7
-#define ETYPE_NONEXISTENT_MEM_ADDRESS	8
-#define ETYPE_MPZIU_CONTROL		9
-#define ETYPE_NEXT_PRECISE_STACK_OFL	10
-
-/* Kerenl reserves software ID */
-#define SWID_RAISE_INTERRUPT_LEVEL	0x1a	/* SWID_RAISE_INTERRUPT_LEVEL is used to
-						 * raise interrupt level for debug exception
-						 */
-
-/******************************************************************************
- * ir8: MERR (Machine Error Log Register)
- *****************************************************************************/
-/* bit 0:30 reserved */
-#define MERR_offBUSERR		31	/* Bus error caused by a load insn */
-
-#define MERR_mskBUSERR		( 0x1  << MERR_offBUSERR )
-
-/******************************************************************************
- * ir9: IPC (Interruption Program Counter Register)
- * ir10: P_IPC (Previous IPC Register)
- * ir11: OIPC (Overflow Interruption Program Counter Register)
- *****************************************************************************/
-
-	/* This is the shadow stack register of the Program Counter */
-
-/******************************************************************************
- * ir12: P_P0 (Previous P0 Register)
- * ir13: P_P1 (Previous P1 Register)
- *****************************************************************************/
-
-	/* These are shadow registers of $p0 and $p1 */
-
-/******************************************************************************
- * ir14: INT_MASK (Interruption Masking Register)
- *****************************************************************************/
-#define INT_MASK_offH0IM	0	/* Hardware Interrupt 0 Mask bit */
-#define INT_MASK_offH1IM	1	/* Hardware Interrupt 1 Mask bit */
-#define INT_MASK_offH2IM	2	/* Hardware Interrupt 2 Mask bit */
-#define INT_MASK_offH3IM	3	/* Hardware Interrupt 3 Mask bit */
-#define INT_MASK_offH4IM	4	/* Hardware Interrupt 4 Mask bit */
-#define INT_MASK_offH5IM	5	/* Hardware Interrupt 5 Mask bit */
-/* bit 6:15 reserved */
-#define INT_MASK_offSIM		16	/* Software Interrupt Mask bit */
-/* bit 17:29 reserved */
-#define INT_MASK_offIDIVZE	30	/* Enable detection for Divide-By-Zero */
-#define INT_MASK_offDSSIM	31	/* Default Single Stepping Interruption Mask */
-
-#define INT_MASK_mskH0IM	( 0x1  << INT_MASK_offH0IM )
-#define INT_MASK_mskH1IM	( 0x1  << INT_MASK_offH1IM )
-#define INT_MASK_mskH2IM	( 0x1  << INT_MASK_offH2IM )
-#define INT_MASK_mskH3IM	( 0x1  << INT_MASK_offH3IM )
-#define INT_MASK_mskH4IM	( 0x1  << INT_MASK_offH4IM )
-#define INT_MASK_mskH5IM	( 0x1  << INT_MASK_offH5IM )
-#define INT_MASK_mskSIM		( 0x1  << INT_MASK_offSIM )
-#define INT_MASK_mskIDIVZE	( 0x1  << INT_MASK_offIDIVZE )
-#define INT_MASK_mskDSSIM	( 0x1  << INT_MASK_offDSSIM )
-
-#define INT_MASK_INITAIAL_VAL	(INT_MASK_mskDSSIM|INT_MASK_mskIDIVZE)
-
-/******************************************************************************
- * ir15: INT_PEND (Interrupt Pending Register)
- *****************************************************************************/
-#define INT_PEND_offH0I		0	/* Hardware Interrupt 0 pending bit */
-#define INT_PEND_offH1I		1	/* Hardware Interrupt 1 pending bit */
-#define INT_PEND_offH2I		2	/* Hardware Interrupt 2 pending bit */
-#define INT_PEND_offH3I		3	/* Hardware Interrupt 3 pending bit */
-#define INT_PEND_offH4I		4	/* Hardware Interrupt 4 pending bit */
-#define INT_PEND_offH5I		5	/* Hardware Interrupt 5 pending bit */
-
-#define INT_PEND_offCIPL	0	/* Current Interrupt Priority Level */
-
-/* bit 6:15 reserved */
-#define INT_PEND_offSWI		16	/* Software Interrupt pending bit */
-/* bit 17:31 reserved */
-
-#define INT_PEND_mskH0I		( 0x1  << INT_PEND_offH0I )
-#define INT_PEND_mskH1I		( 0x1  << INT_PEND_offH1I )
-#define INT_PEND_mskH2I		( 0x1  << INT_PEND_offH2I )
-#define INT_PEND_mskH3I		( 0x1  << INT_PEND_offH3I )
-#define INT_PEND_mskH4I		( 0x1  << INT_PEND_offH4I )
-#define INT_PEND_mskH5I		( 0x1  << INT_PEND_offH5I )
-#define INT_PEND_mskCIPL	( 0x1  << INT_PEND_offCIPL )
-#define INT_PEND_mskSWI		( 0x1  << INT_PEND_offSWI )
-
-/******************************************************************************
- * mr0: MMU_CTL (MMU Control Register)
- *****************************************************************************/
-#define MMU_CTL_offD		0	/* Default minimum page size */
-#define MMU_CTL_offNTC0		1	/* Non-Translated Cachebility of partition 0 */
-#define MMU_CTL_offNTC1		3	/* Non-Translated Cachebility of partition 1 */
-#define MMU_CTL_offNTC2		5	/* Non-Translated Cachebility of partition 2 */
-#define MMU_CTL_offNTC3		7	/* Non-Translated Cachebility of partition 3 */
-#define MMU_CTL_offTBALCK	9	/* TLB all-lock resolution scheme */
-#define MMU_CTL_offMPZIU	10	/* Multiple Page Size In Use bit */
-#define MMU_CTL_offNTM0		11	/* Non-Translated VA to PA of partition 0 */
-#define MMU_CTL_offNTM1		13	/* Non-Translated VA to PA of partition 1 */
-#define MMU_CTL_offNTM2		15	/* Non-Translated VA to PA of partition 2 */
-#define MMU_CTL_offNTM3		17	/* Non-Translated VA to PA of partition 3 */
-#define MMU_CTL_offUNA		23	/* Unaligned access */
-/* bit 24:31 reserved */
-
-#define MMU_CTL_mskD		( 0x1  << MMU_CTL_offD )
-#define MMU_CTL_mskNTC0		( 0x3  << MMU_CTL_offNTC0 )
-#define MMU_CTL_mskNTC1		( 0x3  << MMU_CTL_offNTC1 )
-#define MMU_CTL_mskNTC2		( 0x3  << MMU_CTL_offNTC2 )
-#define MMU_CTL_mskNTC3		( 0x3  << MMU_CTL_offNTC3 )
-#define MMU_CTL_mskTBALCK	( 0x1  << MMU_CTL_offTBALCK )
-#define MMU_CTL_mskMPZIU	( 0x1  << MMU_CTL_offMPZIU )
-#define MMU_CTL_mskNTM0		( 0x3  << MMU_CTL_offNTM0 )
-#define MMU_CTL_mskNTM1         ( 0x3  << MMU_CTL_offNTM1 )
-#define MMU_CTL_mskNTM2         ( 0x3  << MMU_CTL_offNTM2 )
-#define MMU_CTL_mskNTM3         ( 0x3  << MMU_CTL_offNTM3 )
-
-#define MMU_CTL_D4KB		0
-#define MMU_CTL_D8KB		1
-#define MMU_CTL_UNA		( 0x1  << MMU_CTL_offUNA )
-
-#define MMU_CTL_CACHEABLE_NON   0
-#define MMU_CTL_CACHEABLE_WB	2
-#define MMU_CTL_CACHEABLE_WT	3
-
-/******************************************************************************
- * mr1: L1_PPTB (L1 Physical Page Table Base Register)
- *****************************************************************************/
-#define L1_PPTB_offNV		0	/* Enable Hardware Page Table Walker (HPTWK) */
-/* bit 1:11 reserved */
-#define L1_PPTB_offBASE		12	/* First level physical page table base address */
-
-#define L1_PPTB_mskNV		( 0x1  << L1_PPTB_offNV )
-#define L1_PPTB_mskBASE		( 0xFFFFF  << L1_PPTB_offBASE )
-
-/******************************************************************************
- * mr2: TLB_VPN (TLB Access VPN Register)
- *****************************************************************************/
-/* bit 0:11 reserved */
-#define TLB_VPN_offVPN		12	/* Virtual Page Number */
-
-#define TLB_VPN_mskVPN		( 0xFFFFF  << TLB_VPN_offVPN )
-
-/******************************************************************************
- * mr3: TLB_DATA (TLB Access Data Register)
- *****************************************************************************/
-#define TLB_DATA_offV		0	/* PTE is valid and present */
-#define TLB_DATA_offM		1	/* Page read/write access privilege */
-#define TLB_DATA_offD		4	/* Dirty bit */
-#define TLB_DATA_offX		5	/* Executable bit */
-#define TLB_DATA_offA		6	/* Access bit */
-#define TLB_DATA_offG		7	/* Global page (shared across contexts) */
-#define TLB_DATA_offC		8	/* Cacheability atribute */
-/* bit 11:11 reserved */
-#define TLB_DATA_offPPN		12	/* Phisical Page Number */
-
-#define TLB_DATA_mskV		( 0x1  << TLB_DATA_offV )
-#define TLB_DATA_mskM		( 0x7  << TLB_DATA_offM )
-#define TLB_DATA_mskD		( 0x1  << TLB_DATA_offD )
-#define TLB_DATA_mskX		( 0x1  << TLB_DATA_offX )
-#define TLB_DATA_mskA		( 0x1  << TLB_DATA_offA )
-#define TLB_DATA_mskG		( 0x1  << TLB_DATA_offG )
-#define TLB_DATA_mskC		( 0x7  << TLB_DATA_offC )
-#define TLB_DATA_mskPPN		( 0xFFFFF  << TLB_DATA_offPPN )
-
-#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
-#define TLB_DATA_kernel_text_attr	(TLB_DATA_mskV|TLB_DATA_mskM|TLB_DATA_mskD|TLB_DATA_mskX|TLB_DATA_mskG|TLB_DATA_mskC)
-#else
-#define TLB_DATA_kernel_text_attr	(TLB_DATA_mskV|TLB_DATA_mskM|TLB_DATA_mskD|TLB_DATA_mskX|TLB_DATA_mskG|(0x6 << TLB_DATA_offC))
-#endif
-
-/******************************************************************************
- * mr4: TLB_MISC (TLB Access Misc Register)
- *****************************************************************************/
-#define TLB_MISC_offACC_PSZ	0	/* Page size of a PTE entry */
-#define TLB_MISC_offCID		4	/* Context id */
-/* bit 13:31 reserved */
-
-#define TLB_MISC_mskACC_PSZ    ( 0xF  << TLB_MISC_offACC_PSZ )
-#define TLB_MISC_mskCID        ( 0x1FF  << TLB_MISC_offCID )
-
-/******************************************************************************
- * mr5: VLPT_IDX (Virtual Linear Page Table Index Register)
- *****************************************************************************/
-#define VLPT_IDX_offZERO	0	/* Always 0 */
-#define VLPT_IDX_offEVPN	2	/* Exception Virtual Page Number */
-#define VLPT_IDX_offVLPTB	22	/* Base VA of VLPT */
-
-#define VLPT_IDX_mskZERO	( 0x3  << VLPT_IDX_offZERO )
-#define VLPT_IDX_mskEVPN	( 0xFFFFF  << VLPT_IDX_offEVPN )
-#define VLPT_IDX_mskVLPTB	( 0x3FF  << VLPT_IDX_offVLPTB )
-
-/******************************************************************************
- * mr6: ILMB (Instruction Local Memory Base Register)
- *****************************************************************************/
-#define ILMB_offIEN		0	/* Enable ILM */
-#define ILMB_offILMSZ		1	/* Size of ILM */
-/* bit 5:19 reserved */
-#define ILMB_offIBPA		20	/* Base PA of ILM */
-
-#define ILMB_mskIEN		( 0x1  << ILMB_offIEN )
-#define ILMB_mskILMSZ		( 0xF  << ILMB_offILMSZ )
-#define ILMB_mskIBPA		( 0xFFF  << ILMB_offIBPA )
-
-/******************************************************************************
- * mr7: DLMB (Data Local Memory Base Register)
- *****************************************************************************/
-#define DLMB_offDEN		0	/* Enable DLM */
-#define DLMB_offDLMSZ		1	/* Size of DLM */
-#define DLMB_offDBM		5	/* Enable Double-Buffer Mode for DLM */
-#define DLMB_offDBB		6	/* Double-buffer bank which can be accessed by the processor */
-/* bit 7:19 reserved */
-#define DLMB_offDBPA		20	/* Base PA of DLM */
-
-#define DLMB_mskDEN		( 0x1  << DLMB_offDEN )
-#define DLMB_mskDLMSZ		( 0xF  << DLMB_offDLMSZ )
-#define DLMB_mskDBM		( 0x1  << DLMB_offDBM )
-#define DLMB_mskDBB		( 0x1  << DLMB_offDBB )
-#define DLMB_mskDBPA		( 0xFFF  << DLMB_offDBPA )
-
-/******************************************************************************
- * mr8: CACHE_CTL (Cache Control Register)
- *****************************************************************************/
-#define CACHE_CTL_offIC_EN	0	/* Enable I-cache */
-#define CACHE_CTL_offDC_EN	1	/* Enable D-cache */
-#define CACHE_CTL_offICALCK	2	/* I-cache all-lock resolution scheme */
-#define CACHE_CTL_offDCALCK	3	/* D-cache all-lock resolution scheme */
-#define CACHE_CTL_offDCCWF	4	/* Enable D-cache Critical Word Forwarding */
-#define CACHE_CTL_offDCPMW	5	/* Enable D-cache concurrent miss and write-back processing */
-/* bit 6:31 reserved */
-
-#define CACHE_CTL_mskIC_EN	( 0x1  << CACHE_CTL_offIC_EN )
-#define CACHE_CTL_mskDC_EN	( 0x1  << CACHE_CTL_offDC_EN )
-#define CACHE_CTL_mskICALCK	( 0x1  << CACHE_CTL_offICALCK )
-#define CACHE_CTL_mskDCALCK	( 0x1  << CACHE_CTL_offDCALCK )
-#define CACHE_CTL_mskDCCWF	( 0x1  << CACHE_CTL_offDCCWF )
-#define CACHE_CTL_mskDCPMW	( 0x1  << CACHE_CTL_offDCPMW )
-
-/******************************************************************************
- * mr9: HSMP_SADDR (High Speed Memory Port Starting Address)
- *****************************************************************************/
-#define HSMP_SADDR_offEN	0	/* Enable control bit for the High Speed Memory port */
-/* bit 1:19 reserved */
-
-#define HSMP_SADDR_offRANGE	1	/* Denote the address range (only defined in HSMP v2 ) */
-#define HSMP_SADDR_offSADDR	20	/* Starting base PA of the High Speed Memory Port region */
-
-#define HSMP_SADDR_mskEN	( 0x1  << HSMP_SADDR_offEN )
-#define HSMP_SADDR_mskRANGE	( 0xFFF  << HSMP_SADDR_offRANGE )
-#define HSMP_SADDR_mskSADDR	( 0xFFF  << HSMP_SADDR_offSADDR )
-
-/******************************************************************************
- * mr10: HSMP_EADDR (High Speed Memory Port Ending Address)
- *****************************************************************************/
-/* bit 0:19 reserved */
-#define HSMP_EADDR_offEADDR	20
-
-#define HSMP_EADDR_mskEADDR	( 0xFFF  << HSMP_EADDR_offEADDR )
-
-/******************************************************************************
- * dr0+(n*5): BPCn (n=0-7) (Breakpoint Control Register)
- *****************************************************************************/
-#define BPC_offWP		0	/* Configuration of BPAn */
-#define BPC_offEL		1	/* Enable BPAn */
-#define BPC_offS		2	/* Data address comparison for a store instruction */
-#define BPC_offP		3	/* Compared data address is PA */
-#define BPC_offC		4	/* CID value is compared with the BPCIDn register */
-#define BPC_offBE0		5	/* Enable byte mask for the comparison with register */
-#define BPC_offBE1		6	/* Enable byte mask for the comparison with register */
-#define BPC_offBE2		7	/* Enable byte mask for the comparison with register */
-#define BPC_offBE3		8	/* Enable byte mask for the comparison with register */
-#define BPC_offT		9	/* Enable breakpoint Embedded Tracer triggering operation */
-
-#define BPC_mskWP		( 0x1  << BPC_offWP )
-#define BPC_mskEL		( 0x1  << BPC_offEL )
-#define BPC_mskS		( 0x1  << BPC_offS )
-#define BPC_mskP		( 0x1  << BPC_offP )
-#define BPC_mskC		( 0x1  << BPC_offC )
-#define BPC_mskBE0		( 0x1  << BPC_offBE0 )
-#define BPC_mskBE1		( 0x1  << BPC_offBE1 )
-#define BPC_mskBE2		( 0x1  << BPC_offBE2 )
-#define BPC_mskBE3		( 0x1  << BPC_offBE3 )
-#define BPC_mskT		( 0x1  << BPC_offT )
-
-/******************************************************************************
- * dr1+(n*5): BPAn (n=0-7) (Breakpoint Address Register)
- *****************************************************************************/
-
-	/* These registers contain break point address */
-
-/******************************************************************************
- * dr2+(n*5): BPAMn (n=0-7) (Breakpoint Address Mask Register)
- *****************************************************************************/
-
-	/* These registerd contain the address comparison mask for the BPAn register */
-
-/******************************************************************************
- * dr3+(n*5): BPVn (n=0-7) Breakpoint Data Value Register
- *****************************************************************************/
-
-	/* The BPVn register contains the data value that will be compared with the
-	 * incoming load/store data value */
-
-/******************************************************************************
- * dr4+(n*5): BPCIDn (n=0-7) (Breakpoint Context ID Register)
- *****************************************************************************/
-#define BPCID_offCID		0	/* CID that will be compared with a process's CID */
-/* bit 9:31 reserved */
-
-#define BPCID_mskCID		( 0x1FF  << BPCID_offCID )
-
-/******************************************************************************
- * dr40: EDM_CFG (EDM Configuration Register)
- *****************************************************************************/
-#define EDM_CFG_offBC		0	/* Number of hardware breakpoint sets implemented */
-#define EDM_CFG_offDIMU		3	/* Debug Instruction Memory Unit exists */
-/* bit 4:15 reserved */
-#define EDM_CFG_offVER		16	/* EDM version */
-
-#define EDM_CFG_mskBC		( 0x7  << EDM_CFG_offBC )
-#define EDM_CFG_mskDIMU		( 0x1  << EDM_CFG_offDIMU )
-#define EDM_CFG_mskVER		( 0xFFFF  << EDM_CFG_offVER )
-
-/******************************************************************************
- * dr41: EDMSW (EDM Status Word)
- *****************************************************************************/
-#define EDMSW_offWV		0	/* Write Valid */
-#define EDMSW_offRV		1	/* Read Valid */
-#define EDMSW_offDE		2	/* Debug exception has occurred for this core */
-/* bit 3:31 reserved */
-
-#define EDMSW_mskWV		( 0x1  << EDMSW_offWV )
-#define EDMSW_mskRV		( 0x1  << EDMSW_offRV )
-#define EDMSW_mskDE		( 0x1  << EDMSW_offDE )
-
-/******************************************************************************
- * dr42: EDM_CTL (EDM Control Register)
- *****************************************************************************/
-/* bit 0:30 reserved */
-#define EDM_CTL_offV3_EDM_MODE	6	/* EDM compatibility control bit */
-#define EDM_CTL_offDEH_SEL	31	/* Controls where debug exception is directed to */
-
-#define EDM_CTL_mskV3_EDM_MODE	( 0x1 << EDM_CTL_offV3_EDM_MODE )
-#define EDM_CTL_mskDEH_SEL	( 0x1 << EDM_CTL_offDEH_SEL )
-
-/******************************************************************************
- * dr43: EDM_DTR (EDM Data Transfer Register)
- *****************************************************************************/
-
-	/* This is used to exchange data between the embedded EDM logic
-	 * and the processor core */
-
-/******************************************************************************
- * dr44: BPMTC (Breakpoint Match Trigger Counter Register)
- *****************************************************************************/
-#define BPMTC_offBPMTC		0	/* Breakpoint match trigger counter value */
-/* bit 16:31 reserved */
-
-#define BPMTC_mskBPMTC		( 0xFFFF  << BPMTC_offBPMTC )
-
-/******************************************************************************
- * dr45: DIMBR (Debug Instruction Memory Base Register)
- *****************************************************************************/
-/* bit 0:11 reserved */
-#define DIMBR_offDIMB		12	/* Base address of the Debug Instruction Memory (DIM) */
-#define DIMBR_mskDIMB		( 0xFFFFF  << DIMBR_offDIMB )
-
-/******************************************************************************
- * dr46: TECR0(Trigger Event Control register 0)
- * dr47: TECR1 (Trigger Event Control register 1)
- *****************************************************************************/
-#define TECR_offBP		0	/* Controld which BP is used as a trigger source */
-#define TECR_offNMI		8	/* Use NMI as a trigger source */
-#define TECR_offHWINT		9	/* Corresponding interrupt is used as a trigger source */
-#define TECR_offEVIC		15	/* Enable HWINT as a trigger source in EVIC mode */
-#define TECR_offSYS		16	/* Enable SYSCALL instruction as a trigger source */
-#define TECR_offDBG		17	/* Enable debug exception as a trigger source */
-#define TECR_offMRE		18	/* Enable MMU related exception as a trigger source */
-#define TECR_offE		19	/* An exception is used as a trigger source */
-/* bit 20:30 reserved */
-#define TECR_offL		31	/* Link/Cascade TECR0 trigger event to TECR1 trigger event */
-
-#define TECR_mskBP		( 0xFF  << TECR_offBP )
-#define TECR_mskNMI		( 0x1  << TECR_offBNMI )
-#define TECR_mskHWINT		( 0x3F  << TECR_offBHWINT )
-#define TECR_mskEVIC		( 0x1  << TECR_offBEVIC )
-#define TECR_mskSYS		( 0x1  << TECR_offBSYS )
-#define TECR_mskDBG		( 0x1  << TECR_offBDBG )
-#define TECR_mskMRE		( 0x1  << TECR_offBMRE )
-#define TECR_mskE		( 0x1  << TECR_offE )
-#define TECR_mskL		( 0x1  << TECR_offL )
-
-/******************************************************************************
- * pfr0-2: PFMC0-2 (Performance Counter Register 0-2)
- *****************************************************************************/
-
-	/* These registers contains performance event count */
-
-/******************************************************************************
- * pfr3: PFM_CTL (Performance Counter Control Register)
- *****************************************************************************/
-#define PFM_CTL_offEN0		0	/* Enable PFMC0 */
-#define PFM_CTL_offEN1		1	/* Enable PFMC1 */
-#define PFM_CTL_offEN2		2	/* Enable PFMC2 */
-#define PFM_CTL_offIE0		3	/* Enable interrupt for PFMC0 */
-#define PFM_CTL_offIE1		4	/* Enable interrupt for PFMC1 */
-#define PFM_CTL_offIE2		5	/* Enable interrupt for PFMC2 */
-#define PFM_CTL_offOVF0		6	/* Overflow bit of PFMC0 */
-#define PFM_CTL_offOVF1		7	/* Overflow bit of PFMC1 */
-#define PFM_CTL_offOVF2		8	/* Overflow bit of PFMC2 */
-#define PFM_CTL_offKS0		9	/* Enable superuser mode event counting for PFMC0 */
-#define PFM_CTL_offKS1		10	/* Enable superuser mode event counting for PFMC1 */
-#define PFM_CTL_offKS2		11	/* Enable superuser mode event counting for PFMC2 */
-#define PFM_CTL_offKU0		12	/* Enable user mode event counting for PFMC0 */
-#define PFM_CTL_offKU1		13	/* Enable user mode event counting for PFMC1 */
-#define PFM_CTL_offKU2		14	/* Enable user mode event counting for PFMC2 */
-#define PFM_CTL_offSEL0		15	/* The event selection for PFMC0 */
-#define PFM_CTL_offSEL1		16	/* The event selection for PFMC1 */
-#define PFM_CTL_offSEL2		22	/* The event selection for PFMC2 */
-/* bit 28:31 reserved */
-
-#define PFM_CTL_mskEN0		( 0x01  << PFM_CTL_offEN0 )
-#define PFM_CTL_mskEN1		( 0x01  << PFM_CTL_offEN1 )
-#define PFM_CTL_mskEN2		( 0x01  << PFM_CTL_offEN2 )
-#define PFM_CTL_mskIE0		( 0x01  << PFM_CTL_offIE0 )
-#define PFM_CTL_mskIE1		( 0x01  << PFM_CTL_offIE1 )
-#define PFM_CTL_mskIE2		( 0x01  << PFM_CTL_offIE2 )
-#define PFM_CTL_mskOVF0		( 0x01  << PFM_CTL_offOVF0 )
-#define PFM_CTL_mskOVF1		( 0x01  << PFM_CTL_offOVF1 )
-#define PFM_CTL_mskOVF2		( 0x01  << PFM_CTL_offOVF2 )
-#define PFM_CTL_mskKS0		( 0x01  << PFM_CTL_offKS0 )
-#define PFM_CTL_mskKS1		( 0x01  << PFM_CTL_offKS1 )
-#define PFM_CTL_mskKS2		( 0x01  << PFM_CTL_offKS2 )
-#define PFM_CTL_mskKU0		( 0x01  << PFM_CTL_offKU0 )
-#define PFM_CTL_mskKU1		( 0x01  << PFM_CTL_offKU1 )
-#define PFM_CTL_mskKU2		( 0x01  << PFM_CTL_offKU2 )
-#define PFM_CTL_mskSEL0		( 0x01  << PFM_CTL_offSEL0 )
-#define PFM_CTL_mskSEL1		( 0x3F  << PFM_CTL_offSEL1 )
-#define PFM_CTL_mskSEL2		( 0x3F  << PFM_CTL_offSEL2 )
-
-/******************************************************************************
- * SDZ_CTL (Structure Downsizing Control Register)
- *****************************************************************************/
-#define SDZ_CTL_offICDZ		0	/* I-cache downsizing control */
-#define SDZ_CTL_offDCDZ		3	/* D-cache downsizing control */
-#define SDZ_CTL_offMTBDZ	6	/* MTLB downsizing control */
-#define SDZ_CTL_offBTBDZ	9	/* Branch Target Table downsizing control */
-/* bit 12:31 reserved */
-#define SDZ_CTL_mskICDZ		( 0x07  << SDZ_CTL_offICDZ )
-#define SDZ_CTL_mskDCDZ		( 0x07  << SDZ_CTL_offDCDZ )
-#define SDZ_CTL_mskMTBDZ	( 0x07  << SDZ_CTL_offMTBDZ )
-#define SDZ_CTL_mskBTBDZ	( 0x07  << SDZ_CTL_offBTBDZ )
-
-/******************************************************************************
- * N13MISC_CTL (N13 Miscellaneous Control Register)
- *****************************************************************************/
-#define N13MISC_CTL_offBTB	0	/* Disable Branch Target Buffer */
-#define N13MISC_CTL_offRTP	1	/* Disable Return Target Predictor */
-#define N13MISC_CTL_offPTEPF	2	/* Disable HPTWK L2 PTE pefetch */
-#define N13MISC_CTL_offSP_SHADOW_EN	4	/* Enable shadow stack pointers */
-#define MISC_CTL_offHWPRE      11      /* Enable HardWare PREFETCH */
-/* bit 6, 9:31 reserved */
-
-#define N13MISC_CTL_makBTB	( 0x1  << N13MISC_CTL_offBTB )
-#define N13MISC_CTL_makRTP	( 0x1  << N13MISC_CTL_offRTP )
-#define N13MISC_CTL_makPTEPF	( 0x1  << N13MISC_CTL_offPTEPF )
-#define N13MISC_CTL_makSP_SHADOW_EN	( 0x1  << N13MISC_CTL_offSP_SHADOW_EN )
-#define MISC_CTL_makHWPRE_EN     ( 0x1  << MISC_CTL_offHWPRE )
-
-#ifdef CONFIG_HW_PRE
-#define MISC_init	(N13MISC_CTL_makBTB|N13MISC_CTL_makRTP|N13MISC_CTL_makSP_SHADOW_EN|MISC_CTL_makHWPRE_EN)
-#else
-#define MISC_init	(N13MISC_CTL_makBTB|N13MISC_CTL_makRTP|N13MISC_CTL_makSP_SHADOW_EN)
-#endif
-
-/******************************************************************************
- * PRUSR_ACC_CTL (Privileged Resource User Access Control Registers)
- *****************************************************************************/
-#define PRUSR_ACC_CTL_offDMA_EN	0	/* Allow user mode access of DMA registers */
-#define PRUSR_ACC_CTL_offPFM_EN	1	/* Allow user mode access of PFM registers */
-
-#define PRUSR_ACC_CTL_mskDMA_EN	( 0x1  << PRUSR_ACC_CTL_offDMA_EN )
-#define PRUSR_ACC_CTL_mskPFM_EN	( 0x1  << PRUSR_ACC_CTL_offPFM_EN )
-
-/******************************************************************************
- * dmar0: DMA_CFG (DMA Configuration Register)
- *****************************************************************************/
-#define DMA_CFG_offNCHN		0	/* The number of DMA channels implemented */
-#define DMA_CFG_offUNEA		2	/* Un-aligned External Address transfer feature */
-#define DMA_CFG_off2DET		3	/* 2-D Element Transfer feature */
-/* bit 4:15 reserved */
-#define DMA_CFG_offVER		16	/* DMA architecture and implementation version */
-
-#define DMA_CFG_mskNCHN		( 0x3  << DMA_CFG_offNCHN )
-#define DMA_CFG_mskUNEA		( 0x1  << DMA_CFG_offUNEA )
-#define DMA_CFG_msk2DET		( 0x1  << DMA_CFG_off2DET )
-#define DMA_CFG_mskVER		( 0xFFFF  << DMA_CFG_offVER )
-
-/******************************************************************************
- * dmar1: DMA_GCSW (DMA Global Control and Status Word Register)
- *****************************************************************************/
-#define DMA_GCSW_offC0STAT	0	/* DMA channel 0 state */
-#define DMA_GCSW_offC1STAT	3	/* DMA channel 1 state */
-/* bit 6:11 reserved */
-#define DMA_GCSW_offC0INT	12	/* DMA channel 0 generate interrupt */
-#define DMA_GCSW_offC1INT	13	/* DMA channel 1 generate interrupt */
-/* bit 14:30 reserved */
-#define DMA_GCSW_offEN		31	/* Enable DMA engine */
-
-#define DMA_GCSW_mskC0STAT	( 0x7  << DMA_GCSW_offC0STAT )
-#define DMA_GCSW_mskC1STAT	( 0x7  << DMA_GCSW_offC1STAT )
-#define DMA_GCSW_mskC0INT	( 0x1  << DMA_GCSW_offC0INT )
-#define DMA_GCSW_mskC1INT	( 0x1  << DMA_GCSW_offC1INT )
-#define DMA_GCSW_mskEN		( 0x1  << DMA_GCSW_offEN )
-
-/******************************************************************************
- * dmar2: DMA_CHNSEL (DMA Channel Selection Register)
- *****************************************************************************/
-#define DMA_CHNSEL_offCHAN	0	/* Selected channel number */
-/* bit 2:31 reserved */
-
-#define DMA_CHNSEL_mskCHAN	( 0x3  << DMA_CHNSEL_offCHAN )
-
-/******************************************************************************
- * dmar3: DMA_ACT (DMA Action Register)
- *****************************************************************************/
-#define DMA_ACT_offACMD		0	/* DMA Action Command */
-/* bit 2:31 reserved */
-#define DMA_ACT_mskACMD		( 0x3  << DMA_ACT_offACMD )
-
-/******************************************************************************
- * dmar4: DMA_SETUP (DMA Setup Register)
- *****************************************************************************/
-#define DMA_SETUP_offLM		0	/* Local Memory Selection */
-#define DMA_SETUP_offTDIR	1	/* Transfer Direction */
-#define DMA_SETUP_offTES	2	/* Transfer Element Size */
-#define DMA_SETUP_offESTR	4	/* External memory transfer Stride */
-#define DMA_SETUP_offCIE	16	/* Interrupt Enable on Completion */
-#define DMA_SETUP_offSIE	17	/* Interrupt Enable on explicit Stop */
-#define DMA_SETUP_offEIE	18	/* Interrupt Enable on Error */
-#define DMA_SETUP_offUE		19	/* Enable the Un-aligned External Address */
-#define DMA_SETUP_off2DE	20	/* Enable the 2-D External Transfer */
-#define DMA_SETUP_offCOA	21	/* Transfer Coalescable */
-/* bit 22:31 reserved */
-
-#define DMA_SETUP_mskLM		( 0x1  << DMA_SETUP_offLM )
-#define DMA_SETUP_mskTDIR	( 0x1  << DMA_SETUP_offTDIR )
-#define DMA_SETUP_mskTES	( 0x3  << DMA_SETUP_offTES )
-#define DMA_SETUP_mskESTR	( 0xFFF  << DMA_SETUP_offESTR )
-#define DMA_SETUP_mskCIE	( 0x1  << DMA_SETUP_offCIE )
-#define DMA_SETUP_mskSIE	( 0x1  << DMA_SETUP_offSIE )
-#define DMA_SETUP_mskEIE	( 0x1  << DMA_SETUP_offEIE )
-#define DMA_SETUP_mskUE		( 0x1  << DMA_SETUP_offUE )
-#define DMA_SETUP_msk2DE	( 0x1  << DMA_SETUP_off2DE )
-#define DMA_SETUP_mskCOA	( 0x1  << DMA_SETUP_offCOA )
-
-/******************************************************************************
- * dmar5: DMA_ISADDR (DMA Internal Start Address Register)
- *****************************************************************************/
-#define DMA_ISADDR_offISADDR	0	/* Internal Start Address */
-/* bit 20:31 reserved */
-#define DMA_ISADDR_mskISADDR	( 0xFFFFF  << DMA_ISADDR_offISADDR )
-
-/******************************************************************************
- * dmar6: DMA_ESADDR (DMA External Start Address Register)
- *****************************************************************************/
-/* This register holds External Start Address */
-
-/******************************************************************************
- * dmar7: DMA_TCNT (DMA Transfer Element Count Register)
- *****************************************************************************/
-#define DMA_TCNT_offTCNT	0	/* DMA transfer element count */
-/* bit 18:31 reserved */
-#define DMA_TCNT_mskTCNT	( 0x3FFFF  << DMA_TCNT_offTCNT )
-
-/******************************************************************************
- * dmar8: DMA_STATUS (DMA Status Register)
- *****************************************************************************/
-#define DMA_STATUS_offSTAT	0	/* DMA channel state */
-#define DMA_STATUS_offSTUNA	3	/* Un-aligned error on External Stride value */
-#define DMA_STATUS_offDERR	4	/* DMA Transfer Disruption Error */
-#define DMA_STATUS_offEUNA	5	/* Un-aligned error on the External address */
-#define DMA_STATUS_offIUNA	6	/* Un-aligned error on the Internal address */
-#define DMA_STATUS_offIOOR	7	/* Out-Of-Range error on the Internal address */
-#define DMA_STATUS_offEBUS	8	/* Bus Error on an External DMA transfer */
-#define DMA_STATUS_offESUP	9	/* DMA setup error */
-/* bit 10:31 reserved */
-
-#define DMA_STATUS_mskSTAT	( 0x7  << DMA_STATUS_offSTAT )
-#define DMA_STATUS_mskSTUNA	( 0x1  << DMDMA_STATUS_offSTUNA )
-#define DMA_STATUS_mskDERR	( 0x1  << DMDMA_STATUS_offDERR )
-#define DMA_STATUS_mskEUNA	( 0x1  << DMDMA_STATUS_offEUNA )
-#define DMA_STATUS_mskIUNA	( 0x1  << DMDMA_STATUS_offIUNA )
-#define DMA_STATUS_mskIOOR	( 0x1  << DMDMA_STATUS_offIOOR )
-#define DMA_STATUS_mskEBUS	( 0x1  << DMDMA_STATUS_offEBUS )
-#define DMA_STATUS_mskESUP	( 0x1  << DMDMA_STATUS_offESUP )
-
-/******************************************************************************
- * dmar9: DMA_2DSET (DMA 2D Setup Register)
- *****************************************************************************/
-#define DMA_2DSET_offWECNT	0	/* The Width Element Count for a 2-D region */
-#define DMA_2DSET_offHTSTR	16	/* The Height Stride for a 2-D region */
-
-#define DMA_2DSET_mskHTSTR	( 0xFFFF  << DMA_2DSET_offHTSTR )
-#define DMA_2DSET_mskWECNT	( 0xFFFF  << DMA_2DSET_offWECNT )
-
-/******************************************************************************
- * dmar10: DMA_2DSCTL (DMA 2D Startup Control Register)
- *****************************************************************************/
-#define DMA_2DSCTL_offSTWECNT	0	/* Startup Width Element Count for a 2-D region */
-/* bit 16:31 reserved */
-
-#define DMA_2DSCTL_mskSTWECNT	( 0xFFFF  << DMA_2DSCTL_offSTWECNT )
-
-/******************************************************************************
- * fpcsr: FPCSR (Floating-Point Control Status Register)
- *****************************************************************************/
-#define FPCSR_offRM		0
-#define FPCSR_offIVO		2
-#define FPCSR_offDBZ		3
-#define FPCSR_offOVF		4
-#define FPCSR_offUDF		5
-#define FPCSR_offIEX		6
-#define FPCSR_offIVOE		7
-#define FPCSR_offDBZE		8
-#define FPCSR_offOVFE		9
-#define FPCSR_offUDFE		10
-#define FPCSR_offIEXE		11
-#define FPCSR_offDNZ		12
-#define FPCSR_offIVOT		13
-#define FPCSR_offDBZT		14
-#define FPCSR_offOVFT		15
-#define FPCSR_offUDFT		16
-#define FPCSR_offIEXT		17
-#define FPCSR_offDNIT		18
-#define FPCSR_offRIT		19
-
-#define FPCSR_mskRM             ( 0x3  << FPCSR_offRM )
-#define FPCSR_mskIVO            ( 0x1  << FPCSR_offIVO )
-#define FPCSR_mskDBZ            ( 0x1  << FPCSR_offDBZ )
-#define FPCSR_mskOVF            ( 0x1  << FPCSR_offOVF )
-#define FPCSR_mskUDF            ( 0x1  << FPCSR_offUDF )
-#define FPCSR_mskIEX            ( 0x1  << FPCSR_offIEX )
-#define FPCSR_mskIVOE           ( 0x1  << FPCSR_offIVOE )
-#define FPCSR_mskDBZE           ( 0x1  << FPCSR_offDBZE )
-#define FPCSR_mskOVFE           ( 0x1  << FPCSR_offOVFE )
-#define FPCSR_mskUDFE           ( 0x1  << FPCSR_offUDFE )
-#define FPCSR_mskIEXE           ( 0x1  << FPCSR_offIEXE )
-#define FPCSR_mskDNZ            ( 0x1  << FPCSR_offDNZ )
-#define FPCSR_mskIVOT           ( 0x1  << FPCSR_offIVOT )
-#define FPCSR_mskDBZT           ( 0x1  << FPCSR_offDBZT )
-#define FPCSR_mskOVFT           ( 0x1  << FPCSR_offOVFT )
-#define FPCSR_mskUDFT           ( 0x1  << FPCSR_offUDFT )
-#define FPCSR_mskIEXT           ( 0x1  << FPCSR_offIEXT )
-#define FPCSR_mskDNIT           ( 0x1  << FPCSR_offDNIT )
-#define FPCSR_mskRIT		( 0x1  << FPCSR_offRIT )
-#define FPCSR_mskALL		(FPCSR_mskIVO | FPCSR_mskDBZ | FPCSR_mskOVF | FPCSR_mskUDF | FPCSR_mskIEX)
-#define FPCSR_mskALLE_NO_UDF_IEXE (FPCSR_mskIVOE | FPCSR_mskDBZE | FPCSR_mskOVFE)
-#define FPCSR_mskALLE		(FPCSR_mskIVOE | FPCSR_mskDBZE | FPCSR_mskOVFE | FPCSR_mskUDFE | FPCSR_mskIEXE)
-#define FPCSR_mskALLT           (FPCSR_mskIVOT | FPCSR_mskDBZT | FPCSR_mskOVFT | FPCSR_mskUDFT | FPCSR_mskIEXT |FPCSR_mskDNIT | FPCSR_mskRIT)
-
-/******************************************************************************
- * fpcfg: FPCFG (Floating-Point Configuration Register)
- *****************************************************************************/
-#define	FPCFG_offSP		0
-#define FPCFG_offDP		1
-#define FPCFG_offFREG		2
-#define FPCFG_offFMA		4
-#define FPCFG_offIMVER		22
-#define FPCFG_offAVER		27
-
-#define FPCFG_mskSP		( 0x1  << FPCFG_offSP )
-#define FPCFG_mskDP		( 0x1  << FPCFG_offDP )
-#define FPCFG_mskFREG		( 0x3  << FPCFG_offFREG )
-#define FPCFG_mskFMA		( 0x1  << FPCFG_offFMA )
-#define FPCFG_mskIMVER		( 0x1F  << FPCFG_offIMVER )
-#define FPCFG_mskAVER		( 0x1F  << FPCFG_offAVER )
-
-/* 8 Single precision or 4 double precision registers are available */
-#define SP8_DP4_reg		0
-/* 16 Single precision or 8 double precision registers are available */
-#define SP16_DP8_reg		1
-/* 32 Single precision or 16 double precision registers are available */
-#define SP32_DP16_reg		2
-/* 32 Single precision or 32 double precision registers are available */
-#define SP32_DP32_reg		3
-
-/******************************************************************************
- * fucpr: FUCOP_CTL (FPU and Coprocessor Enable Control Register)
- *****************************************************************************/
-#define FUCOP_CTL_offCP0EN	0
-#define FUCOP_CTL_offCP1EN	1
-#define FUCOP_CTL_offCP2EN	2
-#define FUCOP_CTL_offCP3EN	3
-#define FUCOP_CTL_offAUEN	31
-
-#define FUCOP_CTL_mskCP0EN	( 0x1  << FUCOP_CTL_offCP0EN )
-#define FUCOP_CTL_mskCP1EN	( 0x1  << FUCOP_CTL_offCP1EN )
-#define FUCOP_CTL_mskCP2EN      ( 0x1  << FUCOP_CTL_offCP2EN )
-#define FUCOP_CTL_mskCP3EN      ( 0x1  << FUCOP_CTL_offCP3EN )
-#define FUCOP_CTL_mskAUEN       ( 0x1  << FUCOP_CTL_offAUEN )
-
-#endif /* __NDS32_BITFIELD_H__ */
diff --git a/arch/nds32/include/asm/cache.h b/arch/nds32/include/asm/cache.h
deleted file mode 100644
index fc3c41b59169..000000000000
--- a/arch/nds32/include/asm/cache.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#ifndef __NDS32_CACHE_H__
-#define __NDS32_CACHE_H__
-
-#define L1_CACHE_BYTES	32
-#define L1_CACHE_SHIFT	5
-
-#define ARCH_DMA_MINALIGN   L1_CACHE_BYTES
-
-#endif /* __NDS32_CACHE_H__ */
diff --git a/arch/nds32/include/asm/cache_info.h b/arch/nds32/include/asm/cache_info.h
deleted file mode 100644
index e89d8078f3a6..000000000000
--- a/arch/nds32/include/asm/cache_info.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-struct cache_info {
-	unsigned char ways;
-	unsigned char line_size;
-	unsigned short sets;
-	unsigned short size;
-#if defined(CONFIG_CPU_CACHE_ALIASING)
-	unsigned short aliasing_num;
-	unsigned int aliasing_mask;
-#endif
-};
diff --git a/arch/nds32/include/asm/cacheflush.h b/arch/nds32/include/asm/cacheflush.h
deleted file mode 100644
index c2a222ebfa2a..000000000000
--- a/arch/nds32/include/asm/cacheflush.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#ifndef __NDS32_CACHEFLUSH_H__
-#define __NDS32_CACHEFLUSH_H__
-
-#include <linux/mm.h>
-
-#define PG_dcache_dirty PG_arch_1
-
-void flush_icache_range(unsigned long start, unsigned long end);
-#define flush_icache_range flush_icache_range
-
-void flush_icache_page(struct vm_area_struct *vma, struct page *page);
-#define flush_icache_page flush_icache_page
-
-#ifdef CONFIG_CPU_CACHE_ALIASING
-void flush_cache_mm(struct mm_struct *mm);
-void flush_cache_dup_mm(struct mm_struct *mm);
-void flush_cache_range(struct vm_area_struct *vma,
-		       unsigned long start, unsigned long end);
-void flush_cache_page(struct vm_area_struct *vma,
-		      unsigned long addr, unsigned long pfn);
-void flush_cache_kmaps(void);
-void flush_cache_vmap(unsigned long start, unsigned long end);
-void flush_cache_vunmap(unsigned long start, unsigned long end);
-
-#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
-void flush_dcache_page(struct page *page);
-void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
-		       unsigned long vaddr, void *dst, void *src, int len);
-void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
-			 unsigned long vaddr, void *dst, void *src, int len);
-
-#define ARCH_HAS_FLUSH_ANON_PAGE
-void flush_anon_page(struct vm_area_struct *vma,
-		     struct page *page, unsigned long vaddr);
-
-#define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1
-void flush_kernel_vmap_range(void *addr, int size);
-void invalidate_kernel_vmap_range(void *addr, int size);
-#define flush_dcache_mmap_lock(mapping)   xa_lock_irq(&(mapping)->i_pages)
-#define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&(mapping)->i_pages)
-
-#else
-void flush_icache_user_page(struct vm_area_struct *vma, struct page *page,
-	                     unsigned long addr, int len);
-#define flush_icache_user_page flush_icache_user_page
-
-#include <asm-generic/cacheflush.h>
-#endif
-
-#endif /* __NDS32_CACHEFLUSH_H__ */
diff --git a/arch/nds32/include/asm/current.h b/arch/nds32/include/asm/current.h
deleted file mode 100644
index 65d30096142b..000000000000
--- a/arch/nds32/include/asm/current.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#ifndef _ASM_NDS32_CURRENT_H
-#define _ASM_NDS32_CURRENT_H
-
-#ifndef __ASSEMBLY__
-register struct task_struct *current asm("$r25");
-#endif /* __ASSEMBLY__ */
-#define tsk $r25
-
-#endif /* _ASM_NDS32_CURRENT_H */
diff --git a/arch/nds32/include/asm/delay.h b/arch/nds32/include/asm/delay.h
deleted file mode 100644
index 56ea3894f8f8..000000000000
--- a/arch/nds32/include/asm/delay.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#ifndef __NDS32_DELAY_H__
-#define __NDS32_DELAY_H__
-
-#include <asm/param.h>
-
-/* There is no clocksource cycle counter in the CPU. */
-static inline void __delay(unsigned long loops)
-{
-	__asm__ __volatile__(".align 2\n"
-			     "1:\n"
-			     "\taddi\t%0, %0, -1\n"
-			     "\tbgtz\t%0, 1b\n"
-			     :"=r"(loops)
-			     :"0"(loops));
-}
-
-static inline void __udelay(unsigned long usecs, unsigned long lpj)
-{
-	usecs *= (unsigned long)(((0x8000000000000000ULL / (500000 / HZ)) +
-				  0x80000000ULL) >> 32);
-	usecs = (unsigned long)(((unsigned long long)usecs * lpj) >> 32);
-	__delay(usecs);
-}
-
-#define udelay(usecs) __udelay((usecs), loops_per_jiffy)
-
-/* make sure "usecs *= ..." in udelay do not overflow. */
-#if HZ >= 1000
-#define MAX_UDELAY_MS	1
-#elif HZ <= 200
-#define MAX_UDELAY_MS	5
-#else
-#define MAX_UDELAY_MS	(1000 / HZ)
-#endif
-
-#endif
diff --git a/arch/nds32/include/asm/elf.h b/arch/nds32/include/asm/elf.h
deleted file mode 100644
index 1853dc89b8ac..000000000000
--- a/arch/nds32/include/asm/elf.h
+++ /dev/null
@@ -1,180 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#ifndef __ASMNDS32_ELF_H
-#define __ASMNDS32_ELF_H
-
-/*
- * ELF register definitions..
- */
-
-#include <asm/ptrace.h>
-#include <asm/fpu.h>
-#include <linux/elf-em.h>
-
-typedef unsigned long elf_greg_t;
-typedef unsigned long elf_freg_t[3];
-
-extern unsigned int elf_hwcap;
-
-#define R_NDS32_NONE			0
-#define R_NDS32_16_RELA			19
-#define R_NDS32_32_RELA			20
-#define R_NDS32_9_PCREL_RELA		22
-#define R_NDS32_15_PCREL_RELA		23
-#define R_NDS32_17_PCREL_RELA		24
-#define R_NDS32_25_PCREL_RELA		25
-#define R_NDS32_HI20_RELA		26
-#define R_NDS32_LO12S3_RELA		27
-#define R_NDS32_LO12S2_RELA		28
-#define R_NDS32_LO12S1_RELA		29
-#define R_NDS32_LO12S0_RELA		30
-#define R_NDS32_SDA15S3_RELA    	31
-#define R_NDS32_SDA15S2_RELA    	32
-#define R_NDS32_SDA15S1_RELA    	33
-#define R_NDS32_SDA15S0_RELA    	34
-#define R_NDS32_GOT20			37
-#define R_NDS32_25_PLTREL		38
-#define R_NDS32_COPY			39
-#define R_NDS32_GLOB_DAT		40
-#define R_NDS32_JMP_SLOT		41
-#define R_NDS32_RELATIVE		42
-#define R_NDS32_GOTOFF			43
-#define R_NDS32_GOTPC20			44
-#define R_NDS32_GOT_HI20		45
-#define R_NDS32_GOT_LO12		46
-#define R_NDS32_GOTPC_HI20		47
-#define R_NDS32_GOTPC_LO12		48
-#define R_NDS32_GOTOFF_HI20		49
-#define R_NDS32_GOTOFF_LO12		50
-#define R_NDS32_INSN16			51
-#define R_NDS32_LABEL			52
-#define R_NDS32_LONGCALL1		53
-#define R_NDS32_LONGCALL2		54
-#define R_NDS32_LONGCALL3		55
-#define R_NDS32_LONGJUMP1		56
-#define R_NDS32_LONGJUMP2		57
-#define R_NDS32_LONGJUMP3		58
-#define R_NDS32_LOADSTORE		59
-#define R_NDS32_9_FIXED_RELA		60
-#define R_NDS32_15_FIXED_RELA		61
-#define R_NDS32_17_FIXED_RELA		62
-#define R_NDS32_25_FIXED_RELA		63
-#define R_NDS32_PLTREL_HI20		64
-#define R_NDS32_PLTREL_LO12		65
-#define R_NDS32_PLT_GOTREL_HI20		66
-#define R_NDS32_PLT_GOTREL_LO12		67
-#define R_NDS32_LO12S0_ORI_RELA		72
-#define R_NDS32_DWARF2_OP1_RELA     	77
-#define R_NDS32_DWARF2_OP2_RELA     	78
-#define R_NDS32_DWARF2_LEB_RELA     	79
-#define R_NDS32_WORD_9_PCREL_RELA	94
-#define R_NDS32_LONGCALL4 		107
-#define R_NDS32_RELA_NOP_MIX		192
-#define R_NDS32_RELA_NOP_MAX		255
-
-#define ELF_NGREG (sizeof (struct user_pt_regs) / sizeof(elf_greg_t))
-#define ELF_CORE_COPY_REGS(dest, regs)	\
-	*(struct user_pt_regs *)&(dest) = (regs)->user_regs;
-
-typedef elf_greg_t elf_gregset_t[ELF_NGREG];
-
-/* Core file format: The core file is written in such a way that gdb
-   can understand it and provide useful information to the user (under
-   linux we use the 'trad-core' bfd).  There are quite a number of
-   obstacles to being able to view the contents of the floating point
-   registers, and until these are solved you will not be able to view the
-   contents of them.  Actually, you can read in the core file and look at
-   the contents of the user struct to find out what the floating point
-   registers contain.
-   The actual file contents are as follows:
-   UPAGE: 1 page consisting of a user struct that tells gdb what is present
-   in the file.  Directly after this is a copy of the task_struct, which
-   is currently not used by gdb, but it may come in useful at some point.
-   All of the registers are stored as part of the upage.  The upage should
-   always be only one page.
-   DATA: The data area is stored.  We use current->end_text to
-   current->brk to pick up all of the user variables, plus any memory
-   that may have been malloced.  No attempt is made to determine if a page
-   is demand-zero or if a page is totally unused, we just cover the entire
-   range.  All of the addresses are rounded in such a way that an integral
-   number of pages is written.
-   STACK: We need the stack information in order to get a meaningful
-   backtrace.  We need to write the data from (esp) to
-   current->start_stack, so we round each of these off in order to be able
-   to write an integer number of pages.
-   The minimum core file size is 3 pages, or 12288 bytes.
-*/
-
-struct user_fp {
-        unsigned long long fd_regs[32];
-        unsigned long fpcsr;
-};
-
-typedef struct user_fp elf_fpregset_t;
-
-struct elf32_hdr;
-#define elf_check_arch(x)		((x)->e_machine == EM_NDS32)
-
-/*
- * These are used to set parameters in the core dumps.
- */
-#define ELF_CLASS	ELFCLASS32
-#ifdef __NDS32_EB__
-#define ELF_DATA	ELFDATA2MSB
-#else
-#define ELF_DATA	ELFDATA2LSB
-#endif
-#define ELF_ARCH	EM_NDS32
-#define ELF_EXEC_PAGESIZE	PAGE_SIZE
-
-/* This is the location that an ET_DYN program is loaded if exec'ed.  Typical
-   use of this is to invoke "./ld.so someprog" to test out a new version of
-   the loader.  We need to make sure that it is out of the way of the program
-   that it will "exec", and that there is sufficient room for the brk.  */
-
-#define ELF_ET_DYN_BASE	(2 * TASK_SIZE / 3)
-
-/* When the program starts, a1 contains a pointer to a function to be
-   registered with atexit, as per the SVR4 ABI.  A value of 0 means we
-   have no such handler.  */
-#define ELF_PLAT_INIT(_r, load_addr)	(_r)->uregs[0] = 0
-
-/* This yields a mask that user programs can use to figure out what
-   instruction set this cpu supports. */
-
-#define ELF_HWCAP	(elf_hwcap)
-
-#ifdef __KERNEL__
-
-#define ELF_PLATFORM    (NULL)
-
-/* Old NetWinder binaries were compiled in such a way that the iBCS
-   heuristic always trips on them.  Until these binaries become uncommon
-   enough not to care, don't trust the `ibcs' flag here.  In any case
-   there is no other ELF system currently supported by iBCS.
-   @@ Could print a warning message to encourage users to upgrade.  */
-#define SET_PERSONALITY(ex)	set_personality(PER_LINUX)
-
-#endif
-
-
-#if IS_ENABLED(CONFIG_FPU)
-#define FPU_AUX_ENT	NEW_AUX_ENT(AT_FPUCW, FPCSR_INIT)
-#else
-#define FPU_AUX_ENT	NEW_AUX_ENT(AT_IGNORE, 0)
-#endif
-
-#define ARCH_DLINFO						\
-do {								\
-	/* Optional FPU initialization */			\
-	FPU_AUX_ENT;						\
-								\
-	NEW_AUX_ENT(AT_SYSINFO_EHDR,				\
-		    (elf_addr_t)current->mm->context.vdso);	\
-} while (0)
-#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
-struct linux_binprm;
-int arch_setup_additional_pages(struct linux_binprm *, int);
-
-#endif
diff --git a/arch/nds32/include/asm/fixmap.h b/arch/nds32/include/asm/fixmap.h
deleted file mode 100644
index 2fa09a2de428..000000000000
--- a/arch/nds32/include/asm/fixmap.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#ifndef __ASM_NDS32_FIXMAP_H
-#define __ASM_NDS32_FIXMAP_H
-
-#ifdef CONFIG_HIGHMEM
-#include <linux/threads.h>
-#include <asm/kmap_size.h>
-#endif
-
-enum fixed_addresses {
-	FIX_HOLE,
-	FIX_KMAP_RESERVED,
-	FIX_KMAP_BEGIN,
-#ifdef CONFIG_HIGHMEM
-	FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_MAX_IDX * NR_CPUS) - 1,
-#endif
-	FIX_EARLYCON_MEM_BASE,
-	__end_of_fixed_addresses
-};
-#define FIXADDR_TOP             ((unsigned long) (-(16 * PAGE_SIZE)))
-#define FIXADDR_SIZE		((__end_of_fixed_addresses) << PAGE_SHIFT)
-#define FIXADDR_START		(FIXADDR_TOP - FIXADDR_SIZE)
-#define FIXMAP_PAGE_IO		__pgprot(PAGE_DEVICE)
-void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot);
-
-#include <asm-generic/fixmap.h>
-#endif /* __ASM_NDS32_FIXMAP_H */
diff --git a/arch/nds32/include/asm/fpu.h b/arch/nds32/include/asm/fpu.h
deleted file mode 100644
index 8294ed4aaa2c..000000000000
--- a/arch/nds32/include/asm/fpu.h
+++ /dev/null
@@ -1,126 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2005-2018 Andes Technology Corporation */
-
-#ifndef __ASM_NDS32_FPU_H
-#define __ASM_NDS32_FPU_H
-
-#if IS_ENABLED(CONFIG_FPU)
-#ifndef __ASSEMBLY__
-#include <linux/sched/task_stack.h>
-#include <linux/preempt.h>
-#include <asm/ptrace.h>
-
-extern bool has_fpu;
-
-extern void save_fpu(struct task_struct *__tsk);
-extern void load_fpu(const struct fpu_struct *fpregs);
-extern bool do_fpu_exception(unsigned int subtype, struct pt_regs *regs);
-extern int do_fpuemu(struct pt_regs *regs, struct fpu_struct *fpu);
-
-#define test_tsk_fpu(regs)	(regs->fucop_ctl & FUCOP_CTL_mskCP0EN)
-
-/*
- * Initially load the FPU with signalling NANS.  This bit pattern
- * has the property that no matter whether considered as single or as
- * double precision, it still represents a signalling NAN.
- */
-
-#define sNAN64    0xFFFFFFFFFFFFFFFFULL
-#define sNAN32    0xFFFFFFFFUL
-
-#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC)
-/*
- * Denormalized number is unsupported by nds32 FPU. Hence the operation
- * is treated as underflow cases when the final result is a denormalized
- * number. To enhance precision, underflow exception trap should be
- * enabled by default and kerenl will re-execute it by fpu emulator
- * when getting underflow exception.
- */
-#define FPCSR_INIT  (FPCSR_mskUDFE | FPCSR_mskIEXE)
-#else
-#define FPCSR_INIT  0x0UL
-#endif
-
-extern const struct fpu_struct init_fpuregs;
-
-static inline void disable_ptreg_fpu(struct pt_regs *regs)
-{
-	regs->fucop_ctl &= ~FUCOP_CTL_mskCP0EN;
-}
-
-static inline void enable_ptreg_fpu(struct pt_regs *regs)
-{
-	regs->fucop_ctl |= FUCOP_CTL_mskCP0EN;
-}
-
-static inline void enable_fpu(void)
-{
-	unsigned long fucop_ctl;
-
-	fucop_ctl = __nds32__mfsr(NDS32_SR_FUCOP_CTL) | FUCOP_CTL_mskCP0EN;
-	__nds32__mtsr(fucop_ctl, NDS32_SR_FUCOP_CTL);
-	__nds32__isb();
-}
-
-static inline void disable_fpu(void)
-{
-	unsigned long fucop_ctl;
-
-	fucop_ctl = __nds32__mfsr(NDS32_SR_FUCOP_CTL) & ~FUCOP_CTL_mskCP0EN;
-	__nds32__mtsr(fucop_ctl, NDS32_SR_FUCOP_CTL);
-	__nds32__isb();
-}
-
-static inline void lose_fpu(void)
-{
-	preempt_disable();
-#if IS_ENABLED(CONFIG_LAZY_FPU)
-	if (last_task_used_math == current) {
-		last_task_used_math = NULL;
-#else
-	if (test_tsk_fpu(task_pt_regs(current))) {
-#endif
-		save_fpu(current);
-	}
-	disable_ptreg_fpu(task_pt_regs(current));
-	preempt_enable();
-}
-
-static inline void own_fpu(void)
-{
-	preempt_disable();
-#if IS_ENABLED(CONFIG_LAZY_FPU)
-	if (last_task_used_math != current) {
-		if (last_task_used_math != NULL)
-			save_fpu(last_task_used_math);
-		load_fpu(&current->thread.fpu);
-		last_task_used_math = current;
-	}
-#else
-	if (!test_tsk_fpu(task_pt_regs(current))) {
-		load_fpu(&current->thread.fpu);
-	}
-#endif
-	enable_ptreg_fpu(task_pt_regs(current));
-	preempt_enable();
-}
-
-#if !IS_ENABLED(CONFIG_LAZY_FPU)
-static inline void unlazy_fpu(struct task_struct *tsk)
-{
-	preempt_disable();
-	if (test_tsk_fpu(task_pt_regs(tsk)))
-		save_fpu(tsk);
-	preempt_enable();
-}
-#endif /* !CONFIG_LAZY_FPU */
-static inline void clear_fpu(struct pt_regs *regs)
-{
-	preempt_disable();
-	if (test_tsk_fpu(regs))
-		disable_ptreg_fpu(regs);
-	preempt_enable();
-}
-#endif /* CONFIG_FPU */
-#endif /* __ASSEMBLY__ */
-#endif /* __ASM_NDS32_FPU_H */
diff --git a/arch/nds32/include/asm/fpuemu.h b/arch/nds32/include/asm/fpuemu.h
deleted file mode 100644
index 63e7ef5f7969..000000000000
--- a/arch/nds32/include/asm/fpuemu.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2005-2018 Andes Technology Corporation */
-
-#ifndef __ARCH_NDS32_FPUEMU_H
-#define __ARCH_NDS32_FPUEMU_H
-
-/*
- * single precision
- */
-
-void fadds(void *ft, void *fa, void *fb);
-void fsubs(void *ft, void *fa, void *fb);
-void fmuls(void *ft, void *fa, void *fb);
-void fdivs(void *ft, void *fa, void *fb);
-void fs2d(void *ft, void *fa);
-void fs2si(void *ft, void *fa);
-void fs2si_z(void *ft, void *fa);
-void fs2ui(void *ft, void *fa);
-void fs2ui_z(void *ft, void *fa);
-void fsi2s(void *ft, void *fa);
-void fui2s(void *ft, void *fa);
-void fsqrts(void *ft, void *fa);
-void fnegs(void *ft, void *fa);
-int fcmps(void *ft, void *fa, void *fb, int cop);
-
-/*
- * double precision
- */
-void faddd(void *ft, void *fa, void *fb);
-void fsubd(void *ft, void *fa, void *fb);
-void fmuld(void *ft, void *fa, void *fb);
-void fdivd(void *ft, void *fa, void *fb);
-void fsqrtd(void *ft, void *fa);
-void fd2s(void *ft, void *fa);
-void fd2si(void *ft, void *fa);
-void fd2si_z(void *ft, void *fa);
-void fd2ui(void *ft, void *fa);
-void fd2ui_z(void *ft, void *fa);
-void fsi2d(void *ft, void *fa);
-void fui2d(void *ft, void *fa);
-void fnegd(void *ft, void *fa);
-int fcmpd(void *ft, void *fa, void *fb, int cop);
-
-#endif /* __ARCH_NDS32_FPUEMU_H */
diff --git a/arch/nds32/include/asm/ftrace.h b/arch/nds32/include/asm/ftrace.h
deleted file mode 100644
index 2f96cc96aa35..000000000000
--- a/arch/nds32/include/asm/ftrace.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-#ifndef __ASM_NDS32_FTRACE_H
-#define __ASM_NDS32_FTRACE_H
-
-#ifdef CONFIG_FUNCTION_TRACER
-
-#define HAVE_FUNCTION_GRAPH_FP_TEST
-
-#define MCOUNT_ADDR ((unsigned long)(_mcount))
-/* mcount call is composed of three instructions:
- * sethi + ori + jral
- */
-#define MCOUNT_INSN_SIZE 12
-
-extern void _mcount(unsigned long parent_ip);
-
-#ifdef CONFIG_DYNAMIC_FTRACE
-
-#define FTRACE_ADDR ((unsigned long)_ftrace_caller)
-
-#ifdef __NDS32_EL__
-#define INSN_NOP		0x09000040
-#define INSN_SIZE(insn)		(((insn & 0x00000080) == 0) ? 4 : 2)
-#define IS_SETHI(insn)		((insn & 0x000000fe) == 0x00000046)
-#define ENDIAN_CONVERT(insn)	be32_to_cpu(insn)
-#else /* __NDS32_EB__ */
-#define INSN_NOP		0x40000009
-#define INSN_SIZE(insn)		(((insn & 0x80000000) == 0) ? 4 : 2)
-#define IS_SETHI(insn)		((insn & 0xfe000000) == 0x46000000)
-#define ENDIAN_CONVERT(insn)	(insn)
-#endif
-
-extern void _ftrace_caller(unsigned long parent_ip);
-static inline unsigned long ftrace_call_adjust(unsigned long addr)
-{
-	return addr;
-}
-struct dyn_arch_ftrace {
-};
-
-#endif /* CONFIG_DYNAMIC_FTRACE */
-
-#endif /* CONFIG_FUNCTION_TRACER */
-
-#endif /* __ASM_NDS32_FTRACE_H */
diff --git a/arch/nds32/include/asm/futex.h b/arch/nds32/include/asm/futex.h
deleted file mode 100644
index 4223f473bd36..000000000000
--- a/arch/nds32/include/asm/futex.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#ifndef __NDS32_FUTEX_H__
-#define __NDS32_FUTEX_H__
-
-#include <linux/futex.h>
-#include <linux/uaccess.h>
-#include <asm/errno.h>
-
-#define __futex_atomic_ex_table(err_reg)			\
-	"	.pushsection __ex_table,\"a\"\n"		\
-	"	.align	3\n"					\
-	"	.long	1b, 4f\n"				\
-	"	.long	2b, 4f\n"				\
-	"	.popsection\n"					\
-	"	.pushsection .fixup,\"ax\"\n"			\
-	"4:	move	%0, " err_reg "\n"			\
-	"	b	3b\n"					\
-	"	.popsection"
-
-#define __futex_atomic_op(insn, ret, oldval, tmp, uaddr, oparg)	\
-	smp_mb();						\
-	asm volatile(					\
-	"	movi	$ta, #0\n"				\
-	"1:	llw	%1, [%2+$ta]\n"				\
-	"	" insn "\n"					\
-	"2:	scw	%0, [%2+$ta]\n"				\
-	"	beqz	%0, 1b\n"				\
-	"	movi	%0, #0\n"				\
-	"3:\n"							\
-	__futex_atomic_ex_table("%4")				\
-	: "=&r" (ret), "=&r" (oldval)				\
-	: "r" (uaddr), "r" (oparg), "i" (-EFAULT)		\
-	: "cc", "memory")
-static inline int
-futex_atomic_cmpxchg_inatomic(u32 * uval, u32 __user * uaddr,
-			      u32 oldval, u32 newval)
-{
-	int ret = 0;
-	u32 val, tmp, flags;
-
-	if (!access_ok(uaddr, sizeof(u32)))
-		return -EFAULT;
-
-	smp_mb();
-	asm volatile ("       movi    $ta, #0\n"
-		      "1:     llw     %1, [%6 + $ta]\n"
-		      "       sub     %3, %1, %4\n"
-		      "       cmovz   %2, %5, %3\n"
-		      "       cmovn   %2, %1, %3\n"
-		      "2:     scw     %2, [%6 + $ta]\n"
-		      "       beqz    %2, 1b\n"
-		      "3:\n                   " __futex_atomic_ex_table("%7")
-		      :"+&r"(ret), "=&r"(val), "=&r"(tmp), "=&r"(flags)
-		      :"r"(oldval), "r"(newval), "r"(uaddr), "i"(-EFAULT)
-		      :"$ta", "memory");
-	smp_mb();
-
-	*uval = val;
-	return ret;
-}
-
-static inline int
-arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
-{
-	int oldval = 0, ret;
-
-	if (!access_ok(uaddr, sizeof(u32)))
-		return -EFAULT;
-	switch (op) {
-	case FUTEX_OP_SET:
-		__futex_atomic_op("move	%0, %3", ret, oldval, tmp, uaddr,
-				  oparg);
-		break;
-	case FUTEX_OP_ADD:
-		__futex_atomic_op("add	%0, %1, %3", ret, oldval, tmp, uaddr,
-				  oparg);
-		break;
-	case FUTEX_OP_OR:
-		__futex_atomic_op("or	%0, %1, %3", ret, oldval, tmp, uaddr,
-				  oparg);
-		break;
-	case FUTEX_OP_ANDN:
-		__futex_atomic_op("and	%0, %1, %3", ret, oldval, tmp, uaddr,
-				  ~oparg);
-		break;
-	case FUTEX_OP_XOR:
-		__futex_atomic_op("xor	%0, %1, %3", ret, oldval, tmp, uaddr,
-				  oparg);
-		break;
-	default:
-		ret = -ENOSYS;
-	}
-
-	if (!ret)
-		*oval = oldval;
-
-	return ret;
-}
-#endif /* __NDS32_FUTEX_H__ */
diff --git a/arch/nds32/include/asm/highmem.h b/arch/nds32/include/asm/highmem.h
deleted file mode 100644
index 16159a8716f2..000000000000
--- a/arch/nds32/include/asm/highmem.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#ifndef _ASM_HIGHMEM_H
-#define _ASM_HIGHMEM_H
-
-#include <asm/proc-fns.h>
-#include <asm/fixmap.h>
-
-/*
- * Right now we initialize only a single pte table. It can be extended
- * easily, subsequent pte tables have to be allocated in one physical
- * chunk of RAM.
- */
-/*
- * Ordering is (from lower to higher memory addresses):
- *
- * high_memory
- *			Persistent kmap area
- * PKMAP_BASE
- *			fixed_addresses
- * FIXADDR_START
- * FIXADDR_TOP
- *			Vmalloc area
- * VMALLOC_START
- * VMALLOC_END
- */
-#define PKMAP_BASE		((FIXADDR_START - PGDIR_SIZE) & (PGDIR_MASK))
-#define LAST_PKMAP		PTRS_PER_PTE
-#define LAST_PKMAP_MASK		(LAST_PKMAP - 1)
-#define PKMAP_NR(virt)		(((virt) - (PKMAP_BASE)) >> PAGE_SHIFT)
-#define PKMAP_ADDR(nr)		(PKMAP_BASE + ((nr) << PAGE_SHIFT))
-
-static inline void flush_cache_kmaps(void)
-{
-	cpu_dcache_wbinval_all();
-}
-
-/* declarations for highmem.c */
-extern unsigned long highstart_pfn, highend_pfn;
-
-extern pte_t *pkmap_page_table;
-
-extern void kmap_init(void);
-
-/*
- * FIXME: The below looks broken vs. a kmap_atomic() in task context which
- * is interupted and another kmap_atomic() happens in interrupt context.
- * But what do I know about nds32. -- tglx
- */
-#define arch_kmap_local_post_map(vaddr, pteval)			\
-	do {							\
-		__nds32__tlbop_inv(vaddr);			\
-		__nds32__mtsr_dsb(vaddr, NDS32_SR_TLB_VPN);	\
-		__nds32__tlbop_rwr(pteval);			\
-		__nds32__isb();					\
-	} while (0)
-
-#define arch_kmap_local_pre_unmap(vaddr)			\
-	do {							\
-		__nds32__tlbop_inv(vaddr);			\
-		__nds32__isb();					\
-	} while (0)
-
-#endif
diff --git a/arch/nds32/include/asm/io.h b/arch/nds32/include/asm/io.h
deleted file mode 100644
index e57378d04006..000000000000
--- a/arch/nds32/include/asm/io.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#ifndef __ASM_NDS32_IO_H
-#define __ASM_NDS32_IO_H
-
-#include <linux/types.h>
-
-#define __raw_writeb __raw_writeb
-static inline void __raw_writeb(u8 val, volatile void __iomem *addr)
-{
-	asm volatile("sbi %0, [%1]" : : "r" (val), "r" (addr));
-}
-
-#define __raw_writew __raw_writew
-static inline void __raw_writew(u16 val, volatile void __iomem *addr)
-{
-	asm volatile("shi %0, [%1]" : : "r" (val), "r" (addr));
-}
-
-#define __raw_writel __raw_writel
-static inline void __raw_writel(u32 val, volatile void __iomem *addr)
-{
-	asm volatile("swi %0, [%1]" : : "r" (val), "r" (addr));
-}
-
-#define __raw_readb __raw_readb
-static inline u8 __raw_readb(const volatile void __iomem *addr)
-{
-	u8 val;
-
-	asm volatile("lbi %0, [%1]" : "=r" (val) : "r" (addr));
-	return val;
-}
-
-#define __raw_readw __raw_readw
-static inline u16 __raw_readw(const volatile void __iomem *addr)
-{
-	u16 val;
-
-	asm volatile("lhi %0, [%1]" : "=r" (val) : "r" (addr));
-	return val;
-}
-
-#define __raw_readl __raw_readl
-static inline u32 __raw_readl(const volatile void __iomem *addr)
-{
-	u32 val;
-
-	asm volatile("lwi %0, [%1]" : "=r" (val) : "r" (addr));
-	return val;
-}
-
-#define __iormb()               rmb()
-#define __iowmb()               wmb()
-
-/*
- * {read,write}{b,w,l,q}_relaxed() are like the regular version, but
- * are not guaranteed to provide ordering against spinlocks or memory
- * accesses.
- */
-
-#define readb_relaxed(c)	({ u8  __v = __raw_readb(c); __v; })
-#define readw_relaxed(c)	({ u16 __v = le16_to_cpu((__force __le16)__raw_readw(c)); __v; })
-#define readl_relaxed(c)	({ u32 __v = le32_to_cpu((__force __le32)__raw_readl(c)); __v; })
-#define writeb_relaxed(v,c)	((void)__raw_writeb((v),(c)))
-#define writew_relaxed(v,c)	((void)__raw_writew((__force u16)cpu_to_le16(v),(c)))
-#define writel_relaxed(v,c)	((void)__raw_writel((__force u32)cpu_to_le32(v),(c)))
-
-/*
- * {read,write}{b,w,l,q}() access little endian memory and return result in
- * native endianness.
- */
-#define readb(c)	({ u8  __v = readb_relaxed(c); __iormb(); __v; })
-#define readw(c)	({ u16 __v = readw_relaxed(c); __iormb(); __v; })
-#define readl(c)	({ u32 __v = readl_relaxed(c); __iormb(); __v; })
-
-#define writeb(v,c)	({ __iowmb(); writeb_relaxed((v),(c)); })
-#define writew(v,c)	({ __iowmb(); writew_relaxed((v),(c)); })
-#define writel(v,c)	({ __iowmb(); writel_relaxed((v),(c)); })
-
-#include <asm-generic/io.h>
-
-#endif /* __ASM_NDS32_IO_H */
diff --git a/arch/nds32/include/asm/irqflags.h b/arch/nds32/include/asm/irqflags.h
deleted file mode 100644
index 51ef800bb301..000000000000
--- a/arch/nds32/include/asm/irqflags.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <asm/nds32.h>
-#include <nds32_intrinsic.h>
-
-#define arch_local_irq_disable()	\
-	GIE_DISABLE();
-
-#define arch_local_irq_enable()	\
-	GIE_ENABLE();
-static inline unsigned long arch_local_irq_save(void)
-{
-	unsigned long flags;
-	flags = __nds32__mfsr(NDS32_SR_PSW) & PSW_mskGIE;
-	GIE_DISABLE();
-	return flags;
-}
-
-static inline unsigned long arch_local_save_flags(void)
-{
-	unsigned long flags;
-	flags = __nds32__mfsr(NDS32_SR_PSW) & PSW_mskGIE;
-	return flags;
-}
-
-static inline void arch_local_irq_restore(unsigned long flags)
-{
-	if(flags)
-		GIE_ENABLE();
-}
-
-static inline int arch_irqs_disabled_flags(unsigned long flags)
-{
-	return !flags;
-}
-
-static inline int arch_irqs_disabled(void)
-{
-	return arch_irqs_disabled_flags(arch_local_save_flags());
-}
diff --git a/arch/nds32/include/asm/l2_cache.h b/arch/nds32/include/asm/l2_cache.h
deleted file mode 100644
index 3ea48e19e6de..000000000000
--- a/arch/nds32/include/asm/l2_cache.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#ifndef L2_CACHE_H
-#define L2_CACHE_H
-
-/* CCTL_CMD_OP */
-#define L2_CA_CONF_OFF		0x0
-#define L2_IF_CONF_OFF		0x4
-#define L2CC_SETUP_OFF		0x8
-#define L2CC_PROT_OFF		0xC
-#define L2CC_CTRL_OFF		0x10
-#define L2_INT_EN_OFF           0x20
-#define L2_STA_OFF              0x24
-#define RDERR_ADDR_OFF		0x28
-#define WRERR_ADDR_OFF		0x2c
-#define EVDPTERR_ADDR_OFF	0x30
-#define IMPL3ERR_ADDR_OFF	0x34
-#define L2_CNT0_CTRL_OFF        0x40
-#define L2_EVNT_CNT0_OFF        0x44
-#define L2_CNT1_CTRL_OFF        0x48
-#define L2_EVNT_CNT1_OFF        0x4c
-#define L2_CCTL_CMD_OFF		0x60
-#define L2_CCTL_STATUS_OFF	0x64
-#define L2_LINE_TAG_OFF		0x68
-#define L2_LINE_DPT_OFF		0x70
-
-#define CCTL_CMD_L2_IX_INVAL    0x0
-#define CCTL_CMD_L2_PA_INVAL    0x1
-#define CCTL_CMD_L2_IX_WB       0x2
-#define CCTL_CMD_L2_PA_WB       0x3
-#define CCTL_CMD_L2_PA_WBINVAL  0x5
-#define CCTL_CMD_L2_SYNC        0xa
-
-/* CCTL_CMD_TYPE */
-#define CCTL_SINGLE_CMD         0
-#define CCTL_BLOCK_CMD          0x10
-#define CCTL_ALL_CMD		0x10
-
-/******************************************************************************
- * L2_CA_CONF (Cache architecture configuration)
- *****************************************************************************/
-#define L2_CA_CONF_offL2SET		0
-#define L2_CA_CONF_offL2WAY		4
-#define L2_CA_CONF_offL2CLSZ            8
-#define L2_CA_CONF_offL2DW		11
-#define L2_CA_CONF_offL2PT		14
-#define L2_CA_CONF_offL2VER		16
-
-#define L2_CA_CONF_mskL2SET	(0xFUL << L2_CA_CONF_offL2SET)
-#define L2_CA_CONF_mskL2WAY	(0xFUL << L2_CA_CONF_offL2WAY)
-#define L2_CA_CONF_mskL2CLSZ    (0x7UL << L2_CA_CONF_offL2CLSZ)
-#define L2_CA_CONF_mskL2DW	(0x7UL << L2_CA_CONF_offL2DW)
-#define L2_CA_CONF_mskL2PT	(0x3UL << L2_CA_CONF_offL2PT)
-#define L2_CA_CONF_mskL2VER	(0xFFFFUL << L2_CA_CONF_offL2VER)
-
-/******************************************************************************
- * L2CC_SETUP (L2CC Setup register)
- *****************************************************************************/
-#define L2CC_SETUP_offPART              0
-#define L2CC_SETUP_mskPART              (0x3UL << L2CC_SETUP_offPART)
-#define L2CC_SETUP_offDDLATC            4
-#define L2CC_SETUP_mskDDLATC            (0x3UL << L2CC_SETUP_offDDLATC)
-#define L2CC_SETUP_offTDLATC            8
-#define L2CC_SETUP_mskTDLATC            (0x3UL << L2CC_SETUP_offTDLATC)
-
-/******************************************************************************
- * L2CC_PROT (L2CC Protect register)
- *****************************************************************************/
-#define L2CC_PROT_offMRWEN              31
-#define L2CC_PROT_mskMRWEN      (0x1UL << L2CC_PROT_offMRWEN)
-
-/******************************************************************************
- * L2_CCTL_STATUS_Mn (The L2CCTL command working status for Master n)
- *****************************************************************************/
-#define L2CC_CTRL_offEN                 31
-#define L2CC_CTRL_mskEN                 (0x1UL << L2CC_CTRL_offEN)
-
-/******************************************************************************
- * L2_CCTL_STATUS_Mn (The L2CCTL command working status for Master n)
- *****************************************************************************/
-#define L2_CCTL_STATUS_offCMD_COMP      31
-#define L2_CCTL_STATUS_mskCMD_COMP      (0x1 << L2_CCTL_STATUS_offCMD_COMP)
-
-extern void __iomem *atl2c_base;
-#include <linux/smp.h>
-#include <asm/io.h>
-#include <asm/bitfield.h>
-
-#define L2C_R_REG(offset)               readl(atl2c_base + offset)
-#define L2C_W_REG(offset, value)        writel(value, atl2c_base + offset)
-
-#define L2_CMD_RDY()    \
-        do{;}while((L2C_R_REG(L2_CCTL_STATUS_OFF) & L2_CCTL_STATUS_mskCMD_COMP) == 0)
-
-static inline unsigned long L2_CACHE_SET(void)
-{
-	return 64 << ((L2C_R_REG(L2_CA_CONF_OFF) & L2_CA_CONF_mskL2SET) >>
-		      L2_CA_CONF_offL2SET);
-}
-
-static inline unsigned long L2_CACHE_WAY(void)
-{
-	return 1 +
-	    ((L2C_R_REG(L2_CA_CONF_OFF) & L2_CA_CONF_mskL2WAY) >>
-	     L2_CA_CONF_offL2WAY);
-}
-
-static inline unsigned long L2_CACHE_LINE_SIZE(void)
-{
-
-	return 4 << ((L2C_R_REG(L2_CA_CONF_OFF) & L2_CA_CONF_mskL2CLSZ) >>
-		     L2_CA_CONF_offL2CLSZ);
-}
-
-static inline unsigned long GET_L2CC_CTRL_CPU(unsigned long cpu)
-{
-	if (cpu == smp_processor_id())
-		return L2C_R_REG(L2CC_CTRL_OFF);
-	return L2C_R_REG(L2CC_CTRL_OFF + (cpu << 8));
-}
-
-static inline void SET_L2CC_CTRL_CPU(unsigned long cpu, unsigned long val)
-{
-	if (cpu == smp_processor_id())
-		L2C_W_REG(L2CC_CTRL_OFF, val);
-	else
-		L2C_W_REG(L2CC_CTRL_OFF + (cpu << 8), val);
-}
-
-static inline unsigned long GET_L2CC_STATUS_CPU(unsigned long cpu)
-{
-	if (cpu == smp_processor_id())
-		return L2C_R_REG(L2_CCTL_STATUS_OFF);
-	return L2C_R_REG(L2_CCTL_STATUS_OFF + (cpu << 8));
-}
-#endif
diff --git a/arch/nds32/include/asm/linkage.h b/arch/nds32/include/asm/linkage.h
deleted file mode 100644
index a696469abb70..000000000000
--- a/arch/nds32/include/asm/linkage.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#ifndef __ASM_LINKAGE_H
-#define __ASM_LINKAGE_H
-
-/* This file is required by include/linux/linkage.h */
-#define __ALIGN .align 2
-#define __ALIGN_STR ".align 2"
-
-#endif
diff --git a/arch/nds32/include/asm/memory.h b/arch/nds32/include/asm/memory.h
deleted file mode 100644
index 62faafbc28e4..000000000000
--- a/arch/nds32/include/asm/memory.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#ifndef __ASM_NDS32_MEMORY_H
-#define __ASM_NDS32_MEMORY_H
-
-#include <linux/compiler.h>
-#include <linux/sizes.h>
-
-#ifndef __ASSEMBLY__
-#include <asm/page.h>
-#endif
-
-#ifndef PHYS_OFFSET
-#define PHYS_OFFSET     (0x0)
-#endif
-
-/*
- * TASK_SIZE - the maximum size of a user space task.
- * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area
- */
-#define TASK_SIZE		((CONFIG_PAGE_OFFSET) - (SZ_32M))
-#define TASK_UNMAPPED_BASE	ALIGN(TASK_SIZE / 3, SZ_32M)
-#define PAGE_OFFSET		(CONFIG_PAGE_OFFSET)
-
-/*
- * Physical vs virtual RAM address space conversion.  These are
- * private definitions which should NOT be used outside memory.h
- * files.  Use virt_to_phys/phys_to_virt/__pa/__va instead.
- */
-#ifndef __virt_to_phys
-#define __virt_to_phys(x)	((x) - PAGE_OFFSET + PHYS_OFFSET)
-#define __phys_to_virt(x)	((x) - PHYS_OFFSET + PAGE_OFFSET)
-#endif
-
-/*
- * The module space lives between the addresses given by TASK_SIZE
- * and PAGE_OFFSET - it must be within 32MB of the kernel text.
- */
-#define MODULES_END	(PAGE_OFFSET)
-#define MODULES_VADDR	(MODULES_END - SZ_32M)
-
-#if TASK_SIZE > MODULES_VADDR
-#error Top of user space clashes with start of module space
-#endif
-
-#ifndef __ASSEMBLY__
-
-/*
- * PFNs are used to describe any physical page; this means
- * PFN 0 == physical address 0.
- *
- * This is the PFN of the first RAM page in the kernel
- * direct-mapped view.  We assume this is the first page
- * of RAM in the mem_map as well.
- */
-#define PHYS_PFN_OFFSET	(PHYS_OFFSET >> PAGE_SHIFT)
-
-/*
- * Drivers should NOT use these either.
- */
-#define __pa(x)			__virt_to_phys((unsigned long)(x))
-#define __va(x)			((void *)__phys_to_virt((unsigned long)(x)))
-
-/*
- * Conversion between a struct page and a physical address.
- *
- * Note: when converting an unknown physical address to a
- * struct page, the resulting pointer must be validated
- * using VALID_PAGE().  It must return an invalid struct page
- * for any physical address not corresponding to a system
- * RAM address.
- *
- *  pfn_valid(pfn)	indicates whether a PFN number is valid
- *
- *  virt_to_page(k)	convert a _valid_ virtual address to struct page *
- *  virt_addr_valid(k)	indicates whether a virtual address is valid
- */
-#define ARCH_PFN_OFFSET		PHYS_PFN_OFFSET
-#define pfn_valid(pfn)		((pfn) >= PHYS_PFN_OFFSET && (pfn) < (PHYS_PFN_OFFSET + max_mapnr))
-
-#define virt_to_page(kaddr)	(pfn_to_page(__pa(kaddr) >> PAGE_SHIFT))
-#define virt_addr_valid(kaddr)	((unsigned long)(kaddr) >= PAGE_OFFSET && (unsigned long)(kaddr) < (unsigned long)high_memory)
-
-#define page_to_phys(page)	(page_to_pfn(page) << PAGE_SHIFT)
-
-#endif
-
-#include <asm-generic/memory_model.h>
-
-#endif
diff --git a/arch/nds32/include/asm/mmu.h b/arch/nds32/include/asm/mmu.h
deleted file mode 100644
index 89d63afee455..000000000000
--- a/arch/nds32/include/asm/mmu.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#ifndef __NDS32_MMU_H
-#define __NDS32_MMU_H
-
-typedef struct {
-	unsigned int id;
-	void *vdso;
-} mm_context_t;
-
-#endif
diff --git a/arch/nds32/include/asm/mmu_context.h b/arch/nds32/include/asm/mmu_context.h
deleted file mode 100644
index c651bc8cacdc..000000000000
--- a/arch/nds32/include/asm/mmu_context.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#ifndef __ASM_NDS32_MMU_CONTEXT_H
-#define __ASM_NDS32_MMU_CONTEXT_H
-
-#include <linux/spinlock.h>
-#include <asm/tlbflush.h>
-#include <asm/proc-fns.h>
-#include <asm-generic/mm_hooks.h>
-
-#define init_new_context init_new_context
-static inline int
-init_new_context(struct task_struct *tsk, struct mm_struct *mm)
-{
-	mm->context.id = 0;
-	return 0;
-}
-
-#define CID_BITS	9
-extern spinlock_t cid_lock;
-extern unsigned int cpu_last_cid;
-
-static inline void __new_context(struct mm_struct *mm)
-{
-	unsigned int cid;
-	unsigned long flags;
-
-	spin_lock_irqsave(&cid_lock, flags);
-	cid = cpu_last_cid;
-	cpu_last_cid += 1 << TLB_MISC_offCID;
-	if (cpu_last_cid == 0)
-		cpu_last_cid = 1 << TLB_MISC_offCID << CID_BITS;
-
-	if ((cid & TLB_MISC_mskCID) == 0)
-		flush_tlb_all();
-	spin_unlock_irqrestore(&cid_lock, flags);
-
-	mm->context.id = cid;
-}
-
-static inline void check_context(struct mm_struct *mm)
-{
-	if (unlikely
-	    ((mm->context.id ^ cpu_last_cid) >> TLB_MISC_offCID >> CID_BITS))
-		__new_context(mm);
-}
-
-static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
-			     struct task_struct *tsk)
-{
-	unsigned int cpu = smp_processor_id();
-
-	if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next) {
-		check_context(next);
-		cpu_switch_mm(next);
-	}
-}
-
-#include <asm-generic/mmu_context.h>
-
-#endif
diff --git a/arch/nds32/include/asm/nds32.h b/arch/nds32/include/asm/nds32.h
deleted file mode 100644
index 4994f6a9e0a0..000000000000
--- a/arch/nds32/include/asm/nds32.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#ifndef _ASM_NDS32_NDS32_H_
-#define _ASM_NDS32_NDS32_H_
-
-#include <asm/bitfield.h>
-#include <asm/cachectl.h>
-
-#ifndef __ASSEMBLY__
-#include <linux/init.h>
-#include <asm/barrier.h>
-#include <nds32_intrinsic.h>
-
-#ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
-#define FP_OFFSET (-3)
-#else
-#define FP_OFFSET (-2)
-#endif
-#define LP_OFFSET (-1)
-
-extern void __init early_trap_init(void);
-static inline void GIE_ENABLE(void)
-{
-	mb();
-	__nds32__gie_en();
-}
-
-static inline void GIE_DISABLE(void)
-{
-	mb();
-	__nds32__gie_dis();
-}
-
-static inline unsigned long CACHE_SET(unsigned char cache)
-{
-
-	if (cache == ICACHE)
-		return 64 << ((__nds32__mfsr(NDS32_SR_ICM_CFG) & ICM_CFG_mskISET) >>
-			      ICM_CFG_offISET);
-	else
-		return 64 << ((__nds32__mfsr(NDS32_SR_DCM_CFG) & DCM_CFG_mskDSET) >>
-			      DCM_CFG_offDSET);
-}
-
-static inline unsigned long CACHE_WAY(unsigned char cache)
-{
-
-	if (cache == ICACHE)
-		return 1 +
-		    ((__nds32__mfsr(NDS32_SR_ICM_CFG) & ICM_CFG_mskIWAY) >> ICM_CFG_offIWAY);
-	else
-		return 1 +
-		    ((__nds32__mfsr(NDS32_SR_DCM_CFG) & DCM_CFG_mskDWAY) >> DCM_CFG_offDWAY);
-}
-
-static inline unsigned long CACHE_LINE_SIZE(unsigned char cache)
-{
-
-	if (cache == ICACHE)
-		return 8 <<
-		    (((__nds32__mfsr(NDS32_SR_ICM_CFG) & ICM_CFG_mskISZ) >> ICM_CFG_offISZ) - 1);
-	else
-		return 8 <<
-		    (((__nds32__mfsr(NDS32_SR_DCM_CFG) & DCM_CFG_mskDSZ) >> DCM_CFG_offDSZ) - 1);
-}
-
-#endif /* __ASSEMBLY__ */
-
-#define IVB_BASE		PHYS_OFFSET	/* in user space for intr/exc/trap/break table base, 64KB aligned
-						 * We defined at the start of the physical memory */
-
-/* dispatched sub-entry exception handler numbering */
-#define RD_PROT			0	/* read protrection */
-#define WRT_PROT		1	/* write protection */
-#define NOEXEC			2	/* non executable */
-#define PAGE_MODIFY		3	/* page modified */
-#define ACC_BIT			4	/* access bit */
-#define RESVED_PTE		5	/* reserved PTE attribute */
-/* reserved 6 ~ 16 */
-
-#endif /* _ASM_NDS32_NDS32_H_ */
diff --git a/arch/nds32/include/asm/nds32_fpu_inst.h b/arch/nds32/include/asm/nds32_fpu_inst.h
deleted file mode 100644
index 1e4b86a90a48..000000000000
--- a/arch/nds32/include/asm/nds32_fpu_inst.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2005-2018 Andes Technology Corporation */
-
-#ifndef __NDS32_FPU_INST_H
-#define __NDS32_FPU_INST_H
-
-#define cop0_op	0x35
-
-/*
- * COP0 field of opcodes.
- */
-#define fs1_op	0x0
-#define fs2_op  0x4
-#define fd1_op  0x8
-#define fd2_op  0xc
-
-/*
- * FS1 opcode.
- */
-enum fs1 {
-	fadds_op, fsubs_op, fcpynss_op, fcpyss_op,
-	fmadds_op, fmsubs_op, fcmovns_op, fcmovzs_op,
-	fnmadds_op, fnmsubs_op,
-	fmuls_op = 0xc, fdivs_op,
-	fs1_f2op_op = 0xf
-};
-
-/*
- * FS1/F2OP opcode.
- */
-enum fs1_f2 {
-	fs2d_op, fsqrts_op,
-	fui2s_op = 0x8, fsi2s_op = 0xc,
-	fs2ui_op = 0x10, fs2ui_z_op = 0x14,
-	fs2si_op = 0x18, fs2si_z_op = 0x1c
-};
-
-/*
- * FS2 opcode.
- */
-enum fs2 {
-	fcmpeqs_op, fcmpeqs_e_op, fcmplts_op, fcmplts_e_op,
-	fcmples_op, fcmples_e_op, fcmpuns_op, fcmpuns_e_op
-};
-
-/*
- * FD1 opcode.
- */
-enum fd1 {
-	faddd_op, fsubd_op, fcpynsd_op, fcpysd_op,
-	fmaddd_op, fmsubd_op, fcmovnd_op, fcmovzd_op,
-	fnmaddd_op, fnmsubd_op,
-	fmuld_op = 0xc, fdivd_op, fd1_f2op_op = 0xf
-};
-
-/*
- * FD1/F2OP opcode.
- */
-enum fd1_f2 {
-	fd2s_op, fsqrtd_op,
-	fui2d_op = 0x8, fsi2d_op = 0xc,
-	fd2ui_op = 0x10, fd2ui_z_op = 0x14,
-	fd2si_op = 0x18, fd2si_z_op = 0x1c
-};
-
-/*
- * FD2 opcode.
- */
-enum fd2 {
-	fcmpeqd_op, fcmpeqd_e_op, fcmpltd_op, fcmpltd_e_op,
-	fcmpled_op, fcmpled_e_op, fcmpund_op, fcmpund_e_op
-};
-
-#define NDS32Insn(x) x
-
-#define I_OPCODE_off			25
-#define NDS32Insn_OPCODE(x)		(NDS32Insn(x) >> I_OPCODE_off)
-
-#define I_OPCODE_offRt			20
-#define I_OPCODE_mskRt			(0x1fUL << I_OPCODE_offRt)
-#define NDS32Insn_OPCODE_Rt(x) \
-	((NDS32Insn(x) & I_OPCODE_mskRt) >> I_OPCODE_offRt)
-
-#define I_OPCODE_offRa			15
-#define I_OPCODE_mskRa			(0x1fUL << I_OPCODE_offRa)
-#define NDS32Insn_OPCODE_Ra(x) \
-	((NDS32Insn(x) & I_OPCODE_mskRa) >> I_OPCODE_offRa)
-
-#define I_OPCODE_offRb			10
-#define I_OPCODE_mskRb			(0x1fUL << I_OPCODE_offRb)
-#define NDS32Insn_OPCODE_Rb(x) \
-	((NDS32Insn(x) & I_OPCODE_mskRb) >> I_OPCODE_offRb)
-
-#define I_OPCODE_offbit1014		10
-#define I_OPCODE_mskbit1014		(0x1fUL << I_OPCODE_offbit1014)
-#define NDS32Insn_OPCODE_BIT1014(x) \
-	((NDS32Insn(x) & I_OPCODE_mskbit1014) >> I_OPCODE_offbit1014)
-
-#define I_OPCODE_offbit69		6
-#define I_OPCODE_mskbit69		(0xfUL << I_OPCODE_offbit69)
-#define NDS32Insn_OPCODE_BIT69(x) \
-	((NDS32Insn(x) & I_OPCODE_mskbit69) >> I_OPCODE_offbit69)
-
-#define I_OPCODE_offCOP0		0
-#define I_OPCODE_mskCOP0		(0x3fUL << I_OPCODE_offCOP0)
-#define NDS32Insn_OPCODE_COP0(x) \
-	((NDS32Insn(x) & I_OPCODE_mskCOP0) >> I_OPCODE_offCOP0)
-
-#endif /* __NDS32_FPU_INST_H */
diff --git a/arch/nds32/include/asm/page.h b/arch/nds32/include/asm/page.h
deleted file mode 100644
index add33a7f02c8..000000000000
--- a/arch/nds32/include/asm/page.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2005-2017 Andes Technology Corporation
- */
-
-#ifndef _ASMNDS32_PAGE_H
-#define _ASMNDS32_PAGE_H
-
-#ifdef CONFIG_ANDES_PAGE_SIZE_4KB
-#define PAGE_SHIFT      12
-#endif
-#ifdef CONFIG_ANDES_PAGE_SIZE_8KB
-#define PAGE_SHIFT      13
-#endif
-#include <linux/const.h>
-#define PAGE_SIZE       (_AC(1,UL) << PAGE_SHIFT)
-#define PAGE_MASK       (~(PAGE_SIZE-1))
-
-#ifdef __KERNEL__
-
-#ifndef __ASSEMBLY__
-
-struct page;
-struct vm_area_struct;
-#ifdef CONFIG_CPU_CACHE_ALIASING
-extern void copy_user_highpage(struct page *to, struct page *from,
-			       unsigned long vaddr, struct vm_area_struct *vma);
-extern void clear_user_highpage(struct page *page, unsigned long vaddr);
-
-void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
-		    struct page *to);
-void clear_user_page(void *addr, unsigned long vaddr, struct page *page);
-#define __HAVE_ARCH_COPY_USER_HIGHPAGE
-#define clear_user_highpage	clear_user_highpage
-#else
-#define clear_user_page(page, vaddr, pg)        clear_page(page)
-#define copy_user_page(to, from, vaddr, pg)     copy_page(to, from)
-#endif
-
-void clear_page(void *page);
-void copy_page(void *to, void *from);
-
-typedef unsigned long pte_t;
-typedef unsigned long pgd_t;
-typedef unsigned long pgprot_t;
-
-#define pte_val(x)      (x)
-#define pgd_val(x)	(x)
-#define pgprot_val(x)   (x)
-
-#define __pte(x)        (x)
-#define __pgd(x)        (x)
-#define __pgprot(x)     (x)
-
-typedef struct page *pgtable_t;
-
-#include <asm/memory.h>
-#include <asm-generic/getorder.h>
-
-#endif /* !__ASSEMBLY__ */
-
-#endif /* __KERNEL__ */
-
-#endif
diff --git a/arch/nds32/include/asm/perf_event.h b/arch/nds32/include/asm/perf_event.h
deleted file mode 100644
index fcdff02acc14..000000000000
--- a/arch/nds32/include/asm/perf_event.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2008-2018 Andes Technology Corporation */
-
-#ifndef __ASM_PERF_EVENT_H
-#define __ASM_PERF_EVENT_H
-
-/*
- * This file is request by Perf,
- * please refer to tools/perf/design.txt for more details
- */
-struct pt_regs;
-unsigned long perf_instruction_pointer(struct pt_regs *regs);
-unsigned long perf_misc_flags(struct pt_regs *regs);
-#define perf_misc_flags(regs)   perf_misc_flags(regs)
-
-#endif
diff --git a/arch/nds32/include/asm/pgalloc.h b/arch/nds32/include/asm/pgalloc.h
deleted file mode 100644
index a08e1ebca70e..000000000000
--- a/arch/nds32/include/asm/pgalloc.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#ifndef _ASMNDS32_PGALLOC_H
-#define _ASMNDS32_PGALLOC_H
-
-#include <asm/processor.h>
-#include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
-#include <asm/proc-fns.h>
-
-#define __HAVE_ARCH_PTE_ALLOC_ONE
-#include <asm-generic/pgalloc.h>	/* for pte_{alloc,free}_one */
-
-extern pgd_t *pgd_alloc(struct mm_struct *mm);
-extern void pgd_free(struct mm_struct *mm, pgd_t * pgd);
-
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
-{
-	pgtable_t pte;
-
-	pte = __pte_alloc_one(mm, GFP_PGTABLE_USER);
-	if (pte)
-		cpu_dcache_wb_page((unsigned long)page_address(pte));
-
-	return pte;
-}
-
-/*
- * Populate the pmdp entry with a pointer to the pte.  This pmd is part
- * of the mm address space.
- *
- * Ensure that we always set both PMD entries.
- */
-static inline void
-pmd_populate_kernel(struct mm_struct *mm, pmd_t * pmdp, pte_t * ptep)
-{
-	unsigned long pte_ptr = (unsigned long)ptep;
-	unsigned long pmdval;
-
-	BUG_ON(mm != &init_mm);
-
-	/*
-	 * The pmd must be loaded with the physical
-	 * address of the PTE table
-	 */
-	pmdval = __pa(pte_ptr) | _PAGE_KERNEL_TABLE;
-	set_pmd(pmdp, __pmd(pmdval));
-}
-
-static inline void
-pmd_populate(struct mm_struct *mm, pmd_t * pmdp, pgtable_t ptep)
-{
-	unsigned long pmdval;
-
-	BUG_ON(mm == &init_mm);
-
-	pmdval = page_to_pfn(ptep) << PAGE_SHIFT | _PAGE_USER_TABLE;
-	set_pmd(pmdp, __pmd(pmdval));
-}
-
-#endif
diff --git a/arch/nds32/include/asm/pgtable.h b/arch/nds32/include/asm/pgtable.h
deleted file mode 100644
index 419f984eef70..000000000000
--- a/arch/nds32/include/asm/pgtable.h
+++ /dev/null
@@ -1,377 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#ifndef _ASMNDS32_PGTABLE_H
-#define _ASMNDS32_PGTABLE_H
-
-#include <asm-generic/pgtable-nopmd.h>
-#include <linux/sizes.h>
-
-#include <asm/memory.h>
-#include <asm/nds32.h>
-#ifndef __ASSEMBLY__
-#include <asm/fixmap.h>
-#include <nds32_intrinsic.h>
-#endif
-
-#ifdef CONFIG_ANDES_PAGE_SIZE_4KB
-#define PGDIR_SHIFT      22
-#define PTRS_PER_PGD     1024
-#define PTRS_PER_PTE     1024
-#endif
-
-#ifdef CONFIG_ANDES_PAGE_SIZE_8KB
-#define PGDIR_SHIFT      24
-#define PTRS_PER_PGD     256
-#define PTRS_PER_PTE     2048
-#endif
-
-#ifndef __ASSEMBLY__
-extern void __pte_error(const char *file, int line, unsigned long val);
-extern void __pgd_error(const char *file, int line, unsigned long val);
-
-#define pte_ERROR(pte)		__pte_error(__FILE__, __LINE__, pte_val(pte))
-#define pgd_ERROR(pgd)		__pgd_error(__FILE__, __LINE__, pgd_val(pgd))
-#endif /* !__ASSEMBLY__ */
-
-#define PMD_SIZE		(1UL << PMD_SHIFT)
-#define PMD_MASK		(~(PMD_SIZE-1))
-#define PGDIR_SIZE		(1UL << PGDIR_SHIFT)
-#define PGDIR_MASK		(~(PGDIR_SIZE-1))
-
-/*
- * This is the lowest virtual address we can permit any user space
- * mapping to be mapped at.  This is particularly important for
- * non-high vector CPUs.
- */
-#define FIRST_USER_ADDRESS	0x8000
-
-#ifdef CONFIG_HIGHMEM
-#define CONSISTENT_BASE		((PKMAP_BASE) - (SZ_2M))
-#define CONSISTENT_END		(PKMAP_BASE)
-#else
-#define CONSISTENT_BASE		(FIXADDR_START - SZ_2M)
-#define CONSISTENT_END		(FIXADDR_START)
-#endif
-#define CONSISTENT_OFFSET(x)	(((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT)
-
-#ifdef CONFIG_HIGHMEM
-#ifndef __ASSEMBLY__
-#include <asm/highmem.h>
-#endif
-#endif
-
-#define VMALLOC_RESERVE 	SZ_128M
-#define VMALLOC_END		(CONSISTENT_BASE - PAGE_SIZE)
-#define VMALLOC_START		((VMALLOC_END) - VMALLOC_RESERVE)
-#define VMALLOC_VMADDR(x)	((unsigned long)(x))
-#define MAXMEM			__pa(VMALLOC_START)
-#define MAXMEM_PFN		PFN_DOWN(MAXMEM)
-
-#define FIRST_USER_PGD_NR	0
-#define USER_PTRS_PER_PGD	((TASK_SIZE/PGDIR_SIZE) + FIRST_USER_PGD_NR)
-
-/* L2 PTE */
-#define _PAGE_V			(1UL << 0)
-
-#define _PAGE_M_XKRW            (0UL << 1)
-#define _PAGE_M_UR_KR		(1UL << 1)
-#define _PAGE_M_UR_KRW		(2UL << 1)
-#define _PAGE_M_URW_KRW		(3UL << 1)
-#define _PAGE_M_KR		(5UL << 1)
-#define _PAGE_M_KRW		(7UL << 1)
-
-#define _PAGE_D			(1UL << 4)
-#define _PAGE_E			(1UL << 5)
-#define _PAGE_A			(1UL << 6)
-#define _PAGE_G			(1UL << 7)
-
-#define _PAGE_C_DEV		(0UL << 8)
-#define _PAGE_C_DEV_WB		(1UL << 8)
-#define _PAGE_C_MEM		(2UL << 8)
-#define _PAGE_C_MEM_SHRD_WB	(4UL << 8)
-#define _PAGE_C_MEM_SHRD_WT	(5UL << 8)
-#define _PAGE_C_MEM_WB		(6UL << 8)
-#define _PAGE_C_MEM_WT		(7UL << 8)
-
-#define _PAGE_L			(1UL << 11)
-
-#define _HAVE_PAGE_L		(_PAGE_L)
-#define _PAGE_FILE		(1UL << 1)
-#define _PAGE_YOUNG		0
-#define _PAGE_M_MASK		_PAGE_M_KRW
-#define _PAGE_C_MASK		_PAGE_C_MEM_WT
-
-#ifdef CONFIG_SMP
-#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
-#define _PAGE_CACHE_SHRD	_PAGE_C_MEM_SHRD_WT
-#else
-#define _PAGE_CACHE_SHRD	_PAGE_C_MEM_SHRD_WB
-#endif
-#else
-#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
-#define _PAGE_CACHE_SHRD	_PAGE_C_MEM_WT
-#else
-#define _PAGE_CACHE_SHRD	_PAGE_C_MEM_WB
-#endif
-#endif
-
-#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
-#define _PAGE_CACHE		_PAGE_C_MEM_WT
-#else
-#define _PAGE_CACHE		_PAGE_C_MEM_WB
-#endif
-
-#define _PAGE_IOREMAP \
-	(_PAGE_V | _PAGE_M_KRW | _PAGE_D | _PAGE_G | _PAGE_C_DEV)
-
-/*
- * + Level 1 descriptor (PMD)
- */
-#define PMD_TYPE_TABLE		0
-
-#ifndef __ASSEMBLY__
-
-#define _PAGE_USER_TABLE     PMD_TYPE_TABLE
-#define _PAGE_KERNEL_TABLE   PMD_TYPE_TABLE
-
-#define PAGE_EXEC	__pgprot(_PAGE_V | _PAGE_M_XKRW | _PAGE_E)
-#define PAGE_NONE	__pgprot(_PAGE_V | _PAGE_M_KRW | _PAGE_A)
-#define PAGE_READ	__pgprot(_PAGE_V | _PAGE_M_UR_KR)
-#define PAGE_RDWR	__pgprot(_PAGE_V | _PAGE_M_URW_KRW | _PAGE_D)
-#define PAGE_COPY	__pgprot(_PAGE_V | _PAGE_M_UR_KR)
-
-#define PAGE_UXKRWX_V1	__pgprot(_PAGE_V | _PAGE_M_KRW | _PAGE_D | _PAGE_E | _PAGE_G | _PAGE_CACHE_SHRD)
-#define PAGE_UXKRWX_V2	__pgprot(_PAGE_V | _PAGE_M_XKRW | _PAGE_D | _PAGE_E | _PAGE_G | _PAGE_CACHE_SHRD)
-#define PAGE_URXKRWX_V2	__pgprot(_PAGE_V | _PAGE_M_UR_KRW | _PAGE_D | _PAGE_E | _PAGE_G | _PAGE_CACHE_SHRD)
-#define PAGE_CACHE_L1	__pgprot(_HAVE_PAGE_L | _PAGE_V | _PAGE_M_KRW | _PAGE_D | _PAGE_E | _PAGE_G | _PAGE_CACHE)
-#define PAGE_MEMORY	__pgprot(_HAVE_PAGE_L | _PAGE_V | _PAGE_M_KRW | _PAGE_D | _PAGE_E | _PAGE_G | _PAGE_CACHE_SHRD)
-#define PAGE_KERNEL	__pgprot(_PAGE_V | _PAGE_M_KRW | _PAGE_D | _PAGE_E | _PAGE_G | _PAGE_CACHE_SHRD)
-#define PAGE_SHARED	__pgprot(_PAGE_V | _PAGE_M_URW_KRW | _PAGE_D | _PAGE_CACHE_SHRD)
-#define PAGE_DEVICE    __pgprot(_PAGE_V | _PAGE_M_KRW | _PAGE_D | _PAGE_G | _PAGE_C_DEV)
-#endif /* __ASSEMBLY__ */
-
-/*         xwr */
-#define __P000  (PAGE_NONE | _PAGE_CACHE_SHRD)
-#define __P001  (PAGE_READ | _PAGE_CACHE_SHRD)
-#define __P010  (PAGE_COPY | _PAGE_CACHE_SHRD)
-#define __P011  (PAGE_COPY | _PAGE_CACHE_SHRD)
-#define __P100  (PAGE_EXEC | _PAGE_CACHE_SHRD)
-#define __P101  (PAGE_READ | _PAGE_E | _PAGE_CACHE_SHRD)
-#define __P110  (PAGE_COPY | _PAGE_E | _PAGE_CACHE_SHRD)
-#define __P111  (PAGE_COPY | _PAGE_E | _PAGE_CACHE_SHRD)
-
-#define __S000  (PAGE_NONE | _PAGE_CACHE_SHRD)
-#define __S001  (PAGE_READ | _PAGE_CACHE_SHRD)
-#define __S010  (PAGE_RDWR | _PAGE_CACHE_SHRD)
-#define __S011  (PAGE_RDWR | _PAGE_CACHE_SHRD)
-#define __S100  (PAGE_EXEC | _PAGE_CACHE_SHRD)
-#define __S101  (PAGE_READ | _PAGE_E | _PAGE_CACHE_SHRD)
-#define __S110  (PAGE_RDWR | _PAGE_E | _PAGE_CACHE_SHRD)
-#define __S111  (PAGE_RDWR | _PAGE_E | _PAGE_CACHE_SHRD)
-
-#ifndef __ASSEMBLY__
-/*
- * ZERO_PAGE is a global shared page that is always zero: used
- * for zero-mapped memory areas etc..
- */
-extern struct page *empty_zero_page;
-extern void paging_init(void);
-#define ZERO_PAGE(vaddr)	(empty_zero_page)
-
-#define pte_pfn(pte)		(pte_val(pte) >> PAGE_SHIFT)
-#define pfn_pte(pfn,prot)	(__pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot)))
-
-#define pte_none(pte)	        !(pte_val(pte))
-#define pte_clear(mm,addr,ptep)	set_pte_at((mm),(addr),(ptep), __pte(0))
-#define pte_page(pte)		(pfn_to_page(pte_pfn(pte)))
-
-static unsigned long pmd_page_vaddr(pmd_t pmd)
-{
-	return ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK));
-}
-
-#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
-/*
- * Set a level 1 translation table entry, and clean it out of
- * any caches such that the MMUs can load it correctly.
- */
-static inline void set_pmd(pmd_t * pmdp, pmd_t pmd)
-{
-
-	*pmdp = pmd;
-#if !defined(CONFIG_CPU_DCACHE_DISABLE) && !defined(CONFIG_CPU_DCACHE_WRITETHROUGH)
-	__asm__ volatile ("\n\tcctl %0, L1D_VA_WB"::"r" (pmdp):"memory");
-	__nds32__msync_all();
-	__nds32__dsb();
-#endif
-}
-
-/*
- * Set a PTE and flush it out
- */
-static inline void set_pte(pte_t * ptep, pte_t pte)
-{
-
-	*ptep = pte;
-#if !defined(CONFIG_CPU_DCACHE_DISABLE) && !defined(CONFIG_CPU_DCACHE_WRITETHROUGH)
-	__asm__ volatile ("\n\tcctl %0, L1D_VA_WB"::"r" (ptep):"memory");
-	__nds32__msync_all();
-	__nds32__dsb();
-#endif
-}
-
-/*
- * The following only work if pte_present() is true.
- * Undefined behaviour if not..
- */
-
-/*
- * pte_write: 	     this page is writeable for user mode
- * pte_read:         this page is readable for user mode
- * pte_kernel_write: this page is writeable for kernel mode
- *
- * We don't have pte_kernel_read because kernel always can read.
- *
- * */
-
-#define pte_present(pte)        (pte_val(pte) & _PAGE_V)
-#define pte_write(pte)          ((pte_val(pte) & _PAGE_M_MASK) == _PAGE_M_URW_KRW)
-#define pte_read(pte)		(((pte_val(pte) & _PAGE_M_MASK) == _PAGE_M_UR_KR) || \
-				((pte_val(pte) & _PAGE_M_MASK) == _PAGE_M_UR_KRW) || \
-				((pte_val(pte) & _PAGE_M_MASK) == _PAGE_M_URW_KRW))
-#define pte_kernel_write(pte)   (((pte_val(pte) & _PAGE_M_MASK) == _PAGE_M_URW_KRW) || \
-				((pte_val(pte) & _PAGE_M_MASK) == _PAGE_M_UR_KRW) || \
-				((pte_val(pte) & _PAGE_M_MASK) == _PAGE_M_KRW) || \
-				(((pte_val(pte) & _PAGE_M_MASK) == _PAGE_M_XKRW) && pte_exec(pte)))
-#define pte_exec(pte)		(pte_val(pte) & _PAGE_E)
-#define pte_dirty(pte)		(pte_val(pte) & _PAGE_D)
-#define pte_young(pte)		(pte_val(pte) & _PAGE_YOUNG)
-
-/*
- * The following only works if pte_present() is not true.
- */
-#define pte_file(pte)		(pte_val(pte) & _PAGE_FILE)
-#define pte_to_pgoff(x)		(pte_val(x) >> 2)
-#define pgoff_to_pte(x)		__pte(((x) << 2) | _PAGE_FILE)
-
-#define PTE_FILE_MAX_BITS	29
-
-#define PTE_BIT_FUNC(fn,op) \
-static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; }
-
-static inline pte_t pte_wrprotect(pte_t pte)
-{
-	pte_val(pte) = pte_val(pte) & ~_PAGE_M_MASK;
-	pte_val(pte) = pte_val(pte) | _PAGE_M_UR_KR;
-	return pte;
-}
-
-static inline pte_t pte_mkwrite(pte_t pte)
-{
-	pte_val(pte) = pte_val(pte) & ~_PAGE_M_MASK;
-	pte_val(pte) = pte_val(pte) | _PAGE_M_URW_KRW;
-	return pte;
-}
-
-PTE_BIT_FUNC(exprotect, &=~_PAGE_E);
-PTE_BIT_FUNC(mkexec, |=_PAGE_E);
-PTE_BIT_FUNC(mkclean, &=~_PAGE_D);
-PTE_BIT_FUNC(mkdirty, |=_PAGE_D);
-PTE_BIT_FUNC(mkold, &=~_PAGE_YOUNG);
-PTE_BIT_FUNC(mkyoung, |=_PAGE_YOUNG);
-
-/*
- * Mark the prot value as uncacheable and unbufferable.
- */
-#define pgprot_noncached(prot)	   __pgprot((pgprot_val(prot)&~_PAGE_C_MASK) | _PAGE_C_DEV)
-#define pgprot_writecombine(prot)  __pgprot((pgprot_val(prot)&~_PAGE_C_MASK) | _PAGE_C_DEV_WB)
-
-#define pmd_none(pmd)         (pmd_val(pmd)&0x1)
-#define pmd_present(pmd)      (!pmd_none(pmd))
-#define	pmd_bad(pmd)	      pmd_none(pmd)
-
-#define copy_pmd(pmdpd,pmdps)	set_pmd((pmdpd), *(pmdps))
-#define pmd_clear(pmdp)		set_pmd((pmdp), __pmd(1))
-
-static inline pmd_t __mk_pmd(pte_t * ptep, unsigned long prot)
-{
-	unsigned long ptr = (unsigned long)ptep;
-	pmd_t pmd;
-
-	/*
-	 * The pmd must be loaded with the physical
-	 * address of the PTE table
-	 */
-
-	pmd_val(pmd) = __virt_to_phys(ptr) | prot;
-	return pmd;
-}
-
-#define pmd_page(pmd)        virt_to_page(__va(pmd_val(pmd)))
-
-/*
- * Permanent address of a page. We never have highmem, so this is trivial.
- */
-#define pages_to_mb(x)       ((x) >> (20 - PAGE_SHIFT))
-
-/*
- * Conversion functions: convert a page and protection to a page entry,
- * and a page entry and page directory to the page they refer to.
- */
-#define mk_pte(page,prot)	pfn_pte(page_to_pfn(page),prot)
-
-/*
- * The "pgd_xxx()" functions here are trivial for a folded two-level
- * setup: the pgd is never bad, and a pmd always exists (as it's folded
- * into the pgd entry)
- */
-#define pgd_none(pgd)		(0)
-#define pgd_bad(pgd)		(0)
-#define pgd_present(pgd)  	(1)
-#define pgd_clear(pgdp)		do { } while (0)
-
-#define page_pte_prot(page,prot)     	mk_pte(page, prot)
-#define page_pte(page)		        mk_pte(page, __pgprot(0))
-/*
- *     L1PTE = $mr1 + ((virt >> PMD_SHIFT) << 2);
- *     L2PTE = (((virt >> PAGE_SHIFT) & (PTRS_PER_PTE -1 )) << 2);
- *     PPN = (phys & 0xfffff000);
- *
-*/
-
-static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
-{
-	const unsigned long mask = 0xfff;
-	pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask);
-	return pte;
-}
-
-extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
-
-/* Encode and decode a swap entry.
- *
- * We support up to 32GB of swap on 4k machines
- */
-#define __swp_type(x)	 	     (((x).val >> 2) & 0x7f)
-#define __swp_offset(x)	   	     ((x).val >> 9)
-#define __swp_entry(type,offset)     ((swp_entry_t) { ((type) << 2) | ((offset) << 9) })
-#define __pte_to_swp_entry(pte)	     ((swp_entry_t) { pte_val(pte) })
-#define __swp_entry_to_pte(swp)	     ((pte_t) { (swp).val })
-
-/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
-#define kern_addr_valid(addr)	(1)
-
-/*
- * We provide our own arch_get_unmapped_area to cope with VIPT caches.
- */
-#define HAVE_ARCH_UNMAPPED_AREA
-
-/*
- * remap a physical address `phys' of size `size' with page protection `prot'
- * into virtual address `from'
- */
-
-#endif /* !__ASSEMBLY__ */
-
-#endif /* _ASMNDS32_PGTABLE_H */
diff --git a/arch/nds32/include/asm/pmu.h b/arch/nds32/include/asm/pmu.h
deleted file mode 100644
index e1ac0b0b8bcf..000000000000
--- a/arch/nds32/include/asm/pmu.h
+++ /dev/null
@@ -1,386 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2008-2018 Andes Technology Corporation */
-
-#ifndef __ASM_PMU_H
-#define __ASM_PMU_H
-
-#include <linux/interrupt.h>
-#include <linux/perf_event.h>
-#include <asm/unistd.h>
-#include <asm/bitfield.h>
-
-/* Has special meaning for perf core implementation */
-#define HW_OP_UNSUPPORTED		0x0
-#define C(_x)				PERF_COUNT_HW_CACHE_##_x
-#define CACHE_OP_UNSUPPORTED		0x0
-
-/* Enough for both software and hardware defined events */
-#define SOFTWARE_EVENT_MASK		0xFF
-
-#define PFM_OFFSET_MAGIC_0		2	/* DO NOT START FROM 0 */
-#define PFM_OFFSET_MAGIC_1		(PFM_OFFSET_MAGIC_0 + 36)
-#define PFM_OFFSET_MAGIC_2		(PFM_OFFSET_MAGIC_1 + 36)
-
-enum { PFMC0, PFMC1, PFMC2, MAX_COUNTERS };
-
-u32 PFM_CTL_OVF[3] = { PFM_CTL_mskOVF0, PFM_CTL_mskOVF1,
-		       PFM_CTL_mskOVF2 };
-u32 PFM_CTL_EN[3] = { PFM_CTL_mskEN0, PFM_CTL_mskEN1,
-		      PFM_CTL_mskEN2 };
-u32 PFM_CTL_OFFSEL[3] = { PFM_CTL_offSEL0, PFM_CTL_offSEL1,
-			  PFM_CTL_offSEL2 };
-u32 PFM_CTL_IE[3] = { PFM_CTL_mskIE0, PFM_CTL_mskIE1, PFM_CTL_mskIE2 };
-u32 PFM_CTL_KS[3] = { PFM_CTL_mskKS0, PFM_CTL_mskKS1, PFM_CTL_mskKS2 };
-u32 PFM_CTL_KU[3] = { PFM_CTL_mskKU0, PFM_CTL_mskKU1, PFM_CTL_mskKU2 };
-u32 PFM_CTL_SEL[3] = { PFM_CTL_mskSEL0, PFM_CTL_mskSEL1, PFM_CTL_mskSEL2 };
-/*
- * Perf Events' indices
- */
-#define NDS32_IDX_CYCLE_COUNTER			0
-#define NDS32_IDX_COUNTER0			1
-#define NDS32_IDX_COUNTER1			2
-
-/* The events for a given PMU register set. */
-struct pmu_hw_events {
-	/*
-	 * The events that are active on the PMU for the given index.
-	 */
-	struct perf_event *events[MAX_COUNTERS];
-
-	/*
-	 * A 1 bit for an index indicates that the counter is being used for
-	 * an event. A 0 means that the counter can be used.
-	 */
-	unsigned long used_mask[BITS_TO_LONGS(MAX_COUNTERS)];
-
-	/*
-	 * Hardware lock to serialize accesses to PMU registers. Needed for the
-	 * read/modify/write sequences.
-	 */
-	raw_spinlock_t pmu_lock;
-};
-
-struct nds32_pmu {
-	struct pmu pmu;
-	cpumask_t active_irqs;
-	char *name;
-	 irqreturn_t (*handle_irq)(int irq_num, void *dev);
-	void (*enable)(struct perf_event *event);
-	void (*disable)(struct perf_event *event);
-	int (*get_event_idx)(struct pmu_hw_events *hw_events,
-			     struct perf_event *event);
-	int (*set_event_filter)(struct hw_perf_event *evt,
-				struct perf_event_attr *attr);
-	u32 (*read_counter)(struct perf_event *event);
-	void (*write_counter)(struct perf_event *event, u32 val);
-	void (*start)(struct nds32_pmu *nds32_pmu);
-	void (*stop)(struct nds32_pmu *nds32_pmu);
-	void (*reset)(void *data);
-	int (*request_irq)(struct nds32_pmu *nds32_pmu, irq_handler_t handler);
-	void (*free_irq)(struct nds32_pmu *nds32_pmu);
-	int (*map_event)(struct perf_event *event);
-	int num_events;
-	atomic_t active_events;
-	u64 max_period;
-	struct platform_device *plat_device;
-	struct pmu_hw_events *(*get_hw_events)(void);
-};
-
-#define to_nds32_pmu(p)			(container_of(p, struct nds32_pmu, pmu))
-
-int nds32_pmu_register(struct nds32_pmu *nds32_pmu, int type);
-
-u64 nds32_pmu_event_update(struct perf_event *event);
-
-int nds32_pmu_event_set_period(struct perf_event *event);
-
-/*
- * Common NDS32 SPAv3 event types
- *
- * Note: An implementation may not be able to count all of these events
- * but the encodings are considered to be `reserved' in the case that
- * they are not available.
- *
- * SEL_TOTAL_CYCLES will add an offset is due to ZERO is defined as
- * NOT_SUPPORTED EVENT mapping in generic perf code.
- * You will need to deal it in the event writing implementation.
- */
-enum spav3_counter_0_perf_types {
-	SPAV3_0_SEL_BASE = -1 + PFM_OFFSET_MAGIC_0,	/* counting symbol */
-	SPAV3_0_SEL_TOTAL_CYCLES = 0 + PFM_OFFSET_MAGIC_0,
-	SPAV3_0_SEL_COMPLETED_INSTRUCTION = 1 + PFM_OFFSET_MAGIC_0,
-	SPAV3_0_SEL_LAST	/* counting symbol */
-};
-
-enum spav3_counter_1_perf_types {
-	SPAV3_1_SEL_BASE = -1 + PFM_OFFSET_MAGIC_1,	/* counting symbol */
-	SPAV3_1_SEL_TOTAL_CYCLES = 0 + PFM_OFFSET_MAGIC_1,
-	SPAV3_1_SEL_COMPLETED_INSTRUCTION = 1 + PFM_OFFSET_MAGIC_1,
-	SPAV3_1_SEL_CONDITIONAL_BRANCH = 2 + PFM_OFFSET_MAGIC_1,
-	SPAV3_1_SEL_TAKEN_CONDITIONAL_BRANCH = 3 + PFM_OFFSET_MAGIC_1,
-	SPAV3_1_SEL_PREFETCH_INSTRUCTION = 4 + PFM_OFFSET_MAGIC_1,
-	SPAV3_1_SEL_RET_INST = 5 + PFM_OFFSET_MAGIC_1,
-	SPAV3_1_SEL_JR_INST = 6 + PFM_OFFSET_MAGIC_1,
-	SPAV3_1_SEL_JAL_JRAL_INST = 7 + PFM_OFFSET_MAGIC_1,
-	SPAV3_1_SEL_NOP_INST = 8 + PFM_OFFSET_MAGIC_1,
-	SPAV3_1_SEL_SCW_INST = 9 + PFM_OFFSET_MAGIC_1,
-	SPAV3_1_SEL_ISB_DSB_INST = 10 + PFM_OFFSET_MAGIC_1,
-	SPAV3_1_SEL_CCTL_INST = 11 + PFM_OFFSET_MAGIC_1,
-	SPAV3_1_SEL_TAKEN_INTERRUPTS = 12 + PFM_OFFSET_MAGIC_1,
-	SPAV3_1_SEL_LOADS_COMPLETED = 13 + PFM_OFFSET_MAGIC_1,
-	SPAV3_1_SEL_UITLB_ACCESS = 14 + PFM_OFFSET_MAGIC_1,
-	SPAV3_1_SEL_UDTLB_ACCESS = 15 + PFM_OFFSET_MAGIC_1,
-	SPAV3_1_SEL_MTLB_ACCESS = 16 + PFM_OFFSET_MAGIC_1,
-	SPAV3_1_SEL_CODE_CACHE_ACCESS = 17 + PFM_OFFSET_MAGIC_1,
-	SPAV3_1_SEL_DATA_DEPENDENCY_STALL_CYCLES = 18 + PFM_OFFSET_MAGIC_1,
-	SPAV3_1_SEL_DATA_CACHE_MISS_STALL_CYCLES = 19 + PFM_OFFSET_MAGIC_1,
-	SPAV3_1_SEL_DATA_CACHE_ACCESS = 20 + PFM_OFFSET_MAGIC_1,
-	SPAV3_1_SEL_DATA_CACHE_MISS = 21 + PFM_OFFSET_MAGIC_1,
-	SPAV3_1_SEL_LOAD_DATA_CACHE_ACCESS = 22 + PFM_OFFSET_MAGIC_1,
-	SPAV3_1_SEL_STORE_DATA_CACHE_ACCESS = 23 + PFM_OFFSET_MAGIC_1,
-	SPAV3_1_SEL_ILM_ACCESS = 24 + PFM_OFFSET_MAGIC_1,
-	SPAV3_1_SEL_LSU_BIU_CYCLES = 25 + PFM_OFFSET_MAGIC_1,
-	SPAV3_1_SEL_HPTWK_BIU_CYCLES = 26 + PFM_OFFSET_MAGIC_1,
-	SPAV3_1_SEL_DMA_BIU_CYCLES = 27 + PFM_OFFSET_MAGIC_1,
-	SPAV3_1_SEL_CODE_CACHE_FILL_BIU_CYCLES = 28 + PFM_OFFSET_MAGIC_1,
-	SPAV3_1_SEL_LEGAL_UNALIGN_DCACHE_ACCESS = 29 + PFM_OFFSET_MAGIC_1,
-	SPAV3_1_SEL_PUSH25 = 30 + PFM_OFFSET_MAGIC_1,
-	SPAV3_1_SEL_SYSCALLS_INST = 31 + PFM_OFFSET_MAGIC_1,
-	SPAV3_1_SEL_LAST	/* counting symbol */
-};
-
-enum spav3_counter_2_perf_types {
-	SPAV3_2_SEL_BASE = -1 + PFM_OFFSET_MAGIC_2,	/* counting symbol */
-	SPAV3_2_SEL_TOTAL_CYCLES = 0 + PFM_OFFSET_MAGIC_2,
-	SPAV3_2_SEL_COMPLETED_INSTRUCTION = 1 + PFM_OFFSET_MAGIC_2,
-	SPAV3_2_SEL_CONDITIONAL_BRANCH_MISPREDICT = 2 + PFM_OFFSET_MAGIC_2,
-	SPAV3_2_SEL_TAKEN_CONDITIONAL_BRANCH_MISPREDICT =
-	    3 + PFM_OFFSET_MAGIC_2,
-	SPAV3_2_SEL_PREFETCH_INSTRUCTION_CACHE_HIT = 4 + PFM_OFFSET_MAGIC_2,
-	SPAV3_1_SEL_RET_MISPREDICT = 5 + PFM_OFFSET_MAGIC_2,
-	SPAV3_1_SEL_IMMEDIATE_J_INST = 6 + PFM_OFFSET_MAGIC_2,
-	SPAV3_1_SEL_MULTIPLY_INST = 7 + PFM_OFFSET_MAGIC_2,
-	SPAV3_1_SEL_16_BIT_INST = 8 + PFM_OFFSET_MAGIC_2,
-	SPAV3_1_SEL_FAILED_SCW_INST = 9 + PFM_OFFSET_MAGIC_2,
-	SPAV3_1_SEL_LD_AFTER_ST_CONFLICT_REPLAYS = 10 + PFM_OFFSET_MAGIC_2,
-	SPAV3_1_SEL_TAKEN_EXCEPTIONS = 12 + PFM_OFFSET_MAGIC_2,
-	SPAV3_1_SEL_STORES_COMPLETED = 13 + PFM_OFFSET_MAGIC_2,
-	SPAV3_2_SEL_UITLB_MISS = 14 + PFM_OFFSET_MAGIC_2,
-	SPAV3_2_SEL_UDTLB_MISS = 15 + PFM_OFFSET_MAGIC_2,
-	SPAV3_2_SEL_MTLB_MISS = 16 + PFM_OFFSET_MAGIC_2,
-	SPAV3_2_SEL_CODE_CACHE_MISS = 17 + PFM_OFFSET_MAGIC_2,
-	SPAV3_1_SEL_EMPTY_INST_QUEUE_STALL_CYCLES = 18 + PFM_OFFSET_MAGIC_2,
-	SPAV3_1_SEL_DATA_WRITE_BACK = 19 + PFM_OFFSET_MAGIC_2,
-	SPAV3_2_SEL_DATA_CACHE_MISS = 21 + PFM_OFFSET_MAGIC_2,
-	SPAV3_2_SEL_LOAD_DATA_CACHE_MISS = 22 + PFM_OFFSET_MAGIC_2,
-	SPAV3_2_SEL_STORE_DATA_CACHE_MISS = 23 + PFM_OFFSET_MAGIC_2,
-	SPAV3_1_SEL_DLM_ACCESS = 24 + PFM_OFFSET_MAGIC_2,
-	SPAV3_1_SEL_LSU_BIU_REQUEST = 25 + PFM_OFFSET_MAGIC_2,
-	SPAV3_1_SEL_HPTWK_BIU_REQUEST = 26 + PFM_OFFSET_MAGIC_2,
-	SPAV3_1_SEL_DMA_BIU_REQUEST = 27 + PFM_OFFSET_MAGIC_2,
-	SPAV3_1_SEL_CODE_CACHE_FILL_BIU_REQUEST = 28 + PFM_OFFSET_MAGIC_2,
-	SPAV3_1_SEL_EXTERNAL_EVENTS = 29 + PFM_OFFSET_MAGIC_2,
-	SPAV3_1_SEL_POP25 = 30 + PFM_OFFSET_MAGIC_2,
-	SPAV3_2_SEL_LAST	/* counting symbol */
-};
-
-/* Get converted event counter index */
-static inline int get_converted_event_idx(unsigned long event)
-{
-	int idx;
-
-	if ((event) > SPAV3_0_SEL_BASE && event < SPAV3_0_SEL_LAST) {
-		idx = 0;
-	} else if ((event) > SPAV3_1_SEL_BASE && event < SPAV3_1_SEL_LAST) {
-		idx = 1;
-	} else if ((event) > SPAV3_2_SEL_BASE && event < SPAV3_2_SEL_LAST) {
-		idx = 2;
-	} else {
-		pr_err("GET_CONVERTED_EVENT_IDX PFM counter range error\n");
-		return -EPERM;
-	}
-
-	return idx;
-}
-
-/* Get converted hardware event number */
-static inline u32 get_converted_evet_hw_num(u32 event)
-{
-	if (event > SPAV3_0_SEL_BASE && event < SPAV3_0_SEL_LAST)
-		event -= PFM_OFFSET_MAGIC_0;
-	else if (event > SPAV3_1_SEL_BASE && event < SPAV3_1_SEL_LAST)
-		event -= PFM_OFFSET_MAGIC_1;
-	else if (event > SPAV3_2_SEL_BASE && event < SPAV3_2_SEL_LAST)
-		event -= PFM_OFFSET_MAGIC_2;
-	else if (event != 0)
-		pr_err("GET_CONVERTED_EVENT_HW_NUM PFM counter range error\n");
-
-	return event;
-}
-
-/*
- * NDS32 HW events mapping
- *
- * The hardware events that we support. We do support cache operations but
- * we have harvard caches and no way to combine instruction and data
- * accesses/misses in hardware.
- */
-static const unsigned int nds32_pfm_perf_map[PERF_COUNT_HW_MAX] = {
-	[PERF_COUNT_HW_CPU_CYCLES] = SPAV3_0_SEL_TOTAL_CYCLES,
-	[PERF_COUNT_HW_INSTRUCTIONS] = SPAV3_1_SEL_COMPLETED_INSTRUCTION,
-	[PERF_COUNT_HW_CACHE_REFERENCES] = SPAV3_1_SEL_DATA_CACHE_ACCESS,
-	[PERF_COUNT_HW_CACHE_MISSES] = SPAV3_2_SEL_DATA_CACHE_MISS,
-	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = HW_OP_UNSUPPORTED,
-	[PERF_COUNT_HW_BRANCH_MISSES] = HW_OP_UNSUPPORTED,
-	[PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
-	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = HW_OP_UNSUPPORTED,
-	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED,
-	[PERF_COUNT_HW_REF_CPU_CYCLES] = HW_OP_UNSUPPORTED
-};
-
-static const unsigned int nds32_pfm_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
-	[PERF_COUNT_HW_CACHE_OP_MAX]
-	[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
-	[C(L1D)] = {
-		    [C(OP_READ)] = {
-				    [C(RESULT_ACCESS)] =
-				    SPAV3_1_SEL_LOAD_DATA_CACHE_ACCESS,
-				    [C(RESULT_MISS)] =
-				    SPAV3_2_SEL_LOAD_DATA_CACHE_MISS,
-				    },
-		    [C(OP_WRITE)] = {
-				     [C(RESULT_ACCESS)] =
-				     SPAV3_1_SEL_STORE_DATA_CACHE_ACCESS,
-				     [C(RESULT_MISS)] =
-				     SPAV3_2_SEL_STORE_DATA_CACHE_MISS,
-				     },
-		    [C(OP_PREFETCH)] = {
-					[C(RESULT_ACCESS)] =
-						CACHE_OP_UNSUPPORTED,
-					[C(RESULT_MISS)] =
-						CACHE_OP_UNSUPPORTED,
-					},
-		    },
-	[C(L1I)] = {
-		    [C(OP_READ)] = {
-				    [C(RESULT_ACCESS)] =
-				    SPAV3_1_SEL_CODE_CACHE_ACCESS,
-				    [C(RESULT_MISS)] =
-				    SPAV3_2_SEL_CODE_CACHE_MISS,
-				    },
-		    [C(OP_WRITE)] = {
-				     [C(RESULT_ACCESS)] =
-				     SPAV3_1_SEL_CODE_CACHE_ACCESS,
-				     [C(RESULT_MISS)] =
-				     SPAV3_2_SEL_CODE_CACHE_MISS,
-				     },
-		    [C(OP_PREFETCH)] = {
-					[C(RESULT_ACCESS)] =
-					CACHE_OP_UNSUPPORTED,
-					[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
-					},
-		    },
-	/* TODO: L2CC */
-	[C(LL)] = {
-		   [C(OP_READ)] = {
-				   [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
-				   [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
-				   },
-		   [C(OP_WRITE)] = {
-				    [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
-				    [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
-				    },
-		   [C(OP_PREFETCH)] = {
-				       [C(RESULT_ACCESS)] =
-				       CACHE_OP_UNSUPPORTED,
-				       [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
-				       },
-		   },
-	/* NDS32 PMU does not support TLB read/write hit/miss,
-	 * However, it can count access/miss, which mixed with read and write.
-	 * Therefore, only READ counter will use it.
-	 * We do as possible as we can.
-	 */
-	[C(DTLB)] = {
-		     [C(OP_READ)] = {
-				     [C(RESULT_ACCESS)] =
-					SPAV3_1_SEL_UDTLB_ACCESS,
-				     [C(RESULT_MISS)] =
-					SPAV3_2_SEL_UDTLB_MISS,
-				     },
-		     [C(OP_WRITE)] = {
-				      [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
-				      [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
-				      },
-		     [C(OP_PREFETCH)] = {
-					 [C(RESULT_ACCESS)] =
-					 CACHE_OP_UNSUPPORTED,
-					 [C(RESULT_MISS)] =
-					 CACHE_OP_UNSUPPORTED,
-					 },
-		     },
-	[C(ITLB)] = {
-		     [C(OP_READ)] = {
-				     [C(RESULT_ACCESS)] =
-					SPAV3_1_SEL_UITLB_ACCESS,
-				     [C(RESULT_MISS)] =
-					SPAV3_2_SEL_UITLB_MISS,
-				     },
-		     [C(OP_WRITE)] = {
-				      [C(RESULT_ACCESS)] =
-					CACHE_OP_UNSUPPORTED,
-				      [C(RESULT_MISS)] =
-					CACHE_OP_UNSUPPORTED,
-				      },
-		     [C(OP_PREFETCH)] = {
-					 [C(RESULT_ACCESS)] =
-						CACHE_OP_UNSUPPORTED,
-					 [C(RESULT_MISS)] =
-						CACHE_OP_UNSUPPORTED,
-					 },
-		     },
-	[C(BPU)] = {		/* What is BPU? */
-		    [C(OP_READ)] = {
-				    [C(RESULT_ACCESS)] =
-					CACHE_OP_UNSUPPORTED,
-				    [C(RESULT_MISS)] =
-					CACHE_OP_UNSUPPORTED,
-				    },
-		    [C(OP_WRITE)] = {
-				     [C(RESULT_ACCESS)] =
-					CACHE_OP_UNSUPPORTED,
-				     [C(RESULT_MISS)] =
-					CACHE_OP_UNSUPPORTED,
-				     },
-		    [C(OP_PREFETCH)] = {
-					[C(RESULT_ACCESS)] =
-						CACHE_OP_UNSUPPORTED,
-					[C(RESULT_MISS)] =
-						CACHE_OP_UNSUPPORTED,
-					},
-		    },
-	[C(NODE)] = {		/* What is NODE? */
-		     [C(OP_READ)] = {
-				     [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
-				     [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
-				     },
-		     [C(OP_WRITE)] = {
-				      [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
-				      [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
-				      },
-		     [C(OP_PREFETCH)] = {
-					 [C(RESULT_ACCESS)] =
-						CACHE_OP_UNSUPPORTED,
-					 [C(RESULT_MISS)] =
-						CACHE_OP_UNSUPPORTED,
-					 },
-		     },
-};
-
-int nds32_pmu_map_event(struct perf_event *event,
-			const unsigned int (*event_map)[PERF_COUNT_HW_MAX],
-			const unsigned int (*cache_map)[PERF_COUNT_HW_CACHE_MAX]
-			[PERF_COUNT_HW_CACHE_OP_MAX]
-			[PERF_COUNT_HW_CACHE_RESULT_MAX], u32 raw_event_mask);
-
-#endif /* __ASM_PMU_H */
diff --git a/arch/nds32/include/asm/proc-fns.h b/arch/nds32/include/asm/proc-fns.h
deleted file mode 100644
index 27c617fa77af..000000000000
--- a/arch/nds32/include/asm/proc-fns.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#ifndef __NDS32_PROCFNS_H__
-#define __NDS32_PROCFNS_H__
-
-#ifdef __KERNEL__
-#include <asm/page.h>
-
-struct mm_struct;
-struct vm_area_struct;
-extern void cpu_proc_init(void);
-extern void cpu_proc_fin(void);
-extern void cpu_do_idle(void);
-extern void cpu_reset(unsigned long reset);
-extern void cpu_switch_mm(struct mm_struct *mm);
-
-extern void cpu_dcache_inval_all(void);
-extern void cpu_dcache_wbinval_all(void);
-extern void cpu_dcache_inval_page(unsigned long page);
-extern void cpu_dcache_wb_page(unsigned long page);
-extern void cpu_dcache_wbinval_page(unsigned long page);
-extern void cpu_dcache_inval_range(unsigned long start, unsigned long end);
-extern void cpu_dcache_wb_range(unsigned long start, unsigned long end);
-extern void cpu_dcache_wbinval_range(unsigned long start, unsigned long end);
-
-extern void cpu_icache_inval_all(void);
-extern void cpu_icache_inval_page(unsigned long page);
-extern void cpu_icache_inval_range(unsigned long start, unsigned long end);
-
-extern void cpu_cache_wbinval_page(unsigned long page, int flushi);
-extern void cpu_cache_wbinval_range(unsigned long start,
-				    unsigned long end, int flushi);
-extern void cpu_cache_wbinval_range_check(struct vm_area_struct *vma,
-					  unsigned long start,
-					  unsigned long end, bool flushi,
-					  bool wbd);
-
-extern void cpu_dma_wb_range(unsigned long start, unsigned long end);
-extern void cpu_dma_inval_range(unsigned long start, unsigned long end);
-extern void cpu_dma_wbinval_range(unsigned long start, unsigned long end);
-
-#endif /* __KERNEL__ */
-#endif /* __NDS32_PROCFNS_H__ */
diff --git a/arch/nds32/include/asm/processor.h b/arch/nds32/include/asm/processor.h
deleted file mode 100644
index e6bfc74972bb..000000000000
--- a/arch/nds32/include/asm/processor.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#ifndef __ASM_NDS32_PROCESSOR_H
-#define __ASM_NDS32_PROCESSOR_H
-
-#ifdef __KERNEL__
-
-#include <asm/ptrace.h>
-#include <asm/types.h>
-#include <asm/sigcontext.h>
-
-#define KERNEL_STACK_SIZE	PAGE_SIZE
-#define STACK_TOP	TASK_SIZE
-#define STACK_TOP_MAX   TASK_SIZE
-
-struct cpu_context {
-	unsigned long r6;
-	unsigned long r7;
-	unsigned long r8;
-	unsigned long r9;
-	unsigned long r10;
-	unsigned long r11;
-	unsigned long r12;
-	unsigned long r13;
-	unsigned long r14;
-	unsigned long fp;
-	unsigned long pc;
-	unsigned long sp;
-};
-
-struct thread_struct {
-	struct cpu_context cpu_context;	/* cpu context */
-	/* fault info     */
-	unsigned long address;
-	unsigned long trap_no;
-	unsigned long error_code;
-
-	struct fpu_struct fpu;
-};
-
-#define INIT_THREAD  {	}
-
-#ifdef __NDS32_EB__
-#define PSW_DE	PSW_mskBE
-#else
-#define PSW_DE	0x0
-#endif
-
-#ifdef CONFIG_WBNA
-#define PSW_valWBNA	PSW_mskWBNA
-#else
-#define PSW_valWBNA	0x0
-#endif
-
-#ifdef CONFIG_HWZOL
-#define	PSW_valINIT (PSW_CPL_ANY | PSW_mskAEN | PSW_valWBNA | PSW_mskDT | PSW_mskIT | PSW_DE | PSW_mskGIE)
-#else
-#define	PSW_valINIT (PSW_CPL_ANY | PSW_valWBNA | PSW_mskDT | PSW_mskIT | PSW_DE | PSW_mskGIE)
-#endif
-
-#define start_thread(regs,pc,stack)			\
-({							\
-	memzero(regs, sizeof(struct pt_regs));		\
-	forget_syscall(regs);				\
-	regs->ipsw = PSW_valINIT;			\
-	regs->ir0 = (PSW_CPL_ANY | PSW_valWBNA | PSW_mskDT | PSW_mskIT | PSW_DE | PSW_SYSTEM | PSW_INTL_1);	\
-	regs->ipc = pc;					\
-	regs->sp = stack;				\
-})
-
-/* Forward declaration, a strange C thing */
-struct task_struct;
-
-/* Free all resources held by a thread. */
-#define release_thread(thread) do { } while(0)
-#if IS_ENABLED(CONFIG_FPU)
-#if !IS_ENABLED(CONFIG_UNLAZU_FPU)
-extern struct task_struct *last_task_used_math;
-#endif
-#endif
-
-/* Prepare to copy thread state - unlazy all lazy status */
-#define prepare_to_copy(tsk)	do { } while (0)
-
-unsigned long __get_wchan(struct task_struct *p);
-
-#define cpu_relax()			barrier()
-
-#define task_pt_regs(task) \
-	((struct pt_regs *) (task_stack_page(task) + THREAD_SIZE \
-		- 8) - 1)
-
-/*
- * Create a new kernel thread
- */
-extern int kernel_thread(int (*fn) (void *), void *arg, unsigned long flags);
-
-#define KSTK_EIP(tsk)	instruction_pointer(task_pt_regs(tsk))
-#define KSTK_ESP(tsk)	user_stack_pointer(task_pt_regs(tsk))
-
-#endif
-
-#endif /* __ASM_NDS32_PROCESSOR_H */
diff --git a/arch/nds32/include/asm/ptrace.h b/arch/nds32/include/asm/ptrace.h
deleted file mode 100644
index 919ee223620c..000000000000
--- a/arch/nds32/include/asm/ptrace.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#ifndef __ASM_NDS32_PTRACE_H
-#define __ASM_NDS32_PTRACE_H
-
-#include <uapi/asm/ptrace.h>
-
-/*
- * If pt_regs.syscallno == NO_SYSCALL, then the thread is not executing
- * a syscall -- i.e., its most recent entry into the kernel from
- * userspace was not via syscall, or otherwise a tracer cancelled the
- * syscall.
- *
- * This must have the value -1, for ABI compatibility with ptrace etc.
- */
-#define NO_SYSCALL (-1)
-#ifndef __ASSEMBLY__
-#include <linux/types.h>
-
-struct pt_regs {
-	union {
-		struct user_pt_regs user_regs;
-		struct {
-			long uregs[26];
-			long fp;
-			long gp;
-			long lp;
-			long sp;
-			long ipc;
-#if defined(CONFIG_HWZOL)
-			long lb;
-			long le;
-			long lc;
-#else
-			long dummy[3];
-#endif
-			long syscallno;
-		};
-	};
-	long orig_r0;
-	long ir0;
-	long ipsw;
-	long pipsw;
-	long pipc;
-	long pp0;
-	long pp1;
-	long fucop_ctl;
-	long osp;
-};
-
-static inline bool in_syscall(struct pt_regs const *regs)
-{
-	return regs->syscallno != NO_SYSCALL;
-}
-
-static inline void forget_syscall(struct pt_regs *regs)
-{
-	regs->syscallno = NO_SYSCALL;
-}
-static inline unsigned long regs_return_value(struct pt_regs *regs)
-{
-	return regs->uregs[0];
-}
-extern void show_regs(struct pt_regs *);
-/* Avoid circular header include via sched.h */
-struct task_struct;
-
-#define arch_has_single_step()		(1)
-#define user_mode(regs)			(((regs)->ipsw & PSW_mskPOM) == 0)
-#define interrupts_enabled(regs)	(!!((regs)->ipsw & PSW_mskGIE))
-#define user_stack_pointer(regs)	((regs)->sp)
-#define instruction_pointer(regs)	((regs)->ipc)
-#define profile_pc(regs) 		instruction_pointer(regs)
-
-#endif /* __ASSEMBLY__ */
-#endif
diff --git a/arch/nds32/include/asm/sfp-machine.h b/arch/nds32/include/asm/sfp-machine.h
deleted file mode 100644
index b1a5caa332b5..000000000000
--- a/arch/nds32/include/asm/sfp-machine.h
+++ /dev/null
@@ -1,158 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2005-2018 Andes Technology Corporation */
-
-#include <asm/bitfield.h>
-
-#define _FP_W_TYPE_SIZE		32
-#define _FP_W_TYPE		unsigned long
-#define _FP_WS_TYPE		signed long
-#define _FP_I_TYPE		long
-
-#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
-#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
-#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
-
-#define _FP_MUL_MEAT_S(R, X, Y)				\
-	_FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S, R, X, Y, umul_ppmm)
-#define _FP_MUL_MEAT_D(R, X, Y)				\
-	_FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D, R, X, Y, umul_ppmm)
-#define _FP_MUL_MEAT_Q(R, X, Y)				\
-	_FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q, R, X, Y, umul_ppmm)
-
-#define _FP_MUL_MEAT_DW_S(R, X, Y)			\
-	_FP_MUL_MEAT_DW_1_wide(_FP_WFRACBITS_S, R, X, Y, umul_ppmm)
-#define _FP_MUL_MEAT_DW_D(R, X, Y)			\
-	_FP_MUL_MEAT_DW_2_wide(_FP_WFRACBITS_D, R, X, Y, umul_ppmm)
-
-#define _FP_DIV_MEAT_S(R, X, Y)	_FP_DIV_MEAT_1_udiv_norm(S, R, X, Y)
-#define _FP_DIV_MEAT_D(R, X, Y)	_FP_DIV_MEAT_2_udiv(D, R, X, Y)
-
-#define _FP_NANFRAC_S		((_FP_QNANBIT_S << 1) - 1)
-#define _FP_NANFRAC_D		((_FP_QNANBIT_D << 1) - 1), -1
-#define _FP_NANFRAC_Q		((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1
-#define _FP_NANSIGN_S		0
-#define _FP_NANSIGN_D		0
-#define _FP_NANSIGN_Q		0
-
-#define _FP_KEEPNANFRACP 1
-#define _FP_QNANNEGATEDP 0
-
-#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP)			\
-do {								\
-	if ((_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs)	\
-	  && !(_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)) { \
-		R##_s = Y##_s;					\
-		_FP_FRAC_COPY_##wc(R, Y);			\
-	} else {						\
-		R##_s = X##_s;					\
-		_FP_FRAC_COPY_##wc(R, X);			\
-	}							\
-	R##_c = FP_CLS_NAN;					\
-} while (0)
-
-#define __FPU_FPCSR	(current->thread.fpu.fpcsr)
-
-/* Obtain the current rounding mode. */
-#define FP_ROUNDMODE                    \
-({                                      \
-	__FPU_FPCSR & FPCSR_mskRM;      \
-})
-
-#define FP_RND_NEAREST		0
-#define FP_RND_PINF		1
-#define FP_RND_MINF		2
-#define FP_RND_ZERO		3
-
-#define FP_EX_INVALID		FPCSR_mskIVO
-#define FP_EX_DIVZERO		FPCSR_mskDBZ
-#define FP_EX_OVERFLOW		FPCSR_mskOVF
-#define FP_EX_UNDERFLOW		FPCSR_mskUDF
-#define FP_EX_INEXACT		FPCSR_mskIEX
-
-#define SF_CEQ	2
-#define SF_CLT	1
-#define SF_CGT	3
-#define SF_CUN	4
-
-#include <asm/byteorder.h>
-
-#ifdef __BIG_ENDIAN__
-#define __BYTE_ORDER __BIG_ENDIAN
-#define __LITTLE_ENDIAN 0
-#else
-#define __BYTE_ORDER __LITTLE_ENDIAN
-#define __BIG_ENDIAN 0
-#endif
-
-#define abort() do { } while (0)
-#define umul_ppmm(w1, w0, u, v)						\
-do {									\
-	UWtype __x0, __x1, __x2, __x3;                                  \
-	UHWtype __ul, __vl, __uh, __vh;                                 \
-									\
-	__ul = __ll_lowpart(u);						\
-	__uh = __ll_highpart(u);					\
-	__vl = __ll_lowpart(v);						\
-	__vh = __ll_highpart(v);					\
-									\
-	__x0 = (UWtype) __ul * __vl;                                    \
-	__x1 = (UWtype) __ul * __vh;                                    \
-	__x2 = (UWtype) __uh * __vl;                                    \
-	__x3 = (UWtype) __uh * __vh;                                    \
-									\
-	__x1 += __ll_highpart(__x0);					\
-	__x1 += __x2;							\
-	if (__x1 < __x2)						\
-		__x3 += __ll_B;						\
-									\
-	(w1) = __x3 + __ll_highpart(__x1);				\
-	(w0) = __ll_lowpart(__x1) * __ll_B + __ll_lowpart(__x0);	\
-} while (0)
-
-#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
-do { \
-	UWtype __x; \
-	__x = (al) + (bl); \
-	(sh) = (ah) + (bh) + (__x < (al)); \
-	(sl) = __x; \
-} while (0)
-
-#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
-do { \
-	UWtype __x; \
-	__x = (al) - (bl); \
-	(sh) = (ah) - (bh) - (__x > (al)); \
-	(sl) = __x; \
-} while (0)
-
-#define udiv_qrnnd(q, r, n1, n0, d)				\
-do {								\
-	UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m;		\
-	__d1 = __ll_highpart(d);				\
-	__d0 = __ll_lowpart(d);					\
-								\
-	__r1 = (n1) % __d1;					\
-	__q1 = (n1) / __d1;					\
-	__m = (UWtype) __q1 * __d0;				\
-	__r1 = __r1 * __ll_B | __ll_highpart(n0);		\
-	if (__r1 < __m)	{					\
-		__q1--, __r1 += (d);				\
-		if (__r1 >= (d))				\
-			if (__r1 < __m)				\
-				__q1--, __r1 += (d);		\
-	}							\
-	__r1 -= __m;						\
-	__r0 = __r1 % __d1;					\
-	__q0 = __r1 / __d1;					\
-	__m = (UWtype) __q0 * __d0;				\
-	__r0 = __r0 * __ll_B | __ll_lowpart(n0);		\
-	if (__r0 < __m)	{					\
-		__q0--, __r0 += (d);				\
-		if (__r0 >= (d))				\
-			if (__r0 < __m)				\
-				__q0--, __r0 += (d);		\
-	}							\
-	__r0 -= __m;						\
-	(q) = (UWtype) __q1 * __ll_B | __q0;			\
-	(r) = __r0;						\
-} while (0)
diff --git a/arch/nds32/include/asm/shmparam.h b/arch/nds32/include/asm/shmparam.h
deleted file mode 100644
index 3aeee946973d..000000000000
--- a/arch/nds32/include/asm/shmparam.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#ifndef _ASMNDS32_SHMPARAM_H
-#define _ASMNDS32_SHMPARAM_H
-
-/*
- * This should be the size of the virtually indexed cache/ways,
- * whichever is greater since the cache aliases every size/ways
- * bytes.
- */
-#define	SHMLBA	(4 * SZ_8K)	/* attach addr a multiple of this */
-
-/*
- * Enforce SHMLBA in shmat
- */
-#define __ARCH_FORCE_SHMLBA
-
-#endif /* _ASMNDS32_SHMPARAM_H */
diff --git a/arch/nds32/include/asm/stacktrace.h b/arch/nds32/include/asm/stacktrace.h
deleted file mode 100644
index 6bf7c777bda4..000000000000
--- a/arch/nds32/include/asm/stacktrace.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2008-2018 Andes Technology Corporation */
-
-#ifndef __ASM_STACKTRACE_H
-#define __ASM_STACKTRACE_H
-
-/* Kernel callchain */
-struct stackframe {
-	unsigned long fp;
-	unsigned long sp;
-	unsigned long lp;
-};
-
-/*
- * struct frame_tail: User callchain
- * IMPORTANT:
- * This struct is used for call-stack walking,
- * the order and types matters.
- * Do not use array, it only stores sizeof(pointer)
- *
- * The details can refer to arch/arm/kernel/perf_event.c
- */
-struct frame_tail {
-	unsigned long stack_fp;
-	unsigned long stack_lp;
-};
-
-/* For User callchain with optimize for size */
-struct frame_tail_opt_size {
-	unsigned long stack_r6;
-	unsigned long stack_fp;
-	unsigned long stack_gp;
-	unsigned long stack_lp;
-};
-
-extern void
-get_real_ret_addr(unsigned long *addr, struct task_struct *tsk, int *graph);
-
-#endif /* __ASM_STACKTRACE_H */
diff --git a/arch/nds32/include/asm/string.h b/arch/nds32/include/asm/string.h
deleted file mode 100644
index cae8fe16de98..000000000000
--- a/arch/nds32/include/asm/string.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#ifndef __ASM_NDS32_STRING_H
-#define __ASM_NDS32_STRING_H
-
-#define __HAVE_ARCH_MEMCPY
-extern void *memcpy(void *, const void *, __kernel_size_t);
-
-#define __HAVE_ARCH_MEMMOVE
-extern void *memmove(void *, const void *, __kernel_size_t);
-
-#define __HAVE_ARCH_MEMSET
-extern void *memset(void *, int, __kernel_size_t);
-
-extern void *memzero(void *ptr, __kernel_size_t n);
-#endif
diff --git a/arch/nds32/include/asm/suspend.h b/arch/nds32/include/asm/suspend.h
deleted file mode 100644
index 6ed2418af1ac..000000000000
--- a/arch/nds32/include/asm/suspend.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2008-2017 Andes Technology Corporation
-
-#ifndef __ASM_NDS32_SUSPEND_H
-#define __ASM_NDS32_SUSPEND_H
-
-extern void suspend2ram(void);
-extern void cpu_resume(void);
-extern unsigned long wake_mask;
-
-#endif
diff --git a/arch/nds32/include/asm/swab.h b/arch/nds32/include/asm/swab.h
deleted file mode 100644
index 362a466f2976..000000000000
--- a/arch/nds32/include/asm/swab.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#ifndef __NDS32_SWAB_H__
-#define __NDS32_SWAB_H__
-
-#include <linux/types.h>
-#include <linux/compiler.h>
-
-static __inline__ __attribute_const__ __u32 ___arch__swab32(__u32 x)
-{
-	__asm__("wsbh   %0, %0\n\t"	/* word swap byte within halfword */
-		"rotri  %0, %0, #16\n"
-		:"=r"(x)
-		:"0"(x));
-	return x;
-}
-
-static __inline__ __attribute_const__ __u16 ___arch__swab16(__u16 x)
-{
-	__asm__("wsbh   %0, %0\n"	/* word swap byte within halfword */
-		:"=r"(x)
-		:"0"(x));
-	return x;
-}
-
-#define __arch_swab32(x) ___arch__swab32(x)
-#define __arch_swab16(x) ___arch__swab16(x)
-
-#if !defined(__STRICT_ANSI__) || defined(__KERNEL__)
-#define __BYTEORDER_HAS_U64__
-#define __SWAB_64_THRU_32__
-#endif
-
-#endif /* __NDS32_SWAB_H__ */
diff --git a/arch/nds32/include/asm/syscall.h b/arch/nds32/include/asm/syscall.h
deleted file mode 100644
index 90aa56c94af1..000000000000
--- a/arch/nds32/include/asm/syscall.h
+++ /dev/null
@@ -1,142 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2008-2009 Red Hat, Inc.  All rights reserved.
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#ifndef _ASM_NDS32_SYSCALL_H
-#define _ASM_NDS32_SYSCALL_H	1
-
-#include <uapi/linux/audit.h>
-#include <linux/err.h>
-struct task_struct;
-struct pt_regs;
-
-/**
- * syscall_get_nr - find what system call a task is executing
- * @task:	task of interest, must be blocked
- * @regs:	task_pt_regs() of @task
- *
- * If @task is executing a system call or is at system call
- * tracing about to attempt one, returns the system call number.
- * If @task is not executing a system call, i.e. it's blocked
- * inside the kernel for a fault or signal, returns -1.
- *
- * Note this returns int even on 64-bit machines.  Only 32 bits of
- * system call number can be meaningful.  If the actual arch value
- * is 64 bits, this truncates to 32 bits so 0xffffffff means -1.
- *
- * It's only valid to call this when @task is known to be blocked.
- */
-static inline int
-syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
-{
-	return regs->syscallno;
-}
-
-/**
- * syscall_rollback - roll back registers after an aborted system call
- * @task:	task of interest, must be in system call exit tracing
- * @regs:	task_pt_regs() of @task
- *
- * It's only valid to call this when @task is stopped for system
- * call exit tracing (due to TIF_SYSCALL_TRACE or TIF_SYSCALL_AUDIT),
- * after tracehook_report_syscall_entry() returned nonzero to prevent
- * the system call from taking place.
- *
- * This rolls back the register state in @regs so it's as if the
- * system call instruction was a no-op.  The registers containing
- * the system call number and arguments are as they were before the
- * system call instruction.  This may not be the same as what the
- * register state looked like at system call entry tracing.
- */
-static inline void
-syscall_rollback(struct task_struct *task, struct pt_regs *regs)
-{
-	regs->uregs[0] = regs->orig_r0;
-}
-
-/**
- * syscall_get_error - check result of traced system call
- * @task:	task of interest, must be blocked
- * @regs:	task_pt_regs() of @task
- *
- * Returns 0 if the system call succeeded, or -ERRORCODE if it failed.
- *
- * It's only valid to call this when @task is stopped for tracing on exit
- * from a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT.
- */
-static inline long
-syscall_get_error(struct task_struct *task, struct pt_regs *regs)
-{
-	unsigned long error = regs->uregs[0];
-	return IS_ERR_VALUE(error) ? error : 0;
-}
-
-/**
- * syscall_get_return_value - get the return value of a traced system call
- * @task:	task of interest, must be blocked
- * @regs:	task_pt_regs() of @task
- *
- * Returns the return value of the successful system call.
- * This value is meaningless if syscall_get_error() returned nonzero.
- *
- * It's only valid to call this when @task is stopped for tracing on exit
- * from a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT.
- */
-static inline long
-syscall_get_return_value(struct task_struct *task, struct pt_regs *regs)
-{
-	return regs->uregs[0];
-}
-
-/**
- * syscall_set_return_value - change the return value of a traced system call
- * @task:	task of interest, must be blocked
- * @regs:	task_pt_regs() of @task
- * @error:	negative error code, or zero to indicate success
- * @val:	user return value if @error is zero
- *
- * This changes the results of the system call that user mode will see.
- * If @error is zero, the user sees a successful system call with a
- * return value of @val.  If @error is nonzero, it's a negated errno
- * code; the user sees a failed system call with this errno code.
- *
- * It's only valid to call this when @task is stopped for tracing on exit
- * from a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT.
- */
-static inline void
-syscall_set_return_value(struct task_struct *task, struct pt_regs *regs,
-			 int error, long val)
-{
-	regs->uregs[0] = (long)error ? error : val;
-}
-
-/**
- * syscall_get_arguments - extract system call parameter values
- * @task:	task of interest, must be blocked
- * @regs:	task_pt_regs() of @task
- * @args:	array filled with argument values
- *
- * Fetches 6 arguments to the system call (from 0 through 5). The first
- * argument is stored in @args[0], and so on.
- *
- * It's only valid to call this when @task is stopped for tracing on
- * entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT.
- */
-#define SYSCALL_MAX_ARGS 6
-static inline void
-syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
-		      unsigned long *args)
-{
-	args[0] = regs->orig_r0;
-	args++;
-	memcpy(args, &regs->uregs[0] + 1, 5 * sizeof(args[0]));
-}
-
-static inline int
-syscall_get_arch(struct task_struct *task)
-{
-	return IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)
-		? AUDIT_ARCH_NDS32BE : AUDIT_ARCH_NDS32;
-}
-
-#endif /* _ASM_NDS32_SYSCALL_H */
diff --git a/arch/nds32/include/asm/syscalls.h b/arch/nds32/include/asm/syscalls.h
deleted file mode 100644
index 4e7216082a67..000000000000
--- a/arch/nds32/include/asm/syscalls.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#ifndef __ASM_NDS32_SYSCALLS_H
-#define __ASM_NDS32_SYSCALLS_H
-
-asmlinkage long sys_cacheflush(unsigned long addr, unsigned long len, unsigned int op);
-asmlinkage long sys_fadvise64_64_wrapper(int fd, int advice, loff_t offset, loff_t len);
-asmlinkage long sys_rt_sigreturn_wrapper(void);
-asmlinkage long sys_fp_udfiex_crtl(int cmd, int act);
-
-#include <asm-generic/syscalls.h>
-
-#endif /* __ASM_NDS32_SYSCALLS_H */
diff --git a/arch/nds32/include/asm/thread_info.h b/arch/nds32/include/asm/thread_info.h
deleted file mode 100644
index bd8f81cf2ce5..000000000000
--- a/arch/nds32/include/asm/thread_info.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#ifndef __ASM_NDS32_THREAD_INFO_H
-#define __ASM_NDS32_THREAD_INFO_H
-
-#ifdef __KERNEL__
-
-#define THREAD_SIZE_ORDER 	(1)
-#define THREAD_SIZE		(PAGE_SIZE << THREAD_SIZE_ORDER)
-
-#ifndef __ASSEMBLY__
-
-struct task_struct;
-
-#include <asm/ptrace.h>
-#include <asm/types.h>
-
-/*
- * low level task data that entry.S needs immediate access to.
- * __switch_to() assumes cpu_context follows immediately after cpu_domain.
- */
-struct thread_info {
-	unsigned long flags;	/* low level flags */
-	__s32 preempt_count;	/* 0 => preemptable, <0 => bug */
-};
-#define INIT_THREAD_INFO(tsk)						\
-{									\
-	.preempt_count	= INIT_PREEMPT_COUNT,				\
-}
-#define thread_saved_pc(tsk) ((unsigned long)(tsk->thread.cpu_context.pc))
-#define thread_saved_fp(tsk) ((unsigned long)(tsk->thread.cpu_context.fp))
-#endif
-
-/*
- * thread information flags:
- *  TIF_SYSCALL_TRACE	- syscall trace active
- *  TIF_SIGPENDING	- signal pending
- *  TIF_NEED_RESCHED	- rescheduling necessary
- *  TIF_NOTIFY_RESUME	- callback before returning to user
- *  TIF_POLLING_NRFLAG	- true if poll_idle() is polling TIF_NEED_RESCHED
- */
-#define TIF_SIGPENDING		1
-#define TIF_NEED_RESCHED	2
-#define TIF_SINGLESTEP		3
-#define TIF_NOTIFY_RESUME	4	/* callback before returning to user */
-#define TIF_NOTIFY_SIGNAL	5	/* signal notifications exist */
-#define TIF_SYSCALL_TRACE	8
-#define TIF_POLLING_NRFLAG	17
-#define TIF_MEMDIE		18
-#define TIF_FREEZE		19
-#define TIF_RESTORE_SIGMASK	20
-
-#define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
-#define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
-#define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
-#define _TIF_NOTIFY_SIGNAL	(1 << TIF_NOTIFY_SIGNAL)
-#define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
-#define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
-#define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
-#define _TIF_FREEZE		(1 << TIF_FREEZE)
-#define _TIF_RESTORE_SIGMASK	(1 << TIF_RESTORE_SIGMASK)
-
-/*
- * Change these and you break ASM code in entry-common.S
- */
-#define _TIF_WORK_MASK		0x000000ff
-#define _TIF_WORK_SYSCALL_ENTRY (_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP)
-#define _TIF_WORK_SYSCALL_LEAVE (_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP)
-
-#endif /* __KERNEL__ */
-#endif /* __ASM_NDS32_THREAD_INFO_H */
diff --git a/arch/nds32/include/asm/tlb.h b/arch/nds32/include/asm/tlb.h
deleted file mode 100644
index 672603804a3b..000000000000
--- a/arch/nds32/include/asm/tlb.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#ifndef __ASMNDS32_TLB_H
-#define __ASMNDS32_TLB_H
-
-#include <asm-generic/tlb.h>
-
-#define __pte_free_tlb(tlb, pte, addr)	pte_free((tlb)->mm, pte)
-
-#endif
diff --git a/arch/nds32/include/asm/tlbflush.h b/arch/nds32/include/asm/tlbflush.h
deleted file mode 100644
index 97155366ea01..000000000000
--- a/arch/nds32/include/asm/tlbflush.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#ifndef _ASMNDS32_TLBFLUSH_H
-#define _ASMNDS32_TLBFLUSH_H
-
-#include <linux/spinlock.h>
-#include <linux/mm.h>
-#include <nds32_intrinsic.h>
-
-static inline void local_flush_tlb_all(void)
-{
-	__nds32__tlbop_flua();
-	__nds32__isb();
-}
-
-static inline void local_flush_tlb_mm(struct mm_struct *mm)
-{
-	__nds32__tlbop_flua();
-	__nds32__isb();
-}
-
-static inline void local_flush_tlb_kernel_range(unsigned long start,
-						unsigned long end)
-{
-	while (start < end) {
-		__nds32__tlbop_inv(start);
-		__nds32__isb();
-		start += PAGE_SIZE;
-	}
-}
-
-void local_flush_tlb_range(struct vm_area_struct *vma,
-			   unsigned long start, unsigned long end);
-void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long addr);
-
-#define flush_tlb_all		local_flush_tlb_all
-#define flush_tlb_mm		local_flush_tlb_mm
-#define flush_tlb_range		local_flush_tlb_range
-#define flush_tlb_page		local_flush_tlb_page
-#define flush_tlb_kernel_range	local_flush_tlb_kernel_range
-
-void update_mmu_cache(struct vm_area_struct *vma,
-		      unsigned long address, pte_t * pte);
-
-#endif
diff --git a/arch/nds32/include/asm/uaccess.h b/arch/nds32/include/asm/uaccess.h
deleted file mode 100644
index 377548d4451a..000000000000
--- a/arch/nds32/include/asm/uaccess.h
+++ /dev/null
@@ -1,282 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#ifndef _ASMANDES_UACCESS_H
-#define _ASMANDES_UACCESS_H
-
-/*
- * User space memory access functions
- */
-#include <linux/sched.h>
-#include <asm/errno.h>
-#include <asm/memory.h>
-#include <asm/types.h>
-#include <asm-generic/access_ok.h>
-
-#define __asmeq(x, y)  ".ifnc " x "," y " ; .err ; .endif\n\t"
-
-/*
- * The exception table consists of pairs of addresses: the first is the
- * address of an instruction that is allowed to fault, and the second is
- * the address at which the program should continue.  No registers are
- * modified, so it is entirely up to the continuation code to figure out
- * what to do.
- *
- * All the routines below use bits of fixup code that are out of line
- * with the main instruction path.  This means when everything is well,
- * we don't even have to jump over them.  Further, they do not intrude
- * on our cache or tlb entries.
- */
-
-struct exception_table_entry {
-	unsigned long insn, fixup;
-};
-
-extern int fixup_exception(struct pt_regs *regs);
-
-/*
- * Single-value transfer routines.  They automatically use the right
- * size if we just have the right pointer type.  Note that the functions
- * which read from user space (*get_*) need to take care not to leak
- * kernel data even if the calling code is buggy and fails to check
- * the return value.  This means zeroing out the destination variable
- * or buffer on error.  Normally this is done out of line by the
- * fixup code, but there are a few places where it intrudes on the
- * main code path.  When we only write to user space, there is no
- * problem.
- *
- * The "__xxx" versions of the user access functions do not verify the
- * address space - it must have been done previously with a separate
- * "access_ok()" call.
- *
- * The "xxx_error" versions set the third argument to EFAULT if an
- * error occurs, and leave it unchanged on success.  Note that these
- * versions are void (ie, don't return a value as such).
- */
-
-#define get_user(x, ptr)						\
-({									\
-	long __gu_err = 0;						\
-	__get_user_check((x), (ptr), __gu_err);				\
-	__gu_err;							\
-})
-
-#define __get_user_error(x, ptr, err)					\
-({									\
-	__get_user_check((x), (ptr), (err));				\
-	(void)0;							\
-})
-
-#define __get_user(x, ptr)						\
-({									\
-	long __gu_err = 0;						\
-	const __typeof__(*(ptr)) __user *__p = (ptr);			\
-	__get_user_err((x), __p, (__gu_err));				\
-	__gu_err;							\
-})
-
-#define __get_user_check(x, ptr, err)					\
-({									\
-	const __typeof__(*(ptr)) __user *__p = (ptr);			\
-	might_fault();							\
-	if (access_ok(__p, sizeof(*__p))) {		\
-		__get_user_err((x), __p, (err));			\
-	} else {							\
-		(x) = 0; (err) = -EFAULT;				\
-	}								\
-})
-
-#define __get_user_err(x, ptr, err)					\
-do {									\
-	unsigned long __gu_val;						\
-	__chk_user_ptr(ptr);						\
-	switch (sizeof(*(ptr))) {					\
-	case 1:								\
-		__get_user_asm("lbi", __gu_val, (ptr), (err));		\
-		break;							\
-	case 2:								\
-		__get_user_asm("lhi", __gu_val, (ptr), (err));		\
-		break;							\
-	case 4:								\
-		__get_user_asm("lwi", __gu_val, (ptr), (err));		\
-		break;							\
-	case 8:								\
-		__get_user_asm_dword(__gu_val, (ptr), (err));		\
-		break;							\
-	default:							\
-		BUILD_BUG(); 						\
-		break;							\
-	}								\
-	(x) = (__force __typeof__(*(ptr)))__gu_val;			\
-} while (0)
-
-#define __get_user_asm(inst, x, addr, err)				\
-	__asm__ __volatile__ (						\
-		"1:	"inst"	%1,[%2]\n"				\
-		"2:\n"							\
-		"	.section .fixup,\"ax\"\n"			\
-		"	.align	2\n"					\
-		"3:	move %0, %3\n"					\
-		"	move %1, #0\n"					\
-		"	b	2b\n"					\
-		"	.previous\n"					\
-		"	.section __ex_table,\"a\"\n"			\
-		"	.align	3\n"					\
-		"	.long	1b, 3b\n"				\
-		"	.previous"					\
-		: "+r" (err), "=&r" (x)					\
-		: "r" (addr), "i" (-EFAULT)				\
-		: "cc")
-
-#ifdef __NDS32_EB__
-#define __gu_reg_oper0 "%H1"
-#define __gu_reg_oper1 "%L1"
-#else
-#define __gu_reg_oper0 "%L1"
-#define __gu_reg_oper1 "%H1"
-#endif
-
-#define __get_user_asm_dword(x, addr, err) 				\
-	__asm__ __volatile__ (						\
-		"\n1:\tlwi " __gu_reg_oper0 ",[%2]\n"			\
-		"\n2:\tlwi " __gu_reg_oper1 ",[%2+4]\n"			\
-		"3:\n"							\
-		"	.section .fixup,\"ax\"\n"			\
-		"	.align	2\n"					\
-		"4:	move	%0, %3\n"				\
-		"	b	3b\n"					\
-		"	.previous\n"					\
-		"	.section __ex_table,\"a\"\n"			\
-		"	.align	3\n"					\
-		"	.long	1b, 4b\n"				\
-		"	.long	2b, 4b\n"				\
-		"	.previous"					\
-		: "+r"(err), "=&r"(x)					\
-		: "r"(addr), "i"(-EFAULT)				\
-		: "cc")
-
-#define put_user(x, ptr)						\
-({									\
-	long __pu_err = 0;						\
-	__put_user_check((x), (ptr), __pu_err);				\
-	__pu_err;							\
-})
-
-#define __put_user(x, ptr)						\
-({									\
-	long __pu_err = 0;						\
-	__typeof__(*(ptr)) __user *__p = (ptr);				\
-	__put_user_err((x), __p, __pu_err);				\
-	__pu_err;							\
-})
-
-#define __put_user_error(x, ptr, err)					\
-({									\
-	__put_user_err((x), (ptr), (err));				\
-	(void)0;							\
-})
-
-#define __put_user_check(x, ptr, err)					\
-({									\
-	__typeof__(*(ptr)) __user *__p = (ptr);				\
-	might_fault();							\
-	if (access_ok(__p, sizeof(*__p))) {		\
-		__put_user_err((x), __p, (err));			\
-	} else	{							\
-		(err) = -EFAULT;					\
-	}								\
-})
-
-#define __put_user_err(x, ptr, err)					\
-do {									\
-	__typeof__(*(ptr)) __pu_val = (x);				\
-	__chk_user_ptr(ptr);						\
-	switch (sizeof(*(ptr))) {					\
-	case 1:								\
-		__put_user_asm("sbi", __pu_val, (ptr), (err));		\
-		break;							\
-	case 2: 							\
-		__put_user_asm("shi", __pu_val, (ptr), (err));		\
-		break;							\
-	case 4: 							\
-		__put_user_asm("swi", __pu_val, (ptr), (err));		\
-		break;							\
-	case 8:								\
-		__put_user_asm_dword(__pu_val, (ptr), (err));		\
-		break;							\
-	default:							\
-		BUILD_BUG(); 						\
-		break;							\
-	}								\
-} while (0)
-
-#define __put_user_asm(inst, x, addr, err)				\
-	__asm__ __volatile__ (						\
-		"1:	"inst"	%1,[%2]\n"				\
-		"2:\n"							\
-		"	.section .fixup,\"ax\"\n"			\
-		"	.align	2\n"					\
-		"3:	move	%0, %3\n"				\
-		"	b	2b\n"					\
-		"	.previous\n"					\
-		"	.section __ex_table,\"a\"\n"			\
-		"	.align	3\n"					\
-		"	.long	1b, 3b\n"				\
-		"	.previous"					\
-		: "+r" (err)						\
-		: "r" (x), "r" (addr), "i" (-EFAULT)			\
-		: "cc")
-
-#ifdef __NDS32_EB__
-#define __pu_reg_oper0 "%H2"
-#define __pu_reg_oper1 "%L2"
-#else
-#define __pu_reg_oper0 "%L2"
-#define __pu_reg_oper1 "%H2"
-#endif
-
-#define __put_user_asm_dword(x, addr, err) 				\
-	__asm__ __volatile__ (						\
-		"\n1:\tswi " __pu_reg_oper0 ",[%1]\n"			\
-		"\n2:\tswi " __pu_reg_oper1 ",[%1+4]\n"			\
-		"3:\n"							\
-		"	.section .fixup,\"ax\"\n"			\
-		"	.align	2\n"					\
-		"4:	move	%0, %3\n"				\
-		"	b	3b\n"					\
-		"	.previous\n"					\
-		"	.section __ex_table,\"a\"\n"			\
-		"	.align	3\n"					\
-		"	.long	1b, 4b\n"				\
-		"	.long	2b, 4b\n"				\
-		"	.previous"					\
-		: "+r"(err)						\
-		: "r"(addr), "r"(x), "i"(-EFAULT)			\
-		: "cc")
-
-extern unsigned long __arch_clear_user(void __user * addr, unsigned long n);
-extern long strncpy_from_user(char *dest, const char __user * src, long count);
-extern __must_check long strnlen_user(const char __user * str, long n);
-extern unsigned long __arch_copy_from_user(void *to, const void __user * from,
-                                           unsigned long n);
-extern unsigned long __arch_copy_to_user(void __user * to, const void *from,
-                                         unsigned long n);
-
-#define raw_copy_from_user __arch_copy_from_user
-#define raw_copy_to_user __arch_copy_to_user
-
-#define INLINE_COPY_FROM_USER
-#define INLINE_COPY_TO_USER
-static inline unsigned long clear_user(void __user * to, unsigned long n)
-{
-	if (access_ok(to, n))
-		n = __arch_clear_user(to, n);
-	return n;
-}
-
-static inline unsigned long __clear_user(void __user * to, unsigned long n)
-{
-	return __arch_clear_user(to, n);
-}
-
-#endif /* _ASMNDS32_UACCESS_H */
diff --git a/arch/nds32/include/asm/unistd.h b/arch/nds32/include/asm/unistd.h
deleted file mode 100644
index bf5e2d440913..000000000000
--- a/arch/nds32/include/asm/unistd.h
+++ /dev/null
@@ -1,6 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#define __ARCH_WANT_SYS_CLONE
-
-#include <uapi/asm/unistd.h>
diff --git a/arch/nds32/include/asm/vdso.h b/arch/nds32/include/asm/vdso.h
deleted file mode 100644
index 89b113ffc3dc..000000000000
--- a/arch/nds32/include/asm/vdso.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2005-2017 Andes Technology Corporation
- */
-
-#ifndef __ASM_VDSO_H
-#define __ASM_VDSO_H
-
-#ifdef __KERNEL__
-
-#ifndef __ASSEMBLY__
-
-#include <generated/vdso-offsets.h>
-
-#define VDSO_SYMBOL(base, name)						   \
-({									   \
-	(unsigned long)(vdso_offset_##name + (unsigned long)(base)); \
-})
-
-#endif /* !__ASSEMBLY__ */
-
-#endif /* __KERNEL__ */
-
-#endif /* __ASM_VDSO_H */
diff --git a/arch/nds32/include/asm/vdso_datapage.h b/arch/nds32/include/asm/vdso_datapage.h
deleted file mode 100644
index 74c68802021e..000000000000
--- a/arch/nds32/include/asm/vdso_datapage.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2012 ARM Limited
-// Copyright (C) 2005-2017 Andes Technology Corporation
-#ifndef __ASM_VDSO_DATAPAGE_H
-#define __ASM_VDSO_DATAPAGE_H
-
-#ifdef __KERNEL__
-
-#ifndef __ASSEMBLY__
-
-struct vdso_data {
-	bool cycle_count_down;	/* timer cyclye counter is decrease with time */
-	u32 cycle_count_offset;	/* offset of timer cycle counter register */
-	u32 seq_count;		/* sequence count - odd during updates */
-	u32 xtime_coarse_sec;	/* coarse time */
-	u32 xtime_coarse_nsec;
-
-	u32 wtm_clock_sec;	/* wall to monotonic offset */
-	u32 wtm_clock_nsec;
-	u32 xtime_clock_sec;	/* CLOCK_REALTIME - seconds */
-	u32 cs_mult;		/* clocksource multiplier */
-	u32 cs_shift;		/* Cycle to nanosecond divisor (power of two) */
-	u32 hrtimer_res;	/* hrtimer resolution */
-
-	u64 cs_cycle_last;	/* last cycle value */
-	u64 cs_mask;		/* clocksource mask */
-
-	u64 xtime_clock_nsec;	/* CLOCK_REALTIME sub-ns base */
-	u32 tz_minuteswest;	/* timezone info for gettimeofday(2) */
-	u32 tz_dsttime;
-};
-
-#endif /* !__ASSEMBLY__ */
-
-#endif /* __KERNEL__ */
-
-#endif /* __ASM_VDSO_DATAPAGE_H */
diff --git a/arch/nds32/include/asm/vdso_timer_info.h b/arch/nds32/include/asm/vdso_timer_info.h
deleted file mode 100644
index 328439ce37db..000000000000
--- a/arch/nds32/include/asm/vdso_timer_info.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-extern struct timer_info_t timer_info;
-#define EMPTY_VALUE ~(0UL)
-#define EMPTY_TIMER_MAPPING EMPTY_VALUE
-#define EMPTY_REG_OFFSET EMPTY_VALUE
-
-struct timer_info_t
-{
-	bool cycle_count_down;
-	unsigned long mapping_base;
-	unsigned long cycle_count_reg_offset;
-};
diff --git a/arch/nds32/include/asm/vermagic.h b/arch/nds32/include/asm/vermagic.h
deleted file mode 100644
index f772e7ba33f1..000000000000
--- a/arch/nds32/include/asm/vermagic.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#ifndef _ASM_VERMAGIC_H
-#define _ASM_VERMAGIC_H
-
-#define MODULE_ARCH_VERMAGIC	"NDS32v3"
-
-#endif /* _ASM_VERMAGIC_H */
diff --git a/arch/nds32/include/asm/vmalloc.h b/arch/nds32/include/asm/vmalloc.h
deleted file mode 100644
index caeed3898419..000000000000
--- a/arch/nds32/include/asm/vmalloc.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#ifndef _ASM_NDS32_VMALLOC_H
-#define _ASM_NDS32_VMALLOC_H
-
-#endif /* _ASM_NDS32_VMALLOC_H */
diff --git a/arch/nds32/include/uapi/asm/Kbuild b/arch/nds32/include/uapi/asm/Kbuild
deleted file mode 100644
index e78470141932..000000000000
--- a/arch/nds32/include/uapi/asm/Kbuild
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-generic-y += ucontext.h
diff --git a/arch/nds32/include/uapi/asm/auxvec.h b/arch/nds32/include/uapi/asm/auxvec.h
deleted file mode 100644
index bc0b92ab8c15..000000000000
--- a/arch/nds32/include/uapi/asm/auxvec.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#ifndef __ASM_AUXVEC_H
-#define __ASM_AUXVEC_H
-
-/*
- * This entry gives some information about the FPU initialization
- * performed by the kernel.
- */
-#define AT_FPUCW	18	/* Used FPU control word.  */
-
-
-/* VDSO location */
-#define AT_SYSINFO_EHDR	33
-
-#define AT_VECTOR_SIZE_ARCH 1
-
-#endif
diff --git a/arch/nds32/include/uapi/asm/byteorder.h b/arch/nds32/include/uapi/asm/byteorder.h
deleted file mode 100644
index c264ef12c49c..000000000000
--- a/arch/nds32/include/uapi/asm/byteorder.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#ifndef __NDS32_BYTEORDER_H__
-#define __NDS32_BYTEORDER_H__
-
-#ifdef __NDS32_EB__
-#include <linux/byteorder/big_endian.h>
-#else
-#include <linux/byteorder/little_endian.h>
-#endif
-
-#endif /* __NDS32_BYTEORDER_H__ */
diff --git a/arch/nds32/include/uapi/asm/cachectl.h b/arch/nds32/include/uapi/asm/cachectl.h
deleted file mode 100644
index 31b9b439d819..000000000000
--- a/arch/nds32/include/uapi/asm/cachectl.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-// Copyright (C) 1994, 1995, 1996 by Ralf Baechle
-// Copyright (C) 2005-2017 Andes Technology Corporation
-#ifndef	_ASM_CACHECTL
-#define	_ASM_CACHECTL
-
-/*
- * Options for cacheflush system call
- */
-#define	ICACHE	0		/* flush instruction cache        */
-#define	DCACHE	1		/* writeback and flush data cache */
-#define	BCACHE	2		/* flush instruction cache + writeback and flush data cache */
-
-#endif /* _ASM_CACHECTL */
diff --git a/arch/nds32/include/uapi/asm/fp_udfiex_crtl.h b/arch/nds32/include/uapi/asm/fp_udfiex_crtl.h
deleted file mode 100644
index f17396db16ec..000000000000
--- a/arch/nds32/include/uapi/asm/fp_udfiex_crtl.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/* Copyright (C) 2005-2019 Andes Technology Corporation */
-#ifndef	_FP_UDF_IEX_CRTL_H
-#define	_FP_UDF_IEX_CRTL_H
-
-/*
- * The cmd list of sys_fp_udfiex_crtl()
- */
-/* Disable UDF or IEX trap based on the content of parameter act */
-#define DISABLE_UDF_IEX_TRAP	0
-/* Enable UDF or IEX trap based on the content of parameter act */
-#define ENABLE_UDF_IEX_TRAP	1
-/* Get current status of UDF and IEX trap */
-#define GET_UDF_IEX_TRAP	2
-
-#endif /* _FP_UDF_IEX_CRTL_H */
diff --git a/arch/nds32/include/uapi/asm/param.h b/arch/nds32/include/uapi/asm/param.h
deleted file mode 100644
index 48d00328d328..000000000000
--- a/arch/nds32/include/uapi/asm/param.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#ifndef __ASM_NDS32_PARAM_H
-#define __ASM_NDS32_PARAM_H
-
-#define EXEC_PAGESIZE	8192
-
-#include <asm-generic/param.h>
-
-#endif /* __ASM_NDS32_PARAM_H */
diff --git a/arch/nds32/include/uapi/asm/ptrace.h b/arch/nds32/include/uapi/asm/ptrace.h
deleted file mode 100644
index d76217c7c010..000000000000
--- a/arch/nds32/include/uapi/asm/ptrace.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#ifndef __UAPI_ASM_NDS32_PTRACE_H
-#define __UAPI_ASM_NDS32_PTRACE_H
-
-#ifndef __ASSEMBLY__
-
-/*
- * User structures for general purpose register.
- */
-struct user_pt_regs {
-	long uregs[26];
-	long fp;
-	long gp;
-	long lp;
-	long sp;
-	long ipc;
-	long lb;
-	long le;
-	long lc;
-	long syscallno;
-};
-#endif
-#endif
diff --git a/arch/nds32/include/uapi/asm/sigcontext.h b/arch/nds32/include/uapi/asm/sigcontext.h
deleted file mode 100644
index 6c1e6648878f..000000000000
--- a/arch/nds32/include/uapi/asm/sigcontext.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#ifndef _ASMNDS32_SIGCONTEXT_H
-#define _ASMNDS32_SIGCONTEXT_H
-
-/*
- * Signal context structure - contains all info to do with the state
- * before the signal handler was invoked.  Note: only add new entries
- * to the end of the structure.
- */
-struct fpu_struct {
-	unsigned long long fd_regs[32];
-	unsigned long fpcsr;
-	/*
-	 * When CONFIG_SUPPORT_DENORMAL_ARITHMETIC is defined, kernel prevents
-	 * hardware from treating the denormalized output as an underflow case
-	 * and rounding it to a normal number. Hence kernel enables the UDF and
-	 * IEX trap in the fpcsr register to step in the calculation.
-	 * However, the UDF and IEX trap enable bit in $fpcsr also lose
-	 * their use.
-	 *
-	 * UDF_IEX_trap replaces the feature of UDF and IEX trap enable bit in
-	 * $fpcsr to control the trap of underflow and inexact. The bit filed
-	 * of UDF_IEX_trap is the same as $fpcsr, 10th bit is used to enable UDF
-	 * exception trapping and 11th bit is used to enable IEX exception
-	 * trapping.
-	 *
-	 * UDF_IEX_trap is only modified through fp_udfiex_crtl syscall.
-	 * Therefore, UDF_IEX_trap needn't be saved and restored in each
-	 * context switch.
-	 */
-	unsigned long UDF_IEX_trap;
-};
-
-struct zol_struct {
-	unsigned long nds32_lc;	/* $LC */
-	unsigned long nds32_le;	/* $LE */
-	unsigned long nds32_lb;	/* $LB */
-};
-
-struct sigcontext {
-	unsigned long trap_no;
-	unsigned long error_code;
-	unsigned long oldmask;
-	unsigned long nds32_r0;
-	unsigned long nds32_r1;
-	unsigned long nds32_r2;
-	unsigned long nds32_r3;
-	unsigned long nds32_r4;
-	unsigned long nds32_r5;
-	unsigned long nds32_r6;
-	unsigned long nds32_r7;
-	unsigned long nds32_r8;
-	unsigned long nds32_r9;
-	unsigned long nds32_r10;
-	unsigned long nds32_r11;
-	unsigned long nds32_r12;
-	unsigned long nds32_r13;
-	unsigned long nds32_r14;
-	unsigned long nds32_r15;
-	unsigned long nds32_r16;
-	unsigned long nds32_r17;
-	unsigned long nds32_r18;
-	unsigned long nds32_r19;
-	unsigned long nds32_r20;
-	unsigned long nds32_r21;
-	unsigned long nds32_r22;
-	unsigned long nds32_r23;
-	unsigned long nds32_r24;
-	unsigned long nds32_r25;
-	unsigned long nds32_fp;	/* $r28 */
-	unsigned long nds32_gp;	/* $r29 */
-	unsigned long nds32_lp;	/* $r30 */
-	unsigned long nds32_sp;	/* $r31 */
-	unsigned long nds32_ipc;
-	unsigned long fault_address;
-	unsigned long used_math_flag;
-	/* FPU Registers */
-	struct fpu_struct fpu;
-	struct zol_struct zol;
-};
-
-#endif
diff --git a/arch/nds32/include/uapi/asm/unistd.h b/arch/nds32/include/uapi/asm/unistd.h
deleted file mode 100644
index 410795e280fe..000000000000
--- a/arch/nds32/include/uapi/asm/unistd.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#define __ARCH_WANT_STAT64
-#define __ARCH_WANT_SYNC_FILE_RANGE2
-#define __ARCH_WANT_SET_GET_RLIMIT
-#define __ARCH_WANT_TIME32_SYSCALLS
-
-/* Use the standard ABI for syscalls */
-#include <asm-generic/unistd.h>
-
-/* Additional NDS32 specific syscalls. */
-#define __NR_cacheflush		(__NR_arch_specific_syscall)
-#define __NR_fp_udfiex_crtl	(__NR_arch_specific_syscall + 1)
-__SYSCALL(__NR_cacheflush, sys_cacheflush)
-__SYSCALL(__NR_fp_udfiex_crtl, sys_fp_udfiex_crtl)
diff --git a/arch/nds32/kernel/.gitignore b/arch/nds32/kernel/.gitignore
deleted file mode 100644
index bbb90f92d051..000000000000
--- a/arch/nds32/kernel/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-vmlinux.lds
diff --git a/arch/nds32/kernel/Makefile b/arch/nds32/kernel/Makefile
deleted file mode 100644
index 394df3f6442c..000000000000
--- a/arch/nds32/kernel/Makefile
+++ /dev/null
@@ -1,33 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# Makefile for the linux kernel.
-#
-
-CPPFLAGS_vmlinux.lds	:= -DTEXTADDR=$(TEXTADDR)
-AFLAGS_head.o		:= -DTEXTADDR=$(TEXTADDR)
-# Object file lists.
-
-obj-y			:= ex-entry.o ex-exit.o ex-scall.o irq.o \
-			process.o ptrace.o setup.o signal.o \
-			sys_nds32.o time.o traps.o cacheinfo.o \
-			dma.o syscall_table.o vdso.o
-
-obj-$(CONFIG_MODULES)		+= nds32_ksyms.o module.o
-obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
-obj-$(CONFIG_FPU)		+= fpu.o
-obj-$(CONFIG_OF)		+= devtree.o
-obj-$(CONFIG_CACHE_L2)		+= atl2c.o
-obj-$(CONFIG_PERF_EVENTS) += perf_event_cpu.o
-obj-$(CONFIG_PM)		+= pm.o sleep.o
-extra-y := head.o vmlinux.lds
-
-CFLAGS_fpu.o += -mext-fpu-sp -mext-fpu-dp
-
-
-obj-y				+= vdso/
-
-obj-$(CONFIG_FUNCTION_TRACER)   += ftrace.o
-
-ifdef CONFIG_FUNCTION_TRACER
-CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
-endif
diff --git a/arch/nds32/kernel/asm-offsets.c b/arch/nds32/kernel/asm-offsets.c
deleted file mode 100644
index 3541d5981de7..000000000000
--- a/arch/nds32/kernel/asm-offsets.c
+++ /dev/null
@@ -1,28 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/sched.h>
-#include <linux/sched/task_stack.h>
-#include <linux/kbuild.h>
-#include <asm/thread_info.h>
-#include <asm/ptrace.h>
-
-int main(void)
-{
-	DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags));
-	DEFINE(TSK_TI_PREEMPT,
-	       offsetof(struct task_struct, thread_info.preempt_count));
-	DEFINE(THREAD_CPU_CONTEXT,
-	       offsetof(struct task_struct, thread.cpu_context));
-	DEFINE(OSP_OFFSET, offsetof(struct pt_regs, osp));
-	DEFINE(SP_OFFSET, offsetof(struct pt_regs, sp));
-	DEFINE(FUCOP_CTL_OFFSET, offsetof(struct pt_regs, fucop_ctl));
-	DEFINE(IPSW_OFFSET, offsetof(struct pt_regs, ipsw));
-	DEFINE(SYSCALLNO_OFFSET, offsetof(struct pt_regs, syscallno));
-	DEFINE(IPC_OFFSET, offsetof(struct pt_regs, ipc));
-	DEFINE(R0_OFFSET, offsetof(struct pt_regs, uregs[0]));
-	DEFINE(R15_OFFSET, offsetof(struct pt_regs, uregs[15]));
-	DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
-	DEFINE(CLOCK_COARSE_RES, LOW_RES_NSEC);
-	return 0;
-}
diff --git a/arch/nds32/kernel/atl2c.c b/arch/nds32/kernel/atl2c.c
deleted file mode 100644
index 0c5386e72098..000000000000
--- a/arch/nds32/kernel/atl2c.c
+++ /dev/null
@@ -1,65 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/compiler.h>
-#include <linux/of_address.h>
-#include <linux/of_fdt.h>
-#include <linux/of_platform.h>
-#include <asm/l2_cache.h>
-
-void __iomem *atl2c_base;
-static const struct of_device_id atl2c_ids[] __initconst = {
-	{.compatible = "andestech,atl2c",},
-	{}
-};
-
-static int __init atl2c_of_init(void)
-{
-	struct device_node *np;
-	struct resource res;
-	unsigned long tmp = 0;
-	unsigned long l2set, l2way, l2clsz;
-
-	if (!(__nds32__mfsr(NDS32_SR_MSC_CFG) & MSC_CFG_mskL2C))
-		return -ENODEV;
-
-	np = of_find_matching_node(NULL, atl2c_ids);
-	if (!np)
-		return -ENODEV;
-
-	if (of_address_to_resource(np, 0, &res))
-		return -ENODEV;
-
-	atl2c_base = ioremap(res.start, resource_size(&res));
-	if (!atl2c_base)
-		return -ENOMEM;
-
-	l2set =
-	    64 << ((L2C_R_REG(L2_CA_CONF_OFF) & L2_CA_CONF_mskL2SET) >>
-		   L2_CA_CONF_offL2SET);
-	l2way =
-	    1 +
-	    ((L2C_R_REG(L2_CA_CONF_OFF) & L2_CA_CONF_mskL2WAY) >>
-	     L2_CA_CONF_offL2WAY);
-	l2clsz =
-	    4 << ((L2C_R_REG(L2_CA_CONF_OFF) & L2_CA_CONF_mskL2CLSZ) >>
-		  L2_CA_CONF_offL2CLSZ);
-	pr_info("L2:%luKB/%luS/%luW/%luB\n",
-		l2set * l2way * l2clsz / 1024, l2set, l2way, l2clsz);
-
-	tmp = L2C_R_REG(L2CC_PROT_OFF);
-	tmp &= ~L2CC_PROT_mskMRWEN;
-	L2C_W_REG(L2CC_PROT_OFF, tmp);
-
-	tmp = L2C_R_REG(L2CC_SETUP_OFF);
-	tmp &= ~L2CC_SETUP_mskPART;
-	L2C_W_REG(L2CC_SETUP_OFF, tmp);
-
-	tmp = L2C_R_REG(L2CC_CTRL_OFF);
-	tmp |= L2CC_CTRL_mskEN;
-	L2C_W_REG(L2CC_CTRL_OFF, tmp);
-
-	return 0;
-}
-
-subsys_initcall(atl2c_of_init);
diff --git a/arch/nds32/kernel/cacheinfo.c b/arch/nds32/kernel/cacheinfo.c
deleted file mode 100644
index aab98e447feb..000000000000
--- a/arch/nds32/kernel/cacheinfo.c
+++ /dev/null
@@ -1,49 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/bitops.h>
-#include <linux/cacheinfo.h>
-#include <linux/cpu.h>
-
-static void ci_leaf_init(struct cacheinfo *this_leaf,
-			 enum cache_type type, unsigned int level)
-{
-	char cache_type = (type & CACHE_TYPE_INST ? ICACHE : DCACHE);
-
-	this_leaf->level = level;
-	this_leaf->type = type;
-	this_leaf->coherency_line_size = CACHE_LINE_SIZE(cache_type);
-	this_leaf->number_of_sets = CACHE_SET(cache_type);
-	this_leaf->ways_of_associativity = CACHE_WAY(cache_type);
-	this_leaf->size = this_leaf->number_of_sets *
-	    this_leaf->coherency_line_size * this_leaf->ways_of_associativity;
-#if defined(CONFIG_CPU_DCACHE_WRITETHROUGH)
-	this_leaf->attributes = CACHE_WRITE_THROUGH;
-#else
-	this_leaf->attributes = CACHE_WRITE_BACK;
-#endif
-}
-
-int init_cache_level(unsigned int cpu)
-{
-	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
-
-	/* Only 1 level and I/D cache seperate. */
-	this_cpu_ci->num_levels = 1;
-	this_cpu_ci->num_leaves = 2;
-	return 0;
-}
-
-int populate_cache_leaves(unsigned int cpu)
-{
-	unsigned int level, idx;
-	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
-	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
-
-	for (idx = 0, level = 1; level <= this_cpu_ci->num_levels &&
-	     idx < this_cpu_ci->num_leaves; idx++, level++) {
-		ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level);
-		ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level);
-	}
-	return 0;
-}
diff --git a/arch/nds32/kernel/devtree.c b/arch/nds32/kernel/devtree.c
deleted file mode 100644
index bdce0fe5af9f..000000000000
--- a/arch/nds32/kernel/devtree.c
+++ /dev/null
@@ -1,19 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/bug.h>
-#include <linux/printk.h>
-#include <linux/of_fdt.h>
-
-void __init early_init_devtree(void *params)
-{
-	if (!params || !early_init_dt_scan(params)) {
-		pr_crit("\n"
-			"Error: invalid device tree blob at (virtual address 0x%p)\n"
-			"\nPlease check your bootloader.", params);
-
-		BUG_ON(1);
-	}
-
-	dump_stack_set_arch_desc("%s (DT)", of_flat_dt_get_machine_name());
-}
diff --git a/arch/nds32/kernel/dma.c b/arch/nds32/kernel/dma.c
deleted file mode 100644
index 2ac8e6c82a61..000000000000
--- a/arch/nds32/kernel/dma.c
+++ /dev/null
@@ -1,82 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/dma-map-ops.h>
-#include <linux/cache.h>
-#include <linux/highmem.h>
-#include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
-#include <asm/proc-fns.h>
-
-static inline void cache_op(phys_addr_t paddr, size_t size,
-		void (*fn)(unsigned long start, unsigned long end))
-{
-	struct page *page = pfn_to_page(paddr >> PAGE_SHIFT);
-	unsigned offset = paddr & ~PAGE_MASK;
-	size_t left = size;
-	unsigned long start;
-
-	do {
-		size_t len = left;
-
-		if (PageHighMem(page)) {
-			void *addr;
-
-			if (offset + len > PAGE_SIZE) {
-				if (offset >= PAGE_SIZE) {
-					page += offset >> PAGE_SHIFT;
-					offset &= ~PAGE_MASK;
-				}
-				len = PAGE_SIZE - offset;
-			}
-
-			addr = kmap_atomic(page);
-			start = (unsigned long)(addr + offset);
-			fn(start, start + len);
-			kunmap_atomic(addr);
-		} else {
-			start = (unsigned long)phys_to_virt(paddr);
-			fn(start, start + size);
-		}
-		offset = 0;
-		page++;
-		left -= len;
-	} while (left);
-}
-
-void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
-		enum dma_data_direction dir)
-{
-	switch (dir) {
-	case DMA_FROM_DEVICE:
-		break;
-	case DMA_TO_DEVICE:
-	case DMA_BIDIRECTIONAL:
-		cache_op(paddr, size, cpu_dma_wb_range);
-		break;
-	default:
-		BUG();
-	}
-}
-
-void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
-		enum dma_data_direction dir)
-{
-	switch (dir) {
-	case DMA_TO_DEVICE:
-		break;
-	case DMA_FROM_DEVICE:
-	case DMA_BIDIRECTIONAL:
-		cache_op(paddr, size, cpu_dma_inval_range);
-		break;
-	default:
-		BUG();
-	}
-}
-
-void arch_dma_prep_coherent(struct page *page, size_t size)
-{
-	cache_op(page_to_phys(page), size, cpu_dma_wbinval_range);
-}
diff --git a/arch/nds32/kernel/ex-entry.S b/arch/nds32/kernel/ex-entry.S
deleted file mode 100644
index 107d98a1d1b8..000000000000
--- a/arch/nds32/kernel/ex-entry.S
+++ /dev/null
@@ -1,177 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/linkage.h>
-#include <asm/memory.h>
-#include <asm/nds32.h>
-#include <asm/errno.h>
-#include <asm/asm-offsets.h>
-#include <asm/page.h>
-#include <asm/fpu.h>
-
-#ifdef CONFIG_HWZOL
-	.macro push_zol
-	mfusr	$r14, $LB
-	mfusr	$r15, $LE
-	mfusr	$r16, $LC
-	.endm
-#endif
-	.macro  skip_save_fucop_ctl
-#if defined(CONFIG_FPU)
-skip_fucop_ctl:
-	smw.adm $p0, [$sp], $p0, #0x1
-	j fucop_ctl_done
-#endif
-	.endm
-
-	.macro	save_user_regs
-#if defined(CONFIG_FPU)
-	sethi   $p0, hi20(has_fpu)
-	lbsi 	$p0, [$p0+lo12(has_fpu)]
-	beqz	$p0, skip_fucop_ctl
-	mfsr    $p0, $FUCOP_CTL
-	smw.adm $p0, [$sp], $p0, #0x1
-	bclr    $p0, $p0, #FUCOP_CTL_offCP0EN
-	mtsr    $p0, $FUCOP_CTL
-fucop_ctl_done:
-	/* move $SP to the bottom of pt_regs */
-	addi    $sp, $sp, -FUCOP_CTL_OFFSET
-#else
-	smw.adm $sp, [$sp], $sp, #0x1
-	/* move $SP to the bottom of pt_regs */
-	addi    $sp, $sp, -OSP_OFFSET
-#endif
-
-	/* push $r0 ~ $r25 */
-	smw.bim $r0, [$sp], $r25
-	/* push $fp, $gp, $lp */
-	smw.bim $sp, [$sp], $sp, #0xe
-
-	mfsr	$r12, $SP_USR
-	mfsr	$r13, $IPC
-#ifdef CONFIG_HWZOL
-	push_zol
-#endif
-	movi	$r17, -1
-	move	$r18, $r0
-	mfsr	$r19, $PSW
-	mfsr	$r20, $IPSW
-	mfsr	$r21, $P_IPSW
-	mfsr	$r22, $P_IPC
-	mfsr	$r23, $P_P0
-	mfsr	$r24, $P_P1
-	smw.bim $r12, [$sp], $r24, #0
-	addi	$sp, $sp, -FUCOP_CTL_OFFSET
-
-	/* Initialize kernel space $fp */
-	andi    $p0, $r20, #PSW_mskPOM
-	movi    $p1, #0x0
-	cmovz   $fp, $p1, $p0
-
-	andi	$r16, $r19, #PSW_mskINTL
-	slti	$r17, $r16, #4
-	bnez	$r17, 1f
-	addi	$r17, $r19, #-2
-	mtsr	$r17, $PSW
-	isb
-1:
-	/* If it was superuser mode, we don't need to update $r25 */
-	bnez	$p0, 2f
-	la	$p0, __entry_task
-	lw	$r25, [$p0]
-2:
-	.endm
-
-	.text
-
-/*
- * Exception Vector
- */
-exception_handlers:
-	.long	unhandled_exceptions	!Reset/NMI
-	.long	unhandled_exceptions	!TLB fill
-	.long	do_page_fault		!PTE not present
-	.long	do_dispatch_tlb_misc	!TLB misc
-	.long	unhandled_exceptions	!TLB VLPT
-	.long	unhandled_exceptions	!Machine Error
-	.long	do_debug_trap		!Debug related
-	.long	do_dispatch_general	!General exception
-	.long	eh_syscall		!Syscall
-	.long	asm_do_IRQ		!IRQ
-
-	skip_save_fucop_ctl
-common_exception_handler:
-	save_user_regs
-	mfsr	$p0, $ITYPE
-	andi	$p0, $p0, #ITYPE_mskVECTOR
-	srli	$p0, $p0, #ITYPE_offVECTOR
-	andi	$p1, $p0, #NDS32_VECTOR_mskNONEXCEPTION
-	bnez	$p1, 1f
-	sethi	$lp, hi20(ret_from_exception)
-	ori	$lp, $lp, lo12(ret_from_exception)
-	sethi	$p1, hi20(exception_handlers)
-	ori	$p1, $p1, lo12(exception_handlers)
-	lw	$p1, [$p1+$p0<<2]
-	move	$r0, $p0
-	mfsr	$r1, $EVA
-	mfsr	$r2, $ITYPE
-	move	$r3, $sp
-	mfsr    $r4, $OIPC
-	/* enable gie if it is enabled in IPSW. */
-	mfsr	$r21, $PSW
-	andi	$r20, $r20, #PSW_mskGIE	/* r20 is $IPSW*/
-	or	$r21, $r21, $r20
-	mtsr	$r21, $PSW
-	dsb
-	jr	$p1
-	/* syscall */
-1:
-	addi	$p1, $p0, #-NDS32_VECTOR_offEXCEPTION
-	bnez	$p1, 2f
-	sethi	$lp, hi20(ret_from_exception)
-	ori	$lp, $lp, lo12(ret_from_exception)
-	sethi	$p1, hi20(exception_handlers)
-	ori	$p1, $p1, lo12(exception_handlers)
-	lwi	$p1, [$p1+#NDS32_VECTOR_offEXCEPTION<<2]
-	jr	$p1
-
-	/* interrupt */
-2:
-#ifdef CONFIG_TRACE_IRQFLAGS
-	jal     __trace_hardirqs_off
-#endif
-	move	$r0, $sp
-	sethi	$lp, hi20(ret_from_intr)
-	ori	$lp, $lp, lo12(ret_from_intr)
-	sethi	$p0, hi20(exception_handlers)
-	ori	$p0, $p0, lo12(exception_handlers)
-	lwi	$p0, [$p0+#NDS32_VECTOR_offINTERRUPT<<2]
-	jr	$p0
-
-	.macro	EXCEPTION_VECTOR_DEBUG
-	.align 4
-	mfsr     $p0, $EDM_CTL
-	andi     $p0, $p0, EDM_CTL_mskV3_EDM_MODE
-	tnez     $p0, SWID_RAISE_INTERRUPT_LEVEL
-	.endm
-
-	.macro	EXCEPTION_VECTOR
-	.align 4
-	sethi	 $p0, hi20(common_exception_handler)
-	ori	 $p0, $p0, lo12(common_exception_handler)
-	jral.ton $p0, $p0
-	.endm
-
-	.section	".text.init", #alloc, #execinstr
-	.global	exception_vector
-exception_vector:
-.rept 6
-	EXCEPTION_VECTOR
-.endr
-	EXCEPTION_VECTOR_DEBUG
-.rept 121
-	EXCEPTION_VECTOR
-.endr
-	.align 4
-	.global	exception_vector_end
-exception_vector_end:
diff --git a/arch/nds32/kernel/ex-exit.S b/arch/nds32/kernel/ex-exit.S
deleted file mode 100644
index b30699911b81..000000000000
--- a/arch/nds32/kernel/ex-exit.S
+++ /dev/null
@@ -1,193 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/linkage.h>
-#include <asm/unistd.h>
-#include <asm/assembler.h>
-#include <asm/nds32.h>
-#include <asm/asm-offsets.h>
-#include <asm/thread_info.h>
-#include <asm/current.h>
-#include <asm/fpu.h>
-
-
-
-#ifdef CONFIG_HWZOL
-	.macro pop_zol
-	mtusr	$r14, $LB
-	mtusr	$r15, $LE
-	mtusr	$r16, $LC
-	.endm
-#endif
-
-	.macro	restore_user_regs_first
-	setgie.d
-	isb
-#if defined(CONFIG_FPU)
-	addi    $sp, $sp, OSP_OFFSET
-	lmw.adm $r12, [$sp], $r25, #0x0
-	sethi   $p0, hi20(has_fpu)
-	lbsi 	$p0, [$p0+lo12(has_fpu)]
-	beqz	$p0, 2f
-	mtsr    $r25, $FUCOP_CTL
-2:
-#else
-	addi	$sp, $sp, FUCOP_CTL_OFFSET
-	lmw.adm $r12, [$sp], $r24, #0x0
-#endif
-	mtsr	$r12, $SP_USR
-	mtsr	$r13, $IPC
-#ifdef CONFIG_HWZOL
-	pop_zol
-#endif
-	mtsr	$r19, $PSW
-	mtsr	$r20, $IPSW
-	mtsr    $r21, $P_IPSW
-	mtsr	$r22, $P_IPC
-	mtsr	$r23, $P_P0
-	mtsr	$r24, $P_P1
-	lmw.adm $sp, [$sp], $sp, #0xe
-	.endm
-
-	.macro	restore_user_regs_last
-	pop	$p0
-	cmovn	$sp, $p0, $p0
-
-	iret
-	nop
-
-	.endm
-
-	.macro	restore_user_regs
-	restore_user_regs_first
-	lmw.adm $r0, [$sp], $r25, #0x0
-	addi	$sp, $sp, OSP_OFFSET
-	restore_user_regs_last
-	.endm
-
-	.macro	fast_restore_user_regs
-	restore_user_regs_first
-	lmw.adm $r1, [$sp], $r25, #0x0
-	addi	$sp, $sp, OSP_OFFSET-4
-	restore_user_regs_last
-	.endm
-
-#ifdef CONFIG_PREEMPTION
-	.macro	preempt_stop
-	.endm
-#else
-	.macro	preempt_stop
-	setgie.d
-	isb
-	.endm
-#define	resume_kernel	no_work_pending
-#endif
-
-ENTRY(ret_from_exception)
-	preempt_stop
-ENTRY(ret_from_intr)
-
-/*
- * judge Kernel or user mode
- *
- */
-	lwi	$p0, [$sp+(#IPSW_OFFSET)]	! Check if in nested interrupt
-	andi	$p0, $p0, #PSW_mskINTL
-	bnez	$p0, resume_kernel		! done with iret
-	j	resume_userspace
-
-
-/*
- * This is the fast syscall return path.  We do as little as
- * possible here, and this includes saving $r0 back into the SVC
- * stack.
- * fixed: tsk - $r25, syscall # - $r7, syscall table pointer - $r8
- */
-ENTRY(ret_fast_syscall)
-	gie_disable
-	lwi	$r1, [tsk+#TSK_TI_FLAGS]
-	andi	$p1, $r1, #_TIF_WORK_MASK
-	bnez	$p1, fast_work_pending
-	fast_restore_user_regs			! iret
-
-/*
- * Ok, we need to do extra processing,
- * enter the slow path returning from syscall, while pending work.
- */
-fast_work_pending:
-	swi	$r0, [$sp+(#R0_OFFSET)]		! what is different from ret_from_exception
-work_pending:
-	andi	$p1, $r1, #_TIF_NEED_RESCHED
-	bnez	$p1, work_resched
-
-	andi	$p1, $r1, #_TIF_SIGPENDING|#_TIF_NOTIFY_RESUME|#_TIF_NOTIFY_SIGNAL
-	beqz	$p1, no_work_pending
-
-	move	$r0, $sp			! 'regs'
-	gie_enable
-	bal	do_notify_resume
-	b       ret_slow_syscall
-work_resched:
-	bal	schedule			! path, return to user mode
-
-/*
- * "slow" syscall return path.
- */
-ENTRY(resume_userspace)
-ENTRY(ret_slow_syscall)
-	gie_disable
-	lwi	$p0, [$sp+(#IPSW_OFFSET)]	! Check if in nested interrupt
-	andi	$p0, $p0, #PSW_mskINTL
-	bnez	$p0, no_work_pending		! done with iret
-	lwi	$r1, [tsk+#TSK_TI_FLAGS]
-	andi	$p1, $r1, #_TIF_WORK_MASK
-	bnez	$p1, work_pending		! handle work_resched, sig_pend
-
-no_work_pending:
-#ifdef CONFIG_TRACE_IRQFLAGS
-	lwi	$p0, [$sp+(#IPSW_OFFSET)]
-	andi	$p0, $p0, #0x1
-	la	$r10, __trace_hardirqs_off
-	la	$r9, __trace_hardirqs_on
-	cmovz	$r9, $p0, $r10
-	jral	$r9
-#endif
-	restore_user_regs			! return from iret
-
-
-/*
- * preemptive kernel
- */
-#ifdef CONFIG_PREEMPTION
-resume_kernel:
-	gie_disable
-	lwi	$t0, [tsk+#TSK_TI_PREEMPT]
-	bnez	$t0, no_work_pending
-
-	lwi	$t0, [tsk+#TSK_TI_FLAGS]
-	andi	$p1, $t0, #_TIF_NEED_RESCHED
-	beqz	$p1, no_work_pending
-
-	lwi	$t0, [$sp+(#IPSW_OFFSET)]	! Interrupts off?
-	andi	$t0, $t0, #1
-	beqz	$t0, no_work_pending
-
-	jal	preempt_schedule_irq
-	b	no_work_pending
-#endif
-
-/*
- * This is how we return from a fork.
- */
-ENTRY(ret_from_fork)
-	bal	schedule_tail
-	beqz	$r6, 1f				! r6 stores fn for kernel thread
-	move	$r0, $r7			! prepare kernel thread arg
-	jral	$r6
-1:
-	lwi	$r1, [tsk+#TSK_TI_FLAGS]		! check for syscall tracing
-	andi	$p1, $r1, #_TIF_WORK_SYSCALL_LEAVE	! are we tracing syscalls?
-	beqz	$p1, ret_slow_syscall
-	move    $r0, $sp
-	bal	syscall_trace_leave
-	b	ret_slow_syscall
diff --git a/arch/nds32/kernel/ex-scall.S b/arch/nds32/kernel/ex-scall.S
deleted file mode 100644
index 270050f1b7b1..000000000000
--- a/arch/nds32/kernel/ex-scall.S
+++ /dev/null
@@ -1,100 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/linkage.h>
-#include <asm/unistd.h>
-#include <asm/assembler.h>
-#include <asm/nds32.h>
-#include <asm/asm-offsets.h>
-#include <asm/thread_info.h>
-#include <asm/current.h>
-
-/*
- * $r0 = previous task_struct,
- * $r1 = next task_struct,
- * previous and next are guaranteed not to be the same.
- */
-
-ENTRY(__switch_to)
-
-	la	$p0, __entry_task
-	sw	$r1, [$p0]
-	addi	$p1, $r0, #THREAD_CPU_CONTEXT
-	smw.bi 	$r6, [$p1], $r14, #0xb		! push r6~r14, fp, lp, sp
-	move	$r25, $r1
-#if defined(CONFIG_FPU)
-	call	_switch_fpu
-#endif
-	addi	$r1, $r25, #THREAD_CPU_CONTEXT
-	lmw.bi 	$r6, [$r1], $r14, #0xb		! pop r6~r14, fp, lp, sp
-	ret
-
-
-#define tbl $r8
-
-/*
- * $r7 will be writen as syscall nr
- */
-	.macro	get_scno
-	lwi	$r7, [$sp + R15_OFFSET]
-	swi	$r7, [$sp + SYSCALLNO_OFFSET]
-	.endm
-
-	.macro	updateipc
-	addi	$r17, $r13, #4				! $r13 is $IPC
-	swi	$r17, [$sp + IPC_OFFSET]
-	.endm
-
-ENTRY(eh_syscall)
-	updateipc
-
-	get_scno
-	gie_enable
-
-	lwi	$p0, [tsk+#TSK_TI_FLAGS]		! check for syscall tracing
-
-	andi	$p1, $p0, #_TIF_WORK_SYSCALL_ENTRY	! are we tracing syscalls?
-	bnez	$p1, __sys_trace
-
-	la	$lp, ret_fast_syscall		! return address
-jmp_systbl:
-	addi	$p1, $r7, #-__NR_syscalls	! syscall number of syscall instruction is guarded by addembler
-	bgez	$p1, _SCNO_EXCEED		! call sys_* routine
-	la	tbl, sys_call_table		! load syscall table pointer
-	slli	$p1, $r7, #2
-	add	$p1, tbl, $p1
-	lwi	$p1, [$p1]
-	jr	$p1				! no return
-
-_SCNO_EXCEED:
-	ori	$r0, $r7, #0
-        ori	$r1, $sp, #0
-	b	bad_syscall
-
-/*
- * This is the really slow path.  We're going to be doing
- * context switches, and waiting for our parent to respond.
- */
-__sys_trace:
-	move	$r0, $sp
-	bal	syscall_trace_enter
-	move	$r7, $r0
-	la	$lp, __sys_trace_return		! return address
-
-	addi    $p1, $r7, #1
-	beqz    $p1, ret_slow_syscall		! fatal signal is pending
-
-	addi	$p1, $sp, #R0_OFFSET		! pointer to regs
-	lmw.bi	$r0, [$p1], $r5			! have to reload $r0 - $r5
-	b	jmp_systbl
-
-__sys_trace_return:
-	swi	$r0, [$sp+#R0_OFFSET]		! T: save returned $r0
-	move	$r0, $sp			! set pt_regs for syscall_trace_leave
-	bal	syscall_trace_leave
-	b	ret_slow_syscall
-
-ENTRY(sys_rt_sigreturn_wrapper)
-	addi	$r0, $sp, #0
-	b	sys_rt_sigreturn
-ENDPROC(sys_rt_sigreturn_wrapper)
diff --git a/arch/nds32/kernel/fpu.c b/arch/nds32/kernel/fpu.c
deleted file mode 100644
index 701c09a668de..000000000000
--- a/arch/nds32/kernel/fpu.c
+++ /dev/null
@@ -1,266 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2018 Andes Technology Corporation
-
-#include <linux/sched.h>
-#include <linux/signal.h>
-#include <linux/sched/signal.h>
-#include <asm/processor.h>
-#include <asm/user.h>
-#include <asm/io.h>
-#include <asm/bitfield.h>
-#include <asm/fpu.h>
-
-const struct fpu_struct init_fpuregs = {
-	.fd_regs = {[0 ... 31] = sNAN64},
-	.fpcsr = FPCSR_INIT,
-#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC)
-	.UDF_IEX_trap = 0
-#endif
-};
-
-void save_fpu(struct task_struct *tsk)
-{
-	unsigned int fpcfg, fpcsr;
-
-	enable_fpu();
-	fpcfg = ((__nds32__fmfcfg() & FPCFG_mskFREG) >> FPCFG_offFREG);
-	switch (fpcfg) {
-	case SP32_DP32_reg:
-		asm volatile ("fsdi $fd31, [%0+0xf8]\n\t"
-			      "fsdi $fd30, [%0+0xf0]\n\t"
-			      "fsdi $fd29, [%0+0xe8]\n\t"
-			      "fsdi $fd28, [%0+0xe0]\n\t"
-			      "fsdi $fd27, [%0+0xd8]\n\t"
-			      "fsdi $fd26, [%0+0xd0]\n\t"
-			      "fsdi $fd25, [%0+0xc8]\n\t"
-			      "fsdi $fd24, [%0+0xc0]\n\t"
-			      "fsdi $fd23, [%0+0xb8]\n\t"
-			      "fsdi $fd22, [%0+0xb0]\n\t"
-			      "fsdi $fd21, [%0+0xa8]\n\t"
-			      "fsdi $fd20, [%0+0xa0]\n\t"
-			      "fsdi $fd19, [%0+0x98]\n\t"
-			      "fsdi $fd18, [%0+0x90]\n\t"
-			      "fsdi $fd17, [%0+0x88]\n\t"
-			      "fsdi $fd16, [%0+0x80]\n\t"
-			      :	/* no output */
-			      : "r" (&tsk->thread.fpu)
-			      : "memory");
-		fallthrough;
-	case SP32_DP16_reg:
-		asm volatile ("fsdi $fd15, [%0+0x78]\n\t"
-			      "fsdi $fd14, [%0+0x70]\n\t"
-			      "fsdi $fd13, [%0+0x68]\n\t"
-			      "fsdi $fd12, [%0+0x60]\n\t"
-			      "fsdi $fd11, [%0+0x58]\n\t"
-			      "fsdi $fd10, [%0+0x50]\n\t"
-			      "fsdi $fd9,  [%0+0x48]\n\t"
-			      "fsdi $fd8,  [%0+0x40]\n\t"
-			      :	/* no output */
-			      : "r" (&tsk->thread.fpu)
-			      : "memory");
-		fallthrough;
-	case SP16_DP8_reg:
-		asm volatile ("fsdi $fd7,  [%0+0x38]\n\t"
-			      "fsdi $fd6,  [%0+0x30]\n\t"
-			      "fsdi $fd5,  [%0+0x28]\n\t"
-			      "fsdi $fd4,  [%0+0x20]\n\t"
-			      :	/* no output */
-			      : "r" (&tsk->thread.fpu)
-			      : "memory");
-		fallthrough;
-	case SP8_DP4_reg:
-		asm volatile ("fsdi $fd3,  [%1+0x18]\n\t"
-			      "fsdi $fd2,  [%1+0x10]\n\t"
-			      "fsdi $fd1,  [%1+0x8]\n\t"
-			      "fsdi $fd0,  [%1+0x0]\n\t"
-			      "fmfcsr	%0\n\t"
-			      "swi  %0, [%1+0x100]\n\t"
-			      : "=&r" (fpcsr)
-			      : "r"(&tsk->thread.fpu)
-			      : "memory");
-	}
-	disable_fpu();
-}
-
-void load_fpu(const struct fpu_struct *fpregs)
-{
-	unsigned int fpcfg, fpcsr;
-
-	enable_fpu();
-	fpcfg = ((__nds32__fmfcfg() & FPCFG_mskFREG) >> FPCFG_offFREG);
-	switch (fpcfg) {
-	case SP32_DP32_reg:
-		asm volatile ("fldi $fd31, [%0+0xf8]\n\t"
-			      "fldi $fd30, [%0+0xf0]\n\t"
-			      "fldi $fd29, [%0+0xe8]\n\t"
-			      "fldi $fd28, [%0+0xe0]\n\t"
-			      "fldi $fd27, [%0+0xd8]\n\t"
-			      "fldi $fd26, [%0+0xd0]\n\t"
-			      "fldi $fd25, [%0+0xc8]\n\t"
-			      "fldi $fd24, [%0+0xc0]\n\t"
-			      "fldi $fd23, [%0+0xb8]\n\t"
-			      "fldi $fd22, [%0+0xb0]\n\t"
-			      "fldi $fd21, [%0+0xa8]\n\t"
-			      "fldi $fd20, [%0+0xa0]\n\t"
-			      "fldi $fd19, [%0+0x98]\n\t"
-			      "fldi $fd18, [%0+0x90]\n\t"
-			      "fldi $fd17, [%0+0x88]\n\t"
-			      "fldi $fd16, [%0+0x80]\n\t"
-			      :	/* no output */
-			      : "r" (fpregs));
-		fallthrough;
-	case SP32_DP16_reg:
-		asm volatile ("fldi $fd15, [%0+0x78]\n\t"
-			      "fldi $fd14, [%0+0x70]\n\t"
-			      "fldi $fd13, [%0+0x68]\n\t"
-			      "fldi $fd12, [%0+0x60]\n\t"
-			      "fldi $fd11, [%0+0x58]\n\t"
-			      "fldi $fd10, [%0+0x50]\n\t"
-			      "fldi $fd9,  [%0+0x48]\n\t"
-			      "fldi $fd8,  [%0+0x40]\n\t"
-			      :	/* no output */
-			      : "r" (fpregs));
-		fallthrough;
-	case SP16_DP8_reg:
-		asm volatile ("fldi $fd7,  [%0+0x38]\n\t"
-			      "fldi $fd6,  [%0+0x30]\n\t"
-			      "fldi $fd5,  [%0+0x28]\n\t"
-			      "fldi $fd4,  [%0+0x20]\n\t"
-			      :	/* no output */
-			      : "r" (fpregs));
-		fallthrough;
-	case SP8_DP4_reg:
-		asm volatile ("fldi $fd3,  [%1+0x18]\n\t"
-			      "fldi $fd2,  [%1+0x10]\n\t"
-			      "fldi $fd1,  [%1+0x8]\n\t"
-			      "fldi $fd0,  [%1+0x0]\n\t"
-			      "lwi  %0, [%1+0x100]\n\t"
-			      "fmtcsr	%0\n\t":"=&r" (fpcsr)
-			      : "r"(fpregs));
-	}
-	disable_fpu();
-}
-void store_fpu_for_suspend(void)
-{
-#ifdef CONFIG_LAZY_FPU
-	if (last_task_used_math != NULL)
-		save_fpu(last_task_used_math);
-	last_task_used_math = NULL;
-#else
-	if (!used_math())
-		return;
-	unlazy_fpu(current);
-#endif
-	clear_fpu(task_pt_regs(current));
-}
-inline void do_fpu_context_switch(struct pt_regs *regs)
-{
-	/* Enable to use FPU. */
-
-	if (!user_mode(regs)) {
-		pr_err("BUG: FPU is used in kernel mode.\n");
-		BUG();
-		return;
-	}
-
-	enable_ptreg_fpu(regs);
-#ifdef CONFIG_LAZY_FPU	//Lazy FPU is used
-	if (last_task_used_math == current)
-		return;
-	if (last_task_used_math != NULL)
-		/* Other processes fpu state, save away */
-		save_fpu(last_task_used_math);
-	last_task_used_math = current;
-#endif
-	if (used_math()) {
-		load_fpu(&current->thread.fpu);
-	} else {
-		/* First time FPU user.  */
-		load_fpu(&init_fpuregs);
-#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC)
-		current->thread.fpu.UDF_IEX_trap = init_fpuregs.UDF_IEX_trap;
-#endif
-		set_used_math();
-	}
-
-}
-
-inline void fill_sigfpe_signo(unsigned int fpcsr, int *signo)
-{
-	if (fpcsr & FPCSR_mskOVFT)
-		*signo = FPE_FLTOVF;
-#ifndef CONFIG_SUPPORT_DENORMAL_ARITHMETIC
-	else if (fpcsr & FPCSR_mskUDFT)
-		*signo = FPE_FLTUND;
-#endif
-	else if (fpcsr & FPCSR_mskIVOT)
-		*signo = FPE_FLTINV;
-	else if (fpcsr & FPCSR_mskDBZT)
-		*signo = FPE_FLTDIV;
-	else if (fpcsr & FPCSR_mskIEXT)
-		*signo = FPE_FLTRES;
-}
-
-inline void handle_fpu_exception(struct pt_regs *regs)
-{
-	unsigned int fpcsr;
-	int si_code = 0, si_signo = SIGFPE;
-#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC)
-	unsigned long redo_except = FPCSR_mskDNIT|FPCSR_mskUDFT|FPCSR_mskIEXT;
-#else
-	unsigned long redo_except = FPCSR_mskDNIT;
-#endif
-
-	lose_fpu();
-	fpcsr = current->thread.fpu.fpcsr;
-
-	if (fpcsr & redo_except) {
-		si_signo = do_fpuemu(regs, &current->thread.fpu);
-		fpcsr = current->thread.fpu.fpcsr;
-		if (!si_signo) {
-			current->thread.fpu.fpcsr &= ~(redo_except);
-			goto done;
-		}
-	} else if (fpcsr & FPCSR_mskRIT) {
-		if (!user_mode(regs))
-			make_task_dead(SIGILL);
-		si_signo = SIGILL;
-	}
-
-	switch (si_signo) {
-	case SIGFPE:
-		fill_sigfpe_signo(fpcsr, &si_code);
-		break;
-	case SIGILL:
-		show_regs(regs);
-		si_code = ILL_COPROC;
-		break;
-	case SIGBUS:
-		si_code = BUS_ADRERR;
-		break;
-	default:
-		break;
-	}
-
-	force_sig_fault(si_signo, si_code,
-			(void __user *)instruction_pointer(regs));
-done:
-	own_fpu();
-}
-
-bool do_fpu_exception(unsigned int subtype, struct pt_regs *regs)
-{
-	int done = true;
-	/* Coprocessor disabled exception */
-	if (subtype == FPU_DISABLE_EXCEPTION) {
-		preempt_disable();
-		do_fpu_context_switch(regs);
-		preempt_enable();
-	}
-	/* Coprocessor exception such as underflow and overflow */
-	else if (subtype == FPU_EXCEPTION)
-		handle_fpu_exception(regs);
-	else
-		done = false;
-	return done;
-}
diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c
deleted file mode 100644
index 711bc8cd186d..000000000000
--- a/arch/nds32/kernel/ftrace.c
+++ /dev/null
@@ -1,278 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-#include <linux/ftrace.h>
-#include <linux/uaccess.h>
-#include <asm/cacheflush.h>
-
-#ifndef CONFIG_DYNAMIC_FTRACE
-extern void (*ftrace_trace_function)(unsigned long, unsigned long,
-				     struct ftrace_ops*, struct ftrace_regs*);
-extern void ftrace_graph_caller(void);
-
-noinline void __naked ftrace_stub(unsigned long ip, unsigned long parent_ip,
-				  struct ftrace_ops *op, struct ftrace_regs *fregs)
-{
-	__asm__ ("");  /* avoid to optimize as pure function */
-}
-
-noinline void _mcount(unsigned long parent_ip)
-{
-	/* save all state by the compiler prologue */
-
-	unsigned long ip = (unsigned long)__builtin_return_address(0);
-
-	if (ftrace_trace_function != ftrace_stub)
-		ftrace_trace_function(ip - MCOUNT_INSN_SIZE, parent_ip,
-				      NULL, NULL);
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	if (ftrace_graph_return != (trace_func_graph_ret_t)ftrace_stub
-	    || ftrace_graph_entry != ftrace_graph_entry_stub)
-		ftrace_graph_caller();
-#endif
-
-	/* restore all state by the compiler epilogue */
-}
-EXPORT_SYMBOL(_mcount);
-
-#else /* CONFIG_DYNAMIC_FTRACE */
-
-noinline void __naked ftrace_stub(unsigned long ip, unsigned long parent_ip,
-				  struct ftrace_ops *op, struct ftrace_regs *fregs)
-{
-	__asm__ ("");  /* avoid to optimize as pure function */
-}
-
-noinline void __naked _mcount(unsigned long parent_ip)
-{
-	__asm__ ("");  /* avoid to optimize as pure function */
-}
-EXPORT_SYMBOL(_mcount);
-
-#define XSTR(s) STR(s)
-#define STR(s) #s
-void _ftrace_caller(unsigned long parent_ip)
-{
-	/* save all state needed by the compiler prologue */
-
-	/*
-	 * prepare arguments for real tracing function
-	 * first  arg : __builtin_return_address(0) - MCOUNT_INSN_SIZE
-	 * second arg : parent_ip
-	 */
-	__asm__ __volatile__ (
-		"move $r1, %0				   \n\t"
-		"addi $r0, %1, #-" XSTR(MCOUNT_INSN_SIZE) "\n\t"
-		:
-		: "r" (parent_ip), "r" (__builtin_return_address(0)));
-
-	/* a placeholder for the call to a real tracing function */
-	__asm__ __volatile__ (
-		"ftrace_call:		\n\t"
-		"nop			\n\t"
-		"nop			\n\t"
-		"nop			\n\t");
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	/* a placeholder for the call to ftrace_graph_caller */
-	__asm__ __volatile__ (
-		"ftrace_graph_call:	\n\t"
-		"nop			\n\t"
-		"nop			\n\t"
-		"nop			\n\t");
-#endif
-	/* restore all state needed by the compiler epilogue */
-}
-
-static unsigned long gen_sethi_insn(unsigned long addr)
-{
-	unsigned long opcode = 0x46000000;
-	unsigned long imm = addr >> 12;
-	unsigned long rt_num = 0xf << 20;
-
-	return ENDIAN_CONVERT(opcode | rt_num | imm);
-}
-
-static unsigned long gen_ori_insn(unsigned long addr)
-{
-	unsigned long opcode = 0x58000000;
-	unsigned long imm = addr & 0x0000fff;
-	unsigned long rt_num = 0xf << 20;
-	unsigned long ra_num = 0xf << 15;
-
-	return ENDIAN_CONVERT(opcode | rt_num | ra_num | imm);
-}
-
-static unsigned long gen_jral_insn(unsigned long addr)
-{
-	unsigned long opcode = 0x4a000001;
-	unsigned long rt_num = 0x1e << 20;
-	unsigned long rb_num = 0xf << 10;
-
-	return ENDIAN_CONVERT(opcode | rt_num | rb_num);
-}
-
-static void ftrace_gen_call_insn(unsigned long *call_insns,
-				 unsigned long addr)
-{
-	call_insns[0] = gen_sethi_insn(addr); /* sethi $r15, imm20u       */
-	call_insns[1] = gen_ori_insn(addr);   /* ori   $r15, $r15, imm15u */
-	call_insns[2] = gen_jral_insn(addr);  /* jral  $lp,  $r15         */
-}
-
-static int __ftrace_modify_code(unsigned long pc, unsigned long *old_insn,
-				unsigned long *new_insn, bool validate)
-{
-	unsigned long orig_insn[3];
-
-	if (validate) {
-		if (copy_from_kernel_nofault(orig_insn, (void *)pc,
-				MCOUNT_INSN_SIZE))
-			return -EFAULT;
-		if (memcmp(orig_insn, old_insn, MCOUNT_INSN_SIZE))
-			return -EINVAL;
-	}
-
-	if (copy_to_kernel_nofault((void *)pc, new_insn, MCOUNT_INSN_SIZE))
-		return -EPERM;
-
-	return 0;
-}
-
-static int ftrace_modify_code(unsigned long pc, unsigned long *old_insn,
-			      unsigned long *new_insn, bool validate)
-{
-	int ret;
-
-	ret = __ftrace_modify_code(pc, old_insn, new_insn, validate);
-	if (ret)
-		return ret;
-
-	flush_icache_range(pc, pc + MCOUNT_INSN_SIZE);
-
-	return ret;
-}
-
-int ftrace_update_ftrace_func(ftrace_func_t func)
-{
-	unsigned long pc = (unsigned long)&ftrace_call;
-	unsigned long old_insn[3] = {INSN_NOP, INSN_NOP, INSN_NOP};
-	unsigned long new_insn[3] = {INSN_NOP, INSN_NOP, INSN_NOP};
-
-	if (func != ftrace_stub)
-		ftrace_gen_call_insn(new_insn, (unsigned long)func);
-
-	return ftrace_modify_code(pc, old_insn, new_insn, false);
-}
-
-int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
-{
-	unsigned long pc = rec->ip;
-	unsigned long nop_insn[3] = {INSN_NOP, INSN_NOP, INSN_NOP};
-	unsigned long call_insn[3] = {INSN_NOP, INSN_NOP, INSN_NOP};
-
-	ftrace_gen_call_insn(call_insn, addr);
-
-	return ftrace_modify_code(pc, nop_insn, call_insn, true);
-}
-
-int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
-		    unsigned long addr)
-{
-	unsigned long pc = rec->ip;
-	unsigned long nop_insn[3] = {INSN_NOP, INSN_NOP, INSN_NOP};
-	unsigned long call_insn[3] = {INSN_NOP, INSN_NOP, INSN_NOP};
-
-	ftrace_gen_call_insn(call_insn, addr);
-
-	return ftrace_modify_code(pc, call_insn, nop_insn, true);
-}
-#endif /* CONFIG_DYNAMIC_FTRACE */
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
-			   unsigned long frame_pointer)
-{
-	unsigned long return_hooker = (unsigned long)&return_to_handler;
-	unsigned long old;
-
-	if (unlikely(atomic_read(&current->tracing_graph_pause)))
-		return;
-
-	old = *parent;
-
-	if (!function_graph_enter(old, self_addr, frame_pointer, NULL))
-		*parent = return_hooker;
-}
-
-noinline void ftrace_graph_caller(void)
-{
-	unsigned long *parent_ip =
-		(unsigned long *)(__builtin_frame_address(2) - 4);
-
-	unsigned long selfpc =
-		(unsigned long)(__builtin_return_address(1) - MCOUNT_INSN_SIZE);
-
-	unsigned long frame_pointer =
-		(unsigned long)__builtin_frame_address(3);
-
-	prepare_ftrace_return(parent_ip, selfpc, frame_pointer);
-}
-
-extern unsigned long ftrace_return_to_handler(unsigned long frame_pointer);
-void __naked return_to_handler(void)
-{
-	__asm__ __volatile__ (
-		/* save state needed by the ABI     */
-		"smw.adm $r0,[$sp],$r1,#0x0  \n\t"
-
-		/* get original return address      */
-		"move $r0, $fp               \n\t"
-		"bal ftrace_return_to_handler\n\t"
-		"move $lp, $r0               \n\t"
-
-		/* restore state needed by the ABI  */
-		"lmw.bim $r0,[$sp],$r1,#0x0  \n\t");
-}
-
-#ifdef CONFIG_DYNAMIC_FTRACE
-extern unsigned long ftrace_graph_call;
-
-static int ftrace_modify_graph_caller(bool enable)
-{
-	unsigned long pc = (unsigned long)&ftrace_graph_call;
-	unsigned long nop_insn[3] = {INSN_NOP, INSN_NOP, INSN_NOP};
-	unsigned long call_insn[3] = {INSN_NOP, INSN_NOP, INSN_NOP};
-
-	ftrace_gen_call_insn(call_insn, (unsigned long)ftrace_graph_caller);
-
-	if (enable)
-		return ftrace_modify_code(pc, nop_insn, call_insn, true);
-	else
-		return ftrace_modify_code(pc, call_insn, nop_insn, true);
-}
-
-int ftrace_enable_ftrace_graph_caller(void)
-{
-	return ftrace_modify_graph_caller(true);
-}
-
-int ftrace_disable_ftrace_graph_caller(void)
-{
-	return ftrace_modify_graph_caller(false);
-}
-#endif /* CONFIG_DYNAMIC_FTRACE */
-
-#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
-
-
-#ifdef CONFIG_TRACE_IRQFLAGS
-noinline void __trace_hardirqs_off(void)
-{
-	trace_hardirqs_off();
-}
-noinline void __trace_hardirqs_on(void)
-{
-	trace_hardirqs_on();
-}
-#endif /* CONFIG_TRACE_IRQFLAGS */
diff --git a/arch/nds32/kernel/head.S b/arch/nds32/kernel/head.S
deleted file mode 100644
index 7347f00451a9..000000000000
--- a/arch/nds32/kernel/head.S
+++ /dev/null
@@ -1,197 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/linkage.h>
-#include <linux/init.h>
-#include <linux/pgtable.h>
-#include <asm/ptrace.h>
-#include <asm/asm-offsets.h>
-#include <asm/page.h>
-#include <linux/sizes.h>
-#include <asm/thread_info.h>
-
-#ifdef CONFIG_CPU_BIG_ENDIAN
-#define OF_DT_MAGIC 0xd00dfeed
-#else
-#define OF_DT_MAGIC 0xedfe0dd0
-#endif
-
-	.globl  swapper_pg_dir
-	.equ    swapper_pg_dir, TEXTADDR - 0x4000
-
-/*
- * Kernel startup entry point.
- */
-	.section ".head.text", "ax"
-	.type   _stext, %function
-ENTRY(_stext)
-	setgie.d                            ! Disable interrupt
-	isb
-/*
- * Disable I/D-cache and enable it at a proper time
- */
-	mfsr    $r0, $mr8
-	li      $r1, #~(CACHE_CTL_mskIC_EN|CACHE_CTL_mskDC_EN)
-	and     $r0, $r0, $r1
-	mtsr    $r0, $mr8
-
-/*
- * Process device tree blob
- */
-	andi 	$r0,$r2,#0x3
-	li	$r10, 0
-	bne     $r0, $r10, _nodtb
-	lwi	$r0, [$r2]
-	li	$r1, OF_DT_MAGIC
-	bne     $r0, $r1, _nodtb
-	move	$r10, $r2
-_nodtb:
-
-/*
- * Create a temporary mapping area for booting, before start_kernel
- */
-	sethi   $r4, hi20(swapper_pg_dir)
-	li      $p0, (PAGE_OFFSET - PHYS_OFFSET)
-	sub     $r4, $r4, $p0
-	tlbop   FlushAll            ! invalidate TLB\n"
-	isb
-	mtsr    $r4, $L1_PPTB       ! load page table pointer\n"
-
-#ifdef CONFIG_CPU_DCACHE_DISABLE
-	#define MMU_CTL_NTCC MMU_CTL_CACHEABLE_NON
-#else
-	#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
-		#define MMU_CTL_NTCC MMU_CTL_CACHEABLE_WT
-	#else
-		#define MMU_CTL_NTCC MMU_CTL_CACHEABLE_WB
-	#endif
-#endif
-
-/* set NTC cacheability, mutliple page size in use */
-	mfsr    $r3, $MMU_CTL
-#if CONFIG_MEMORY_START >= 0xc0000000
-	ori     $r3, $r3, (MMU_CTL_NTCC << MMU_CTL_offNTC3)
-#elif CONFIG_MEMORY_START >= 0x80000000
-	ori     $r3, $r3, (MMU_CTL_NTCC << MMU_CTL_offNTC2)
-#elif CONFIG_MEMORY_START >= 0x40000000
-	ori     $r3, $r3, (MMU_CTL_NTCC << MMU_CTL_offNTC1)
-#else
-	ori     $r3, $r3, (MMU_CTL_NTCC << MMU_CTL_offNTC0)
-#endif
-
-#ifdef CONFIG_ANDES_PAGE_SIZE_4KB
-	ori     $r3, $r3, #(MMU_CTL_mskMPZIU)
-#else
-	ori     $r3, $r3, #(MMU_CTL_mskMPZIU|MMU_CTL_D8KB)
-#endif
-#ifdef CONFIG_HW_SUPPORT_UNALIGNMENT_ACCESS
-	li      $r0, #MMU_CTL_UNA
-	or      $r3, $r3, $r0
-#endif
-	mtsr    $r3, $MMU_CTL
-	isb
-
-/* set page size and size of kernel image */
-        mfsr    $r0, $MMU_CFG
-        srli    $r3, $r0, MMU_CFG_offfEPSZ
-        zeb     $r3, $r3
-        bnez    $r3, _extra_page_size_support
-#ifdef CONFIG_ANDES_PAGE_SIZE_4KB
-        li      $r5, #SZ_4K                 ! Use 4KB page size
-#else
-        li      $r5, #SZ_8K                 ! Use 8KB page size
-        li      $r3, #1
-#endif
-        mtsr    $r3, $TLB_MISC
-        b       _image_size_check
-
-_extra_page_size_support:                    ! Use epzs pages size
-        clz     $r6, $r3
-        subri   $r2, $r6, #31
-        li      $r3, #1
-        sll     $r3, $r3, $r2
-        /* MMU_CFG.EPSZ value -> meaning */
-        mul     $r5, $r3, $r3
-        slli    $r5, $r5, #14
-        /* MMU_CFG.EPSZ  -> TLB_MISC.ACC_PSZ */
-        addi    $r3, $r2, #0x2
-        mtsr    $r3, $TLB_MISC
-
-_image_size_check:
-        /* calculate the image maximum size accepted by TLB config */
-        andi    $r6, $r0, MMU_CFG_mskTBW
-        andi    $r0, $r0, MMU_CFG_mskTBS
-        srli    $r6, $r6, MMU_CFG_offTBW
-        srli    $r0, $r0, MMU_CFG_offTBS
-	addi    $r6, $r6, #0x1               ! MMU_CFG.TBW value -> meaning
-        addi    $r0, $r0, #0x2               ! MMU_CFG.TBS value -> meaning
-        sll     $r0, $r6, $r0                ! entries = k-way * n-set
-        mul     $r6, $r0, $r5                ! max size = entries * page size
-        /* check kernel image size */
-        la      $r3, (_end - PAGE_OFFSET)
-        bgt     $r3, $r6, __error
-
-	li      $r2, #(PHYS_OFFSET + TLB_DATA_kernel_text_attr)
-        li      $r3, PAGE_OFFSET
-        add     $r6, $r6, $r3
-
-_tlb:
-	mtsr    $r3, $TLB_VPN
-	dsb
-	tlbop   $r2, RWR
-	isb
-	add     $r3, $r3, $r5
-	add     $r2, $r2, $r5
-	bgt     $r6, $r3, _tlb
-	mfsr    $r3, $TLB_MISC      ! setup access page size
-	li      $r2, #~0xf
-	and     $r3, $r3, $r2
-#ifdef CONFIG_ANDES_PAGE_SIZE_8KB
-	ori    $r3, $r3, #0x1
-#endif
-	mtsr    $r3, $TLB_MISC
-
-	mfsr    $r0, $MISC_CTL      ! Enable BTB, RTP, shadow sp, and HW_PRE
-	ori     $r0, $r0, #MISC_init
-	mtsr    $r0, $MISC_CTL
-
-	mfsr    $p1, $PSW
-	li      $r15, #~PSW_clr             ! clear WBNA|DME|IME|DT|IT|POM|INTL|GIE
-	and     $p1, $p1, $r15
-	ori     $p1, $p1, #PSW_init
-	mtsr    $p1, $IPSW                  ! when iret, it will automatically enable MMU
-	la      $lp, __mmap_switched
-	mtsr    $lp, $IPC
-	iret
-	nop
-
-	.type   __switch_data, %object
-__switch_data:
-	.long   __bss_start                 ! $r6
-	.long   _end                        ! $r7
-	.long	__atags_pointer 	    ! $atag_pointer
-	.long   init_task                   ! $r9, move to $r25
-	.long   init_thread_union + THREAD_SIZE    ! $sp
-
-
-/*
- * The following fragment of code is executed with the MMU on in MMU mode,
- * and uses absolute addresses; this is not position independent.
- */
-	.align
-	.type   __mmap_switched, %function
-__mmap_switched:
-	la  $r3, __switch_data
-	lmw.bim $r6, [$r3], $r9, #0b0001
-	move	$r25, $r9
-	move    $fp, #0             ! Clear  BSS (and zero $fp)
-	beq $r7, $r6, _RRT
-1:	swi.bi  $fp, [$r6], #4
-	bne $r7, $r6, 1b
-	swi	$r10, [$r8]
-
-_RRT:
-	b   start_kernel
-
-__error:
-	b   __error
diff --git a/arch/nds32/kernel/irq.c b/arch/nds32/kernel/irq.c
deleted file mode 100644
index 6ff5a672be27..000000000000
--- a/arch/nds32/kernel/irq.c
+++ /dev/null
@@ -1,9 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/irqchip.h>
-
-void __init init_IRQ(void)
-{
-	irqchip_init();
-}
diff --git a/arch/nds32/kernel/module.c b/arch/nds32/kernel/module.c
deleted file mode 100644
index 3897fd14a21d..000000000000
--- a/arch/nds32/kernel/module.c
+++ /dev/null
@@ -1,278 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/module.h>
-#include <linux/elf.h>
-#include <linux/vmalloc.h>
-#include <linux/moduleloader.h>
-#include <linux/pgtable.h>
-
-void *module_alloc(unsigned long size)
-{
-	return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
-				    GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
-				    __builtin_return_address(0));
-}
-
-void module_free(struct module *module, void *region)
-{
-	vfree(region);
-}
-
-int module_frob_arch_sections(Elf_Ehdr * hdr,
-			      Elf_Shdr * sechdrs,
-			      char *secstrings, struct module *mod)
-{
-	return 0;
-}
-
-void do_reloc16(unsigned int val, unsigned int *loc, unsigned int val_mask,
-		unsigned int val_shift, unsigned int loc_mask,
-		unsigned int partial_in_place, unsigned int swap)
-{
-	unsigned int tmp = 0, tmp2 = 0;
-
-	__asm__ __volatile__("\tlhi.bi\t%0, [%2], 0\n"
-			     "\tbeqz\t%3, 1f\n"
-			     "\twsbh\t%0, %1\n"
-			     "1:\n":"=r"(tmp):"0"(tmp), "r"(loc), "r"(swap)
-	    );
-
-	tmp2 = tmp & loc_mask;
-	if (partial_in_place) {
-		tmp &= (~loc_mask);
-		tmp =
-		    tmp2 | ((tmp + ((val & val_mask) >> val_shift)) & val_mask);
-	} else {
-		tmp = tmp2 | ((val & val_mask) >> val_shift);
-	}
-
-	__asm__ __volatile__("\tbeqz\t%3, 2f\n"
-			     "\twsbh\t%0, %1\n"
-			     "2:\n"
-			     "\tshi.bi\t%0, [%2], 0\n":"=r"(tmp):"0"(tmp),
-			     "r"(loc), "r"(swap)
-	    );
-}
-
-void do_reloc32(unsigned int val, unsigned int *loc, unsigned int val_mask,
-		unsigned int val_shift, unsigned int loc_mask,
-		unsigned int partial_in_place, unsigned int swap)
-{
-	unsigned int tmp = 0, tmp2 = 0;
-
-	__asm__ __volatile__("\tlmw.bi\t%0, [%2], %0, 0\n"
-			     "\tbeqz\t%3, 1f\n"
-			     "\twsbh\t%0, %1\n"
-			     "\trotri\t%0, %1, 16\n"
-			     "1:\n":"=r"(tmp):"0"(tmp), "r"(loc), "r"(swap)
-	    );
-
-	tmp2 = tmp & loc_mask;
-	if (partial_in_place) {
-		tmp &= (~loc_mask);
-		tmp =
-		    tmp2 | ((tmp + ((val & val_mask) >> val_shift)) & val_mask);
-	} else {
-		tmp = tmp2 | ((val & val_mask) >> val_shift);
-	}
-
-	__asm__ __volatile__("\tbeqz\t%3, 2f\n"
-			     "\twsbh\t%0, %1\n"
-			     "\trotri\t%0, %1, 16\n"
-			     "2:\n"
-			     "\tsmw.bi\t%0, [%2], %0, 0\n":"=r"(tmp):"0"(tmp),
-			     "r"(loc), "r"(swap)
-	    );
-}
-
-static inline int exceed_limit(int offset, unsigned int val_mask,
-			       struct module *module, Elf32_Rela * rel,
-			       unsigned int relindex, unsigned int reloc_order)
-{
-	int abs_off = offset < 0 ? ~offset : offset;
-
-	if (abs_off & (~val_mask)) {
-		pr_err("\n%s: relocation type %d out of range.\n"
-		       "please rebuild the kernel module with gcc option \"-Wa,-mno-small-text\".\n",
-		       module->name, ELF32_R_TYPE(rel->r_info));
-		pr_err("section %d reloc %d offset 0x%x relative 0x%x.\n",
-		       relindex, reloc_order, rel->r_offset, offset);
-		return true;
-	}
-	return false;
-}
-
-#ifdef __NDS32_EL__
-#define NEED_SWAP 1
-#else
-#define NEED_SWAP 0
-#endif
-
-int
-apply_relocate_add(Elf32_Shdr * sechdrs, const char *strtab,
-		   unsigned int symindex, unsigned int relindex,
-		   struct module *module)
-{
-	Elf32_Shdr *symsec = sechdrs + symindex;
-	Elf32_Shdr *relsec = sechdrs + relindex;
-	Elf32_Shdr *dstsec = sechdrs + relsec->sh_info;
-	Elf32_Rela *rel = (void *)relsec->sh_addr;
-	unsigned int i;
-
-	for (i = 0; i < relsec->sh_size / sizeof(Elf32_Rela); i++, rel++) {
-		Elf32_Addr *loc;
-		Elf32_Sym *sym;
-		Elf32_Addr v;
-		s32 offset;
-
-		offset = ELF32_R_SYM(rel->r_info);
-		if (offset < 0
-		    || offset > (symsec->sh_size / sizeof(Elf32_Sym))) {
-			pr_err("%s: bad relocation\n", module->name);
-			pr_err("section %d reloc %d\n", relindex, i);
-			return -ENOEXEC;
-		}
-
-		sym = ((Elf32_Sym *) symsec->sh_addr) + offset;
-
-		if (rel->r_offset < 0
-		    || rel->r_offset > dstsec->sh_size - sizeof(u16)) {
-			pr_err("%s: out of bounds relocation\n", module->name);
-			pr_err("section %d reloc %d offset 0x%0x size %d\n",
-			       relindex, i, rel->r_offset, dstsec->sh_size);
-			return -ENOEXEC;
-		}
-
-		loc = (Elf32_Addr *) (dstsec->sh_addr + rel->r_offset);
-		v = sym->st_value + rel->r_addend;
-
-		switch (ELF32_R_TYPE(rel->r_info)) {
-		case R_NDS32_NONE:
-		case R_NDS32_INSN16:
-		case R_NDS32_LABEL:
-		case R_NDS32_LONGCALL1:
-		case R_NDS32_LONGCALL2:
-		case R_NDS32_LONGCALL3:
-		case R_NDS32_LONGCALL4:
-		case R_NDS32_LONGJUMP1:
-		case R_NDS32_LONGJUMP2:
-		case R_NDS32_LONGJUMP3:
-		case R_NDS32_9_FIXED_RELA:
-		case R_NDS32_15_FIXED_RELA:
-		case R_NDS32_17_FIXED_RELA:
-		case R_NDS32_25_FIXED_RELA:
-		case R_NDS32_LOADSTORE:
-		case R_NDS32_DWARF2_OP1_RELA:
-		case R_NDS32_DWARF2_OP2_RELA:
-		case R_NDS32_DWARF2_LEB_RELA:
-		case R_NDS32_RELA_NOP_MIX ... R_NDS32_RELA_NOP_MAX:
-			break;
-
-		case R_NDS32_32_RELA:
-			do_reloc32(v, loc, 0xffffffff, 0, 0, 0, 0);
-			break;
-
-		case R_NDS32_HI20_RELA:
-			do_reloc32(v, loc, 0xfffff000, 12, 0xfff00000, 0,
-				   NEED_SWAP);
-			break;
-
-		case R_NDS32_LO12S3_RELA:
-			do_reloc32(v, loc, 0x00000fff, 3, 0xfffff000, 0,
-				   NEED_SWAP);
-			break;
-
-		case R_NDS32_LO12S2_RELA:
-			do_reloc32(v, loc, 0x00000fff, 2, 0xfffff000, 0,
-				   NEED_SWAP);
-			break;
-
-		case R_NDS32_LO12S1_RELA:
-			do_reloc32(v, loc, 0x00000fff, 1, 0xfffff000, 0,
-				   NEED_SWAP);
-			break;
-
-		case R_NDS32_LO12S0_RELA:
-		case R_NDS32_LO12S0_ORI_RELA:
-			do_reloc32(v, loc, 0x00000fff, 0, 0xfffff000, 0,
-				   NEED_SWAP);
-			break;
-
-		case R_NDS32_9_PCREL_RELA:
-			if (exceed_limit
-			    ((v - (Elf32_Addr) loc), 0x000000ff, module, rel,
-			     relindex, i))
-				return -ENOEXEC;
-			do_reloc16(v - (Elf32_Addr) loc, loc, 0x000001ff, 1,
-				   0xffffff00, 0, NEED_SWAP);
-			break;
-
-		case R_NDS32_15_PCREL_RELA:
-			if (exceed_limit
-			    ((v - (Elf32_Addr) loc), 0x00003fff, module, rel,
-			     relindex, i))
-				return -ENOEXEC;
-			do_reloc32(v - (Elf32_Addr) loc, loc, 0x00007fff, 1,
-				   0xffffc000, 0, NEED_SWAP);
-			break;
-
-		case R_NDS32_17_PCREL_RELA:
-			if (exceed_limit
-			    ((v - (Elf32_Addr) loc), 0x0000ffff, module, rel,
-			     relindex, i))
-				return -ENOEXEC;
-			do_reloc32(v - (Elf32_Addr) loc, loc, 0x0001ffff, 1,
-				   0xffff0000, 0, NEED_SWAP);
-			break;
-
-		case R_NDS32_25_PCREL_RELA:
-			if (exceed_limit
-			    ((v - (Elf32_Addr) loc), 0x00ffffff, module, rel,
-			     relindex, i))
-				return -ENOEXEC;
-			do_reloc32(v - (Elf32_Addr) loc, loc, 0x01ffffff, 1,
-				   0xff000000, 0, NEED_SWAP);
-			break;
-		case R_NDS32_WORD_9_PCREL_RELA:
-			if (exceed_limit
-			    ((v - (Elf32_Addr) loc), 0x000000ff, module, rel,
-			     relindex, i))
-				return -ENOEXEC;
-			do_reloc32(v - (Elf32_Addr) loc, loc, 0x000001ff, 1,
-				   0xffffff00, 0, NEED_SWAP);
-			break;
-
-		case R_NDS32_SDA15S3_RELA:
-		case R_NDS32_SDA15S2_RELA:
-		case R_NDS32_SDA15S1_RELA:
-		case R_NDS32_SDA15S0_RELA:
-			pr_err("%s: unsupported relocation type %d.\n",
-			       module->name, ELF32_R_TYPE(rel->r_info));
-			pr_err
-			    ("Small data section access doesn't work in the kernel space; "
-			     "please rebuild the kernel module with gcc option -mcmodel=large.\n");
-			pr_err("section %d reloc %d offset 0x%x size %d\n",
-			       relindex, i, rel->r_offset, dstsec->sh_size);
-			break;
-
-		default:
-			pr_err("%s: unsupported relocation type %d.\n",
-			       module->name, ELF32_R_TYPE(rel->r_info));
-			pr_err("section %d reloc %d offset 0x%x size %d\n",
-			       relindex, i, rel->r_offset, dstsec->sh_size);
-		}
-	}
-	return 0;
-}
-
-int
-module_finalize(const Elf32_Ehdr * hdr, const Elf_Shdr * sechdrs,
-		struct module *module)
-{
-	return 0;
-}
-
-void module_arch_cleanup(struct module *mod)
-{
-}
diff --git a/arch/nds32/kernel/nds32_ksyms.c b/arch/nds32/kernel/nds32_ksyms.c
deleted file mode 100644
index 20719e42ae36..000000000000
--- a/arch/nds32/kernel/nds32_ksyms.c
+++ /dev/null
@@ -1,25 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/delay.h>
-#include <linux/in6.h>
-#include <linux/syscalls.h>
-#include <linux/uaccess.h>
-
-#include <asm/checksum.h>
-#include <asm/io.h>
-#include <asm/ftrace.h>
-#include <asm/proc-fns.h>
-
-/* mem functions */
-EXPORT_SYMBOL(memset);
-EXPORT_SYMBOL(memcpy);
-EXPORT_SYMBOL(memmove);
-EXPORT_SYMBOL(memzero);
-
-/* user mem (segment) */
-EXPORT_SYMBOL(__arch_copy_from_user);
-EXPORT_SYMBOL(__arch_copy_to_user);
-EXPORT_SYMBOL(__arch_clear_user);
diff --git a/arch/nds32/kernel/perf_event_cpu.c b/arch/nds32/kernel/perf_event_cpu.c
deleted file mode 100644
index a78a879e7ef1..000000000000
--- a/arch/nds32/kernel/perf_event_cpu.c
+++ /dev/null
@@ -1,1500 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2008-2017 Andes Technology Corporation
- *
- * Reference ARMv7: Jean Pihet <jpihet@mvista.com>
- * 2010 (c) MontaVista Software, LLC.
- */
-
-#include <linux/perf_event.h>
-#include <linux/bitmap.h>
-#include <linux/export.h>
-#include <linux/kernel.h>
-#include <linux/of.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/pm_runtime.h>
-#include <linux/ftrace.h>
-#include <linux/uaccess.h>
-#include <linux/sched/clock.h>
-#include <linux/percpu-defs.h>
-
-#include <asm/pmu.h>
-#include <asm/irq_regs.h>
-#include <asm/nds32.h>
-#include <asm/stacktrace.h>
-#include <asm/perf_event.h>
-#include <nds32_intrinsic.h>
-
-/* Set at runtime when we know what CPU type we are. */
-static struct nds32_pmu *cpu_pmu;
-
-static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);
-static void nds32_pmu_start(struct nds32_pmu *cpu_pmu);
-static void nds32_pmu_stop(struct nds32_pmu *cpu_pmu);
-static struct platform_device_id cpu_pmu_plat_device_ids[] = {
-	{.name = "nds32-pfm"},
-	{},
-};
-
-static int nds32_pmu_map_cache_event(const unsigned int (*cache_map)
-				  [PERF_COUNT_HW_CACHE_MAX]
-				  [PERF_COUNT_HW_CACHE_OP_MAX]
-				  [PERF_COUNT_HW_CACHE_RESULT_MAX], u64 config)
-{
-	unsigned int cache_type, cache_op, cache_result, ret;
-
-	cache_type = (config >> 0) & 0xff;
-	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
-		return -EINVAL;
-
-	cache_op = (config >> 8) & 0xff;
-	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
-		return -EINVAL;
-
-	cache_result = (config >> 16) & 0xff;
-	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
-		return -EINVAL;
-
-	ret = (int)(*cache_map)[cache_type][cache_op][cache_result];
-
-	if (ret == CACHE_OP_UNSUPPORTED)
-		return -ENOENT;
-
-	return ret;
-}
-
-static int
-nds32_pmu_map_hw_event(const unsigned int (*event_map)[PERF_COUNT_HW_MAX],
-		       u64 config)
-{
-	int mapping;
-
-	if (config >= PERF_COUNT_HW_MAX)
-		return -ENOENT;
-
-	mapping = (*event_map)[config];
-	return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
-}
-
-static int nds32_pmu_map_raw_event(u32 raw_event_mask, u64 config)
-{
-	int ev_type = (int)(config & raw_event_mask);
-	int idx = config >> 8;
-
-	switch (idx) {
-	case 0:
-		ev_type = PFM_OFFSET_MAGIC_0 + ev_type;
-		if (ev_type >= SPAV3_0_SEL_LAST || ev_type <= SPAV3_0_SEL_BASE)
-			return -ENOENT;
-		break;
-	case 1:
-		ev_type = PFM_OFFSET_MAGIC_1 + ev_type;
-		if (ev_type >= SPAV3_1_SEL_LAST || ev_type <= SPAV3_1_SEL_BASE)
-			return -ENOENT;
-		break;
-	case 2:
-		ev_type = PFM_OFFSET_MAGIC_2 + ev_type;
-		if (ev_type >= SPAV3_2_SEL_LAST || ev_type <= SPAV3_2_SEL_BASE)
-			return -ENOENT;
-		break;
-	default:
-		return -ENOENT;
-	}
-
-	return ev_type;
-}
-
-int
-nds32_pmu_map_event(struct perf_event *event,
-		    const unsigned int (*event_map)[PERF_COUNT_HW_MAX],
-		    const unsigned int (*cache_map)
-		    [PERF_COUNT_HW_CACHE_MAX]
-		    [PERF_COUNT_HW_CACHE_OP_MAX]
-		    [PERF_COUNT_HW_CACHE_RESULT_MAX], u32 raw_event_mask)
-{
-	u64 config = event->attr.config;
-
-	switch (event->attr.type) {
-	case PERF_TYPE_HARDWARE:
-		return nds32_pmu_map_hw_event(event_map, config);
-	case PERF_TYPE_HW_CACHE:
-		return nds32_pmu_map_cache_event(cache_map, config);
-	case PERF_TYPE_RAW:
-		return nds32_pmu_map_raw_event(raw_event_mask, config);
-	}
-
-	return -ENOENT;
-}
-
-static int nds32_spav3_map_event(struct perf_event *event)
-{
-	return nds32_pmu_map_event(event, &nds32_pfm_perf_map,
-				&nds32_pfm_perf_cache_map, SOFTWARE_EVENT_MASK);
-}
-
-static inline u32 nds32_pfm_getreset_flags(void)
-{
-	/* Read overflow status */
-	u32 val = __nds32__mfsr(NDS32_SR_PFM_CTL);
-	u32 old_val = val;
-
-	/* Write overflow bit to clear status, and others keep it 0 */
-	u32 ov_flag = PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2];
-
-	__nds32__mtsr(val | ov_flag, NDS32_SR_PFM_CTL);
-
-	return old_val;
-}
-
-static inline int nds32_pfm_has_overflowed(u32 pfm)
-{
-	u32 ov_flag = PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2];
-
-	return pfm & ov_flag;
-}
-
-static inline int nds32_pfm_counter_has_overflowed(u32 pfm, int idx)
-{
-	u32 mask = 0;
-
-	switch (idx) {
-	case 0:
-		mask = PFM_CTL_OVF[0];
-		break;
-	case 1:
-		mask = PFM_CTL_OVF[1];
-		break;
-	case 2:
-		mask = PFM_CTL_OVF[2];
-		break;
-	default:
-		pr_err("%s index wrong\n", __func__);
-		break;
-	}
-	return pfm & mask;
-}
-
-/*
- * Set the next IRQ period, based on the hwc->period_left value.
- * To be called with the event disabled in hw:
- */
-int nds32_pmu_event_set_period(struct perf_event *event)
-{
-	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
-	struct hw_perf_event *hwc = &event->hw;
-	s64 left = local64_read(&hwc->period_left);
-	s64 period = hwc->sample_period;
-	int ret = 0;
-
-	/* The period may have been changed by PERF_EVENT_IOC_PERIOD */
-	if (unlikely(period != hwc->last_period))
-		left = period - (hwc->last_period - left);
-
-	if (unlikely(left <= -period)) {
-		left = period;
-		local64_set(&hwc->period_left, left);
-		hwc->last_period = period;
-		ret = 1;
-	}
-
-	if (unlikely(left <= 0)) {
-		left += period;
-		local64_set(&hwc->period_left, left);
-		hwc->last_period = period;
-		ret = 1;
-	}
-
-	if (left > (s64)nds32_pmu->max_period)
-		left = nds32_pmu->max_period;
-
-	/*
-	 * The hw event starts counting from this event offset,
-	 * mark it to be able to extract future "deltas":
-	 */
-	local64_set(&hwc->prev_count, (u64)(-left));
-
-	nds32_pmu->write_counter(event, (u64)(-left) & nds32_pmu->max_period);
-
-	perf_event_update_userpage(event);
-
-	return ret;
-}
-
-static irqreturn_t nds32_pmu_handle_irq(int irq_num, void *dev)
-{
-	u32 pfm;
-	struct perf_sample_data data;
-	struct nds32_pmu *cpu_pmu = (struct nds32_pmu *)dev;
-	struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events();
-	struct pt_regs *regs;
-	int idx;
-	/*
-	 * Get and reset the IRQ flags
-	 */
-	pfm = nds32_pfm_getreset_flags();
-
-	/*
-	 * Did an overflow occur?
-	 */
-	if (!nds32_pfm_has_overflowed(pfm))
-		return IRQ_NONE;
-
-	/*
-	 * Handle the counter(s) overflow(s)
-	 */
-	regs = get_irq_regs();
-
-	nds32_pmu_stop(cpu_pmu);
-	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
-		struct perf_event *event = cpuc->events[idx];
-		struct hw_perf_event *hwc;
-
-		/* Ignore if we don't have an event. */
-		if (!event)
-			continue;
-
-		/*
-		 * We have a single interrupt for all counters. Check that
-		 * each counter has overflowed before we process it.
-		 */
-		if (!nds32_pfm_counter_has_overflowed(pfm, idx))
-			continue;
-
-		hwc = &event->hw;
-		nds32_pmu_event_update(event);
-		perf_sample_data_init(&data, 0, hwc->last_period);
-		if (!nds32_pmu_event_set_period(event))
-			continue;
-
-		if (perf_event_overflow(event, &data, regs))
-			cpu_pmu->disable(event);
-	}
-	nds32_pmu_start(cpu_pmu);
-	/*
-	 * Handle the pending perf events.
-	 *
-	 * Note: this call *must* be run with interrupts disabled. For
-	 * platforms that can have the PMU interrupts raised as an NMI, this
-	 * will not work.
-	 */
-	irq_work_run();
-
-	return IRQ_HANDLED;
-}
-
-static inline int nds32_pfm_counter_valid(struct nds32_pmu *cpu_pmu, int idx)
-{
-	return ((idx >= 0) && (idx < cpu_pmu->num_events));
-}
-
-static inline int nds32_pfm_disable_counter(int idx)
-{
-	unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL);
-	u32 mask = 0;
-
-	mask = PFM_CTL_EN[idx];
-	val &= ~mask;
-	val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
-	__nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
-	return idx;
-}
-
-/*
- * Add an event filter to a given event.
- */
-static int nds32_pmu_set_event_filter(struct hw_perf_event *event,
-				      struct perf_event_attr *attr)
-{
-	unsigned long config_base = 0;
-	int idx = event->idx;
-	unsigned long no_kernel_tracing = 0;
-	unsigned long no_user_tracing = 0;
-	/* If index is -1, do not do anything */
-	if (idx == -1)
-		return 0;
-
-	no_kernel_tracing = PFM_CTL_KS[idx];
-	no_user_tracing = PFM_CTL_KU[idx];
-	/*
-	 * Default: enable both kernel and user mode tracing.
-	 */
-	if (attr->exclude_user)
-		config_base |= no_user_tracing;
-
-	if (attr->exclude_kernel)
-		config_base |= no_kernel_tracing;
-
-	/*
-	 * Install the filter into config_base as this is used to
-	 * construct the event type.
-	 */
-	event->config_base |= config_base;
-	return 0;
-}
-
-static inline void nds32_pfm_write_evtsel(int idx, u32 evnum)
-{
-	u32 offset = 0;
-	u32 ori_val = __nds32__mfsr(NDS32_SR_PFM_CTL);
-	u32 ev_mask = 0;
-	u32 no_kernel_mask = 0;
-	u32 no_user_mask = 0;
-	u32 val;
-
-	offset = PFM_CTL_OFFSEL[idx];
-	/* Clear previous mode selection, and write new one */
-	no_kernel_mask = PFM_CTL_KS[idx];
-	no_user_mask = PFM_CTL_KU[idx];
-	ori_val &= ~no_kernel_mask;
-	ori_val &= ~no_user_mask;
-	if (evnum & no_kernel_mask)
-		ori_val |= no_kernel_mask;
-
-	if (evnum & no_user_mask)
-		ori_val |= no_user_mask;
-
-	/* Clear previous event selection */
-	ev_mask = PFM_CTL_SEL[idx];
-	ori_val &= ~ev_mask;
-	evnum &= SOFTWARE_EVENT_MASK;
-
-	/* undo the linear mapping */
-	evnum = get_converted_evet_hw_num(evnum);
-	val = ori_val | (evnum << offset);
-	val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
-	__nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
-}
-
-static inline int nds32_pfm_enable_counter(int idx)
-{
-	unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL);
-	u32 mask = 0;
-
-	mask = PFM_CTL_EN[idx];
-	val |= mask;
-	val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
-	__nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
-	return idx;
-}
-
-static inline int nds32_pfm_enable_intens(int idx)
-{
-	unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL);
-	u32 mask = 0;
-
-	mask = PFM_CTL_IE[idx];
-	val |= mask;
-	val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
-	__nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
-	return idx;
-}
-
-static inline int nds32_pfm_disable_intens(int idx)
-{
-	unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL);
-	u32 mask = 0;
-
-	mask = PFM_CTL_IE[idx];
-	val &= ~mask;
-	val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
-	__nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
-	return idx;
-}
-
-static int event_requires_mode_exclusion(struct perf_event_attr *attr)
-{
-	/* Other modes NDS32 does not support */
-	return attr->exclude_user || attr->exclude_kernel;
-}
-
-static void nds32_pmu_enable_event(struct perf_event *event)
-{
-	unsigned long flags;
-	unsigned int evnum = 0;
-	struct hw_perf_event *hwc = &event->hw;
-	struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
-	int idx = hwc->idx;
-
-	if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
-		pr_err("CPU enabling wrong pfm counter IRQ enable\n");
-		return;
-	}
-
-	/*
-	 * Enable counter and interrupt, and set the counter to count
-	 * the event that we're interested in.
-	 */
-	raw_spin_lock_irqsave(&events->pmu_lock, flags);
-
-	/*
-	 * Disable counter
-	 */
-	nds32_pfm_disable_counter(idx);
-
-	/*
-	 * Check whether we need to exclude the counter from certain modes.
-	 */
-	if ((!cpu_pmu->set_event_filter ||
-	     cpu_pmu->set_event_filter(hwc, &event->attr)) &&
-	     event_requires_mode_exclusion(&event->attr)) {
-		pr_notice
-		("NDS32 performance counters do not support mode exclusion\n");
-		hwc->config_base = 0;
-	}
-	/* Write event */
-	evnum = hwc->config_base;
-	nds32_pfm_write_evtsel(idx, evnum);
-
-	/*
-	 * Enable interrupt for this counter
-	 */
-	nds32_pfm_enable_intens(idx);
-
-	/*
-	 * Enable counter
-	 */
-	nds32_pfm_enable_counter(idx);
-
-	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
-}
-
-static void nds32_pmu_disable_event(struct perf_event *event)
-{
-	unsigned long flags;
-	struct hw_perf_event *hwc = &event->hw;
-	struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
-	int idx = hwc->idx;
-
-	if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
-		pr_err("CPU disabling wrong pfm counter IRQ enable %d\n", idx);
-		return;
-	}
-
-	/*
-	 * Disable counter and interrupt
-	 */
-	raw_spin_lock_irqsave(&events->pmu_lock, flags);
-
-	/*
-	 * Disable counter
-	 */
-	nds32_pfm_disable_counter(idx);
-
-	/*
-	 * Disable interrupt for this counter
-	 */
-	nds32_pfm_disable_intens(idx);
-
-	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
-}
-
-static inline u32 nds32_pmu_read_counter(struct perf_event *event)
-{
-	struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
-	struct hw_perf_event *hwc = &event->hw;
-	int idx = hwc->idx;
-	u32 count = 0;
-
-	if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
-		pr_err("CPU reading wrong counter %d\n", idx);
-	} else {
-		switch (idx) {
-		case PFMC0:
-			count = __nds32__mfsr(NDS32_SR_PFMC0);
-			break;
-		case PFMC1:
-			count = __nds32__mfsr(NDS32_SR_PFMC1);
-			break;
-		case PFMC2:
-			count = __nds32__mfsr(NDS32_SR_PFMC2);
-			break;
-		default:
-			pr_err
-			    ("%s: CPU has no performance counters %d\n",
-			     __func__, idx);
-		}
-	}
-	return count;
-}
-
-static inline void nds32_pmu_write_counter(struct perf_event *event, u32 value)
-{
-	struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
-	struct hw_perf_event *hwc = &event->hw;
-	int idx = hwc->idx;
-
-	if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
-		pr_err("CPU writing wrong counter %d\n", idx);
-	} else {
-		switch (idx) {
-		case PFMC0:
-			__nds32__mtsr_isb(value, NDS32_SR_PFMC0);
-			break;
-		case PFMC1:
-			__nds32__mtsr_isb(value, NDS32_SR_PFMC1);
-			break;
-		case PFMC2:
-			__nds32__mtsr_isb(value, NDS32_SR_PFMC2);
-			break;
-		default:
-			pr_err
-			    ("%s: CPU has no performance counters %d\n",
-			     __func__, idx);
-		}
-	}
-}
-
-static int nds32_pmu_get_event_idx(struct pmu_hw_events *cpuc,
-				   struct perf_event *event)
-{
-	int idx;
-	struct hw_perf_event *hwc = &event->hw;
-	/*
-	 * Current implementation maps cycles, instruction count and cache-miss
-	 * to specific counter.
-	 * However, multiple of the 3 counters are able to count these events.
-	 *
-	 *
-	 * SOFTWARE_EVENT_MASK mask for getting event num ,
-	 * This is defined by Jia-Rung, you can change the polocies.
-	 * However, do not exceed 8 bits. This is hardware specific.
-	 * The last number is SPAv3_2_SEL_LAST.
-	 */
-	unsigned long evtype = hwc->config_base & SOFTWARE_EVENT_MASK;
-
-	idx = get_converted_event_idx(evtype);
-	/*
-	 * Try to get the counter for correpsonding event
-	 */
-	if (evtype == SPAV3_0_SEL_TOTAL_CYCLES) {
-		if (!test_and_set_bit(idx, cpuc->used_mask))
-			return idx;
-		if (!test_and_set_bit(NDS32_IDX_COUNTER0, cpuc->used_mask))
-			return NDS32_IDX_COUNTER0;
-		if (!test_and_set_bit(NDS32_IDX_COUNTER1, cpuc->used_mask))
-			return NDS32_IDX_COUNTER1;
-	} else if (evtype == SPAV3_1_SEL_COMPLETED_INSTRUCTION) {
-		if (!test_and_set_bit(idx, cpuc->used_mask))
-			return idx;
-		else if (!test_and_set_bit(NDS32_IDX_COUNTER1, cpuc->used_mask))
-			return NDS32_IDX_COUNTER1;
-		else if (!test_and_set_bit
-			 (NDS32_IDX_CYCLE_COUNTER, cpuc->used_mask))
-			return NDS32_IDX_CYCLE_COUNTER;
-	} else {
-		if (!test_and_set_bit(idx, cpuc->used_mask))
-			return idx;
-	}
-	return -EAGAIN;
-}
-
-static void nds32_pmu_start(struct nds32_pmu *cpu_pmu)
-{
-	unsigned long flags;
-	unsigned int val;
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
-
-	raw_spin_lock_irqsave(&events->pmu_lock, flags);
-
-	/* Enable all counters , NDS PFM has 3 counters */
-	val = __nds32__mfsr(NDS32_SR_PFM_CTL);
-	val |= (PFM_CTL_EN[0] | PFM_CTL_EN[1] | PFM_CTL_EN[2]);
-	val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
-	__nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
-
-	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
-}
-
-static void nds32_pmu_stop(struct nds32_pmu *cpu_pmu)
-{
-	unsigned long flags;
-	unsigned int val;
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
-
-	raw_spin_lock_irqsave(&events->pmu_lock, flags);
-
-	/* Disable all counters , NDS PFM has 3 counters */
-	val = __nds32__mfsr(NDS32_SR_PFM_CTL);
-	val &= ~(PFM_CTL_EN[0] | PFM_CTL_EN[1] | PFM_CTL_EN[2]);
-	val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
-	__nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
-
-	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
-}
-
-static void nds32_pmu_reset(void *info)
-{
-	u32 val = 0;
-
-	val |= (PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
-	__nds32__mtsr(val, NDS32_SR_PFM_CTL);
-	__nds32__mtsr(0, NDS32_SR_PFM_CTL);
-	__nds32__mtsr(0, NDS32_SR_PFMC0);
-	__nds32__mtsr(0, NDS32_SR_PFMC1);
-	__nds32__mtsr(0, NDS32_SR_PFMC2);
-}
-
-static void nds32_pmu_init(struct nds32_pmu *cpu_pmu)
-{
-	cpu_pmu->handle_irq = nds32_pmu_handle_irq;
-	cpu_pmu->enable = nds32_pmu_enable_event;
-	cpu_pmu->disable = nds32_pmu_disable_event;
-	cpu_pmu->read_counter = nds32_pmu_read_counter;
-	cpu_pmu->write_counter = nds32_pmu_write_counter;
-	cpu_pmu->get_event_idx = nds32_pmu_get_event_idx;
-	cpu_pmu->start = nds32_pmu_start;
-	cpu_pmu->stop = nds32_pmu_stop;
-	cpu_pmu->reset = nds32_pmu_reset;
-	cpu_pmu->max_period = 0xFFFFFFFF;	/* Maximum counts */
-};
-
-static u32 nds32_read_num_pfm_events(void)
-{
-	/* NDS32 SPAv3 PMU support 3 counter */
-	return 3;
-}
-
-static int device_pmu_init(struct nds32_pmu *cpu_pmu)
-{
-	nds32_pmu_init(cpu_pmu);
-	/*
-	 * This name should be devive-specific name, whatever you like :)
-	 * I think "PMU" will be a good generic name.
-	 */
-	cpu_pmu->name = "nds32v3-pmu";
-	cpu_pmu->map_event = nds32_spav3_map_event;
-	cpu_pmu->num_events = nds32_read_num_pfm_events();
-	cpu_pmu->set_event_filter = nds32_pmu_set_event_filter;
-	return 0;
-}
-
-/*
- * CPU PMU identification and probing.
- */
-static int probe_current_pmu(struct nds32_pmu *pmu)
-{
-	int ret;
-
-	get_cpu();
-	ret = -ENODEV;
-	/*
-	 * If ther are various CPU types with its own PMU, initialize with
-	 *
-	 * the corresponding one
-	 */
-	device_pmu_init(pmu);
-	put_cpu();
-	return ret;
-}
-
-static void nds32_pmu_enable(struct pmu *pmu)
-{
-	struct nds32_pmu *nds32_pmu = to_nds32_pmu(pmu);
-	struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events();
-	int enabled = bitmap_weight(hw_events->used_mask,
-				    nds32_pmu->num_events);
-
-	if (enabled)
-		nds32_pmu->start(nds32_pmu);
-}
-
-static void nds32_pmu_disable(struct pmu *pmu)
-{
-	struct nds32_pmu *nds32_pmu = to_nds32_pmu(pmu);
-
-	nds32_pmu->stop(nds32_pmu);
-}
-
-static void nds32_pmu_release_hardware(struct nds32_pmu *nds32_pmu)
-{
-	nds32_pmu->free_irq(nds32_pmu);
-	pm_runtime_put_sync(&nds32_pmu->plat_device->dev);
-}
-
-static irqreturn_t nds32_pmu_dispatch_irq(int irq, void *dev)
-{
-	struct nds32_pmu *nds32_pmu = (struct nds32_pmu *)dev;
-	int ret;
-	u64 start_clock, finish_clock;
-
-	start_clock = local_clock();
-	ret = nds32_pmu->handle_irq(irq, dev);
-	finish_clock = local_clock();
-
-	perf_sample_event_took(finish_clock - start_clock);
-	return ret;
-}
-
-static int nds32_pmu_reserve_hardware(struct nds32_pmu *nds32_pmu)
-{
-	int err;
-	struct platform_device *pmu_device = nds32_pmu->plat_device;
-
-	if (!pmu_device)
-		return -ENODEV;
-
-	pm_runtime_get_sync(&pmu_device->dev);
-	err = nds32_pmu->request_irq(nds32_pmu, nds32_pmu_dispatch_irq);
-	if (err) {
-		nds32_pmu_release_hardware(nds32_pmu);
-		return err;
-	}
-
-	return 0;
-}
-
-static int
-validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events,
-	       struct perf_event *event)
-{
-	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
-
-	if (is_software_event(event))
-		return 1;
-
-	if (event->pmu != pmu)
-		return 0;
-
-	if (event->state < PERF_EVENT_STATE_OFF)
-		return 1;
-
-	if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
-		return 1;
-
-	return nds32_pmu->get_event_idx(hw_events, event) >= 0;
-}
-
-static int validate_group(struct perf_event *event)
-{
-	struct perf_event *sibling, *leader = event->group_leader;
-	struct pmu_hw_events fake_pmu;
-	DECLARE_BITMAP(fake_used_mask, MAX_COUNTERS);
-	/*
-	 * Initialize the fake PMU. We only need to populate the
-	 * used_mask for the purposes of validation.
-	 */
-	memset(fake_used_mask, 0, sizeof(fake_used_mask));
-
-	if (!validate_event(event->pmu, &fake_pmu, leader))
-		return -EINVAL;
-
-	for_each_sibling_event(sibling, leader) {
-		if (!validate_event(event->pmu, &fake_pmu, sibling))
-			return -EINVAL;
-	}
-
-	if (!validate_event(event->pmu, &fake_pmu, event))
-		return -EINVAL;
-
-	return 0;
-}
-
-static int __hw_perf_event_init(struct perf_event *event)
-{
-	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
-	struct hw_perf_event *hwc = &event->hw;
-	int mapping;
-
-	mapping = nds32_pmu->map_event(event);
-
-	if (mapping < 0) {
-		pr_debug("event %x:%llx not supported\n", event->attr.type,
-			 event->attr.config);
-		return mapping;
-	}
-
-	/*
-	 * We don't assign an index until we actually place the event onto
-	 * hardware. Use -1 to signify that we haven't decided where to put it
-	 * yet. For SMP systems, each core has it's own PMU so we can't do any
-	 * clever allocation or constraints checking at this point.
-	 */
-	hwc->idx = -1;
-	hwc->config_base = 0;
-	hwc->config = 0;
-	hwc->event_base = 0;
-
-	/*
-	 * Check whether we need to exclude the counter from certain modes.
-	 */
-	if ((!nds32_pmu->set_event_filter ||
-	     nds32_pmu->set_event_filter(hwc, &event->attr)) &&
-	    event_requires_mode_exclusion(&event->attr)) {
-		pr_debug
-			("NDS performance counters do not support mode exclusion\n");
-		return -EOPNOTSUPP;
-	}
-
-	/*
-	 * Store the event encoding into the config_base field.
-	 */
-	hwc->config_base |= (unsigned long)mapping;
-
-	if (!hwc->sample_period) {
-		/*
-		 * For non-sampling runs, limit the sample_period to half
-		 * of the counter width. That way, the new counter value
-		 * is far less likely to overtake the previous one unless
-		 * you have some serious IRQ latency issues.
-		 */
-		hwc->sample_period = nds32_pmu->max_period >> 1;
-		hwc->last_period = hwc->sample_period;
-		local64_set(&hwc->period_left, hwc->sample_period);
-	}
-
-	if (event->group_leader != event) {
-		if (validate_group(event) != 0)
-			return -EINVAL;
-	}
-
-	return 0;
-}
-
-static int nds32_pmu_event_init(struct perf_event *event)
-{
-	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
-	int err = 0;
-	atomic_t *active_events = &nds32_pmu->active_events;
-
-	/* does not support taken branch sampling */
-	if (has_branch_stack(event))
-		return -EOPNOTSUPP;
-
-	if (nds32_pmu->map_event(event) == -ENOENT)
-		return -ENOENT;
-
-	if (!atomic_inc_not_zero(active_events)) {
-		if (atomic_read(active_events) == 0) {
-			/* Register irq handler */
-			err = nds32_pmu_reserve_hardware(nds32_pmu);
-		}
-
-		if (!err)
-			atomic_inc(active_events);
-	}
-
-	if (err)
-		return err;
-
-	err = __hw_perf_event_init(event);
-
-	return err;
-}
-
-static void nds32_start(struct perf_event *event, int flags)
-{
-	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
-	struct hw_perf_event *hwc = &event->hw;
-	/*
-	 * NDS pmu always has to reprogram the period, so ignore
-	 * PERF_EF_RELOAD, see the comment below.
-	 */
-	if (flags & PERF_EF_RELOAD)
-		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
-
-	hwc->state = 0;
-	/* Set the period for the event. */
-	nds32_pmu_event_set_period(event);
-
-	nds32_pmu->enable(event);
-}
-
-static int nds32_pmu_add(struct perf_event *event, int flags)
-{
-	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
-	struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events();
-	struct hw_perf_event *hwc = &event->hw;
-	int idx;
-	int err = 0;
-
-	perf_pmu_disable(event->pmu);
-
-	/* If we don't have a space for the counter then finish early. */
-	idx = nds32_pmu->get_event_idx(hw_events, event);
-	if (idx < 0) {
-		err = idx;
-		goto out;
-	}
-
-	/*
-	 * If there is an event in the counter we are going to use then make
-	 * sure it is disabled.
-	 */
-	event->hw.idx = idx;
-	nds32_pmu->disable(event);
-	hw_events->events[idx] = event;
-
-	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
-	if (flags & PERF_EF_START)
-		nds32_start(event, PERF_EF_RELOAD);
-
-	/* Propagate our changes to the userspace mapping. */
-	perf_event_update_userpage(event);
-
-out:
-	perf_pmu_enable(event->pmu);
-	return err;
-}
-
-u64 nds32_pmu_event_update(struct perf_event *event)
-{
-	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
-	struct hw_perf_event *hwc = &event->hw;
-	u64 delta, prev_raw_count, new_raw_count;
-
-again:
-	prev_raw_count = local64_read(&hwc->prev_count);
-	new_raw_count = nds32_pmu->read_counter(event);
-
-	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
-			    new_raw_count) != prev_raw_count) {
-		goto again;
-	}
-	/*
-	 * Whether overflow or not, "unsigned substraction"
-	 * will always get their delta
-	 */
-	delta = (new_raw_count - prev_raw_count) & nds32_pmu->max_period;
-
-	local64_add(delta, &event->count);
-	local64_sub(delta, &hwc->period_left);
-
-	return new_raw_count;
-}
-
-static void nds32_stop(struct perf_event *event, int flags)
-{
-	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
-	struct hw_perf_event *hwc = &event->hw;
-	/*
-	 * NDS pmu always has to update the counter, so ignore
-	 * PERF_EF_UPDATE, see comments in nds32_start().
-	 */
-	if (!(hwc->state & PERF_HES_STOPPED)) {
-		nds32_pmu->disable(event);
-		nds32_pmu_event_update(event);
-		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
-	}
-}
-
-static void nds32_pmu_del(struct perf_event *event, int flags)
-{
-	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
-	struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events();
-	struct hw_perf_event *hwc = &event->hw;
-	int idx = hwc->idx;
-
-	nds32_stop(event, PERF_EF_UPDATE);
-	hw_events->events[idx] = NULL;
-	clear_bit(idx, hw_events->used_mask);
-
-	perf_event_update_userpage(event);
-}
-
-static void nds32_pmu_read(struct perf_event *event)
-{
-	nds32_pmu_event_update(event);
-}
-
-/* Please refer to SPAv3 for more hardware specific details */
-PMU_FORMAT_ATTR(event, "config:0-63");
-
-static struct attribute *nds32_arch_formats_attr[] = {
-	&format_attr_event.attr,
-	NULL,
-};
-
-static struct attribute_group nds32_pmu_format_group = {
-	.name = "format",
-	.attrs = nds32_arch_formats_attr,
-};
-
-static ssize_t nds32_pmu_cpumask_show(struct device *dev,
-				      struct device_attribute *attr,
-				      char *buf)
-{
-	return 0;
-}
-
-static DEVICE_ATTR(cpus, 0444, nds32_pmu_cpumask_show, NULL);
-
-static struct attribute *nds32_pmu_common_attrs[] = {
-	&dev_attr_cpus.attr,
-	NULL,
-};
-
-static struct attribute_group nds32_pmu_common_group = {
-	.attrs = nds32_pmu_common_attrs,
-};
-
-static const struct attribute_group *nds32_pmu_attr_groups[] = {
-	&nds32_pmu_format_group,
-	&nds32_pmu_common_group,
-	NULL,
-};
-
-static void nds32_init(struct nds32_pmu *nds32_pmu)
-{
-	atomic_set(&nds32_pmu->active_events, 0);
-
-	nds32_pmu->pmu = (struct pmu) {
-		.pmu_enable = nds32_pmu_enable,
-		.pmu_disable = nds32_pmu_disable,
-		.attr_groups = nds32_pmu_attr_groups,
-		.event_init = nds32_pmu_event_init,
-		.add = nds32_pmu_add,
-		.del = nds32_pmu_del,
-		.start = nds32_start,
-		.stop = nds32_stop,
-		.read = nds32_pmu_read,
-	};
-}
-
-int nds32_pmu_register(struct nds32_pmu *nds32_pmu, int type)
-{
-	nds32_init(nds32_pmu);
-	pm_runtime_enable(&nds32_pmu->plat_device->dev);
-	pr_info("enabled with %s PMU driver, %d counters available\n",
-		nds32_pmu->name, nds32_pmu->num_events);
-	return perf_pmu_register(&nds32_pmu->pmu, nds32_pmu->name, type);
-}
-
-static struct pmu_hw_events *cpu_pmu_get_cpu_events(void)
-{
-	return this_cpu_ptr(&cpu_hw_events);
-}
-
-static int cpu_pmu_request_irq(struct nds32_pmu *cpu_pmu, irq_handler_t handler)
-{
-	int err, irq, irqs;
-	struct platform_device *pmu_device = cpu_pmu->plat_device;
-
-	if (!pmu_device)
-		return -ENODEV;
-
-	irqs = min(pmu_device->num_resources, num_possible_cpus());
-	if (irqs < 1) {
-		pr_err("no irqs for PMUs defined\n");
-		return -ENODEV;
-	}
-
-	irq = platform_get_irq(pmu_device, 0);
-	err = request_irq(irq, handler, IRQF_NOBALANCING, "nds32-pfm",
-			  cpu_pmu);
-	if (err) {
-		pr_err("unable to request IRQ%d for NDS PMU counters\n",
-		       irq);
-		return err;
-	}
-	return 0;
-}
-
-static void cpu_pmu_free_irq(struct nds32_pmu *cpu_pmu)
-{
-	int irq;
-	struct platform_device *pmu_device = cpu_pmu->plat_device;
-
-	irq = platform_get_irq(pmu_device, 0);
-	if (irq >= 0)
-		free_irq(irq, cpu_pmu);
-}
-
-static void cpu_pmu_init(struct nds32_pmu *cpu_pmu)
-{
-	int cpu;
-	struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu);
-
-	raw_spin_lock_init(&events->pmu_lock);
-
-	cpu_pmu->get_hw_events = cpu_pmu_get_cpu_events;
-	cpu_pmu->request_irq = cpu_pmu_request_irq;
-	cpu_pmu->free_irq = cpu_pmu_free_irq;
-
-	/* Ensure the PMU has sane values out of reset. */
-	if (cpu_pmu->reset)
-		on_each_cpu(cpu_pmu->reset, cpu_pmu, 1);
-}
-
-static const struct of_device_id cpu_pmu_of_device_ids[] = {
-	{.compatible = "andestech,nds32v3-pmu",
-	 .data = device_pmu_init},
-	{},
-};
-
-static int cpu_pmu_device_probe(struct platform_device *pdev)
-{
-	const struct of_device_id *of_id;
-	int (*init_fn)(struct nds32_pmu *nds32_pmu);
-	struct device_node *node = pdev->dev.of_node;
-	struct nds32_pmu *pmu;
-	int ret = -ENODEV;
-
-	if (cpu_pmu) {
-		pr_notice("[perf] attempt to register multiple PMU devices!\n");
-		return -ENOSPC;
-	}
-
-	pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
-	if (!pmu)
-		return -ENOMEM;
-
-	of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node);
-	if (node && of_id) {
-		init_fn = of_id->data;
-		ret = init_fn(pmu);
-	} else {
-		ret = probe_current_pmu(pmu);
-	}
-
-	if (ret) {
-		pr_notice("[perf] failed to probe PMU!\n");
-		goto out_free;
-	}
-
-	cpu_pmu = pmu;
-	cpu_pmu->plat_device = pdev;
-	cpu_pmu_init(cpu_pmu);
-	ret = nds32_pmu_register(cpu_pmu, PERF_TYPE_RAW);
-
-	if (!ret)
-		return 0;
-
-out_free:
-	pr_notice("[perf] failed to register PMU devices!\n");
-	kfree(pmu);
-	return ret;
-}
-
-static struct platform_driver cpu_pmu_driver = {
-	.driver = {
-		   .name = "nds32-pfm",
-		   .of_match_table = cpu_pmu_of_device_ids,
-		   },
-	.probe = cpu_pmu_device_probe,
-	.id_table = cpu_pmu_plat_device_ids,
-};
-
-static int __init register_pmu_driver(void)
-{
-	int err = 0;
-
-	err = platform_driver_register(&cpu_pmu_driver);
-	if (err)
-		pr_notice("[perf] PMU initialization failed\n");
-	else
-		pr_notice("[perf] PMU initialization done\n");
-
-	return err;
-}
-
-device_initcall(register_pmu_driver);
-
-/*
- * References: arch/nds32/kernel/traps.c:__dump()
- * You will need to know the NDS ABI first.
- */
-static int unwind_frame_kernel(struct stackframe *frame)
-{
-	int graph = 0;
-#ifdef CONFIG_FRAME_POINTER
-	/* 0x3 means misalignment */
-	if (!kstack_end((void *)frame->fp) &&
-	    !((unsigned long)frame->fp & 0x3) &&
-	    ((unsigned long)frame->fp >= TASK_SIZE)) {
-		/*
-		 *	The array index is based on the ABI, the below graph
-		 *	illustrate the reasons.
-		 *	Function call procedure: "smw" and "lmw" will always
-		 *	update SP and FP for you automatically.
-		 *
-		 *	Stack                                 Relative Address
-		 *	|  |                                          0
-		 *	----
-		 *	|LP| <-- SP(before smw)  <-- FP(after smw)   -1
-		 *	----
-		 *	|FP|                                         -2
-		 *	----
-		 *	|  | <-- SP(after smw)                       -3
-		 */
-		frame->lp = ((unsigned long *)frame->fp)[-1];
-		frame->fp = ((unsigned long *)frame->fp)[FP_OFFSET];
-		/* make sure CONFIG_FUNCTION_GRAPH_TRACER is turned on */
-		if (__kernel_text_address(frame->lp))
-			frame->lp = ftrace_graph_ret_addr
-						(NULL, &graph, frame->lp, NULL);
-
-		return 0;
-	} else {
-		return -EPERM;
-	}
-#else
-	/*
-	 * You can refer to arch/nds32/kernel/traps.c:__dump()
-	 * Treat "sp" as "fp", but the "sp" is one frame ahead of "fp".
-	 * And, the "sp" is not always correct.
-	 *
-	 *   Stack                                 Relative Address
-	 *   |  |                                          0
-	 *   ----
-	 *   |LP| <-- SP(before smw)                      -1
-	 *   ----
-	 *   |  | <-- SP(after smw)                       -2
-	 *   ----
-	 */
-	if (!kstack_end((void *)frame->sp)) {
-		frame->lp = ((unsigned long *)frame->sp)[1];
-		/* TODO: How to deal with the value in first
-		 * "sp" is not correct?
-		 */
-		if (__kernel_text_address(frame->lp))
-			frame->lp = ftrace_graph_ret_addr
-						(tsk, &graph, frame->lp, NULL);
-
-		frame->sp = ((unsigned long *)frame->sp) + 1;
-
-		return 0;
-	} else {
-		return -EPERM;
-	}
-#endif
-}
-
-static void notrace
-walk_stackframe(struct stackframe *frame,
-		int (*fn_record)(struct stackframe *, void *),
-		void *data)
-{
-	while (1) {
-		int ret;
-
-		if (fn_record(frame, data))
-			break;
-
-		ret = unwind_frame_kernel(frame);
-		if (ret < 0)
-			break;
-	}
-}
-
-/*
- * Gets called by walk_stackframe() for every stackframe. This will be called
- * whist unwinding the stackframe and is like a subroutine return so we use
- * the PC.
- */
-static int callchain_trace(struct stackframe *fr, void *data)
-{
-	struct perf_callchain_entry_ctx *entry = data;
-
-	perf_callchain_store(entry, fr->lp);
-	return 0;
-}
-
-/*
- * Get the return address for a single stackframe and return a pointer to the
- * next frame tail.
- */
-static unsigned long
-user_backtrace(struct perf_callchain_entry_ctx *entry, unsigned long fp)
-{
-	struct frame_tail buftail;
-	unsigned long lp = 0;
-	unsigned long *user_frame_tail =
-		(unsigned long *)(fp - (unsigned long)sizeof(buftail));
-
-	/* Check accessibility of one struct frame_tail beyond */
-	if (!access_ok(user_frame_tail, sizeof(buftail)))
-		return 0;
-	if (__copy_from_user_inatomic
-		(&buftail, user_frame_tail, sizeof(buftail)))
-		return 0;
-
-	/*
-	 * Refer to unwind_frame_kernel() for more illurstration
-	 */
-	lp = buftail.stack_lp;  /* ((unsigned long *)fp)[-1] */
-	fp = buftail.stack_fp;  /* ((unsigned long *)fp)[FP_OFFSET] */
-	perf_callchain_store(entry, lp);
-	return fp;
-}
-
-static unsigned long
-user_backtrace_opt_size(struct perf_callchain_entry_ctx *entry,
-			unsigned long fp)
-{
-	struct frame_tail_opt_size buftail;
-	unsigned long lp = 0;
-
-	unsigned long *user_frame_tail =
-		(unsigned long *)(fp - (unsigned long)sizeof(buftail));
-
-	/* Check accessibility of one struct frame_tail beyond */
-	if (!access_ok(user_frame_tail, sizeof(buftail)))
-		return 0;
-	if (__copy_from_user_inatomic
-		(&buftail, user_frame_tail, sizeof(buftail)))
-		return 0;
-
-	/*
-	 * Refer to unwind_frame_kernel() for more illurstration
-	 */
-	lp = buftail.stack_lp;  /* ((unsigned long *)fp)[-1] */
-	fp = buftail.stack_fp;  /* ((unsigned long *)fp)[FP_OFFSET] */
-
-	perf_callchain_store(entry, lp);
-	return fp;
-}
-
-/*
- * This will be called when the target is in user mode
- * This function will only be called when we use
- * "PERF_SAMPLE_CALLCHAIN" in
- * kernel/events/core.c:perf_prepare_sample()
- *
- * How to trigger perf_callchain_[user/kernel] :
- * $ perf record -e cpu-clock --call-graph fp ./program
- * $ perf report --call-graph
- */
-unsigned long leaf_fp;
-void
-perf_callchain_user(struct perf_callchain_entry_ctx *entry,
-		    struct pt_regs *regs)
-{
-	unsigned long fp = 0;
-	unsigned long gp = 0;
-	unsigned long lp = 0;
-	unsigned long sp = 0;
-	unsigned long *user_frame_tail;
-
-	leaf_fp = 0;
-
-	perf_callchain_store(entry, regs->ipc);
-	fp = regs->fp;
-	gp = regs->gp;
-	lp = regs->lp;
-	sp = regs->sp;
-	if (entry->nr < PERF_MAX_STACK_DEPTH &&
-	    (unsigned long)fp && !((unsigned long)fp & 0x7) && fp > sp) {
-		user_frame_tail =
-			(unsigned long *)(fp - (unsigned long)sizeof(fp));
-
-		if (!access_ok(user_frame_tail, sizeof(fp)))
-			return;
-
-		if (__copy_from_user_inatomic
-			(&leaf_fp, user_frame_tail, sizeof(fp)))
-			return;
-
-		if (leaf_fp == lp) {
-			/*
-			 * Maybe this is non leaf function
-			 * with optimize for size,
-			 * or maybe this is the function
-			 * with optimize for size
-			 */
-			struct frame_tail buftail;
-
-			user_frame_tail =
-				(unsigned long *)(fp -
-					(unsigned long)sizeof(buftail));
-
-			if (!access_ok(user_frame_tail, sizeof(buftail)))
-				return;
-
-			if (__copy_from_user_inatomic
-				(&buftail, user_frame_tail, sizeof(buftail)))
-				return;
-
-			if (buftail.stack_fp == gp) {
-				/* non leaf function with optimize
-				 * for size condition
-				 */
-				struct frame_tail_opt_size buftail_opt_size;
-
-				user_frame_tail =
-					(unsigned long *)(fp - (unsigned long)
-						sizeof(buftail_opt_size));
-
-				if (!access_ok(user_frame_tail,
-					       sizeof(buftail_opt_size)))
-					return;
-
-				if (__copy_from_user_inatomic
-				   (&buftail_opt_size, user_frame_tail,
-				   sizeof(buftail_opt_size)))
-					return;
-
-				perf_callchain_store(entry, lp);
-				fp = buftail_opt_size.stack_fp;
-
-				while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
-				       (unsigned long)fp &&
-						!((unsigned long)fp & 0x7) &&
-						fp > sp) {
-					sp = fp;
-					fp = user_backtrace_opt_size(entry, fp);
-				}
-
-			} else {
-				/* this is the function
-				 * without optimize for size
-				 */
-				fp = buftail.stack_fp;
-				perf_callchain_store(entry, lp);
-				while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
-				       (unsigned long)fp &&
-						!((unsigned long)fp & 0x7) &&
-						fp > sp) {
-					sp = fp;
-					fp = user_backtrace(entry, fp);
-				}
-			}
-		} else {
-			/* this is leaf function */
-			fp = leaf_fp;
-			perf_callchain_store(entry, lp);
-
-			/* previous function callcahin  */
-			while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
-			       (unsigned long)fp &&
-				   !((unsigned long)fp & 0x7) && fp > sp) {
-				sp = fp;
-				fp = user_backtrace(entry, fp);
-			}
-		}
-		return;
-	}
-}
-
-/* This will be called when the target is in kernel mode */
-void
-perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
-		      struct pt_regs *regs)
-{
-	struct stackframe fr;
-
-	fr.fp = regs->fp;
-	fr.lp = regs->lp;
-	fr.sp = regs->sp;
-	walk_stackframe(&fr, callchain_trace, entry);
-}
-
-unsigned long perf_instruction_pointer(struct pt_regs *regs)
-{
-	return instruction_pointer(regs);
-}
-
-unsigned long perf_misc_flags(struct pt_regs *regs)
-{
-	int misc = 0;
-
-	if (user_mode(regs))
-		misc |= PERF_RECORD_MISC_USER;
-	else
-		misc |= PERF_RECORD_MISC_KERNEL;
-
-	return misc;
-}
diff --git a/arch/nds32/kernel/pm.c b/arch/nds32/kernel/pm.c
deleted file mode 100644
index e25700e125d8..000000000000
--- a/arch/nds32/kernel/pm.c
+++ /dev/null
@@ -1,80 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2008-2017 Andes Technology Corporation
-
-#include <linux/init.h>
-#include <linux/suspend.h>
-#include <linux/device.h>
-#include <linux/printk.h>
-#include <asm/suspend.h>
-#include <nds32_intrinsic.h>
-
-unsigned int resume_addr;
-unsigned int *phy_addr_sp_tmp;
-
-static void nds32_suspend2ram(void)
-{
-	pgd_t *pgdv;
-	p4d_t *p4dv;
-	pud_t *pudv;
-	pmd_t *pmdv;
-	pte_t *ptev;
-
-	pgdv = (pgd_t *)__va((__nds32__mfsr(NDS32_SR_L1_PPTB) &
-		L1_PPTB_mskBASE)) + pgd_index((unsigned int)cpu_resume);
-
-	p4dv = p4d_offset(pgdv, (unsigned int)cpu_resume);
-	pudv = pud_offset(p4dv, (unsigned int)cpu_resume);
-	pmdv = pmd_offset(pudv, (unsigned int)cpu_resume);
-	ptev = pte_offset_map(pmdv, (unsigned int)cpu_resume);
-
-	resume_addr = ((*ptev) & TLB_DATA_mskPPN)
-			| ((unsigned int)cpu_resume & 0x00000fff);
-
-	suspend2ram();
-}
-
-static void nds32_suspend_cpu(void)
-{
-	while (!(__nds32__mfsr(NDS32_SR_INT_PEND) & wake_mask))
-		__asm__ volatile ("standby no_wake_grant\n\t");
-}
-
-static int nds32_pm_valid(suspend_state_t state)
-{
-	switch (state) {
-	case PM_SUSPEND_ON:
-	case PM_SUSPEND_STANDBY:
-	case PM_SUSPEND_MEM:
-		return 1;
-	default:
-		return 0;
-	}
-}
-
-static int nds32_pm_enter(suspend_state_t state)
-{
-	pr_debug("%s:state:%d\n", __func__, state);
-	switch (state) {
-	case PM_SUSPEND_STANDBY:
-		nds32_suspend_cpu();
-		return 0;
-	case PM_SUSPEND_MEM:
-		nds32_suspend2ram();
-		return 0;
-	default:
-		return -EINVAL;
-	}
-}
-
-static const struct platform_suspend_ops nds32_pm_ops = {
-	.valid = nds32_pm_valid,
-	.enter = nds32_pm_enter,
-};
-
-static int __init nds32_pm_init(void)
-{
-	pr_debug("Enter %s\n", __func__);
-	suspend_set_ops(&nds32_pm_ops);
-	return 0;
-}
-late_initcall(nds32_pm_init);
diff --git a/arch/nds32/kernel/process.c b/arch/nds32/kernel/process.c
deleted file mode 100644
index d35c1f63fa11..000000000000
--- a/arch/nds32/kernel/process.c
+++ /dev/null
@@ -1,256 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/sched.h>
-#include <linux/sched/debug.h>
-#include <linux/sched/task_stack.h>
-#include <linux/delay.h>
-#include <linux/kallsyms.h>
-#include <linux/uaccess.h>
-#include <asm/elf.h>
-#include <asm/proc-fns.h>
-#include <asm/fpu.h>
-#include <linux/ptrace.h>
-#include <linux/reboot.h>
-
-#if IS_ENABLED(CONFIG_LAZY_FPU)
-struct task_struct *last_task_used_math;
-#endif
-
-extern void setup_mm_for_reboot(char mode);
-
-extern inline void arch_reset(char mode)
-{
-	if (mode == 's') {
-		/* Use cpu handler, jump to 0 */
-		cpu_reset(0);
-	}
-}
-
-void (*pm_power_off) (void);
-EXPORT_SYMBOL(pm_power_off);
-
-static char reboot_mode_nds32 = 'h';
-
-int __init reboot_setup(char *str)
-{
-	reboot_mode_nds32 = str[0];
-	return 1;
-}
-
-static int cpub_pwroff(void)
-{
-	return 0;
-}
-
-__setup("reboot=", reboot_setup);
-
-void machine_halt(void)
-{
-	cpub_pwroff();
-}
-
-EXPORT_SYMBOL(machine_halt);
-
-void machine_power_off(void)
-{
-	if (pm_power_off)
-		pm_power_off();
-}
-
-EXPORT_SYMBOL(machine_power_off);
-
-void machine_restart(char *cmd)
-{
-	/*
-	 * Clean and disable cache, and turn off interrupts
-	 */
-	cpu_proc_fin();
-
-	/*
-	 * Tell the mm system that we are going to reboot -
-	 * we may need it to insert some 1:1 mappings so that
-	 * soft boot works.
-	 */
-	setup_mm_for_reboot(reboot_mode_nds32);
-
-	/* Execute kernel restart handler call chain */
-	do_kernel_restart(cmd);
-
-	/*
-	 * Now call the architecture specific reboot code.
-	 */
-	arch_reset(reboot_mode_nds32);
-
-	/*
-	 * Whoops - the architecture was unable to reboot.
-	 * Tell the user!
-	 */
-	mdelay(1000);
-	pr_info("Reboot failed -- System halted\n");
-	while (1) ;
-}
-
-EXPORT_SYMBOL(machine_restart);
-
-void show_regs(struct pt_regs *regs)
-{
-	printk("PC is at %pS\n", (void *)instruction_pointer(regs));
-	printk("LP is at %pS\n", (void *)regs->lp);
-	pr_info("pc : [<%08lx>]    lp : [<%08lx>]    %s\n"
-		"sp : %08lx  fp : %08lx  gp : %08lx\n",
-		instruction_pointer(regs),
-		regs->lp, print_tainted(), regs->sp, regs->fp, regs->gp);
-	pr_info("r25: %08lx  r24: %08lx\n", regs->uregs[25], regs->uregs[24]);
-
-	pr_info("r23: %08lx  r22: %08lx  r21: %08lx  r20: %08lx\n",
-		regs->uregs[23], regs->uregs[22],
-		regs->uregs[21], regs->uregs[20]);
-	pr_info("r19: %08lx  r18: %08lx  r17: %08lx  r16: %08lx\n",
-		regs->uregs[19], regs->uregs[18],
-		regs->uregs[17], regs->uregs[16]);
-	pr_info("r15: %08lx  r14: %08lx  r13: %08lx  r12: %08lx\n",
-		regs->uregs[15], regs->uregs[14],
-		regs->uregs[13], regs->uregs[12]);
-	pr_info("r11: %08lx  r10: %08lx  r9 : %08lx  r8 : %08lx\n",
-		regs->uregs[11], regs->uregs[10],
-		regs->uregs[9], regs->uregs[8]);
-	pr_info("r7 : %08lx  r6 : %08lx  r5 : %08lx  r4 : %08lx\n",
-		regs->uregs[7], regs->uregs[6], regs->uregs[5], regs->uregs[4]);
-	pr_info("r3 : %08lx  r2 : %08lx  r1 : %08lx  r0 : %08lx\n",
-		regs->uregs[3], regs->uregs[2], regs->uregs[1], regs->uregs[0]);
-	pr_info("  IRQs o%s  Segment user\n",
-		interrupts_enabled(regs) ? "n" : "ff");
-}
-
-EXPORT_SYMBOL(show_regs);
-
-void exit_thread(struct task_struct *tsk)
-{
-#if defined(CONFIG_FPU) && defined(CONFIG_LAZY_FPU)
-	if (last_task_used_math == tsk)
-		last_task_used_math = NULL;
-#endif
-}
-
-void flush_thread(void)
-{
-#if defined(CONFIG_FPU)
-	clear_fpu(task_pt_regs(current));
-	clear_used_math();
-# ifdef CONFIG_LAZY_FPU
-	if (last_task_used_math == current)
-		last_task_used_math = NULL;
-# endif
-#endif
-}
-
-DEFINE_PER_CPU(struct task_struct *, __entry_task);
-
-asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
-int copy_thread(unsigned long clone_flags, unsigned long stack_start,
-		unsigned long stk_sz, struct task_struct *p, unsigned long tls)
-{
-	struct pt_regs *childregs = task_pt_regs(p);
-
-	memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context));
-
-	if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
-		memset(childregs, 0, sizeof(struct pt_regs));
-		/* kernel thread fn */
-		p->thread.cpu_context.r6 = stack_start;
-		/* kernel thread argument */
-		p->thread.cpu_context.r7 = stk_sz;
-	} else {
-		*childregs = *current_pt_regs();
-		if (stack_start)
-			childregs->sp = stack_start;
-		/* child get zero as ret. */
-		childregs->uregs[0] = 0;
-		childregs->osp = 0;
-		if (clone_flags & CLONE_SETTLS)
-			childregs->uregs[25] = tls;
-	}
-	/* cpu context switching  */
-	p->thread.cpu_context.pc = (unsigned long)ret_from_fork;
-	p->thread.cpu_context.sp = (unsigned long)childregs;
-
-#if IS_ENABLED(CONFIG_FPU)
-	if (used_math()) {
-# if !IS_ENABLED(CONFIG_LAZY_FPU)
-		unlazy_fpu(current);
-# else
-		preempt_disable();
-		if (last_task_used_math == current)
-			save_fpu(current);
-		preempt_enable();
-# endif
-		p->thread.fpu = current->thread.fpu;
-		clear_fpu(task_pt_regs(p));
-		set_stopped_child_used_math(p);
-	}
-#endif
-
-#ifdef CONFIG_HWZOL
-	childregs->lb = 0;
-	childregs->le = 0;
-	childregs->lc = 0;
-#endif
-
-	return 0;
-}
-
-#if IS_ENABLED(CONFIG_FPU)
-struct task_struct *_switch_fpu(struct task_struct *prev, struct task_struct *next)
-{
-#if !IS_ENABLED(CONFIG_LAZY_FPU)
-	unlazy_fpu(prev);
-#endif
-	if (!(next->flags & PF_KTHREAD))
-		clear_fpu(task_pt_regs(next));
-	return prev;
-}
-#endif
-
-/*
- * fill in the fpe structure for a core dump...
- */
-int dump_fpu(struct pt_regs *regs, elf_fpregset_t * fpu)
-{
-	int fpvalid = 0;
-#if IS_ENABLED(CONFIG_FPU)
-	struct task_struct *tsk = current;
-
-	fpvalid = tsk_used_math(tsk);
-	if (fpvalid) {
-		lose_fpu();
-		memcpy(fpu, &tsk->thread.fpu, sizeof(*fpu));
-	}
-#endif
-	return fpvalid;
-}
-
-EXPORT_SYMBOL(dump_fpu);
-
-unsigned long __get_wchan(struct task_struct *p)
-{
-	unsigned long fp, lr;
-	unsigned long stack_start, stack_end;
-	int count = 0;
-
-	if (IS_ENABLED(CONFIG_FRAME_POINTER)) {
-		stack_start = (unsigned long)end_of_stack(p);
-		stack_end = (unsigned long)task_stack_page(p) + THREAD_SIZE;
-
-		fp = thread_saved_fp(p);
-		do {
-			if (fp < stack_start || fp > stack_end)
-				return 0;
-			lr = ((unsigned long *)fp)[0];
-			if (!in_sched_functions(lr))
-				return lr;
-			fp = *(unsigned long *)(fp + 4);
-		} while (count++ < 16);
-	}
-	return 0;
-}
diff --git a/arch/nds32/kernel/ptrace.c b/arch/nds32/kernel/ptrace.c
deleted file mode 100644
index d0eda870fbc2..000000000000
--- a/arch/nds32/kernel/ptrace.c
+++ /dev/null
@@ -1,118 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/ptrace.h>
-#include <linux/regset.h>
-#include <linux/tracehook.h>
-#include <linux/elf.h>
-#include <linux/sched/task_stack.h>
-
-enum nds32_regset {
-	REGSET_GPR,
-};
-
-static int gpr_get(struct task_struct *target,
-		   const struct user_regset *regset,
-		   struct membuf to)
-{
-	return membuf_write(&to, &task_pt_regs(target)->user_regs,
-				sizeof(struct user_pt_regs));
-}
-
-static int gpr_set(struct task_struct *target, const struct user_regset *regset,
-		   unsigned int pos, unsigned int count,
-		   const void *kbuf, const void __user * ubuf)
-{
-	int err;
-	struct user_pt_regs newregs = task_pt_regs(target)->user_regs;
-
-	err = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newregs, 0, -1);
-	if (err)
-		return err;
-
-	task_pt_regs(target)->user_regs = newregs;
-	return 0;
-}
-
-static const struct user_regset nds32_regsets[] = {
-	[REGSET_GPR] = {
-			.core_note_type = NT_PRSTATUS,
-			.n = sizeof(struct user_pt_regs) / sizeof(u32),
-			.size = sizeof(elf_greg_t),
-			.align = sizeof(elf_greg_t),
-			.regset_get = gpr_get,
-			.set = gpr_set}
-};
-
-static const struct user_regset_view nds32_user_view = {
-	.name = "nds32",
-	.e_machine = EM_NDS32,
-	.regsets = nds32_regsets,
-	.n = ARRAY_SIZE(nds32_regsets)
-};
-
-const struct user_regset_view *task_user_regset_view(struct task_struct *task)
-{
-	return &nds32_user_view;
-}
-
-void ptrace_disable(struct task_struct *child)
-{
-	user_disable_single_step(child);
-}
-
-/* do_ptrace()
- *
- * Provide ptrace defined service.
- */
-long arch_ptrace(struct task_struct *child, long request, unsigned long addr,
-		 unsigned long data)
-{
-	int ret = -EIO;
-
-	switch (request) {
-	default:
-		ret = ptrace_request(child, request, addr, data);
-		break;
-	}
-
-	return ret;
-}
-
-void user_enable_single_step(struct task_struct *child)
-{
-	struct pt_regs *regs;
-	regs = task_pt_regs(child);
-	regs->ipsw |= PSW_mskHSS;
-	set_tsk_thread_flag(child, TIF_SINGLESTEP);
-}
-
-void user_disable_single_step(struct task_struct *child)
-{
-	struct pt_regs *regs;
-	regs = task_pt_regs(child);
-	regs->ipsw &= ~PSW_mskHSS;
-	clear_tsk_thread_flag(child, TIF_SINGLESTEP);
-}
-
-/* sys_trace()
- *
- * syscall trace handler.
- */
-
-asmlinkage int syscall_trace_enter(struct pt_regs *regs)
-{
-	if (test_thread_flag(TIF_SYSCALL_TRACE)) {
-		if (tracehook_report_syscall_entry(regs))
-			forget_syscall(regs);
-	}
-	return regs->syscallno;
-}
-
-asmlinkage void syscall_trace_leave(struct pt_regs *regs)
-{
-	int step = test_thread_flag(TIF_SINGLESTEP);
-	if (step || test_thread_flag(TIF_SYSCALL_TRACE))
-		tracehook_report_syscall_exit(regs, step);
-
-}
diff --git a/arch/nds32/kernel/setup.c b/arch/nds32/kernel/setup.c
deleted file mode 100644
index b3d34d646652..000000000000
--- a/arch/nds32/kernel/setup.c
+++ /dev/null
@@ -1,369 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/cpu.h>
-#include <linux/memblock.h>
-#include <linux/seq_file.h>
-#include <linux/console.h>
-#include <linux/screen_info.h>
-#include <linux/delay.h>
-#include <linux/dma-mapping.h>
-#include <linux/of_fdt.h>
-#include <linux/of_platform.h>
-#include <asm/setup.h>
-#include <asm/sections.h>
-#include <asm/proc-fns.h>
-#include <asm/cache_info.h>
-#include <asm/elf.h>
-#include <asm/fpu.h>
-#include <nds32_intrinsic.h>
-
-#define HWCAP_MFUSR_PC		0x000001
-#define HWCAP_EXT		0x000002
-#define HWCAP_EXT2		0x000004
-#define HWCAP_FPU		0x000008
-#define HWCAP_AUDIO		0x000010
-#define HWCAP_BASE16		0x000020
-#define HWCAP_STRING		0x000040
-#define HWCAP_REDUCED_REGS	0x000080
-#define HWCAP_VIDEO		0x000100
-#define HWCAP_ENCRYPT		0x000200
-#define HWCAP_EDM		0x000400
-#define HWCAP_LMDMA		0x000800
-#define HWCAP_PFM		0x001000
-#define HWCAP_HSMP		0x002000
-#define HWCAP_TRACE		0x004000
-#define HWCAP_DIV		0x008000
-#define HWCAP_MAC		0x010000
-#define HWCAP_L2C		0x020000
-#define HWCAP_FPU_DP		0x040000
-#define HWCAP_V2		0x080000
-#define HWCAP_DX_REGS		0x100000
-#define HWCAP_HWPRE		0x200000
-
-unsigned long cpu_id, cpu_rev, cpu_cfgid;
-bool has_fpu = false;
-char cpu_series;
-char *endianness = NULL;
-
-unsigned int __atags_pointer __initdata;
-unsigned int elf_hwcap;
-EXPORT_SYMBOL(elf_hwcap);
-
-/*
- * The following string table, must sync with HWCAP_xx bitmask,
- * which is defined above
- */
-static const char *hwcap_str[] = {
-	"mfusr_pc",
-	"perf1",
-	"perf2",
-	"fpu",
-	"audio",
-	"16b",
-	"string",
-	"reduced_regs",
-	"video",
-	"encrypt",
-	"edm",
-	"lmdma",
-	"pfm",
-	"hsmp",
-	"trace",
-	"div",
-	"mac",
-	"l2c",
-	"fpu_dp",
-	"v2",
-	"dx_regs",
-	"hw_pre",
-	NULL,
-};
-
-#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
-#define WRITE_METHOD "write through"
-#else
-#define WRITE_METHOD "write back"
-#endif
-
-struct cache_info L1_cache_info[2];
-static void __init dump_cpu_info(int cpu)
-{
-	int i, p = 0;
-	char str[sizeof(hwcap_str) + 16];
-
-	for (i = 0; hwcap_str[i]; i++) {
-		if (elf_hwcap & (1 << i)) {
-			sprintf(str + p, "%s ", hwcap_str[i]);
-			p += strlen(hwcap_str[i]) + 1;
-		}
-	}
-
-	pr_info("CPU%d Features: %s\n", cpu, str);
-
-	L1_cache_info[ICACHE].ways = CACHE_WAY(ICACHE);
-	L1_cache_info[ICACHE].line_size = CACHE_LINE_SIZE(ICACHE);
-	L1_cache_info[ICACHE].sets = CACHE_SET(ICACHE);
-	L1_cache_info[ICACHE].size =
-	    L1_cache_info[ICACHE].ways * L1_cache_info[ICACHE].line_size *
-	    L1_cache_info[ICACHE].sets / 1024;
-	pr_info("L1I:%dKB/%dS/%dW/%dB\n", L1_cache_info[ICACHE].size,
-		L1_cache_info[ICACHE].sets, L1_cache_info[ICACHE].ways,
-		L1_cache_info[ICACHE].line_size);
-	L1_cache_info[DCACHE].ways = CACHE_WAY(DCACHE);
-	L1_cache_info[DCACHE].line_size = CACHE_LINE_SIZE(DCACHE);
-	L1_cache_info[DCACHE].sets = CACHE_SET(DCACHE);
-	L1_cache_info[DCACHE].size =
-	    L1_cache_info[DCACHE].ways * L1_cache_info[DCACHE].line_size *
-	    L1_cache_info[DCACHE].sets / 1024;
-	pr_info("L1D:%dKB/%dS/%dW/%dB\n", L1_cache_info[DCACHE].size,
-		L1_cache_info[DCACHE].sets, L1_cache_info[DCACHE].ways,
-		L1_cache_info[DCACHE].line_size);
-	pr_info("L1 D-Cache is %s\n", WRITE_METHOD);
-	if (L1_cache_info[DCACHE].size != L1_CACHE_BYTES)
-		pr_crit
-		    ("The cache line size(%d) of this processor is not the same as L1_CACHE_BYTES(%d).\n",
-		     L1_cache_info[DCACHE].size, L1_CACHE_BYTES);
-#ifdef CONFIG_CPU_CACHE_ALIASING
-	{
-		int aliasing_num;
-		aliasing_num =
-		    L1_cache_info[ICACHE].size * 1024 / PAGE_SIZE /
-		    L1_cache_info[ICACHE].ways;
-		L1_cache_info[ICACHE].aliasing_num = aliasing_num;
-		L1_cache_info[ICACHE].aliasing_mask =
-		    (aliasing_num - 1) << PAGE_SHIFT;
-		aliasing_num =
-		    L1_cache_info[DCACHE].size * 1024 / PAGE_SIZE /
-		    L1_cache_info[DCACHE].ways;
-		L1_cache_info[DCACHE].aliasing_num = aliasing_num;
-		L1_cache_info[DCACHE].aliasing_mask =
-		    (aliasing_num - 1) << PAGE_SHIFT;
-	}
-#endif
-#ifdef CONFIG_FPU
-	/* Disable fpu and enable when it is used. */
-	if (has_fpu)
-		disable_fpu();
-#endif
-}
-
-static void __init setup_cpuinfo(void)
-{
-	unsigned long tmp = 0, cpu_name;
-
-	cpu_dcache_inval_all();
-	cpu_icache_inval_all();
-	__nds32__isb();
-
-	cpu_id = (__nds32__mfsr(NDS32_SR_CPU_VER) & CPU_VER_mskCPUID) >> CPU_VER_offCPUID;
-	cpu_name = ((cpu_id) & 0xf0) >> 4;
-	cpu_series = cpu_name ? cpu_name - 10 + 'A' : 'N';
-	cpu_id = cpu_id & 0xf;
-	cpu_rev = (__nds32__mfsr(NDS32_SR_CPU_VER) & CPU_VER_mskREV) >> CPU_VER_offREV;
-	cpu_cfgid = (__nds32__mfsr(NDS32_SR_CPU_VER) & CPU_VER_mskCFGID) >> CPU_VER_offCFGID;
-
-	pr_info("CPU:%c%ld, CPU_VER 0x%08x(id %lu, rev %lu, cfg %lu)\n",
-		cpu_series, cpu_id, __nds32__mfsr(NDS32_SR_CPU_VER), cpu_id, cpu_rev, cpu_cfgid);
-
-	elf_hwcap |= HWCAP_MFUSR_PC;
-
-	if (((__nds32__mfsr(NDS32_SR_MSC_CFG) & MSC_CFG_mskBASEV) >> MSC_CFG_offBASEV) == 0) {
-		if (__nds32__mfsr(NDS32_SR_MSC_CFG) & MSC_CFG_mskDIV)
-			elf_hwcap |= HWCAP_DIV;
-
-		if ((__nds32__mfsr(NDS32_SR_MSC_CFG) & MSC_CFG_mskMAC)
-		    || (cpu_id == 12 && cpu_rev < 4))
-			elf_hwcap |= HWCAP_MAC;
-	} else {
-		elf_hwcap |= HWCAP_V2;
-		elf_hwcap |= HWCAP_DIV;
-		elf_hwcap |= HWCAP_MAC;
-	}
-
-	if (cpu_cfgid & 0x0001)
-		elf_hwcap |= HWCAP_EXT;
-
-	if (cpu_cfgid & 0x0002)
-		elf_hwcap |= HWCAP_BASE16;
-
-	if (cpu_cfgid & 0x0004)
-		elf_hwcap |= HWCAP_EXT2;
-
-	if (cpu_cfgid & 0x0008) {
-		elf_hwcap |= HWCAP_FPU;
-		has_fpu = true;
-	}
-	if (cpu_cfgid & 0x0010)
-		elf_hwcap |= HWCAP_STRING;
-
-	if (__nds32__mfsr(NDS32_SR_MMU_CFG) & MMU_CFG_mskDE)
-		endianness = "MSB";
-	else
-		endianness = "LSB";
-
-	if (__nds32__mfsr(NDS32_SR_MSC_CFG) & MSC_CFG_mskEDM)
-		elf_hwcap |= HWCAP_EDM;
-
-	if (__nds32__mfsr(NDS32_SR_MSC_CFG) & MSC_CFG_mskLMDMA)
-		elf_hwcap |= HWCAP_LMDMA;
-
-	if (__nds32__mfsr(NDS32_SR_MSC_CFG) & MSC_CFG_mskPFM)
-		elf_hwcap |= HWCAP_PFM;
-
-	if (__nds32__mfsr(NDS32_SR_MSC_CFG) & MSC_CFG_mskHSMP)
-		elf_hwcap |= HWCAP_HSMP;
-
-	if (__nds32__mfsr(NDS32_SR_MSC_CFG) & MSC_CFG_mskTRACE)
-		elf_hwcap |= HWCAP_TRACE;
-
-	if (__nds32__mfsr(NDS32_SR_MSC_CFG) & MSC_CFG_mskAUDIO)
-		elf_hwcap |= HWCAP_AUDIO;
-
-	if (__nds32__mfsr(NDS32_SR_MSC_CFG) & MSC_CFG_mskL2C)
-		elf_hwcap |= HWCAP_L2C;
-
-#ifdef CONFIG_HW_PRE
-	if (__nds32__mfsr(NDS32_SR_MISC_CTL) & MISC_CTL_makHWPRE_EN)
-		elf_hwcap |= HWCAP_HWPRE;
-#endif
-
-	tmp = __nds32__mfsr(NDS32_SR_CACHE_CTL);
-	if (!IS_ENABLED(CONFIG_CPU_DCACHE_DISABLE))
-		tmp |= CACHE_CTL_mskDC_EN;
-
-	if (!IS_ENABLED(CONFIG_CPU_ICACHE_DISABLE))
-		tmp |= CACHE_CTL_mskIC_EN;
-	__nds32__mtsr_isb(tmp, NDS32_SR_CACHE_CTL);
-
-	dump_cpu_info(smp_processor_id());
-}
-
-static void __init setup_memory(void)
-{
-	unsigned long ram_start_pfn;
-	unsigned long free_ram_start_pfn;
-	phys_addr_t memory_start, memory_end;
-
-	memory_end = memory_start = 0;
-
-	/* Find main memory where is the kernel */
-	memory_start = memblock_start_of_DRAM();
-	memory_end = memblock_end_of_DRAM();
-
-	if (!memory_end) {
-		panic("No memory!");
-	}
-
-	ram_start_pfn = PFN_UP(memblock_start_of_DRAM());
-	/* free_ram_start_pfn is first page after kernel */
-	free_ram_start_pfn = PFN_UP(__pa(&_end));
-	max_pfn = PFN_DOWN(memblock_end_of_DRAM());
-	/* it could update max_pfn */
-	if (max_pfn - ram_start_pfn <= MAXMEM_PFN)
-		max_low_pfn = max_pfn;
-	else {
-		max_low_pfn = MAXMEM_PFN + ram_start_pfn;
-		if (!IS_ENABLED(CONFIG_HIGHMEM))
-			max_pfn = MAXMEM_PFN + ram_start_pfn;
-	}
-	/* high_memory is related with VMALLOC */
-	high_memory = (void *)__va(max_low_pfn * PAGE_SIZE);
-	min_low_pfn = free_ram_start_pfn;
-
-	/*
-	 * initialize the boot-time allocator (with low memory only).
-	 *
-	 * This makes the memory from the end of the kernel to the end of
-	 * RAM usable.
-	 */
-	memblock_set_bottom_up(true);
-	memblock_reserve(PFN_PHYS(ram_start_pfn), PFN_PHYS(free_ram_start_pfn - ram_start_pfn));
-
-	early_init_fdt_reserve_self();
-	early_init_fdt_scan_reserved_mem();
-
-	memblock_dump_all();
-}
-
-void __init setup_arch(char **cmdline_p)
-{
-	early_init_devtree(__atags_pointer ? \
-		phys_to_virt(__atags_pointer) : __dtb_start);
-
-	setup_cpuinfo();
-
-	setup_initial_init_mm(_stext, _etext, _edata, _end);
-
-	/* setup bootmem allocator */
-	setup_memory();
-
-	/* paging_init() sets up the MMU and marks all pages as reserved */
-	paging_init();
-
-	/* invalidate all TLB entries because the new mapping is created */
-	__nds32__tlbop_flua();
-
-	/* use generic way to parse */
-	parse_early_param();
-
-	unflatten_and_copy_device_tree();
-
-	*cmdline_p = boot_command_line;
-	early_trap_init();
-}
-
-static int c_show(struct seq_file *m, void *v)
-{
-	int i;
-
-	seq_printf(m, "Processor\t: %c%ld (id %lu, rev %lu, cfg %lu)\n",
-		   cpu_series, cpu_id, cpu_id, cpu_rev, cpu_cfgid);
-
-	seq_printf(m, "L1I\t\t: %luKB/%luS/%luW/%luB\n",
-		   CACHE_SET(ICACHE) * CACHE_WAY(ICACHE) *
-		   CACHE_LINE_SIZE(ICACHE) / 1024, CACHE_SET(ICACHE),
-		   CACHE_WAY(ICACHE), CACHE_LINE_SIZE(ICACHE));
-
-	seq_printf(m, "L1D\t\t: %luKB/%luS/%luW/%luB\n",
-		   CACHE_SET(DCACHE) * CACHE_WAY(DCACHE) *
-		   CACHE_LINE_SIZE(DCACHE) / 1024, CACHE_SET(DCACHE),
-		   CACHE_WAY(DCACHE), CACHE_LINE_SIZE(DCACHE));
-
-	seq_printf(m, "BogoMIPS\t: %lu.%02lu\n",
-		   loops_per_jiffy / (500000 / HZ),
-		   (loops_per_jiffy / (5000 / HZ)) % 100);
-
-	/* dump out the processor features */
-	seq_puts(m, "Features\t: ");
-
-	for (i = 0; hwcap_str[i]; i++)
-		if (elf_hwcap & (1 << i))
-			seq_printf(m, "%s ", hwcap_str[i]);
-
-	seq_puts(m, "\n\n");
-
-	return 0;
-}
-
-static void *c_start(struct seq_file *m, loff_t * pos)
-{
-	return *pos < 1 ? (void *)1 : NULL;
-}
-
-static void *c_next(struct seq_file *m, void *v, loff_t * pos)
-{
-	++*pos;
-	return NULL;
-}
-
-static void c_stop(struct seq_file *m, void *v)
-{
-}
-
-struct seq_operations cpuinfo_op = {
-	.start = c_start,
-	.next = c_next,
-	.stop = c_stop,
-	.show = c_show
-};
diff --git a/arch/nds32/kernel/signal.c b/arch/nds32/kernel/signal.c
deleted file mode 100644
index 7e3ca430a223..000000000000
--- a/arch/nds32/kernel/signal.c
+++ /dev/null
@@ -1,384 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/errno.h>
-#include <linux/signal.h>
-#include <linux/ptrace.h>
-#include <linux/personality.h>
-#include <linux/freezer.h>
-#include <linux/tracehook.h>
-#include <linux/uaccess.h>
-
-#include <asm/cacheflush.h>
-#include <asm/ucontext.h>
-#include <asm/unistd.h>
-#include <asm/fpu.h>
-
-#include <asm/ptrace.h>
-#include <asm/vdso.h>
-
-struct rt_sigframe {
-	struct siginfo info;
-	struct ucontext uc;
-};
-#if IS_ENABLED(CONFIG_FPU)
-static inline int restore_sigcontext_fpu(struct pt_regs *regs,
-					 struct sigcontext __user *sc)
-{
-	struct task_struct *tsk = current;
-	unsigned long used_math_flag;
-	int ret = 0;
-
-	clear_used_math();
-	__get_user_error(used_math_flag, &sc->used_math_flag, ret);
-
-	if (!used_math_flag)
-		return 0;
-	set_used_math();
-
-#if IS_ENABLED(CONFIG_LAZY_FPU)
-	preempt_disable();
-	if (current == last_task_used_math) {
-		last_task_used_math = NULL;
-		disable_ptreg_fpu(regs);
-	}
-	preempt_enable();
-#else
-	clear_fpu(regs);
-#endif
-
-	return __copy_from_user(&tsk->thread.fpu, &sc->fpu,
-				sizeof(struct fpu_struct));
-}
-
-static inline int setup_sigcontext_fpu(struct pt_regs *regs,
-				       struct sigcontext __user *sc)
-{
-	struct task_struct *tsk = current;
-	int ret = 0;
-
-	__put_user_error(used_math(), &sc->used_math_flag, ret);
-
-	if (!used_math())
-		return ret;
-
-	preempt_disable();
-#if IS_ENABLED(CONFIG_LAZY_FPU)
-	if (last_task_used_math == tsk)
-		save_fpu(last_task_used_math);
-#else
-	unlazy_fpu(tsk);
-#endif
-	ret = __copy_to_user(&sc->fpu, &tsk->thread.fpu,
-			     sizeof(struct fpu_struct));
-	preempt_enable();
-	return ret;
-}
-#endif
-
-static int restore_sigframe(struct pt_regs *regs,
-			    struct rt_sigframe __user * sf)
-{
-	sigset_t set;
-	int err;
-
-	err = __copy_from_user(&set, &sf->uc.uc_sigmask, sizeof(set));
-	if (err == 0) {
-		set_current_blocked(&set);
-	}
-
-	__get_user_error(regs->uregs[0], &sf->uc.uc_mcontext.nds32_r0, err);
-	__get_user_error(regs->uregs[1], &sf->uc.uc_mcontext.nds32_r1, err);
-	__get_user_error(regs->uregs[2], &sf->uc.uc_mcontext.nds32_r2, err);
-	__get_user_error(regs->uregs[3], &sf->uc.uc_mcontext.nds32_r3, err);
-	__get_user_error(regs->uregs[4], &sf->uc.uc_mcontext.nds32_r4, err);
-	__get_user_error(regs->uregs[5], &sf->uc.uc_mcontext.nds32_r5, err);
-	__get_user_error(regs->uregs[6], &sf->uc.uc_mcontext.nds32_r6, err);
-	__get_user_error(regs->uregs[7], &sf->uc.uc_mcontext.nds32_r7, err);
-	__get_user_error(regs->uregs[8], &sf->uc.uc_mcontext.nds32_r8, err);
-	__get_user_error(regs->uregs[9], &sf->uc.uc_mcontext.nds32_r9, err);
-	__get_user_error(regs->uregs[10], &sf->uc.uc_mcontext.nds32_r10, err);
-	__get_user_error(regs->uregs[11], &sf->uc.uc_mcontext.nds32_r11, err);
-	__get_user_error(regs->uregs[12], &sf->uc.uc_mcontext.nds32_r12, err);
-	__get_user_error(regs->uregs[13], &sf->uc.uc_mcontext.nds32_r13, err);
-	__get_user_error(regs->uregs[14], &sf->uc.uc_mcontext.nds32_r14, err);
-	__get_user_error(regs->uregs[15], &sf->uc.uc_mcontext.nds32_r15, err);
-	__get_user_error(regs->uregs[16], &sf->uc.uc_mcontext.nds32_r16, err);
-	__get_user_error(regs->uregs[17], &sf->uc.uc_mcontext.nds32_r17, err);
-	__get_user_error(regs->uregs[18], &sf->uc.uc_mcontext.nds32_r18, err);
-	__get_user_error(regs->uregs[19], &sf->uc.uc_mcontext.nds32_r19, err);
-	__get_user_error(regs->uregs[20], &sf->uc.uc_mcontext.nds32_r20, err);
-	__get_user_error(regs->uregs[21], &sf->uc.uc_mcontext.nds32_r21, err);
-	__get_user_error(regs->uregs[22], &sf->uc.uc_mcontext.nds32_r22, err);
-	__get_user_error(regs->uregs[23], &sf->uc.uc_mcontext.nds32_r23, err);
-	__get_user_error(regs->uregs[24], &sf->uc.uc_mcontext.nds32_r24, err);
-	__get_user_error(regs->uregs[25], &sf->uc.uc_mcontext.nds32_r25, err);
-
-	__get_user_error(regs->fp, &sf->uc.uc_mcontext.nds32_fp, err);
-	__get_user_error(regs->gp, &sf->uc.uc_mcontext.nds32_gp, err);
-	__get_user_error(regs->lp, &sf->uc.uc_mcontext.nds32_lp, err);
-	__get_user_error(regs->sp, &sf->uc.uc_mcontext.nds32_sp, err);
-	__get_user_error(regs->ipc, &sf->uc.uc_mcontext.nds32_ipc, err);
-#if defined(CONFIG_HWZOL)
-	__get_user_error(regs->lc, &sf->uc.uc_mcontext.zol.nds32_lc, err);
-	__get_user_error(regs->le, &sf->uc.uc_mcontext.zol.nds32_le, err);
-	__get_user_error(regs->lb, &sf->uc.uc_mcontext.zol.nds32_lb, err);
-#endif
-#if IS_ENABLED(CONFIG_FPU)
-	err |= restore_sigcontext_fpu(regs, &sf->uc.uc_mcontext);
-#endif
-	/*
-	 * Avoid sys_rt_sigreturn() restarting.
-	 */
-	forget_syscall(regs);
-	return err;
-}
-
-asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
-{
-	struct rt_sigframe __user *frame;
-
-	/* Always make any pending restarted system calls return -EINTR */
-	current->restart_block.fn = do_no_restart_syscall;
-
-	/*
-	 * Since we stacked the signal on a 64-bit boundary,
-	 * then 'sp' should be two-word aligned here.  If it's
-	 * not, then the user is trying to mess with us.
-	 */
-	if (regs->sp & 7)
-		goto badframe;
-
-	frame = (struct rt_sigframe __user *)regs->sp;
-
-	if (!access_ok(frame, sizeof(*frame)))
-		goto badframe;
-
-	if (restore_sigframe(regs, frame))
-		goto badframe;
-
-	if (restore_altstack(&frame->uc.uc_stack))
-		goto badframe;
-
-	return regs->uregs[0];
-
-badframe:
-	force_sig(SIGSEGV);
-	return 0;
-}
-
-static int
-setup_sigframe(struct rt_sigframe __user * sf, struct pt_regs *regs,
-	       sigset_t * set)
-{
-	int err = 0;
-
-	__put_user_error(regs->uregs[0], &sf->uc.uc_mcontext.nds32_r0, err);
-	__put_user_error(regs->uregs[1], &sf->uc.uc_mcontext.nds32_r1, err);
-	__put_user_error(regs->uregs[2], &sf->uc.uc_mcontext.nds32_r2, err);
-	__put_user_error(regs->uregs[3], &sf->uc.uc_mcontext.nds32_r3, err);
-	__put_user_error(regs->uregs[4], &sf->uc.uc_mcontext.nds32_r4, err);
-	__put_user_error(regs->uregs[5], &sf->uc.uc_mcontext.nds32_r5, err);
-	__put_user_error(regs->uregs[6], &sf->uc.uc_mcontext.nds32_r6, err);
-	__put_user_error(regs->uregs[7], &sf->uc.uc_mcontext.nds32_r7, err);
-	__put_user_error(regs->uregs[8], &sf->uc.uc_mcontext.nds32_r8, err);
-	__put_user_error(regs->uregs[9], &sf->uc.uc_mcontext.nds32_r9, err);
-	__put_user_error(regs->uregs[10], &sf->uc.uc_mcontext.nds32_r10, err);
-	__put_user_error(regs->uregs[11], &sf->uc.uc_mcontext.nds32_r11, err);
-	__put_user_error(regs->uregs[12], &sf->uc.uc_mcontext.nds32_r12, err);
-	__put_user_error(regs->uregs[13], &sf->uc.uc_mcontext.nds32_r13, err);
-	__put_user_error(regs->uregs[14], &sf->uc.uc_mcontext.nds32_r14, err);
-	__put_user_error(regs->uregs[15], &sf->uc.uc_mcontext.nds32_r15, err);
-	__put_user_error(regs->uregs[16], &sf->uc.uc_mcontext.nds32_r16, err);
-	__put_user_error(regs->uregs[17], &sf->uc.uc_mcontext.nds32_r17, err);
-	__put_user_error(regs->uregs[18], &sf->uc.uc_mcontext.nds32_r18, err);
-	__put_user_error(regs->uregs[19], &sf->uc.uc_mcontext.nds32_r19, err);
-	__put_user_error(regs->uregs[20], &sf->uc.uc_mcontext.nds32_r20, err);
-
-	__put_user_error(regs->uregs[21], &sf->uc.uc_mcontext.nds32_r21, err);
-	__put_user_error(regs->uregs[22], &sf->uc.uc_mcontext.nds32_r22, err);
-	__put_user_error(regs->uregs[23], &sf->uc.uc_mcontext.nds32_r23, err);
-	__put_user_error(regs->uregs[24], &sf->uc.uc_mcontext.nds32_r24, err);
-	__put_user_error(regs->uregs[25], &sf->uc.uc_mcontext.nds32_r25, err);
-	__put_user_error(regs->fp, &sf->uc.uc_mcontext.nds32_fp, err);
-	__put_user_error(regs->gp, &sf->uc.uc_mcontext.nds32_gp, err);
-	__put_user_error(regs->lp, &sf->uc.uc_mcontext.nds32_lp, err);
-	__put_user_error(regs->sp, &sf->uc.uc_mcontext.nds32_sp, err);
-	__put_user_error(regs->ipc, &sf->uc.uc_mcontext.nds32_ipc, err);
-#if defined(CONFIG_HWZOL)
-	__put_user_error(regs->lc, &sf->uc.uc_mcontext.zol.nds32_lc, err);
-	__put_user_error(regs->le, &sf->uc.uc_mcontext.zol.nds32_le, err);
-	__put_user_error(regs->lb, &sf->uc.uc_mcontext.zol.nds32_lb, err);
-#endif
-#if IS_ENABLED(CONFIG_FPU)
-	err |= setup_sigcontext_fpu(regs, &sf->uc.uc_mcontext);
-#endif
-
-	__put_user_error(current->thread.trap_no, &sf->uc.uc_mcontext.trap_no,
-			 err);
-	__put_user_error(current->thread.error_code,
-			 &sf->uc.uc_mcontext.error_code, err);
-	__put_user_error(current->thread.address,
-			 &sf->uc.uc_mcontext.fault_address, err);
-	__put_user_error(set->sig[0], &sf->uc.uc_mcontext.oldmask, err);
-
-	err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(*set));
-
-	return err;
-}
-
-static inline void __user *get_sigframe(struct ksignal *ksig,
-					struct pt_regs *regs, int framesize)
-{
-	unsigned long sp;
-
-	/* Default to using normal stack */
-	sp = regs->sp;
-
-	/*
-	 * If we are on the alternate signal stack and would overflow it, don't.
-	 * Return an always-bogus address instead so we will die with SIGSEGV.
-	 */
-	if (on_sig_stack(sp) && !likely(on_sig_stack(sp - framesize)))
-		return (void __user __force *)(-1UL);
-
-	/* This is the X/Open sanctioned signal stack switching. */
-	sp = (sigsp(sp, ksig) - framesize);
-
-	/*
-	 * nds32 mandates 8-byte alignment
-	 */
-	sp &= ~0x7UL;
-
-	return (void __user *)sp;
-}
-
-static int
-setup_return(struct pt_regs *regs, struct ksignal *ksig, void __user * frame)
-{
-	unsigned long handler = (unsigned long)ksig->ka.sa.sa_handler;
-	unsigned long retcode;
-
-	retcode = VDSO_SYMBOL(current->mm->context.vdso, rt_sigtramp);
-	regs->uregs[0] = ksig->sig;
-	regs->sp = (unsigned long)frame;
-	regs->lp = retcode;
-	regs->ipc = handler;
-
-	return 0;
-}
-
-static int
-setup_rt_frame(struct ksignal *ksig, sigset_t * set, struct pt_regs *regs)
-{
-	struct rt_sigframe __user *frame =
-	    get_sigframe(ksig, regs, sizeof(*frame));
-	int err = 0;
-
-	if (!access_ok(frame, sizeof(*frame)))
-		return -EFAULT;
-
-	__put_user_error(0, &frame->uc.uc_flags, err);
-	__put_user_error(NULL, &frame->uc.uc_link, err);
-
-	err |= __save_altstack(&frame->uc.uc_stack, regs->sp);
-	err |= setup_sigframe(frame, regs, set);
-	if (err == 0) {
-		setup_return(regs, ksig, frame);
-		if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
-			err |= copy_siginfo_to_user(&frame->info, &ksig->info);
-			regs->uregs[1] = (unsigned long)&frame->info;
-			regs->uregs[2] = (unsigned long)&frame->uc;
-		}
-	}
-	return err;
-}
-
-/*
- * OK, we're invoking a handler
- */
-static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
-{
-	int ret;
-	sigset_t *oldset = sigmask_to_save();
-
-	if (in_syscall(regs)) {
-		/* Avoid additional syscall restarting via ret_slow_syscall. */
-		forget_syscall(regs);
-
-		switch (regs->uregs[0]) {
-		case -ERESTART_RESTARTBLOCK:
-		case -ERESTARTNOHAND:
-			regs->uregs[0] = -EINTR;
-			break;
-		case -ERESTARTSYS:
-			if (!(ksig->ka.sa.sa_flags & SA_RESTART)) {
-				regs->uregs[0] = -EINTR;
-				break;
-			}
-			fallthrough;
-		case -ERESTARTNOINTR:
-			regs->uregs[0] = regs->orig_r0;
-			regs->ipc -= 4;
-			break;
-		}
-	}
-	/*
-	 * Set up the stack frame
-	 */
-	ret = setup_rt_frame(ksig, oldset, regs);
-
-	signal_setup_done(ret, ksig, 0);
-}
-
-/*
- * Note that 'init' is a special process: it doesn't get signals it doesn't
- * want to handle. Thus you cannot kill init even with a SIGKILL even by
- * mistake.
- *
- * Note that we go through the signals twice: once to check the signals that
- * the kernel can handle, and then we build all the user-level signal handling
- * stack-frames in one go after that.
- */
-static void do_signal(struct pt_regs *regs)
-{
-	struct ksignal ksig;
-
-	if (get_signal(&ksig)) {
-		handle_signal(&ksig, regs);
-		return;
-	}
-
-	/*
-	 * If we were from a system call, check for system call restarting...
-	 */
-	if (in_syscall(regs)) {
-		/* Restart the system call - no handlers present */
-
-		/* Avoid additional syscall restarting via ret_slow_syscall. */
-		forget_syscall(regs);
-
-		switch (regs->uregs[0]) {
-		case -ERESTART_RESTARTBLOCK:
-			regs->uregs[15] = __NR_restart_syscall;
-			fallthrough;
-		case -ERESTARTNOHAND:
-		case -ERESTARTSYS:
-		case -ERESTARTNOINTR:
-			regs->uregs[0] = regs->orig_r0;
-			regs->ipc -= 0x4;
-			break;
-		}
-	}
-	restore_saved_sigmask();
-}
-
-asmlinkage void
-do_notify_resume(struct pt_regs *regs, unsigned int thread_flags)
-{
-	if (thread_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
-		do_signal(regs);
-
-	if (thread_flags & _TIF_NOTIFY_RESUME)
-		tracehook_notify_resume(regs);
-}
diff --git a/arch/nds32/kernel/sleep.S b/arch/nds32/kernel/sleep.S
deleted file mode 100644
index ca4e61f3656f..000000000000
--- a/arch/nds32/kernel/sleep.S
+++ /dev/null
@@ -1,131 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2017 Andes Technology Corporation */
-
-#include <asm/memory.h>
-
-.data
-.global sp_tmp
-sp_tmp:
-.long
-
-.text
-.globl suspend2ram
-.globl cpu_resume
-
-suspend2ram:
-	pushm   $r0, $r31
-#if defined(CONFIG_HWZOL)
-	mfusr   $r0, $lc
-	mfusr   $r1, $le
-	mfusr   $r2, $lb
-#endif
-	mfsr	$r3, $mr0
-	mfsr    $r4, $mr1
-	mfsr    $r5, $mr4
-	mfsr    $r6, $mr6
-	mfsr    $r7, $mr7
-	mfsr    $r8, $mr8
-	mfsr    $r9, $ir0
-	mfsr    $r10, $ir1
-	mfsr    $r11, $ir2
-	mfsr    $r12, $ir3
-	mfsr    $r13, $ir9
-	mfsr    $r14, $ir10
-	mfsr    $r15, $ir12
-	mfsr    $r16, $ir13
-	mfsr    $r17, $ir14
-	mfsr    $r18, $ir15
-	pushm   $r0, $r19
-#if defined(CONFIG_FPU)
-	jal	store_fpu_for_suspend
-#endif
-	tlbop	FlushAll
-	isb
-
-	// transfer $sp from va to pa
-	sethi	$r0, hi20(PAGE_OFFSET)
-	ori	$r0, $r0, lo12(PAGE_OFFSET)
-	movi	$r2, PHYS_OFFSET
-	sub	$r1, $sp, $r0
-	add	$r2, $r1, $r2
-
-	// store pa($sp) to sp_tmp
-	sethi 	$r1, hi20(sp_tmp)
-	swi	$r2, [$r1 + lo12(sp_tmp)]
-
-	pushm	$r16, $r25
-	pushm	$r29, $r30
-#ifdef	CONFIG_CACHE_L2
-	jal	dcache_wb_all_level
-#else
-	jal	cpu_dcache_wb_all
-#endif
-	popm	$r29, $r30
-	popm	$r16, $r25
-
-	// get wake_mask and loop in standby
-	la	$r1, wake_mask
-	lwi	$r1, [$r1]
-self_loop:
-	standby wake_grant
-	mfsr	$r2, $ir15
-	and	$r2, $r1, $r2
-	beqz	$r2, self_loop
-
-	// set ipc to resume address
-	la	$r1, resume_addr
-	lwi	$r1, [$r1]
-	mtsr	$r1, $ipc
-	isb
-
-	// reset psw, turn off the address translation
-	li      $r2, 0x7000a
-	mtsr    $r2, $ipsw
-	isb
-
-	iret
-cpu_resume:
-	// translate the address of sp_tmp variable to pa
-	la	$r1, sp_tmp
-	sethi   $r0, hi20(PAGE_OFFSET)
-	ori     $r0, $r0, lo12(PAGE_OFFSET)
-	movi    $r2, PHYS_OFFSET
-	sub     $r1, $r1, $r0
-	add     $r1, $r1, $r2
-
-	// access the sp_tmp to get stack pointer
-	lwi	$sp, [$r1]
-
-	popm	$r0, $r19
-#if defined(CONFIG_HWZOL)
-	mtusr   $r0, $lb
-	mtusr   $r1, $lc
-	mtusr   $r2, $le
-#endif
-	mtsr	$r3, $mr0
-	mtsr    $r4, $mr1
-	mtsr    $r5, $mr4
-	mtsr    $r6, $mr6
-	mtsr    $r7, $mr7
-	mtsr    $r8, $mr8
-	// set original psw to ipsw
-	mtsr    $r9, $ir1
-
-	mtsr    $r11, $ir2
-	mtsr    $r12, $ir3
-
-	// set ipc to RR
-	la	$r13, RR
-	mtsr	$r13, $ir9
-
-	mtsr    $r14, $ir10
-	mtsr    $r15, $ir12
-	mtsr    $r16, $ir13
-	mtsr    $r17, $ir14
-	mtsr    $r18, $ir15
-	popm    $r0, $r31
-
-	isb
-	iret
-RR:
-	ret
diff --git a/arch/nds32/kernel/stacktrace.c b/arch/nds32/kernel/stacktrace.c
deleted file mode 100644
index d974c0c1c65f..000000000000
--- a/arch/nds32/kernel/stacktrace.c
+++ /dev/null
@@ -1,53 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/sched/debug.h>
-#include <linux/sched/task_stack.h>
-#include <linux/stacktrace.h>
-#include <linux/ftrace.h>
-
-void save_stack_trace(struct stack_trace *trace)
-{
-	save_stack_trace_tsk(current, trace);
-}
-EXPORT_SYMBOL_GPL(save_stack_trace);
-
-void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
-{
-	unsigned long *fpn;
-	int skip = trace->skip;
-	int savesched;
-	int graph_idx = 0;
-
-	if (tsk == current) {
-		__asm__ __volatile__("\tori\t%0, $fp, #0\n":"=r"(fpn));
-		savesched = 1;
-	} else {
-		fpn = (unsigned long *)thread_saved_fp(tsk);
-		savesched = 0;
-	}
-
-	while (!kstack_end(fpn) && !((unsigned long)fpn & 0x3)
-	       && (fpn >= (unsigned long *)TASK_SIZE)) {
-		unsigned long lpp, fpp;
-
-		lpp = fpn[LP_OFFSET];
-		fpp = fpn[FP_OFFSET];
-		if (!__kernel_text_address(lpp))
-			break;
-		else
-			lpp = ftrace_graph_ret_addr(tsk, &graph_idx, lpp, NULL);
-
-		if (savesched || !in_sched_functions(lpp)) {
-			if (skip) {
-				skip--;
-			} else {
-				trace->entries[trace->nr_entries++] = lpp;
-				if (trace->nr_entries >= trace->max_entries)
-					break;
-			}
-		}
-		fpn = (unsigned long *)fpp;
-	}
-}
-EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
diff --git a/arch/nds32/kernel/sys_nds32.c b/arch/nds32/kernel/sys_nds32.c
deleted file mode 100644
index cb2d1e219bb3..000000000000
--- a/arch/nds32/kernel/sys_nds32.c
+++ /dev/null
@@ -1,84 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/syscalls.h>
-#include <linux/uaccess.h>
-
-#include <asm/cachectl.h>
-#include <asm/proc-fns.h>
-#include <asm/fpu.h>
-#include <asm/fp_udfiex_crtl.h>
-
-SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len,
-	       unsigned long, prot, unsigned long, flags,
-	       unsigned long, fd, unsigned long, pgoff)
-{
-	if (pgoff & (~PAGE_MASK >> 12))
-		return -EINVAL;
-
-	return sys_mmap_pgoff(addr, len, prot, flags, fd,
-			      pgoff >> (PAGE_SHIFT - 12));
-}
-
-SYSCALL_DEFINE4(fadvise64_64_wrapper,int, fd, int, advice, loff_t, offset,
-					 loff_t, len)
-{
-	return sys_fadvise64_64(fd, offset, len, advice);
-}
-
-SYSCALL_DEFINE3(cacheflush, unsigned int, start, unsigned int, end, int, cache)
-{
-	struct vm_area_struct *vma;
-	bool flushi = true, wbd = true;
-
-	vma = find_vma(current->mm, start);
-	if (!vma)
-		return -EFAULT;
-	switch (cache) {
-	case ICACHE:
-		wbd = false;
-		break;
-	case DCACHE:
-		flushi = false;
-		break;
-	case BCACHE:
-		break;
-	default:
-		return -EINVAL;
-	}
-	cpu_cache_wbinval_range_check(vma, start, end, flushi, wbd);
-
-	return 0;
-}
-
-SYSCALL_DEFINE2(fp_udfiex_crtl, unsigned int, cmd, unsigned int, act)
-{
-#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC)
-	int old_udf_iex;
-
-	if (!used_math()) {
-		load_fpu(&init_fpuregs);
-		current->thread.fpu.UDF_IEX_trap = init_fpuregs.UDF_IEX_trap;
-		set_used_math();
-	}
-
-	old_udf_iex = current->thread.fpu.UDF_IEX_trap;
-	act &= (FPCSR_mskUDFE | FPCSR_mskIEXE);
-
-	switch (cmd) {
-	case DISABLE_UDF_IEX_TRAP:
-		current->thread.fpu.UDF_IEX_trap &= ~act;
-		break;
-	case ENABLE_UDF_IEX_TRAP:
-		current->thread.fpu.UDF_IEX_trap |= act;
-		break;
-	case GET_UDF_IEX_TRAP:
-		break;
-	default:
-		return -EINVAL;
-	}
-	return old_udf_iex;
-#else
-	return -ENOTSUPP;
-#endif
-}
diff --git a/arch/nds32/kernel/syscall_table.c b/arch/nds32/kernel/syscall_table.c
deleted file mode 100644
index 7879c061b87f..000000000000
--- a/arch/nds32/kernel/syscall_table.c
+++ /dev/null
@@ -1,17 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/syscalls.h>
-#include <linux/signal.h>
-#include <linux/unistd.h>
-#include <asm/syscalls.h>
-
-#undef __SYSCALL
-#define __SYSCALL(nr, call) [nr] = (call),
-
-#define sys_rt_sigreturn sys_rt_sigreturn_wrapper
-#define sys_fadvise64_64 sys_fadvise64_64_wrapper
-void *sys_call_table[__NR_syscalls] __aligned(8192) = {
-	[0 ... __NR_syscalls - 1] = sys_ni_syscall,
-#include <asm/unistd.h>
-};
diff --git a/arch/nds32/kernel/time.c b/arch/nds32/kernel/time.c
deleted file mode 100644
index 574a3d0a8539..000000000000
--- a/arch/nds32/kernel/time.c
+++ /dev/null
@@ -1,11 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/clocksource.h>
-#include <linux/of_clk.h>
-
-void __init time_init(void)
-{
-	of_clk_init(NULL);
-	timer_probe();
-}
diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c
deleted file mode 100644
index c0a8f3344fb9..000000000000
--- a/arch/nds32/kernel/traps.c
+++ /dev/null
@@ -1,354 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/module.h>
-#include <linux/personality.h>
-#include <linux/kallsyms.h>
-#include <linux/hardirq.h>
-#include <linux/kdebug.h>
-#include <linux/sched/task_stack.h>
-#include <linux/uaccess.h>
-#include <linux/ftrace.h>
-
-#include <asm/proc-fns.h>
-#include <asm/unistd.h>
-#include <asm/fpu.h>
-
-#include <linux/ptrace.h>
-#include <nds32_intrinsic.h>
-
-extern void show_pte(struct mm_struct *mm, unsigned long addr);
-
-/*
- * Dump out the contents of some memory nicely...
- */
-void dump_mem(const char *lvl, unsigned long bottom, unsigned long top)
-{
-	unsigned long first;
-	int i;
-
-	pr_emerg("%s(0x%08lx to 0x%08lx)\n", lvl, bottom, top);
-
-	for (first = bottom & ~31; first < top; first += 32) {
-		unsigned long p;
-		char str[sizeof(" 12345678") * 8 + 1];
-
-		memset(str, ' ', sizeof(str));
-		str[sizeof(str) - 1] = '\0';
-
-		for (p = first, i = 0; i < 8 && p < top; i++, p += 4) {
-			if (p >= bottom && p < top) {
-				unsigned long val;
-
-				if (get_kernel_nofault(val,
-						(unsigned long *)p) == 0)
-					sprintf(str + i * 9, " %08lx", val);
-				else
-					sprintf(str + i * 9, " ????????");
-			}
-		}
-		pr_emerg("%s%04lx:%s\n", lvl, first & 0xffff, str);
-	}
-}
-
-EXPORT_SYMBOL(dump_mem);
-
-#define LOOP_TIMES (100)
-static void __dump(struct task_struct *tsk, unsigned long *base_reg,
-		   const char *loglvl)
-{
-	unsigned long ret_addr;
-	int cnt = LOOP_TIMES, graph = 0;
-	printk("%sCall Trace:\n", loglvl);
-	if (!IS_ENABLED(CONFIG_FRAME_POINTER)) {
-		while (!kstack_end(base_reg)) {
-			ret_addr = *base_reg++;
-			if (__kernel_text_address(ret_addr)) {
-				ret_addr = ftrace_graph_ret_addr(
-						tsk, &graph, ret_addr, NULL);
-				print_ip_sym(loglvl, ret_addr);
-			}
-			if (--cnt < 0)
-				break;
-		}
-	} else {
-		while (!kstack_end((void *)base_reg) &&
-		       !((unsigned long)base_reg & 0x3) &&
-		       ((unsigned long)base_reg >= TASK_SIZE)) {
-			unsigned long next_fp;
-			ret_addr = base_reg[LP_OFFSET];
-			next_fp = base_reg[FP_OFFSET];
-			if (__kernel_text_address(ret_addr)) {
-
-				ret_addr = ftrace_graph_ret_addr(
-						tsk, &graph, ret_addr, NULL);
-				print_ip_sym(loglvl, ret_addr);
-			}
-			if (--cnt < 0)
-				break;
-			base_reg = (unsigned long *)next_fp;
-		}
-	}
-	printk("%s\n", loglvl);
-}
-
-void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl)
-{
-	unsigned long *base_reg;
-
-	if (!tsk)
-		tsk = current;
-	if (!IS_ENABLED(CONFIG_FRAME_POINTER)) {
-		if (tsk != current)
-			base_reg = (unsigned long *)(tsk->thread.cpu_context.sp);
-		else
-			__asm__ __volatile__("\tori\t%0, $sp, #0\n":"=r"(base_reg));
-	} else {
-		if (tsk != current)
-			base_reg = (unsigned long *)(tsk->thread.cpu_context.fp);
-		else
-			__asm__ __volatile__("\tori\t%0, $fp, #0\n":"=r"(base_reg));
-	}
-	__dump(tsk, base_reg, loglvl);
-	barrier();
-}
-
-DEFINE_SPINLOCK(die_lock);
-
-/*
- * This function is protected against re-entrancy.
- */
-void __noreturn die(const char *str, struct pt_regs *regs, int err)
-{
-	struct task_struct *tsk = current;
-	static int die_counter;
-
-	console_verbose();
-	spin_lock_irq(&die_lock);
-	bust_spinlocks(1);
-
-	pr_emerg("Internal error: %s: %x [#%d]\n", str, err, ++die_counter);
-	print_modules();
-	pr_emerg("CPU: %i\n", smp_processor_id());
-	show_regs(regs);
-	pr_emerg("Process %s (pid: %d, stack limit = 0x%p)\n",
-		 tsk->comm, tsk->pid, end_of_stack(tsk));
-
-	if (!user_mode(regs) || in_interrupt()) {
-		dump_mem("Stack: ", regs->sp, (regs->sp + PAGE_SIZE) & PAGE_MASK);
-		dump_stack();
-	}
-
-	bust_spinlocks(0);
-	spin_unlock_irq(&die_lock);
-	make_task_dead(SIGSEGV);
-}
-
-EXPORT_SYMBOL(die);
-
-void die_if_kernel(const char *str, struct pt_regs *regs, int err)
-{
-	if (user_mode(regs))
-		return;
-
-	die(str, regs, err);
-}
-
-int bad_syscall(int n, struct pt_regs *regs)
-{
-	if (current->personality != PER_LINUX) {
-		send_sig(SIGSEGV, current, 1);
-		return regs->uregs[0];
-	}
-
-	force_sig_fault(SIGILL, ILL_ILLTRP,
-			(void __user *)instruction_pointer(regs) - 4);
-	die_if_kernel("Oops - bad syscall", regs, n);
-	return regs->uregs[0];
-}
-
-void __pte_error(const char *file, int line, unsigned long val)
-{
-	pr_emerg("%s:%d: bad pte %08lx.\n", file, line, val);
-}
-
-void __pmd_error(const char *file, int line, unsigned long val)
-{
-	pr_emerg("%s:%d: bad pmd %08lx.\n", file, line, val);
-}
-
-void __pgd_error(const char *file, int line, unsigned long val)
-{
-	pr_emerg("%s:%d: bad pgd %08lx.\n", file, line, val);
-}
-
-extern char *exception_vector, *exception_vector_end;
-void __init early_trap_init(void)
-{
-	unsigned long ivb = 0;
-	unsigned long base = PAGE_OFFSET;
-
-	memcpy((unsigned long *)base, (unsigned long *)&exception_vector,
-	       ((unsigned long)&exception_vector_end -
-		(unsigned long)&exception_vector));
-	ivb = __nds32__mfsr(NDS32_SR_IVB);
-	/* Check platform support. */
-	if (((ivb & IVB_mskNIVIC) >> IVB_offNIVIC) < 2)
-		panic
-		    ("IVIC mode is not allowed on the platform with interrupt controller\n");
-	__nds32__mtsr((ivb & ~IVB_mskESZ) | (IVB_valESZ16 << IVB_offESZ) |
-		      IVB_BASE, NDS32_SR_IVB);
-	__nds32__mtsr(INT_MASK_INITAIAL_VAL, NDS32_SR_INT_MASK);
-
-	/*
-	 * 0x800 = 128 vectors * 16byte.
-	 * It should be enough to flush a page.
-	 */
-	cpu_cache_wbinval_page(base, true);
-}
-
-static void send_sigtrap(struct pt_regs *regs, int error_code, int si_code)
-{
-	struct task_struct *tsk = current;
-
-	tsk->thread.trap_no = ENTRY_DEBUG_RELATED;
-	tsk->thread.error_code = error_code;
-
-	force_sig_fault(SIGTRAP, si_code,
-			(void __user *)instruction_pointer(regs));
-}
-
-void do_debug_trap(unsigned long entry, unsigned long addr,
-		   unsigned long type, struct pt_regs *regs)
-{
-	if (notify_die(DIE_OOPS, "Oops", regs, addr, type, SIGTRAP)
-	    == NOTIFY_STOP)
-		return;
-
-	if (user_mode(regs)) {
-		/* trap_signal */
-		send_sigtrap(regs, 0, TRAP_BRKPT);
-	} else {
-		/* kernel_trap */
-		if (!fixup_exception(regs))
-			die("unexpected kernel_trap", regs, 0);
-	}
-}
-
-void unhandled_interruption(struct pt_regs *regs)
-{
-	pr_emerg("unhandled_interruption\n");
-	show_regs(regs);
-	if (!user_mode(regs))
-		make_task_dead(SIGKILL);
-	force_sig(SIGKILL);
-}
-
-void unhandled_exceptions(unsigned long entry, unsigned long addr,
-			  unsigned long type, struct pt_regs *regs)
-{
-	pr_emerg("Unhandled Exception: entry: %lx addr:%lx itype:%lx\n", entry,
-		 addr, type);
-	show_regs(regs);
-	if (!user_mode(regs))
-		make_task_dead(SIGKILL);
-	force_sig(SIGKILL);
-}
-
-extern int do_page_fault(unsigned long entry, unsigned long addr,
-			 unsigned int error_code, struct pt_regs *regs);
-
-/*
- * 2:DEF dispatch for TLB MISC exception handler
-*/
-
-void do_dispatch_tlb_misc(unsigned long entry, unsigned long addr,
-			  unsigned long type, struct pt_regs *regs)
-{
-	type = type & (ITYPE_mskINST | ITYPE_mskETYPE);
-	if ((type & ITYPE_mskETYPE) < 5) {
-		/* Permission exceptions */
-		do_page_fault(entry, addr, type, regs);
-	} else
-		unhandled_exceptions(entry, addr, type, regs);
-}
-
-void do_revinsn(struct pt_regs *regs)
-{
-	pr_emerg("Reserved Instruction\n");
-	show_regs(regs);
-	if (!user_mode(regs))
-		make_task_dead(SIGILL);
-	force_sig(SIGILL);
-}
-
-#ifdef CONFIG_ALIGNMENT_TRAP
-extern int unalign_access_mode;
-extern int do_unaligned_access(unsigned long addr, struct pt_regs *regs);
-#endif
-void do_dispatch_general(unsigned long entry, unsigned long addr,
-			 unsigned long itype, struct pt_regs *regs,
-			 unsigned long oipc)
-{
-	unsigned int swid = itype >> ITYPE_offSWID;
-	unsigned long type = itype & (ITYPE_mskINST | ITYPE_mskETYPE);
-	if (type == ETYPE_ALIGNMENT_CHECK) {
-#ifdef CONFIG_ALIGNMENT_TRAP
-		/* Alignment check */
-		if (user_mode(regs) && unalign_access_mode) {
-			int ret;
-			ret = do_unaligned_access(addr, regs);
-
-			if (ret == 0)
-				return;
-
-			if (ret == -EFAULT)
-				pr_emerg
-				    ("Unhandled unaligned access exception\n");
-		}
-#endif
-		do_page_fault(entry, addr, type, regs);
-	} else if (type == ETYPE_RESERVED_INSTRUCTION) {
-		/* Reserved instruction */
-		do_revinsn(regs);
-	} else if (type == ETYPE_COPROCESSOR) {
-		/* Coprocessor */
-#if IS_ENABLED(CONFIG_FPU)
-		unsigned int fucop_exist = __nds32__mfsr(NDS32_SR_FUCOP_EXIST);
-		unsigned int cpid = ((itype & ITYPE_mskCPID) >> ITYPE_offCPID);
-
-		if ((cpid == FPU_CPID) &&
-		    (fucop_exist & FUCOP_EXIST_mskCP0ISFPU)) {
-			unsigned int subtype = (itype & ITYPE_mskSTYPE);
-
-			if (true == do_fpu_exception(subtype, regs))
-				return;
-		}
-#endif
-		unhandled_exceptions(entry, addr, type, regs);
-	} else if (type == ETYPE_TRAP && swid == SWID_RAISE_INTERRUPT_LEVEL) {
-		/* trap, used on v3 EDM target debugging workaround */
-		/*
-		 * DIPC(OIPC) is passed as parameter before
-		 * interrupt is enabled, so the DIPC will not be corrupted
-		 * even though interrupts are coming in
-		 */
-		/*
-		 * 1. update ipc
-		 * 2. update pt_regs ipc with oipc
-		 * 3. update pt_regs ipsw (clear DEX)
-		 */
-		__asm__ volatile ("mtsr %0, $IPC\n\t"::"r" (oipc));
-		regs->ipc = oipc;
-		if (regs->pipsw & PSW_mskDEX) {
-			pr_emerg
-			    ("Nested Debug exception is possibly happened\n");
-			pr_emerg("ipc:%08x pipc:%08x\n",
-				 (unsigned int)regs->ipc,
-				 (unsigned int)regs->pipc);
-		}
-		do_debug_trap(entry, addr, itype, regs);
-		regs->ipsw &= ~PSW_mskDEX;
-	} else
-		unhandled_exceptions(entry, addr, type, regs);
-}
diff --git a/arch/nds32/kernel/vdso.c b/arch/nds32/kernel/vdso.c
deleted file mode 100644
index e16009a07971..000000000000
--- a/arch/nds32/kernel/vdso.c
+++ /dev/null
@@ -1,231 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2012 ARM Limited
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/cache.h>
-#include <linux/clocksource.h>
-#include <linux/elf.h>
-#include <linux/err.h>
-#include <linux/errno.h>
-#include <linux/gfp.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/signal.h>
-#include <linux/slab.h>
-#include <linux/timekeeper_internal.h>
-#include <linux/vmalloc.h>
-#include <linux/random.h>
-
-#include <asm/cacheflush.h>
-#include <asm/vdso.h>
-#include <asm/vdso_datapage.h>
-#include <asm/vdso_timer_info.h>
-#include <asm/cache_info.h>
-extern struct cache_info L1_cache_info[2];
-extern char vdso_start[], vdso_end[];
-static unsigned long vdso_pages __ro_after_init;
-static unsigned long timer_mapping_base;
-
-struct timer_info_t timer_info = {
-	.cycle_count_down = true,
-	.mapping_base = EMPTY_TIMER_MAPPING,
-	.cycle_count_reg_offset = EMPTY_REG_OFFSET
-};
-/*
- * The vDSO data page.
- */
-static struct page *no_pages[] = { NULL };
-
-static union {
-	struct vdso_data data;
-	u8 page[PAGE_SIZE];
-} vdso_data_store __page_aligned_data;
-struct vdso_data *vdso_data = &vdso_data_store.data;
-static struct vm_special_mapping vdso_spec[2] __ro_after_init = {
-	{
-	 .name = "[vvar]",
-	 .pages = no_pages,
-	 },
-	{
-	 .name = "[vdso]",
-	 },
-};
-
-static void get_timer_node_info(void)
-{
-	timer_mapping_base = timer_info.mapping_base;
-	vdso_data->cycle_count_offset =
-		timer_info.cycle_count_reg_offset;
-	vdso_data->cycle_count_down =
-		timer_info.cycle_count_down;
-}
-
-static int __init vdso_init(void)
-{
-	int i;
-	struct page **vdso_pagelist;
-
-	if (memcmp(vdso_start, "\177ELF", 4)) {
-		pr_err("vDSO is not a valid ELF object!\n");
-		return -EINVAL;
-	}
-	/* Creat a timer io mapping to get clock cycles counter */
-	get_timer_node_info();
-
-	vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT;
-	pr_info("vdso: %ld pages (%ld code @ %p, %ld data @ %p)\n",
-		vdso_pages + 1, vdso_pages, vdso_start, 1L, vdso_data);
-
-	/* Allocate the vDSO pagelist */
-	vdso_pagelist = kcalloc(vdso_pages, sizeof(struct page *), GFP_KERNEL);
-	if (vdso_pagelist == NULL)
-		return -ENOMEM;
-
-	for (i = 0; i < vdso_pages; i++)
-		vdso_pagelist[i] = virt_to_page(vdso_start + i * PAGE_SIZE);
-	vdso_spec[1].pages = &vdso_pagelist[0];
-
-	return 0;
-}
-
-arch_initcall(vdso_init);
-
-unsigned long inline vdso_random_addr(unsigned long vdso_mapping_len)
-{
-	unsigned long start = current->mm->mmap_base, end, offset, addr;
-	start = PAGE_ALIGN(start);
-
-	/* Round the lowest possible end address up to a PMD boundary. */
-	end = (start + vdso_mapping_len + PMD_SIZE - 1) & PMD_MASK;
-	if (end >= TASK_SIZE)
-		end = TASK_SIZE;
-	end -= vdso_mapping_len;
-
-	if (end > start) {
-		offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1);
-		addr = start + (offset << PAGE_SHIFT);
-	} else {
-		addr = start;
-	}
-	return addr;
-}
-
-int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
-{
-	struct mm_struct *mm = current->mm;
-	unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
-	struct vm_area_struct *vma;
-	unsigned long addr = 0;
-	pgprot_t prot;
-	int ret, vvar_page_num = 2;
-
-	vdso_text_len = vdso_pages << PAGE_SHIFT;
-
-	if(timer_mapping_base == EMPTY_VALUE)
-		vvar_page_num = 1;
-	/* Be sure to map the data page */
-	vdso_mapping_len = vdso_text_len + vvar_page_num * PAGE_SIZE;
-#ifdef CONFIG_CPU_CACHE_ALIASING
-	vdso_mapping_len += L1_cache_info[DCACHE].aliasing_num - 1;
-#endif
-
-	if (mmap_write_lock_killable(mm))
-		return -EINTR;
-
-	addr = vdso_random_addr(vdso_mapping_len);
-	vdso_base = get_unmapped_area(NULL, addr, vdso_mapping_len, 0, 0);
-	if (IS_ERR_VALUE(vdso_base)) {
-		ret = vdso_base;
-		goto up_fail;
-	}
-
-#ifdef CONFIG_CPU_CACHE_ALIASING
-	{
-		unsigned int aliasing_mask =
-		    L1_cache_info[DCACHE].aliasing_mask;
-		unsigned int page_colour_ofs;
-		page_colour_ofs = ((unsigned int)vdso_data & aliasing_mask) -
-		    (vdso_base & aliasing_mask);
-		vdso_base += page_colour_ofs & aliasing_mask;
-	}
-#endif
-
-	vma = _install_special_mapping(mm, vdso_base, vvar_page_num * PAGE_SIZE,
-				       VM_READ | VM_MAYREAD, &vdso_spec[0]);
-	if (IS_ERR(vma)) {
-		ret = PTR_ERR(vma);
-		goto up_fail;
-	}
-
-	/*Map vdata to user space */
-	ret = io_remap_pfn_range(vma, vdso_base,
-				 virt_to_phys(vdso_data) >> PAGE_SHIFT,
-				 PAGE_SIZE, vma->vm_page_prot);
-	if (ret)
-		goto up_fail;
-
-	/*Map timer to user space */
-	vdso_base += PAGE_SIZE;
-	prot = __pgprot(_PAGE_V | _PAGE_M_UR_KR | _PAGE_D |  _PAGE_C_DEV);
-	ret = io_remap_pfn_range(vma, vdso_base, timer_mapping_base >> PAGE_SHIFT,
-				 PAGE_SIZE, prot);
-	if (ret)
-		goto up_fail;
-
-	/*Map vdso to user space */
-	vdso_base += PAGE_SIZE;
-	mm->context.vdso = (void *)vdso_base;
-	vma = _install_special_mapping(mm, vdso_base, vdso_text_len,
-				       VM_READ | VM_EXEC |
-				       VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
-				       &vdso_spec[1]);
-	if (IS_ERR(vma)) {
-		ret = PTR_ERR(vma);
-		goto up_fail;
-	}
-
-	mmap_write_unlock(mm);
-	return 0;
-
-up_fail:
-	mm->context.vdso = NULL;
-	mmap_write_unlock(mm);
-	return ret;
-}
-
-static void vdso_write_begin(struct vdso_data *vdata)
-{
-	++vdso_data->seq_count;
-	smp_wmb();		/* Pairs with smp_rmb in vdso_read_retry */
-}
-
-static void vdso_write_end(struct vdso_data *vdata)
-{
-	smp_wmb();		/* Pairs with smp_rmb in vdso_read_begin */
-	++vdso_data->seq_count;
-}
-
-void update_vsyscall(struct timekeeper *tk)
-{
-	vdso_write_begin(vdso_data);
-	vdso_data->cs_mask = tk->tkr_mono.mask;
-	vdso_data->cs_mult = tk->tkr_mono.mult;
-	vdso_data->cs_shift = tk->tkr_mono.shift;
-	vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last;
-	vdso_data->wtm_clock_sec = tk->wall_to_monotonic.tv_sec;
-	vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec;
-	vdso_data->xtime_clock_sec = tk->xtime_sec;
-	vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
-	vdso_data->xtime_coarse_sec = tk->xtime_sec;
-	vdso_data->xtime_coarse_nsec = tk->tkr_mono.xtime_nsec >>
-	    tk->tkr_mono.shift;
-	vdso_data->hrtimer_res = hrtimer_resolution;
-	vdso_write_end(vdso_data);
-}
-
-void update_vsyscall_tz(void)
-{
-	vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
-	vdso_data->tz_dsttime = sys_tz.tz_dsttime;
-}
diff --git a/arch/nds32/kernel/vdso/.gitignore b/arch/nds32/kernel/vdso/.gitignore
deleted file mode 100644
index 652e31d82582..000000000000
--- a/arch/nds32/kernel/vdso/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-vdso.lds
diff --git a/arch/nds32/kernel/vdso/Makefile b/arch/nds32/kernel/vdso/Makefile
deleted file mode 100644
index 55df25ef0057..000000000000
--- a/arch/nds32/kernel/vdso/Makefile
+++ /dev/null
@@ -1,79 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# Building a vDSO image for AArch64.
-#
-# Author: Will Deacon <will.deacon@arm.com>
-# Heavily based on the vDSO Makefiles for other archs.
-#
-
-obj-vdso := note.o datapage.o sigreturn.o gettimeofday.o
-
-# Build rules
-targets := $(obj-vdso) vdso.so vdso.so.dbg
-obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
-
-ccflags-y := -shared -fno-common -fno-builtin -nostdlib -fPIC -Wl,-shared -g \
-	-Wl,-soname=linux-vdso.so.1 -Wl,--hash-style=sysv
-
-# Disable gcov profiling for VDSO code
-GCOV_PROFILE := n
-
-
-obj-y += vdso.o
-targets += vdso.lds
-CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
-
-# Force dependency
-$(obj)/vdso.o : $(obj)/vdso.so
-
-# Link rule for the .so file, .lds has to be first
-$(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE
-	$(call if_changed,vdsold)
-
-
-# Strip rule for the .so file
-$(obj)/%.so: OBJCOPYFLAGS := -S
-$(obj)/%.so: $(obj)/%.so.dbg FORCE
-	$(call if_changed,objcopy)
-
-# Generate VDSO offsets using helper script
-gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh
-quiet_cmd_vdsosym = VDSOSYM $@
-      cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
-
-include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE
-	$(call if_changed,vdsosym)
-
-
-
-# Assembly rules for the .S files
-
-sigreturn.o : sigreturn.S
-	$(call if_changed_dep,vdsoas)
-
-note.o : note.S
-	$(call if_changed_dep,vdsoas)
-
-datapage.o : datapage.S
-	$(call if_changed_dep,vdsoas)
-
-gettimeofday.o : gettimeofday.c FORCE
-	$(call if_changed_dep,vdsocc)
-
-# Actual build commands
-quiet_cmd_vdsold = VDSOL   $@
-      cmd_vdsold = $(CC) $(c_flags) -Wl,-n -Wl,-T $(real-prereqs) -o $@
-quiet_cmd_vdsoas = VDSOA   $@
-      cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $<
-quiet_cmd_vdsocc = VDSOA   $@
-      cmd_vdsocc = $(CC) $(c_flags) -c -o $@ $<
-
-# Install commands for the unstripped file
-quiet_cmd_vdso_install = INSTALL $@
-      cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
-
-vdso.so: $(obj)/vdso.so.dbg
-	@mkdir -p $(MODLIB)/vdso
-	$(call cmd,vdso_install)
-
-vdso_install: vdso.so
diff --git a/arch/nds32/kernel/vdso/datapage.S b/arch/nds32/kernel/vdso/datapage.S
deleted file mode 100644
index 4a62c3cab1c8..000000000000
--- a/arch/nds32/kernel/vdso/datapage.S
+++ /dev/null
@@ -1,21 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/linkage.h>
-#include <asm/page.h>
-
-ENTRY(__get_timerpage)
-	sethi	$r0, hi20(. + PAGE_SIZE + 8)
-	ori	$r0, $r0, lo12(. + PAGE_SIZE + 4)
-	mfusr	$r1, $pc
-	sub	$r0, $r1, $r0
-	ret
-ENDPROC(__get_timerpage)
-
-ENTRY(__get_datapage)
-	sethi	$r0, hi20(. + 2*PAGE_SIZE + 8)
-	ori	$r0, $r0, lo12(. + 2*PAGE_SIZE + 4)
-	mfusr	$r1, $pc
-	sub	$r0, $r1, $r0
-	ret
-ENDPROC(__get_datapage)
diff --git a/arch/nds32/kernel/vdso/gen_vdso_offsets.sh b/arch/nds32/kernel/vdso/gen_vdso_offsets.sh
deleted file mode 100755
index 01924ff071ad..000000000000
--- a/arch/nds32/kernel/vdso/gen_vdso_offsets.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/bin/sh
-
-#
-# Match symbols in the DSO that look like VDSO_*; produce a header file
-# of constant offsets into the shared object.
-#
-# Doing this inside the Makefile will break the $(filter-out) function,
-# causing Kbuild to rebuild the vdso-offsets header file every time.
-#
-# Author: Will Deacon <will.deacon@arm.com
-#
-
-LC_ALL=C
-sed -n -e 's/^00*/0/' -e \
-'s/^\([0-9a-fA-F]*\) . VDSO_\([a-zA-Z0-9_]*\)$/\#define vdso_offset_\2\t0x\1/p'
diff --git a/arch/nds32/kernel/vdso/gettimeofday.c b/arch/nds32/kernel/vdso/gettimeofday.c
deleted file mode 100644
index 9ec03cf0ec54..000000000000
--- a/arch/nds32/kernel/vdso/gettimeofday.c
+++ /dev/null
@@ -1,269 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/compiler.h>
-#include <linux/hrtimer.h>
-#include <linux/time.h>
-#include <asm/io.h>
-#include <asm/barrier.h>
-#include <asm/bug.h>
-#include <asm/page.h>
-#include <asm/unistd.h>
-#include <asm/vdso_datapage.h>
-#include <asm/vdso_timer_info.h>
-#include <asm/asm-offsets.h>
-
-#define X(x) #x
-#define Y(x) X(x)
-
-extern struct vdso_data *__get_datapage(void);
-extern struct vdso_data *__get_timerpage(void);
-
-static notrace unsigned int __vdso_read_begin(const struct vdso_data *vdata)
-{
-	u32 seq;
-repeat:
-	seq = READ_ONCE(vdata->seq_count);
-	if (seq & 1) {
-		cpu_relax();
-		goto repeat;
-	}
-	return seq;
-}
-
-static notrace unsigned int vdso_read_begin(const struct vdso_data *vdata)
-{
-	unsigned int seq;
-
-	seq = __vdso_read_begin(vdata);
-
-	smp_rmb();		/* Pairs with smp_wmb in vdso_write_end */
-	return seq;
-}
-
-static notrace int vdso_read_retry(const struct vdso_data *vdata, u32 start)
-{
-	smp_rmb();		/* Pairs with smp_wmb in vdso_write_begin */
-	return vdata->seq_count != start;
-}
-
-static notrace long clock_gettime_fallback(clockid_t _clkid,
-					   struct __kernel_old_timespec *_ts)
-{
-	register struct __kernel_old_timespec *ts asm("$r1") = _ts;
-	register clockid_t clkid asm("$r0") = _clkid;
-	register long ret asm("$r0");
-
-	asm volatile ("movi	$r15, %3\n"
-		      "syscall 	0x0\n"
-		      :"=r" (ret)
-		      :"r"(clkid), "r"(ts), "i"(__NR_clock_gettime)
-		      :"$r15", "memory");
-
-	return ret;
-}
-
-static notrace int do_realtime_coarse(struct __kernel_old_timespec *ts,
-				      struct vdso_data *vdata)
-{
-	u32 seq;
-
-	do {
-		seq = vdso_read_begin(vdata);
-
-		ts->tv_sec = vdata->xtime_coarse_sec;
-		ts->tv_nsec = vdata->xtime_coarse_nsec;
-
-	} while (vdso_read_retry(vdata, seq));
-	return 0;
-}
-
-static notrace int do_monotonic_coarse(struct __kernel_old_timespec *ts,
-				       struct vdso_data *vdata)
-{
-	u32 seq;
-	u64 ns;
-
-	do {
-		seq = vdso_read_begin(vdata);
-
-		ts->tv_sec = vdata->xtime_coarse_sec + vdata->wtm_clock_sec;
-		ns = vdata->xtime_coarse_nsec + vdata->wtm_clock_nsec;
-
-	} while (vdso_read_retry(vdata, seq));
-
-	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
-	ts->tv_nsec = ns;
-
-	return 0;
-}
-
-static notrace inline u64 vgetsns(struct vdso_data *vdso)
-{
-	u32 cycle_now;
-	u32 cycle_delta;
-	u32 *timer_cycle_base;
-
-	timer_cycle_base =
-	    (u32 *) ((char *)__get_timerpage() + vdso->cycle_count_offset);
-	cycle_now = readl_relaxed(timer_cycle_base);
-	if (true == vdso->cycle_count_down)
-		cycle_now = ~(*timer_cycle_base);
-	cycle_delta = cycle_now - (u32) vdso->cs_cycle_last;
-	return ((u64) cycle_delta & vdso->cs_mask) * vdso->cs_mult;
-}
-
-static notrace int do_realtime(struct __kernel_old_timespec *ts, struct vdso_data *vdata)
-{
-	unsigned count;
-	u64 ns;
-	do {
-		count = vdso_read_begin(vdata);
-		ts->tv_sec = vdata->xtime_clock_sec;
-		ns = vdata->xtime_clock_nsec;
-		ns += vgetsns(vdata);
-		ns >>= vdata->cs_shift;
-	} while (vdso_read_retry(vdata, count));
-
-	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
-	ts->tv_nsec = ns;
-
-	return 0;
-}
-
-static notrace int do_monotonic(struct __kernel_old_timespec *ts, struct vdso_data *vdata)
-{
-	u64 ns;
-	u32 seq;
-
-	do {
-		seq = vdso_read_begin(vdata);
-
-		ts->tv_sec = vdata->xtime_clock_sec;
-		ns = vdata->xtime_clock_nsec;
-		ns += vgetsns(vdata);
-		ns >>= vdata->cs_shift;
-
-		ts->tv_sec += vdata->wtm_clock_sec;
-		ns += vdata->wtm_clock_nsec;
-
-	} while (vdso_read_retry(vdata, seq));
-
-	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
-	ts->tv_nsec = ns;
-
-	return 0;
-}
-
-notrace int __vdso_clock_gettime(clockid_t clkid, struct __kernel_old_timespec *ts)
-{
-	struct vdso_data *vdata;
-	int ret = -1;
-
-	vdata = __get_datapage();
-	if (vdata->cycle_count_offset == EMPTY_REG_OFFSET)
-		return clock_gettime_fallback(clkid, ts);
-
-	switch (clkid) {
-	case CLOCK_REALTIME_COARSE:
-		ret = do_realtime_coarse(ts, vdata);
-		break;
-	case CLOCK_MONOTONIC_COARSE:
-		ret = do_monotonic_coarse(ts, vdata);
-		break;
-	case CLOCK_REALTIME:
-		ret = do_realtime(ts, vdata);
-		break;
-	case CLOCK_MONOTONIC:
-		ret = do_monotonic(ts, vdata);
-		break;
-	default:
-		break;
-	}
-
-	if (ret)
-		ret = clock_gettime_fallback(clkid, ts);
-
-	return ret;
-}
-
-static notrace int clock_getres_fallback(clockid_t _clk_id,
-					  struct __kernel_old_timespec *_res)
-{
-	register clockid_t clk_id asm("$r0") = _clk_id;
-	register struct __kernel_old_timespec *res asm("$r1") = _res;
-	register int ret asm("$r0");
-
-	asm volatile ("movi	$r15, %3\n"
-		      "syscall	0x0\n"
-		      :"=r" (ret)
-		      :"r"(clk_id), "r"(res), "i"(__NR_clock_getres)
-		      :"$r15", "memory");
-
-	return ret;
-}
-
-notrace int __vdso_clock_getres(clockid_t clk_id, struct __kernel_old_timespec *res)
-{
-	struct vdso_data *vdata = __get_datapage();
-
-	if (res == NULL)
-		return 0;
-	switch (clk_id) {
-	case CLOCK_REALTIME:
-	case CLOCK_MONOTONIC:
-	case CLOCK_MONOTONIC_RAW:
-		res->tv_sec = 0;
-		res->tv_nsec = vdata->hrtimer_res;
-		break;
-	case CLOCK_REALTIME_COARSE:
-	case CLOCK_MONOTONIC_COARSE:
-		res->tv_sec = 0;
-		res->tv_nsec = CLOCK_COARSE_RES;
-		break;
-	default:
-		return clock_getres_fallback(clk_id, res);
-	}
-	return 0;
-}
-
-static notrace inline int gettimeofday_fallback(struct __kernel_old_timeval *_tv,
-						struct timezone *_tz)
-{
-	register struct __kernel_old_timeval *tv asm("$r0") = _tv;
-	register struct timezone *tz asm("$r1") = _tz;
-	register int ret asm("$r0");
-
-	asm volatile ("movi	$r15, %3\n"
-		      "syscall	0x0\n"
-		      :"=r" (ret)
-		      :"r"(tv), "r"(tz), "i"(__NR_gettimeofday)
-		      :"$r15", "memory");
-
-	return ret;
-}
-
-notrace int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
-{
-	struct __kernel_old_timespec ts;
-	struct vdso_data *vdata;
-	int ret;
-
-	vdata = __get_datapage();
-
-	if (vdata->cycle_count_offset == EMPTY_REG_OFFSET)
-		return gettimeofday_fallback(tv, tz);
-
-	ret = do_realtime(&ts, vdata);
-
-	if (tv) {
-		tv->tv_sec = ts.tv_sec;
-		tv->tv_usec = ts.tv_nsec / 1000;
-	}
-	if (tz) {
-		tz->tz_minuteswest = vdata->tz_minuteswest;
-		tz->tz_dsttime = vdata->tz_dsttime;
-	}
-
-	return ret;
-}
diff --git a/arch/nds32/kernel/vdso/note.S b/arch/nds32/kernel/vdso/note.S
deleted file mode 100644
index 0aeaa19b05f0..000000000000
--- a/arch/nds32/kernel/vdso/note.S
+++ /dev/null
@@ -1,11 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2012 ARM Limited
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/uts.h>
-#include <linux/version.h>
-#include <linux/elfnote.h>
-
-ELFNOTE_START(Linux, 0, "a")
-	.long LINUX_VERSION_CODE
-ELFNOTE_END
diff --git a/arch/nds32/kernel/vdso/sigreturn.S b/arch/nds32/kernel/vdso/sigreturn.S
deleted file mode 100644
index 67e4d1d1612a..000000000000
--- a/arch/nds32/kernel/vdso/sigreturn.S
+++ /dev/null
@@ -1,19 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2012 ARM Limited
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/linkage.h>
-#include <asm/unistd.h>
-
-	.text
-
-ENTRY(__kernel_rt_sigreturn)
-	.cfi_startproc
-	movi $r15, __NR_rt_sigreturn
-	/*
-	 * The SWID of syscall should be __NR_rt_sigreturn to synchronize
-	 * the unwinding scheme in gcc
-	 */
-	syscall	__NR_rt_sigreturn
-	.cfi_endproc
-ENDPROC(__kernel_rt_sigreturn)
diff --git a/arch/nds32/kernel/vdso/vdso.S b/arch/nds32/kernel/vdso/vdso.S
deleted file mode 100644
index 16737c11e55b..000000000000
--- a/arch/nds32/kernel/vdso/vdso.S
+++ /dev/null
@@ -1,18 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2012 ARM Limited
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/init.h>
-#include <linux/linkage.h>
-#include <linux/const.h>
-#include <asm/page.h>
-
-	.globl vdso_start, vdso_end
-	.section .rodata
-	.balign PAGE_SIZE
-vdso_start:
-	.incbin "arch/nds32/kernel/vdso/vdso.so"
-	.balign PAGE_SIZE
-vdso_end:
-
-	.previous
diff --git a/arch/nds32/kernel/vdso/vdso.lds.S b/arch/nds32/kernel/vdso/vdso.lds.S
deleted file mode 100644
index 1f2b16004594..000000000000
--- a/arch/nds32/kernel/vdso/vdso.lds.S
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * SPDX-License-Identifier: GPL-2.0
- * Copyright (C) 2005-2017 Andes Technology Corporation
- */
-
-
-#include <linux/const.h>
-#include <asm/page.h>
-#include <asm/vdso.h>
-
-OUTPUT_ARCH(nds32)
-
-SECTIONS
-{
-	. = SIZEOF_HEADERS;
-
-	.hash		: { *(.hash) }			:text
-	.gnu.hash	: { *(.gnu.hash) }
-	.dynsym		: { *(.dynsym) }
-	.dynstr		: { *(.dynstr) }
-	.gnu.version	: { *(.gnu.version) }
-	.gnu.version_d	: { *(.gnu.version_d) }
-	.gnu.version_r	: { *(.gnu.version_r) }
-
-	.note		: { *(.note.*) }		:text	:note
-
-
-	.text		: { *(.text*) }			:text
-
-	.eh_frame_hdr	: { *(.eh_frame_hdr) }		:text	:eh_frame_hdr
-	.eh_frame	: { KEEP (*(.eh_frame)) }	:text
-
-	.dynamic	: { *(.dynamic) }		:text	:dynamic
-
-	.rodata		: { *(.rodata*) }		:text
-
-
-	/DISCARD/	: {
-		*(.note.GNU-stack)
-		*(.data .data.* .gnu.linkonce.d.* .sdata*)
-		*(.bss .sbss .dynbss .dynsbss)
-	}
-}
-
-/*
- * We must supply the ELF program headers explicitly to get just one
- * PT_LOAD segment, and set the flags explicitly to make segments read-only.
- */
-PHDRS
-{
-	text		PT_LOAD		FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */
-	dynamic		PT_DYNAMIC	FLAGS(4);		/* PF_R */
-	note		PT_NOTE		FLAGS(4);		/* PF_R */
-	eh_frame_hdr	PT_GNU_EH_FRAME;
-}
-
-/*
- * This controls what symbols we export from the DSO.
- */
-VERSION
-{
-	LINUX_4 {
-	global:
-		__kernel_rt_sigreturn;
-		__vdso_gettimeofday;
-		__vdso_clock_getres;
-		__vdso_clock_gettime;
-	local: *;
-	};
-}
-
-/*
- * Make the rt_sigreturn code visible to the kernel.
- */
-VDSO_rt_sigtramp	= __kernel_rt_sigreturn;
diff --git a/arch/nds32/kernel/vmlinux.lds.S b/arch/nds32/kernel/vmlinux.lds.S
deleted file mode 100644
index 6a91b965fb1e..000000000000
--- a/arch/nds32/kernel/vmlinux.lds.S
+++ /dev/null
@@ -1,70 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <asm/page.h>
-#include <asm/thread_info.h>
-#include <asm/cache.h>
-#include <asm/memory.h>
-
-#define LOAD_OFFSET	(PAGE_OFFSET - PHYS_OFFSET)
-#include <asm-generic/vmlinux.lds.h>
-
-OUTPUT_ARCH(nds32)
-ENTRY(_stext_lma)
-jiffies = jiffies_64;
-
-#if defined(CONFIG_GCOV_KERNEL)
-#define NDS32_EXIT_KEEP(x)	x
-#else
-#define NDS32_EXIT_KEEP(x)
-#endif
-
-SECTIONS
-{
-	_stext_lma = TEXTADDR - LOAD_OFFSET;
-	. = TEXTADDR;
-	__init_begin = .;
-	HEAD_TEXT_SECTION
-	.exit.text : {
-		NDS32_EXIT_KEEP(EXIT_TEXT)
-	}
-	INIT_TEXT_SECTION(PAGE_SIZE)
-	INIT_DATA_SECTION(16)
-	.exit.data : {
-		NDS32_EXIT_KEEP(EXIT_DATA)
-	}
-	PERCPU_SECTION(L1_CACHE_BYTES)
-	__init_end = .;
-
-	. = ALIGN(PAGE_SIZE);
-	_stext = .;
-	/* Real text segment */
-	.text : AT(ADDR(.text) - LOAD_OFFSET) {
-		_text = .;		/* Text and read-only data	*/
-		TEXT_TEXT
-		SCHED_TEXT
-		CPUIDLE_TEXT
-		LOCK_TEXT
-		KPROBES_TEXT
-		IRQENTRY_TEXT
-		SOFTIRQENTRY_TEXT
-		*(.fixup)
-	}
-
-	_etext = .;			/* End of text and rodata section */
-
-	_sdata = .;
-	RO_DATA(PAGE_SIZE)
-	RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
-	_edata  =  .;
-
-	EXCEPTION_TABLE(16)
-	BSS_SECTION(4, 4, 4)
-	_end = .;
-
-	STABS_DEBUG
-	DWARF_DEBUG
-	ELF_DETAILS
-
-	DISCARDS
-}
diff --git a/arch/nds32/lib/Makefile b/arch/nds32/lib/Makefile
deleted file mode 100644
index dddbc15d6b37..000000000000
--- a/arch/nds32/lib/Makefile
+++ /dev/null
@@ -1,4 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-lib-y		:= copy_page.o memcpy.o memmove.o   \
-		   memset.o memzero.o \
-		   copy_from_user.o copy_to_user.o clear_user.o
diff --git a/arch/nds32/lib/clear_user.S b/arch/nds32/lib/clear_user.S
deleted file mode 100644
index 805dfcd25bf8..000000000000
--- a/arch/nds32/lib/clear_user.S
+++ /dev/null
@@ -1,42 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-#include <asm/errno.h>
-
-/* Prototype: int __arch_clear_user(void *addr, size_t sz)
- * Purpose  : clear some user memory
- * Params   : addr - user memory address to clear
- *          : sz   - number of bytes to clear
- * Returns  : number of bytes NOT cleared
- */
-	.text
-	.align	5
-ENTRY(__arch_clear_user)
-	add	$r5, $r0, $r1
-	beqz	$r1, clear_exit
-	xor	$p1, $p1, $p1		! Use $p1=0 to clear mem
-	srli	$p0, $r1, #2		! $p0 = number of word to clear
-	andi	$r1, $r1, #3		! Bytes less than a word to copy
-	beqz	$p0, byte_clear		! Only less than a word to clear
-word_clear:
-USER(	smw.bim,$p1, [$r0], $p1)	! Clear the word
-	addi	$p0, $p0, #-1		! Decrease word count
-	bnez	$p0, word_clear		! Continue looping to clear all words
-	beqz	$r1, clear_exit		! No left bytes to copy
-byte_clear:
-USER(	sbi.bi,	$p1, [$r0], #1)		! Clear the byte
-	addi	$r1, $r1, #-1		! Decrease byte count
-	bnez	$r1, byte_clear		! Continue looping to clear all left bytes
-clear_exit:
-	move	$r0, $r1		! Set return value
-	ret
-
-	.section .fixup,"ax"
-	.align	0
-9001:
-	sub	$r0, $r5, $r0		! Bytes left to copy
-	ret
-	.previous
-ENDPROC(__arch_clear_user)
diff --git a/arch/nds32/lib/copy_from_user.S b/arch/nds32/lib/copy_from_user.S
deleted file mode 100644
index ad1857b20067..000000000000
--- a/arch/nds32/lib/copy_from_user.S
+++ /dev/null
@@ -1,45 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-#include <asm/errno.h>
-
-.macro 	lbi1 dst, addr, adj
-USER( lbi.bi, \dst, [\addr], \adj)
-.endm
-
-.macro 	sbi1 src, addr, adj
-sbi.bi	\src, [\addr], \adj
-.endm
-
-.macro	lmw1 start_reg, addr, end_reg
-USER( lmw.bim, \start_reg, [\addr], \end_reg)
-.endm
-
-.macro	smw1 start_reg, addr, end_reg
-smw.bim \start_reg, [\addr], \end_reg
-.endm
-
-
-/* Prototype: int __arch_copy_from_user(void *to, const char *from, size_t n)
- * Purpose  : copy a block from user memory to kernel memory
- * Params   : to   - kernel memory
- *          : from - user memory
- *          : n    - number of bytes to copy
- * Returns  : Number of bytes NOT copied.
- */
-
-.text
-ENTRY(__arch_copy_from_user)
-	add	$r5, $r0, $r2
-#include "copy_template.S"
-	move	$r0, $r2
-	ret
-.section .fixup,"ax"
-.align  2
-9001:
-	sub	$r0, $r5, $r0
-	ret
-.previous
-ENDPROC(__arch_copy_from_user)
diff --git a/arch/nds32/lib/copy_page.S b/arch/nds32/lib/copy_page.S
deleted file mode 100644
index f8701ed161a8..000000000000
--- a/arch/nds32/lib/copy_page.S
+++ /dev/null
@@ -1,40 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/linkage.h>
-#include <asm/export.h>
-#include <asm/page.h>
-
-	.text
-ENTRY(copy_page)
-	pushm	$r2, $r10
-	movi	$r2, PAGE_SIZE >> 5
-.Lcopy_loop:
-	lmw.bim	$r3, [$r1], $r10
-	smw.bim	$r3, [$r0], $r10
-	subi45	$r2, #1
-	bnez38	$r2, .Lcopy_loop
-	popm	$r2, $r10
-	ret
-ENDPROC(copy_page)
-EXPORT_SYMBOL(copy_page)
-
-ENTRY(clear_page)
-	pushm	$r1, $r9
-	movi	$r1, PAGE_SIZE >> 5
-	movi55	$r2, #0
-	movi55	$r3, #0
-	movi55	$r4, #0
-	movi55	$r5, #0
-	movi55	$r6, #0
-	movi55	$r7, #0
-	movi55	$r8, #0
-	movi55	$r9, #0
-.Lclear_loop:
-	smw.bim	$r2, [$r0], $r9
-	subi45	$r1, #1
-	bnez38	$r1, .Lclear_loop
-	popm	$r1, $r9
-        ret
-ENDPROC(clear_page)
-EXPORT_SYMBOL(clear_page)
diff --git a/arch/nds32/lib/copy_template.S b/arch/nds32/lib/copy_template.S
deleted file mode 100644
index 3a9a2de468c2..000000000000
--- a/arch/nds32/lib/copy_template.S
+++ /dev/null
@@ -1,69 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-
-	beq	$r1, $r0, quit_memcpy
-	beqz	$r2, quit_memcpy
-	srli    $r3, $r2, #5	! check if len < cache-line size 32
-	beqz	$r3, word_copy_entry
-	andi	$r4, $r0, #0x3	! check byte-align
-	beqz	$r4, unalign_word_copy_entry
-
-	addi	$r4, $r4,#-4
-	abs	$r4, $r4	! check how many un-align byte to copy
-	sub	$r2, $r2, $r4	! update $R2
-
-unalign_byte_copy:
-	lbi1	$r3, $r1, #1
-	addi	$r4, $r4, #-1
-	sbi1	$r3, $r0, #1
-	bnez	$r4, unalign_byte_copy
-	beqz	$r2, quit_memcpy
-
-unalign_word_copy_entry:
-	andi	$r3, $r0, 0x1f	! check cache-line unaligncount
-	beqz	$r3, cache_copy
-
-	addi	$r3, $r3, #-32
-	abs	$r3, $r3
-	sub	$r2, $r2, $r3	! update $R2
-
-unalign_word_copy:
-	lmw1	$r4, $r1, $r4
-	addi	$r3, $r3, #-4
-	smw1	$r4, $r0, $r4
-	bnez	$r3, unalign_word_copy
-	beqz	$r2, quit_memcpy
-
-	addi	$r3, $r2, #-32	! to check $r2< cache_line , than go to word_copy
-	bltz	$r3, word_copy_entry
-cache_copy:
-	srli	$r3, $r2, #5
-	beqz	$r3, word_copy_entry
-3:
-	lmw1	$r17, $r1, $r24
-	addi	$r3, $r3, #-1
-	smw1	$r17, $r0, $r24
-	bnez	$r3, 3b
-
-word_copy_entry:
-	andi	$r2, $r2, #31
-
-	beqz	$r2, quit_memcpy
-5:
-	srli	$r3, $r2, #2
-	beqz	$r3, byte_copy
-word_copy:
-	lmw1	$r4, $r1, $r4
-	addi	$r3, $r3, #-1
-	smw1	$r4, $r0, $r4
-	bnez	$r3, word_copy
-	andi	$r2, $r2, #3
-	beqz	$r2, quit_memcpy
-byte_copy:
-	lbi1	$r3, $r1, #1
-	addi	$r2, $r2, #-1
-
-	sbi1	$r3, $r0, #1
-	bnez	$r2, byte_copy
-quit_memcpy:
diff --git a/arch/nds32/lib/copy_to_user.S b/arch/nds32/lib/copy_to_user.S
deleted file mode 100644
index 3230044dcfb8..000000000000
--- a/arch/nds32/lib/copy_to_user.S
+++ /dev/null
@@ -1,45 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-#include <asm/errno.h>
-
-.macro 	lbi1 dst, addr, adj
-lbi.bi	\dst, [\addr], \adj
-.endm
-
-.macro 	sbi1 src, addr, adj
-USER( sbi.bi, \src, [\addr], \adj)
-.endm
-
-.macro	lmw1 start_reg, addr, end_reg
-lmw.bim \start_reg, [\addr], \end_reg
-.endm
-
-.macro	smw1 start_reg, addr, end_reg
-USER( smw.bim, \start_reg, [\addr], \end_reg)
-.endm
-
-
-/* Prototype: int __arch_copy_to_user(void *to, const char *from, size_t n)
- * Purpose  : copy a block to user memory from kernel memory
- * Params   : to   - user memory
- *          : from - kernel memory
- *          : n    - number of bytes to copy
- * Returns  : Number of bytes NOT copied.
- */
-
-.text
-ENTRY(__arch_copy_to_user)
-	add	$r5, $r0, $r2
-#include "copy_template.S"
-	move	$r0, $r2
-	ret
-.section .fixup,"ax"
-.align  2
-9001:
-	sub	$r0, $r5, $r0
-	ret
-.previous
-ENDPROC(__arch_copy_to_user)
diff --git a/arch/nds32/lib/memcpy.S b/arch/nds32/lib/memcpy.S
deleted file mode 100644
index a2345ea721e4..000000000000
--- a/arch/nds32/lib/memcpy.S
+++ /dev/null
@@ -1,30 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/linkage.h>
-
-
-.macro 	lbi1 dst, addr, adj
-lbi.bi	\dst, [\addr], \adj
-.endm
-
-.macro 	sbi1 src, addr, adj
-sbi.bi	\src, [\addr], \adj
-.endm
-
-.macro	lmw1 start_reg, addr, end_reg
-lmw.bim \start_reg, [\addr], \end_reg
-.endm
-
-.macro	smw1 start_reg, addr, end_reg
-smw.bim \start_reg, [\addr], \end_reg
-.endm
-
-.text
-ENTRY(memcpy)
-	move	$r5, $r0
-#include "copy_template.S"
-	move	$r0, $r5
-	ret
-
-ENDPROC(memcpy)
diff --git a/arch/nds32/lib/memmove.S b/arch/nds32/lib/memmove.S
deleted file mode 100644
index c823aada2271..000000000000
--- a/arch/nds32/lib/memmove.S
+++ /dev/null
@@ -1,70 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/linkage.h>
-
-/*
-  void *memmove(void *dst, const void *src, int n);
-
-  dst: $r0
-  src: $r1
-  n  : $r2
-  ret: $r0 - pointer to the memory area dst.
-*/
-	.text
-
-ENTRY(memmove)
-	move	$r5, $r0		! Set return value = det
-	beq	$r0, $r1, exit_memcpy	! Exit when det = src
-	beqz	$r2, exit_memcpy	! Exit when n = 0
-	pushm	$t0, $t1		! Save reg
-	srli	$p1, $r2, #2		! $p1 is how many words to copy
-
-	! Avoid data lost when memory overlap
-	! Copy data reversely when src < dst
-	slt	$p0, $r0, $r1		! check if $r0 < $r1
-	beqz	$p0, do_reverse		! branch if dst > src
-
-	! No reverse, dst < src
-	andi	$r2, $r2, #3		! How many bytes are less than a word
-	li	$t0, #1			! Determining copy direction in byte_cpy
-	beqz	$p1, byte_cpy		! When n is less than a word
-
-word_cpy:
-	lmw.bim	$p0, [$r1], $p0		! Read a word from src
-	addi	$p1, $p1, #-1		! How many words left to copy
-	smw.bim	$p0, [$r0], $p0		! Copy the word to det
-	bnez	$p1, word_cpy		! If remained words > 0
-	beqz	$r2, end_memcpy		! No left bytes to copy
-	b	byte_cpy
-
-do_reverse:
-	add	$r0, $r0, $r2		! Start with the end of $r0
-	add	$r1, $r1, $r2		! Start with the end of $r1
-	andi	$r2, $r2, #3		! How many bytes are less than a word
-	li	$t0, #-1		! Determining copy direction in byte_cpy
-	beqz	$p1, reverse_byte_cpy	! When n is less than a word
-
-reverse_word_cpy:
-	lmw.adm	$p0, [$r1], $p0		! Read a word from src
-	addi	$p1, $p1, #-1		! How many words left to copy
-	smw.adm	$p0, [$r0], $p0		! Copy the word to det
-	bnez	$p1, reverse_word_cpy	! If remained words > 0
-	beqz	$r2, end_memcpy		! No left bytes to copy
-
-reverse_byte_cpy:
-	addi	$r0, $r0, #-1
-	addi	$r1, $r1, #-1
-byte_cpy:				! Less than 4 bytes to copy now
-	lb.bi	$p0, [$r1], $t0		! Read a byte from src
-	addi	$r2, $r2, #-1		! How many bytes left to copy
-	sb.bi	$p0, [$r0], $t0		! copy the byte to det
-	bnez	$r2, byte_cpy		! If remained bytes > 0
-
-end_memcpy:
-	popm	$t0, $t1
-exit_memcpy:
-	move	$r0, $r5
-	ret
-
-ENDPROC(memmove)
diff --git a/arch/nds32/lib/memset.S b/arch/nds32/lib/memset.S
deleted file mode 100644
index 193cb6ce21a9..000000000000
--- a/arch/nds32/lib/memset.S
+++ /dev/null
@@ -1,33 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/linkage.h>
-
-	.text
-ENTRY(memset)
-	move	$r5, $r0		! Return value
-	beqz	$r2, end_memset		! Exit when len = 0
-	srli	$p1, $r2, 2		! $p1 is how many words to copy
-	andi	$r2, $r2, 3		! How many bytes are less than a word
-	beqz	$p1, byte_set		! When n is less than a word
-
-	! set $r1 from ??????ab to abababab
-	andi	$r1, $r1, #0x00ff	! $r1 = 000000ab
-	slli	$p0, $r1, #8		! $p0 = 0000ab00
-	or	$r1, $r1, $p0		! $r1 = 0000abab
-	slli	$p0, $r1, #16		! $p0 = abab0000
-	or	$r1, $r1, $p0		! $r1 = abababab
-word_set:
-	addi	$p1, $p1, #-1		! How many words left to copy
-	smw.bim	$r1, [$r0], $r1		! Copy the word to det
-	bnez	$p1, word_set		! Still words to set, continue looping
-	beqz	$r2, end_memset		! No left byte to set
-byte_set:				! Less than 4 bytes left to set
-	addi	$r2, $r2, #-1		! Decrease len by 1
-	sbi.bi	$r1, [$r0], #1		! Set data of the next byte to $r1
-	bnez	$r2, byte_set		! Still bytes left to set
-end_memset:
-	move	$r0, $r5
-	ret
-
-ENDPROC(memset)
diff --git a/arch/nds32/lib/memzero.S b/arch/nds32/lib/memzero.S
deleted file mode 100644
index f055972c9343..000000000000
--- a/arch/nds32/lib/memzero.S
+++ /dev/null
@@ -1,18 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/linkage.h>
-
-	.text
-ENTRY(memzero)
-	beqz	$r1, 1f
-	push	$lp
-	move    $r2, $r1
-	move    $r1, #0
-	push	$r0
-	bal     memset
-	pop	$r0
-	pop	$lp
-1:
-        ret
-ENDPROC(memzero)
diff --git a/arch/nds32/math-emu/Makefile b/arch/nds32/math-emu/Makefile
deleted file mode 100644
index 3bed7e5d5d05..000000000000
--- a/arch/nds32/math-emu/Makefile
+++ /dev/null
@@ -1,10 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# Makefile for the Linux/nds32 kernel FPU emulation.
-#
-
-obj-y	:= fpuemu.o \
-	   fdivd.o fmuld.o fsubd.o faddd.o fs2d.o fsqrtd.o fcmpd.o fnegs.o \
-	   fd2si.o fd2ui.o fd2siz.o fd2uiz.o fsi2d.o fui2d.o \
-	   fdivs.o fmuls.o fsubs.o fadds.o fd2s.o fsqrts.o fcmps.o fnegd.o \
-	   fs2si.o fs2ui.o fs2siz.o fs2uiz.o fsi2s.o fui2s.o
diff --git a/arch/nds32/math-emu/faddd.c b/arch/nds32/math-emu/faddd.c
deleted file mode 100644
index f7fd4e3c3904..000000000000
--- a/arch/nds32/math-emu/faddd.c
+++ /dev/null
@@ -1,24 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2018 Andes Technology Corporation
-#include <linux/uaccess.h>
-
-#include <asm/sfp-machine.h>
-#include <math-emu/soft-fp.h>
-#include <math-emu/double.h>
-void faddd(void *ft, void *fa, void *fb)
-{
-	FP_DECL_D(A);
-	FP_DECL_D(B);
-	FP_DECL_D(R);
-	FP_DECL_EX;
-
-	FP_UNPACK_DP(A, fa);
-	FP_UNPACK_DP(B, fb);
-
-	FP_ADD_D(R, A, B);
-
-	FP_PACK_DP(ft, R);
-
-	__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
-
-}
diff --git a/arch/nds32/math-emu/fadds.c b/arch/nds32/math-emu/fadds.c
deleted file mode 100644
index f5af6ca8cca5..000000000000
--- a/arch/nds32/math-emu/fadds.c
+++ /dev/null
@@ -1,24 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2018 Andes Technology Corporation
-#include <linux/uaccess.h>
-
-#include <asm/sfp-machine.h>
-#include <math-emu/soft-fp.h>
-#include <math-emu/single.h>
-void fadds(void *ft, void *fa, void *fb)
-{
-	FP_DECL_S(A);
-	FP_DECL_S(B);
-	FP_DECL_S(R);
-	FP_DECL_EX;
-
-	FP_UNPACK_SP(A, fa);
-	FP_UNPACK_SP(B, fb);
-
-	FP_ADD_S(R, A, B);
-
-	FP_PACK_SP(ft, R);
-
-	__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
-
-}
diff --git a/arch/nds32/math-emu/fcmpd.c b/arch/nds32/math-emu/fcmpd.c
deleted file mode 100644
index 0ea225abe880..000000000000
--- a/arch/nds32/math-emu/fcmpd.c
+++ /dev/null
@@ -1,24 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2018 Andes Technology Corporation
-#include <asm/sfp-machine.h>
-#include <math-emu/soft-fp.h>
-#include <math-emu/double.h>
-int fcmpd(void *ft, void *fa, void *fb, int cmpop)
-{
-	FP_DECL_D(A);
-	FP_DECL_D(B);
-	FP_DECL_EX;
-	long cmp;
-
-	FP_UNPACK_DP(A, fa);
-	FP_UNPACK_DP(B, fb);
-
-	FP_CMP_D(cmp, A, B, SF_CUN);
-	cmp += 2;
-	if (cmp == SF_CGT)
-		*(long *)ft = 0;
-	else
-		*(long *)ft = (cmp & cmpop) ? 1 : 0;
-
-	return 0;
-}
diff --git a/arch/nds32/math-emu/fcmps.c b/arch/nds32/math-emu/fcmps.c
deleted file mode 100644
index 681480758213..000000000000
--- a/arch/nds32/math-emu/fcmps.c
+++ /dev/null
@@ -1,24 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2018 Andes Technology Corporation
-#include <asm/sfp-machine.h>
-#include <math-emu/soft-fp.h>
-#include <math-emu/single.h>
-int fcmps(void *ft, void *fa, void *fb, int cmpop)
-{
-	FP_DECL_S(A);
-	FP_DECL_S(B);
-	FP_DECL_EX;
-	long cmp;
-
-	FP_UNPACK_SP(A, fa);
-	FP_UNPACK_SP(B, fb);
-
-	FP_CMP_S(cmp, A, B, SF_CUN);
-	cmp += 2;
-	if (cmp == SF_CGT)
-		*(int *)ft = 0x0;
-	else
-		*(int *)ft = (cmp & cmpop) ? 0x1 : 0x0;
-
-	return 0;
-}
diff --git a/arch/nds32/math-emu/fd2s.c b/arch/nds32/math-emu/fd2s.c
deleted file mode 100644
index 1328371e8170..000000000000
--- a/arch/nds32/math-emu/fd2s.c
+++ /dev/null
@@ -1,22 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2018 Andes Technology Corporation
-#include <linux/uaccess.h>
-
-#include <asm/sfp-machine.h>
-#include <math-emu/double.h>
-#include <math-emu/single.h>
-#include <math-emu/soft-fp.h>
-void fd2s(void *ft, void *fa)
-{
-	FP_DECL_D(A);
-	FP_DECL_S(R);
-	FP_DECL_EX;
-
-	FP_UNPACK_DP(A, fa);
-
-	FP_CONV(S, D, 1, 2, R, A);
-
-	FP_PACK_SP(ft, R);
-
-	__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
-}
diff --git a/arch/nds32/math-emu/fd2si.c b/arch/nds32/math-emu/fd2si.c
deleted file mode 100644
index fae3e16a0a10..000000000000
--- a/arch/nds32/math-emu/fd2si.c
+++ /dev/null
@@ -1,30 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2019 Andes Technology Corporation
-#include <linux/uaccess.h>
-
-#include <asm/sfp-machine.h>
-#include <math-emu/soft-fp.h>
-#include <math-emu/double.h>
-
-void fd2si(void *ft, void *fa)
-{
-	int r;
-
-	FP_DECL_D(A);
-	FP_DECL_EX;
-
-	FP_UNPACK_DP(A, fa);
-
-	if (A_c == FP_CLS_INF) {
-		*(int *)ft = (A_s == 0) ? 0x7fffffff : 0x80000000;
-		__FPU_FPCSR |= FP_EX_INVALID;
-	} else if (A_c == FP_CLS_NAN) {
-		*(int *)ft = 0xffffffff;
-		__FPU_FPCSR |= FP_EX_INVALID;
-	} else {
-		FP_TO_INT_ROUND_D(r, A, 32, 1);
-		__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
-		*(int *)ft = r;
-	}
-
-}
diff --git a/arch/nds32/math-emu/fd2siz.c b/arch/nds32/math-emu/fd2siz.c
deleted file mode 100644
index 92fe6774f112..000000000000
--- a/arch/nds32/math-emu/fd2siz.c
+++ /dev/null
@@ -1,30 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2019 Andes Technology Corporation
-#include <linux/uaccess.h>
-
-#include <asm/sfp-machine.h>
-#include <math-emu/soft-fp.h>
-#include <math-emu/double.h>
-
-void fd2si_z(void *ft, void *fa)
-{
-	int r;
-
-	FP_DECL_D(A);
-	FP_DECL_EX;
-
-	FP_UNPACK_DP(A, fa);
-
-	if (A_c == FP_CLS_INF) {
-		*(int *)ft = (A_s == 0) ? 0x7fffffff : 0x80000000;
-		__FPU_FPCSR |= FP_EX_INVALID;
-	} else if (A_c == FP_CLS_NAN) {
-		*(int *)ft = 0xffffffff;
-		__FPU_FPCSR |= FP_EX_INVALID;
-	} else {
-		FP_TO_INT_D(r, A, 32, 1);
-		__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
-		*(int *)ft = r;
-	}
-
-}
diff --git a/arch/nds32/math-emu/fd2ui.c b/arch/nds32/math-emu/fd2ui.c
deleted file mode 100644
index a0423b699aa4..000000000000
--- a/arch/nds32/math-emu/fd2ui.c
+++ /dev/null
@@ -1,30 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2019 Andes Technology Corporation
-#include <linux/uaccess.h>
-
-#include <asm/sfp-machine.h>
-#include <math-emu/soft-fp.h>
-#include <math-emu/double.h>
-
-void fd2ui(void *ft, void *fa)
-{
-	unsigned int r;
-
-	FP_DECL_D(A);
-	FP_DECL_EX;
-
-	FP_UNPACK_DP(A, fa);
-
-	if (A_c == FP_CLS_INF) {
-		*(unsigned int *)ft = (A_s == 0) ? 0xffffffff : 0x00000000;
-		__FPU_FPCSR |= FP_EX_INVALID;
-	} else if (A_c == FP_CLS_NAN) {
-		*(unsigned int *)ft = 0xffffffff;
-		__FPU_FPCSR |= FP_EX_INVALID;
-	} else {
-		FP_TO_INT_ROUND_D(r, A, 32, 0);
-		__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
-		*(unsigned int *)ft = r;
-	}
-
-}
diff --git a/arch/nds32/math-emu/fd2uiz.c b/arch/nds32/math-emu/fd2uiz.c
deleted file mode 100644
index 8ae17cfce90d..000000000000
--- a/arch/nds32/math-emu/fd2uiz.c
+++ /dev/null
@@ -1,30 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2019 Andes Technology Corporation
-#include <linux/uaccess.h>
-
-#include <asm/sfp-machine.h>
-#include <math-emu/soft-fp.h>
-#include <math-emu/double.h>
-
-void fd2ui_z(void *ft, void *fa)
-{
-	unsigned int r;
-
-	FP_DECL_D(A);
-	FP_DECL_EX;
-
-	FP_UNPACK_DP(A, fa);
-
-	if (A_c == FP_CLS_INF) {
-		*(unsigned int *)ft = (A_s == 0) ? 0xffffffff : 0x00000000;
-		__FPU_FPCSR |= FP_EX_INVALID;
-	} else if (A_c == FP_CLS_NAN) {
-		*(unsigned int *)ft = 0xffffffff;
-		__FPU_FPCSR |= FP_EX_INVALID;
-	} else {
-		FP_TO_INT_D(r, A, 32, 0);
-		__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
-		*(unsigned int *)ft = r;
-	}
-
-}
diff --git a/arch/nds32/math-emu/fdivd.c b/arch/nds32/math-emu/fdivd.c
deleted file mode 100644
index 458e7e98b08e..000000000000
--- a/arch/nds32/math-emu/fdivd.c
+++ /dev/null
@@ -1,27 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2018 Andes Technology Corporation
-
-#include <linux/uaccess.h>
-#include <asm/sfp-machine.h>
-#include <math-emu/soft-fp.h>
-#include <math-emu/double.h>
-
-void fdivd(void *ft, void *fa, void *fb)
-{
-	FP_DECL_D(A);
-	FP_DECL_D(B);
-	FP_DECL_D(R);
-	FP_DECL_EX;
-
-	FP_UNPACK_DP(A, fa);
-	FP_UNPACK_DP(B, fb);
-
-	if (B_c == FP_CLS_ZERO && A_c != FP_CLS_ZERO)
-		FP_SET_EXCEPTION(FP_EX_DIVZERO);
-
-	FP_DIV_D(R, A, B);
-
-	FP_PACK_DP(ft, R);
-
-	__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
-}
diff --git a/arch/nds32/math-emu/fdivs.c b/arch/nds32/math-emu/fdivs.c
deleted file mode 100644
index c7d202159ce2..000000000000
--- a/arch/nds32/math-emu/fdivs.c
+++ /dev/null
@@ -1,26 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2018 Andes Technology Corporation
-#include <linux/uaccess.h>
-
-#include <asm/sfp-machine.h>
-#include <math-emu/soft-fp.h>
-#include <math-emu/single.h>
-void fdivs(void *ft, void *fa, void *fb)
-{
-	FP_DECL_S(A);
-	FP_DECL_S(B);
-	FP_DECL_S(R);
-	FP_DECL_EX;
-
-	FP_UNPACK_SP(A, fa);
-	FP_UNPACK_SP(B, fb);
-
-	if (B_c == FP_CLS_ZERO && A_c != FP_CLS_ZERO)
-		FP_SET_EXCEPTION(FP_EX_DIVZERO);
-
-	FP_DIV_S(R, A, B);
-
-	FP_PACK_SP(ft, R);
-
-	__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
-}
diff --git a/arch/nds32/math-emu/fmuld.c b/arch/nds32/math-emu/fmuld.c
deleted file mode 100644
index f3c77a45ddc2..000000000000
--- a/arch/nds32/math-emu/fmuld.c
+++ /dev/null
@@ -1,23 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2018 Andes Technology Corporation
-#include <linux/uaccess.h>
-
-#include <asm/sfp-machine.h>
-#include <math-emu/soft-fp.h>
-#include <math-emu/double.h>
-void fmuld(void *ft, void *fa, void *fb)
-{
-	FP_DECL_D(A);
-	FP_DECL_D(B);
-	FP_DECL_D(R);
-	FP_DECL_EX;
-
-	FP_UNPACK_DP(A, fa);
-	FP_UNPACK_DP(B, fb);
-
-	FP_MUL_D(R, A, B);
-
-	FP_PACK_DP(ft, R);
-
-	__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
-}
diff --git a/arch/nds32/math-emu/fmuls.c b/arch/nds32/math-emu/fmuls.c
deleted file mode 100644
index cf150df938f9..000000000000
--- a/arch/nds32/math-emu/fmuls.c
+++ /dev/null
@@ -1,23 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2018 Andes Technology Corporation
-#include <linux/uaccess.h>
-
-#include <asm/sfp-machine.h>
-#include <math-emu/soft-fp.h>
-#include <math-emu/single.h>
-void fmuls(void *ft, void *fa, void *fb)
-{
-	FP_DECL_S(A);
-	FP_DECL_S(B);
-	FP_DECL_S(R);
-	FP_DECL_EX;
-
-	FP_UNPACK_SP(A, fa);
-	FP_UNPACK_SP(B, fb);
-
-	FP_MUL_S(R, A, B);
-
-	FP_PACK_SP(ft, R);
-
-	__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
-}
diff --git a/arch/nds32/math-emu/fnegd.c b/arch/nds32/math-emu/fnegd.c
deleted file mode 100644
index de7ea6a0873e..000000000000
--- a/arch/nds32/math-emu/fnegd.c
+++ /dev/null
@@ -1,21 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2018 Andes Technology Corporation
-#include <linux/uaccess.h>
-
-#include <asm/sfp-machine.h>
-#include <math-emu/soft-fp.h>
-#include <math-emu/double.h>
-void fnegd(void *ft, void *fa)
-{
-	FP_DECL_D(A);
-	FP_DECL_D(R);
-	FP_DECL_EX;
-
-	FP_UNPACK_DP(A, fa);
-
-	FP_NEG_D(R, A);
-
-	FP_PACK_DP(ft, R);
-
-	__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
-}
diff --git a/arch/nds32/math-emu/fnegs.c b/arch/nds32/math-emu/fnegs.c
deleted file mode 100644
index 07270b326a77..000000000000
--- a/arch/nds32/math-emu/fnegs.c
+++ /dev/null
@@ -1,21 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2018 Andes Technology Corporation
-#include <linux/uaccess.h>
-
-#include <asm/sfp-machine.h>
-#include <math-emu/soft-fp.h>
-#include <math-emu/single.h>
-void fnegs(void *ft, void *fa)
-{
-	FP_DECL_S(A);
-	FP_DECL_S(R);
-	FP_DECL_EX;
-
-	FP_UNPACK_SP(A, fa);
-
-	FP_NEG_S(R, A);
-
-	FP_PACK_SP(ft, R);
-
-	__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
-}
diff --git a/arch/nds32/math-emu/fpuemu.c b/arch/nds32/math-emu/fpuemu.c
deleted file mode 100644
index 46558a15c0dc..000000000000
--- a/arch/nds32/math-emu/fpuemu.c
+++ /dev/null
@@ -1,406 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2018 Andes Technology Corporation
-
-#include <asm/bitfield.h>
-#include <asm/uaccess.h>
-#include <asm/sfp-machine.h>
-#include <asm/fpuemu.h>
-#include <asm/nds32_fpu_inst.h>
-
-#define DPFROMREG(dp, x) (dp = (void *)((unsigned long *)fpu_reg + 2*x))
-#ifdef __NDS32_EL__
-#define SPFROMREG(sp, x)\
-	((sp) = (void *)((unsigned long *)fpu_reg + (x^1)))
-#else
-#define SPFROMREG(sp, x) ((sp) = (void *)((unsigned long *)fpu_reg + x))
-#endif
-
-#define DEF3OP(name, p, f1, f2) \
-void fpemu_##name##p(void *ft, void *fa, void *fb) \
-{ \
-	f1(fa, fa, fb); \
-	f2(ft, ft, fa); \
-}
-
-#define DEF3OPNEG(name, p, f1, f2, f3) \
-void fpemu_##name##p(void *ft, void *fa, void *fb) \
-{ \
-	f1(fa, fa, fb); \
-	f2(ft, ft, fa); \
-	f3(ft, ft); \
-}
-DEF3OP(fmadd, s, fmuls, fadds);
-DEF3OP(fmsub, s, fmuls, fsubs);
-DEF3OP(fmadd, d, fmuld, faddd);
-DEF3OP(fmsub, d, fmuld, fsubd);
-DEF3OPNEG(fnmadd, s, fmuls, fadds, fnegs);
-DEF3OPNEG(fnmsub, s, fmuls, fsubs, fnegs);
-DEF3OPNEG(fnmadd, d, fmuld, faddd, fnegd);
-DEF3OPNEG(fnmsub, d, fmuld, fsubd, fnegd);
-
-static const unsigned char cmptab[8] = {
-	SF_CEQ,
-	SF_CEQ,
-	SF_CLT,
-	SF_CLT,
-	SF_CLT | SF_CEQ,
-	SF_CLT | SF_CEQ,
-	SF_CUN,
-	SF_CUN
-};
-
-enum ARGTYPE {
-	S1S = 1,
-	S2S,
-	S1D,
-	CS,
-	D1D,
-	D2D,
-	D1S,
-	CD
-};
-union func_t {
-	void (*t)(void *ft, void *fa, void *fb);
-	void (*b)(void *ft, void *fa);
-};
-/*
- * Emulate a single FPU arithmetic instruction.
- */
-static int fpu_emu(struct fpu_struct *fpu_reg, unsigned long insn)
-{
-	int rfmt;		/* resulting format */
-	union func_t func;
-	int ftype = 0;
-
-	switch (rfmt = NDS32Insn_OPCODE_COP0(insn)) {
-	case fs1_op:{
-			switch (NDS32Insn_OPCODE_BIT69(insn)) {
-			case fadds_op:
-				func.t = fadds;
-				ftype = S2S;
-				break;
-			case fsubs_op:
-				func.t = fsubs;
-				ftype = S2S;
-				break;
-			case fmadds_op:
-				func.t = fpemu_fmadds;
-				ftype = S2S;
-				break;
-			case fmsubs_op:
-				func.t = fpemu_fmsubs;
-				ftype = S2S;
-				break;
-			case fnmadds_op:
-				func.t = fpemu_fnmadds;
-				ftype = S2S;
-				break;
-			case fnmsubs_op:
-				func.t = fpemu_fnmsubs;
-				ftype = S2S;
-				break;
-			case fmuls_op:
-				func.t = fmuls;
-				ftype = S2S;
-				break;
-			case fdivs_op:
-				func.t = fdivs;
-				ftype = S2S;
-				break;
-			case fs1_f2op_op:
-				switch (NDS32Insn_OPCODE_BIT1014(insn)) {
-				case fs2d_op:
-					func.b = fs2d;
-					ftype = S1D;
-					break;
-				case fs2si_op:
-					func.b = fs2si;
-					ftype = S1S;
-					break;
-				case fs2si_z_op:
-					func.b = fs2si_z;
-					ftype = S1S;
-					break;
-				case fs2ui_op:
-					func.b = fs2ui;
-					ftype = S1S;
-					break;
-				case fs2ui_z_op:
-					func.b = fs2ui_z;
-					ftype = S1S;
-					break;
-				case fsi2s_op:
-					func.b = fsi2s;
-					ftype = S1S;
-					break;
-				case fui2s_op:
-					func.b = fui2s;
-					ftype = S1S;
-					break;
-				case fsqrts_op:
-					func.b = fsqrts;
-					ftype = S1S;
-					break;
-				default:
-					return SIGILL;
-				}
-				break;
-			default:
-				return SIGILL;
-			}
-			break;
-		}
-	case fs2_op:
-		switch (NDS32Insn_OPCODE_BIT69(insn)) {
-		case fcmpeqs_op:
-		case fcmpeqs_e_op:
-		case fcmplts_op:
-		case fcmplts_e_op:
-		case fcmples_op:
-		case fcmples_e_op:
-		case fcmpuns_op:
-		case fcmpuns_e_op:
-			ftype = CS;
-			break;
-		default:
-			return SIGILL;
-		}
-		break;
-	case fd1_op:{
-			switch (NDS32Insn_OPCODE_BIT69(insn)) {
-			case faddd_op:
-				func.t = faddd;
-				ftype = D2D;
-				break;
-			case fsubd_op:
-				func.t = fsubd;
-				ftype = D2D;
-				break;
-			case fmaddd_op:
-				func.t = fpemu_fmaddd;
-				ftype = D2D;
-				break;
-			case fmsubd_op:
-				func.t = fpemu_fmsubd;
-				ftype = D2D;
-				break;
-			case fnmaddd_op:
-				func.t = fpemu_fnmaddd;
-				ftype = D2D;
-				break;
-			case fnmsubd_op:
-				func.t = fpemu_fnmsubd;
-				ftype = D2D;
-				break;
-			case fmuld_op:
-				func.t = fmuld;
-				ftype = D2D;
-				break;
-			case fdivd_op:
-				func.t = fdivd;
-				ftype = D2D;
-				break;
-			case fd1_f2op_op:
-				switch (NDS32Insn_OPCODE_BIT1014(insn)) {
-				case fd2s_op:
-					func.b = fd2s;
-					ftype = D1S;
-					break;
-				case fd2si_op:
-					func.b = fd2si;
-					ftype = D1S;
-					break;
-				case fd2si_z_op:
-					func.b = fd2si_z;
-					ftype = D1S;
-					break;
-				case fd2ui_op:
-					func.b = fd2ui;
-					ftype = D1S;
-					break;
-				case fd2ui_z_op:
-					func.b = fd2ui_z;
-					ftype = D1S;
-					break;
-				case fsi2d_op:
-					func.b = fsi2d;
-					ftype = D1S;
-					break;
-				case fui2d_op:
-					func.b = fui2d;
-					ftype = D1S;
-					break;
-				case fsqrtd_op:
-					func.b = fsqrtd;
-					ftype = D1D;
-					break;
-				default:
-					return SIGILL;
-				}
-				break;
-			default:
-				return SIGILL;
-
-			}
-			break;
-		}
-
-	case fd2_op:
-		switch (NDS32Insn_OPCODE_BIT69(insn)) {
-		case fcmpeqd_op:
-		case fcmpeqd_e_op:
-		case fcmpltd_op:
-		case fcmpltd_e_op:
-		case fcmpled_op:
-		case fcmpled_e_op:
-		case fcmpund_op:
-		case fcmpund_e_op:
-			ftype = CD;
-			break;
-		default:
-			return SIGILL;
-		}
-		break;
-
-	default:
-		return SIGILL;
-	}
-
-	switch (ftype) {
-	case S1S:{
-			void *ft, *fa;
-
-			SPFROMREG(ft, NDS32Insn_OPCODE_Rt(insn));
-			SPFROMREG(fa, NDS32Insn_OPCODE_Ra(insn));
-			func.b(ft, fa);
-			break;
-		}
-	case S2S:{
-			void *ft, *fa, *fb;
-
-			SPFROMREG(ft, NDS32Insn_OPCODE_Rt(insn));
-			SPFROMREG(fa, NDS32Insn_OPCODE_Ra(insn));
-			SPFROMREG(fb, NDS32Insn_OPCODE_Rb(insn));
-			func.t(ft, fa, fb);
-			break;
-		}
-	case S1D:{
-			void *ft, *fa;
-
-			DPFROMREG(ft, NDS32Insn_OPCODE_Rt(insn));
-			SPFROMREG(fa, NDS32Insn_OPCODE_Ra(insn));
-			func.b(ft, fa);
-			break;
-		}
-	case CS:{
-			unsigned int cmpop = NDS32Insn_OPCODE_BIT69(insn);
-			void *ft, *fa, *fb;
-
-			SPFROMREG(ft, NDS32Insn_OPCODE_Rt(insn));
-			SPFROMREG(fa, NDS32Insn_OPCODE_Ra(insn));
-			SPFROMREG(fb, NDS32Insn_OPCODE_Rb(insn));
-			if (cmpop < 0x8) {
-				cmpop = cmptab[cmpop];
-				fcmps(ft, fa, fb, cmpop);
-			} else
-				return SIGILL;
-			break;
-		}
-	case D1D:{
-			void *ft, *fa;
-
-			DPFROMREG(ft, NDS32Insn_OPCODE_Rt(insn));
-			DPFROMREG(fa, NDS32Insn_OPCODE_Ra(insn));
-			func.b(ft, fa);
-			break;
-		}
-	case D2D:{
-			void *ft, *fa, *fb;
-
-			DPFROMREG(ft, NDS32Insn_OPCODE_Rt(insn));
-			DPFROMREG(fa, NDS32Insn_OPCODE_Ra(insn));
-			DPFROMREG(fb, NDS32Insn_OPCODE_Rb(insn));
-			func.t(ft, fa, fb);
-			break;
-		}
-	case D1S:{
-			void *ft, *fa;
-
-			SPFROMREG(ft, NDS32Insn_OPCODE_Rt(insn));
-			DPFROMREG(fa, NDS32Insn_OPCODE_Ra(insn));
-			func.b(ft, fa);
-			break;
-		}
-	case CD:{
-			unsigned int cmpop = NDS32Insn_OPCODE_BIT69(insn);
-			void *ft, *fa, *fb;
-
-			SPFROMREG(ft, NDS32Insn_OPCODE_Rt(insn));
-			DPFROMREG(fa, NDS32Insn_OPCODE_Ra(insn));
-			DPFROMREG(fb, NDS32Insn_OPCODE_Rb(insn));
-			if (cmpop < 0x8) {
-				cmpop = cmptab[cmpop];
-				fcmpd(ft, fa, fb, cmpop);
-			} else
-				return SIGILL;
-			break;
-		}
-	default:
-		return SIGILL;
-	}
-
-	/*
-	 * If an exception is required, generate a tidy SIGFPE exception.
-	 */
-#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC)
-	if (((fpu_reg->fpcsr << 5) & fpu_reg->fpcsr & FPCSR_mskALLE_NO_UDF_IEXE)
-	    || ((fpu_reg->fpcsr << 5) & (fpu_reg->UDF_IEX_trap))) {
-#else
-	if ((fpu_reg->fpcsr << 5) & fpu_reg->fpcsr & FPCSR_mskALLE) {
-#endif
-		return SIGFPE;
-	}
-	return 0;
-}
-
-int do_fpuemu(struct pt_regs *regs, struct fpu_struct *fpu)
-{
-	unsigned long insn = 0, addr = regs->ipc;
-	unsigned long emulpc, contpc;
-	unsigned char *pc = (void *)&insn;
-	char c;
-	int i = 0, ret;
-
-	for (i = 0; i < 4; i++) {
-		if (__get_user(c, (unsigned char *)addr++))
-			return SIGBUS;
-		*pc++ = c;
-	}
-
-	insn = be32_to_cpu(insn);
-
-	emulpc = regs->ipc;
-	contpc = regs->ipc + 4;
-
-	if (NDS32Insn_OPCODE(insn) != cop0_op)
-		return SIGILL;
-
-	switch (NDS32Insn_OPCODE_COP0(insn)) {
-	case fs1_op:
-	case fs2_op:
-	case fd1_op:
-	case fd2_op:
-		{
-			/* a real fpu computation instruction */
-			ret = fpu_emu(fpu, insn);
-			if (!ret)
-				regs->ipc = contpc;
-		}
-		break;
-
-	default:
-		return SIGILL;
-	}
-
-	return ret;
-}
diff --git a/arch/nds32/math-emu/fs2d.c b/arch/nds32/math-emu/fs2d.c
deleted file mode 100644
index 0e8db9035631..000000000000
--- a/arch/nds32/math-emu/fs2d.c
+++ /dev/null
@@ -1,23 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2018 Andes Technology Corporation
-
-#include <linux/uaccess.h>
-#include <asm/sfp-machine.h>
-#include <math-emu/double.h>
-#include <math-emu/single.h>
-#include <math-emu/soft-fp.h>
-
-void fs2d(void *ft, void *fa)
-{
-	FP_DECL_S(A);
-	FP_DECL_D(R);
-	FP_DECL_EX;
-
-	FP_UNPACK_SP(A, fa);
-
-	FP_CONV(D, S, 2, 1, R, A);
-
-	FP_PACK_DP(ft, R);
-
-	__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
-}
diff --git a/arch/nds32/math-emu/fs2si.c b/arch/nds32/math-emu/fs2si.c
deleted file mode 100644
index b4931d60980e..000000000000
--- a/arch/nds32/math-emu/fs2si.c
+++ /dev/null
@@ -1,29 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2019 Andes Technology Corporation
-#include <linux/uaccess.h>
-
-#include <asm/sfp-machine.h>
-#include <math-emu/soft-fp.h>
-#include <math-emu/single.h>
-
-void fs2si(void *ft, void *fa)
-{
-	int r;
-
-	FP_DECL_S(A);
-	FP_DECL_EX;
-
-	FP_UNPACK_SP(A, fa);
-
-	if (A_c == FP_CLS_INF) {
-		*(int *)ft = (A_s == 0) ? 0x7fffffff : 0x80000000;
-		__FPU_FPCSR |= FP_EX_INVALID;
-	} else if (A_c == FP_CLS_NAN) {
-		*(int *)ft = 0xffffffff;
-		__FPU_FPCSR |= FP_EX_INVALID;
-	} else {
-		FP_TO_INT_ROUND_S(r, A, 32, 1);
-		__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
-		*(int *)ft = r;
-	}
-}
diff --git a/arch/nds32/math-emu/fs2siz.c b/arch/nds32/math-emu/fs2siz.c
deleted file mode 100644
index 1c2b99ce3e38..000000000000
--- a/arch/nds32/math-emu/fs2siz.c
+++ /dev/null
@@ -1,29 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2019 Andes Technology Corporation
-#include <linux/uaccess.h>
-
-#include <asm/sfp-machine.h>
-#include <math-emu/soft-fp.h>
-#include <math-emu/single.h>
-
-void fs2si_z(void *ft, void *fa)
-{
-	int r;
-
-	FP_DECL_S(A);
-	FP_DECL_EX;
-
-	FP_UNPACK_SP(A, fa);
-
-	if (A_c == FP_CLS_INF) {
-		*(int *)ft = (A_s == 0) ? 0x7fffffff : 0x80000000;
-		__FPU_FPCSR |= FP_EX_INVALID;
-	} else if (A_c == FP_CLS_NAN) {
-		*(int *)ft = 0xffffffff;
-		__FPU_FPCSR |= FP_EX_INVALID;
-	} else {
-		FP_TO_INT_S(r, A, 32, 1);
-		__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
-		*(int *)ft = r;
-	}
-}
diff --git a/arch/nds32/math-emu/fs2ui.c b/arch/nds32/math-emu/fs2ui.c
deleted file mode 100644
index c337f0384d06..000000000000
--- a/arch/nds32/math-emu/fs2ui.c
+++ /dev/null
@@ -1,29 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2019 Andes Technology Corporation
-#include <linux/uaccess.h>
-
-#include <asm/sfp-machine.h>
-#include <math-emu/soft-fp.h>
-#include <math-emu/single.h>
-
-void fs2ui(void *ft, void *fa)
-{
-	unsigned int r;
-
-	FP_DECL_S(A);
-	FP_DECL_EX;
-
-	FP_UNPACK_SP(A, fa);
-
-	if (A_c == FP_CLS_INF) {
-		*(unsigned int *)ft = (A_s == 0) ? 0xffffffff : 0x00000000;
-		__FPU_FPCSR |= FP_EX_INVALID;
-	} else if (A_c == FP_CLS_NAN) {
-		*(unsigned int *)ft = 0xffffffff;
-		__FPU_FPCSR |= FP_EX_INVALID;
-	} else {
-		FP_TO_INT_ROUND_S(r, A, 32, 0);
-		__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
-		*(unsigned int *)ft = r;
-	}
-}
diff --git a/arch/nds32/math-emu/fs2uiz.c b/arch/nds32/math-emu/fs2uiz.c
deleted file mode 100644
index 22c5e4768044..000000000000
--- a/arch/nds32/math-emu/fs2uiz.c
+++ /dev/null
@@ -1,30 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2019 Andes Technology Corporation
-#include <linux/uaccess.h>
-
-#include <asm/sfp-machine.h>
-#include <math-emu/soft-fp.h>
-#include <math-emu/single.h>
-
-void fs2ui_z(void *ft, void *fa)
-{
-	unsigned int r;
-
-	FP_DECL_S(A);
-	FP_DECL_EX;
-
-	FP_UNPACK_SP(A, fa);
-
-	if (A_c == FP_CLS_INF) {
-		*(unsigned int *)ft = (A_s == 0) ? 0xffffffff : 0x00000000;
-		__FPU_FPCSR |= FP_EX_INVALID;
-	} else if (A_c == FP_CLS_NAN) {
-		*(unsigned int *)ft = 0xffffffff;
-		__FPU_FPCSR |= FP_EX_INVALID;
-	} else {
-		FP_TO_INT_S(r, A, 32, 0);
-		__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
-		*(unsigned int *)ft = r;
-	}
-
-}
diff --git a/arch/nds32/math-emu/fsi2d.c b/arch/nds32/math-emu/fsi2d.c
deleted file mode 100644
index 6b04cec0c5c5..000000000000
--- a/arch/nds32/math-emu/fsi2d.c
+++ /dev/null
@@ -1,22 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2019 Andes Technology Corporation
-#include <linux/uaccess.h>
-
-#include <asm/sfp-machine.h>
-#include <math-emu/soft-fp.h>
-#include <math-emu/double.h>
-
-void fsi2d(void *ft, void *fa)
-{
-	int a = *(int *)fa;
-
-	FP_DECL_D(R);
-	FP_DECL_EX;
-
-	FP_FROM_INT_D(R, a, 32, int);
-
-	FP_PACK_DP(ft, R);
-
-	__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
-
-}
diff --git a/arch/nds32/math-emu/fsi2s.c b/arch/nds32/math-emu/fsi2s.c
deleted file mode 100644
index 689864a5df90..000000000000
--- a/arch/nds32/math-emu/fsi2s.c
+++ /dev/null
@@ -1,22 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2019 Andes Technology Corporation
-#include <linux/uaccess.h>
-
-#include <asm/sfp-machine.h>
-#include <math-emu/soft-fp.h>
-#include <math-emu/single.h>
-
-void fsi2s(void *ft, void *fa)
-{
-	int a = *(int *)fa;
-
-	FP_DECL_S(R);
-	FP_DECL_EX;
-
-	FP_FROM_INT_S(R, a, 32, int);
-
-	FP_PACK_SP(ft, R);
-
-	__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
-
-}
diff --git a/arch/nds32/math-emu/fsqrtd.c b/arch/nds32/math-emu/fsqrtd.c
deleted file mode 100644
index c3a8dbd81d4e..000000000000
--- a/arch/nds32/math-emu/fsqrtd.c
+++ /dev/null
@@ -1,21 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2018 Andes Technology Corporation
-
-#include <linux/uaccess.h>
-#include <asm/sfp-machine.h>
-#include <math-emu/soft-fp.h>
-#include <math-emu/double.h>
-void fsqrtd(void *ft, void *fa)
-{
-	FP_DECL_D(A);
-	FP_DECL_D(R);
-	FP_DECL_EX;
-
-	FP_UNPACK_DP(A, fa);
-
-	FP_SQRT_D(R, A);
-
-	FP_PACK_DP(ft, R);
-
-	__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
-}
diff --git a/arch/nds32/math-emu/fsqrts.c b/arch/nds32/math-emu/fsqrts.c
deleted file mode 100644
index 4c6f94b27328..000000000000
--- a/arch/nds32/math-emu/fsqrts.c
+++ /dev/null
@@ -1,21 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2018 Andes Technology Corporation
-
-#include <linux/uaccess.h>
-#include <asm/sfp-machine.h>
-#include <math-emu/soft-fp.h>
-#include <math-emu/single.h>
-void fsqrts(void *ft, void *fa)
-{
-	FP_DECL_S(A);
-	FP_DECL_S(R);
-	FP_DECL_EX;
-
-	FP_UNPACK_SP(A, fa);
-
-	FP_SQRT_S(R, A);
-
-	FP_PACK_SP(ft, R);
-
-	__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
-}
diff --git a/arch/nds32/math-emu/fsubd.c b/arch/nds32/math-emu/fsubd.c
deleted file mode 100644
index 81b6a0d02a1f..000000000000
--- a/arch/nds32/math-emu/fsubd.c
+++ /dev/null
@@ -1,27 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2018 Andes Technology Corporation
-#include <linux/uaccess.h>
-
-#include <asm/sfp-machine.h>
-#include <math-emu/soft-fp.h>
-#include <math-emu/double.h>
-void fsubd(void *ft, void *fa, void *fb)
-{
-
-	FP_DECL_D(A);
-	FP_DECL_D(B);
-	FP_DECL_D(R);
-	FP_DECL_EX;
-
-	FP_UNPACK_DP(A, fa);
-	FP_UNPACK_DP(B, fb);
-
-	if (B_c != FP_CLS_NAN)
-		B_s ^= 1;
-
-	FP_ADD_D(R, A, B);
-
-	FP_PACK_DP(ft, R);
-
-	__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
-}
diff --git a/arch/nds32/math-emu/fsubs.c b/arch/nds32/math-emu/fsubs.c
deleted file mode 100644
index 61ddd9708465..000000000000
--- a/arch/nds32/math-emu/fsubs.c
+++ /dev/null
@@ -1,27 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2018 Andes Technology Corporation
-#include <linux/uaccess.h>
-
-#include <asm/sfp-machine.h>
-#include <math-emu/soft-fp.h>
-#include <math-emu/single.h>
-void fsubs(void *ft, void *fa, void *fb)
-{
-
-	FP_DECL_S(A);
-	FP_DECL_S(B);
-	FP_DECL_S(R);
-	FP_DECL_EX;
-
-	FP_UNPACK_SP(A, fa);
-	FP_UNPACK_SP(B, fb);
-
-	if (B_c != FP_CLS_NAN)
-		B_s ^= 1;
-
-	FP_ADD_S(R, A, B);
-
-	FP_PACK_SP(ft, R);
-
-	__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
-}
diff --git a/arch/nds32/math-emu/fui2d.c b/arch/nds32/math-emu/fui2d.c
deleted file mode 100644
index 9689d33a8d50..000000000000
--- a/arch/nds32/math-emu/fui2d.c
+++ /dev/null
@@ -1,22 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2019 Andes Technology Corporation
-#include <linux/uaccess.h>
-
-#include <asm/sfp-machine.h>
-#include <math-emu/soft-fp.h>
-#include <math-emu/double.h>
-
-void fui2d(void *ft, void *fa)
-{
-	unsigned int a = *(unsigned int *)fa;
-
-	FP_DECL_D(R);
-	FP_DECL_EX;
-
-	FP_FROM_INT_D(R, a, 32, int);
-
-	FP_PACK_DP(ft, R);
-
-	__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
-
-}
diff --git a/arch/nds32/math-emu/fui2s.c b/arch/nds32/math-emu/fui2s.c
deleted file mode 100644
index f70f0762547d..000000000000
--- a/arch/nds32/math-emu/fui2s.c
+++ /dev/null
@@ -1,22 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2019 Andes Technology Corporation
-#include <linux/uaccess.h>
-
-#include <asm/sfp-machine.h>
-#include <math-emu/soft-fp.h>
-#include <math-emu/single.h>
-
-void fui2s(void *ft, void *fa)
-{
-	unsigned int a = *(unsigned int *)fa;
-
-	FP_DECL_S(R);
-	FP_DECL_EX;
-
-	FP_FROM_INT_S(R, a, 32, int);
-
-	FP_PACK_SP(ft, R);
-
-	__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
-
-}
diff --git a/arch/nds32/mm/Makefile b/arch/nds32/mm/Makefile
deleted file mode 100644
index 14fb2e8eb036..000000000000
--- a/arch/nds32/mm/Makefile
+++ /dev/null
@@ -1,10 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-obj-y				:= extable.o tlb.o fault.o init.o mmap.o \
-                                   mm-nds32.o cacheflush.o proc.o
-
-obj-$(CONFIG_ALIGNMENT_TRAP)	+= alignment.o
-
-ifdef CONFIG_FUNCTION_TRACER
-CFLAGS_REMOVE_proc.o     = $(CC_FLAGS_FTRACE)
-endif
-CFLAGS_proc.o              += -fomit-frame-pointer
diff --git a/arch/nds32/mm/alignment.c b/arch/nds32/mm/alignment.c
deleted file mode 100644
index 9c2c0a454da8..000000000000
--- a/arch/nds32/mm/alignment.c
+++ /dev/null
@@ -1,575 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/proc_fs.h>
-#include <linux/uaccess.h>
-#include <linux/sysctl.h>
-#include <asm/unaligned.h>
-
-#define DEBUG(enable, tagged, ...)				\
-	do{							\
-		if (enable) {					\
-			if (tagged)				\
-			pr_warn("[ %30s() ] ", __func__);	\
-			pr_warn(__VA_ARGS__);			\
-		}						\
-	} while (0)
-
-#define RT(inst)	(((inst) >> 20) & 0x1FUL)
-#define RA(inst)	(((inst) >> 15) & 0x1FUL)
-#define RB(inst)	(((inst) >> 10) & 0x1FUL)
-#define SV(inst)	(((inst) >> 8) & 0x3UL)
-#define IMM(inst)	(((inst) >> 0) & 0x7FFFUL)
-
-#define RA3(inst)	(((inst) >> 3) & 0x7UL)
-#define RT3(inst)	(((inst) >> 6) & 0x7UL)
-#define IMM3U(inst)	(((inst) >> 0) & 0x7UL)
-
-#define RA5(inst)	(((inst) >> 0) & 0x1FUL)
-#define RT4(inst)	(((inst) >> 5) & 0xFUL)
-
-#define GET_IMMSVAL(imm_value) \
-	(((imm_value >> 14) & 0x1) ? (imm_value - 0x8000) : imm_value)
-
-#define __get8_data(val,addr,err)	\
-	__asm__(					\
-	"1:	lbi.bi	%1, [%2], #1\n"			\
-	"2:\n"						\
-	"	.pushsection .text.fixup,\"ax\"\n"	\
-	"	.align	2\n"				\
-	"3:	movi	%0, #1\n"			\
-	"	j	2b\n"				\
-	"	.popsection\n"				\
-	"	.pushsection __ex_table,\"a\"\n"	\
-	"	.align	3\n"				\
-	"	.long	1b, 3b\n"			\
-	"	.popsection\n"				\
-	: "=r" (err), "=&r" (val), "=r" (addr)		\
-	: "0" (err), "2" (addr))
-
-#define get16_data(addr, val_ptr)				\
-	do {							\
-		unsigned int err = 0, v, a = addr;		\
-		__get8_data(v,a,err);				\
-		*val_ptr =  v << 0;				\
-		__get8_data(v,a,err);				\
-		*val_ptr |= v << 8;				\
-		if (err)					\
-			goto fault;				\
-		*val_ptr = le16_to_cpu(*val_ptr);		\
-	} while(0)
-
-#define get32_data(addr, val_ptr)				\
-	do {							\
-		unsigned int err = 0, v, a = addr;		\
-		__get8_data(v,a,err);				\
-		*val_ptr =  v << 0;				\
-		__get8_data(v,a,err);				\
-		*val_ptr |= v << 8;				\
-		__get8_data(v,a,err);				\
-		*val_ptr |= v << 16;				\
-		__get8_data(v,a,err);				\
-		*val_ptr |= v << 24;				\
-		if (err)					\
-			goto fault;				\
-		*val_ptr = le32_to_cpu(*val_ptr);		\
-	} while(0)
-
-#define get_data(addr, val_ptr, len)				\
-	if (len == 2)						\
-		get16_data(addr, val_ptr);			\
-	else							\
-		get32_data(addr, val_ptr);
-
-#define set16_data(addr, val)					\
-	do {							\
-		unsigned int err = 0, *ptr = addr ;		\
-		val = le32_to_cpu(val);				\
-		__asm__(					\
-                "1:	sbi.bi 	%2, [%1], #1\n"			\
-                "	srli 	%2, %2, #8\n"			\
-                "2:	sbi	%2, [%1]\n"			\
-		"3:\n"						\
-		"	.pushsection .text.fixup,\"ax\"\n"	\
-		"	.align	2\n"				\
-		"4:	movi	%0, #1\n"			\
-		"	j	3b\n"				\
-		"	.popsection\n"				\
-		"	.pushsection __ex_table,\"a\"\n"	\
-		"	.align	3\n"				\
-		"	.long	1b, 4b\n"			\
-		"	.long	2b, 4b\n"			\
-		"	.popsection\n"				\
-		: "=r" (err), "+r" (ptr), "+r" (val)		\
-		: "0" (err)					\
-		);						\
-		if (err)					\
-			goto fault;				\
-	} while(0)
-
-#define set32_data(addr, val)					\
-	do {							\
-		unsigned int err = 0, *ptr = addr ;		\
-		val = le32_to_cpu(val);				\
-		__asm__(					\
-                "1:	sbi.bi 	%2, [%1], #1\n"			\
-                "	srli 	%2, %2, #8\n"			\
-                "2:	sbi.bi 	%2, [%1], #1\n"			\
-                "	srli 	%2, %2, #8\n"			\
-                "3:	sbi.bi 	%2, [%1], #1\n"			\
-                "	srli 	%2, %2, #8\n"			\
-                "4:	sbi 	%2, [%1]\n"			\
-		"5:\n"						\
-		"	.pushsection .text.fixup,\"ax\"\n"	\
-		"	.align	2\n"				\
-		"6:	movi	%0, #1\n"			\
-		"	j	5b\n"				\
-		"	.popsection\n"				\
-		"	.pushsection __ex_table,\"a\"\n"	\
-		"	.align	3\n"				\
-		"	.long	1b, 6b\n"			\
-		"	.long	2b, 6b\n"			\
-		"	.long	3b, 6b\n"			\
-		"	.long	4b, 6b\n"			\
-		"	.popsection\n"				\
-		: "=r" (err), "+r" (ptr), "+r" (val)		\
-		: "0" (err)					\
-		);						\
-		if (err)					\
-			goto fault;				\
-	} while(0)
-#define set_data(addr, val, len)				\
-	if (len == 2)						\
-		set16_data(addr, val);				\
-	else							\
-		set32_data(addr, val);
-#define NDS32_16BIT_INSTRUCTION	0x80000000
-
-extern pte_t va_present(struct mm_struct *mm, unsigned long addr);
-extern pte_t va_kernel_present(unsigned long addr);
-extern int va_readable(struct pt_regs *regs, unsigned long addr);
-extern int va_writable(struct pt_regs *regs, unsigned long addr);
-
-int unalign_access_mode = 0, unalign_access_debug = 0;
-
-static inline unsigned long *idx_to_addr(struct pt_regs *regs, int idx)
-{
-	/* this should be consistent with ptrace.h */
-	if (idx >= 0 && idx <= 25)	/* R0-R25 */
-		return &regs->uregs[0] + idx;
-	else if (idx >= 28 && idx <= 30)	/* FP, GP, LP */
-		return &regs->fp + (idx - 28);
-	else if (idx == 31)	/* SP */
-		return &regs->sp;
-	else
-		return NULL;	/* cause a segfault */
-}
-
-static inline unsigned long get_inst(unsigned long addr)
-{
-	return be32_to_cpu(get_unaligned((u32 *) addr));
-}
-
-static inline unsigned long sign_extend(unsigned long val, int len)
-{
-	unsigned long ret = 0;
-	unsigned char *s, *t;
-	int i = 0;
-
-	val = cpu_to_le32(val);
-
-	s = (void *)&val;
-	t = (void *)&ret;
-
-	while (i++ < len)
-		*t++ = *s++;
-
-	if (((*(t - 1)) & 0x80) && (i < 4)) {
-
-		while (i++ <= 4)
-			*t++ = 0xff;
-	}
-
-	return le32_to_cpu(ret);
-}
-
-static inline int do_16(unsigned long inst, struct pt_regs *regs)
-{
-	int imm, regular, load, len, addr_mode, idx_mode;
-	unsigned long unaligned_addr, target_val, source_idx, target_idx,
-	    shift = 0;
-	switch ((inst >> 9) & 0x3F) {
-
-	case 0x12:		/* LHI333    */
-		imm = 1;
-		regular = 1;
-		load = 1;
-		len = 2;
-		addr_mode = 3;
-		idx_mode = 3;
-		break;
-	case 0x10:		/* LWI333    */
-		imm = 1;
-		regular = 1;
-		load = 1;
-		len = 4;
-		addr_mode = 3;
-		idx_mode = 3;
-		break;
-	case 0x11:		/* LWI333.bi */
-		imm = 1;
-		regular = 0;
-		load = 1;
-		len = 4;
-		addr_mode = 3;
-		idx_mode = 3;
-		break;
-	case 0x1A:		/* LWI450    */
-		imm = 0;
-		regular = 1;
-		load = 1;
-		len = 4;
-		addr_mode = 5;
-		idx_mode = 4;
-		break;
-	case 0x16:		/* SHI333    */
-		imm = 1;
-		regular = 1;
-		load = 0;
-		len = 2;
-		addr_mode = 3;
-		idx_mode = 3;
-		break;
-	case 0x14:		/* SWI333    */
-		imm = 1;
-		regular = 1;
-		load = 0;
-		len = 4;
-		addr_mode = 3;
-		idx_mode = 3;
-		break;
-	case 0x15:		/* SWI333.bi */
-		imm = 1;
-		regular = 0;
-		load = 0;
-		len = 4;
-		addr_mode = 3;
-		idx_mode = 3;
-		break;
-	case 0x1B:		/* SWI450    */
-		imm = 0;
-		regular = 1;
-		load = 0;
-		len = 4;
-		addr_mode = 5;
-		idx_mode = 4;
-		break;
-
-	default:
-		return -EFAULT;
-	}
-
-	if (addr_mode == 3) {
-		unaligned_addr = *idx_to_addr(regs, RA3(inst));
-		source_idx = RA3(inst);
-	} else {
-		unaligned_addr = *idx_to_addr(regs, RA5(inst));
-		source_idx = RA5(inst);
-	}
-
-	if (idx_mode == 3)
-		target_idx = RT3(inst);
-	else
-		target_idx = RT4(inst);
-
-	if (imm)
-		shift = IMM3U(inst) * len;
-
-	if (regular)
-		unaligned_addr += shift;
-
-	if (load) {
-		if (!access_ok((void *)unaligned_addr, len))
-			return -EACCES;
-
-		get_data(unaligned_addr, &target_val, len);
-		*idx_to_addr(regs, target_idx) = target_val;
-	} else {
-		if (!access_ok((void *)unaligned_addr, len))
-			return -EACCES;
-		target_val = *idx_to_addr(regs, target_idx);
-		set_data((void *)unaligned_addr, target_val, len);
-	}
-
-	if (!regular)
-		*idx_to_addr(regs, source_idx) = unaligned_addr + shift;
-	regs->ipc += 2;
-
-	return 0;
-fault:
-	return -EACCES;
-}
-
-static inline int do_32(unsigned long inst, struct pt_regs *regs)
-{
-	int imm, regular, load, len, sign_ext;
-	unsigned long unaligned_addr, target_val, shift;
-
-	unaligned_addr = *idx_to_addr(regs, RA(inst));
-
-	switch ((inst >> 25) << 1) {
-
-	case 0x02:		/* LHI       */
-		imm = 1;
-		regular = 1;
-		load = 1;
-		len = 2;
-		sign_ext = 0;
-		break;
-	case 0x0A:		/* LHI.bi    */
-		imm = 1;
-		regular = 0;
-		load = 1;
-		len = 2;
-		sign_ext = 0;
-		break;
-	case 0x22:		/* LHSI      */
-		imm = 1;
-		regular = 1;
-		load = 1;
-		len = 2;
-		sign_ext = 1;
-		break;
-	case 0x2A:		/* LHSI.bi   */
-		imm = 1;
-		regular = 0;
-		load = 1;
-		len = 2;
-		sign_ext = 1;
-		break;
-	case 0x04:		/* LWI       */
-		imm = 1;
-		regular = 1;
-		load = 1;
-		len = 4;
-		sign_ext = 0;
-		break;
-	case 0x0C:		/* LWI.bi    */
-		imm = 1;
-		regular = 0;
-		load = 1;
-		len = 4;
-		sign_ext = 0;
-		break;
-	case 0x12:		/* SHI       */
-		imm = 1;
-		regular = 1;
-		load = 0;
-		len = 2;
-		sign_ext = 0;
-		break;
-	case 0x1A:		/* SHI.bi    */
-		imm = 1;
-		regular = 0;
-		load = 0;
-		len = 2;
-		sign_ext = 0;
-		break;
-	case 0x14:		/* SWI       */
-		imm = 1;
-		regular = 1;
-		load = 0;
-		len = 4;
-		sign_ext = 0;
-		break;
-	case 0x1C:		/* SWI.bi    */
-		imm = 1;
-		regular = 0;
-		load = 0;
-		len = 4;
-		sign_ext = 0;
-		break;
-
-	default:
-		switch (inst & 0xff) {
-
-		case 0x01:	/* LH        */
-			imm = 0;
-			regular = 1;
-			load = 1;
-			len = 2;
-			sign_ext = 0;
-			break;
-		case 0x05:	/* LH.bi     */
-			imm = 0;
-			regular = 0;
-			load = 1;
-			len = 2;
-			sign_ext = 0;
-			break;
-		case 0x11:	/* LHS       */
-			imm = 0;
-			regular = 1;
-			load = 1;
-			len = 2;
-			sign_ext = 1;
-			break;
-		case 0x15:	/* LHS.bi    */
-			imm = 0;
-			regular = 0;
-			load = 1;
-			len = 2;
-			sign_ext = 1;
-			break;
-		case 0x02:	/* LW        */
-			imm = 0;
-			regular = 1;
-			load = 1;
-			len = 4;
-			sign_ext = 0;
-			break;
-		case 0x06:	/* LW.bi     */
-			imm = 0;
-			regular = 0;
-			load = 1;
-			len = 4;
-			sign_ext = 0;
-			break;
-		case 0x09:	/* SH        */
-			imm = 0;
-			regular = 1;
-			load = 0;
-			len = 2;
-			sign_ext = 0;
-			break;
-		case 0x0D:	/* SH.bi     */
-			imm = 0;
-			regular = 0;
-			load = 0;
-			len = 2;
-			sign_ext = 0;
-			break;
-		case 0x0A:	/* SW        */
-			imm = 0;
-			regular = 1;
-			load = 0;
-			len = 4;
-			sign_ext = 0;
-			break;
-		case 0x0E:	/* SW.bi     */
-			imm = 0;
-			regular = 0;
-			load = 0;
-			len = 4;
-			sign_ext = 0;
-			break;
-
-		default:
-			return -EFAULT;
-		}
-	}
-
-	if (imm)
-		shift = GET_IMMSVAL(IMM(inst)) * len;
-	else
-		shift = *idx_to_addr(regs, RB(inst)) << SV(inst);
-
-	if (regular)
-		unaligned_addr += shift;
-
-	if (load) {
-
-		if (!access_ok((void *)unaligned_addr, len))
-			return -EACCES;
-
-		get_data(unaligned_addr, &target_val, len);
-
-		if (sign_ext)
-			*idx_to_addr(regs, RT(inst)) =
-			    sign_extend(target_val, len);
-		else
-			*idx_to_addr(regs, RT(inst)) = target_val;
-	} else {
-
-		if (!access_ok((void *)unaligned_addr, len))
-			return -EACCES;
-
-		target_val = *idx_to_addr(regs, RT(inst));
-		set_data((void *)unaligned_addr, target_val, len);
-	}
-
-	if (!regular)
-		*idx_to_addr(regs, RA(inst)) = unaligned_addr + shift;
-
-	regs->ipc += 4;
-
-	return 0;
-fault:
-	return -EACCES;
-}
-
-int do_unaligned_access(unsigned long addr, struct pt_regs *regs)
-{
-	unsigned long inst;
-	int ret = -EFAULT;
-
-	inst = get_inst(regs->ipc);
-
-	DEBUG((unalign_access_debug > 0), 1,
-	      "Faulting addr: 0x%08lx, pc: 0x%08lx [inst: 0x%08lx ]\n", addr,
-	      regs->ipc, inst);
-
-	if (inst & NDS32_16BIT_INSTRUCTION)
-		ret = do_16((inst >> 16) & 0xffff, regs);
-	else
-		ret = do_32(inst, regs);
-
-	return ret;
-}
-
-#ifdef CONFIG_PROC_FS
-
-static struct ctl_table alignment_tbl[3] = {
-	{
-	 .procname = "enable",
-	 .data = &unalign_access_mode,
-	 .maxlen = sizeof(unalign_access_mode),
-	 .mode = 0666,
-	 .proc_handler = &proc_dointvec
-	}
-	,
-	{
-	 .procname = "debug_info",
-	 .data = &unalign_access_debug,
-	 .maxlen = sizeof(unalign_access_debug),
-	 .mode = 0644,
-	 .proc_handler = &proc_dointvec
-	}
-	,
-	{}
-};
-
-static struct ctl_table nds32_sysctl_table[2] = {
-	{
-	 .procname = "unaligned_access",
-	 .mode = 0555,
-	 .child = alignment_tbl},
-	{}
-};
-
-static struct ctl_path nds32_path[2] = {
-	{.procname = "nds32"},
-	{}
-};
-
-/*
- * Initialize nds32 alignment-correction interface
- */
-static int __init nds32_sysctl_init(void)
-{
-	register_sysctl_paths(nds32_path, nds32_sysctl_table);
-	return 0;
-}
-
-__initcall(nds32_sysctl_init);
-#endif /* CONFIG_PROC_FS */
diff --git a/arch/nds32/mm/cacheflush.c b/arch/nds32/mm/cacheflush.c
deleted file mode 100644
index 07aac65d1cab..000000000000
--- a/arch/nds32/mm/cacheflush.c
+++ /dev/null
@@ -1,338 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/fs.h>
-#include <linux/pagemap.h>
-#include <linux/module.h>
-#include <asm/cacheflush.h>
-#include <asm/proc-fns.h>
-#include <asm/shmparam.h>
-#include <asm/cache_info.h>
-
-extern struct cache_info L1_cache_info[2];
-
-void flush_icache_range(unsigned long start, unsigned long end)
-{
-	unsigned long line_size, flags;
-	line_size = L1_cache_info[DCACHE].line_size;
-	start = start & ~(line_size - 1);
-	end = (end + line_size - 1) & ~(line_size - 1);
-	local_irq_save(flags);
-	cpu_cache_wbinval_range(start, end, 1);
-	local_irq_restore(flags);
-}
-EXPORT_SYMBOL(flush_icache_range);
-
-void flush_icache_page(struct vm_area_struct *vma, struct page *page)
-{
-	unsigned long flags;
-	unsigned long kaddr;
-	local_irq_save(flags);
-	kaddr = (unsigned long)kmap_atomic(page);
-	cpu_cache_wbinval_page(kaddr, vma->vm_flags & VM_EXEC);
-	kunmap_atomic((void *)kaddr);
-	local_irq_restore(flags);
-}
-
-void flush_icache_user_page(struct vm_area_struct *vma, struct page *page,
-	                     unsigned long addr, int len)
-{
-	unsigned long kaddr;
-	kaddr = (unsigned long)kmap_atomic(page) + (addr & ~PAGE_MASK);
-	flush_icache_range(kaddr, kaddr + len);
-	kunmap_atomic((void *)kaddr);
-}
-
-void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr,
-		      pte_t * pte)
-{
-	struct page *page;
-	unsigned long pfn = pte_pfn(*pte);
-	unsigned long flags;
-
-	if (!pfn_valid(pfn))
-		return;
-
-	if (vma->vm_mm == current->active_mm) {
-		local_irq_save(flags);
-		__nds32__mtsr_dsb(addr, NDS32_SR_TLB_VPN);
-		__nds32__tlbop_rwr(*pte);
-		__nds32__isb();
-		local_irq_restore(flags);
-	}
-	page = pfn_to_page(pfn);
-
-	if ((test_and_clear_bit(PG_dcache_dirty, &page->flags)) ||
-	    (vma->vm_flags & VM_EXEC)) {
-		unsigned long kaddr;
-		local_irq_save(flags);
-		kaddr = (unsigned long)kmap_atomic(page);
-		cpu_cache_wbinval_page(kaddr, vma->vm_flags & VM_EXEC);
-		kunmap_atomic((void *)kaddr);
-		local_irq_restore(flags);
-	}
-}
-#ifdef CONFIG_CPU_CACHE_ALIASING
-extern pte_t va_present(struct mm_struct *mm, unsigned long addr);
-
-static inline unsigned long aliasing(unsigned long addr, unsigned long page)
-{
-	return ((addr & PAGE_MASK) ^ page) & (SHMLBA - 1);
-}
-
-static inline unsigned long kremap0(unsigned long uaddr, unsigned long pa)
-{
-	unsigned long kaddr, pte;
-
-#define BASE_ADDR0 0xffffc000
-	kaddr = BASE_ADDR0 | (uaddr & L1_cache_info[DCACHE].aliasing_mask);
-	pte = (pa | PAGE_KERNEL);
-	__nds32__mtsr_dsb(kaddr, NDS32_SR_TLB_VPN);
-	__nds32__tlbop_rwlk(pte);
-	__nds32__isb();
-	return kaddr;
-}
-
-static inline void kunmap01(unsigned long kaddr)
-{
-	__nds32__tlbop_unlk(kaddr);
-	__nds32__tlbop_inv(kaddr);
-	__nds32__isb();
-}
-
-static inline unsigned long kremap1(unsigned long uaddr, unsigned long pa)
-{
-	unsigned long kaddr, pte;
-
-#define BASE_ADDR1 0xffff8000
-	kaddr = BASE_ADDR1 | (uaddr & L1_cache_info[DCACHE].aliasing_mask);
-	pte = (pa | PAGE_KERNEL);
-	__nds32__mtsr_dsb(kaddr, NDS32_SR_TLB_VPN);
-	__nds32__tlbop_rwlk(pte);
-	__nds32__isb();
-	return kaddr;
-}
-
-void flush_cache_mm(struct mm_struct *mm)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	cpu_dcache_wbinval_all();
-	cpu_icache_inval_all();
-	local_irq_restore(flags);
-}
-
-void flush_cache_dup_mm(struct mm_struct *mm)
-{
-}
-
-void flush_cache_range(struct vm_area_struct *vma,
-		       unsigned long start, unsigned long end)
-{
-	unsigned long flags;
-
-	if ((end - start) > 8 * PAGE_SIZE) {
-		cpu_dcache_wbinval_all();
-		if (vma->vm_flags & VM_EXEC)
-			cpu_icache_inval_all();
-		return;
-	}
-	local_irq_save(flags);
-	while (start < end) {
-		if (va_present(vma->vm_mm, start))
-			cpu_cache_wbinval_page(start, vma->vm_flags & VM_EXEC);
-		start += PAGE_SIZE;
-	}
-	local_irq_restore(flags);
-	return;
-}
-
-void flush_cache_page(struct vm_area_struct *vma,
-		      unsigned long addr, unsigned long pfn)
-{
-	unsigned long vto, flags;
-
-	local_irq_save(flags);
-	vto = kremap0(addr, pfn << PAGE_SHIFT);
-	cpu_cache_wbinval_page(vto, vma->vm_flags & VM_EXEC);
-	kunmap01(vto);
-	local_irq_restore(flags);
-}
-
-void flush_cache_vmap(unsigned long start, unsigned long end)
-{
-	cpu_dcache_wbinval_all();
-	cpu_icache_inval_all();
-}
-
-void flush_cache_vunmap(unsigned long start, unsigned long end)
-{
-	cpu_dcache_wbinval_all();
-	cpu_icache_inval_all();
-}
-
-void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
-		    struct page *to)
-{
-	cpu_dcache_wbinval_page((unsigned long)vaddr);
-	cpu_icache_inval_page((unsigned long)vaddr);
-	copy_page(vto, vfrom);
-	cpu_dcache_wbinval_page((unsigned long)vto);
-	cpu_icache_inval_page((unsigned long)vto);
-}
-
-void clear_user_page(void *addr, unsigned long vaddr, struct page *page)
-{
-	cpu_dcache_wbinval_page((unsigned long)vaddr);
-	cpu_icache_inval_page((unsigned long)vaddr);
-	clear_page(addr);
-	cpu_dcache_wbinval_page((unsigned long)addr);
-	cpu_icache_inval_page((unsigned long)addr);
-}
-
-void copy_user_highpage(struct page *to, struct page *from,
-			unsigned long vaddr, struct vm_area_struct *vma)
-{
-	unsigned long vto, vfrom, flags, kto, kfrom, pfrom, pto;
-	kto = ((unsigned long)page_address(to) & PAGE_MASK);
-	kfrom = ((unsigned long)page_address(from) & PAGE_MASK);
-	pto = page_to_phys(to);
-	pfrom = page_to_phys(from);
-
-	local_irq_save(flags);
-	if (aliasing(vaddr, (unsigned long)kfrom))
-		cpu_dcache_wb_page((unsigned long)kfrom);
-	vto = kremap0(vaddr, pto);
-	vfrom = kremap1(vaddr, pfrom);
-	copy_page((void *)vto, (void *)vfrom);
-	kunmap01(vfrom);
-	kunmap01(vto);
-	local_irq_restore(flags);
-}
-
-EXPORT_SYMBOL(copy_user_highpage);
-
-void clear_user_highpage(struct page *page, unsigned long vaddr)
-{
-	unsigned long vto, flags, kto;
-
-	kto = ((unsigned long)page_address(page) & PAGE_MASK);
-
-	local_irq_save(flags);
-	if (aliasing(kto, vaddr) && kto != 0) {
-		cpu_dcache_inval_page(kto);
-		cpu_icache_inval_page(kto);
-	}
-	vto = kremap0(vaddr, page_to_phys(page));
-	clear_page((void *)vto);
-	kunmap01(vto);
-	local_irq_restore(flags);
-}
-
-EXPORT_SYMBOL(clear_user_highpage);
-
-void flush_dcache_page(struct page *page)
-{
-	struct address_space *mapping;
-
-	mapping = page_mapping_file(page);
-	if (mapping && !mapping_mapped(mapping))
-		set_bit(PG_dcache_dirty, &page->flags);
-	else {
-		unsigned long kaddr, flags;
-
-		kaddr = (unsigned long)page_address(page);
-		local_irq_save(flags);
-		cpu_dcache_wbinval_page(kaddr);
-		if (mapping) {
-			unsigned long vaddr, kto;
-
-			vaddr = page->index << PAGE_SHIFT;
-			if (aliasing(vaddr, kaddr)) {
-				kto = kremap0(vaddr, page_to_phys(page));
-				cpu_dcache_wbinval_page(kto);
-				kunmap01(kto);
-			}
-		}
-		local_irq_restore(flags);
-	}
-}
-EXPORT_SYMBOL(flush_dcache_page);
-
-void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
-		       unsigned long vaddr, void *dst, void *src, int len)
-{
-	unsigned long line_size, start, end, vto, flags;
-
-	local_irq_save(flags);
-	vto = kremap0(vaddr, page_to_phys(page));
-	dst = (void *)(vto | (vaddr & (PAGE_SIZE - 1)));
-	memcpy(dst, src, len);
-	if (vma->vm_flags & VM_EXEC) {
-		line_size = L1_cache_info[DCACHE].line_size;
-		start = (unsigned long)dst & ~(line_size - 1);
-		end =
-		    ((unsigned long)dst + len + line_size - 1) & ~(line_size -
-								   1);
-		cpu_cache_wbinval_range(start, end, 1);
-	}
-	kunmap01(vto);
-	local_irq_restore(flags);
-}
-
-void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
-			 unsigned long vaddr, void *dst, void *src, int len)
-{
-	unsigned long vto, flags;
-
-	local_irq_save(flags);
-	vto = kremap0(vaddr, page_to_phys(page));
-	src = (void *)(vto | (vaddr & (PAGE_SIZE - 1)));
-	memcpy(dst, src, len);
-	kunmap01(vto);
-	local_irq_restore(flags);
-}
-
-void flush_anon_page(struct vm_area_struct *vma,
-		     struct page *page, unsigned long vaddr)
-{
-	unsigned long kaddr, flags, ktmp;
-	if (!PageAnon(page))
-		return;
-
-	if (vma->vm_mm != current->active_mm)
-		return;
-
-	local_irq_save(flags);
-	if (vma->vm_flags & VM_EXEC)
-		cpu_icache_inval_page(vaddr & PAGE_MASK);
-	kaddr = (unsigned long)page_address(page);
-	if (aliasing(vaddr, kaddr)) {
-		ktmp = kremap0(vaddr, page_to_phys(page));
-		cpu_dcache_wbinval_page(ktmp);
-		kunmap01(ktmp);
-	}
-	local_irq_restore(flags);
-}
-
-void flush_kernel_vmap_range(void *addr, int size)
-{
-	unsigned long flags;
-	local_irq_save(flags);
-	cpu_dcache_wb_range((unsigned long)addr, (unsigned long)addr +  size);
-	local_irq_restore(flags);
-}
-EXPORT_SYMBOL(flush_kernel_vmap_range);
-
-void invalidate_kernel_vmap_range(void *addr, int size)
-{
-	unsigned long flags;
-	local_irq_save(flags);
-	cpu_dcache_inval_range((unsigned long)addr, (unsigned long)addr + size);
-	local_irq_restore(flags);
-}
-EXPORT_SYMBOL(invalidate_kernel_vmap_range);
-#endif
diff --git a/arch/nds32/mm/extable.c b/arch/nds32/mm/extable.c
deleted file mode 100644
index db7f0a7c8966..000000000000
--- a/arch/nds32/mm/extable.c
+++ /dev/null
@@ -1,16 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/extable.h>
-#include <linux/uaccess.h>
-
-int fixup_exception(struct pt_regs *regs)
-{
-	const struct exception_table_entry *fixup;
-
-	fixup = search_exception_tables(instruction_pointer(regs));
-	if (fixup)
-		regs->ipc = fixup->fixup;
-
-	return fixup != NULL;
-}
diff --git a/arch/nds32/mm/fault.c b/arch/nds32/mm/fault.c
deleted file mode 100644
index 636977a1c8b9..000000000000
--- a/arch/nds32/mm/fault.c
+++ /dev/null
@@ -1,396 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/extable.h>
-#include <linux/module.h>
-#include <linux/signal.h>
-#include <linux/ptrace.h>
-#include <linux/mm.h>
-#include <linux/init.h>
-#include <linux/hardirq.h>
-#include <linux/uaccess.h>
-#include <linux/perf_event.h>
-
-#include <asm/tlbflush.h>
-
-extern void __noreturn die(const char *str, struct pt_regs *regs, long err);
-
-/*
- * This is useful to dump out the page tables associated with
- * 'addr' in mm 'mm'.
- */
-void show_pte(struct mm_struct *mm, unsigned long addr)
-{
-	pgd_t *pgd;
-	if (!mm)
-		mm = &init_mm;
-
-	pr_alert("pgd = %p\n", mm->pgd);
-	pgd = pgd_offset(mm, addr);
-	pr_alert("[%08lx] *pgd=%08lx", addr, pgd_val(*pgd));
-
-	do {
-		p4d_t *p4d;
-		pud_t *pud;
-		pmd_t *pmd;
-
-		if (pgd_none(*pgd))
-			break;
-
-		if (pgd_bad(*pgd)) {
-			pr_alert("(bad)");
-			break;
-		}
-
-		p4d = p4d_offset(pgd, addr);
-		pud = pud_offset(p4d, addr);
-		pmd = pmd_offset(pud, addr);
-#if PTRS_PER_PMD != 1
-		pr_alert(", *pmd=%08lx", pmd_val(*pmd));
-#endif
-
-		if (pmd_none(*pmd))
-			break;
-
-		if (pmd_bad(*pmd)) {
-			pr_alert("(bad)");
-			break;
-		}
-
-		if (IS_ENABLED(CONFIG_HIGHMEM))
-		{
-			pte_t *pte;
-			/* We must not map this if we have highmem enabled */
-			pte = pte_offset_map(pmd, addr);
-			pr_alert(", *pte=%08lx", pte_val(*pte));
-			pte_unmap(pte);
-		}
-	} while (0);
-
-	pr_alert("\n");
-}
-
-void do_page_fault(unsigned long entry, unsigned long addr,
-		   unsigned int error_code, struct pt_regs *regs)
-{
-	struct task_struct *tsk;
-	struct mm_struct *mm;
-	struct vm_area_struct *vma;
-	int si_code;
-	vm_fault_t fault;
-	unsigned int mask = VM_ACCESS_FLAGS;
-	unsigned int flags = FAULT_FLAG_DEFAULT;
-
-	error_code = error_code & (ITYPE_mskINST | ITYPE_mskETYPE);
-	tsk = current;
-	mm = tsk->mm;
-	si_code = SEGV_MAPERR;
-	/*
-	 * We fault-in kernel-space virtual memory on-demand. The
-	 * 'reference' page table is init_mm.pgd.
-	 *
-	 * NOTE! We MUST NOT take any locks for this case. We may
-	 * be in an interrupt or a critical region, and should
-	 * only copy the information from the master page table,
-	 * nothing more.
-	 */
-	if (addr >= TASK_SIZE) {
-		if (user_mode(regs))
-			goto bad_area_nosemaphore;
-
-		if (addr >= TASK_SIZE && addr < VMALLOC_END
-		    && (entry == ENTRY_PTE_NOT_PRESENT))
-			goto vmalloc_fault;
-		else
-			goto no_context;
-	}
-
-	/* Send a signal to the task for handling the unalignment access. */
-	if (entry == ENTRY_GENERAL_EXCPETION
-	    && error_code == ETYPE_ALIGNMENT_CHECK) {
-		if (user_mode(regs))
-			goto bad_area_nosemaphore;
-		else
-			goto no_context;
-	}
-
-	/*
-	 * If we're in an interrupt or have no user
-	 * context, we must not take the fault..
-	 */
-	if (unlikely(faulthandler_disabled() || !mm))
-		goto no_context;
-
-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
-
-	/*
-	 * As per x86, we may deadlock here. However, since the kernel only
-	 * validly references user space from well defined areas of the code,
-	 * we can bug out early if this is from code which shouldn't.
-	 */
-	if (unlikely(!mmap_read_trylock(mm))) {
-		if (!user_mode(regs) &&
-		    !search_exception_tables(instruction_pointer(regs)))
-			goto no_context;
-retry:
-		mmap_read_lock(mm);
-	} else {
-		/*
-		 * The above down_read_trylock() might have succeeded in which
-		 * case, we'll have missed the might_sleep() from down_read().
-		 */
-		might_sleep();
-		if (IS_ENABLED(CONFIG_DEBUG_VM)) {
-			if (!user_mode(regs) &&
-			    !search_exception_tables(instruction_pointer(regs)))
-				goto no_context;
-		}
-	}
-
-	vma = find_vma(mm, addr);
-
-	if (unlikely(!vma))
-		goto bad_area;
-
-	if (vma->vm_start <= addr)
-		goto good_area;
-
-	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
-		goto bad_area;
-
-	if (unlikely(expand_stack(vma, addr)))
-		goto bad_area;
-
-	/*
-	 * Ok, we have a good vm_area for this memory access, so
-	 * we can handle it..
-	 */
-
-good_area:
-	si_code = SEGV_ACCERR;
-
-	/* first do some preliminary protection checks */
-	if (entry == ENTRY_PTE_NOT_PRESENT) {
-		if (error_code & ITYPE_mskINST)
-			mask = VM_EXEC;
-		else {
-			mask = VM_READ | VM_WRITE;
-		}
-	} else if (entry == ENTRY_TLB_MISC) {
-		switch (error_code & ITYPE_mskETYPE) {
-		case RD_PROT:
-			mask = VM_READ;
-			break;
-		case WRT_PROT:
-			mask = VM_WRITE;
-			flags |= FAULT_FLAG_WRITE;
-			break;
-		case NOEXEC:
-			mask = VM_EXEC;
-			break;
-		case PAGE_MODIFY:
-			mask = VM_WRITE;
-			flags |= FAULT_FLAG_WRITE;
-			break;
-		case ACC_BIT:
-			BUG();
-		default:
-			break;
-		}
-
-	}
-	if (!(vma->vm_flags & mask))
-		goto bad_area;
-
-	/*
-	 * If for any reason at all we couldn't handle the fault,
-	 * make sure we exit gracefully rather than endlessly redo
-	 * the fault.
-	 */
-
-	fault = handle_mm_fault(vma, addr, flags, regs);
-
-	/*
-	 * If we need to retry but a fatal signal is pending, handle the
-	 * signal first. We do not need to release the mmap_lock because it
-	 * would already be released in __lock_page_or_retry in mm/filemap.c.
-	 */
-	if (fault_signal_pending(fault, regs)) {
-		if (!user_mode(regs))
-			goto no_context;
-		return;
-	}
-
-	if (unlikely(fault & VM_FAULT_ERROR)) {
-		if (fault & VM_FAULT_OOM)
-			goto out_of_memory;
-		else if (fault & VM_FAULT_SIGBUS)
-			goto do_sigbus;
-		else
-			goto bad_area;
-	}
-
-	if (fault & VM_FAULT_RETRY) {
-		flags |= FAULT_FLAG_TRIED;
-
-		/* No need to mmap_read_unlock(mm) as we would
-		 * have already released it in __lock_page_or_retry
-		 * in mm/filemap.c.
-		 */
-		goto retry;
-	}
-
-	mmap_read_unlock(mm);
-	return;
-
-	/*
-	 * Something tried to access memory that isn't in our memory map..
-	 * Fix it, but check if it's kernel or user first..
-	 */
-bad_area:
-	mmap_read_unlock(mm);
-
-bad_area_nosemaphore:
-
-	/* User mode accesses just cause a SIGSEGV */
-
-	if (user_mode(regs)) {
-		tsk->thread.address = addr;
-		tsk->thread.error_code = error_code;
-		tsk->thread.trap_no = entry;
-		force_sig_fault(SIGSEGV, si_code, (void __user *)addr);
-		return;
-	}
-
-no_context:
-
-	/* Are we prepared to handle this kernel fault?
-	 *
-	 * (The kernel has valid exception-points in the source
-	 *  when it acesses user-memory. When it fails in one
-	 *  of those points, we find it in a table and do a jump
-	 *  to some fixup code that loads an appropriate error
-	 *  code)
-	 */
-
-	{
-		const struct exception_table_entry *entry;
-
-		if ((entry =
-		     search_exception_tables(instruction_pointer(regs))) !=
-		    NULL) {
-			/* Adjust the instruction pointer in the stackframe */
-			instruction_pointer(regs) = entry->fixup;
-			return;
-		}
-	}
-
-	/*
-	 * Oops. The kernel tried to access some bad page. We'll have to
-	 * terminate things with extreme prejudice.
-	 */
-
-	bust_spinlocks(1);
-	pr_alert("Unable to handle kernel %s at virtual address %08lx\n",
-		 (addr < PAGE_SIZE) ? "NULL pointer dereference" :
-		 "paging request", addr);
-
-	show_pte(mm, addr);
-	die("Oops", regs, error_code);
-
-	/*
-	 * We ran out of memory, or some other thing happened to us that made
-	 * us unable to handle the page fault gracefully.
-	 */
-
-out_of_memory:
-	mmap_read_unlock(mm);
-	if (!user_mode(regs))
-		goto no_context;
-	pagefault_out_of_memory();
-	return;
-
-do_sigbus:
-	mmap_read_unlock(mm);
-
-	/* Kernel mode? Handle exceptions or die */
-	if (!user_mode(regs))
-		goto no_context;
-
-	/*
-	 * Send a sigbus
-	 */
-	tsk->thread.address = addr;
-	tsk->thread.error_code = error_code;
-	tsk->thread.trap_no = entry;
-	force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)addr);
-
-	return;
-
-vmalloc_fault:
-	{
-		/*
-		 * Synchronize this task's top level page-table
-		 * with the 'reference' page table.
-		 *
-		 * Use current_pgd instead of tsk->active_mm->pgd
-		 * since the latter might be unavailable if this
-		 * code is executed in a misfortunately run irq
-		 * (like inside schedule() between switch_mm and
-		 *  switch_to...).
-		 */
-
-		unsigned int index = pgd_index(addr);
-		pgd_t *pgd, *pgd_k;
-		p4d_t *p4d, *p4d_k;
-		pud_t *pud, *pud_k;
-		pmd_t *pmd, *pmd_k;
-		pte_t *pte_k;
-
-		pgd = (pgd_t *) __va(__nds32__mfsr(NDS32_SR_L1_PPTB)) + index;
-		pgd_k = init_mm.pgd + index;
-
-		if (!pgd_present(*pgd_k))
-			goto no_context;
-
-		p4d = p4d_offset(pgd, addr);
-		p4d_k = p4d_offset(pgd_k, addr);
-		if (!p4d_present(*p4d_k))
-			goto no_context;
-
-		pud = pud_offset(p4d, addr);
-		pud_k = pud_offset(p4d_k, addr);
-		if (!pud_present(*pud_k))
-			goto no_context;
-
-		pmd = pmd_offset(pud, addr);
-		pmd_k = pmd_offset(pud_k, addr);
-		if (!pmd_present(*pmd_k))
-			goto no_context;
-
-		if (!pmd_present(*pmd))
-			set_pmd(pmd, *pmd_k);
-		else
-			BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
-
-		/*
-		 * Since the vmalloc area is global, we don't
-		 * need to copy individual PTE's, it is enough to
-		 * copy the pgd pointer into the pte page of the
-		 * root task. If that is there, we'll find our pte if
-		 * it exists.
-		 */
-
-		/* Make sure the actual PTE exists as well to
-		 * catch kernel vmalloc-area accesses to non-mapped
-		 * addres. If we don't do this, this will just
-		 * silently loop forever.
-		 */
-
-		pte_k = pte_offset_kernel(pmd_k, addr);
-		if (!pte_present(*pte_k))
-			goto no_context;
-
-		return;
-	}
-}
diff --git a/arch/nds32/mm/init.c b/arch/nds32/mm/init.c
deleted file mode 100644
index f63f839738c4..000000000000
--- a/arch/nds32/mm/init.c
+++ /dev/null
@@ -1,263 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 1995-2005 Russell King
-// Copyright (C) 2012 ARM Ltd.
-// Copyright (C) 2013-2017 Andes Technology Corporation
-
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/swap.h>
-#include <linux/init.h>
-#include <linux/memblock.h>
-#include <linux/mman.h>
-#include <linux/nodemask.h>
-#include <linux/initrd.h>
-#include <linux/highmem.h>
-
-#include <asm/sections.h>
-#include <asm/setup.h>
-#include <asm/tlb.h>
-#include <asm/page.h>
-
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-DEFINE_SPINLOCK(anon_alias_lock);
-extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
-
-/*
- * empty_zero_page is a special page that is used for
- * zero-initialized data and COW.
- */
-struct page *empty_zero_page;
-EXPORT_SYMBOL(empty_zero_page);
-
-static void __init zone_sizes_init(void)
-{
-	unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0 };
-
-	max_zone_pfn[ZONE_NORMAL] = max_low_pfn;
-#ifdef CONFIG_HIGHMEM
-	max_zone_pfn[ZONE_HIGHMEM] = max_pfn;
-#endif
-	free_area_init(max_zone_pfn);
-
-}
-
-/*
- * Map all physical memory under high_memory into kernel's address space.
- *
- * This is explicitly coded for two-level page tables, so if you need
- * something else then this needs to change.
- */
-static void __init map_ram(void)
-{
-	unsigned long v, p, e;
-	pgd_t *pge;
-	p4d_t *p4e;
-	pud_t *pue;
-	pmd_t *pme;
-	pte_t *pte;
-	/* These mark extents of read-only kernel pages...
-	 * ...from vmlinux.lds.S
-	 */
-
-	p = (u32) memblock_start_of_DRAM() & PAGE_MASK;
-	e = min((u32) memblock_end_of_DRAM(), (u32) __pa(high_memory));
-
-	v = (u32) __va(p);
-	pge = pgd_offset_k(v);
-
-	while (p < e) {
-		int j;
-		p4e = p4d_offset(pge, v);
-		pue = pud_offset(p4e, v);
-		pme = pmd_offset(pue, v);
-
-		if ((u32) pue != (u32) pge || (u32) pme != (u32) pge) {
-			panic("%s: Kernel hardcoded for "
-			      "two-level page tables", __func__);
-		}
-
-		/* Alloc one page for holding PTE's... */
-		pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
-		if (!pte)
-			panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
-			      __func__, PAGE_SIZE, PAGE_SIZE);
-		set_pmd(pme, __pmd(__pa(pte) + _PAGE_KERNEL_TABLE));
-
-		/* Fill the newly allocated page with PTE'S */
-		for (j = 0; p < e && j < PTRS_PER_PTE;
-		     v += PAGE_SIZE, p += PAGE_SIZE, j++, pte++) {
-			/* Create mapping between p and v. */
-			/* TODO: more fine grant for page access permission */
-			set_pte(pte, __pte(p + pgprot_val(PAGE_KERNEL)));
-		}
-
-		pge++;
-	}
-}
-static pmd_t *fixmap_pmd_p;
-static void __init fixedrange_init(void)
-{
-	unsigned long vaddr;
-	pmd_t *pmd;
-#ifdef CONFIG_HIGHMEM
-	pte_t *pte;
-#endif /* CONFIG_HIGHMEM */
-
-	/*
-	 * Fixed mappings:
-	 */
-	vaddr = __fix_to_virt(__end_of_fixed_addresses - 1);
-	pmd = pmd_off_k(vaddr);
-	fixmap_pmd_p = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
-	if (!fixmap_pmd_p)
-		panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
-		      __func__, PAGE_SIZE, PAGE_SIZE);
-	set_pmd(pmd, __pmd(__pa(fixmap_pmd_p) + _PAGE_KERNEL_TABLE));
-
-#ifdef CONFIG_HIGHMEM
-	/*
-	 * Permanent kmaps:
-	 */
-	vaddr = PKMAP_BASE;
-
-	pmd = pmd_off_k(vaddr);
-	pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
-	if (!pte)
-		panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
-		      __func__, PAGE_SIZE, PAGE_SIZE);
-	set_pmd(pmd, __pmd(__pa(pte) + _PAGE_KERNEL_TABLE));
-	pkmap_page_table = pte;
-#endif /* CONFIG_HIGHMEM */
-}
-
-/*
- * paging_init() sets up the page tables, initialises the zone memory
- * maps, and sets up the zero page, bad page and bad page tables.
- */
-void __init paging_init(void)
-{
-	int i;
-	void *zero_page;
-
-	pr_info("Setting up paging and PTEs.\n");
-	/* clear out the init_mm.pgd that will contain the kernel's mappings */
-	for (i = 0; i < PTRS_PER_PGD; i++)
-		swapper_pg_dir[i] = __pgd(1);
-
-	map_ram();
-
-	fixedrange_init();
-
-	/* allocate space for empty_zero_page */
-	zero_page = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
-	if (!zero_page)
-		panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
-		      __func__, PAGE_SIZE, PAGE_SIZE);
-	zone_sizes_init();
-
-	empty_zero_page = virt_to_page(zero_page);
-	flush_dcache_page(empty_zero_page);
-}
-
-static inline void __init free_highmem(void)
-{
-#ifdef CONFIG_HIGHMEM
-	unsigned long pfn;
-	for (pfn = PFN_UP(__pa(high_memory)); pfn < max_pfn; pfn++) {
-		phys_addr_t paddr = (phys_addr_t) pfn << PAGE_SHIFT;
-		if (!memblock_is_reserved(paddr))
-			free_highmem_page(pfn_to_page(pfn));
-	}
-#endif
-}
-
-static void __init set_max_mapnr_init(void)
-{
-	max_mapnr = max_pfn;
-}
-
-/*
- * mem_init() marks the free areas in the mem_map and tells us how much
- * memory is free.  This is done after various parts of the system have
- * claimed their memory after the kernel image.
- */
-void __init mem_init(void)
-{
-	phys_addr_t memory_start = memblock_start_of_DRAM();
-	BUG_ON(!mem_map);
-	set_max_mapnr_init();
-
-	free_highmem();
-
-	/* this will put all low memory onto the freelists */
-	memblock_free_all();
-
-	pr_info("virtual kernel memory layout:\n"
-		"    fixmap  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
-#ifdef CONFIG_HIGHMEM
-		"    pkmap   : 0x%08lx - 0x%08lx   (%4ld kB)\n"
-#endif
-		"    consist : 0x%08lx - 0x%08lx   (%4ld MB)\n"
-		"    vmalloc : 0x%08lx - 0x%08lx   (%4ld MB)\n"
-		"    lowmem  : 0x%08lx - 0x%08lx   (%4ld MB)\n"
-		"      .init : 0x%08lx - 0x%08lx   (%4ld kB)\n"
-		"      .data : 0x%08lx - 0x%08lx   (%4ld kB)\n"
-		"      .text : 0x%08lx - 0x%08lx   (%4ld kB)\n",
-		FIXADDR_START, FIXADDR_TOP, (FIXADDR_TOP - FIXADDR_START) >> 10,
-#ifdef CONFIG_HIGHMEM
-		PKMAP_BASE, PKMAP_BASE + LAST_PKMAP * PAGE_SIZE,
-		(LAST_PKMAP * PAGE_SIZE) >> 10,
-#endif
-		CONSISTENT_BASE, CONSISTENT_END,
-		((CONSISTENT_END) - (CONSISTENT_BASE)) >> 20, VMALLOC_START,
-		(unsigned long)VMALLOC_END, (VMALLOC_END - VMALLOC_START) >> 20,
-		(unsigned long)__va(memory_start), (unsigned long)high_memory,
-		((unsigned long)high_memory -
-		 (unsigned long)__va(memory_start)) >> 20,
-		(unsigned long)&__init_begin, (unsigned long)&__init_end,
-		((unsigned long)&__init_end -
-		 (unsigned long)&__init_begin) >> 10, (unsigned long)&_etext,
-		(unsigned long)&_edata,
-		((unsigned long)&_edata - (unsigned long)&_etext) >> 10,
-		(unsigned long)&_text, (unsigned long)&_etext,
-		((unsigned long)&_etext - (unsigned long)&_text) >> 10);
-
-	/*
-	 * Check boundaries twice: Some fundamental inconsistencies can
-	 * be detected at build time already.
-	 */
-#ifdef CONFIG_HIGHMEM
-	BUILD_BUG_ON(PKMAP_BASE + LAST_PKMAP * PAGE_SIZE > FIXADDR_START);
-	BUILD_BUG_ON((CONSISTENT_END) > PKMAP_BASE);
-#endif
-	BUILD_BUG_ON(VMALLOC_END > CONSISTENT_BASE);
-	BUILD_BUG_ON(VMALLOC_START >= VMALLOC_END);
-
-#ifdef CONFIG_HIGHMEM
-	BUG_ON(PKMAP_BASE + LAST_PKMAP * PAGE_SIZE > FIXADDR_START);
-	BUG_ON(CONSISTENT_END > PKMAP_BASE);
-#endif
-	BUG_ON(VMALLOC_END > CONSISTENT_BASE);
-	BUG_ON(VMALLOC_START >= VMALLOC_END);
-	BUG_ON((unsigned long)high_memory > VMALLOC_START);
-
-	return;
-}
-
-void __set_fixmap(enum fixed_addresses idx,
-			       phys_addr_t phys, pgprot_t flags)
-{
-	unsigned long addr = __fix_to_virt(idx);
-	pte_t *pte;
-
-	BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);
-
-	pte = (pte_t *)&fixmap_pmd_p[pte_index(addr)];
-
-	if (pgprot_val(flags)) {
-		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
-	} else {
-		pte_clear(&init_mm, addr, pte);
-		flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
-	}
-}
diff --git a/arch/nds32/mm/mm-nds32.c b/arch/nds32/mm/mm-nds32.c
deleted file mode 100644
index f2778f2b39f6..000000000000
--- a/arch/nds32/mm/mm-nds32.c
+++ /dev/null
@@ -1,96 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/init_task.h>
-
-#define __HAVE_ARCH_PGD_FREE
-#include <asm/pgalloc.h>
-
-#define FIRST_KERNEL_PGD_NR	(USER_PTRS_PER_PGD)
-
-/*
- * need to get a page for level 1
- */
-
-pgd_t *pgd_alloc(struct mm_struct *mm)
-{
-	pgd_t *new_pgd, *init_pgd;
-	int i;
-
-	new_pgd = (pgd_t *) __get_free_pages(GFP_KERNEL, 0);
-	if (!new_pgd)
-		return NULL;
-	for (i = 0; i < PTRS_PER_PGD; i++) {
-		(*new_pgd) = 1;
-		new_pgd++;
-	}
-	new_pgd -= PTRS_PER_PGD;
-
-	init_pgd = pgd_offset_k(0);
-
-	memcpy(new_pgd + FIRST_KERNEL_PGD_NR, init_pgd + FIRST_KERNEL_PGD_NR,
-	       (PTRS_PER_PGD - FIRST_KERNEL_PGD_NR) * sizeof(pgd_t));
-
-	cpu_dcache_wb_range((unsigned long)new_pgd,
-			    (unsigned long)new_pgd +
-			    PTRS_PER_PGD * sizeof(pgd_t));
-	inc_lruvec_page_state(virt_to_page((unsigned long *)new_pgd),
-			      NR_PAGETABLE);
-
-	return new_pgd;
-}
-
-void pgd_free(struct mm_struct *mm, pgd_t * pgd)
-{
-	pmd_t *pmd;
-	struct page *pte;
-
-	if (!pgd)
-		return;
-
-	pmd = (pmd_t *) pgd;
-	if (pmd_none(*pmd))
-		goto free;
-	if (pmd_bad(*pmd)) {
-		pmd_ERROR(*pmd);
-		pmd_clear(pmd);
-		goto free;
-	}
-
-	pte = pmd_page(*pmd);
-	pmd_clear(pmd);
-	dec_lruvec_page_state(virt_to_page((unsigned long *)pgd), NR_PAGETABLE);
-	pte_free(mm, pte);
-	mm_dec_nr_ptes(mm);
-	pmd_free(mm, pmd);
-free:
-	free_pages((unsigned long)pgd, 0);
-}
-
-/*
- * In order to soft-boot, we need to insert a 1:1 mapping in place of
- * the user-mode pages.  This will then ensure that we have predictable
- * results when turning the mmu off
- */
-void setup_mm_for_reboot(char mode)
-{
-	unsigned long pmdval;
-	pgd_t *pgd;
-	p4d_t *p4d;
-	pud_t *pud;
-	pmd_t *pmd;
-	int i;
-
-	if (current->mm && current->mm->pgd)
-		pgd = current->mm->pgd;
-	else
-		pgd = init_mm.pgd;
-
-	for (i = 0; i < USER_PTRS_PER_PGD; i++) {
-		pmdval = (i << PGDIR_SHIFT);
-		p4d = p4d_offset(pgd, i << PGDIR_SHIFT);
-		pud = pud_offset(p4d, i << PGDIR_SHIFT);
-		pmd = pmd_offset(pud + i, i << PGDIR_SHIFT);
-		set_pmd(pmd, __pmd(pmdval));
-	}
-}
diff --git a/arch/nds32/mm/mmap.c b/arch/nds32/mm/mmap.c
deleted file mode 100644
index 1bdf5e7d1b43..000000000000
--- a/arch/nds32/mm/mmap.c
+++ /dev/null
@@ -1,73 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/sched.h>
-#include <linux/mman.h>
-#include <linux/shm.h>
-
-#define COLOUR_ALIGN(addr,pgoff)		\
-	((((addr)+SHMLBA-1)&~(SHMLBA-1)) +	\
-	 (((pgoff)<<PAGE_SHIFT) & (SHMLBA-1)))
-
-/*
- * We need to ensure that shared mappings are correctly aligned to
- * avoid aliasing issues with VIPT caches.  We need to ensure that
- * a specific page of an object is always mapped at a multiple of
- * SHMLBA bytes.
- *
- * We unconditionally provide this function for all cases, however
- * in the VIVT case, we optimise out the alignment rules.
- */
-unsigned long
-arch_get_unmapped_area(struct file *filp, unsigned long addr,
-		       unsigned long len, unsigned long pgoff,
-		       unsigned long flags)
-{
-	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma;
-	int do_align = 0;
-	struct vm_unmapped_area_info info;
-	int aliasing = 0;
-	if(IS_ENABLED(CONFIG_CPU_CACHE_ALIASING))
-		aliasing = 1;
-
-	/*
-	 * We only need to do colour alignment if either the I or D
-	 * caches alias.
-	 */
-	if (aliasing)
-		do_align = filp || (flags & MAP_SHARED);
-
-	/*
-	 * We enforce the MAP_FIXED case.
-	 */
-	if (flags & MAP_FIXED) {
-		if (aliasing && flags & MAP_SHARED &&
-		    (addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1))
-			return -EINVAL;
-		return addr;
-	}
-
-	if (len > TASK_SIZE)
-		return -ENOMEM;
-
-	if (addr) {
-		if (do_align)
-			addr = COLOUR_ALIGN(addr, pgoff);
-		else
-			addr = PAGE_ALIGN(addr);
-
-		vma = find_vma(mm, addr);
-		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vm_start_gap(vma)))
-			return addr;
-	}
-
-	info.flags = 0;
-	info.length = len;
-	info.low_limit = mm->mmap_base;
-	info.high_limit = TASK_SIZE;
-	info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0;
-	info.align_offset = pgoff << PAGE_SHIFT;
-	return vm_unmapped_area(&info);
-}
diff --git a/arch/nds32/mm/proc.c b/arch/nds32/mm/proc.c
deleted file mode 100644
index 848c845f5f33..000000000000
--- a/arch/nds32/mm/proc.c
+++ /dev/null
@@ -1,536 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <asm/nds32.h>
-#include <asm/tlbflush.h>
-#include <asm/cacheflush.h>
-#include <asm/l2_cache.h>
-#include <nds32_intrinsic.h>
-
-#include <asm/cache_info.h>
-extern struct cache_info L1_cache_info[2];
-
-int va_kernel_present(unsigned long addr)
-{
-	pmd_t *pmd;
-	pte_t *ptep, pte;
-
-	pmd = pmd_off_k(addr);
-	if (!pmd_none(*pmd)) {
-		ptep = pte_offset_map(pmd, addr);
-		pte = *ptep;
-		if (pte_present(pte))
-			return pte;
-	}
-	return 0;
-}
-
-pte_t va_present(struct mm_struct * mm, unsigned long addr)
-{
-	pgd_t *pgd;
-	p4d_t *p4d;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *ptep, pte;
-
-	pgd = pgd_offset(mm, addr);
-	if (!pgd_none(*pgd)) {
-		p4d = p4d_offset(pgd, addr);
-		if (!p4d_none(*p4d)) {
-			pud = pud_offset(p4d, addr);
-			if (!pud_none(*pud)) {
-				pmd = pmd_offset(pud, addr);
-				if (!pmd_none(*pmd)) {
-					ptep = pte_offset_map(pmd, addr);
-					pte = *ptep;
-					if (pte_present(pte))
-						return pte;
-				}
-			}
-		}
-	}
-	return 0;
-
-}
-
-int va_readable(struct pt_regs *regs, unsigned long addr)
-{
-	struct mm_struct *mm = current->mm;
-	pte_t pte;
-	int ret = 0;
-
-	if (user_mode(regs)) {
-		/* user mode */
-		pte = va_present(mm, addr);
-		if (!pte && pte_read(pte))
-			ret = 1;
-	} else {
-		/* superuser mode is always readable, so we can only
-		 * check it is present or not*/
-		return (! !va_kernel_present(addr));
-	}
-	return ret;
-}
-
-int va_writable(struct pt_regs *regs, unsigned long addr)
-{
-	struct mm_struct *mm = current->mm;
-	pte_t pte;
-	int ret = 0;
-
-	if (user_mode(regs)) {
-		/* user mode */
-		pte = va_present(mm, addr);
-		if (!pte && pte_write(pte))
-			ret = 1;
-	} else {
-		/* superuser mode */
-		pte = va_kernel_present(addr);
-		if (!pte && pte_kernel_write(pte))
-			ret = 1;
-	}
-	return ret;
-}
-
-/*
- * All
- */
-void cpu_icache_inval_all(void)
-{
-	unsigned long end, line_size;
-
-	line_size = L1_cache_info[ICACHE].line_size;
-	end =
-	    line_size * L1_cache_info[ICACHE].ways * L1_cache_info[ICACHE].sets;
-
-	do {
-		end -= line_size;
-		__asm__ volatile ("\n\tcctl %0, L1I_IX_INVAL"::"r" (end));
-		end -= line_size;
-		__asm__ volatile ("\n\tcctl %0, L1I_IX_INVAL"::"r" (end));
-		end -= line_size;
-		__asm__ volatile ("\n\tcctl %0, L1I_IX_INVAL"::"r" (end));
-		end -= line_size;
-		__asm__ volatile ("\n\tcctl %0, L1I_IX_INVAL"::"r" (end));
-	} while (end > 0);
-	__nds32__isb();
-}
-
-void cpu_dcache_inval_all(void)
-{
-	__nds32__cctl_l1d_invalall();
-}
-
-#ifdef CONFIG_CACHE_L2
-void dcache_wb_all_level(void)
-{
-	unsigned long flags, cmd;
-	local_irq_save(flags);
-	__nds32__cctl_l1d_wball_alvl();
-	/* Section 1: Ensure the section 2 & 3 program code execution after */
-	__nds32__cctlidx_read(NDS32_CCTL_L1D_IX_RWD,0);
-
-	/* Section 2: Confirm the writeback all level is done in CPU and L2C */
-	cmd = CCTL_CMD_L2_SYNC;
-	L2_CMD_RDY();
-	L2C_W_REG(L2_CCTL_CMD_OFF, cmd);
-	L2_CMD_RDY();
-
-	/* Section 3: Writeback whole L2 cache */
-	cmd = CCTL_ALL_CMD | CCTL_CMD_L2_IX_WB;
-	L2_CMD_RDY();
-	L2C_W_REG(L2_CCTL_CMD_OFF, cmd);
-	L2_CMD_RDY();
-	__nds32__msync_all();
-	local_irq_restore(flags);
-}
-EXPORT_SYMBOL(dcache_wb_all_level);
-#endif
-
-void cpu_dcache_wb_all(void)
-{
-	__nds32__cctl_l1d_wball_one_lvl();
-	__nds32__cctlidx_read(NDS32_CCTL_L1D_IX_RWD,0);
-}
-
-void cpu_dcache_wbinval_all(void)
-{
-#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
-	unsigned long flags;
-	local_irq_save(flags);
-#endif
-	cpu_dcache_wb_all();
-	cpu_dcache_inval_all();
-#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
-	local_irq_restore(flags);
-#endif
-}
-
-/*
- * Page
- */
-void cpu_icache_inval_page(unsigned long start)
-{
-	unsigned long line_size, end;
-
-	line_size = L1_cache_info[ICACHE].line_size;
-	end = start + PAGE_SIZE;
-
-	do {
-		end -= line_size;
-		__asm__ volatile ("\n\tcctl %0, L1I_VA_INVAL"::"r" (end));
-		end -= line_size;
-		__asm__ volatile ("\n\tcctl %0, L1I_VA_INVAL"::"r" (end));
-		end -= line_size;
-		__asm__ volatile ("\n\tcctl %0, L1I_VA_INVAL"::"r" (end));
-		end -= line_size;
-		__asm__ volatile ("\n\tcctl %0, L1I_VA_INVAL"::"r" (end));
-	} while (end != start);
-	__nds32__isb();
-}
-
-void cpu_dcache_inval_page(unsigned long start)
-{
-	unsigned long line_size, end;
-
-	line_size = L1_cache_info[DCACHE].line_size;
-	end = start + PAGE_SIZE;
-
-	do {
-		end -= line_size;
-		__asm__ volatile ("\n\tcctl %0, L1D_VA_INVAL"::"r" (end));
-		end -= line_size;
-		__asm__ volatile ("\n\tcctl %0, L1D_VA_INVAL"::"r" (end));
-		end -= line_size;
-		__asm__ volatile ("\n\tcctl %0, L1D_VA_INVAL"::"r" (end));
-		end -= line_size;
-		__asm__ volatile ("\n\tcctl %0, L1D_VA_INVAL"::"r" (end));
-	} while (end != start);
-}
-
-void cpu_dcache_wb_page(unsigned long start)
-{
-#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
-	unsigned long line_size, end;
-
-	line_size = L1_cache_info[DCACHE].line_size;
-	end = start + PAGE_SIZE;
-
-	do {
-		end -= line_size;
-		__asm__ volatile ("\n\tcctl %0, L1D_VA_WB"::"r" (end));
-		end -= line_size;
-		__asm__ volatile ("\n\tcctl %0, L1D_VA_WB"::"r" (end));
-		end -= line_size;
-		__asm__ volatile ("\n\tcctl %0, L1D_VA_WB"::"r" (end));
-		end -= line_size;
-		__asm__ volatile ("\n\tcctl %0, L1D_VA_WB"::"r" (end));
-	} while (end != start);
-	__nds32__cctlidx_read(NDS32_CCTL_L1D_IX_RWD,0);
-#endif
-}
-
-void cpu_dcache_wbinval_page(unsigned long start)
-{
-	unsigned long line_size, end;
-
-	line_size = L1_cache_info[DCACHE].line_size;
-	end = start + PAGE_SIZE;
-
-	do {
-		end -= line_size;
-#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
-		__asm__ volatile ("\n\tcctl %0, L1D_VA_WB"::"r" (end));
-#endif
-		__asm__ volatile ("\n\tcctl %0, L1D_VA_INVAL"::"r" (end));
-		end -= line_size;
-#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
-		__asm__ volatile ("\n\tcctl %0, L1D_VA_WB"::"r" (end));
-#endif
-		__asm__ volatile ("\n\tcctl %0, L1D_VA_INVAL"::"r" (end));
-		end -= line_size;
-#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
-		__asm__ volatile ("\n\tcctl %0, L1D_VA_WB"::"r" (end));
-#endif
-		__asm__ volatile ("\n\tcctl %0, L1D_VA_INVAL"::"r" (end));
-		end -= line_size;
-#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
-		__asm__ volatile ("\n\tcctl %0, L1D_VA_WB"::"r" (end));
-#endif
-		__asm__ volatile ("\n\tcctl %0, L1D_VA_INVAL"::"r" (end));
-	} while (end != start);
-	__nds32__cctlidx_read(NDS32_CCTL_L1D_IX_RWD,0);
-}
-
-void cpu_cache_wbinval_page(unsigned long page, int flushi)
-{
-	cpu_dcache_wbinval_page(page);
-	if (flushi)
-		cpu_icache_inval_page(page);
-}
-
-/*
- * Range
- */
-void cpu_icache_inval_range(unsigned long start, unsigned long end)
-{
-	unsigned long line_size;
-
-	line_size = L1_cache_info[ICACHE].line_size;
-
-	while (end > start) {
-		__asm__ volatile ("\n\tcctl %0, L1I_VA_INVAL"::"r" (start));
-		start += line_size;
-	}
-	__nds32__isb();
-}
-
-void cpu_dcache_inval_range(unsigned long start, unsigned long end)
-{
-	unsigned long line_size;
-
-	line_size = L1_cache_info[DCACHE].line_size;
-
-	while (end > start) {
-		__asm__ volatile ("\n\tcctl %0, L1D_VA_INVAL"::"r" (start));
-		start += line_size;
-	}
-}
-
-void cpu_dcache_wb_range(unsigned long start, unsigned long end)
-{
-#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
-	unsigned long line_size;
-
-	line_size = L1_cache_info[DCACHE].line_size;
-
-	while (end > start) {
-		__asm__ volatile ("\n\tcctl %0, L1D_VA_WB"::"r" (start));
-		start += line_size;
-	}
-	__nds32__cctlidx_read(NDS32_CCTL_L1D_IX_RWD,0);
-#endif
-}
-
-void cpu_dcache_wbinval_range(unsigned long start, unsigned long end)
-{
-	unsigned long line_size;
-
-	line_size = L1_cache_info[DCACHE].line_size;
-
-	while (end > start) {
-#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
-		__asm__ volatile ("\n\tcctl %0, L1D_VA_WB"::"r" (start));
-#endif
-		__asm__ volatile ("\n\tcctl %0, L1D_VA_INVAL"::"r" (start));
-		start += line_size;
-	}
-	__nds32__cctlidx_read(NDS32_CCTL_L1D_IX_RWD,0);
-}
-
-void cpu_cache_wbinval_range(unsigned long start, unsigned long end, int flushi)
-{
-	unsigned long line_size, align_start, align_end;
-
-	line_size = L1_cache_info[DCACHE].line_size;
-	align_start = start & ~(line_size - 1);
-	align_end = (end + line_size - 1) & ~(line_size - 1);
-	cpu_dcache_wbinval_range(align_start, align_end);
-
-	if (flushi) {
-		line_size = L1_cache_info[ICACHE].line_size;
-		align_start = start & ~(line_size - 1);
-		align_end = (end + line_size - 1) & ~(line_size - 1);
-		cpu_icache_inval_range(align_start, align_end);
-	}
-}
-
-void cpu_cache_wbinval_range_check(struct vm_area_struct *vma,
-				   unsigned long start, unsigned long end,
-				   bool flushi, bool wbd)
-{
-	unsigned long line_size, t_start, t_end;
-
-	if (!flushi && !wbd)
-		return;
-	line_size = L1_cache_info[DCACHE].line_size;
-	start = start & ~(line_size - 1);
-	end = (end + line_size - 1) & ~(line_size - 1);
-
-	if ((end - start) > (8 * PAGE_SIZE)) {
-		if (wbd)
-			cpu_dcache_wbinval_all();
-		if (flushi)
-			cpu_icache_inval_all();
-		return;
-	}
-
-	t_start = (start + PAGE_SIZE) & PAGE_MASK;
-	t_end = ((end - 1) & PAGE_MASK);
-
-	if ((start & PAGE_MASK) == t_end) {
-		if (va_present(vma->vm_mm, start)) {
-			if (wbd)
-				cpu_dcache_wbinval_range(start, end);
-			if (flushi)
-				cpu_icache_inval_range(start, end);
-		}
-		return;
-	}
-
-	if (va_present(vma->vm_mm, start)) {
-		if (wbd)
-			cpu_dcache_wbinval_range(start, t_start);
-		if (flushi)
-			cpu_icache_inval_range(start, t_start);
-	}
-
-	if (va_present(vma->vm_mm, end - 1)) {
-		if (wbd)
-			cpu_dcache_wbinval_range(t_end, end);
-		if (flushi)
-			cpu_icache_inval_range(t_end, end);
-	}
-
-	while (t_start < t_end) {
-		if (va_present(vma->vm_mm, t_start)) {
-			if (wbd)
-				cpu_dcache_wbinval_page(t_start);
-			if (flushi)
-				cpu_icache_inval_page(t_start);
-		}
-		t_start += PAGE_SIZE;
-	}
-}
-
-#ifdef CONFIG_CACHE_L2
-static inline void cpu_l2cache_op(unsigned long start, unsigned long end, unsigned long op)
-{
-	if (atl2c_base) {
-		unsigned long p_start = __pa(start);
-		unsigned long p_end = __pa(end);
-		unsigned long cmd;
-		unsigned long line_size;
-		/* TODO Can Use PAGE Mode to optimize if range large than PAGE_SIZE */
-		line_size = L2_CACHE_LINE_SIZE();
-		p_start = p_start & (~(line_size - 1));
-		p_end = (p_end + line_size - 1) & (~(line_size - 1));
-		cmd =
-		    (p_start & ~(line_size - 1)) | op |
-		    CCTL_SINGLE_CMD;
-		do {
-			L2_CMD_RDY();
-			L2C_W_REG(L2_CCTL_CMD_OFF, cmd);
-			cmd += line_size;
-			p_start += line_size;
-		} while (p_end > p_start);
-		cmd = CCTL_CMD_L2_SYNC;
-		L2_CMD_RDY();
-		L2C_W_REG(L2_CCTL_CMD_OFF, cmd);
-		L2_CMD_RDY();
-	}
-}
-#else
-#define cpu_l2cache_op(start,end,op) do { } while (0)
-#endif
-/*
- * DMA
- */
-void cpu_dma_wb_range(unsigned long start, unsigned long end)
-{
-	unsigned long line_size;
-	unsigned long flags;
-	line_size = L1_cache_info[DCACHE].line_size;
-	start = start & (~(line_size - 1));
-	end = (end + line_size - 1) & (~(line_size - 1));
-	if (unlikely(start == end))
-		return;
-
-	local_irq_save(flags);
-	cpu_dcache_wb_range(start, end);
-	cpu_l2cache_op(start, end, CCTL_CMD_L2_PA_WB);
-	__nds32__msync_all();
-	local_irq_restore(flags);
-}
-
-void cpu_dma_inval_range(unsigned long start, unsigned long end)
-{
-	unsigned long line_size;
-	unsigned long old_start = start;
-	unsigned long old_end = end;
-	unsigned long flags;
-	line_size = L1_cache_info[DCACHE].line_size;
-	start = start & (~(line_size - 1));
-	end = (end + line_size - 1) & (~(line_size - 1));
-	if (unlikely(start == end))
-		return;
-	local_irq_save(flags);
-	if (start != old_start) {
-		cpu_dcache_wbinval_range(start, start + line_size);
-		cpu_l2cache_op(start, start + line_size, CCTL_CMD_L2_PA_WBINVAL);
-	}
-	if (end != old_end) {
-		cpu_dcache_wbinval_range(end - line_size, end);
-		cpu_l2cache_op(end - line_size, end, CCTL_CMD_L2_PA_WBINVAL);
-	}
-	cpu_dcache_inval_range(start, end);
-	cpu_l2cache_op(start, end, CCTL_CMD_L2_PA_INVAL);
-	__nds32__msync_all();
-	local_irq_restore(flags);
-
-}
-
-void cpu_dma_wbinval_range(unsigned long start, unsigned long end)
-{
-	unsigned long line_size;
-	unsigned long flags;
-	line_size = L1_cache_info[DCACHE].line_size;
-	start = start & (~(line_size - 1));
-	end = (end + line_size - 1) & (~(line_size - 1));
-	if (unlikely(start == end))
-		return;
-
-	local_irq_save(flags);
-	cpu_dcache_wbinval_range(start, end);
-	cpu_l2cache_op(start, end, CCTL_CMD_L2_PA_WBINVAL);
-	__nds32__msync_all();
-	local_irq_restore(flags);
-}
-
-void cpu_proc_init(void)
-{
-}
-
-void cpu_proc_fin(void)
-{
-}
-
-void cpu_do_idle(void)
-{
-	__nds32__standby_no_wake_grant();
-}
-
-void cpu_reset(unsigned long reset)
-{
-	u32 tmp;
-	GIE_DISABLE();
-	tmp = __nds32__mfsr(NDS32_SR_CACHE_CTL);
-	tmp &= ~(CACHE_CTL_mskIC_EN | CACHE_CTL_mskDC_EN);
-	__nds32__mtsr_isb(tmp, NDS32_SR_CACHE_CTL);
-	cpu_dcache_wbinval_all();
-	cpu_icache_inval_all();
-
-	__asm__ __volatile__("jr.toff %0\n\t"::"r"(reset));
-}
-
-void cpu_switch_mm(struct mm_struct *mm)
-{
-	unsigned long cid;
-	cid = __nds32__mfsr(NDS32_SR_TLB_MISC);
-	cid = (cid & ~TLB_MISC_mskCID) | mm->context.id;
-	__nds32__mtsr_dsb(cid, NDS32_SR_TLB_MISC);
-	__nds32__mtsr_isb(__pa(mm->pgd), NDS32_SR_L1_PPTB);
-}
diff --git a/arch/nds32/mm/tlb.c b/arch/nds32/mm/tlb.c
deleted file mode 100644
index dd41f5e0712f..000000000000
--- a/arch/nds32/mm/tlb.c
+++ /dev/null
@@ -1,50 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/spinlock_types.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <asm/nds32.h>
-#include <nds32_intrinsic.h>
-
-unsigned int cpu_last_cid = { TLB_MISC_mskCID + (2 << TLB_MISC_offCID) };
-
-DEFINE_SPINLOCK(cid_lock);
-
-void local_flush_tlb_range(struct vm_area_struct *vma,
-			   unsigned long start, unsigned long end)
-{
-	unsigned long flags, ocid, ncid;
-
-	if ((end - start) > 0x400000) {
-		__nds32__tlbop_flua();
-		__nds32__isb();
-		return;
-	}
-
-	spin_lock_irqsave(&cid_lock, flags);
-	ocid = __nds32__mfsr(NDS32_SR_TLB_MISC);
-	ncid = (ocid & ~TLB_MISC_mskCID) | vma->vm_mm->context.id;
-	__nds32__mtsr_dsb(ncid, NDS32_SR_TLB_MISC);
-	while (start < end) {
-		__nds32__tlbop_inv(start);
-		__nds32__isb();
-		start += PAGE_SIZE;
-	}
-	__nds32__mtsr_dsb(ocid, NDS32_SR_TLB_MISC);
-	spin_unlock_irqrestore(&cid_lock, flags);
-}
-
-void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
-{
-	unsigned long flags, ocid, ncid;
-
-	spin_lock_irqsave(&cid_lock, flags);
-	ocid = __nds32__mfsr(NDS32_SR_TLB_MISC);
-	ncid = (ocid & ~TLB_MISC_mskCID) | vma->vm_mm->context.id;
-	__nds32__mtsr_dsb(ncid, NDS32_SR_TLB_MISC);
-	__nds32__tlbop_inv(addr);
-	__nds32__isb();
-	__nds32__mtsr_dsb(ocid, NDS32_SR_TLB_MISC);
-	spin_unlock_irqrestore(&cid_lock, flags);
-}
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index cfb8ea0df3b1..ae95d06a4a8f 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -617,15 +617,6 @@ config CLKSRC_ST_LPC
 	  Enable this option to use the Low Power controller timer
 	  as clocksource.
 
-config ATCPIT100_TIMER
-	bool "ATCPIT100 timer driver"
-	depends on NDS32 || COMPILE_TEST
-	depends on HAS_IOMEM
-	select TIMER_OF
-	default NDS32
-	help
-	  This option enables support for the Andestech ATCPIT100 timers.
-
 config RISCV_TIMER
 	bool "Timer for the RISC-V platform" if COMPILE_TEST
 	depends on GENERIC_SCHED_CLOCK && RISCV && RISCV_SBI
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index fa5f624eadb6..9c85ee2bb373 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -81,7 +81,6 @@ obj-$(CONFIG_INGENIC_SYSOST)	+= ingenic-sysost.o
 obj-$(CONFIG_INGENIC_TIMER)		+= ingenic-timer.o
 obj-$(CONFIG_CLKSRC_ST_LPC)		+= clksrc_st_lpc.o
 obj-$(CONFIG_X86_NUMACHIP)		+= numachip.o
-obj-$(CONFIG_ATCPIT100_TIMER)		+= timer-atcpit100.o
 obj-$(CONFIG_RISCV_TIMER)		+= timer-riscv.o
 obj-$(CONFIG_CLINT_TIMER)		+= timer-clint.o
 obj-$(CONFIG_CSKY_MP_TIMER)		+= timer-mp-csky.o
diff --git a/drivers/clocksource/timer-atcpit100.c b/drivers/clocksource/timer-atcpit100.c
deleted file mode 100644
index b4bd2f5b801d..000000000000
--- a/drivers/clocksource/timer-atcpit100.c
+++ /dev/null
@@ -1,266 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-/*
- *  Andestech ATCPIT100 Timer Device Driver Implementation
- * Rick Chen, Andes Technology Corporation <rick@andestech.com>
- *
- */
-
-#include <linux/irq.h>
-#include <linux/clocksource.h>
-#include <linux/clockchips.h>
-#include <linux/interrupt.h>
-#include <linux/ioport.h>
-#include <linux/cpufreq.h>
-#include <linux/sched.h>
-#include <linux/sched_clock.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
-#include <linux/of_platform.h>
-#include "timer-of.h"
-#ifdef CONFIG_NDS32
-#include <asm/vdso_timer_info.h>
-#endif
-
-/*
- * Definition of register offsets
- */
-
-/* ID and Revision Register */
-#define ID_REV		0x0
-
-/* Configuration Register */
-#define CFG		0x10
-
-/* Interrupt Enable Register */
-#define INT_EN		0x14
-#define CH_INT_EN(c, i)	((1<<i)<<(4*c))
-#define CH0INT0EN	0x01
-
-/* Interrupt Status Register */
-#define INT_STA		0x18
-#define CH0INT0		0x01
-
-/* Channel Enable Register */
-#define CH_EN		0x1C
-#define CH0TMR0EN	0x1
-#define CH1TMR0EN	0x10
-
-/* Channel 0 , 1 Control Register */
-#define CH0_CTL		(0x20)
-#define CH1_CTL		(0x20 + 0x10)
-
-/* Channel clock source , bit 3 , 0:External clock , 1:APB clock */
-#define APB_CLK		BIT(3)
-
-/* Channel mode , bit 0~2 */
-#define TMR_32		0x1
-#define TMR_16		0x2
-#define TMR_8		0x3
-
-/* Channel 0 , 1 Reload Register */
-#define CH0_REL		(0x24)
-#define CH1_REL		(0x24 + 0x10)
-
-/* Channel 0 , 1 Counter Register */
-#define CH0_CNT		(0x28)
-#define CH1_CNT		(0x28 + 0x10)
-
-#define TIMER_SYNC_TICKS	3
-
-static void atcpit100_ch1_tmr0_en(void __iomem *base)
-{
-	writel(~0, base + CH1_REL);
-	writel(APB_CLK|TMR_32, base + CH1_CTL);
-}
-
-static void atcpit100_ch0_tmr0_en(void __iomem *base)
-{
-	writel(APB_CLK|TMR_32, base + CH0_CTL);
-}
-
-static void atcpit100_clkevt_time_setup(void __iomem *base, unsigned long delay)
-{
-	writel(delay, base + CH0_CNT);
-	writel(delay, base + CH0_REL);
-}
-
-static void atcpit100_timer_clear_interrupt(void __iomem *base)
-{
-	u32 val;
-
-	val = readl(base + INT_STA);
-	writel(val | CH0INT0, base + INT_STA);
-}
-
-static void atcpit100_clocksource_start(void __iomem *base)
-{
-	u32 val;
-
-	val = readl(base + CH_EN);
-	writel(val | CH1TMR0EN, base + CH_EN);
-}
-
-static void atcpit100_clkevt_time_start(void __iomem *base)
-{
-	u32 val;
-
-	val = readl(base + CH_EN);
-	writel(val | CH0TMR0EN, base + CH_EN);
-}
-
-static void atcpit100_clkevt_time_stop(void __iomem *base)
-{
-	u32 val;
-
-	atcpit100_timer_clear_interrupt(base);
-	val = readl(base + CH_EN);
-	writel(val & ~CH0TMR0EN, base + CH_EN);
-}
-
-static int atcpit100_clkevt_next_event(unsigned long evt,
-	struct clock_event_device *clkevt)
-{
-	u32 val;
-	struct timer_of *to = to_timer_of(clkevt);
-
-	val = readl(timer_of_base(to) + CH_EN);
-	writel(val & ~CH0TMR0EN, timer_of_base(to) + CH_EN);
-	writel(evt, timer_of_base(to) + CH0_REL);
-	writel(val | CH0TMR0EN, timer_of_base(to) + CH_EN);
-
-	return 0;
-}
-
-static int atcpit100_clkevt_set_periodic(struct clock_event_device *evt)
-{
-	struct timer_of *to = to_timer_of(evt);
-
-	atcpit100_clkevt_time_setup(timer_of_base(to), timer_of_period(to));
-	atcpit100_clkevt_time_start(timer_of_base(to));
-
-	return 0;
-}
-static int atcpit100_clkevt_shutdown(struct clock_event_device *evt)
-{
-	struct timer_of *to = to_timer_of(evt);
-
-	atcpit100_clkevt_time_stop(timer_of_base(to));
-
-	return 0;
-}
-static int atcpit100_clkevt_set_oneshot(struct clock_event_device *evt)
-{
-	struct timer_of *to = to_timer_of(evt);
-	u32 val;
-
-	writel(~0x0, timer_of_base(to) + CH0_REL);
-	val = readl(timer_of_base(to) + CH_EN);
-	writel(val | CH0TMR0EN, timer_of_base(to) + CH_EN);
-
-	return 0;
-}
-
-static irqreturn_t atcpit100_timer_interrupt(int irq, void *dev_id)
-{
-	struct clock_event_device *evt = (struct clock_event_device *)dev_id;
-	struct timer_of *to = to_timer_of(evt);
-
-	atcpit100_timer_clear_interrupt(timer_of_base(to));
-
-	evt->event_handler(evt);
-
-	return IRQ_HANDLED;
-}
-
-static struct timer_of to = {
-	.flags = TIMER_OF_IRQ | TIMER_OF_CLOCK | TIMER_OF_BASE,
-
-	.clkevt = {
-		.name = "atcpit100_tick",
-		.rating = 300,
-		.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
-		.set_state_shutdown = atcpit100_clkevt_shutdown,
-		.set_state_periodic = atcpit100_clkevt_set_periodic,
-		.set_state_oneshot = atcpit100_clkevt_set_oneshot,
-		.tick_resume = atcpit100_clkevt_shutdown,
-		.set_next_event = atcpit100_clkevt_next_event,
-		.cpumask = cpu_possible_mask,
-	},
-
-	.of_irq = {
-		.handler = atcpit100_timer_interrupt,
-		.flags = IRQF_TIMER | IRQF_IRQPOLL,
-	},
-
-	/*
-	 * FIXME: we currently only support clocking using PCLK
-	 * and using EXTCLK is not supported in the driver.
-	 */
-	.of_clk = {
-		.name = "PCLK",
-	}
-};
-
-static u64 notrace atcpit100_timer_sched_read(void)
-{
-	return ~readl(timer_of_base(&to) + CH1_CNT);
-}
-
-#ifdef CONFIG_NDS32
-static void fill_vdso_need_info(struct device_node *node)
-{
-	struct resource timer_res;
-	of_address_to_resource(node, 0, &timer_res);
-	timer_info.mapping_base = (unsigned long)timer_res.start;
-	timer_info.cycle_count_down = true;
-	timer_info.cycle_count_reg_offset = CH1_CNT;
-}
-#endif
-
-static int __init atcpit100_timer_init(struct device_node *node)
-{
-	int ret;
-	u32 val;
-	void __iomem *base;
-
-	ret = timer_of_init(node, &to);
-	if (ret)
-		return ret;
-
-	base = timer_of_base(&to);
-
-	sched_clock_register(atcpit100_timer_sched_read, 32,
-		timer_of_rate(&to));
-
-	ret = clocksource_mmio_init(base + CH1_CNT,
-		node->name, timer_of_rate(&to), 300, 32,
-		clocksource_mmio_readl_down);
-
-	if (ret) {
-		pr_err("Failed to register clocksource\n");
-		return ret;
-	}
-
-	/* clear channel 0 timer0 interrupt */
-	atcpit100_timer_clear_interrupt(base);
-
-	clockevents_config_and_register(&to.clkevt, timer_of_rate(&to),
-					TIMER_SYNC_TICKS, 0xffffffff);
-	atcpit100_ch0_tmr0_en(base);
-	atcpit100_ch1_tmr0_en(base);
-	atcpit100_clocksource_start(base);
-	atcpit100_clkevt_time_start(base);
-
-	/* Enable channel 0 timer0 interrupt */
-	val = readl(base + INT_EN);
-	writel(val | CH0INT0EN, base + INT_EN);
-
-#ifdef CONFIG_NDS32
-	fill_vdso_need_info(node);
-#endif
-
-	return ret;
-}
-
-TIMER_OF_DECLARE(atcpit100, "andestech,atcpit100", atcpit100_timer_init);
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index c1f611cbfbf8..c6161b0b0cb6 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -92,7 +92,6 @@ obj-$(CONFIG_IRQ_UNIPHIER_AIDET)	+= irq-uniphier-aidet.o
 obj-$(CONFIG_ARCH_SYNQUACER)		+= irq-sni-exiu.o
 obj-$(CONFIG_MESON_IRQ_GPIO)		+= irq-meson-gpio.o
 obj-$(CONFIG_GOLDFISH_PIC) 		+= irq-goldfish-pic.o
-obj-$(CONFIG_NDS32)			+= irq-ativic32.o
 obj-$(CONFIG_QCOM_PDC)			+= qcom-pdc.o
 obj-$(CONFIG_CSKY_MPINTC)		+= irq-csky-mpintc.o
 obj-$(CONFIG_CSKY_APB_INTC)		+= irq-csky-apb-intc.o
diff --git a/drivers/irqchip/irq-ativic32.c b/drivers/irqchip/irq-ativic32.c
deleted file mode 100644
index 223dd2f97d28..000000000000
--- a/drivers/irqchip/irq-ativic32.c
+++ /dev/null
@@ -1,156 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <linux/irq.h>
-#include <linux/of.h>
-#include <linux/of_irq.h>
-#include <linux/of_address.h>
-#include <linux/hardirq.h>
-#include <linux/interrupt.h>
-#include <linux/irqdomain.h>
-#include <linux/irqchip.h>
-#include <nds32_intrinsic.h>
-
-#include <asm/irq_regs.h>
-
-unsigned long wake_mask;
-
-static void ativic32_ack_irq(struct irq_data *data)
-{
-	__nds32__mtsr_dsb(BIT(data->hwirq), NDS32_SR_INT_PEND2);
-}
-
-static void ativic32_mask_irq(struct irq_data *data)
-{
-	unsigned long int_mask2 = __nds32__mfsr(NDS32_SR_INT_MASK2);
-	__nds32__mtsr_dsb(int_mask2 & (~(BIT(data->hwirq))), NDS32_SR_INT_MASK2);
-}
-
-static void ativic32_unmask_irq(struct irq_data *data)
-{
-	unsigned long int_mask2 = __nds32__mfsr(NDS32_SR_INT_MASK2);
-	__nds32__mtsr_dsb(int_mask2 | (BIT(data->hwirq)), NDS32_SR_INT_MASK2);
-}
-
-static int nointc_set_wake(struct irq_data *data, unsigned int on)
-{
-	unsigned long int_mask = __nds32__mfsr(NDS32_SR_INT_MASK);
-	static unsigned long irq_orig_bit;
-	u32 bit = 1 << data->hwirq;
-
-	if (on) {
-		if (int_mask & bit)
-			__assign_bit(data->hwirq, &irq_orig_bit, true);
-		else
-			__assign_bit(data->hwirq, &irq_orig_bit, false);
-
-		__assign_bit(data->hwirq, &int_mask, true);
-		__assign_bit(data->hwirq, &wake_mask, true);
-
-	} else {
-		if (!(irq_orig_bit & bit))
-			__assign_bit(data->hwirq, &int_mask, false);
-
-		__assign_bit(data->hwirq, &wake_mask, false);
-		__assign_bit(data->hwirq, &irq_orig_bit, false);
-	}
-
-	__nds32__mtsr_dsb(int_mask, NDS32_SR_INT_MASK);
-
-	return 0;
-}
-
-static struct irq_chip ativic32_chip = {
-	.name = "ativic32",
-	.irq_ack = ativic32_ack_irq,
-	.irq_mask = ativic32_mask_irq,
-	.irq_unmask = ativic32_unmask_irq,
-	.irq_set_wake = nointc_set_wake,
-};
-
-static unsigned int __initdata nivic_map[6] = { 6, 2, 10, 16, 24, 32 };
-
-static struct irq_domain *root_domain;
-static int ativic32_irq_domain_map(struct irq_domain *id, unsigned int virq,
-				  irq_hw_number_t hw)
-{
-
-	unsigned long int_trigger_type;
-	u32 type;
-	struct irq_data *irq_data;
-	int_trigger_type = __nds32__mfsr(NDS32_SR_INT_TRIGGER);
-	irq_data = irq_get_irq_data(virq);
-	if (!irq_data)
-		return -EINVAL;
-
-	if (int_trigger_type & (BIT(hw))) {
-		irq_set_chip_and_handler(virq, &ativic32_chip, handle_edge_irq);
-		type = IRQ_TYPE_EDGE_RISING;
-	} else {
-		irq_set_chip_and_handler(virq, &ativic32_chip, handle_level_irq);
-		type = IRQ_TYPE_LEVEL_HIGH;
-	}
-
-	irqd_set_trigger_type(irq_data, type);
-	return 0;
-}
-
-static const struct irq_domain_ops ativic32_ops = {
-	.map = ativic32_irq_domain_map,
-	.xlate = irq_domain_xlate_onecell
-};
-
-static irq_hw_number_t get_intr_src(void)
-{
-	return ((__nds32__mfsr(NDS32_SR_ITYPE) & ITYPE_mskVECTOR) >> ITYPE_offVECTOR)
-		- NDS32_VECTOR_offINTERRUPT;
-}
-
-static void ativic32_handle_irq(struct pt_regs *regs)
-{
-	irq_hw_number_t hwirq = get_intr_src();
-	generic_handle_domain_irq(root_domain, hwirq);
-}
-
-/*
- * TODO: convert nds32 to GENERIC_IRQ_MULTI_HANDLER so that this entry logic
- * can live in arch code.
- */
-asmlinkage void asm_do_IRQ(struct pt_regs *regs)
-{
-	struct pt_regs *old_regs;
-
-	irq_enter();
-	old_regs = set_irq_regs(regs);
-	ativic32_handle_irq(regs);
-	set_irq_regs(old_regs);
-	irq_exit();
-}
-
-int __init ativic32_init_irq(struct device_node *node, struct device_node *parent)
-{
-	unsigned long int_vec_base, nivic, nr_ints;
-
-	if (WARN(parent, "non-root ativic32 are not supported"))
-		return -EINVAL;
-
-	int_vec_base = __nds32__mfsr(NDS32_SR_IVB);
-
-	if (((int_vec_base & IVB_mskIVIC_VER) >> IVB_offIVIC_VER) == 0)
-		panic("Unable to use atcivic32 for this cpu.\n");
-
-	nivic = (int_vec_base & IVB_mskNIVIC) >> IVB_offNIVIC;
-	if (nivic >= ARRAY_SIZE(nivic_map))
-		panic("The number of input for ativic32 is not supported.\n");
-
-	nr_ints = nivic_map[nivic];
-
-	root_domain = irq_domain_add_linear(node, nr_ints,
-			&ativic32_ops, NULL);
-
-	if (!root_domain)
-		panic("%s: unable to create IRQ domain\n", node->full_name);
-
-	return 0;
-}
-IRQCHIP_DECLARE(ativic32, "andestech,ativic32", ativic32_init_irq);
diff --git a/drivers/net/ethernet/faraday/Kconfig b/drivers/net/ethernet/faraday/Kconfig
index 3d1e9a302148..c699bd6bcbb9 100644
--- a/drivers/net/ethernet/faraday/Kconfig
+++ b/drivers/net/ethernet/faraday/Kconfig
@@ -6,7 +6,7 @@
 config NET_VENDOR_FARADAY
 	bool "Faraday devices"
 	default y
-	depends on ARM || NDS32 || COMPILE_TEST
+	depends on ARM || COMPILE_TEST
 	help
 	  If you have a network (Ethernet) card belonging to this class, say Y.
 
@@ -19,24 +19,22 @@ if NET_VENDOR_FARADAY
 
 config FTMAC100
 	tristate "Faraday FTMAC100 10/100 Ethernet support"
-	depends on ARM || NDS32 || COMPILE_TEST
+	depends on ARM || COMPILE_TEST
 	depends on !64BIT || BROKEN
 	select MII
 	help
 	  This driver supports the FTMAC100 10/100 Ethernet controller
-	  from Faraday. It is used on Faraday A320, Andes AG101 and some
-	  other ARM/NDS32 SoC's.
+	  from Faraday. It is used on Faraday A320 and some other ARM SoC's.
 
 config FTGMAC100
 	tristate "Faraday FTGMAC100 Gigabit Ethernet support"
-	depends on ARM || NDS32 || COMPILE_TEST
+	depends on ARM || COMPILE_TEST
 	depends on !64BIT || BROKEN
 	select PHYLIB
 	select MDIO_ASPEED if MACH_ASPEED_G6
 	select CRC32
 	help
 	  This driver supports the FTGMAC100 Gigabit Ethernet controller
-	  from Faraday. It is used on Faraday A369, Andes AG102 and some
-	  other ARM/NDS32 SoC's.
+	  from Faraday. It is used on Faraday A369 and some other ARM SoC's.
 
 endif # NET_VENDOR_FARADAY
diff --git a/drivers/video/console/Kconfig b/drivers/video/console/Kconfig
index fcc46380e7c9..40c50fa2dd70 100644
--- a/drivers/video/console/Kconfig
+++ b/drivers/video/console/Kconfig
@@ -9,7 +9,7 @@ config VGA_CONSOLE
 	bool "VGA text console" if EXPERT || !X86
 	depends on !4xx && !PPC_8xx && !SPARC && !M68K && !PARISC &&  !SUPERH && \
 		(!ARM || ARCH_FOOTBRIDGE || ARCH_INTEGRATOR || ARCH_NETWINDER) && \
-		!ARM64 && !ARC && !MICROBLAZE && !OPENRISC && !NDS32 && !S390 && !UML
+		!ARM64 && !ARC && !MICROBLAZE && !OPENRISC && !S390 && !UML
 	default y
 	help
 	  Saying Y here will allow you to use Linux in text mode through a
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 3ccb2c70add4..6a4645a57976 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -362,9 +362,6 @@ if ($arch eq "x86_64") {
     $mcount_regex = "^\\s*([0-9a-fA-F]+):\\sR_RISCV_CALL(_PLT)?\\s_?mcount\$";
     $type = ".quad";
     $alignment = 2;
-} elsif ($arch eq "nds32") {
-    $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_NDS32_HI20_RELA\\s+_mcount\$";
-    $alignment = 2;
 } elsif ($arch eq "csky") {
     $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_CKCORE_PCREL_JSR_IMM26BY2\\s+_mcount\$";
     $alignment = 2;
diff --git a/tools/include/asm/barrier.h b/tools/include/asm/barrier.h
index dc696c151e1c..8d378c57cb01 100644
--- a/tools/include/asm/barrier.h
+++ b/tools/include/asm/barrier.h
@@ -24,8 +24,6 @@
 #include "../../arch/ia64/include/asm/barrier.h"
 #elif defined(__xtensa__)
 #include "../../arch/xtensa/include/asm/barrier.h"
-#elif defined(__nds32__)
-#include "../../arch/nds32/include/asm/barrier.h"
 #else
 #include <asm-generic/barrier.h>
 #endif
diff --git a/tools/perf/arch/nds32/Build b/tools/perf/arch/nds32/Build
deleted file mode 100644
index e4e5f33c84d8..000000000000
--- a/tools/perf/arch/nds32/Build
+++ /dev/null
@@ -1 +0,0 @@
-perf-y += util/
diff --git a/tools/perf/arch/nds32/util/Build b/tools/perf/arch/nds32/util/Build
deleted file mode 100644
index d0bc205fe49a..000000000000
--- a/tools/perf/arch/nds32/util/Build
+++ /dev/null
@@ -1 +0,0 @@
-perf-y += header.o
diff --git a/tools/perf/arch/nds32/util/header.c b/tools/perf/arch/nds32/util/header.c
deleted file mode 100644
index ef9dbdbe7968..000000000000
--- a/tools/perf/arch/nds32/util/header.c
+++ /dev/null
@@ -1,29 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <api/fs/fs.h>
-#include "header.h"
-
-#define STR_LEN 1024
-
-char *get_cpuid_str(struct perf_pmu *pmu)
-{
-	/* In nds32, we only have one cpu */
-	char *buf = NULL;
-	struct cpu_map *cpus;
-	const char *sysfs = sysfs__mountpoint();
-
-	if (!sysfs || !pmu || !pmu->cpus)
-		return NULL;
-
-	buf = malloc(STR_LEN);
-	if (!buf)
-		return NULL;
-
-	cpus = cpu_map__get(pmu->cpus);
-	sprintf(buf, "0x%x", cpus->nr - 1);
-	cpu_map__put(cpus);
-	return buf;
-}
diff --git a/tools/testing/selftests/vDSO/vdso_config.h b/tools/testing/selftests/vDSO/vdso_config.h
index 6188b16827d1..cdfed403ba13 100644
--- a/tools/testing/selftests/vDSO/vdso_config.h
+++ b/tools/testing/selftests/vDSO/vdso_config.h
@@ -53,10 +53,6 @@
 #if __riscv_xlen == 32
 #define VDSO_32BIT		1
 #endif
-#else /* nds32 */
-#define VDSO_VERSION		4
-#define VDSO_NAMES		1
-#define VDSO_32BIT		1
 #endif
 
 static const char *versions[6] = {
-- 
cgit 


From 0f4ef8a3bf784f250abc7d0155ae4e9fa22d8210 Mon Sep 17 00:00:00 2001
From: Ganesh Goudar <ganeshgr@linux.ibm.com>
Date: Fri, 7 Jan 2022 19:44:27 +0530
Subject: selftests/powerpc: Add test for real address error handling

Add test for real address or control memory address access
error handling, using NX-GZIP engine.

The error is injected by accessing the control memory address
using illegal instruction, on successful handling the process
attempting to access control memory address using illegal
instruction receives SIGBUS.

Signed-off-by: Ganesh Goudar <ganeshgr@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220107141428.67862-2-ganeshgr@linux.ibm.com
---
 tools/testing/selftests/powerpc/Makefile           |  3 +-
 tools/testing/selftests/powerpc/mce/Makefile       |  7 +++
 .../testing/selftests/powerpc/mce/inject-ra-err.c  | 65 ++++++++++++++++++++++
 tools/testing/selftests/powerpc/mce/vas-api.h      |  1 +
 4 files changed, 75 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/mce/Makefile
 create mode 100644 tools/testing/selftests/powerpc/mce/inject-ra-err.c
 create mode 120000 tools/testing/selftests/powerpc/mce/vas-api.h

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile
index 0830e63818c1..4830372d7416 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -31,7 +31,8 @@ SUB_DIRS = alignment		\
 	   vphn         \
 	   math		\
 	   ptrace	\
-	   security
+	   security	\
+	   mce
 
 endif
 
diff --git a/tools/testing/selftests/powerpc/mce/Makefile b/tools/testing/selftests/powerpc/mce/Makefile
new file mode 100644
index 000000000000..2424513982d9
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mce/Makefile
@@ -0,0 +1,7 @@
+#SPDX-License-Identifier: GPL-2.0-or-later
+
+TEST_GEN_PROGS := inject-ra-err
+
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c
diff --git a/tools/testing/selftests/powerpc/mce/inject-ra-err.c b/tools/testing/selftests/powerpc/mce/inject-ra-err.c
new file mode 100644
index 000000000000..94323c34d9a6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mce/inject-ra-err.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "vas-api.h"
+#include "utils.h"
+
+static bool faulted;
+
+static void sigbus_handler(int n, siginfo_t *info, void *ctxt_v)
+{
+	ucontext_t *ctxt = (ucontext_t *)ctxt_v;
+	struct pt_regs *regs = ctxt->uc_mcontext.regs;
+
+	faulted = true;
+	regs->nip += 4;
+}
+
+static int test_ra_error(void)
+{
+	struct vas_tx_win_open_attr attr;
+	int fd, *paste_addr;
+	char *devname = "/dev/crypto/nx-gzip";
+	struct sigaction act = {
+		.sa_sigaction = sigbus_handler,
+		.sa_flags = SA_SIGINFO,
+	};
+
+	memset(&attr, 0, sizeof(attr));
+	attr.version = 1;
+	attr.vas_id = 0;
+
+	SKIP_IF(access(devname, F_OK));
+
+	fd = open(devname, O_RDWR);
+	FAIL_IF(fd < 0);
+	FAIL_IF(ioctl(fd, VAS_TX_WIN_OPEN, &attr) < 0);
+	FAIL_IF(sigaction(SIGBUS, &act, NULL) != 0);
+
+	paste_addr = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0ULL);
+
+	/* The following assignment triggers exception */
+	mb();
+	*paste_addr = 1;
+	mb();
+
+	FAIL_IF(!faulted);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(test_ra_error, "inject-ra-err");
+}
+
diff --git a/tools/testing/selftests/powerpc/mce/vas-api.h b/tools/testing/selftests/powerpc/mce/vas-api.h
new file mode 120000
index 000000000000..1455c1bcd351
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mce/vas-api.h
@@ -0,0 +1 @@
+../../../../../arch/powerpc/include/uapi/asm/vas-api.h
\ No newline at end of file
-- 
cgit 


From 57201d657eb76d735298405d3200a3b1f67197e1 Mon Sep 17 00:00:00 2001
From: "Pratik R. Sampat" <psampat@linux.ibm.com>
Date: Thu, 17 Feb 2022 16:23:21 +0530
Subject: selftest/powerpc: Add PAPR sysfs attributes sniff test

Include a testcase to check if the sysfs files for energy and frequency
related have its related attribute files exist and populated

Signed-off-by: Pratik R. Sampat <psampat@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220217105321.52941-3-psampat@linux.ibm.com
---
 tools/testing/selftests/powerpc/Makefile           |   1 +
 .../selftests/powerpc/papr_attributes/.gitignore   |   2 +
 .../selftests/powerpc/papr_attributes/Makefile     |   7 ++
 .../selftests/powerpc/papr_attributes/attr_test.c  | 107 +++++++++++++++++++++
 4 files changed, 117 insertions(+)
 create mode 100644 tools/testing/selftests/powerpc/papr_attributes/.gitignore
 create mode 100644 tools/testing/selftests/powerpc/papr_attributes/Makefile
 create mode 100644 tools/testing/selftests/powerpc/papr_attributes/attr_test.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile
index 4830372d7416..6ba95cd19e42 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -30,6 +30,7 @@ SUB_DIRS = alignment		\
 	   eeh			\
 	   vphn         \
 	   math		\
+	   papr_attributes	\
 	   ptrace	\
 	   security	\
 	   mce
diff --git a/tools/testing/selftests/powerpc/papr_attributes/.gitignore b/tools/testing/selftests/powerpc/papr_attributes/.gitignore
new file mode 100644
index 000000000000..d5f42b6d9e99
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_attributes/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+attr_test
diff --git a/tools/testing/selftests/powerpc/papr_attributes/Makefile b/tools/testing/selftests/powerpc/papr_attributes/Makefile
new file mode 100644
index 000000000000..e899712d49db
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_attributes/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_GEN_PROGS := attr_test
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/papr_attributes/attr_test.c b/tools/testing/selftests/powerpc/papr_attributes/attr_test.c
new file mode 100644
index 000000000000..bab0dc06e90b
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_attributes/attr_test.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PAPR Energy attributes sniff test
+ * This checks if the papr folders and contents are populated relating to
+ * the energy and frequency attributes
+ *
+ * Copyright 2022, Pratik Rajesh Sampat, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <dirent.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <stdlib.h>
+
+#include "utils.h"
+
+enum energy_freq_attrs {
+	POWER_PERFORMANCE_MODE = 1,
+	IDLE_POWER_SAVER_STATUS = 2,
+	MIN_FREQ = 3,
+	STAT_FREQ = 4,
+	MAX_FREQ = 6,
+	PROC_FOLDING_STATUS = 8
+};
+
+enum type {
+	INVALID,
+	STR_VAL,
+	NUM_VAL
+};
+
+int value_type(int id)
+{
+	int val_type;
+
+	switch (id) {
+	case POWER_PERFORMANCE_MODE:
+	case IDLE_POWER_SAVER_STATUS:
+		val_type = STR_VAL;
+		break;
+	case MIN_FREQ:
+	case STAT_FREQ:
+	case MAX_FREQ:
+	case PROC_FOLDING_STATUS:
+		val_type = NUM_VAL;
+		break;
+	default:
+		val_type = INVALID;
+	}
+
+	return val_type;
+}
+
+int verify_energy_info(void)
+{
+	const char *path = "/sys/firmware/papr/energy_scale_info";
+	struct dirent *entry;
+	struct stat s;
+	DIR *dirp;
+
+	if (stat(path, &s) || !S_ISDIR(s.st_mode))
+		return -1;
+	dirp = opendir(path);
+
+	while ((entry = readdir(dirp)) != NULL) {
+		char file_name[64];
+		int id, attr_type;
+		FILE *f;
+
+		if (strcmp(entry->d_name, ".") == 0 ||
+		    strcmp(entry->d_name, "..") == 0)
+			continue;
+
+		id = atoi(entry->d_name);
+		attr_type = value_type(id);
+		if (attr_type == INVALID)
+			return -1;
+
+		/* Check if the files exist and have data in them */
+		sprintf(file_name, "%s/%d/desc", path, id);
+		f = fopen(file_name, "r");
+		if (!f || fgetc(f) == EOF)
+			return -1;
+
+		sprintf(file_name, "%s/%d/value", path, id);
+		f = fopen(file_name, "r");
+		if (!f || fgetc(f) == EOF)
+			return -1;
+
+		if (attr_type == STR_VAL) {
+			sprintf(file_name, "%s/%d/value_desc", path, id);
+			f = fopen(file_name, "r");
+			if (!f || fgetc(f) == EOF)
+				return -1;
+		}
+	}
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(verify_energy_info, "papr_attributes");
+}
-- 
cgit 


From e2dc49ef6c6b0e76c6d37cbec1161662570044e7 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Thu, 3 Mar 2022 19:28:17 +0000
Subject: kselftest/arm64: Log the PIDs of the parent and child in sve-ptrace

If the test triggers a problem it may well result in a log message from
the kernel such as a WARN() or BUG(). If these include a PID it can help
with debugging to know if it was the parent or child process that triggered
the issue, since the test is just creating a new thread the process name
will be the same either way. Print the PIDs of the parent and child on
startup so users have this information to hand should it be needed.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lore.kernel.org/r/20220303192817.2732509-1-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/fp/sve-ptrace.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c
index 4bd333768cc4..4c418b2021e0 100644
--- a/tools/testing/selftests/arm64/fp/sve-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c
@@ -487,6 +487,8 @@ static int do_parent(pid_t child)
 	unsigned int vq, vl;
 	bool vl_supported;
 
+	ksft_print_msg("Parent is %d, child is %d\n", getpid(), child);
+
 	/* Attach to the child */
 	while (1) {
 		int sig;
-- 
cgit 


From 5ad0a415da6be8c13ed45c655e5acc9fa93557a9 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@kernel.org>
Date: Mon, 7 Mar 2022 13:30:47 +0000
Subject: bpf/docs: Update vmtest docs for static linking

Dynamic linking when compiling on the host can cause issues when the
libc version does not match the one in the VM image. Update the
docs to explain how to do this.

Before:
  ./vmtest.sh -- ./test_progs -t test_ima
  ./test_progs: /usr/lib/libc.so.6: version `GLIBC_2.33' not found (required by ./test_progs)

After:

  LDLIBS=-static ./vmtest.sh -- ./test_progs -t test_ima
  test_ima:OK
  Summary: 1/0 PASSED, 0 SKIPPED, 0 FAILED

Reported-by: "Geyslan G. Bem" <geyslan@gmail.com>
Signed-off-by: KP Singh <kpsingh@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220307133048.1287644-1-kpsingh@kernel.org
---
 tools/testing/selftests/bpf/README.rst | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
index d099d91adc3b..afe5ab2763bf 100644
--- a/tools/testing/selftests/bpf/README.rst
+++ b/tools/testing/selftests/bpf/README.rst
@@ -32,6 +32,14 @@ For more information on about using the script, run:
 
   $ tools/testing/selftests/bpf/vmtest.sh -h
 
+In case of linker errors when running selftests, try using static linking:
+
+.. code-block:: console
+
+  $ LDLIBS=-static vmtest.sh
+
+.. note:: Some distros may not support static linking.
+
 .. note:: The script uses pahole and clang based on host environment setting.
           If you want to change pahole and llvm, you can change `PATH` environment
           variable in the beginning of script.
-- 
cgit 


From e878ae2d1df5de4ea36e6d96c7d3ebe789aab9a5 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@kernel.org>
Date: Mon, 7 Mar 2022 13:30:48 +0000
Subject: bpf/docs: Update list of architectures supported.

vmtest.sh also supports s390x now.

Signed-off-by: KP Singh <kpsingh@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220307133048.1287644-2-kpsingh@kernel.org
---
 tools/testing/selftests/bpf/README.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
index afe5ab2763bf..eb1b7541f39d 100644
--- a/tools/testing/selftests/bpf/README.rst
+++ b/tools/testing/selftests/bpf/README.rst
@@ -44,7 +44,7 @@ In case of linker errors when running selftests, try using static linking:
           If you want to change pahole and llvm, you can change `PATH` environment
           variable in the beginning of script.
 
-.. note:: The script currently only supports x86_64.
+.. note:: The script currently only supports x86_64 and s390x architectures.
 
 Additional information about selftest failures are
 documented here.
-- 
cgit 


From 0d060f230fa06c32d91e67978a3088be9635848e Mon Sep 17 00:00:00 2001
From: Stefan Berger <stefanb@linux.ibm.com>
Date: Sat, 27 Nov 2021 23:10:51 -0500
Subject: selftests: tpm2: Determine available PCR bank

Determine an available PCR bank to be used by a test case by querying the
capability TPM2_GET_CAP. The TPM2 returns TPML_PCR_SELECTIONS that
contains an array of TPMS_PCR_SELECTIONs indicating available PCR banks
and the bitmasks that show which PCRs are enabled in each bank. Collect
the data in a dictionary. From the dictionary determine the PCR bank that
has the PCRs enabled that the test needs. This avoids test failures with
TPM2's that either to not have a SHA-1 bank or whose SHA-1 bank is
disabled.

Signed-off-by: Stefan Berger <stefanb@linux.ibm.com>
Tested-by: Jarkko Sakkinen <jarkko@kernel.org>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 tools/testing/selftests/tpm2/tpm2.py       | 31 ++++++++++++++++++++++++++++++
 tools/testing/selftests/tpm2/tpm2_tests.py | 29 ++++++++++++++++++++--------
 2 files changed, 52 insertions(+), 8 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/tpm2/tpm2.py b/tools/testing/selftests/tpm2/tpm2.py
index f34486cd7342..057a4f49c79d 100644
--- a/tools/testing/selftests/tpm2/tpm2.py
+++ b/tools/testing/selftests/tpm2/tpm2.py
@@ -56,6 +56,7 @@ TSS2_RESMGR_TPM_RC_LAYER = (11 << TSS2_RC_LAYER_SHIFT)
 
 TPM2_CAP_HANDLES = 0x00000001
 TPM2_CAP_COMMANDS = 0x00000002
+TPM2_CAP_PCRS = 0x00000005
 TPM2_CAP_TPM_PROPERTIES = 0x00000006
 
 TPM2_PT_FIXED = 0x100
@@ -712,3 +713,33 @@ class Client:
             pt += 1
 
         return handles
+
+    def get_cap_pcrs(self):
+        pcr_banks = {}
+
+        fmt = '>HII III'
+
+        cmd = struct.pack(fmt,
+                          TPM2_ST_NO_SESSIONS,
+                          struct.calcsize(fmt),
+                          TPM2_CC_GET_CAPABILITY,
+                          TPM2_CAP_PCRS, 0, 1)
+        rsp = self.send_cmd(cmd)[10:]
+        _, _, cnt = struct.unpack('>BII', rsp[:9])
+        rsp = rsp[9:]
+
+        # items are TPMS_PCR_SELECTION's
+        for i in range(0, cnt):
+              hash, sizeOfSelect = struct.unpack('>HB', rsp[:3])
+              rsp = rsp[3:]
+
+              pcrSelect = 0
+              if sizeOfSelect > 0:
+                  pcrSelect, = struct.unpack('%ds' % sizeOfSelect,
+                                             rsp[:sizeOfSelect])
+                  rsp = rsp[sizeOfSelect:]
+                  pcrSelect = int.from_bytes(pcrSelect, byteorder='big')
+
+              pcr_banks[hash] = pcrSelect
+
+        return pcr_banks
diff --git a/tools/testing/selftests/tpm2/tpm2_tests.py b/tools/testing/selftests/tpm2/tpm2_tests.py
index 9d764306887b..e63a37819978 100644
--- a/tools/testing/selftests/tpm2/tpm2_tests.py
+++ b/tools/testing/selftests/tpm2/tpm2_tests.py
@@ -27,7 +27,17 @@ class SmokeTest(unittest.TestCase):
         result = self.client.unseal(self.root_key, blob, auth, None)
         self.assertEqual(data, result)
 
+    def determine_bank_alg(self, mask):
+        pcr_banks = self.client.get_cap_pcrs()
+        for bank_alg, pcrSelection in pcr_banks.items():
+            if pcrSelection & mask == mask:
+                return bank_alg
+        return None
+
     def test_seal_with_policy(self):
+        bank_alg = self.determine_bank_alg(1 << 16)
+        self.assertIsNotNone(bank_alg)
+
         handle = self.client.start_auth_session(tpm2.TPM2_SE_TRIAL)
 
         data = ('X' * 64).encode()
@@ -35,7 +45,7 @@ class SmokeTest(unittest.TestCase):
         pcrs = [16]
 
         try:
-            self.client.policy_pcr(handle, pcrs)
+            self.client.policy_pcr(handle, pcrs, bank_alg=bank_alg)
             self.client.policy_password(handle)
 
             policy_dig = self.client.get_policy_digest(handle)
@@ -47,7 +57,7 @@ class SmokeTest(unittest.TestCase):
         handle = self.client.start_auth_session(tpm2.TPM2_SE_POLICY)
 
         try:
-            self.client.policy_pcr(handle, pcrs)
+            self.client.policy_pcr(handle, pcrs, bank_alg=bank_alg)
             self.client.policy_password(handle)
 
             result = self.client.unseal(self.root_key, blob, auth, handle)
@@ -72,6 +82,9 @@ class SmokeTest(unittest.TestCase):
         self.assertEqual(rc, tpm2.TPM2_RC_AUTH_FAIL)
 
     def test_unseal_with_wrong_policy(self):
+        bank_alg = self.determine_bank_alg(1 << 16 | 1 << 1)
+        self.assertIsNotNone(bank_alg)
+
         handle = self.client.start_auth_session(tpm2.TPM2_SE_TRIAL)
 
         data = ('X' * 64).encode()
@@ -79,7 +92,7 @@ class SmokeTest(unittest.TestCase):
         pcrs = [16]
 
         try:
-            self.client.policy_pcr(handle, pcrs)
+            self.client.policy_pcr(handle, pcrs, bank_alg=bank_alg)
             self.client.policy_password(handle)
 
             policy_dig = self.client.get_policy_digest(handle)
@@ -91,13 +104,13 @@ class SmokeTest(unittest.TestCase):
         # Extend first a PCR that is not part of the policy and try to unseal.
         # This should succeed.
 
-        ds = tpm2.get_digest_size(tpm2.TPM2_ALG_SHA1)
-        self.client.extend_pcr(1, ('X' * ds).encode())
+        ds = tpm2.get_digest_size(bank_alg)
+        self.client.extend_pcr(1, ('X' * ds).encode(), bank_alg=bank_alg)
 
         handle = self.client.start_auth_session(tpm2.TPM2_SE_POLICY)
 
         try:
-            self.client.policy_pcr(handle, pcrs)
+            self.client.policy_pcr(handle, pcrs, bank_alg=bank_alg)
             self.client.policy_password(handle)
 
             result = self.client.unseal(self.root_key, blob, auth, handle)
@@ -109,14 +122,14 @@ class SmokeTest(unittest.TestCase):
 
         # Then, extend a PCR that is part of the policy and try to unseal.
         # This should fail.
-        self.client.extend_pcr(16, ('X' * ds).encode())
+        self.client.extend_pcr(16, ('X' * ds).encode(), bank_alg=bank_alg)
 
         handle = self.client.start_auth_session(tpm2.TPM2_SE_POLICY)
 
         rc = 0
 
         try:
-            self.client.policy_pcr(handle, pcrs)
+            self.client.policy_pcr(handle, pcrs, bank_alg=bank_alg)
             self.client.policy_password(handle)
 
             result = self.client.unseal(self.root_key, blob, auth, handle)
-- 
cgit 


From 8335adb8f9d3ea783422d22883a327427c1dbee8 Mon Sep 17 00:00:00 2001
From: Tadeusz Struk <tstruk@gmail.com>
Date: Sat, 15 Jan 2022 17:26:27 -0800
Subject: selftests: tpm: add async space test with noneexisting handle

Add a test for /dev/tpmrm0 in async mode that checks if
the code handles invalid handles correctly.

Cc: Jarkko Sakkinen <jarkko@kernel.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: <linux-integrity@vger.kernel.org>
Cc: <linux-kselftest@vger.kernel.org>
Cc: <linux-kernel@vger.kernel.org>
Tested-by: Jarkko Sakkinen<jarkko@kernel.org>
Signed-off-by: Tadeusz Struk <tstruk@gmail.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 tools/testing/selftests/tpm2/tpm2_tests.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/tpm2/tpm2_tests.py b/tools/testing/selftests/tpm2/tpm2_tests.py
index e63a37819978..ffe98b5c8d22 100644
--- a/tools/testing/selftests/tpm2/tpm2_tests.py
+++ b/tools/testing/selftests/tpm2/tpm2_tests.py
@@ -315,3 +315,19 @@ class AsyncTest(unittest.TestCase):
         log.debug("Calling get_cap in a NON_BLOCKING mode")
         async_client.get_cap(tpm2.TPM2_CAP_HANDLES, tpm2.HR_LOADED_SESSION)
         async_client.close()
+
+    def test_flush_invalid_context(self):
+        log = logging.getLogger(__name__)
+        log.debug(sys._getframe().f_code.co_name)
+
+        async_client = tpm2.Client(tpm2.Client.FLAG_SPACE | tpm2.Client.FLAG_NONBLOCK)
+        log.debug("Calling flush_context passing in an invalid handle ")
+        handle = 0x80123456
+        rc = 0
+        try:
+            async_client.flush_context(handle)
+        except OSError as e:
+            rc = e.errno
+
+        self.assertEqual(rc, 22)
+        async_client.close()
-- 
cgit 


From d23a8720327d33616f584d76c80824bfa4699be6 Mon Sep 17 00:00:00 2001
From: Felix Maurer <fmaurer@redhat.com>
Date: Thu, 3 Mar 2022 12:15:26 +0100
Subject: selftests/bpf: Make test_lwt_ip_encap more stable and faster

In test_lwt_ip_encap, the ingress IPv6 encap test failed from time to
time. The failure occured when an IPv4 ping through the IPv6 GRE
encapsulation did not receive a reply within the timeout. The IPv4 ping
and the IPv6 ping in the test used different timeouts (1 sec for IPv4
and 6 sec for IPv6), probably taking into account that IPv6 might need
longer to successfully complete. However, when IPv4 pings (with the
short timeout) are encapsulated into the IPv6 tunnel, the delays of IPv6
apply.

The actual reason for the long delays with IPv6 was that the IPv6
neighbor discovery sometimes did not complete in time. This was caused
by the outgoing interface only having a tentative link local address,
i.e., not having completed DAD for that lladdr. The ND was successfully
retried after 1 sec but that was too late for the ping timeout.

The IPv6 addresses for the test were already added with nodad. However,
for the lladdrs, DAD was still performed. We now disable DAD in the test
netns completely and just assume that the two lladdrs on each veth pair
do not collide. This removes all the delays for IPv6 traffic in the
test.

Without the delays, we can now also reduce the delay of the IPv6 ping to
1 sec. This makes the whole test complete faster because we don't need
to wait for the excessive timeout for each IPv6 ping that is supposed
to fail.

Fixes: 0fde56e4385b0 ("selftests: bpf: add test_lwt_ip_encap selftest")
Signed-off-by: Felix Maurer <fmaurer@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/4987d549d48b4e316cd5b3936de69c8d4bc75a4f.1646305899.git.fmaurer@redhat.com
---
 tools/testing/selftests/bpf/test_lwt_ip_encap.sh | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
index b497bb85b667..6c69c42b1d60 100755
--- a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
+++ b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
@@ -120,6 +120,14 @@ setup()
 	ip netns exec ${NS2} sysctl -wq net.ipv4.conf.default.rp_filter=0
 	ip netns exec ${NS3} sysctl -wq net.ipv4.conf.default.rp_filter=0
 
+	# disable IPv6 DAD because it sometimes takes too long and fails tests
+	ip netns exec ${NS1} sysctl -wq net.ipv6.conf.all.accept_dad=0
+	ip netns exec ${NS2} sysctl -wq net.ipv6.conf.all.accept_dad=0
+	ip netns exec ${NS3} sysctl -wq net.ipv6.conf.all.accept_dad=0
+	ip netns exec ${NS1} sysctl -wq net.ipv6.conf.default.accept_dad=0
+	ip netns exec ${NS2} sysctl -wq net.ipv6.conf.default.accept_dad=0
+	ip netns exec ${NS3} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
 	ip link add veth1 type veth peer name veth2
 	ip link add veth3 type veth peer name veth4
 	ip link add veth5 type veth peer name veth6
@@ -289,7 +297,7 @@ test_ping()
 		ip netns exec ${NS1} ping  -c 1 -W 1 -I veth1 ${IPv4_DST} 2>&1 > /dev/null
 		RET=$?
 	elif [ "${PROTO}" == "IPv6" ] ; then
-		ip netns exec ${NS1} ping6 -c 1 -W 6 -I veth1 ${IPv6_DST} 2>&1 > /dev/null
+		ip netns exec ${NS1} ping6 -c 1 -W 1 -I veth1 ${IPv6_DST} 2>&1 > /dev/null
 		RET=$?
 	else
 		echo "    test_ping: unknown PROTO: ${PROTO}"
-- 
cgit 


From 3d7d6043f3c31bcfbed1f636035a2eab37117971 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Sat, 26 Feb 2022 00:15:43 +0000
Subject: KVM: selftests: Move raw KVM_SET_USER_MEMORY_REGION helper to utils

Move set_memory_region_test's KVM_SET_USER_MEMORY_REGION helper to KVM's
utils so that it can be used by other tests.  Provide a raw version as
well as an assert-success version to reduce the amount of boilerplate
code need for basic usage.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20220226001546.360188-26-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../testing/selftests/kvm/include/kvm_util_base.h  |  4 +++
 tools/testing/selftests/kvm/lib/kvm_util.c         | 24 +++++++++++++++
 .../testing/selftests/kvm/set_memory_region_test.c | 35 +++++-----------------
 3 files changed, 36 insertions(+), 27 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
index f987cf7c0d2e..573de0354175 100644
--- a/tools/testing/selftests/kvm/include/kvm_util_base.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -147,6 +147,10 @@ void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid,
 
 void vm_create_irqchip(struct kvm_vm *vm);
 
+void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
+			       uint64_t gpa, uint64_t size, void *hva);
+int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
+				uint64_t gpa, uint64_t size, void *hva);
 void vm_userspace_mem_region_add(struct kvm_vm *vm,
 	enum vm_mem_backing_src_type src_type,
 	uint64_t guest_paddr, uint32_t slot, uint64_t npages,
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 64618032aa58..dcb8e96c6a54 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -839,6 +839,30 @@ static void vm_userspace_mem_region_hva_insert(struct rb_root *hva_tree,
 	rb_insert_color(&region->hva_node, hva_tree);
 }
 
+
+int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
+				uint64_t gpa, uint64_t size, void *hva)
+{
+	struct kvm_userspace_memory_region region = {
+		.slot = slot,
+		.flags = flags,
+		.guest_phys_addr = gpa,
+		.memory_size = size,
+		.userspace_addr = (uintptr_t)hva,
+	};
+
+	return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region);
+}
+
+void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
+			       uint64_t gpa, uint64_t size, void *hva)
+{
+	int ret = __vm_set_user_memory_region(vm, slot, flags, gpa, size, hva);
+
+	TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed, errno = %d (%s)",
+		    errno, strerror(errno));
+}
+
 /*
  * VM Userspace Memory Region Add
  *
diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
index 72a1c9b4882c..73bc297dabe6 100644
--- a/tools/testing/selftests/kvm/set_memory_region_test.c
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -329,22 +329,6 @@ static void test_zero_memory_regions(void)
 }
 #endif /* __x86_64__ */
 
-static int test_memory_region_add(struct kvm_vm *vm, void *mem, uint32_t slot,
-				   uint32_t size, uint64_t guest_addr)
-{
-	struct kvm_userspace_memory_region region;
-	int ret;
-
-	region.slot = slot;
-	region.flags = 0;
-	region.guest_phys_addr = guest_addr;
-	region.memory_size = size;
-	region.userspace_addr = (uintptr_t) mem;
-	ret = ioctl(vm_get_fd(vm), KVM_SET_USER_MEMORY_REGION, &region);
-
-	return ret;
-}
-
 /*
  * Test it can be added memory slots up to KVM_CAP_NR_MEMSLOTS, then any
  * tentative to add further slots should fail.
@@ -382,23 +366,20 @@ static void test_add_max_memory_regions(void)
 	TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host");
 	mem_aligned = (void *)(((size_t) mem + alignment - 1) & ~(alignment - 1));
 
-	for (slot = 0; slot < max_mem_slots; slot++) {
-		ret = test_memory_region_add(vm, mem_aligned +
-					     ((uint64_t)slot * MEM_REGION_SIZE),
-					     slot, MEM_REGION_SIZE,
-					     (uint64_t)slot * MEM_REGION_SIZE);
-		TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
-			    "  rc: %i errno: %i slot: %i\n",
-			    ret, errno, slot);
-	}
+	for (slot = 0; slot < max_mem_slots; slot++)
+		vm_set_user_memory_region(vm, slot, 0,
+					  ((uint64_t)slot * MEM_REGION_SIZE),
+					  MEM_REGION_SIZE,
+					  mem_aligned + (uint64_t)slot * MEM_REGION_SIZE);
 
 	/* Check it cannot be added memory slots beyond the limit */
 	mem_extra = mmap(NULL, MEM_REGION_SIZE, PROT_READ | PROT_WRITE,
 			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 	TEST_ASSERT(mem_extra != MAP_FAILED, "Failed to mmap() host");
 
-	ret = test_memory_region_add(vm, mem_extra, max_mem_slots, MEM_REGION_SIZE,
-				     (uint64_t)max_mem_slots * MEM_REGION_SIZE);
+	ret = __vm_set_user_memory_region(vm, max_mem_slots, 0,
+					  (uint64_t)max_mem_slots * MEM_REGION_SIZE,
+					  MEM_REGION_SIZE, mem_extra);
 	TEST_ASSERT(ret == -1 && errno == EINVAL,
 		    "Adding one more memory slot should fail with EINVAL");
 
-- 
cgit 


From a4187c9bd16341b17d54cb3f87b8164945fa81ce Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Sat, 26 Feb 2022 00:15:44 +0000
Subject: KVM: selftests: Split out helper to allocate guest mem via memfd

Extract the code for allocating guest memory via memfd out of
vm_userspace_mem_region_add() and into a new helper, kvm_memfd_alloc().
A future selftest to populate a guest with the maximum amount of guest
memory will abuse KVM's memslots to alias guest memory regions to a
single memfd-backed host region, i.e. needs to back a guest with memfd
memory without a 1:1 association between a memslot and a memfd instance.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20220226001546.360188-27-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../testing/selftests/kvm/include/kvm_util_base.h  |  1 +
 tools/testing/selftests/kvm/lib/kvm_util.c         | 42 ++++++++++++----------
 2 files changed, 25 insertions(+), 18 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
index 573de0354175..92cef0ffb19e 100644
--- a/tools/testing/selftests/kvm/include/kvm_util_base.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -123,6 +123,7 @@ int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva,
 		       size_t len);
 
 void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename);
+int kvm_memfd_alloc(size_t size, bool hugepages);
 
 void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
 
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index dcb8e96c6a54..1665a220abcb 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -718,6 +718,27 @@ void kvm_vm_free(struct kvm_vm *vmp)
 	free(vmp);
 }
 
+int kvm_memfd_alloc(size_t size, bool hugepages)
+{
+	int memfd_flags = MFD_CLOEXEC;
+	int fd, r;
+
+	if (hugepages)
+		memfd_flags |= MFD_HUGETLB;
+
+	fd = memfd_create("kvm_selftest", memfd_flags);
+	TEST_ASSERT(fd != -1, "memfd_create() failed, errno: %i (%s)",
+		    errno, strerror(errno));
+
+	r = ftruncate(fd, size);
+	TEST_ASSERT(!r, "ftruncate() failed, errno: %i (%s)", errno, strerror(errno));
+
+	r = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size);
+	TEST_ASSERT(!r, "fallocate() failed, errno: %i (%s)", errno, strerror(errno));
+
+	return fd;
+}
+
 /*
  * Memory Compare, host virtual to guest virtual
  *
@@ -970,24 +991,9 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
 		region->mmap_size += alignment;
 
 	region->fd = -1;
-	if (backing_src_is_shared(src_type)) {
-		int memfd_flags = MFD_CLOEXEC;
-
-		if (src_type == VM_MEM_SRC_SHARED_HUGETLB)
-			memfd_flags |= MFD_HUGETLB;
-
-		region->fd = memfd_create("kvm_selftest", memfd_flags);
-		TEST_ASSERT(region->fd != -1,
-			    "memfd_create failed, errno: %i", errno);
-
-		ret = ftruncate(region->fd, region->mmap_size);
-		TEST_ASSERT(ret == 0, "ftruncate failed, errno: %i", errno);
-
-		ret = fallocate(region->fd,
-				FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0,
-				region->mmap_size);
-		TEST_ASSERT(ret == 0, "fallocate failed, errno: %i", errno);
-	}
+	if (backing_src_is_shared(src_type))
+		region->fd = kvm_memfd_alloc(region->mmap_size,
+					     src_type == VM_MEM_SRC_SHARED_HUGETLB);
 
 	region->mmap_start = mmap(NULL, region->mmap_size,
 				  PROT_READ | PROT_WRITE,
-- 
cgit 


From 17ae5ebc46e713e3a16735644a093aa3aff9fe8a Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Sat, 26 Feb 2022 00:15:45 +0000
Subject: KVM: selftests: Define cpu_relax() helpers for s390 and x86

Add cpu_relax() for s390 and x86 for use in arch-agnostic tests.  arm64
already defines its own version.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20220226001546.360188-28-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/include/s390x/processor.h  | 8 ++++++++
 tools/testing/selftests/kvm/include/x86_64/processor.h | 5 +++++
 2 files changed, 13 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/include/s390x/processor.h b/tools/testing/selftests/kvm/include/s390x/processor.h
index e0e96a5f608c..255c9b990f4c 100644
--- a/tools/testing/selftests/kvm/include/s390x/processor.h
+++ b/tools/testing/selftests/kvm/include/s390x/processor.h
@@ -5,6 +5,8 @@
 #ifndef SELFTEST_KVM_PROCESSOR_H
 #define SELFTEST_KVM_PROCESSOR_H
 
+#include <linux/compiler.h>
+
 /* Bits in the region/segment table entry */
 #define REGION_ENTRY_ORIGIN	~0xfffUL /* region/segment table origin	   */
 #define REGION_ENTRY_PROTECT	0x200	 /* region protection bit	   */
@@ -19,4 +21,10 @@
 #define PAGE_PROTECT	0x200		/* HW read-only bit  */
 #define PAGE_NOEXEC	0x100		/* HW no-execute bit */
 
+/* Is there a portable way to do this? */
+static inline void cpu_relax(void)
+{
+	barrier();
+}
+
 #endif
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 8a470da7b71a..37db341d4cc5 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -363,6 +363,11 @@ static inline unsigned long get_xmm(int n)
 	return 0;
 }
 
+static inline void cpu_relax(void)
+{
+	asm volatile("rep; nop" ::: "memory");
+}
+
 bool is_intel_cpu(void);
 bool is_amd_cpu(void);
 
-- 
cgit 


From b58c55d522b256fa54c5e9175cf3202bc452b20e Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Sat, 26 Feb 2022 00:15:46 +0000
Subject: KVM: selftests: Add test to populate a VM with the max possible guest
 mem

Add a selftest that enables populating a VM with the maximum amount of
guest memory allowed by the underlying architecture.  Abuse KVM's
memslots by mapping a single host memory region into multiple memslots so
that the selftest doesn't require a system with terabytes of RAM.

Default to 512gb of guest memory, which isn't all that interesting, but
should work on all MMUs and doesn't take an exorbitant amount of memory
or time.  E.g. testing with ~64tb of guest memory takes the better part
of an hour, and requires 200gb of memory for KVM's page tables when using
4kb pages.

To inflicit maximum abuse on KVM' MMU, default to 4kb pages (or whatever
the not-hugepage size is) in the backing store (memfd).  Use memfd for
the host backing store to ensure that hugepages are guaranteed when
requested, and to give the user explicit control of the size of hugepage
being tested.

By default, spin up as many vCPUs as there are available to the selftest,
and distribute the work of dirtying each 4kb chunk of memory across all
vCPUs.  Dirtying guest memory forces KVM to populate its page tables, and
also forces KVM to write back accessed/dirty information to struct page
when the guest memory is freed.

On x86, perform two passes with a MMU context reset between each pass to
coerce KVM into dropping all references to the MMU root, e.g. to emulate
a vCPU dropping the last reference.  Perform both passes and all
rendezvous on all architectures in the hope that arm64 and s390x can gain
similar shenanigans in the future.

Measure and report the duration of each operation, which is helpful not
only to verify the test is working as intended, but also to easily
evaluate the performance differences different page sizes.

Provide command line options to limit the amount of guest memory, set the
size of each slot (i.e. of the host memory region), set the number of
vCPUs, and to enable usage of hugepages.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20220226001546.360188-29-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/.gitignore             |   1 +
 tools/testing/selftests/kvm/Makefile               |   1 +
 .../testing/selftests/kvm/max_guest_memory_test.c  | 292 +++++++++++++++++++++
 3 files changed, 294 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/max_guest_memory_test.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 052ddfe4b23a..9b67343dc4ab 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -58,6 +58,7 @@
 /hardware_disable_test
 /kvm_create_max_vcpus
 /kvm_page_table_test
+/max_guest_memory_test
 /memslot_modification_stress_test
 /memslot_perf_test
 /rseq_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index f7fa5655e535..04099f453b59 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -93,6 +93,7 @@ TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
 TEST_GEN_PROGS_x86_64 += hardware_disable_test
 TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
 TEST_GEN_PROGS_x86_64 += kvm_page_table_test
+TEST_GEN_PROGS_x86_64 += max_guest_memory_test
 TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test
 TEST_GEN_PROGS_x86_64 += memslot_perf_test
 TEST_GEN_PROGS_x86_64 += rseq_test
diff --git a/tools/testing/selftests/kvm/max_guest_memory_test.c b/tools/testing/selftests/kvm/max_guest_memory_test.c
new file mode 100644
index 000000000000..3875c4b23a04
--- /dev/null
+++ b/tools/testing/selftests/kvm/max_guest_memory_test.c
@@ -0,0 +1,292 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <semaphore.h>
+#include <sys/types.h>
+#include <signal.h>
+#include <errno.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+#include <linux/atomic.h>
+
+#include "kvm_util.h"
+#include "test_util.h"
+#include "guest_modes.h"
+#include "processor.h"
+
+static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride)
+{
+	uint64_t gpa;
+
+	for (gpa = start_gpa; gpa < end_gpa; gpa += stride)
+		*((volatile uint64_t *)gpa) = gpa;
+
+	GUEST_DONE();
+}
+
+struct vcpu_info {
+	struct kvm_vm *vm;
+	uint32_t id;
+	uint64_t start_gpa;
+	uint64_t end_gpa;
+};
+
+static int nr_vcpus;
+static atomic_t rendezvous;
+
+static void rendezvous_with_boss(void)
+{
+	int orig = atomic_read(&rendezvous);
+
+	if (orig > 0) {
+		atomic_dec_and_test(&rendezvous);
+		while (atomic_read(&rendezvous) > 0)
+			cpu_relax();
+	} else {
+		atomic_inc(&rendezvous);
+		while (atomic_read(&rendezvous) < 0)
+			cpu_relax();
+	}
+}
+
+static void run_vcpu(struct kvm_vm *vm, uint32_t vcpu_id)
+{
+	vcpu_run(vm, vcpu_id);
+	ASSERT_EQ(get_ucall(vm, vcpu_id, NULL), UCALL_DONE);
+}
+
+static void *vcpu_worker(void *data)
+{
+	struct vcpu_info *vcpu = data;
+	struct kvm_vm *vm = vcpu->vm;
+	struct kvm_sregs sregs;
+	struct kvm_regs regs;
+
+	vcpu_args_set(vm, vcpu->id, 3, vcpu->start_gpa, vcpu->end_gpa,
+		      vm_get_page_size(vm));
+
+	/* Snapshot regs before the first run. */
+	vcpu_regs_get(vm, vcpu->id, &regs);
+	rendezvous_with_boss();
+
+	run_vcpu(vm, vcpu->id);
+	rendezvous_with_boss();
+	vcpu_regs_set(vm, vcpu->id, &regs);
+	vcpu_sregs_get(vm, vcpu->id, &sregs);
+#ifdef __x86_64__
+	/* Toggle CR0.WP to trigger a MMU context reset. */
+	sregs.cr0 ^= X86_CR0_WP;
+#endif
+	vcpu_sregs_set(vm, vcpu->id, &sregs);
+	rendezvous_with_boss();
+
+	run_vcpu(vm, vcpu->id);
+	rendezvous_with_boss();
+
+	return NULL;
+}
+
+static pthread_t *spawn_workers(struct kvm_vm *vm, uint64_t start_gpa,
+				uint64_t end_gpa)
+{
+	struct vcpu_info *info;
+	uint64_t gpa, nr_bytes;
+	pthread_t *threads;
+	int i;
+
+	threads = malloc(nr_vcpus * sizeof(*threads));
+	TEST_ASSERT(threads, "Failed to allocate vCPU threads");
+
+	info = malloc(nr_vcpus * sizeof(*info));
+	TEST_ASSERT(info, "Failed to allocate vCPU gpa ranges");
+
+	nr_bytes = ((end_gpa - start_gpa) / nr_vcpus) &
+			~((uint64_t)vm_get_page_size(vm) - 1);
+	TEST_ASSERT(nr_bytes, "C'mon, no way you have %d CPUs", nr_vcpus);
+
+	for (i = 0, gpa = start_gpa; i < nr_vcpus; i++, gpa += nr_bytes) {
+		info[i].vm = vm;
+		info[i].id = i;
+		info[i].start_gpa = gpa;
+		info[i].end_gpa = gpa + nr_bytes;
+		pthread_create(&threads[i], NULL, vcpu_worker, &info[i]);
+	}
+	return threads;
+}
+
+static void rendezvous_with_vcpus(struct timespec *time, const char *name)
+{
+	int i, rendezvoused;
+
+	pr_info("Waiting for vCPUs to finish %s...\n", name);
+
+	rendezvoused = atomic_read(&rendezvous);
+	for (i = 0; abs(rendezvoused) != 1; i++) {
+		usleep(100);
+		if (!(i & 0x3f))
+			pr_info("\r%d vCPUs haven't rendezvoused...",
+				abs(rendezvoused) - 1);
+		rendezvoused = atomic_read(&rendezvous);
+	}
+
+	clock_gettime(CLOCK_MONOTONIC, time);
+
+	/* Release the vCPUs after getting the time of the previous action. */
+	pr_info("\rAll vCPUs finished %s, releasing...\n", name);
+	if (rendezvoused > 0)
+		atomic_set(&rendezvous, -nr_vcpus - 1);
+	else
+		atomic_set(&rendezvous, nr_vcpus + 1);
+}
+
+static void calc_default_nr_vcpus(void)
+{
+	cpu_set_t possible_mask;
+	int r;
+
+	r = sched_getaffinity(0, sizeof(possible_mask), &possible_mask);
+	TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)",
+		    errno, strerror(errno));
+
+	nr_vcpus = CPU_COUNT(&possible_mask) * 3/4;
+	TEST_ASSERT(nr_vcpus > 0, "Uh, no CPUs?");
+}
+
+int main(int argc, char *argv[])
+{
+	/*
+	 * Skip the first 4gb and slot0.  slot0 maps <1gb and is used to back
+	 * the guest's code, stack, and page tables.  Because selftests creates
+	 * an IRQCHIP, a.k.a. a local APIC, KVM creates an internal memslot
+	 * just below the 4gb boundary.  This test could create memory at
+	 * 1gb-3gb,but it's simpler to skip straight to 4gb.
+	 */
+	const uint64_t size_1gb = (1 << 30);
+	const uint64_t start_gpa = (4ull * size_1gb);
+	const int first_slot = 1;
+
+	struct timespec time_start, time_run1, time_reset, time_run2;
+	uint64_t max_gpa, gpa, slot_size, max_mem, i;
+	int max_slots, slot, opt, fd;
+	bool hugepages = false;
+	pthread_t *threads;
+	struct kvm_vm *vm;
+	void *mem;
+
+	/*
+	 * Default to 2gb so that maxing out systems with MAXPHADDR=46, which
+	 * are quite common for x86, requires changing only max_mem (KVM allows
+	 * 32k memslots, 32k * 2gb == ~64tb of guest memory).
+	 */
+	slot_size = 2 * size_1gb;
+
+	max_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS);
+	TEST_ASSERT(max_slots > first_slot, "KVM is broken");
+
+	/* All KVM MMUs should be able to survive a 128gb guest. */
+	max_mem = 128 * size_1gb;
+
+	calc_default_nr_vcpus();
+
+	while ((opt = getopt(argc, argv, "c:h:m:s:H")) != -1) {
+		switch (opt) {
+		case 'c':
+			nr_vcpus = atoi(optarg);
+			TEST_ASSERT(nr_vcpus > 0, "number of vcpus must be >0");
+			break;
+		case 'm':
+			max_mem = atoi(optarg) * size_1gb;
+			TEST_ASSERT(max_mem > 0, "memory size must be >0");
+			break;
+		case 's':
+			slot_size = atoi(optarg) * size_1gb;
+			TEST_ASSERT(slot_size > 0, "slot size must be >0");
+			break;
+		case 'H':
+			hugepages = true;
+			break;
+		case 'h':
+		default:
+			printf("usage: %s [-c nr_vcpus] [-m max_mem_in_gb] [-s slot_size_in_gb] [-H]\n", argv[0]);
+			exit(1);
+		}
+	}
+
+	vm = vm_create_default_with_vcpus(nr_vcpus, 0, 0, guest_code, NULL);
+
+	max_gpa = vm_get_max_gfn(vm) << vm_get_page_shift(vm);
+	TEST_ASSERT(max_gpa > (4 * slot_size), "MAXPHYADDR <4gb ");
+
+	fd = kvm_memfd_alloc(slot_size, hugepages);
+	mem = mmap(NULL, slot_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	TEST_ASSERT(mem != MAP_FAILED, "mmap() failed");
+
+	TEST_ASSERT(!madvise(mem, slot_size, MADV_NOHUGEPAGE), "madvise() failed");
+
+	/* Pre-fault the memory to avoid taking mmap_sem on guest page faults. */
+	for (i = 0; i < slot_size; i += vm_get_page_size(vm))
+		((uint8_t *)mem)[i] = 0xaa;
+
+	gpa = 0;
+	for (slot = first_slot; slot < max_slots; slot++) {
+		gpa = start_gpa + ((slot - first_slot) * slot_size);
+		if (gpa + slot_size > max_gpa)
+			break;
+
+		if ((gpa - start_gpa) >= max_mem)
+			break;
+
+		vm_set_user_memory_region(vm, slot, 0, gpa, slot_size, mem);
+
+#ifdef __x86_64__
+		/* Identity map memory in the guest using 1gb pages. */
+		for (i = 0; i < slot_size; i += size_1gb)
+			__virt_pg_map(vm, gpa + i, gpa + i, X86_PAGE_SIZE_1G);
+#else
+		for (i = 0; i < slot_size; i += vm_get_page_size(vm))
+			virt_pg_map(vm, gpa + i, gpa + i);
+#endif
+	}
+
+	atomic_set(&rendezvous, nr_vcpus + 1);
+	threads = spawn_workers(vm, start_gpa, gpa);
+
+	pr_info("Running with %lugb of guest memory and %u vCPUs\n",
+		(gpa - start_gpa) / size_1gb, nr_vcpus);
+
+	rendezvous_with_vcpus(&time_start, "spawning");
+	rendezvous_with_vcpus(&time_run1, "run 1");
+	rendezvous_with_vcpus(&time_reset, "reset");
+	rendezvous_with_vcpus(&time_run2, "run 2");
+
+	time_run2  = timespec_sub(time_run2,   time_reset);
+	time_reset = timespec_sub(time_reset, time_run1);
+	time_run1  = timespec_sub(time_run1,   time_start);
+
+	pr_info("run1 = %ld.%.9lds, reset = %ld.%.9lds, run2 =  %ld.%.9lds\n",
+		time_run1.tv_sec, time_run1.tv_nsec,
+		time_reset.tv_sec, time_reset.tv_nsec,
+		time_run2.tv_sec, time_run2.tv_nsec);
+
+	/*
+	 * Delete even numbered slots (arbitrary) and unmap the first half of
+	 * the backing (also arbitrary) to verify KVM correctly drops all
+	 * references to the removed regions.
+	 */
+	for (slot = (slot - 1) & ~1ull; slot >= first_slot; slot -= 2)
+		vm_set_user_memory_region(vm, slot, 0, 0, 0, NULL);
+
+	munmap(mem, slot_size / 2);
+
+	/* Sanity check that the vCPUs actually ran. */
+	for (i = 0; i < nr_vcpus; i++)
+		pthread_join(threads[i], NULL);
+
+	/*
+	 * Deliberately exit without deleting the remaining memslots or closing
+	 * kvm_fd to test cleanup via mmu_notifier.release.
+	 */
+}
-- 
cgit 


From d4b540544499d90ac81695e21e354cd5c82fa67e Mon Sep 17 00:00:00 2001
From: Mykola Lysenko <mykolal@fb.com>
Date: Tue, 8 Mar 2022 12:04:47 -0800
Subject: Improve perf related BPF tests (sample_freq issue)

Linux kernel may automatically reduce kernel.perf_event_max_sample_rate
value when running tests in parallel on slow systems. Linux kernel checks
against this limit when opening perf event with freq=1 parameter set.
The lower bound is 1000. This patch reduces sample_freq value to 1000
in all BPF tests that use sample_freq to ensure they always can open
perf event.

Signed-off-by: Mykola Lysenko <mykolal@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20220308200449.1757478-2-mykolal@fb.com
---
 tools/testing/selftests/bpf/prog_tests/bpf_cookie.c    | 2 +-
 tools/testing/selftests/bpf/prog_tests/find_vma.c      | 2 +-
 tools/testing/selftests/bpf/prog_tests/perf_branches.c | 4 ++--
 tools/testing/selftests/bpf/prog_tests/perf_link.c     | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
index cd10df6cd0fc..0612e79a9281 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
@@ -199,7 +199,7 @@ static void pe_subtest(struct test_bpf_cookie *skel)
 	attr.type = PERF_TYPE_SOFTWARE;
 	attr.config = PERF_COUNT_SW_CPU_CLOCK;
 	attr.freq = 1;
-	attr.sample_freq = 4000;
+	attr.sample_freq = 1000;
 	pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
 	if (!ASSERT_GE(pfd, 0, "perf_fd"))
 		goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/find_vma.c b/tools/testing/selftests/bpf/prog_tests/find_vma.c
index b74b3c0c555a..743a094c9510 100644
--- a/tools/testing/selftests/bpf/prog_tests/find_vma.c
+++ b/tools/testing/selftests/bpf/prog_tests/find_vma.c
@@ -30,7 +30,7 @@ static int open_pe(void)
 	attr.type = PERF_TYPE_HARDWARE;
 	attr.config = PERF_COUNT_HW_CPU_CYCLES;
 	attr.freq = 1;
-	attr.sample_freq = 4000;
+	attr.sample_freq = 1000;
 	pfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC);
 
 	return pfd >= 0 ? pfd : -errno;
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_branches.c b/tools/testing/selftests/bpf/prog_tests/perf_branches.c
index 12c4f45cee1a..bc24f83339d6 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_branches.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_branches.c
@@ -110,7 +110,7 @@ static void test_perf_branches_hw(void)
 	attr.type = PERF_TYPE_HARDWARE;
 	attr.config = PERF_COUNT_HW_CPU_CYCLES;
 	attr.freq = 1;
-	attr.sample_freq = 4000;
+	attr.sample_freq = 1000;
 	attr.sample_type = PERF_SAMPLE_BRANCH_STACK;
 	attr.branch_sample_type = PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_ANY;
 	pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
@@ -151,7 +151,7 @@ static void test_perf_branches_no_hw(void)
 	attr.type = PERF_TYPE_SOFTWARE;
 	attr.config = PERF_COUNT_SW_CPU_CLOCK;
 	attr.freq = 1;
-	attr.sample_freq = 4000;
+	attr.sample_freq = 1000;
 	pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
 	if (CHECK(pfd < 0, "perf_event_open", "err %d\n", pfd))
 		return;
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_link.c b/tools/testing/selftests/bpf/prog_tests/perf_link.c
index ede07344f264..224eba6fef2e 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_link.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_link.c
@@ -39,7 +39,7 @@ void serial_test_perf_link(void)
 	attr.type = PERF_TYPE_SOFTWARE;
 	attr.config = PERF_COUNT_SW_CPU_CLOCK;
 	attr.freq = 1;
-	attr.sample_freq = 4000;
+	attr.sample_freq = 1000;
 	pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
 	if (!ASSERT_GE(pfd, 0, "perf_fd"))
 		goto cleanup;
-- 
cgit 


From 1fd49864127cd0d33aea8de4cf0858344c9c7265 Mon Sep 17 00:00:00 2001
From: Mykola Lysenko <mykolal@fb.com>
Date: Tue, 8 Mar 2022 12:04:48 -0800
Subject: Improve send_signal BPF test stability

Substitute sleep with dummy CPU intensive computation.
Finish aforemention computation as soon as signal was
delivered to the test process. Make the BPF code to
only execute when PID global variable is set

Signed-off-by: Mykola Lysenko <mykolal@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20220308200449.1757478-3-mykolal@fb.com
---
 tools/testing/selftests/bpf/prog_tests/send_signal.c    | 17 ++++++++++-------
 .../testing/selftests/bpf/progs/test_send_signal_kern.c |  2 +-
 2 files changed, 11 insertions(+), 8 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c
index 776916b61c40..def50f1c5c31 100644
--- a/tools/testing/selftests/bpf/prog_tests/send_signal.c
+++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c
@@ -4,11 +4,11 @@
 #include <sys/resource.h>
 #include "test_send_signal_kern.skel.h"
 
-int sigusr1_received = 0;
+static int sigusr1_received;
 
 static void sigusr1_handler(int signum)
 {
-	sigusr1_received++;
+	sigusr1_received = 1;
 }
 
 static void test_send_signal_common(struct perf_event_attr *attr,
@@ -40,9 +40,10 @@ static void test_send_signal_common(struct perf_event_attr *attr,
 
 	if (pid == 0) {
 		int old_prio;
+		volatile int j = 0;
 
 		/* install signal handler and notify parent */
-		signal(SIGUSR1, sigusr1_handler);
+		ASSERT_NEQ(signal(SIGUSR1, sigusr1_handler), SIG_ERR, "signal");
 
 		close(pipe_c2p[0]); /* close read */
 		close(pipe_p2c[1]); /* close write */
@@ -63,9 +64,11 @@ static void test_send_signal_common(struct perf_event_attr *attr,
 		ASSERT_EQ(read(pipe_p2c[0], buf, 1), 1, "pipe_read");
 
 		/* wait a little for signal handler */
-		sleep(1);
+		for (int i = 0; i < 100000000 && !sigusr1_received; i++)
+			j /= i + 1;
 
 		buf[0] = sigusr1_received ? '2' : '0';
+		ASSERT_EQ(sigusr1_received, 1, "sigusr1_received");
 		ASSERT_EQ(write(pipe_c2p[1], buf, 1), 1, "pipe_write");
 
 		/* wait for parent notification and exit */
@@ -93,7 +96,7 @@ static void test_send_signal_common(struct perf_event_attr *attr,
 			goto destroy_skel;
 		}
 	} else {
-		pmu_fd = syscall(__NR_perf_event_open, attr, pid, -1,
+		pmu_fd = syscall(__NR_perf_event_open, attr, pid, -1 /* cpu */,
 				 -1 /* group id */, 0 /* flags */);
 		if (!ASSERT_GE(pmu_fd, 0, "perf_event_open")) {
 			err = -1;
@@ -110,9 +113,9 @@ static void test_send_signal_common(struct perf_event_attr *attr,
 	ASSERT_EQ(read(pipe_c2p[0], buf, 1), 1, "pipe_read");
 
 	/* trigger the bpf send_signal */
-	skel->bss->pid = pid;
-	skel->bss->sig = SIGUSR1;
 	skel->bss->signal_thread = signal_thread;
+	skel->bss->sig = SIGUSR1;
+	skel->bss->pid = pid;
 
 	/* notify child that bpf program can send_signal now */
 	ASSERT_EQ(write(pipe_p2c[1], buf, 1), 1, "pipe_write");
diff --git a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c
index b4233d3efac2..92354cd72044 100644
--- a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c
@@ -10,7 +10,7 @@ static __always_inline int bpf_send_signal_test(void *ctx)
 {
 	int ret;
 
-	if (status != 0 || sig == 0 || pid == 0)
+	if (status != 0 || pid == 0)
 		return 0;
 
 	if ((bpf_get_current_pid_tgid() >> 32) == pid) {
-- 
cgit 


From ba83af059153441d77bc2dbb4cd22421b8a34107 Mon Sep 17 00:00:00 2001
From: Mykola Lysenko <mykolal@fb.com>
Date: Tue, 8 Mar 2022 12:04:49 -0800
Subject: Improve stability of find_vma BPF test

Remove unneeded spleep and increase length of dummy CPU
intensive computation to guarantee test process execution.
Also, complete aforemention computation as soon as
test success criteria is met

Signed-off-by: Mykola Lysenko <mykolal@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20220308200449.1757478-4-mykolal@fb.com
---
 tools/testing/selftests/bpf/prog_tests/find_vma.c | 28 +++++++++++++++--------
 1 file changed, 19 insertions(+), 9 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/find_vma.c b/tools/testing/selftests/bpf/prog_tests/find_vma.c
index 743a094c9510..5165b38f0e59 100644
--- a/tools/testing/selftests/bpf/prog_tests/find_vma.c
+++ b/tools/testing/selftests/bpf/prog_tests/find_vma.c
@@ -7,12 +7,14 @@
 #include "find_vma_fail1.skel.h"
 #include "find_vma_fail2.skel.h"
 
-static void test_and_reset_skel(struct find_vma *skel, int expected_find_zero_ret)
+static void test_and_reset_skel(struct find_vma *skel, int expected_find_zero_ret, bool need_test)
 {
-	ASSERT_EQ(skel->bss->found_vm_exec, 1, "found_vm_exec");
-	ASSERT_EQ(skel->data->find_addr_ret, 0, "find_addr_ret");
-	ASSERT_EQ(skel->data->find_zero_ret, expected_find_zero_ret, "find_zero_ret");
-	ASSERT_OK_PTR(strstr(skel->bss->d_iname, "test_progs"), "find_test_progs");
+	if (need_test) {
+		ASSERT_EQ(skel->bss->found_vm_exec, 1, "found_vm_exec");
+		ASSERT_EQ(skel->data->find_addr_ret, 0, "find_addr_ret");
+		ASSERT_EQ(skel->data->find_zero_ret, expected_find_zero_ret, "find_zero_ret");
+		ASSERT_OK_PTR(strstr(skel->bss->d_iname, "test_progs"), "find_test_progs");
+	}
 
 	skel->bss->found_vm_exec = 0;
 	skel->data->find_addr_ret = -1;
@@ -36,11 +38,20 @@ static int open_pe(void)
 	return pfd >= 0 ? pfd : -errno;
 }
 
+static bool find_vma_pe_condition(struct find_vma *skel)
+{
+	return skel->bss->found_vm_exec == 0 ||
+		skel->data->find_addr_ret != 0 ||
+		skel->data->find_zero_ret == -1 ||
+		strcmp(skel->bss->d_iname, "test_progs") != 0;
+}
+
 static void test_find_vma_pe(struct find_vma *skel)
 {
 	struct bpf_link *link = NULL;
 	volatile int j = 0;
 	int pfd, i;
+	const int one_bn = 1000000000;
 
 	pfd = open_pe();
 	if (pfd < 0) {
@@ -57,10 +68,10 @@ static void test_find_vma_pe(struct find_vma *skel)
 	if (!ASSERT_OK_PTR(link, "attach_perf_event"))
 		goto cleanup;
 
-	for (i = 0; i < 1000000; ++i)
+	for (i = 0; i < one_bn && find_vma_pe_condition(skel); ++i)
 		++j;
 
-	test_and_reset_skel(skel, -EBUSY /* in nmi, irq_work is busy */);
+	test_and_reset_skel(skel, -EBUSY /* in nmi, irq_work is busy */, i == one_bn);
 cleanup:
 	bpf_link__destroy(link);
 	close(pfd);
@@ -75,7 +86,7 @@ static void test_find_vma_kprobe(struct find_vma *skel)
 		return;
 
 	getpgid(skel->bss->target_pid);
-	test_and_reset_skel(skel, -ENOENT /* could not find vma for ptr 0 */);
+	test_and_reset_skel(skel, -ENOENT /* could not find vma for ptr 0 */, true);
 }
 
 static void test_illegal_write_vma(void)
@@ -108,7 +119,6 @@ void serial_test_find_vma(void)
 	skel->bss->addr = (__u64)(uintptr_t)test_find_vma_pe;
 
 	test_find_vma_pe(skel);
-	usleep(100000); /* allow the irq_work to finish */
 	test_find_vma_kprobe(skel);
 
 	find_vma__destroy(skel);
-- 
cgit 


From 826d7bdca83328b101853b48ee6b5e9bb6a5f537 Mon Sep 17 00:00:00 2001
From: Matthieu Baerts <matthieu.baerts@tessares.net>
Date: Mon, 7 Mar 2022 12:44:33 -0800
Subject: selftests: mptcp: join: allow running -cCi

Without this patch, no tests would be ran when launching:

  mptcp_join.sh -cCi

In any order or a combination with 2 of these letters.

The recommended way with getopt is first parse all options and then act.

This allows to do some actions in priority, e.g. display the help menu
and stop.

But also some global variables changing the behaviour of this selftests
 -- like the ones behind -cCi options -- can be set before running the
different tests. By doing that, we can also avoid long and unreadable
regex.

Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 67 +++++++++++--------------
 1 file changed, 28 insertions(+), 39 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 45c6e5f06916..309d06781ae7 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -16,7 +16,6 @@ capture=0
 checksum=0
 ip_mptcp=0
 check_invert=0
-do_all_tests=1
 init=0
 
 TEST_COUNT=0
@@ -2293,84 +2292,66 @@ usage()
 	exit ${ret}
 }
 
-for arg in "$@"; do
-	# check for "capture/checksum" args before launching tests
-	if [[ "${arg}" =~ ^"-"[0-9a-zA-Z]*"c"[0-9a-zA-Z]*$ ]]; then
-		capture=1
-	fi
-	if [[ "${arg}" =~ ^"-"[0-9a-zA-Z]*"C"[0-9a-zA-Z]*$ ]]; then
-		checksum=1
-	fi
-	if [[ "${arg}" =~ ^"-"[0-9a-zA-Z]*"i"[0-9a-zA-Z]*$ ]]; then
-		ip_mptcp=1
-	fi
-
-	# exception for the capture/checksum/ip_mptcp options, the rest means: a part of the tests
-	if [ "${arg}" != "-c" ] && [ "${arg}" != "-C" ] && [ "${arg}" != "-i" ]; then
-		do_all_tests=0
-	fi
-done
-
-if [ $do_all_tests -eq 1 ]; then
-	all_tests
-	exit $ret
-fi
 
+tests=()
 while getopts 'fesltra64bpkdmchzCSi' opt; do
 	case $opt in
 		f)
-			subflows_tests
+			tests+=(subflows_tests)
 			;;
 		e)
-			subflows_error_tests
+			tests+=(subflows_error_tests)
 			;;
 		s)
-			signal_address_tests
+			tests+=(signal_address_tests)
 			;;
 		l)
-			link_failure_tests
+			tests+=(link_failure_tests)
 			;;
 		t)
-			add_addr_timeout_tests
+			tests+=(add_addr_timeout_tests)
 			;;
 		r)
-			remove_tests
+			tests+=(remove_tests)
 			;;
 		a)
-			add_tests
+			tests+=(add_tests)
 			;;
 		6)
-			ipv6_tests
+			tests+=(ipv6_tests)
 			;;
 		4)
-			v4mapped_tests
+			tests+=(v4mapped_tests)
 			;;
 		b)
-			backup_tests
+			tests+=(backup_tests)
 			;;
 		p)
-			add_addr_ports_tests
+			tests+=(add_addr_ports_tests)
 			;;
 		k)
-			syncookies_tests
+			tests+=(syncookies_tests)
 			;;
 		S)
-			checksum_tests
+			tests+=(checksum_tests)
 			;;
 		d)
-			deny_join_id0_tests
+			tests+=(deny_join_id0_tests)
 			;;
 		m)
-			fullmesh_tests
+			tests+=(fullmesh_tests)
 			;;
 		z)
-			fastclose_tests
+			tests+=(fastclose_tests)
 			;;
 		c)
+			capture=1
 			;;
 		C)
+			checksum=1
 			;;
 		i)
+			ip_mptcp=1
 			;;
 		h)
 			usage
@@ -2381,4 +2362,12 @@ while getopts 'fesltra64bpkdmchzCSi' opt; do
 	esac
 done
 
+if [ ${#tests[@]} -eq 0 ]; then
+	all_tests
+else
+	for subtests in "${tests[@]}"; do
+		"${subtests}"
+	done
+fi
+
 exit $ret
-- 
cgit 


From f98c2bca7b2bd3fd777e05bb9e94c969381b1de8 Mon Sep 17 00:00:00 2001
From: Mat Martineau <mathew.j.martineau@linux.intel.com>
Date: Mon, 7 Mar 2022 12:44:34 -0800
Subject: selftests: mptcp: Rename wait function

The "selftests: mptcp: improve 'fair usage on close' stability" commit
changed that self test to check the TcpAttemptFails MIB instead of
looking for TW sockets. The associated bash function wasn't renamed in
that commit because of the merge conflicts it would cause, so this
commit updates the function name as Paolo originally intended.

Cc: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 309d06781ae7..d4769bc0d842 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -1242,7 +1242,7 @@ chk_link_usage()
 	fi
 }
 
-wait_for_tw()
+wait_attempt_fail()
 {
 	local timeout_ms=$((timeout_poll * 1000))
 	local time=0
@@ -1361,7 +1361,7 @@ subflows_error_tests()
 	TEST_COUNT=$((TEST_COUNT+1))
 
 	# mpj subflow will be in TW after the reset
-	wait_for_tw $ns2
+	wait_attempt_fail $ns2
 	pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
 	wait
 
-- 
cgit 


From 6fa0174a7c8646fce7039b5a176b4f90b0ea513a Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Mon, 7 Mar 2022 12:44:35 -0800
Subject: mptcp: more careful RM_ADDR generation

The in-kernel MPTCP path manager, when processing the MPTCP_PM_CMD_FLUSH_ADDR
command, generates RM_ADDR events for each known local address. While that
is allowed by the RFC, it makes unpredictable the exact number of RM_ADDR
generated when both ends flush the PM addresses.

This change restricts the RM_ADDR generation to previously explicitly
announced addresses, and adjust the expected results in a bunch of related
self-tests.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/mptcp/pm_netlink.c                          | 10 +++---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 42 +++++++++++++++++++++----
 2 files changed, 40 insertions(+), 12 deletions(-)

(limited to 'tools/testing')

diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 75a0a27547e6..91b77d1162cf 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -1466,14 +1466,12 @@ static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
 
 	list_for_each_entry(entry, rm_list, list) {
 		if (lookup_subflow_by_saddr(&msk->conn_list, &entry->addr) &&
-		    alist.nr < MPTCP_RM_IDS_MAX &&
-		    slist.nr < MPTCP_RM_IDS_MAX) {
-			alist.ids[alist.nr++] = entry->addr.id;
+		    slist.nr < MPTCP_RM_IDS_MAX)
 			slist.ids[slist.nr++] = entry->addr.id;
-		} else if (remove_anno_list_by_saddr(msk, &entry->addr) &&
-			 alist.nr < MPTCP_RM_IDS_MAX) {
+
+		if (remove_anno_list_by_saddr(msk, &entry->addr) &&
+		    alist.nr < MPTCP_RM_IDS_MAX)
 			alist.ids[alist.nr++] = entry->addr.id;
-		}
 	}
 
 	if (alist.nr) {
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index d4769bc0d842..02bab8a2d5a5 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -1149,14 +1149,25 @@ chk_rm_nr()
 {
 	local rm_addr_nr=$1
 	local rm_subflow_nr=$2
-	local invert=${3:-""}
+	local invert
+	local simult
 	local count
 	local dump_stats
 	local addr_ns=$ns1
 	local subflow_ns=$ns2
 	local extra_msg=""
 
-	if [[ $invert = "invert" ]]; then
+	shift 2
+	while [ -n "$1" ]; do
+		[ "$1" = "invert" ] && invert=true
+		[ "$1" = "simult" ] && simult=true
+		shift
+	done
+
+	if [ -z $invert ]; then
+		addr_ns=$ns1
+		subflow_ns=$ns2
+	elif [ $invert = "true" ]; then
 		addr_ns=$ns2
 		subflow_ns=$ns1
 		extra_msg="   invert"
@@ -1176,6 +1187,25 @@ chk_rm_nr()
 	echo -n " - rmsf  "
 	count=`ip netns exec $subflow_ns nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}'`
 	[ -z "$count" ] && count=0
+	if [ -n "$simult" ]; then
+		local cnt=$(ip netns exec $addr_ns nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}')
+		local suffix
+
+		# in case of simult flush, the subflow removal count on each side is
+		# unreliable
+		[ -z "$cnt" ] && cnt=0
+		count=$((count + cnt))
+		[ "$count" != "$rm_subflow_nr" ] && suffix="$count in [$rm_subflow_nr:$((rm_subflow_nr*2))]"
+		if [ $count -ge "$rm_subflow_nr" ] && \
+		   [ "$count" -le "$((rm_subflow_nr *2 ))" ]; then
+			echo "[ ok ] $suffix"
+		else
+			echo "[fail] got $count RM_SUBFLOW[s] expected in range [$rm_subflow_nr:$((rm_subflow_nr*2))]"
+			ret=1
+			dump_stats=1
+		fi
+		return
+	fi
 	if [ "$count" != "$rm_subflow_nr" ]; then
 		echo "[fail] got $count RM_SUBFLOW[s] expected $rm_subflow_nr"
 		ret=1
@@ -1666,7 +1696,7 @@ remove_tests()
 	run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow
 	chk_join_nr "flush subflows and signal" 3 3 3
 	chk_add_nr 1 1
-	chk_rm_nr 2 2
+	chk_rm_nr 1 3 invert simult
 
 	# subflows flush
 	reset
@@ -1677,7 +1707,7 @@ remove_tests()
 	pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
 	run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow
 	chk_join_nr "flush subflows" 3 3 3
-	chk_rm_nr 3 3
+	chk_rm_nr 0 3 simult
 
 	# addresses flush
 	reset
@@ -1689,7 +1719,7 @@ remove_tests()
 	run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow
 	chk_join_nr "flush addresses" 3 3 3
 	chk_add_nr 3 3
-	chk_rm_nr 3 3 invert
+	chk_rm_nr 3 3 invert simult
 
 	# invalid addresses flush
 	reset
@@ -1973,7 +2003,7 @@ add_addr_ports_tests()
 	run_tests $ns1 $ns2 10.0.1.1 0 -8 -2 slow
 	chk_join_nr "flush subflows and signal with port" 3 3 3
 	chk_add_nr 1 1
-	chk_rm_nr 2 2
+	chk_rm_nr 1 3 invert simult
 
 	# multiple addresses with port
 	reset
-- 
cgit 


From d045b9eb95a9b611c483897a69e7285aefdc66d7 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Mon, 7 Mar 2022 12:44:36 -0800
Subject: mptcp: introduce implicit endpoints

In some edge scenarios, an MPTCP subflows can use a local address
mapped by a "implicit" endpoint created by the in-kernel path manager.

Such endpoints presence can be confusing, as it's creation is hard
to track and will prevent the later endpoint creation from the user-space
using the same address.

Define a new endpoint flag to mark implicit endpoints and allow the
user-space to replace implicit them with user-provided data at endpoint
creation time.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/mptcp.h                      |  1 +
 net/mptcp/pm_netlink.c                          | 61 ++++++++++++++++++-------
 tools/testing/selftests/net/mptcp/mptcp_join.sh |  4 +-
 3 files changed, 47 insertions(+), 19 deletions(-)

(limited to 'tools/testing')

diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h
index f106a3941cdf..9690efedb5fa 100644
--- a/include/uapi/linux/mptcp.h
+++ b/include/uapi/linux/mptcp.h
@@ -81,6 +81,7 @@ enum {
 #define MPTCP_PM_ADDR_FLAG_SUBFLOW			(1 << 1)
 #define MPTCP_PM_ADDR_FLAG_BACKUP			(1 << 2)
 #define MPTCP_PM_ADDR_FLAG_FULLMESH			(1 << 3)
+#define MPTCP_PM_ADDR_FLAG_IMPLICIT			(1 << 4)
 
 enum {
 	MPTCP_PM_CMD_UNSPEC,
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 91b77d1162cf..10368a4f1c4a 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -877,10 +877,18 @@ static bool address_use_port(struct mptcp_pm_addr_entry *entry)
 		MPTCP_PM_ADDR_FLAG_SIGNAL;
 }
 
+/* caller must ensure the RCU grace period is already elapsed */
+static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry)
+{
+	if (entry->lsk)
+		sock_release(entry->lsk);
+	kfree(entry);
+}
+
 static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
 					     struct mptcp_pm_addr_entry *entry)
 {
-	struct mptcp_pm_addr_entry *cur;
+	struct mptcp_pm_addr_entry *cur, *del_entry = NULL;
 	unsigned int addr_max;
 	int ret = -EINVAL;
 
@@ -901,8 +909,22 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
 	list_for_each_entry(cur, &pernet->local_addr_list, list) {
 		if (addresses_equal(&cur->addr, &entry->addr,
 				    address_use_port(entry) &&
-				    address_use_port(cur)))
-			goto out;
+				    address_use_port(cur))) {
+			/* allow replacing the exiting endpoint only if such
+			 * endpoint is an implicit one and the user-space
+			 * did not provide an endpoint id
+			 */
+			if (!(cur->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT))
+				goto out;
+			if (entry->addr.id)
+				goto out;
+
+			pernet->addrs--;
+			entry->addr.id = cur->addr.id;
+			list_del_rcu(&cur->list);
+			del_entry = cur;
+			break;
+		}
 	}
 
 	if (!entry->addr.id) {
@@ -938,6 +960,12 @@ find_next:
 
 out:
 	spin_unlock_bh(&pernet->lock);
+
+	/* just replaced an existing entry, free it */
+	if (del_entry) {
+		synchronize_rcu();
+		__mptcp_pm_release_addr_entry(del_entry);
+	}
 	return ret;
 }
 
@@ -1036,7 +1064,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
 	entry->addr.id = 0;
 	entry->addr.port = 0;
 	entry->ifindex = 0;
-	entry->flags = 0;
+	entry->flags = MPTCP_PM_ADDR_FLAG_IMPLICIT;
 	entry->lsk = NULL;
 	ret = mptcp_pm_nl_append_new_local_addr(pernet, entry);
 	if (ret < 0)
@@ -1249,6 +1277,11 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info)
 		return -EINVAL;
 	}
 
+	if (addr.flags & MPTCP_PM_ADDR_FLAG_IMPLICIT) {
+		GENL_SET_ERR_MSG(info, "can't create IMPLICIT endpoint");
+		return -EINVAL;
+	}
+
 	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
 	if (!entry) {
 		GENL_SET_ERR_MSG(info, "can't allocate addr");
@@ -1333,11 +1366,12 @@ static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk,
 }
 
 static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
-						   struct mptcp_addr_info *addr)
+						   const struct mptcp_pm_addr_entry *entry)
 {
-	struct mptcp_sock *msk;
-	long s_slot = 0, s_num = 0;
+	const struct mptcp_addr_info *addr = &entry->addr;
 	struct mptcp_rm_list list = { .nr = 0 };
+	long s_slot = 0, s_num = 0;
+	struct mptcp_sock *msk;
 
 	pr_debug("remove_id=%d", addr->id);
 
@@ -1354,7 +1388,8 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
 
 		lock_sock(sk);
 		remove_subflow = lookup_subflow_by_saddr(&msk->conn_list, addr);
-		mptcp_pm_remove_anno_addr(msk, addr, remove_subflow);
+		mptcp_pm_remove_anno_addr(msk, addr, remove_subflow &&
+					  !(entry->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT));
 		if (remove_subflow)
 			mptcp_pm_remove_subflow(msk, &list);
 		release_sock(sk);
@@ -1367,14 +1402,6 @@ next:
 	return 0;
 }
 
-/* caller must ensure the RCU grace period is already elapsed */
-static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry)
-{
-	if (entry->lsk)
-		sock_release(entry->lsk);
-	kfree(entry);
-}
-
 static int mptcp_nl_remove_id_zero_address(struct net *net,
 					   struct mptcp_addr_info *addr)
 {
@@ -1451,7 +1478,7 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info)
 	__clear_bit(entry->addr.id, pernet->id_bitmap);
 	spin_unlock_bh(&pernet->lock);
 
-	mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), &entry->addr);
+	mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), entry);
 	synchronize_rcu();
 	__mptcp_pm_release_addr_entry(entry);
 
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 02bab8a2d5a5..1e2e8dd9f0d6 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -1938,7 +1938,7 @@ backup_tests()
 	run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup
 	chk_join_nr "single address, backup" 1 1 1
 	chk_add_nr 1 1
-	chk_prio_nr 1 0
+	chk_prio_nr 1 1
 
 	# single address with port, backup
 	reset
@@ -1948,7 +1948,7 @@ backup_tests()
 	run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup
 	chk_join_nr "single address with port, backup" 1 1 1
 	chk_add_nr 1 1
-	chk_prio_nr 1 0
+	chk_prio_nr 1 1
 }
 
 add_addr_ports_tests()
-- 
cgit 


From 69c6ce7b6ecad8ed6c1b785bfadf50159d9f1023 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Mon, 7 Mar 2022 12:44:38 -0800
Subject: selftests: mptcp: add implicit endpoint test case

Ensure implicit endpoint are created when expected and
that the user-space can update them

Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Co-developed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 120 +++++++++++++++++++++++-
 tools/testing/selftests/net/mptcp/pm_nl_ctl.c   |   7 ++
 2 files changed, 126 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 1e2e8dd9f0d6..ee435948d130 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -310,6 +310,21 @@ wait_rm_addr()
 	done
 }
 
+wait_mpj()
+{
+	local ns="${1}"
+	local cnt old_cnt
+
+	old_cnt=$(ip netns exec ${ns} nstat -as | grep MPJoinAckRx | awk '{print $2}')
+
+	local i
+	for i in $(seq 10); do
+		cnt=$(ip netns exec ${ns} nstat -as | grep MPJoinAckRx | awk '{print $2}')
+		[ "$cnt" = "${old_cnt}" ] || break
+		sleep 0.1
+	done
+}
+
 pm_nl_set_limits()
 {
 	local ns=$1
@@ -410,6 +425,80 @@ pm_nl_change_endpoint()
 	fi
 }
 
+pm_nl_check_endpoint()
+{
+	local line expected_line
+	local title="$1"
+	local msg="$2"
+	local ns=$3
+	local addr=$4
+	local _flags=""
+	local flags
+	local _port
+	local port
+	local dev
+	local _id
+	local id
+
+	if [ -n "${title}" ]; then
+		printf "%03u %-36s %s" "${TEST_COUNT}" "${title}" "${msg}"
+	else
+		printf "%-${nr_blank}s %s" " " "${msg}"
+	fi
+
+	shift 4
+	while [ -n "$1" ]; do
+		if [ $1 = "flags" ]; then
+			_flags=$2
+			[ ! -z $_flags ]; flags="flags $_flags"
+			shift
+		elif [ $1 = "dev" ]; then
+			[ ! -z $2 ]; dev="dev $1"
+			shift
+		elif [ $1 = "id" ]; then
+			_id=$2
+			[ ! -z $_id ]; id="id $_id"
+			shift
+		elif [ $1 = "port" ]; then
+			_port=$2
+			[ ! -z $_port ]; port=" port $_port"
+			shift
+		fi
+
+		shift
+	done
+
+	if [ -z "$id" ]; then
+		echo "[skip] bad test - missing endpoint id"
+		return
+	fi
+
+	if [ $ip_mptcp -eq 1 ]; then
+		line=$(ip -n $ns mptcp endpoint show $id)
+		# the dump order is: address id flags port dev
+		expected_line="$addr"
+		[ -n "$addr" ] && expected_line="$expected_line $addr"
+		expected_line="$expected_line $id"
+		[ -n "$_flags" ] && expected_line="$expected_line ${_flags//","/" "}"
+		[ -n "$dev" ] && expected_line="$expected_line $dev"
+		[ -n "$port" ] && expected_line="$expected_line $port"
+	else
+		line=$(ip netns exec $ns ./pm_nl_ctl get $_id)
+		# the dump order is: id flags dev address port
+		expected_line="$id"
+		[ -n "$flags" ] && expected_line="$expected_line $flags"
+		[ -n "$dev" ] && expected_line="$expected_line $dev"
+		[ -n "$addr" ] && expected_line="$expected_line $addr"
+		[ -n "$_port" ] && expected_line="$expected_line $_port"
+	fi
+	if [ "$line" = "$expected_line" ]; then
+		echo "[ ok ]"
+	else
+		echo "[fail] expected '$expected_line' found '$line'"
+		ret=1
+	fi
+}
+
 do_transfer()
 {
 	listener_ns="$1"
@@ -2269,6 +2358,30 @@ fastclose_tests()
 	chk_rst_nr 1 1 invert
 }
 
+implicit_tests()
+{
+	# userspace pm type prevents add_addr
+	reset
+	pm_nl_set_limits $ns1 2 2
+	pm_nl_set_limits $ns2 2 2
+	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+	run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow &
+
+	wait_mpj $ns1
+	TEST_COUNT=$((TEST_COUNT + 1))
+	pm_nl_check_endpoint "implicit EP" "creation" \
+		$ns2 10.0.2.2 id 1 flags implicit
+
+	pm_nl_add_endpoint $ns2 10.0.2.2 id 33
+	pm_nl_check_endpoint "" "ID change is prevented" \
+		$ns2 10.0.2.2 id 1 flags implicit
+
+	pm_nl_add_endpoint $ns2 10.0.2.2 flags signal
+	pm_nl_check_endpoint "" "modif is allowed" \
+		$ns2 10.0.2.2 id 1 flags signal
+	wait
+}
+
 all_tests()
 {
 	subflows_tests
@@ -2287,6 +2400,7 @@ all_tests()
 	deny_join_id0_tests
 	fullmesh_tests
 	fastclose_tests
+	implicit_tests
 }
 
 # [$1: error message]
@@ -2314,6 +2428,7 @@ usage()
 	echo "  -d deny_join_id0_tests"
 	echo "  -m fullmesh_tests"
 	echo "  -z fastclose_tests"
+	echo "  -I implicit_tests"
 	echo "  -c capture pcap files"
 	echo "  -C enable data checksum"
 	echo "  -i use ip mptcp"
@@ -2324,7 +2439,7 @@ usage()
 
 
 tests=()
-while getopts 'fesltra64bpkdmchzCSi' opt; do
+while getopts 'fesltra64bpkdmchzICSi' opt; do
 	case $opt in
 		f)
 			tests+=(subflows_tests)
@@ -2374,6 +2489,9 @@ while getopts 'fesltra64bpkdmchzCSi' opt; do
 		z)
 			tests+=(fastclose_tests)
 			;;
+		I)
+			tests+=(implicit_tests)
+			;;
 		c)
 			capture=1
 			;;
diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index 22a5ec1e128e..a75a68ad652e 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -436,6 +436,13 @@ static void print_addr(struct rtattr *attrs, int len)
 					printf(",");
 			}
 
+			if (flags & MPTCP_PM_ADDR_FLAG_IMPLICIT) {
+				printf("implicit");
+				flags &= ~MPTCP_PM_ADDR_FLAG_IMPLICIT;
+				if (flags)
+					printf(",");
+			}
+
 			/* bump unknown flags, if any */
 			if (flags)
 				printf("0x%x", flags);
-- 
cgit 


From 2c3dacba5d46ef0ff83fe6b82570c433910b63ab Mon Sep 17 00:00:00 2001
From: Karolina Drobnik <karolinadrobnik@gmail.com>
Date: Mon, 28 Feb 2022 15:46:43 +0100
Subject: memblock tests: Split up reset_memblock function

All memblock data structure fields are reset in one function. In some
test cases, it's preferred to reset memory region arrays without
modifying other values like allocation direction flag.

Extract two functions from reset_memblock, so it's possible to reset
different parts of memblock:
  - reset_memblock_regions    - reset region arrays and their counters
  - reset_memblock_attributes - set other fields to their default values

Update checks in basic_api.c to use new definitions. Remove
reset_memblock call from memblock_initialization_check, so the true
initial values are tested.

Signed-off-by: Karolina Drobnik <karolinadrobnik@gmail.com>
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Link: https://lore.kernel.org/r/5cc1ba9a0ade922dbf4ba450165b81a9ed17d4a9.1646055639.git.karolinadrobnik@gmail.com
---
 tools/testing/memblock/tests/basic_api.c | 48 +++++++++++++++-----------------
 tools/testing/memblock/tests/common.c    | 14 ++++++----
 tools/testing/memblock/tests/common.h    |  3 +-
 3 files changed, 33 insertions(+), 32 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c
index fbb989f6ddbf..d5035a3dcce8 100644
--- a/tools/testing/memblock/tests/basic_api.c
+++ b/tools/testing/memblock/tests/basic_api.c
@@ -8,8 +8,6 @@
 
 static int memblock_initialization_check(void)
 {
-	reset_memblock();
-
 	assert(memblock.memory.regions);
 	assert(memblock.memory.cnt == 1);
 	assert(memblock.memory.max == EXPECTED_MEMBLOCK_REGIONS);
@@ -43,7 +41,7 @@ static int memblock_add_simple_check(void)
 		.size = SZ_4M
 	};
 
-	reset_memblock();
+	reset_memblock_regions();
 	memblock_add(r.base, r.size);
 
 	assert(rgn->base == r.base);
@@ -72,7 +70,7 @@ static int memblock_add_node_simple_check(void)
 		.size = SZ_16M
 	};
 
-	reset_memblock();
+	reset_memblock_regions();
 	memblock_add_node(r.base, r.size, 1, MEMBLOCK_HOTPLUG);
 
 	assert(rgn->base == r.base);
@@ -110,7 +108,7 @@ static int memblock_add_disjoint_check(void)
 		.size = SZ_8K
 	};
 
-	reset_memblock();
+	reset_memblock_regions();
 	memblock_add(r1.base, r1.size);
 	memblock_add(r2.base, r2.size);
 
@@ -151,7 +149,7 @@ static int memblock_add_overlap_top_check(void)
 
 	total_size = (r1.base - r2.base) + r1.size;
 
-	reset_memblock();
+	reset_memblock_regions();
 	memblock_add(r1.base, r1.size);
 	memblock_add(r2.base, r2.size);
 
@@ -190,7 +188,7 @@ static int memblock_add_overlap_bottom_check(void)
 
 	total_size = (r2.base - r1.base) + r2.size;
 
-	reset_memblock();
+	reset_memblock_regions();
 	memblock_add(r1.base, r1.size);
 	memblock_add(r2.base, r2.size);
 
@@ -225,7 +223,7 @@ static int memblock_add_within_check(void)
 		.size = SZ_1M
 	};
 
-	reset_memblock();
+	reset_memblock_regions();
 	memblock_add(r1.base, r1.size);
 	memblock_add(r2.base, r2.size);
 
@@ -249,7 +247,7 @@ static int memblock_add_twice_check(void)
 		.size = SZ_2M
 	};
 
-	reset_memblock();
+	reset_memblock_regions();
 
 	memblock_add(r.base, r.size);
 	memblock_add(r.base, r.size);
@@ -290,7 +288,7 @@ static int memblock_reserve_simple_check(void)
 		.size = SZ_128M
 	};
 
-	reset_memblock();
+	reset_memblock_regions();
 	memblock_reserve(r.base, r.size);
 
 	assert(rgn->base == r.base);
@@ -321,7 +319,7 @@ static int memblock_reserve_disjoint_check(void)
 		.size = SZ_512M
 	};
 
-	reset_memblock();
+	reset_memblock_regions();
 	memblock_reserve(r1.base, r1.size);
 	memblock_reserve(r2.base, r2.size);
 
@@ -364,7 +362,7 @@ static int memblock_reserve_overlap_top_check(void)
 
 	total_size = (r1.base - r2.base) + r1.size;
 
-	reset_memblock();
+	reset_memblock_regions();
 	memblock_reserve(r1.base, r1.size);
 	memblock_reserve(r2.base, r2.size);
 
@@ -404,7 +402,7 @@ static int memblock_reserve_overlap_bottom_check(void)
 
 	total_size = (r2.base - r1.base) + r2.size;
 
-	reset_memblock();
+	reset_memblock_regions();
 	memblock_reserve(r1.base, r1.size);
 	memblock_reserve(r2.base, r2.size);
 
@@ -440,7 +438,7 @@ static int memblock_reserve_within_check(void)
 		.size = SZ_64K
 	};
 
-	reset_memblock();
+	reset_memblock_regions();
 	memblock_reserve(r1.base, r1.size);
 	memblock_reserve(r2.base, r2.size);
 
@@ -465,7 +463,7 @@ static int memblock_reserve_twice_check(void)
 		.size = SZ_2M
 	};
 
-	reset_memblock();
+	reset_memblock_regions();
 
 	memblock_reserve(r.base, r.size);
 	memblock_reserve(r.base, r.size);
@@ -511,7 +509,7 @@ static int memblock_remove_simple_check(void)
 		.size = SZ_4M
 	};
 
-	reset_memblock();
+	reset_memblock_regions();
 	memblock_add(r1.base, r1.size);
 	memblock_add(r2.base, r2.size);
 	memblock_remove(r1.base, r1.size);
@@ -545,7 +543,7 @@ static int memblock_remove_absent_check(void)
 		.size = SZ_1G
 	};
 
-	reset_memblock();
+	reset_memblock_regions();
 	memblock_add(r1.base, r1.size);
 	memblock_remove(r2.base, r2.size);
 
@@ -585,7 +583,7 @@ static int memblock_remove_overlap_top_check(void)
 	r2_end = r2.base + r2.size;
 	total_size = r1_end - r2_end;
 
-	reset_memblock();
+	reset_memblock_regions();
 	memblock_add(r1.base, r1.size);
 	memblock_remove(r2.base, r2.size);
 
@@ -623,7 +621,7 @@ static int memblock_remove_overlap_bottom_check(void)
 
 	total_size = r2.base - r1.base;
 
-	reset_memblock();
+	reset_memblock_regions();
 	memblock_add(r1.base, r1.size);
 	memblock_remove(r2.base, r2.size);
 
@@ -665,7 +663,7 @@ static int memblock_remove_within_check(void)
 	r2_size = (r1.base + r1.size) - (r2.base + r2.size);
 	total_size = r1_size + r2_size;
 
-	reset_memblock();
+	reset_memblock_regions();
 	memblock_add(r1.base, r1.size);
 	memblock_remove(r2.base, r2.size);
 
@@ -715,7 +713,7 @@ static int memblock_free_simple_check(void)
 		.size = SZ_1M
 	};
 
-	reset_memblock();
+	reset_memblock_regions();
 	memblock_reserve(r1.base, r1.size);
 	memblock_reserve(r2.base, r2.size);
 	memblock_free((void *)r1.base, r1.size);
@@ -749,7 +747,7 @@ static int memblock_free_absent_check(void)
 		.size = SZ_128M
 	};
 
-	reset_memblock();
+	reset_memblock_regions();
 	memblock_reserve(r1.base, r1.size);
 	memblock_free((void *)r2.base, r2.size);
 
@@ -787,7 +785,7 @@ static int memblock_free_overlap_top_check(void)
 
 	total_size = (r1.size + r1.base) - (r2.base + r2.size);
 
-	reset_memblock();
+	reset_memblock_regions();
 	memblock_reserve(r1.base, r1.size);
 	memblock_free((void *)r2.base, r2.size);
 
@@ -824,7 +822,7 @@ static int memblock_free_overlap_bottom_check(void)
 
 	total_size = r2.base - r1.base;
 
-	reset_memblock();
+	reset_memblock_regions();
 	memblock_reserve(r1.base, r1.size);
 	memblock_free((void *)r2.base, r2.size);
 
@@ -867,7 +865,7 @@ static int memblock_free_within_check(void)
 	r2_size = (r1.base + r1.size) - (r2.base + r2.size);
 	total_size = r1_size + r2_size;
 
-	reset_memblock();
+	reset_memblock_regions();
 	memblock_reserve(r1.base, r1.size);
 	memblock_free((void *)r2.base, r2.size);
 
diff --git a/tools/testing/memblock/tests/common.c b/tools/testing/memblock/tests/common.c
index 03de6eab0c3c..dd7e87c589fe 100644
--- a/tools/testing/memblock/tests/common.c
+++ b/tools/testing/memblock/tests/common.c
@@ -5,23 +5,25 @@
 #define INIT_MEMBLOCK_REGIONS			128
 #define INIT_MEMBLOCK_RESERVED_REGIONS		INIT_MEMBLOCK_REGIONS
 
-void reset_memblock(void)
+void reset_memblock_regions(void)
 {
 	memset(memblock.memory.regions, 0,
 	       memblock.memory.cnt * sizeof(struct memblock_region));
-	memset(memblock.reserved.regions, 0,
-	       memblock.reserved.cnt * sizeof(struct memblock_region));
-
 	memblock.memory.cnt	= 1;
 	memblock.memory.max	= INIT_MEMBLOCK_REGIONS;
-	memblock.memory.name	= "memory";
 	memblock.memory.total_size = 0;
 
+	memset(memblock.reserved.regions, 0,
+	       memblock.reserved.cnt * sizeof(struct memblock_region));
 	memblock.reserved.cnt	= 1;
 	memblock.reserved.max	= INIT_MEMBLOCK_RESERVED_REGIONS;
-	memblock.reserved.name	= "reserved";
 	memblock.reserved.total_size = 0;
+}
 
+void reset_memblock_attributes(void)
+{
+	memblock.memory.name	= "memory";
+	memblock.reserved.name	= "reserved";
 	memblock.bottom_up	= false;
 	memblock.current_limit	= MEMBLOCK_ALLOC_ANYWHERE;
 }
diff --git a/tools/testing/memblock/tests/common.h b/tools/testing/memblock/tests/common.h
index 48efc4270ea1..b864c64fb60f 100644
--- a/tools/testing/memblock/tests/common.h
+++ b/tools/testing/memblock/tests/common.h
@@ -10,6 +10,7 @@ struct region {
 	phys_addr_t size;
 };
 
-void reset_memblock(void);
+void reset_memblock_regions(void);
+void reset_memblock_attributes(void);
 
 #endif
-- 
cgit 


From 284d950dd6b0ea699608455e443341e82f9719c8 Mon Sep 17 00:00:00 2001
From: Karolina Drobnik <karolinadrobnik@gmail.com>
Date: Mon, 28 Feb 2022 15:46:44 +0100
Subject: memblock tests: Add simulation of physical memory

Allocation functions that return virtual addresses (with an exception
of _raw variant) clear the allocated memory after reserving it. This
requires valid memory ranges in memblock.memory.

Introduce memory_block variable to store memory that can be registered
with memblock data structure. Move assert.h and size.h includes to common.h
to share them between the test files.

Signed-off-by: Karolina Drobnik <karolinadrobnik@gmail.com>
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Link: https://lore.kernel.org/r/dce115503c74a6936c44694b00014658a1bb6522.1646055639.git.karolinadrobnik@gmail.com
---
 tools/testing/memblock/tests/basic_api.c |  1 -
 tools/testing/memblock/tests/basic_api.h |  1 -
 tools/testing/memblock/tests/common.c    | 19 +++++++++++++++++++
 tools/testing/memblock/tests/common.h    | 18 ++++++++++++++++++
 4 files changed, 37 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c
index d5035a3dcce8..fbc1ce160303 100644
--- a/tools/testing/memblock/tests/basic_api.c
+++ b/tools/testing/memblock/tests/basic_api.c
@@ -1,7 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 #include <string.h>
 #include <linux/memblock.h>
-#include <linux/sizes.h>
 #include "basic_api.h"
 
 #define EXPECTED_MEMBLOCK_REGIONS			128
diff --git a/tools/testing/memblock/tests/basic_api.h b/tools/testing/memblock/tests/basic_api.h
index 1ceecfca1f47..1873faa54754 100644
--- a/tools/testing/memblock/tests/basic_api.h
+++ b/tools/testing/memblock/tests/basic_api.h
@@ -2,7 +2,6 @@
 #ifndef _MEMBLOCK_BASIC_H
 #define _MEMBLOCK_BASIC_H
 
-#include <assert.h>
 #include "common.h"
 
 int memblock_basic_checks(void);
diff --git a/tools/testing/memblock/tests/common.c b/tools/testing/memblock/tests/common.c
index dd7e87c589fe..62d3191f7c9a 100644
--- a/tools/testing/memblock/tests/common.c
+++ b/tools/testing/memblock/tests/common.c
@@ -5,6 +5,8 @@
 #define INIT_MEMBLOCK_REGIONS			128
 #define INIT_MEMBLOCK_RESERVED_REGIONS		INIT_MEMBLOCK_REGIONS
 
+static struct test_memory memory_block;
+
 void reset_memblock_regions(void)
 {
 	memset(memblock.memory.regions, 0,
@@ -27,3 +29,20 @@ void reset_memblock_attributes(void)
 	memblock.bottom_up	= false;
 	memblock.current_limit	= MEMBLOCK_ALLOC_ANYWHERE;
 }
+
+void setup_memblock(void)
+{
+	reset_memblock_regions();
+	memblock_add((phys_addr_t)memory_block.base, MEM_SIZE);
+}
+
+void dummy_physical_memory_init(void)
+{
+	memory_block.base = malloc(MEM_SIZE);
+	assert(memory_block.base);
+}
+
+void dummy_physical_memory_cleanup(void)
+{
+	free(memory_block.base);
+}
diff --git a/tools/testing/memblock/tests/common.h b/tools/testing/memblock/tests/common.h
index b864c64fb60f..619054d03219 100644
--- a/tools/testing/memblock/tests/common.h
+++ b/tools/testing/memblock/tests/common.h
@@ -2,8 +2,23 @@
 #ifndef _MEMBLOCK_TEST_H
 #define _MEMBLOCK_TEST_H
 
+#include <stdlib.h>
+#include <assert.h>
 #include <linux/types.h>
 #include <linux/memblock.h>
+#include <linux/sizes.h>
+
+#define MEM_SIZE SZ_16K
+
+/*
+ * Available memory registered with memblock needs to be valid for allocs
+ * test to run. This is a convenience wrapper for memory allocated in
+ * dummy_physical_memory_init() that is later registered with memblock
+ * in setup_memblock().
+ */
+struct test_memory {
+	void *base;
+};
 
 struct region {
 	phys_addr_t base;
@@ -12,5 +27,8 @@ struct region {
 
 void reset_memblock_regions(void);
 void reset_memblock_attributes(void);
+void setup_memblock(void);
+void dummy_physical_memory_init(void);
+void dummy_physical_memory_cleanup(void);
 
 #endif
-- 
cgit 


From 142eac65f3e04577451a40b3cc57e708311eed01 Mon Sep 17 00:00:00 2001
From: Karolina Drobnik <karolinadrobnik@gmail.com>
Date: Mon, 28 Feb 2022 15:46:45 +0100
Subject: memblock tests: Add memblock_alloc tests for top down

Add checks for memblock_alloc for top down allocation direction.
The tested scenarios are:
  - Region can be allocated on the first fit (with and without
    region merging)
  - Region can be allocated on the second fit (with and without
    region merging)

Add checks for both allocation directions:
  - Region can be allocated between two already existing entries
  - Limited memory available
  - All memory is reserved
  - No available memory registered with memblock

Signed-off-by: Karolina Drobnik <karolinadrobnik@gmail.com>
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Link: https://lore.kernel.org/r/26ccf409b8ff0394559d38d792b2afb24b55887c.1646055639.git.karolinadrobnik@gmail.com
---
 tools/testing/memblock/Makefile          |   2 +-
 tools/testing/memblock/main.c            |   3 +
 tools/testing/memblock/tests/alloc_api.c | 434 +++++++++++++++++++++++++++++++
 tools/testing/memblock/tests/alloc_api.h |   9 +
 4 files changed, 447 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/memblock/tests/alloc_api.c
 create mode 100644 tools/testing/memblock/tests/alloc_api.h

(limited to 'tools/testing')

diff --git a/tools/testing/memblock/Makefile b/tools/testing/memblock/Makefile
index 29715327a2d3..5b01cfd808d0 100644
--- a/tools/testing/memblock/Makefile
+++ b/tools/testing/memblock/Makefile
@@ -6,7 +6,7 @@ CFLAGS += -I. -I../../include -Wall -O2 -fsanitize=address \
 	  -fsanitize=undefined -D CONFIG_PHYS_ADDR_T_64BIT
 LDFLAGS += -fsanitize=address -fsanitize=undefined
 TARGETS = main
-TEST_OFILES = tests/basic_api.o tests/common.o
+TEST_OFILES = tests/alloc_api.o tests/basic_api.o tests/common.o
 DEP_OFILES = memblock.o lib/slab.o mmzone.o slab.o
 OFILES = main.o $(DEP_OFILES) $(TEST_OFILES)
 EXTR_SRC = ../../../mm/memblock.c
diff --git a/tools/testing/memblock/main.c b/tools/testing/memblock/main.c
index da65b0adee91..e7cc45dc06d4 100644
--- a/tools/testing/memblock/main.c
+++ b/tools/testing/memblock/main.c
@@ -1,8 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 #include "tests/basic_api.h"
+#include "tests/alloc_api.h"
 
 int main(int argc, char **argv)
 {
 	memblock_basic_checks();
+	memblock_alloc_checks();
+
 	return 0;
 }
diff --git a/tools/testing/memblock/tests/alloc_api.c b/tools/testing/memblock/tests/alloc_api.c
new file mode 100644
index 000000000000..add4345aa289
--- /dev/null
+++ b/tools/testing/memblock/tests/alloc_api.c
@@ -0,0 +1,434 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include "alloc_api.h"
+
+/*
+ * A simple test that tries to allocate a small memory region.
+ * Expect to allocate an aligned region near the end of the available memory.
+ */
+static int alloc_top_down_simple_check(void)
+{
+	struct memblock_region *rgn = &memblock.reserved.regions[0];
+	void *allocated_ptr = NULL;
+
+	phys_addr_t size = SZ_2;
+	phys_addr_t expected_start;
+
+	setup_memblock();
+
+	expected_start = memblock_end_of_DRAM() - SMP_CACHE_BYTES;
+
+	allocated_ptr = memblock_alloc(size, SMP_CACHE_BYTES);
+
+	assert(allocated_ptr);
+	assert(rgn->size == size);
+	assert(rgn->base == expected_start);
+
+	assert(memblock.reserved.cnt == 1);
+	assert(memblock.reserved.total_size == size);
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate memory next to a reserved region that starts at
+ * the misaligned address. Expect to create two separate entries, with the new
+ * entry aligned to the provided alignment:
+ *
+ *              +
+ * |            +--------+         +--------|
+ * |            |  rgn2  |         |  rgn1  |
+ * +------------+--------+---------+--------+
+ *              ^
+ *              |
+ *              Aligned address boundary
+ *
+ * The allocation direction is top-down and region arrays are sorted from lower
+ * to higher addresses, so the new region will be the first entry in
+ * memory.reserved array. The previously reserved region does not get modified.
+ * Region counter and total size get updated.
+ */
+static int alloc_top_down_disjoint_check(void)
+{
+	/* After allocation, this will point to the "old" region */
+	struct memblock_region *rgn1 = &memblock.reserved.regions[1];
+	struct memblock_region *rgn2 = &memblock.reserved.regions[0];
+	struct region r1;
+	void *allocated_ptr = NULL;
+
+	phys_addr_t r2_size = SZ_16;
+	/* Use custom alignment */
+	phys_addr_t alignment = SMP_CACHE_BYTES * 2;
+	phys_addr_t total_size;
+	phys_addr_t expected_start;
+
+	setup_memblock();
+
+	r1.base = memblock_end_of_DRAM() - SZ_2;
+	r1.size = SZ_2;
+
+	total_size = r1.size + r2_size;
+	expected_start = memblock_end_of_DRAM() - alignment;
+
+	memblock_reserve(r1.base, r1.size);
+
+	allocated_ptr = memblock_alloc(r2_size, alignment);
+
+	assert(allocated_ptr);
+	assert(rgn1->size == r1.size);
+	assert(rgn1->base == r1.base);
+
+	assert(rgn2->size == r2_size);
+	assert(rgn2->base == expected_start);
+
+	assert(memblock.reserved.cnt == 2);
+	assert(memblock.reserved.total_size == total_size);
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate memory when there is enough space at the end
+ * of the previously reserved block (i.e. first fit):
+ *
+ *  |              +--------+--------------|
+ *  |              |   r1   |      r2      |
+ *  +--------------+--------+--------------+
+ *
+ * Expect a merge of both regions. Only the region size gets updated.
+ */
+static int alloc_top_down_before_check(void)
+{
+	struct memblock_region *rgn = &memblock.reserved.regions[0];
+	void *allocated_ptr = NULL;
+
+	/*
+	 * The first region ends at the aligned address to test region merging
+	 */
+	phys_addr_t r1_size = SMP_CACHE_BYTES;
+	phys_addr_t r2_size = SZ_512;
+	phys_addr_t total_size = r1_size + r2_size;
+
+	setup_memblock();
+
+	memblock_reserve(memblock_end_of_DRAM() - total_size, r1_size);
+
+	allocated_ptr = memblock_alloc(r2_size, SMP_CACHE_BYTES);
+
+	assert(allocated_ptr);
+	assert(rgn->size == total_size);
+	assert(rgn->base == memblock_end_of_DRAM() - total_size);
+
+	assert(memblock.reserved.cnt == 1);
+	assert(memblock.reserved.total_size == total_size);
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate memory when there is not enough space at the
+ * end of the previously reserved block (i.e. second fit):
+ *
+ *  |            +-----------+------+     |
+ *  |            |     r2    |  r1  |     |
+ *  +------------+-----------+------+-----+
+ *
+ * Expect a merge of both regions. Both the base address and size of the region
+ * get updated.
+ */
+static int alloc_top_down_after_check(void)
+{
+	struct memblock_region *rgn = &memblock.reserved.regions[0];
+	struct region r1;
+	void *allocated_ptr = NULL;
+
+	phys_addr_t r2_size = SZ_512;
+	phys_addr_t total_size;
+
+	setup_memblock();
+
+	/*
+	 * The first region starts at the aligned address to test region merging
+	 */
+	r1.base = memblock_end_of_DRAM() - SMP_CACHE_BYTES;
+	r1.size = SZ_8;
+
+	total_size = r1.size + r2_size;
+
+	memblock_reserve(r1.base, r1.size);
+
+	allocated_ptr = memblock_alloc(r2_size, SMP_CACHE_BYTES);
+
+	assert(allocated_ptr);
+	assert(rgn->size == total_size);
+	assert(rgn->base == r1.base - r2_size);
+
+	assert(memblock.reserved.cnt == 1);
+	assert(memblock.reserved.total_size == total_size);
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate memory when there are two reserved regions with
+ * a gap too small to fit the new region:
+ *
+ *  |       +--------+----------+   +------|
+ *  |       |   r3   |    r2    |   |  r1  |
+ *  +-------+--------+----------+---+------+
+ *
+ * Expect to allocate a region before the one that starts at the lower address,
+ * and merge them into one. The region counter and total size fields get
+ * updated.
+ */
+static int alloc_top_down_second_fit_check(void)
+{
+	struct memblock_region *rgn = &memblock.reserved.regions[0];
+	struct region r1, r2;
+	void *allocated_ptr = NULL;
+
+	phys_addr_t r3_size = SZ_1K;
+	phys_addr_t total_size;
+
+	setup_memblock();
+
+	r1.base = memblock_end_of_DRAM() - SZ_512;
+	r1.size = SZ_512;
+
+	r2.base = r1.base - SZ_512;
+	r2.size = SZ_256;
+
+	total_size = r1.size + r2.size + r3_size;
+
+	memblock_reserve(r1.base, r1.size);
+	memblock_reserve(r2.base, r2.size);
+
+	allocated_ptr = memblock_alloc(r3_size, SMP_CACHE_BYTES);
+
+	assert(allocated_ptr);
+	assert(rgn->size == r2.size + r3_size);
+	assert(rgn->base == r2.base - r3_size);
+
+	assert(memblock.reserved.cnt == 2);
+	assert(memblock.reserved.total_size == total_size);
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate memory when there are two reserved regions with
+ * a gap big enough to accommodate the new region:
+ *
+ *  |     +--------+--------+--------+     |
+ *  |     |   r2   |   r3   |   r1   |     |
+ *  +-----+--------+--------+--------+-----+
+ *
+ * Expect to merge all of them, creating one big entry in memblock.reserved
+ * array. The region counter and total size fields get updated.
+ */
+static int alloc_in_between_generic_check(void)
+{
+	struct memblock_region *rgn = &memblock.reserved.regions[0];
+	struct region r1, r2;
+	void *allocated_ptr = NULL;
+
+	phys_addr_t gap_size = SMP_CACHE_BYTES;
+	phys_addr_t r3_size = SZ_64;
+	/*
+	 * Calculate regions size so there's just enough space for the new entry
+	 */
+	phys_addr_t rgn_size = (MEM_SIZE - (2 * gap_size + r3_size)) / 2;
+	phys_addr_t total_size;
+
+	setup_memblock();
+
+	r1.size = rgn_size;
+	r1.base = memblock_end_of_DRAM() - (gap_size + rgn_size);
+
+	r2.size = rgn_size;
+	r2.base = memblock_start_of_DRAM() + gap_size;
+
+	total_size = r1.size + r2.size + r3_size;
+
+	memblock_reserve(r1.base, r1.size);
+	memblock_reserve(r2.base, r2.size);
+
+	allocated_ptr = memblock_alloc(r3_size, SMP_CACHE_BYTES);
+
+	assert(allocated_ptr);
+	assert(rgn->size == total_size);
+	assert(rgn->base == r1.base - r2.size - r3_size);
+
+	assert(memblock.reserved.cnt == 1);
+	assert(memblock.reserved.total_size == total_size);
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate memory when the memory is filled with reserved
+ * regions with memory gaps too small to fit the new region:
+ *
+ * +-------+
+ * |  new  |
+ * +--+----+
+ *    |    +-----+    +-----+    +-----+    |
+ *    |    | res |    | res |    | res |    |
+ *    +----+-----+----+-----+----+-----+----+
+ *
+ * Expect no allocation to happen.
+ */
+static int alloc_small_gaps_generic_check(void)
+{
+	void *allocated_ptr = NULL;
+
+	phys_addr_t region_size = SZ_1K;
+	phys_addr_t gap_size = SZ_256;
+	phys_addr_t region_end;
+
+	setup_memblock();
+
+	region_end = memblock_start_of_DRAM();
+
+	while (region_end < memblock_end_of_DRAM()) {
+		memblock_reserve(region_end + gap_size, region_size);
+		region_end += gap_size + region_size;
+	}
+
+	allocated_ptr = memblock_alloc(region_size, SMP_CACHE_BYTES);
+
+	assert(!allocated_ptr);
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate memory when all memory is reserved.
+ * Expect no allocation to happen.
+ */
+static int alloc_all_reserved_generic_check(void)
+{
+	void *allocated_ptr = NULL;
+
+	setup_memblock();
+
+	/* Simulate full memory */
+	memblock_reserve(memblock_start_of_DRAM(), MEM_SIZE);
+
+	allocated_ptr = memblock_alloc(SZ_256, SMP_CACHE_BYTES);
+
+	assert(!allocated_ptr);
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate memory when the memory is almost full,
+ * with not enough space left for the new region:
+ *
+ *                                +-------+
+ *                                |  new  |
+ *                                +-------+
+ *  |-----------------------------+   |
+ *  |          reserved           |   |
+ *  +-----------------------------+---+
+ *
+ * Expect no allocation to happen.
+ */
+static int alloc_no_space_generic_check(void)
+{
+	void *allocated_ptr = NULL;
+
+	setup_memblock();
+
+	phys_addr_t available_size = SZ_256;
+	phys_addr_t reserved_size = MEM_SIZE - available_size;
+
+	/* Simulate almost-full memory */
+	memblock_reserve(memblock_start_of_DRAM(), reserved_size);
+
+	allocated_ptr = memblock_alloc(SZ_1K, SMP_CACHE_BYTES);
+
+	assert(!allocated_ptr);
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate memory when the memory is almost full,
+ * but there is just enough space left:
+ *
+ *  |---------------------------+---------|
+ *  |          reserved         |   new   |
+ *  +---------------------------+---------+
+ *
+ * Expect to allocate memory and merge all the regions. The total size field
+ * gets updated.
+ */
+static int alloc_limited_space_generic_check(void)
+{
+	struct memblock_region *rgn = &memblock.reserved.regions[0];
+	void *allocated_ptr = NULL;
+
+	phys_addr_t available_size = SZ_256;
+	phys_addr_t reserved_size = MEM_SIZE - available_size;
+
+	setup_memblock();
+
+	/* Simulate almost-full memory */
+	memblock_reserve(memblock_start_of_DRAM(), reserved_size);
+
+	allocated_ptr = memblock_alloc(available_size, SMP_CACHE_BYTES);
+
+	assert(allocated_ptr);
+	assert(rgn->size == MEM_SIZE);
+	assert(rgn->base == memblock_start_of_DRAM());
+
+	assert(memblock.reserved.cnt == 1);
+	assert(memblock.reserved.total_size == MEM_SIZE);
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate memory when there is no available memory
+ * registered (i.e. memblock.memory has only a dummy entry).
+ * Expect no allocation to happen.
+ */
+static int alloc_no_memory_generic_check(void)
+{
+	struct memblock_region *rgn = &memblock.reserved.regions[0];
+	void *allocated_ptr = NULL;
+
+	reset_memblock_regions();
+
+	allocated_ptr = memblock_alloc(SZ_1K, SMP_CACHE_BYTES);
+
+	assert(!allocated_ptr);
+	assert(rgn->size == 0);
+	assert(rgn->base == 0);
+	assert(memblock.reserved.total_size == 0);
+
+	return 0;
+}
+
+int memblock_alloc_checks(void)
+{
+	reset_memblock_attributes();
+	dummy_physical_memory_init();
+
+	alloc_top_down_simple_check();
+	alloc_top_down_disjoint_check();
+	alloc_top_down_before_check();
+	alloc_top_down_after_check();
+	alloc_top_down_second_fit_check();
+	alloc_in_between_generic_check();
+	alloc_small_gaps_generic_check();
+	alloc_all_reserved_generic_check();
+	alloc_no_space_generic_check();
+	alloc_limited_space_generic_check();
+	alloc_no_memory_generic_check();
+
+	dummy_physical_memory_cleanup();
+
+	return 0;
+}
diff --git a/tools/testing/memblock/tests/alloc_api.h b/tools/testing/memblock/tests/alloc_api.h
new file mode 100644
index 000000000000..585b085baf21
--- /dev/null
+++ b/tools/testing/memblock/tests/alloc_api.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _MEMBLOCK_ALLOCS_H
+#define _MEMBLOCK_ALLOCS_H
+
+#include "common.h"
+
+int memblock_alloc_checks(void);
+
+#endif
-- 
cgit 


From 0237ee2388703cf72d1e1f9ea0c73c054582f30d Mon Sep 17 00:00:00 2001
From: Karolina Drobnik <karolinadrobnik@gmail.com>
Date: Mon, 28 Feb 2022 15:46:46 +0100
Subject: memblock tests: Add memblock_alloc tests for bottom up

Add checks for memblock_alloc for bottom up allocation direction.
The tested scenarios are:
  - Region can be allocated on the first fit (with and without
    region merging)
  - Region can be allocated on the second fit (with and without
    region merging)

Add test case wrappers to test both directions in the same context.

Signed-off-by: Karolina Drobnik <karolinadrobnik@gmail.com>
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Link: https://lore.kernel.org/r/426674eee20d99dca49caf1ee0142a83dccbc98d.1646055639.git.karolinadrobnik@gmail.com
---
 tools/testing/memblock/tests/alloc_api.c | 324 ++++++++++++++++++++++++++++++-
 1 file changed, 320 insertions(+), 4 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/memblock/tests/alloc_api.c b/tools/testing/memblock/tests/alloc_api.c
index add4345aa289..d1aa7e15c18d 100644
--- a/tools/testing/memblock/tests/alloc_api.c
+++ b/tools/testing/memblock/tests/alloc_api.c
@@ -411,23 +411,339 @@ static int alloc_no_memory_generic_check(void)
 	return 0;
 }
 
-int memblock_alloc_checks(void)
+/*
+ * A simple test that tries to allocate a small memory region.
+ * Expect to allocate an aligned region at the beginning of the available
+ * memory.
+ */
+static int alloc_bottom_up_simple_check(void)
 {
-	reset_memblock_attributes();
-	dummy_physical_memory_init();
+	struct memblock_region *rgn = &memblock.reserved.regions[0];
+	void *allocated_ptr = NULL;
+
+	setup_memblock();
+
+	allocated_ptr = memblock_alloc(SZ_2, SMP_CACHE_BYTES);
+
+	assert(allocated_ptr);
+	assert(rgn->size == SZ_2);
+	assert(rgn->base == memblock_start_of_DRAM());
+
+	assert(memblock.reserved.cnt == 1);
+	assert(memblock.reserved.total_size == SZ_2);
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate memory next to a reserved region that starts at
+ * the misaligned address. Expect to create two separate entries, with the new
+ * entry aligned to the provided alignment:
+ *
+ *                      +
+ *  |    +----------+   +----------+     |
+ *  |    |   rgn1   |   |   rgn2   |     |
+ *  +----+----------+---+----------+-----+
+ *                      ^
+ *                      |
+ *                      Aligned address boundary
+ *
+ * The allocation direction is bottom-up, so the new region will be the second
+ * entry in memory.reserved array. The previously reserved region does not get
+ * modified. Region counter and total size get updated.
+ */
+static int alloc_bottom_up_disjoint_check(void)
+{
+	struct memblock_region *rgn1 = &memblock.reserved.regions[0];
+	struct memblock_region *rgn2 = &memblock.reserved.regions[1];
+	struct region r1;
+	void *allocated_ptr = NULL;
+
+	phys_addr_t r2_size = SZ_16;
+	/* Use custom alignment */
+	phys_addr_t alignment = SMP_CACHE_BYTES * 2;
+	phys_addr_t total_size;
+	phys_addr_t expected_start;
+
+	setup_memblock();
+
+	r1.base = memblock_start_of_DRAM() + SZ_2;
+	r1.size = SZ_2;
+
+	total_size = r1.size + r2_size;
+	expected_start = memblock_start_of_DRAM() + alignment;
 
+	memblock_reserve(r1.base, r1.size);
+
+	allocated_ptr = memblock_alloc(r2_size, alignment);
+
+	assert(allocated_ptr);
+
+	assert(rgn1->size == r1.size);
+	assert(rgn1->base == r1.base);
+
+	assert(rgn2->size == r2_size);
+	assert(rgn2->base == expected_start);
+
+	assert(memblock.reserved.cnt == 2);
+	assert(memblock.reserved.total_size == total_size);
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate memory when there is enough space at
+ * the beginning of the previously reserved block (i.e. first fit):
+ *
+ *  |------------------+--------+         |
+ *  |        r1        |   r2   |         |
+ *  +------------------+--------+---------+
+ *
+ * Expect a merge of both regions. Only the region size gets updated.
+ */
+static int alloc_bottom_up_before_check(void)
+{
+	struct memblock_region *rgn = &memblock.reserved.regions[0];
+	void *allocated_ptr = NULL;
+
+	phys_addr_t r1_size = SZ_512;
+	phys_addr_t r2_size = SZ_128;
+	phys_addr_t total_size = r1_size + r2_size;
+
+	setup_memblock();
+
+	memblock_reserve(memblock_start_of_DRAM() + r1_size, r2_size);
+
+	allocated_ptr = memblock_alloc(r1_size, SMP_CACHE_BYTES);
+
+	assert(allocated_ptr);
+	assert(rgn->size == total_size);
+	assert(rgn->base == memblock_start_of_DRAM());
+
+	assert(memblock.reserved.cnt == 1);
+	assert(memblock.reserved.total_size == total_size);
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate memory when there is not enough space at
+ * the beginning of the previously reserved block (i.e. second fit):
+ *
+ *  |    +--------+--------------+         |
+ *  |    |   r1   |      r2      |         |
+ *  +----+--------+--------------+---------+
+ *
+ * Expect a merge of both regions. Only the region size gets updated.
+ */
+static int alloc_bottom_up_after_check(void)
+{
+	struct memblock_region *rgn = &memblock.reserved.regions[0];
+	struct region r1;
+	void *allocated_ptr = NULL;
+
+	phys_addr_t r2_size = SZ_512;
+	phys_addr_t total_size;
+
+	setup_memblock();
+
+	/*
+	 * The first region starts at the aligned address to test region merging
+	 */
+	r1.base = memblock_start_of_DRAM() + SMP_CACHE_BYTES;
+	r1.size = SZ_64;
+
+	total_size = r1.size + r2_size;
+
+	memblock_reserve(r1.base, r1.size);
+
+	allocated_ptr = memblock_alloc(r2_size, SMP_CACHE_BYTES);
+
+	assert(allocated_ptr);
+	assert(rgn->size == total_size);
+	assert(rgn->base == r1.base);
+
+	assert(memblock.reserved.cnt == 1);
+	assert(memblock.reserved.total_size == total_size);
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate memory when there are two reserved regions, the
+ * first one starting at the beginning of the available memory, with a gap too
+ * small to fit the new region:
+ *
+ *  |------------+     +--------+--------+  |
+ *  |     r1     |     |   r2   |   r3   |  |
+ *  +------------+-----+--------+--------+--+
+ *
+ * Expect to allocate after the second region, which starts at the higher
+ * address, and merge them into one. The region counter and total size fields
+ * get updated.
+ */
+static int alloc_bottom_up_second_fit_check(void)
+{
+	struct memblock_region *rgn  = &memblock.reserved.regions[1];
+	struct region r1, r2;
+	void *allocated_ptr = NULL;
+
+	phys_addr_t r3_size = SZ_1K;
+	phys_addr_t total_size;
+
+	setup_memblock();
+
+	r1.base = memblock_start_of_DRAM();
+	r1.size = SZ_512;
+
+	r2.base = r1.base + r1.size + SZ_512;
+	r2.size = SZ_256;
+
+	total_size = r1.size + r2.size + r3_size;
+
+	memblock_reserve(r1.base, r1.size);
+	memblock_reserve(r2.base, r2.size);
+
+	allocated_ptr = memblock_alloc(r3_size, SMP_CACHE_BYTES);
+
+	assert(allocated_ptr);
+	assert(rgn->size == r2.size + r3_size);
+	assert(rgn->base == r2.base);
+
+	assert(memblock.reserved.cnt == 2);
+	assert(memblock.reserved.total_size == total_size);
+
+	return 0;
+}
+
+/* Test case wrappers */
+static int alloc_simple_check(void)
+{
+	memblock_set_bottom_up(false);
 	alloc_top_down_simple_check();
+	memblock_set_bottom_up(true);
+	alloc_bottom_up_simple_check();
+
+	return 0;
+}
+
+static int alloc_disjoint_check(void)
+{
+	memblock_set_bottom_up(false);
 	alloc_top_down_disjoint_check();
+	memblock_set_bottom_up(true);
+	alloc_bottom_up_disjoint_check();
+
+	return 0;
+}
+
+static int alloc_before_check(void)
+{
+	memblock_set_bottom_up(false);
 	alloc_top_down_before_check();
+	memblock_set_bottom_up(true);
+	alloc_bottom_up_before_check();
+
+	return 0;
+}
+
+static int alloc_after_check(void)
+{
+	memblock_set_bottom_up(false);
 	alloc_top_down_after_check();
-	alloc_top_down_second_fit_check();
+	memblock_set_bottom_up(true);
+	alloc_bottom_up_after_check();
+
+	return 0;
+}
+
+static int alloc_in_between_check(void)
+{
+	memblock_set_bottom_up(false);
 	alloc_in_between_generic_check();
+	memblock_set_bottom_up(true);
+	alloc_in_between_generic_check();
+
+	return 0;
+}
+
+static int alloc_second_fit_check(void)
+{
+	memblock_set_bottom_up(false);
+	alloc_top_down_second_fit_check();
+	memblock_set_bottom_up(true);
+	alloc_bottom_up_second_fit_check();
+
+	return 0;
+}
+
+static int alloc_small_gaps_check(void)
+{
+	memblock_set_bottom_up(false);
 	alloc_small_gaps_generic_check();
+	memblock_set_bottom_up(true);
+	alloc_small_gaps_generic_check();
+
+	return 0;
+}
+
+static int alloc_all_reserved_check(void)
+{
+	memblock_set_bottom_up(false);
+	alloc_all_reserved_generic_check();
+	memblock_set_bottom_up(true);
 	alloc_all_reserved_generic_check();
+
+	return 0;
+}
+
+static int alloc_no_space_check(void)
+{
+	memblock_set_bottom_up(false);
+	alloc_no_space_generic_check();
+	memblock_set_bottom_up(true);
 	alloc_no_space_generic_check();
+
+	return 0;
+}
+
+static int alloc_limited_space_check(void)
+{
+	memblock_set_bottom_up(false);
 	alloc_limited_space_generic_check();
+	memblock_set_bottom_up(true);
+	alloc_limited_space_generic_check();
+
+	return 0;
+}
+
+static int alloc_no_memory_check(void)
+{
+	memblock_set_bottom_up(false);
+	alloc_no_memory_generic_check();
+	memblock_set_bottom_up(true);
 	alloc_no_memory_generic_check();
 
+	return 0;
+}
+
+int memblock_alloc_checks(void)
+{
+	reset_memblock_attributes();
+	dummy_physical_memory_init();
+
+	alloc_simple_check();
+	alloc_disjoint_check();
+	alloc_before_check();
+	alloc_after_check();
+	alloc_second_fit_check();
+	alloc_small_gaps_check();
+	alloc_in_between_check();
+	alloc_all_reserved_check();
+	alloc_no_space_check();
+	alloc_limited_space_check();
+	alloc_no_memory_check();
+
 	dummy_physical_memory_cleanup();
 
 	return 0;
-- 
cgit 


From 16567b5f30f6ea13a78a3b553e0c9c0c1ba34e64 Mon Sep 17 00:00:00 2001
From: Karolina Drobnik <karolinadrobnik@gmail.com>
Date: Mon, 28 Feb 2022 15:46:47 +0100
Subject: memblock tests: Add memblock_alloc_from tests for top down

Add checks for memblock_alloc_from for default allocation direction.
The tested scenarios are:
  - Not enough space to allocate memory at the minimal address
  - Minimal address parameter is smaller than the start address
    of the available memory
  - Minimal address is too close to the available memory

Add simple memblock_alloc_from test that can be used to test both
allocation directions (minimal address is aligned or misaligned).

Signed-off-by: Karolina Drobnik <karolinadrobnik@gmail.com>
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Link: https://lore.kernel.org/r/3dd645f437975fd393010b95b8faa85d2b86490a.1646055639.git.karolinadrobnik@gmail.com
---
 tools/testing/memblock/Makefile                  |   3 +-
 tools/testing/memblock/main.c                    |   2 +
 tools/testing/memblock/tests/alloc_helpers_api.c | 226 +++++++++++++++++++++++
 tools/testing/memblock/tests/alloc_helpers_api.h |   9 +
 4 files changed, 239 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/memblock/tests/alloc_helpers_api.c
 create mode 100644 tools/testing/memblock/tests/alloc_helpers_api.h

(limited to 'tools/testing')

diff --git a/tools/testing/memblock/Makefile b/tools/testing/memblock/Makefile
index 5b01cfd808d0..89e374470009 100644
--- a/tools/testing/memblock/Makefile
+++ b/tools/testing/memblock/Makefile
@@ -6,7 +6,8 @@ CFLAGS += -I. -I../../include -Wall -O2 -fsanitize=address \
 	  -fsanitize=undefined -D CONFIG_PHYS_ADDR_T_64BIT
 LDFLAGS += -fsanitize=address -fsanitize=undefined
 TARGETS = main
-TEST_OFILES = tests/alloc_api.o tests/basic_api.o tests/common.o
+TEST_OFILES = tests/alloc_helpers_api.o tests/alloc_api.o tests/basic_api.o \
+	      tests/common.o
 DEP_OFILES = memblock.o lib/slab.o mmzone.o slab.o
 OFILES = main.o $(DEP_OFILES) $(TEST_OFILES)
 EXTR_SRC = ../../../mm/memblock.c
diff --git a/tools/testing/memblock/main.c b/tools/testing/memblock/main.c
index e7cc45dc06d4..b63150ee554f 100644
--- a/tools/testing/memblock/main.c
+++ b/tools/testing/memblock/main.c
@@ -1,11 +1,13 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 #include "tests/basic_api.h"
 #include "tests/alloc_api.h"
+#include "tests/alloc_helpers_api.h"
 
 int main(int argc, char **argv)
 {
 	memblock_basic_checks();
 	memblock_alloc_checks();
+	memblock_alloc_helpers_checks();
 
 	return 0;
 }
diff --git a/tools/testing/memblock/tests/alloc_helpers_api.c b/tools/testing/memblock/tests/alloc_helpers_api.c
new file mode 100644
index 000000000000..dc5152adcc5b
--- /dev/null
+++ b/tools/testing/memblock/tests/alloc_helpers_api.c
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include "alloc_helpers_api.h"
+
+/*
+ * A simple test that tries to allocate a memory region above a specified,
+ * aligned address:
+ *
+ *             +
+ *  |          +-----------+         |
+ *  |          |    rgn    |         |
+ *  +----------+-----------+---------+
+ *             ^
+ *             |
+ *             Aligned min_addr
+ *
+ * Expect to allocate a cleared region at the minimal memory address.
+ */
+static int alloc_from_simple_generic_check(void)
+{
+	struct memblock_region *rgn = &memblock.reserved.regions[0];
+	void *allocated_ptr = NULL;
+	char *b;
+
+	phys_addr_t size = SZ_16;
+	phys_addr_t min_addr;
+
+	setup_memblock();
+
+	min_addr = memblock_end_of_DRAM() - SMP_CACHE_BYTES;
+
+	allocated_ptr = memblock_alloc_from(size, SMP_CACHE_BYTES, min_addr);
+	b = (char *)allocated_ptr;
+
+	assert(allocated_ptr);
+	assert(*b == 0);
+
+	assert(rgn->size == size);
+	assert(rgn->base == min_addr);
+
+	assert(memblock.reserved.cnt == 1);
+	assert(memblock.reserved.total_size == size);
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region above a certain address.
+ * The minimal address here is not aligned:
+ *
+ *         +      +
+ *  |      +      +---------+            |
+ *  |      |      |   rgn   |            |
+ *  +------+------+---------+------------+
+ *         ^      ^------.
+ *         |             |
+ *       min_addr        Aligned address
+ *                       boundary
+ *
+ * Expect to allocate a cleared region at the closest aligned memory address.
+ */
+static int alloc_from_misaligned_generic_check(void)
+{
+	struct memblock_region *rgn = &memblock.reserved.regions[0];
+	void *allocated_ptr = NULL;
+	char *b;
+
+	phys_addr_t size = SZ_32;
+	phys_addr_t min_addr;
+
+	setup_memblock();
+
+	/* A misaligned address */
+	min_addr = memblock_end_of_DRAM() - (SMP_CACHE_BYTES * 2 - 1);
+
+	allocated_ptr = memblock_alloc_from(size, SMP_CACHE_BYTES, min_addr);
+	b = (char *)allocated_ptr;
+
+	assert(allocated_ptr);
+	assert(*b == 0);
+
+	assert(rgn->size == size);
+	assert(rgn->base == memblock_end_of_DRAM() - SMP_CACHE_BYTES);
+
+	assert(memblock.reserved.cnt == 1);
+	assert(memblock.reserved.total_size == size);
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region above an address that is too
+ * close to the end of the memory:
+ *
+ *              +        +
+ *  |           +--------+---+      |
+ *  |           |   rgn  +   |      |
+ *  +-----------+--------+---+------+
+ *              ^        ^
+ *              |        |
+ *              |        min_addr
+ *              |
+ *              Aligned address
+ *              boundary
+ *
+ * Expect to prioritize granting memory over satisfying the minimal address
+ * requirement.
+ */
+static int alloc_from_top_down_high_addr_check(void)
+{
+	struct memblock_region *rgn = &memblock.reserved.regions[0];
+	void *allocated_ptr = NULL;
+
+	phys_addr_t size = SZ_32;
+	phys_addr_t min_addr;
+
+	setup_memblock();
+
+	/* The address is too close to the end of the memory */
+	min_addr = memblock_end_of_DRAM() - SZ_16;
+
+	allocated_ptr = memblock_alloc_from(size, SMP_CACHE_BYTES, min_addr);
+
+	assert(allocated_ptr);
+	assert(rgn->size == size);
+	assert(rgn->base == memblock_end_of_DRAM() - SMP_CACHE_BYTES);
+
+	assert(memblock.reserved.cnt == 1);
+	assert(memblock.reserved.total_size == size);
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region when there is no space
+ * available above the minimal address above a certain address:
+ *
+ *                     +
+ *  |        +---------+-------------|
+ *  |        |   rgn   |             |
+ *  +--------+---------+-------------+
+ *                     ^
+ *                     |
+ *                     min_addr
+ *
+ * Expect to prioritize granting memory over satisfying the minimal address
+ * requirement and to allocate next to the previously reserved region. The
+ * regions get merged into one.
+ */
+static int alloc_from_top_down_no_space_above_check(void)
+{
+	struct memblock_region *rgn = &memblock.reserved.regions[0];
+	void *allocated_ptr = NULL;
+
+	phys_addr_t r1_size = SZ_64;
+	phys_addr_t r2_size = SZ_2;
+	phys_addr_t total_size = r1_size + r2_size;
+	phys_addr_t min_addr;
+
+	setup_memblock();
+
+	min_addr = memblock_end_of_DRAM() - SMP_CACHE_BYTES * 2;
+
+	/* No space above this address */
+	memblock_reserve(min_addr, r2_size);
+
+	allocated_ptr = memblock_alloc_from(r1_size, SMP_CACHE_BYTES, min_addr);
+
+	assert(allocated_ptr);
+	assert(rgn->base == min_addr - r1_size);
+	assert(rgn->size == total_size);
+
+	assert(memblock.reserved.cnt == 1);
+	assert(memblock.reserved.total_size == total_size);
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region with a minimal address below
+ * the start address of the available memory. As the allocation is top-down,
+ * first reserve a region that will force allocation near the start.
+ * Expect successful allocation and merge of both regions.
+ */
+static int alloc_from_top_down_min_addr_cap_check(void)
+{
+	struct memblock_region *rgn = &memblock.reserved.regions[0];
+	void *allocated_ptr = NULL;
+
+	phys_addr_t r1_size = SZ_64;
+	phys_addr_t min_addr;
+	phys_addr_t start_addr;
+
+	setup_memblock();
+
+	start_addr = (phys_addr_t)memblock_start_of_DRAM();
+	min_addr = start_addr - SMP_CACHE_BYTES * 3;
+
+	memblock_reserve(start_addr + r1_size, MEM_SIZE - r1_size);
+
+	allocated_ptr = memblock_alloc_from(r1_size, SMP_CACHE_BYTES, min_addr);
+
+	assert(allocated_ptr);
+	assert(rgn->base == start_addr);
+	assert(rgn->size == MEM_SIZE);
+
+	assert(memblock.reserved.cnt == 1);
+	assert(memblock.reserved.total_size == MEM_SIZE);
+
+	return 0;
+}
+
+int memblock_alloc_helpers_checks(void)
+{
+	reset_memblock_attributes();
+	dummy_physical_memory_init();
+
+	alloc_from_simple_generic_check();
+	alloc_from_misaligned_generic_check();
+	alloc_from_top_down_high_addr_check();
+	alloc_from_top_down_min_addr_cap_check();
+	alloc_from_top_down_no_space_above_check();
+
+	dummy_physical_memory_cleanup();
+
+	return 0;
+}
diff --git a/tools/testing/memblock/tests/alloc_helpers_api.h b/tools/testing/memblock/tests/alloc_helpers_api.h
new file mode 100644
index 000000000000..c9e4827b1623
--- /dev/null
+++ b/tools/testing/memblock/tests/alloc_helpers_api.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _MEMBLOCK_ALLOC_HELPERS_H
+#define _MEMBLOCK_ALLOC_HELPERS_H
+
+#include "common.h"
+
+int memblock_alloc_helpers_checks(void);
+
+#endif
-- 
cgit 


From 0ac06631a3bcdaed497f7b8abccf405f9eb8e0bd Mon Sep 17 00:00:00 2001
From: Karolina Drobnik <karolinadrobnik@gmail.com>
Date: Mon, 28 Feb 2022 15:46:48 +0100
Subject: memblock tests: Add memblock_alloc_from tests for bottom up

Add checks for memblock_alloc_from for bottom up allocation direction.
The tested scenarios are:
  - Not enough space to allocate memory at the minimal address
  - Minimal address parameter is smaller than the start address
    of the available memory
  - Minimal address parameter is too close to the end of the available
    memory

Add test case wrappers to test both directions in the same context.

Signed-off-by: Karolina Drobnik <karolinadrobnik@gmail.com>
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Link: https://lore.kernel.org/r/506cf5293c8a21c012b7ea87b14af07754d3e656.1646055639.git.karolinadrobnik@gmail.com
---
 tools/testing/memblock/tests/alloc_helpers_api.c | 175 ++++++++++++++++++++++-
 1 file changed, 171 insertions(+), 4 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/memblock/tests/alloc_helpers_api.c b/tools/testing/memblock/tests/alloc_helpers_api.c
index dc5152adcc5b..963a966db461 100644
--- a/tools/testing/memblock/tests/alloc_helpers_api.c
+++ b/tools/testing/memblock/tests/alloc_helpers_api.c
@@ -209,16 +209,183 @@ static int alloc_from_top_down_min_addr_cap_check(void)
 	return 0;
 }
 
-int memblock_alloc_helpers_checks(void)
+/*
+ * A test that tries to allocate a memory region above an address that is too
+ * close to the end of the memory:
+ *
+ *                             +
+ *  |-----------+              +     |
+ *  |    rgn    |              |     |
+ *  +-----------+--------------+-----+
+ *  ^                          ^
+ *  |                          |
+ *  Aligned address            min_addr
+ *  boundary
+ *
+ * Expect to prioritize granting memory over satisfying the minimal address
+ * requirement. Allocation happens at beginning of the available memory.
+ */
+static int alloc_from_bottom_up_high_addr_check(void)
 {
-	reset_memblock_attributes();
-	dummy_physical_memory_init();
+	struct memblock_region *rgn = &memblock.reserved.regions[0];
+	void *allocated_ptr = NULL;
+
+	phys_addr_t size = SZ_32;
+	phys_addr_t min_addr;
+
+	setup_memblock();
+
+	/* The address is too close to the end of the memory */
+	min_addr = memblock_end_of_DRAM() - SZ_8;
+
+	allocated_ptr = memblock_alloc_from(size, SMP_CACHE_BYTES, min_addr);
+
+	assert(allocated_ptr);
+	assert(rgn->size == size);
+	assert(rgn->base == memblock_start_of_DRAM());
+
+	assert(memblock.reserved.cnt == 1);
+	assert(memblock.reserved.total_size == size);
+
+	return 0;
+}
 
+/*
+ * A test that tries to allocate a memory region when there is no space
+ * available above the minimal address above a certain address:
+ *
+ *                   +
+ *  |-----------+    +-------------------|
+ *  |    rgn    |    |                   |
+ *  +-----------+----+-------------------+
+ *                   ^
+ *                   |
+ *                   min_addr
+ *
+ * Expect to prioritize granting memory over satisfying the minimal address
+ * requirement and to allocate at the beginning of the available memory.
+ */
+static int alloc_from_bottom_up_no_space_above_check(void)
+{
+	struct memblock_region *rgn = &memblock.reserved.regions[0];
+	void *allocated_ptr = NULL;
+
+	phys_addr_t r1_size = SZ_64;
+	phys_addr_t min_addr;
+	phys_addr_t r2_size;
+
+	setup_memblock();
+
+	min_addr = memblock_start_of_DRAM() + SZ_128;
+	r2_size = memblock_end_of_DRAM() - min_addr;
+
+	/* No space above this address */
+	memblock_reserve(min_addr - SMP_CACHE_BYTES, r2_size);
+
+	allocated_ptr = memblock_alloc_from(r1_size, SMP_CACHE_BYTES, min_addr);
+
+	assert(allocated_ptr);
+	assert(rgn->base == memblock_start_of_DRAM());
+	assert(rgn->size == r1_size);
+
+	assert(memblock.reserved.cnt == 2);
+	assert(memblock.reserved.total_size == r1_size + r2_size);
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region with a minimal address below
+ * the start address of the available memory. Expect to allocate a region
+ * at the beginning of the available memory.
+ */
+static int alloc_from_bottom_up_min_addr_cap_check(void)
+{
+	struct memblock_region *rgn = &memblock.reserved.regions[0];
+	void *allocated_ptr = NULL;
+
+	phys_addr_t r1_size = SZ_64;
+	phys_addr_t min_addr;
+	phys_addr_t start_addr;
+
+	setup_memblock();
+
+	start_addr = (phys_addr_t)memblock_start_of_DRAM();
+	min_addr = start_addr - SMP_CACHE_BYTES * 3;
+
+	allocated_ptr = memblock_alloc_from(r1_size, SMP_CACHE_BYTES, min_addr);
+
+	assert(allocated_ptr);
+	assert(rgn->base == start_addr);
+	assert(rgn->size == r1_size);
+
+	assert(memblock.reserved.cnt == 1);
+	assert(memblock.reserved.total_size == r1_size);
+
+	return 0;
+}
+
+/* Test case wrappers */
+static int alloc_from_simple_check(void)
+{
+	memblock_set_bottom_up(false);
+	alloc_from_simple_generic_check();
+	memblock_set_bottom_up(true);
 	alloc_from_simple_generic_check();
+
+	return 0;
+}
+
+static int alloc_from_misaligned_check(void)
+{
+	memblock_set_bottom_up(false);
 	alloc_from_misaligned_generic_check();
+	memblock_set_bottom_up(true);
+	alloc_from_misaligned_generic_check();
+
+	return 0;
+}
+
+static int alloc_from_high_addr_check(void)
+{
+	memblock_set_bottom_up(false);
 	alloc_from_top_down_high_addr_check();
-	alloc_from_top_down_min_addr_cap_check();
+	memblock_set_bottom_up(true);
+	alloc_from_bottom_up_high_addr_check();
+
+	return 0;
+}
+
+static int alloc_from_no_space_above_check(void)
+{
+	memblock_set_bottom_up(false);
 	alloc_from_top_down_no_space_above_check();
+	memblock_set_bottom_up(true);
+	alloc_from_bottom_up_no_space_above_check();
+
+	return 0;
+}
+
+static int alloc_from_min_addr_cap_check(void)
+{
+	memblock_set_bottom_up(false);
+	alloc_from_top_down_min_addr_cap_check();
+	memblock_set_bottom_up(true);
+	alloc_from_bottom_up_min_addr_cap_check();
+
+	return 0;
+}
+
+int memblock_alloc_helpers_checks(void)
+{
+	reset_memblock_attributes();
+	dummy_physical_memory_init();
+
+	alloc_from_simple_check();
+	alloc_from_misaligned_check();
+	alloc_from_high_addr_check();
+	alloc_from_no_space_above_check();
+	alloc_from_min_addr_cap_check();
 
 	dummy_physical_memory_cleanup();
 
-- 
cgit 


From 8f98435d674e5b7ec382914d2755849f5a95754b Mon Sep 17 00:00:00 2001
From: Karolina Drobnik <karolinadrobnik@gmail.com>
Date: Mon, 28 Feb 2022 15:46:49 +0100
Subject: memblock tests: Add memblock_alloc_try_nid tests for top down

Add tests for memblock_alloc_try_nid for top down allocation direction.
As the definition of this function is pretty close to the core
memblock_alloc_range_nid, the test cases implemented here cover most of
the code paths related to the memory allocations.

The tested scenarios are:
  - Region can be allocated within the requested range (both with aligned
    and misaligned boundaries)
  - Region can be allocated between two already existing entries
  - Not enough space between already reserved regions
  - Memory range is too narrow but memory can be allocated before
    the maximum address
  - Edge cases:
      + Minimum address is below memblock_start_of_DRAM()
      + Maximum address is above memblock_end_of_DRAM()

Add checks for both allocation directions:
  - Region starts at the min_addr and ends at max_addr
  - Maximum address is too close to the beginning of the available
    memory
  - Memory at the range boundaries is reserved but there is enough space
    to allocate a new region

Signed-off-by: Karolina Drobnik <karolinadrobnik@gmail.com>
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Link: https://lore.kernel.org/r/d6c282e0f9f62c15bf74c216214604764232d637.1646055639.git.karolinadrobnik@gmail.com
---
 tools/testing/memblock/Makefile              |   4 +-
 tools/testing/memblock/main.c                |   2 +
 tools/testing/memblock/tests/alloc_nid_api.c | 679 +++++++++++++++++++++++++++
 tools/testing/memblock/tests/alloc_nid_api.h |   9 +
 4 files changed, 692 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/memblock/tests/alloc_nid_api.c
 create mode 100644 tools/testing/memblock/tests/alloc_nid_api.h

(limited to 'tools/testing')

diff --git a/tools/testing/memblock/Makefile b/tools/testing/memblock/Makefile
index 89e374470009..a698e24b35e7 100644
--- a/tools/testing/memblock/Makefile
+++ b/tools/testing/memblock/Makefile
@@ -6,8 +6,8 @@ CFLAGS += -I. -I../../include -Wall -O2 -fsanitize=address \
 	  -fsanitize=undefined -D CONFIG_PHYS_ADDR_T_64BIT
 LDFLAGS += -fsanitize=address -fsanitize=undefined
 TARGETS = main
-TEST_OFILES = tests/alloc_helpers_api.o tests/alloc_api.o tests/basic_api.o \
-	      tests/common.o
+TEST_OFILES = tests/alloc_nid_api.o tests/alloc_helpers_api.o tests/alloc_api.o \
+		  tests/basic_api.o tests/common.o
 DEP_OFILES = memblock.o lib/slab.o mmzone.o slab.o
 OFILES = main.o $(DEP_OFILES) $(TEST_OFILES)
 EXTR_SRC = ../../../mm/memblock.c
diff --git a/tools/testing/memblock/main.c b/tools/testing/memblock/main.c
index b63150ee554f..fb183c9e76d1 100644
--- a/tools/testing/memblock/main.c
+++ b/tools/testing/memblock/main.c
@@ -2,12 +2,14 @@
 #include "tests/basic_api.h"
 #include "tests/alloc_api.h"
 #include "tests/alloc_helpers_api.h"
+#include "tests/alloc_nid_api.h"
 
 int main(int argc, char **argv)
 {
 	memblock_basic_checks();
 	memblock_alloc_checks();
 	memblock_alloc_helpers_checks();
+	memblock_alloc_nid_checks();
 
 	return 0;
 }
diff --git a/tools/testing/memblock/tests/alloc_nid_api.c b/tools/testing/memblock/tests/alloc_nid_api.c
new file mode 100644
index 000000000000..13622c0cfdb6
--- /dev/null
+++ b/tools/testing/memblock/tests/alloc_nid_api.c
@@ -0,0 +1,679 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include "alloc_nid_api.h"
+
+/*
+ * A simple test that tries to allocate a memory region within min_addr and
+ * max_addr range:
+ *
+ *        +                   +
+ *   |    +       +-----------+      |
+ *   |    |       |    rgn    |      |
+ *   +----+-------+-----------+------+
+ *        ^                   ^
+ *        |                   |
+ *        min_addr           max_addr
+ *
+ * Expect to allocate a cleared region that ends at max_addr.
+ */
+static int alloc_try_nid_top_down_simple_check(void)
+{
+	struct memblock_region *rgn = &memblock.reserved.regions[0];
+	void *allocated_ptr = NULL;
+	char *b;
+
+	phys_addr_t size = SZ_128;
+	phys_addr_t min_addr;
+	phys_addr_t max_addr;
+	phys_addr_t rgn_end;
+
+	setup_memblock();
+
+	min_addr = memblock_start_of_DRAM() + SMP_CACHE_BYTES * 2;
+	max_addr = min_addr + SZ_512;
+
+	allocated_ptr = memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr, NUMA_NO_NODE);
+	b = (char *)allocated_ptr;
+	rgn_end = rgn->base + rgn->size;
+
+	assert(allocated_ptr);
+	assert(*b == 0);
+
+	assert(rgn->size == size);
+	assert(rgn->base == max_addr - size);
+	assert(rgn_end == max_addr);
+
+	assert(memblock.reserved.cnt == 1);
+	assert(memblock.reserved.total_size == size);
+
+	return 0;
+}
+
+/*
+ * A simple test that tries to allocate a memory region within min_addr and
+ * max_addr range, where the end address is misaligned:
+ *
+ *         +       +            +
+ *  |      +       +---------+  +    |
+ *  |      |       |   rgn   |  |    |
+ *  +------+-------+---------+--+----+
+ *         ^       ^            ^
+ *         |       |            |
+ *       min_add   |            max_addr
+ *                 |
+ *                 Aligned address
+ *                 boundary
+ *
+ * Expect to allocate a cleared, aligned region that ends before max_addr.
+ */
+static int alloc_try_nid_top_down_end_misaligned_check(void)
+{
+	struct memblock_region *rgn = &memblock.reserved.regions[0];
+	void *allocated_ptr = NULL;
+	char *b;
+
+	phys_addr_t size = SZ_128;
+	phys_addr_t misalign = SZ_2;
+	phys_addr_t min_addr;
+	phys_addr_t max_addr;
+	phys_addr_t rgn_end;
+
+	setup_memblock();
+
+	min_addr = memblock_start_of_DRAM() + SMP_CACHE_BYTES * 2;
+	max_addr = min_addr + SZ_512 + misalign;
+
+	allocated_ptr = memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr, NUMA_NO_NODE);
+	b = (char *)allocated_ptr;
+	rgn_end = rgn->base + rgn->size;
+
+	assert(allocated_ptr);
+	assert(*b == 0);
+
+	assert(rgn->size == size);
+	assert(rgn->base == max_addr - size - misalign);
+	assert(rgn_end < max_addr);
+
+	assert(memblock.reserved.cnt == 1);
+	assert(memblock.reserved.total_size == size);
+
+	return 0;
+}
+
+/*
+ * A simple test that tries to allocate a memory region, which spans over the
+ * min_addr and max_addr range:
+ *
+ *         +               +
+ *  |      +---------------+       |
+ *  |      |      rgn      |       |
+ *  +------+---------------+-------+
+ *         ^               ^
+ *         |               |
+ *         min_addr        max_addr
+ *
+ * Expect to allocate a cleared region that starts at min_addr and ends at
+ * max_addr, given that min_addr is aligned.
+ */
+static int alloc_try_nid_exact_address_generic_check(void)
+{
+	struct memblock_region *rgn = &memblock.reserved.regions[0];
+	void *allocated_ptr = NULL;
+	char *b;
+
+	phys_addr_t size = SZ_1K;
+	phys_addr_t min_addr;
+	phys_addr_t max_addr;
+	phys_addr_t rgn_end;
+
+	setup_memblock();
+
+	min_addr = memblock_start_of_DRAM() + SMP_CACHE_BYTES;
+	max_addr = min_addr + size;
+
+	allocated_ptr = memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr, NUMA_NO_NODE);
+	b = (char *)allocated_ptr;
+	rgn_end = rgn->base + rgn->size;
+
+	assert(allocated_ptr);
+	assert(*b == 0);
+
+	assert(rgn->size == size);
+	assert(rgn->base == min_addr);
+	assert(rgn_end == max_addr);
+
+	assert(memblock.reserved.cnt == 1);
+	assert(memblock.reserved.total_size == size);
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region, which can't fit into
+ * min_addr and max_addr range:
+ *
+ *           +          +     +
+ *  |        +----------+-----+    |
+ *  |        |   rgn    +     |    |
+ *  +--------+----------+-----+----+
+ *           ^          ^     ^
+ *           |          |     |
+ *           Aligned    |    max_addr
+ *           address    |
+ *           boundary   min_add
+ *
+ * Expect to drop the lower limit and allocate a cleared memory region which
+ * ends at max_addr (if the address is aligned).
+ */
+static int alloc_try_nid_top_down_narrow_range_check(void)
+{
+	struct memblock_region *rgn = &memblock.reserved.regions[0];
+	void *allocated_ptr = NULL;
+	char *b;
+
+	phys_addr_t size = SZ_256;
+	phys_addr_t min_addr;
+	phys_addr_t max_addr;
+
+	setup_memblock();
+
+	min_addr = memblock_start_of_DRAM() + SZ_512;
+	max_addr = min_addr + SMP_CACHE_BYTES;
+
+	allocated_ptr = memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr, NUMA_NO_NODE);
+	b = (char *)allocated_ptr;
+
+	assert(allocated_ptr);
+	assert(*b == 0);
+
+	assert(rgn->size == size);
+	assert(rgn->base == max_addr - size);
+
+	assert(memblock.reserved.cnt == 1);
+	assert(memblock.reserved.total_size == size);
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region, which can't fit into
+ * min_addr and max_addr range, with the latter being too close to the beginning
+ * of the available memory:
+ *
+ *   +-------------+
+ *   |     new     |
+ *   +-------------+
+ *         +       +
+ *         |       +              |
+ *         |       |              |
+ *         +-------+--------------+
+ *         ^       ^
+ *         |       |
+ *         |       max_addr
+ *         |
+ *         min_addr
+ *
+ * Expect no allocation to happen.
+ */
+static int alloc_try_nid_low_max_generic_check(void)
+{
+	void *allocated_ptr = NULL;
+
+	phys_addr_t size = SZ_1K;
+	phys_addr_t min_addr;
+	phys_addr_t max_addr;
+
+	setup_memblock();
+
+	min_addr = memblock_start_of_DRAM();
+	max_addr = min_addr + SMP_CACHE_BYTES;
+
+	allocated_ptr = memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr, NUMA_NO_NODE);
+
+	assert(!allocated_ptr);
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region within min_addr min_addr range,
+ * with min_addr being so close that it's next to an allocated region:
+ *
+ *          +                        +
+ *  |       +--------+---------------|
+ *  |       |   r1   |      rgn      |
+ *  +-------+--------+---------------+
+ *          ^                        ^
+ *          |                        |
+ *          min_addr                 max_addr
+ *
+ * Expect a merge of both regions. Only the region size gets updated.
+ */
+static int alloc_try_nid_min_reserved_generic_check(void)
+{
+	struct memblock_region *rgn = &memblock.reserved.regions[0];
+	void *allocated_ptr = NULL;
+	char *b;
+
+	phys_addr_t r1_size = SZ_128;
+	phys_addr_t r2_size = SZ_64;
+	phys_addr_t total_size = r1_size + r2_size;
+	phys_addr_t min_addr;
+	phys_addr_t max_addr;
+	phys_addr_t reserved_base;
+
+	setup_memblock();
+
+	max_addr = memblock_end_of_DRAM();
+	min_addr = max_addr - r2_size;
+	reserved_base = min_addr - r1_size;
+
+	memblock_reserve(reserved_base, r1_size);
+
+	allocated_ptr = memblock_alloc_try_nid(r2_size, SMP_CACHE_BYTES,
+					       min_addr, max_addr, NUMA_NO_NODE);
+	b = (char *)allocated_ptr;
+
+	assert(allocated_ptr);
+	assert(*b == 0);
+
+	assert(rgn->size == total_size);
+	assert(rgn->base == reserved_base);
+
+	assert(memblock.reserved.cnt == 1);
+	assert(memblock.reserved.total_size == total_size);
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region within min_addr and max_addr,
+ * with max_addr being so close that it's next to an allocated region:
+ *
+ *             +             +
+ *  |          +-------------+--------|
+ *  |          |     rgn     |   r1   |
+ *  +----------+-------------+--------+
+ *             ^             ^
+ *             |             |
+ *             min_addr      max_addr
+ *
+ * Expect a merge of regions. Only the region size gets updated.
+ */
+static int alloc_try_nid_max_reserved_generic_check(void)
+{
+	struct memblock_region *rgn = &memblock.reserved.regions[0];
+	void *allocated_ptr = NULL;
+	char *b;
+
+	phys_addr_t r1_size = SZ_64;
+	phys_addr_t r2_size = SZ_128;
+	phys_addr_t total_size = r1_size + r2_size;
+	phys_addr_t min_addr;
+	phys_addr_t max_addr;
+
+	setup_memblock();
+
+	max_addr = memblock_end_of_DRAM() - r1_size;
+	min_addr = max_addr - r2_size;
+
+	memblock_reserve(max_addr, r1_size);
+
+	allocated_ptr = memblock_alloc_try_nid(r2_size, SMP_CACHE_BYTES,
+					       min_addr, max_addr, NUMA_NO_NODE);
+	b = (char *)allocated_ptr;
+
+	assert(allocated_ptr);
+	assert(*b == 0);
+
+	assert(rgn->size == total_size);
+	assert(rgn->base == min_addr);
+
+	assert(memblock.reserved.cnt == 1);
+	assert(memblock.reserved.total_size == total_size);
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate memory within min_addr and max_add range, when
+ * there are two reserved regions at the borders, with a gap big enough to fit
+ * a new region:
+ *
+ *                +           +
+ *  |    +--------+   +-------+------+  |
+ *  |    |   r2   |   |  rgn  |  r1  |  |
+ *  +----+--------+---+-------+------+--+
+ *                ^           ^
+ *                |           |
+ *                min_addr    max_addr
+ *
+ * Expect to merge the new region with r1. The second region does not get
+ * updated. The total size field gets updated.
+ */
+
+static int alloc_try_nid_top_down_reserved_with_space_check(void)
+{
+	struct memblock_region *rgn1 = &memblock.reserved.regions[1];
+	struct memblock_region *rgn2 = &memblock.reserved.regions[0];
+	void *allocated_ptr = NULL;
+	char *b;
+	struct region r1, r2;
+
+	phys_addr_t r3_size = SZ_64;
+	phys_addr_t gap_size = SMP_CACHE_BYTES;
+	phys_addr_t total_size;
+	phys_addr_t max_addr;
+	phys_addr_t min_addr;
+
+	setup_memblock();
+
+	r1.base = memblock_end_of_DRAM() - SMP_CACHE_BYTES * 2;
+	r1.size = SMP_CACHE_BYTES;
+
+	r2.size = SZ_128;
+	r2.base = r1.base - (r3_size + gap_size + r2.size);
+
+	total_size = r1.size + r2.size + r3_size;
+	min_addr = r2.base + r2.size;
+	max_addr = r1.base;
+
+	memblock_reserve(r1.base, r1.size);
+	memblock_reserve(r2.base, r2.size);
+
+	allocated_ptr = memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES,
+					       min_addr, max_addr, NUMA_NO_NODE);
+	b = (char *)allocated_ptr;
+
+	assert(allocated_ptr);
+	assert(*b == 0);
+
+	assert(rgn1->size == r1.size + r3_size);
+	assert(rgn1->base == max_addr - r3_size);
+
+	assert(rgn2->size == r2.size);
+	assert(rgn2->base == r2.base);
+
+	assert(memblock.reserved.cnt == 2);
+	assert(memblock.reserved.total_size == total_size);
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate memory within min_addr and max_add range, when
+ * there are two reserved regions at the borders, with a gap of a size equal to
+ * the size of the new region:
+ *
+ *                 +        +
+ *  |     +--------+--------+--------+     |
+ *  |     |   r2   |   r3   |   r1   |     |
+ *  +-----+--------+--------+--------+-----+
+ *                 ^        ^
+ *                 |        |
+ *                 min_addr max_addr
+ *
+ * Expect to merge all of the regions into one. The region counter and total
+ * size fields get updated.
+ */
+static int alloc_try_nid_reserved_full_merge_generic_check(void)
+{
+	struct memblock_region *rgn = &memblock.reserved.regions[0];
+	void *allocated_ptr = NULL;
+	char *b;
+	struct region r1, r2;
+
+	phys_addr_t r3_size = SZ_64;
+	phys_addr_t total_size;
+	phys_addr_t max_addr;
+	phys_addr_t min_addr;
+
+	setup_memblock();
+
+	r1.base = memblock_end_of_DRAM() - SMP_CACHE_BYTES * 2;
+	r1.size = SMP_CACHE_BYTES;
+
+	r2.size = SZ_128;
+	r2.base = r1.base - (r3_size + r2.size);
+
+	total_size = r1.size + r2.size + r3_size;
+	min_addr = r2.base + r2.size;
+	max_addr = r1.base;
+
+	memblock_reserve(r1.base, r1.size);
+	memblock_reserve(r2.base, r2.size);
+
+	allocated_ptr = memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES,
+					       min_addr, max_addr, NUMA_NO_NODE);
+	b = (char *)allocated_ptr;
+
+	assert(allocated_ptr);
+	assert(*b == 0);
+
+	assert(rgn->size == total_size);
+	assert(rgn->base == r2.base);
+
+	assert(memblock.reserved.cnt == 1);
+	assert(memblock.reserved.total_size == total_size);
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate memory within min_addr and max_add range, when
+ * there are two reserved regions at the borders, with a gap that can't fit
+ * a new region:
+ *
+ *                       +    +
+ *  |  +----------+------+    +------+   |
+ *  |  |    r3    |  r2  |    |  r1  |   |
+ *  +--+----------+------+----+------+---+
+ *                       ^    ^
+ *                       |    |
+ *                       |    max_addr
+ *                       |
+ *                       min_addr
+ *
+ * Expect to merge the new region with r2. The second region does not get
+ * updated. The total size counter gets updated.
+ */
+static int alloc_try_nid_top_down_reserved_no_space_check(void)
+{
+	struct memblock_region *rgn1 = &memblock.reserved.regions[1];
+	struct memblock_region *rgn2 = &memblock.reserved.regions[0];
+	void *allocated_ptr = NULL;
+	char *b;
+	struct region r1, r2;
+
+	phys_addr_t r3_size = SZ_256;
+	phys_addr_t gap_size = SMP_CACHE_BYTES;
+	phys_addr_t total_size;
+	phys_addr_t max_addr;
+	phys_addr_t min_addr;
+
+	setup_memblock();
+
+	r1.base = memblock_end_of_DRAM() - SMP_CACHE_BYTES * 2;
+	r1.size = SMP_CACHE_BYTES;
+
+	r2.size = SZ_128;
+	r2.base = r1.base - (r2.size + gap_size);
+
+	total_size = r1.size + r2.size + r3_size;
+	min_addr = r2.base + r2.size;
+	max_addr = r1.base;
+
+	memblock_reserve(r1.base, r1.size);
+	memblock_reserve(r2.base, r2.size);
+
+	allocated_ptr = memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES,
+					       min_addr, max_addr, NUMA_NO_NODE);
+	b = (char *)allocated_ptr;
+
+	assert(allocated_ptr);
+	assert(*b == 0);
+
+	assert(rgn1->size == r1.size);
+	assert(rgn1->base == r1.base);
+
+	assert(rgn2->size == r2.size + r3_size);
+	assert(rgn2->base == r2.base - r3_size);
+
+	assert(memblock.reserved.cnt == 2);
+	assert(memblock.reserved.total_size == total_size);
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate memory within min_addr and max_add range, but
+ * it's too narrow and everything else is reserved:
+ *
+ *            +-----------+
+ *            |    new    |
+ *            +-----------+
+ *                 +      +
+ *  |--------------+      +----------|
+ *  |      r2      |      |    r1    |
+ *  +--------------+------+----------+
+ *                 ^      ^
+ *                 |      |
+ *                 |      max_addr
+ *                 |
+ *                 min_addr
+ *
+ * Expect no allocation to happen.
+ */
+
+static int alloc_try_nid_reserved_all_generic_check(void)
+{
+	void *allocated_ptr = NULL;
+	struct region r1, r2;
+
+	phys_addr_t r3_size = SZ_256;
+	phys_addr_t gap_size = SMP_CACHE_BYTES;
+	phys_addr_t max_addr;
+	phys_addr_t min_addr;
+
+	setup_memblock();
+
+	r1.base = memblock_end_of_DRAM() - SMP_CACHE_BYTES;
+	r1.size = SMP_CACHE_BYTES;
+
+	r2.size = MEM_SIZE - (r1.size + gap_size);
+	r2.base = memblock_start_of_DRAM();
+
+	min_addr = r2.base + r2.size;
+	max_addr = r1.base;
+
+	memblock_reserve(r1.base, r1.size);
+	memblock_reserve(r2.base, r2.size);
+
+	allocated_ptr = memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES,
+					       min_addr, max_addr, NUMA_NO_NODE);
+
+	assert(!allocated_ptr);
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region, where max_addr is
+ * bigger than the end address of the available memory. Expect to allocate
+ * a cleared region that ends before the end of the memory.
+ */
+static int alloc_try_nid_top_down_cap_max_check(void)
+{
+	struct memblock_region *rgn = &memblock.reserved.regions[0];
+	void *allocated_ptr = NULL;
+	char *b;
+
+	phys_addr_t size = SZ_256;
+	phys_addr_t min_addr;
+	phys_addr_t max_addr;
+
+	setup_memblock();
+
+	min_addr = memblock_end_of_DRAM() - SZ_1K;
+	max_addr = memblock_end_of_DRAM() + SZ_256;
+
+	allocated_ptr = memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr, NUMA_NO_NODE);
+	b = (char *)allocated_ptr;
+
+	assert(allocated_ptr);
+	assert(*b == 0);
+
+	assert(rgn->size == size);
+	assert(rgn->base == memblock_end_of_DRAM() - size);
+
+	assert(memblock.reserved.cnt == 1);
+	assert(memblock.reserved.total_size == size);
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region, where min_addr is
+ * smaller than the start address of the available memory. Expect to allocate
+ * a cleared region that ends before the end of the memory.
+ */
+static int alloc_try_nid_top_down_cap_min_check(void)
+{
+	struct memblock_region *rgn = &memblock.reserved.regions[0];
+	void *allocated_ptr = NULL;
+	char *b;
+
+	phys_addr_t size = SZ_1K;
+	phys_addr_t min_addr;
+	phys_addr_t max_addr;
+
+	setup_memblock();
+
+	min_addr = memblock_start_of_DRAM() - SZ_256;
+	max_addr = memblock_end_of_DRAM();
+
+	allocated_ptr = memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr, NUMA_NO_NODE);
+	b = (char *)allocated_ptr;
+
+	assert(allocated_ptr);
+	assert(*b == 0);
+
+	assert(rgn->size == size);
+	assert(rgn->base == memblock_end_of_DRAM() - size);
+
+	assert(memblock.reserved.cnt == 1);
+	assert(memblock.reserved.total_size == size);
+
+	return 0;
+}
+
+int memblock_alloc_nid_checks(void)
+{
+	reset_memblock_attributes();
+	dummy_physical_memory_init();
+
+	alloc_try_nid_top_down_simple_check();
+	alloc_try_nid_top_down_end_misaligned_check();
+	alloc_try_nid_top_down_narrow_range_check();
+	alloc_try_nid_top_down_reserved_with_space_check();
+	alloc_try_nid_top_down_reserved_no_space_check();
+	alloc_try_nid_top_down_cap_min_check();
+	alloc_try_nid_top_down_cap_max_check();
+
+	alloc_try_nid_min_reserved_generic_check();
+	alloc_try_nid_max_reserved_generic_check();
+	alloc_try_nid_exact_address_generic_check();
+	alloc_try_nid_reserved_full_merge_generic_check();
+	alloc_try_nid_reserved_all_generic_check();
+	alloc_try_nid_low_max_generic_check();
+
+	dummy_physical_memory_cleanup();
+
+	return 0;
+}
diff --git a/tools/testing/memblock/tests/alloc_nid_api.h b/tools/testing/memblock/tests/alloc_nid_api.h
new file mode 100644
index 000000000000..b35cf3c3f489
--- /dev/null
+++ b/tools/testing/memblock/tests/alloc_nid_api.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _MEMBLOCK_ALLOC_NID_H
+#define _MEMBLOCK_ALLOC_NID_H
+
+#include "common.h"
+
+int memblock_alloc_nid_checks(void);
+
+#endif
-- 
cgit 


From 9d8f6abe980f98ffccad80b11d46df2116d7a5fc Mon Sep 17 00:00:00 2001
From: Karolina Drobnik <karolinadrobnik@gmail.com>
Date: Mon, 28 Feb 2022 15:46:50 +0100
Subject: memblock tests: Add memblock_alloc_try_nid tests for bottom up

Add checks for memblock_alloc_try_nid for bottom up allocation direction.
As the definition of this function is pretty close to the core
memblock_alloc_range_nid, the test cases implemented here cover most of
the code paths related to the memory allocations.

The tested scenarios are:
  - Region can be allocated within the requested range (both with aligned
    and misaligned boundaries)
  - Region can be allocated between two already existing entries
  - Not enough space between already reserved regions
  - Memory at the range boundaries is reserved but there is enough space
    to allocate a new region
  - The memory range is too narrow but memory can be allocated before
    the maximum address
  - Edge cases:
      + Minimum address is below memblock_start_of_DRAM()
      + Maximum address is above memblock_end_of_DRAM()

Add test case wrappers to test both directions in the same context.

Signed-off-by: Karolina Drobnik <karolinadrobnik@gmail.com>
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Link: https://lore.kernel.org/r/1c0ba11b8da5dc8f71ad45175c536fa4be720984.1646055639.git.karolinadrobnik@gmail.com
---
 tools/testing/memblock/tests/alloc_nid_api.c | 503 ++++++++++++++++++++++++++-
 1 file changed, 499 insertions(+), 4 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/memblock/tests/alloc_nid_api.c b/tools/testing/memblock/tests/alloc_nid_api.c
index 13622c0cfdb6..6390206e50e1 100644
--- a/tools/testing/memblock/tests/alloc_nid_api.c
+++ b/tools/testing/memblock/tests/alloc_nid_api.c
@@ -653,26 +653,521 @@ static int alloc_try_nid_top_down_cap_min_check(void)
 	return 0;
 }
 
-int memblock_alloc_nid_checks(void)
+/*
+ * A simple test that tries to allocate a memory region within min_addr and
+ * max_addr range:
+ *
+ *        +                       +
+ *   |    +-----------+           |      |
+ *   |    |    rgn    |           |      |
+ *   +----+-----------+-----------+------+
+ *        ^                       ^
+ *        |                       |
+ *        min_addr                max_addr
+ *
+ * Expect to allocate a cleared region that ends before max_addr.
+ */
+static int alloc_try_nid_bottom_up_simple_check(void)
 {
-	reset_memblock_attributes();
-	dummy_physical_memory_init();
+	struct memblock_region *rgn = &memblock.reserved.regions[0];
+	void *allocated_ptr = NULL;
+	char *b;
+
+	phys_addr_t size = SZ_128;
+	phys_addr_t min_addr;
+	phys_addr_t max_addr;
+	phys_addr_t rgn_end;
+
+	setup_memblock();
+
+	min_addr = memblock_start_of_DRAM() + SMP_CACHE_BYTES * 2;
+	max_addr = min_addr + SZ_512;
+
+	allocated_ptr = memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr,
+					       NUMA_NO_NODE);
+	b = (char *)allocated_ptr;
+	rgn_end = rgn->base + rgn->size;
+
+	assert(allocated_ptr);
+	assert(*b == 0);
+
+	assert(rgn->size == size);
+	assert(rgn->base == min_addr);
+	assert(rgn_end < max_addr);
+
+	assert(memblock.reserved.cnt == 1);
+	assert(memblock.reserved.total_size == size);
+
+	return 0;
+}
+
+/*
+ * A simple test that tries to allocate a memory region within min_addr and
+ * max_addr range, where the start address is misaligned:
+ *
+ *        +                     +
+ *  |     +   +-----------+     +     |
+ *  |     |   |    rgn    |     |     |
+ *  +-----+---+-----------+-----+-----+
+ *        ^   ^----.            ^
+ *        |        |            |
+ *     min_add     |            max_addr
+ *                 |
+ *                 Aligned address
+ *                 boundary
+ *
+ * Expect to allocate a cleared, aligned region that ends before max_addr.
+ */
+static int alloc_try_nid_bottom_up_start_misaligned_check(void)
+{
+	struct memblock_region *rgn = &memblock.reserved.regions[0];
+	void *allocated_ptr = NULL;
+	char *b;
+
+	phys_addr_t size = SZ_128;
+	phys_addr_t misalign = SZ_2;
+	phys_addr_t min_addr;
+	phys_addr_t max_addr;
+	phys_addr_t rgn_end;
+
+	setup_memblock();
+
+	min_addr = memblock_start_of_DRAM() + misalign;
+	max_addr = min_addr + SZ_512;
+
+	allocated_ptr = memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr,
+					       NUMA_NO_NODE);
+	b = (char *)allocated_ptr;
+	rgn_end = rgn->base + rgn->size;
+
+	assert(allocated_ptr);
+	assert(*b == 0);
+
+	assert(rgn->size == size);
+	assert(rgn->base == min_addr + (SMP_CACHE_BYTES - misalign));
+	assert(rgn_end < max_addr);
+
+	assert(memblock.reserved.cnt == 1);
+	assert(memblock.reserved.total_size == size);
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region, which can't fit into min_addr
+ * and max_addr range:
+ *
+ *                      +    +
+ *  |---------+         +    +      |
+ *  |   rgn   |         |    |      |
+ *  +---------+---------+----+------+
+ *                      ^    ^
+ *                      |    |
+ *                      |    max_addr
+ *                      |
+ *                      min_add
+ *
+ * Expect to drop the lower limit and allocate a cleared memory region which
+ * starts at the beginning of the available memory.
+ */
+static int alloc_try_nid_bottom_up_narrow_range_check(void)
+{
+	struct memblock_region *rgn = &memblock.reserved.regions[0];
+	void *allocated_ptr = NULL;
+	char *b;
+
+	phys_addr_t size = SZ_256;
+	phys_addr_t min_addr;
+	phys_addr_t max_addr;
+
+	setup_memblock();
+
+	min_addr = memblock_start_of_DRAM() + SZ_512;
+	max_addr = min_addr + SMP_CACHE_BYTES;
+
+	allocated_ptr = memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr,
+					       NUMA_NO_NODE);
+	b = (char *)allocated_ptr;
+
+	assert(allocated_ptr);
+	assert(*b == 0);
+
+	assert(rgn->size == size);
+	assert(rgn->base == memblock_start_of_DRAM());
+
+	assert(memblock.reserved.cnt == 1);
+	assert(memblock.reserved.total_size == size);
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate memory within min_addr and max_add range, when
+ * there are two reserved regions at the borders, with a gap big enough to fit
+ * a new region:
+ *
+ *                +           +
+ *  |    +--------+-------+   +------+  |
+ *  |    |   r2   |  rgn  |   |  r1  |  |
+ *  +----+--------+-------+---+------+--+
+ *                ^           ^
+ *                |           |
+ *                min_addr    max_addr
+ *
+ * Expect to merge the new region with r2. The second region does not get
+ * updated. The total size field gets updated.
+ */
 
+static int alloc_try_nid_bottom_up_reserved_with_space_check(void)
+{
+	struct memblock_region *rgn1 = &memblock.reserved.regions[1];
+	struct memblock_region *rgn2 = &memblock.reserved.regions[0];
+	void *allocated_ptr = NULL;
+	char *b;
+	struct region r1, r2;
+
+	phys_addr_t r3_size = SZ_64;
+	phys_addr_t gap_size = SMP_CACHE_BYTES;
+	phys_addr_t total_size;
+	phys_addr_t max_addr;
+	phys_addr_t min_addr;
+
+	setup_memblock();
+
+	r1.base = memblock_end_of_DRAM() - SMP_CACHE_BYTES * 2;
+	r1.size = SMP_CACHE_BYTES;
+
+	r2.size = SZ_128;
+	r2.base = r1.base - (r3_size + gap_size + r2.size);
+
+	total_size = r1.size + r2.size + r3_size;
+	min_addr = r2.base + r2.size;
+	max_addr = r1.base;
+
+	memblock_reserve(r1.base, r1.size);
+	memblock_reserve(r2.base, r2.size);
+
+	allocated_ptr = memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES,
+					       min_addr, max_addr,
+					       NUMA_NO_NODE);
+	b = (char *)allocated_ptr;
+
+	assert(allocated_ptr);
+	assert(*b == 0);
+
+	assert(rgn1->size == r1.size);
+	assert(rgn1->base == max_addr);
+
+	assert(rgn2->size == r2.size + r3_size);
+	assert(rgn2->base == r2.base);
+
+	assert(memblock.reserved.cnt == 2);
+	assert(memblock.reserved.total_size == total_size);
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate memory within min_addr and max_add range, when
+ * there are two reserved regions at the borders, with a gap of a size equal to
+ * the size of the new region:
+ *
+ *                         +   +
+ *  |----------+    +------+   +----+  |
+ *  |    r3    |    |  r2  |   | r1 |  |
+ *  +----------+----+------+---+----+--+
+ *                         ^   ^
+ *                         |   |
+ *                         |  max_addr
+ *                         |
+ *                         min_addr
+ *
+ * Expect to drop the lower limit and allocate memory at the beginning of the
+ * available memory. The region counter and total size fields get updated.
+ * Other regions are not modified.
+ */
+
+static int alloc_try_nid_bottom_up_reserved_no_space_check(void)
+{
+	struct memblock_region *rgn1 = &memblock.reserved.regions[2];
+	struct memblock_region *rgn2 = &memblock.reserved.regions[1];
+	struct memblock_region *rgn3 = &memblock.reserved.regions[0];
+	void *allocated_ptr = NULL;
+	char *b;
+	struct region r1, r2;
+
+	phys_addr_t r3_size = SZ_256;
+	phys_addr_t gap_size = SMP_CACHE_BYTES;
+	phys_addr_t total_size;
+	phys_addr_t max_addr;
+	phys_addr_t min_addr;
+
+	setup_memblock();
+
+	r1.base = memblock_end_of_DRAM() - SMP_CACHE_BYTES * 2;
+	r1.size = SMP_CACHE_BYTES;
+
+	r2.size = SZ_128;
+	r2.base = r1.base - (r2.size + gap_size);
+
+	total_size = r1.size + r2.size + r3_size;
+	min_addr = r2.base + r2.size;
+	max_addr = r1.base;
+
+	memblock_reserve(r1.base, r1.size);
+	memblock_reserve(r2.base, r2.size);
+
+	allocated_ptr = memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES,
+					       min_addr, max_addr,
+					       NUMA_NO_NODE);
+	b = (char *)allocated_ptr;
+
+	assert(allocated_ptr);
+	assert(*b == 0);
+
+	assert(rgn3->size == r3_size);
+	assert(rgn3->base == memblock_start_of_DRAM());
+
+	assert(rgn2->size == r2.size);
+	assert(rgn2->base == r2.base);
+
+	assert(rgn1->size == r1.size);
+	assert(rgn1->base == r1.base);
+
+	assert(memblock.reserved.cnt == 3);
+	assert(memblock.reserved.total_size == total_size);
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region, where max_addr is
+ * bigger than the end address of the available memory. Expect to allocate
+ * a cleared region that starts at the min_addr
+ */
+static int alloc_try_nid_bottom_up_cap_max_check(void)
+{
+	struct memblock_region *rgn = &memblock.reserved.regions[0];
+	void *allocated_ptr = NULL;
+	char *b;
+
+	phys_addr_t size = SZ_256;
+	phys_addr_t min_addr;
+	phys_addr_t max_addr;
+
+	setup_memblock();
+
+	min_addr = memblock_start_of_DRAM() + SZ_1K;
+	max_addr = memblock_end_of_DRAM() + SZ_256;
+
+	allocated_ptr = memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr,
+					       NUMA_NO_NODE);
+	b = (char *)allocated_ptr;
+
+	assert(allocated_ptr);
+	assert(*b == 0);
+
+	assert(rgn->size == size);
+	assert(rgn->base == min_addr);
+
+	assert(memblock.reserved.cnt == 1);
+	assert(memblock.reserved.total_size == size);
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region, where min_addr is
+ * smaller than the start address of the available memory. Expect to allocate
+ * a cleared region at the beginning of the available memory.
+ */
+static int alloc_try_nid_bottom_up_cap_min_check(void)
+{
+	struct memblock_region *rgn = &memblock.reserved.regions[0];
+	void *allocated_ptr = NULL;
+	char *b;
+
+	phys_addr_t size = SZ_1K;
+	phys_addr_t min_addr;
+	phys_addr_t max_addr;
+
+	setup_memblock();
+
+	min_addr = memblock_start_of_DRAM();
+	max_addr = memblock_end_of_DRAM() - SZ_256;
+
+	allocated_ptr = memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr,
+					       NUMA_NO_NODE);
+	b = (char *)allocated_ptr;
+
+	assert(allocated_ptr);
+	assert(*b == 0);
+
+	assert(rgn->size == size);
+	assert(rgn->base == memblock_start_of_DRAM());
+
+	assert(memblock.reserved.cnt == 1);
+	assert(memblock.reserved.total_size == size);
+
+	return 0;
+}
+
+/* Test case wrappers */
+static int alloc_try_nid_simple_check(void)
+{
+	memblock_set_bottom_up(false);
 	alloc_try_nid_top_down_simple_check();
+	memblock_set_bottom_up(true);
+	alloc_try_nid_bottom_up_simple_check();
+
+	return 0;
+}
+
+static int alloc_try_nid_misaligned_check(void)
+{
+	memblock_set_bottom_up(false);
 	alloc_try_nid_top_down_end_misaligned_check();
+	memblock_set_bottom_up(true);
+	alloc_try_nid_bottom_up_start_misaligned_check();
+
+	return 0;
+}
+
+static int alloc_try_nid_narrow_range_check(void)
+{
+	memblock_set_bottom_up(false);
 	alloc_try_nid_top_down_narrow_range_check();
+	memblock_set_bottom_up(true);
+	alloc_try_nid_bottom_up_narrow_range_check();
+
+	return 0;
+}
+
+static int alloc_try_nid_reserved_with_space_check(void)
+{
+	memblock_set_bottom_up(false);
 	alloc_try_nid_top_down_reserved_with_space_check();
+	memblock_set_bottom_up(true);
+	alloc_try_nid_bottom_up_reserved_with_space_check();
+
+	return 0;
+}
+
+static int alloc_try_nid_reserved_no_space_check(void)
+{
+	memblock_set_bottom_up(false);
 	alloc_try_nid_top_down_reserved_no_space_check();
-	alloc_try_nid_top_down_cap_min_check();
+	memblock_set_bottom_up(true);
+	alloc_try_nid_bottom_up_reserved_no_space_check();
+
+	return 0;
+}
+
+static int alloc_try_nid_cap_max_check(void)
+{
+	memblock_set_bottom_up(false);
 	alloc_try_nid_top_down_cap_max_check();
+	memblock_set_bottom_up(true);
+	alloc_try_nid_bottom_up_cap_max_check();
+
+	return 0;
+}
+
+static int alloc_try_nid_cap_min_check(void)
+{
+	memblock_set_bottom_up(false);
+	alloc_try_nid_top_down_cap_min_check();
+	memblock_set_bottom_up(true);
+	alloc_try_nid_bottom_up_cap_min_check();
+
+	return 0;
+}
 
+static int alloc_try_nid_min_reserved_check(void)
+{
+	memblock_set_bottom_up(false);
+	alloc_try_nid_min_reserved_generic_check();
+	memblock_set_bottom_up(true);
 	alloc_try_nid_min_reserved_generic_check();
+
+	return 0;
+}
+
+static int alloc_try_nid_max_reserved_check(void)
+{
+	memblock_set_bottom_up(false);
+	alloc_try_nid_max_reserved_generic_check();
+	memblock_set_bottom_up(true);
 	alloc_try_nid_max_reserved_generic_check();
+
+	return 0;
+}
+
+static int alloc_try_nid_exact_address_check(void)
+{
+	memblock_set_bottom_up(false);
+	alloc_try_nid_exact_address_generic_check();
+	memblock_set_bottom_up(true);
 	alloc_try_nid_exact_address_generic_check();
+
+	return 0;
+}
+
+static int alloc_try_nid_reserved_full_merge_check(void)
+{
+	memblock_set_bottom_up(false);
 	alloc_try_nid_reserved_full_merge_generic_check();
+	memblock_set_bottom_up(true);
+	alloc_try_nid_reserved_full_merge_generic_check();
+
+	return 0;
+}
+
+static int alloc_try_nid_reserved_all_check(void)
+{
+	memblock_set_bottom_up(false);
 	alloc_try_nid_reserved_all_generic_check();
+	memblock_set_bottom_up(true);
+	alloc_try_nid_reserved_all_generic_check();
+
+	return 0;
+}
+
+static int alloc_try_nid_low_max_check(void)
+{
+	memblock_set_bottom_up(false);
+	alloc_try_nid_low_max_generic_check();
+	memblock_set_bottom_up(true);
 	alloc_try_nid_low_max_generic_check();
 
+	return 0;
+}
+
+int memblock_alloc_nid_checks(void)
+{
+	reset_memblock_attributes();
+	dummy_physical_memory_init();
+
+	alloc_try_nid_simple_check();
+	alloc_try_nid_misaligned_check();
+	alloc_try_nid_narrow_range_check();
+	alloc_try_nid_reserved_with_space_check();
+	alloc_try_nid_reserved_no_space_check();
+	alloc_try_nid_cap_max_check();
+	alloc_try_nid_cap_min_check();
+
+	alloc_try_nid_min_reserved_check();
+	alloc_try_nid_max_reserved_check();
+	alloc_try_nid_exact_address_check();
+	alloc_try_nid_reserved_full_merge_check();
+	alloc_try_nid_reserved_all_check();
+	alloc_try_nid_low_max_check();
+
 	dummy_physical_memory_cleanup();
 
 	return 0;
-- 
cgit 


From a30338840fa5c6e400673b7d8a31323280bfd521 Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen <toke@redhat.com>
Date: Wed, 9 Mar 2022 11:53:45 +0100
Subject: selftests/bpf: Move open_netns() and close_netns() into
 network_helpers.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These will also be used by the xdp_do_redirect test being added in the next
commit.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20220309105346.100053-5-toke@redhat.com
---
 tools/testing/selftests/bpf/network_helpers.c      | 86 +++++++++++++++++++++
 tools/testing/selftests/bpf/network_helpers.h      |  9 +++
 .../testing/selftests/bpf/prog_tests/tc_redirect.c | 89 ----------------------
 3 files changed, 95 insertions(+), 89 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index 6db1af8fdee7..2bb1f9b3841d 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -1,18 +1,25 @@
 // SPDX-License-Identifier: GPL-2.0-only
+#define _GNU_SOURCE
+
 #include <errno.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <string.h>
 #include <unistd.h>
+#include <sched.h>
 
 #include <arpa/inet.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
 
 #include <linux/err.h>
 #include <linux/in.h>
 #include <linux/in6.h>
+#include <linux/limits.h>
 
 #include "bpf_util.h"
 #include "network_helpers.h"
+#include "test_progs.h"
 
 #define clean_errno() (errno == 0 ? "None" : strerror(errno))
 #define log_err(MSG, ...) ({						\
@@ -356,3 +363,82 @@ char *ping_command(int family)
 	}
 	return "ping";
 }
+
+struct nstoken {
+	int orig_netns_fd;
+};
+
+static int setns_by_fd(int nsfd)
+{
+	int err;
+
+	err = setns(nsfd, CLONE_NEWNET);
+	close(nsfd);
+
+	if (!ASSERT_OK(err, "setns"))
+		return err;
+
+	/* Switch /sys to the new namespace so that e.g. /sys/class/net
+	 * reflects the devices in the new namespace.
+	 */
+	err = unshare(CLONE_NEWNS);
+	if (!ASSERT_OK(err, "unshare"))
+		return err;
+
+	/* Make our /sys mount private, so the following umount won't
+	 * trigger the global umount in case it's shared.
+	 */
+	err = mount("none", "/sys", NULL, MS_PRIVATE, NULL);
+	if (!ASSERT_OK(err, "remount private /sys"))
+		return err;
+
+	err = umount2("/sys", MNT_DETACH);
+	if (!ASSERT_OK(err, "umount2 /sys"))
+		return err;
+
+	err = mount("sysfs", "/sys", "sysfs", 0, NULL);
+	if (!ASSERT_OK(err, "mount /sys"))
+		return err;
+
+	err = mount("bpffs", "/sys/fs/bpf", "bpf", 0, NULL);
+	if (!ASSERT_OK(err, "mount /sys/fs/bpf"))
+		return err;
+
+	return 0;
+}
+
+struct nstoken *open_netns(const char *name)
+{
+	int nsfd;
+	char nspath[PATH_MAX];
+	int err;
+	struct nstoken *token;
+
+	token = malloc(sizeof(struct nstoken));
+	if (!ASSERT_OK_PTR(token, "malloc token"))
+		return NULL;
+
+	token->orig_netns_fd = open("/proc/self/ns/net", O_RDONLY);
+	if (!ASSERT_GE(token->orig_netns_fd, 0, "open /proc/self/ns/net"))
+		goto fail;
+
+	snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name);
+	nsfd = open(nspath, O_RDONLY | O_CLOEXEC);
+	if (!ASSERT_GE(nsfd, 0, "open netns fd"))
+		goto fail;
+
+	err = setns_by_fd(nsfd);
+	if (!ASSERT_OK(err, "setns_by_fd"))
+		goto fail;
+
+	return token;
+fail:
+	free(token);
+	return NULL;
+}
+
+void close_netns(struct nstoken *token)
+{
+	ASSERT_OK(setns_by_fd(token->orig_netns_fd), "setns_by_fd");
+	free(token);
+}
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index d198181a5648..a4b3b2f9877b 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -55,4 +55,13 @@ int make_sockaddr(int family, const char *addr_str, __u16 port,
 		  struct sockaddr_storage *addr, socklen_t *len);
 char *ping_command(int family);
 
+struct nstoken;
+/**
+ * open_netns() - Switch to specified network namespace by name.
+ *
+ * Returns token with which to restore the original namespace
+ * using close_netns().
+ */
+struct nstoken *open_netns(const char *name);
+void close_netns(struct nstoken *token);
 #endif
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index 2b255e28ed26..7ad66a247c02 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -10,8 +10,6 @@
  * to drop unexpected traffic.
  */
 
-#define _GNU_SOURCE
-
 #include <arpa/inet.h>
 #include <linux/if.h>
 #include <linux/if_tun.h>
@@ -19,10 +17,8 @@
 #include <linux/sysctl.h>
 #include <linux/time_types.h>
 #include <linux/net_tstamp.h>
-#include <sched.h>
 #include <stdbool.h>
 #include <stdio.h>
-#include <sys/mount.h>
 #include <sys/stat.h>
 #include <unistd.h>
 
@@ -92,91 +88,6 @@ static int write_file(const char *path, const char *newval)
 	return 0;
 }
 
-struct nstoken {
-	int orig_netns_fd;
-};
-
-static int setns_by_fd(int nsfd)
-{
-	int err;
-
-	err = setns(nsfd, CLONE_NEWNET);
-	close(nsfd);
-
-	if (!ASSERT_OK(err, "setns"))
-		return err;
-
-	/* Switch /sys to the new namespace so that e.g. /sys/class/net
-	 * reflects the devices in the new namespace.
-	 */
-	err = unshare(CLONE_NEWNS);
-	if (!ASSERT_OK(err, "unshare"))
-		return err;
-
-	/* Make our /sys mount private, so the following umount won't
-	 * trigger the global umount in case it's shared.
-	 */
-	err = mount("none", "/sys", NULL, MS_PRIVATE, NULL);
-	if (!ASSERT_OK(err, "remount private /sys"))
-		return err;
-
-	err = umount2("/sys", MNT_DETACH);
-	if (!ASSERT_OK(err, "umount2 /sys"))
-		return err;
-
-	err = mount("sysfs", "/sys", "sysfs", 0, NULL);
-	if (!ASSERT_OK(err, "mount /sys"))
-		return err;
-
-	err = mount("bpffs", "/sys/fs/bpf", "bpf", 0, NULL);
-	if (!ASSERT_OK(err, "mount /sys/fs/bpf"))
-		return err;
-
-	return 0;
-}
-
-/**
- * open_netns() - Switch to specified network namespace by name.
- *
- * Returns token with which to restore the original namespace
- * using close_netns().
- */
-static struct nstoken *open_netns(const char *name)
-{
-	int nsfd;
-	char nspath[PATH_MAX];
-	int err;
-	struct nstoken *token;
-
-	token = calloc(1, sizeof(struct nstoken));
-	if (!ASSERT_OK_PTR(token, "malloc token"))
-		return NULL;
-
-	token->orig_netns_fd = open("/proc/self/ns/net", O_RDONLY);
-	if (!ASSERT_GE(token->orig_netns_fd, 0, "open /proc/self/ns/net"))
-		goto fail;
-
-	snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name);
-	nsfd = open(nspath, O_RDONLY | O_CLOEXEC);
-	if (!ASSERT_GE(nsfd, 0, "open netns fd"))
-		goto fail;
-
-	err = setns_by_fd(nsfd);
-	if (!ASSERT_OK(err, "setns_by_fd"))
-		goto fail;
-
-	return token;
-fail:
-	free(token);
-	return NULL;
-}
-
-static void close_netns(struct nstoken *token)
-{
-	ASSERT_OK(setns_by_fd(token->orig_netns_fd), "setns_by_fd");
-	free(token);
-}
-
 static int netns_setup_namespaces(const char *verb)
 {
 	const char * const *ns = namespaces;
-- 
cgit 


From 55fcacca36468801e62e1d9cc81e5870eea620ba Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen <toke@redhat.com>
Date: Wed, 9 Mar 2022 11:53:46 +0100
Subject: selftests/bpf: Add selftest for XDP_REDIRECT in BPF_PROG_RUN
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This adds a selftest for the XDP_REDIRECT facility in BPF_PROG_RUN, that
redirects packets into a veth and counts them using an XDP program on the
other side of the veth pair and a TC program on the local side of the veth.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20220309105346.100053-6-toke@redhat.com
---
 .../selftests/bpf/prog_tests/xdp_do_redirect.c     | 177 +++++++++++++++++++++
 .../selftests/bpf/progs/test_xdp_do_redirect.c     | 100 ++++++++++++
 2 files changed, 277 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
new file mode 100644
index 000000000000..9926b07e38c8
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+#include <net/if.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <linux/in6.h>
+#include <linux/udp.h>
+#include <bpf/bpf_endian.h>
+#include "test_xdp_do_redirect.skel.h"
+
+#define SYS(fmt, ...)						\
+	({							\
+		char cmd[1024];					\
+		snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__);	\
+		if (!ASSERT_OK(system(cmd), cmd))		\
+			goto out;				\
+	})
+
+struct udp_packet {
+	struct ethhdr eth;
+	struct ipv6hdr iph;
+	struct udphdr udp;
+	__u8 payload[64 - sizeof(struct udphdr)
+		     - sizeof(struct ethhdr) - sizeof(struct ipv6hdr)];
+} __packed;
+
+static struct udp_packet pkt_udp = {
+	.eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
+	.eth.h_dest = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55},
+	.eth.h_source = {0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb},
+	.iph.version = 6,
+	.iph.nexthdr = IPPROTO_UDP,
+	.iph.payload_len = bpf_htons(sizeof(struct udp_packet)
+				     - offsetof(struct udp_packet, udp)),
+	.iph.hop_limit = 2,
+	.iph.saddr.s6_addr16 = {bpf_htons(0xfc00), 0, 0, 0, 0, 0, 0, bpf_htons(1)},
+	.iph.daddr.s6_addr16 = {bpf_htons(0xfc00), 0, 0, 0, 0, 0, 0, bpf_htons(2)},
+	.udp.source = bpf_htons(1),
+	.udp.dest = bpf_htons(1),
+	.udp.len = bpf_htons(sizeof(struct udp_packet)
+			     - offsetof(struct udp_packet, udp)),
+	.payload = {0x42}, /* receiver XDP program matches on this */
+};
+
+static int attach_tc_prog(struct bpf_tc_hook *hook, int fd)
+{
+	DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .handle = 1, .priority = 1, .prog_fd = fd);
+	int ret;
+
+	ret = bpf_tc_hook_create(hook);
+	if (!ASSERT_OK(ret, "create tc hook"))
+		return ret;
+
+	ret = bpf_tc_attach(hook, &opts);
+	if (!ASSERT_OK(ret, "bpf_tc_attach")) {
+		bpf_tc_hook_destroy(hook);
+		return ret;
+	}
+
+	return 0;
+}
+
+#define NUM_PKTS 10000
+void test_xdp_do_redirect(void)
+{
+	int err, xdp_prog_fd, tc_prog_fd, ifindex_src, ifindex_dst;
+	char data[sizeof(pkt_udp) + sizeof(__u32)];
+	struct test_xdp_do_redirect *skel = NULL;
+	struct nstoken *nstoken = NULL;
+	struct bpf_link *link;
+
+	struct xdp_md ctx_in = { .data = sizeof(__u32),
+				 .data_end = sizeof(data) };
+	DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+			    .data_in = &data,
+			    .data_size_in = sizeof(data),
+			    .ctx_in = &ctx_in,
+			    .ctx_size_in = sizeof(ctx_in),
+			    .flags = BPF_F_TEST_XDP_LIVE_FRAMES,
+			    .repeat = NUM_PKTS,
+			    .batch_size = 64,
+		);
+	DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
+			    .attach_point = BPF_TC_INGRESS);
+
+	memcpy(&data[sizeof(__u32)], &pkt_udp, sizeof(pkt_udp));
+	*((__u32 *)data) = 0x42; /* metadata test value */
+
+	skel = test_xdp_do_redirect__open();
+	if (!ASSERT_OK_PTR(skel, "skel"))
+		return;
+
+	/* The XDP program we run with bpf_prog_run() will cycle through all
+	 * three xmit (PASS/TX/REDIRECT) return codes starting from above, and
+	 * ending up with PASS, so we should end up with two packets on the dst
+	 * iface and NUM_PKTS-2 in the TC hook. We match the packets on the UDP
+	 * payload.
+	 */
+	SYS("ip netns add testns");
+	nstoken = open_netns("testns");
+	if (!ASSERT_OK_PTR(nstoken, "setns"))
+		goto out;
+
+	SYS("ip link add veth_src type veth peer name veth_dst");
+	SYS("ip link set dev veth_src address 00:11:22:33:44:55");
+	SYS("ip link set dev veth_dst address 66:77:88:99:aa:bb");
+	SYS("ip link set dev veth_src up");
+	SYS("ip link set dev veth_dst up");
+	SYS("ip addr add dev veth_src fc00::1/64");
+	SYS("ip addr add dev veth_dst fc00::2/64");
+	SYS("ip neigh add fc00::2 dev veth_src lladdr 66:77:88:99:aa:bb");
+
+	/* We enable forwarding in the test namespace because that will cause
+	 * the packets that go through the kernel stack (with XDP_PASS) to be
+	 * forwarded back out the same interface (because of the packet dst
+	 * combined with the interface addresses). When this happens, the
+	 * regular forwarding path will end up going through the same
+	 * veth_xdp_xmit() call as the XDP_REDIRECT code, which can cause a
+	 * deadlock if it happens on the same CPU. There's a local_bh_disable()
+	 * in the test_run code to prevent this, but an earlier version of the
+	 * code didn't have this, so we keep the test behaviour to make sure the
+	 * bug doesn't resurface.
+	 */
+	SYS("sysctl -qw net.ipv6.conf.all.forwarding=1");
+
+	ifindex_src = if_nametoindex("veth_src");
+	ifindex_dst = if_nametoindex("veth_dst");
+	if (!ASSERT_NEQ(ifindex_src, 0, "ifindex_src") ||
+	    !ASSERT_NEQ(ifindex_dst, 0, "ifindex_dst"))
+		goto out;
+
+	memcpy(skel->rodata->expect_dst, &pkt_udp.eth.h_dest, ETH_ALEN);
+	skel->rodata->ifindex_out = ifindex_src; /* redirect back to the same iface */
+	skel->rodata->ifindex_in = ifindex_src;
+	ctx_in.ingress_ifindex = ifindex_src;
+	tc_hook.ifindex = ifindex_src;
+
+	if (!ASSERT_OK(test_xdp_do_redirect__load(skel), "load"))
+		goto out;
+
+	link = bpf_program__attach_xdp(skel->progs.xdp_count_pkts, ifindex_dst);
+	if (!ASSERT_OK_PTR(link, "prog_attach"))
+		goto out;
+	skel->links.xdp_count_pkts = link;
+
+	tc_prog_fd = bpf_program__fd(skel->progs.tc_count_pkts);
+	if (attach_tc_prog(&tc_hook, tc_prog_fd))
+		goto out;
+
+	xdp_prog_fd = bpf_program__fd(skel->progs.xdp_redirect);
+	err = bpf_prog_test_run_opts(xdp_prog_fd, &opts);
+	if (!ASSERT_OK(err, "prog_run"))
+		goto out_tc;
+
+	/* wait for the packets to be flushed */
+	kern_sync_rcu();
+
+	/* There will be one packet sent through XDP_REDIRECT and one through
+	 * XDP_TX; these will show up on the XDP counting program, while the
+	 * rest will be counted at the TC ingress hook (and the counting program
+	 * resets the packet payload so they don't get counted twice even though
+	 * they are re-xmited out the veth device
+	 */
+	ASSERT_EQ(skel->bss->pkts_seen_xdp, 2, "pkt_count_xdp");
+	ASSERT_EQ(skel->bss->pkts_seen_zero, 2, "pkt_count_zero");
+	ASSERT_EQ(skel->bss->pkts_seen_tc, NUM_PKTS - 2, "pkt_count_tc");
+
+out_tc:
+	bpf_tc_hook_destroy(&tc_hook);
+out:
+	if (nstoken)
+		close_netns(nstoken);
+	system("ip netns del testns");
+	test_xdp_do_redirect__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c
new file mode 100644
index 000000000000..77a123071940
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+#define ETH_ALEN 6
+#define HDR_SZ (sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + sizeof(struct udphdr))
+const volatile int ifindex_out;
+const volatile int ifindex_in;
+const volatile __u8 expect_dst[ETH_ALEN];
+volatile int pkts_seen_xdp = 0;
+volatile int pkts_seen_zero = 0;
+volatile int pkts_seen_tc = 0;
+volatile int retcode = XDP_REDIRECT;
+
+SEC("xdp")
+int xdp_redirect(struct xdp_md *xdp)
+{
+	__u32 *metadata = (void *)(long)xdp->data_meta;
+	void *data_end = (void *)(long)xdp->data_end;
+	void *data = (void *)(long)xdp->data;
+
+	__u8 *payload = data + HDR_SZ;
+	int ret = retcode;
+
+	if (payload + 1 > data_end)
+		return XDP_ABORTED;
+
+	if (xdp->ingress_ifindex != ifindex_in)
+		return XDP_ABORTED;
+
+	if (metadata + 1 > data)
+		return XDP_ABORTED;
+
+	if (*metadata != 0x42)
+		return XDP_ABORTED;
+
+	if (*payload == 0) {
+		*payload = 0x42;
+		pkts_seen_zero++;
+	}
+
+	if (bpf_xdp_adjust_meta(xdp, 4))
+		return XDP_ABORTED;
+
+	if (retcode > XDP_PASS)
+		retcode--;
+
+	if (ret == XDP_REDIRECT)
+		return bpf_redirect(ifindex_out, 0);
+
+	return ret;
+}
+
+static bool check_pkt(void *data, void *data_end)
+{
+	struct ipv6hdr *iph = data + sizeof(struct ethhdr);
+	__u8 *payload = data + HDR_SZ;
+
+	if (payload + 1 > data_end)
+		return false;
+
+	if (iph->nexthdr != IPPROTO_UDP || *payload != 0x42)
+		return false;
+
+	/* reset the payload so the same packet doesn't get counted twice when
+	 * it cycles back through the kernel path and out the dst veth
+	 */
+	*payload = 0;
+	return true;
+}
+
+SEC("xdp")
+int xdp_count_pkts(struct xdp_md *xdp)
+{
+	void *data = (void *)(long)xdp->data;
+	void *data_end = (void *)(long)xdp->data_end;
+
+	if (check_pkt(data, data_end))
+		pkts_seen_xdp++;
+
+	/* Return XDP_DROP to make sure the data page is recycled, like when it
+	 * exits a physical NIC. Recycled pages will be counted in the
+	 * pkts_seen_zero counter above.
+	 */
+	return XDP_DROP;
+}
+
+SEC("tc")
+int tc_count_pkts(struct __sk_buff *skb)
+{
+	void *data = (void *)(long)skb->data;
+	void *data_end = (void *)(long)skb->data_end;
+
+	if (check_pkt(data, data_end))
+		pkts_seen_tc++;
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit 


From 8ddde07a3d285a0f3cec14924446608320fdc013 Mon Sep 17 00:00:00 2001
From: Tian Tao <tiantao6@hisilicon.com>
Date: Tue, 8 Mar 2022 16:59:10 +0800
Subject: dma-mapping: benchmark: extract a common header file for
 map_benchmark definition

kernel/dma/map_benchmark.c and selftests/dma/dma_map_benchmark.c
have duplicate map_benchmark definitions, which tends to lead to
inconsistent changes to map_benchmark on both sides, extract a
common header file to avoid this problem.

Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
Acked-by: Barry Song <song.bao.hua@hisilicon.com>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/map_benchmark.h                   | 31 +++++++++++++++++++++++++
 kernel/dma/map_benchmark.c                      | 25 +-------------------
 tools/testing/selftests/dma/dma_map_benchmark.c | 25 +-------------------
 3 files changed, 33 insertions(+), 48 deletions(-)
 create mode 100644 include/linux/map_benchmark.h

(limited to 'tools/testing')

diff --git a/include/linux/map_benchmark.h b/include/linux/map_benchmark.h
new file mode 100644
index 000000000000..62674c83bde4
--- /dev/null
+++ b/include/linux/map_benchmark.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2022 HiSilicon Limited.
+ */
+
+#ifndef _KERNEL_DMA_BENCHMARK_H
+#define _KERNEL_DMA_BENCHMARK_H
+
+#define DMA_MAP_BENCHMARK       _IOWR('d', 1, struct map_benchmark)
+#define DMA_MAP_MAX_THREADS     1024
+#define DMA_MAP_MAX_SECONDS     300
+#define DMA_MAP_MAX_TRANS_DELAY (10 * NSEC_PER_MSEC)
+
+#define DMA_MAP_BIDIRECTIONAL   0
+#define DMA_MAP_TO_DEVICE       1
+#define DMA_MAP_FROM_DEVICE     2
+
+struct map_benchmark {
+	__u64 avg_map_100ns; /* average map latency in 100ns */
+	__u64 map_stddev; /* standard deviation of map latency */
+	__u64 avg_unmap_100ns; /* as above */
+	__u64 unmap_stddev;
+	__u32 threads; /* how many threads will do map/unmap in parallel */
+	__u32 seconds; /* how long the test will last */
+	__s32 node; /* which numa node this benchmark will run on */
+	__u32 dma_bits; /* DMA addressing capability */
+	__u32 dma_dir; /* DMA data direction */
+	__u32 dma_trans_ns; /* time for DMA transmission in ns */
+	__u32 granule;  /* how many PAGE_SIZE will do map/unmap once a time */
+};
+#endif /* _KERNEL_DMA_BENCHMARK_H */
diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c
index 9b9af1bd6be3..0520a8f4fb1d 100644
--- a/kernel/dma/map_benchmark.c
+++ b/kernel/dma/map_benchmark.c
@@ -11,6 +11,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/kernel.h>
 #include <linux/kthread.h>
+#include <linux/map_benchmark.h>
 #include <linux/math64.h>
 #include <linux/module.h>
 #include <linux/pci.h>
@@ -18,30 +19,6 @@
 #include <linux/slab.h>
 #include <linux/timekeeping.h>
 
-#define DMA_MAP_BENCHMARK	_IOWR('d', 1, struct map_benchmark)
-#define DMA_MAP_MAX_THREADS	1024
-#define DMA_MAP_MAX_SECONDS	300
-#define DMA_MAP_MAX_TRANS_DELAY	(10 * NSEC_PER_MSEC)
-
-#define DMA_MAP_BIDIRECTIONAL	0
-#define DMA_MAP_TO_DEVICE	1
-#define DMA_MAP_FROM_DEVICE	2
-
-struct map_benchmark {
-	__u64 avg_map_100ns; /* average map latency in 100ns */
-	__u64 map_stddev; /* standard deviation of map latency */
-	__u64 avg_unmap_100ns; /* as above */
-	__u64 unmap_stddev;
-	__u32 threads; /* how many threads will do map/unmap in parallel */
-	__u32 seconds; /* how long the test will last */
-	__s32 node; /* which numa node this benchmark will run on */
-	__u32 dma_bits; /* DMA addressing capability */
-	__u32 dma_dir; /* DMA data direction */
-	__u32 dma_trans_ns; /* time for DMA transmission in ns */
-	__u32 granule;	/* how many PAGE_SIZE will do map/unmap once a time */
-	__u8 expansion[76];	/* For future use */
-};
-
 struct map_benchmark_data {
 	struct map_benchmark bparam;
 	struct device *dev;
diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/testing/selftests/dma/dma_map_benchmark.c
index 485dff51bad2..c3b3c09e995e 100644
--- a/tools/testing/selftests/dma/dma_map_benchmark.c
+++ b/tools/testing/selftests/dma/dma_map_benchmark.c
@@ -10,40 +10,17 @@
 #include <unistd.h>
 #include <sys/ioctl.h>
 #include <sys/mman.h>
+#include <linux/map_benchmark.h>
 #include <linux/types.h>
 
 #define NSEC_PER_MSEC	1000000L
 
-#define DMA_MAP_BENCHMARK	_IOWR('d', 1, struct map_benchmark)
-#define DMA_MAP_MAX_THREADS	1024
-#define DMA_MAP_MAX_SECONDS     300
-#define DMA_MAP_MAX_TRANS_DELAY	(10 * NSEC_PER_MSEC)
-
-#define DMA_MAP_BIDIRECTIONAL	0
-#define DMA_MAP_TO_DEVICE	1
-#define DMA_MAP_FROM_DEVICE	2
-
 static char *directions[] = {
 	"BIDIRECTIONAL",
 	"TO_DEVICE",
 	"FROM_DEVICE",
 };
 
-struct map_benchmark {
-	__u64 avg_map_100ns; /* average map latency in 100ns */
-	__u64 map_stddev; /* standard deviation of map latency */
-	__u64 avg_unmap_100ns; /* as above */
-	__u64 unmap_stddev;
-	__u32 threads; /* how many threads will do map/unmap in parallel */
-	__u32 seconds; /* how long the test will last */
-	__s32 node; /* which numa node this benchmark will run on */
-	__u32 dma_bits; /* DMA addressing capability */
-	__u32 dma_dir; /* DMA data direction */
-	__u32 dma_trans_ns; /* time for DMA transmission in ns */
-	__u32 granule; /* how many PAGE_SIZE will do map/unmap once a time */
-	__u8 expansion[76];	/* For future use */
-};
-
 int main(int argc, char **argv)
 {
 	struct map_benchmark map;
-- 
cgit 


From 58ffc34896db2e5e49e6ae6bf8042f85504d84e8 Mon Sep 17 00:00:00 2001
From: Karolina Drobnik <karolinadrobnik@gmail.com>
Date: Mon, 28 Feb 2022 15:46:51 +0100
Subject: memblock tests: Add TODO and README files

Add description of the project, its structure and how to run it.
List what is left to implement and what the known issues are.

Signed-off-by: Karolina Drobnik <karolinadrobnik@gmail.com>
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Link: https://lore.kernel.org/r/d5e39b9f7dcef177ebc14282727447bc21e3b38f.1646055639.git.karolinadrobnik@gmail.com
---
 tools/testing/memblock/README | 107 ++++++++++++++++++++++++++++++++++++++++++
 tools/testing/memblock/TODO   |  28 +++++++++++
 2 files changed, 135 insertions(+)
 create mode 100644 tools/testing/memblock/README
 create mode 100644 tools/testing/memblock/TODO

(limited to 'tools/testing')

diff --git a/tools/testing/memblock/README b/tools/testing/memblock/README
new file mode 100644
index 000000000000..ca6afcff013a
--- /dev/null
+++ b/tools/testing/memblock/README
@@ -0,0 +1,107 @@
+==================
+Memblock simulator
+==================
+
+Introduction
+============
+
+Memblock is a boot time memory allocator[1] that manages memory regions before
+the actual memory management is initialized. Its APIs allow to register physical
+memory regions, mark them as available or reserved, allocate a block of memory
+within the requested range and/or in specific NUMA node, and many more.
+
+Because it is used so early in the booting process, testing and debugging it is
+difficult. This test suite, usually referred as memblock simulator, is
+an attempt at testing the memblock mechanism. It runs one monolithic test that
+consist of a series of checks that exercise both the basic operations and
+allocation functionalities of memblock. The main data structure of the boot time
+memory allocator is initialized at the build time, so the checks here reuse its
+instance throughout the duration of the test. To ensure that tests don't affect
+each other, region arrays are reset in between.
+
+As this project uses the actual memblock code and has to run in user space,
+some of the kernel definitions were stubbed by the initial commit that
+introduced memblock simulator (commit 16802e55dea9 ("memblock tests: Add
+skeleton of the memblock simulator")) and a few preparation commits just
+before it. Most of them don't match the kernel implementation, so one should
+consult them first before making any significant changes to the project.
+
+Usage
+=====
+
+To run the tests, build the main target and run it:
+
+$ make && ./main
+
+A successful run produces no output. It is also possible to override different
+configuration parameters. For example, to simulate enabled NUMA, use:
+
+$ make NUMA=1
+
+For the full list of options, see `make help`.
+
+Project structure
+=================
+
+The project has one target, main, which calls a group of checks for basic and
+allocation functions. Tests for each group are defined in dedicated files, as it
+can be seen here:
+
+memblock
+|-- asm       ------------------,
+|-- lib                         |-- implement function and struct stubs
+|-- linux     ------------------'
+|-- scripts
+|    |-- Makefile.include        -- handles `make` parameters
+|-- tests
+|    |-- alloc_api.(c|h)         -- memblock_alloc tests
+|    |-- alloc_helpers_api.(c|h) -- memblock_alloc_from tests
+|    |-- alloc_nid_api.(c|h)     -- memblock_alloc_try_nid tests
+|    |-- basic_api.(c|h)         -- memblock_add/memblock_reserve/... tests
+|    |-- common.(c|h)            -- helper functions for resetting memblock;
+|-- main.c        --------------.   dummy physical memory definition
+|-- Makefile                     `- test runner
+|-- README
+|-- TODO
+|-- .gitignore
+
+Simulating physical memory
+==========================
+
+Some allocation functions clear the memory in the process, so it is required for
+memblock to track valid memory ranges. To achieve this, the test suite registers
+with memblock memory stored by test_memory struct. It is a small wrapper that
+points to a block of memory allocated via malloc. For each group of allocation
+tests, dummy physical memory is allocated, added to memblock, and then released
+at the end of the test run. The structure of a test runner checking allocation
+functions is as follows:
+
+int memblock_alloc_foo_checks(void)
+{
+	reset_memblock_attributes();     /* data structure reset */
+	dummy_physical_memory_init();    /* allocate and register memory */
+
+	(...allocation checks...)
+
+	dummy_physical_memory_cleanup(); /* free the memory */
+}
+
+There's no need to explicitly free the dummy memory from memblock via
+memblock_free() call. The entry will be erased by reset_memblock_regions(),
+called at the beginning of each test.
+
+Known issues
+============
+
+1. Requesting a specific NUMA node via memblock_alloc_node() does not work as
+   intended. Once the fix is in place, tests for this function can be added.
+
+2. Tests for memblock_alloc_low() can't be easily implemented. The function uses
+   ARCH_LOW_ADDRESS_LIMIT marco, which can't be changed to point at the low
+   memory of the memory_block.
+
+References
+==========
+
+1. Boot time memory management documentation page:
+   https://www.kernel.org/doc/html/latest/core-api/boot-time-mm.html
diff --git a/tools/testing/memblock/TODO b/tools/testing/memblock/TODO
new file mode 100644
index 000000000000..c25b2fdec45e
--- /dev/null
+++ b/tools/testing/memblock/TODO
@@ -0,0 +1,28 @@
+TODO
+=====
+
+1. Add verbose output (e.g., what is being tested and how many tests cases are
+   passing)
+
+2. Add flags to Makefile:
+   + verbosity level
+   + enable memblock_dbg() messages (i.e. pass "-D CONFIG_DEBUG_MEMORY_INIT"
+     flag)
+
+3. Add tests trying to memblock_add() or memblock_reserve() 129th region.
+   This will trigger memblock_double_array(), make sure it succeeds.
+   *Important:* These tests require valid memory ranges, use dummy physical
+                memory block from common.c to implement them. It is also very
+                likely that the current MEM_SIZE won't be enough for these
+                test cases. Use realloc to adjust the size accordingly.
+
+4. Add test cases using this functions (implement them for both directions):
+   + memblock_alloc_raw()
+   + memblock_alloc_exact_nid_raw()
+   + memblock_alloc_try_nid_raw()
+
+5. Add tests for memblock_alloc_node() to check if the correct NUMA node is set
+   for the new region
+
+6. Update comments in tests/basic_api.c to match the style used in
+   tests/alloc_*.c
-- 
cgit 


From 3c082695e78b99bba177725bf509ad3230f8287c Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Wed, 9 Mar 2022 11:16:27 -0800
Subject: selftests: mptcp: drop msg argument of chk_csum_nr

This patch dropped the msg argument of chk_csum_nr, to unify chk_csum_nr
with other chk_*_nr functions.

Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 26 ++++++++++++-------------
 1 file changed, 12 insertions(+), 14 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index ee435948d130..194c4420220e 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -16,6 +16,7 @@ capture=0
 checksum=0
 ip_mptcp=0
 check_invert=0
+validate_checksum=0
 init=0
 
 TEST_COUNT=0
@@ -60,6 +61,7 @@ init_partial()
 	done
 
 	check_invert=0
+	validate_checksum=$checksum
 
 	#  ns1              ns2
 	# ns1eth1    ns2eth1
@@ -192,6 +194,8 @@ reset_with_checksum()
 
 	ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=$ns1_enable
 	ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=$ns2_enable
+
+	validate_checksum=1
 }
 
 reset_with_allow_join_id0()
@@ -853,9 +857,8 @@ dump_stats()
 
 chk_csum_nr()
 {
-	local msg=${1:-""}
-	local csum_ns1=${2:-0}
-	local csum_ns2=${3:-0}
+	local csum_ns1=${1:-0}
+	local csum_ns2=${2:-0}
 	local count
 	local dump_stats
 	local allow_multi_errors_ns1=0
@@ -870,12 +873,7 @@ chk_csum_nr()
 		csum_ns2=${csum_ns2:1}
 	fi
 
-	if [ ! -z "$msg" ]; then
-		printf "%03u" "$TEST_COUNT"
-	else
-		echo -n "   "
-	fi
-	printf " %-36s %s" "$msg" "sum"
+	printf "%-${nr_blank}s %s" " " "sum"
 	count=`ip netns exec $ns1 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}'`
 	[ -z "$count" ] && count=0
 	if [ "$count" != $csum_ns1 -a $allow_multi_errors_ns1 -eq 0 ] ||
@@ -1064,7 +1062,7 @@ chk_join_nr()
 	fi
 	[ "${dump_stats}" = 1 ] && dump_stats
 	if [ $checksum -eq 1 ]; then
-		chk_csum_nr "" $csum_ns1 $csum_ns2
+		chk_csum_nr $csum_ns1 $csum_ns2
 		chk_fail_nr $fail_nr $fail_nr
 		chk_rst_nr $rst_nr $rst_nr
 	fi
@@ -2181,28 +2179,28 @@ checksum_tests()
 	pm_nl_set_limits $ns1 0 1
 	pm_nl_set_limits $ns2 0 1
 	run_tests $ns1 $ns2 10.0.1.1
-	chk_csum_nr "checksum test 0 0"
+	chk_join_nr "checksum test 0 0" 0 0 0
 
 	# checksum test 1 1
 	reset_with_checksum 1 1
 	pm_nl_set_limits $ns1 0 1
 	pm_nl_set_limits $ns2 0 1
 	run_tests $ns1 $ns2 10.0.1.1
-	chk_csum_nr "checksum test 1 1"
+	chk_join_nr "checksum test 1 1" 0 0 0
 
 	# checksum test 0 1
 	reset_with_checksum 0 1
 	pm_nl_set_limits $ns1 0 1
 	pm_nl_set_limits $ns2 0 1
 	run_tests $ns1 $ns2 10.0.1.1
-	chk_csum_nr "checksum test 0 1"
+	chk_join_nr "checksum test 0 1" 0 0 0
 
 	# checksum test 1 0
 	reset_with_checksum 1 0
 	pm_nl_set_limits $ns1 0 1
 	pm_nl_set_limits $ns2 0 1
 	run_tests $ns1 $ns2 10.0.1.1
-	chk_csum_nr "checksum test 1 0"
+	chk_join_nr "checksum test 1 0" 0 0 0
 }
 
 deny_join_id0_tests()
-- 
cgit 


From 3afd0280e7d323c2b8df458cefecc9e4b3bec570 Mon Sep 17 00:00:00 2001
From: Matthieu Baerts <matthieu.baerts@tessares.net>
Date: Wed, 9 Mar 2022 11:16:28 -0800
Subject: selftests: mptcp: join: define tests groups once

When adding a new tests group, it has to be defined in multiple places:

- in the all_tests() function
- in the 'usage()' function
- in the getopts: short option + what to do when the option is used

Because it is easy to forget one of them, it is useful to have to define
them only once.

Note: only using an associative array would simplify the code but the
entries are stored in a hashtable and iterating over the different items
doesn't give the same order as the one used in the declaration of this
array. Because we want to run these tests in the same order as before, a
"simple" array is used first.

Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 141 ++++++++----------------
 1 file changed, 47 insertions(+), 94 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 194c4420220e..8dc50b480152 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -19,6 +19,7 @@ check_invert=0
 validate_checksum=0
 init=0
 
+declare -A all_tests
 TEST_COUNT=0
 nr_blank=40
 
@@ -2380,27 +2381,6 @@ implicit_tests()
 	wait
 }
 
-all_tests()
-{
-	subflows_tests
-	subflows_error_tests
-	signal_address_tests
-	link_failure_tests
-	add_addr_timeout_tests
-	remove_tests
-	add_tests
-	ipv6_tests
-	v4mapped_tests
-	backup_tests
-	add_addr_ports_tests
-	syncookies_tests
-	checksum_tests
-	deny_join_id0_tests
-	fullmesh_tests
-	fastclose_tests
-	implicit_tests
-}
-
 # [$1: error message]
 usage()
 {
@@ -2410,23 +2390,12 @@ usage()
 	fi
 
 	echo "mptcp_join usage:"
-	echo "  -f subflows_tests"
-	echo "  -e subflows_error_tests"
-	echo "  -s signal_address_tests"
-	echo "  -l link_failure_tests"
-	echo "  -t add_addr_timeout_tests"
-	echo "  -r remove_tests"
-	echo "  -a add_tests"
-	echo "  -6 ipv6_tests"
-	echo "  -4 v4mapped_tests"
-	echo "  -b backup_tests"
-	echo "  -p add_addr_ports_tests"
-	echo "  -k syncookies_tests"
-	echo "  -S checksum_tests"
-	echo "  -d deny_join_id0_tests"
-	echo "  -m fullmesh_tests"
-	echo "  -z fastclose_tests"
-	echo "  -I implicit_tests"
+
+	local key
+	for key in "${!all_tests[@]}"; do
+		echo "  -${key} ${all_tests[${key}]}"
+	done
+
 	echo "  -c capture pcap files"
 	echo "  -C enable data checksum"
 	echo "  -i use ip mptcp"
@@ -2436,59 +2405,43 @@ usage()
 }
 
 
+# Use a "simple" array to force an specific order we cannot have with an associative one
+all_tests_sorted=(
+	f@subflows_tests
+	e@subflows_error_tests
+	s@signal_address_tests
+	l@link_failure_tests
+	t@add_addr_timeout_tests
+	r@remove_tests
+	a@add_tests
+	6@ipv6_tests
+	4@v4mapped_tests
+	b@backup_tests
+	p@add_addr_ports_tests
+	k@syncookies_tests
+	S@checksum_tests
+	d@deny_join_id0_tests
+	m@fullmesh_tests
+	z@fastclose_tests
+	I@implicit_tests
+)
+
+all_tests_args=""
+all_tests_names=()
+for subtests in "${all_tests_sorted[@]}"; do
+	key="${subtests%@*}"
+	value="${subtests#*@}"
+
+	all_tests_args+="${key}"
+	all_tests_names+=("${value}")
+	all_tests[${key}]="${value}"
+done
+
 tests=()
-while getopts 'fesltra64bpkdmchzICSi' opt; do
+while getopts "${all_tests_args}cCih" opt; do
 	case $opt in
-		f)
-			tests+=(subflows_tests)
-			;;
-		e)
-			tests+=(subflows_error_tests)
-			;;
-		s)
-			tests+=(signal_address_tests)
-			;;
-		l)
-			tests+=(link_failure_tests)
-			;;
-		t)
-			tests+=(add_addr_timeout_tests)
-			;;
-		r)
-			tests+=(remove_tests)
-			;;
-		a)
-			tests+=(add_tests)
-			;;
-		6)
-			tests+=(ipv6_tests)
-			;;
-		4)
-			tests+=(v4mapped_tests)
-			;;
-		b)
-			tests+=(backup_tests)
-			;;
-		p)
-			tests+=(add_addr_ports_tests)
-			;;
-		k)
-			tests+=(syncookies_tests)
-			;;
-		S)
-			tests+=(checksum_tests)
-			;;
-		d)
-			tests+=(deny_join_id0_tests)
-			;;
-		m)
-			tests+=(fullmesh_tests)
-			;;
-		z)
-			tests+=(fastclose_tests)
-			;;
-		I)
-			tests+=(implicit_tests)
+		["${all_tests_args}"])
+			tests+=("${all_tests[${opt}]}")
 			;;
 		c)
 			capture=1
@@ -2509,11 +2462,11 @@ while getopts 'fesltra64bpkdmchzICSi' opt; do
 done
 
 if [ ${#tests[@]} -eq 0 ]; then
-	all_tests
-else
-	for subtests in "${tests[@]}"; do
-		"${subtests}"
-	done
+	tests=("${all_tests_names[@]}")
 fi
 
+for subtests in "${tests[@]}"; do
+	"${subtests}"
+done
+
 exit $ret
-- 
cgit 


From e59300ce3ff8abee26144fddec11b0492e05154c Mon Sep 17 00:00:00 2001
From: Matthieu Baerts <matthieu.baerts@tessares.net>
Date: Wed, 9 Mar 2022 11:16:29 -0800
Subject: selftests: mptcp: join: reset failing links

Best to always reset this env var before each test to avoid surprising
behaviour depending on the order tests are running.

Also clearly set it for the last failing links test is also needed when
only this test is executed.

Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 8dc50b480152..65590f965e4d 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -23,6 +23,8 @@ declare -A all_tests
 TEST_COUNT=0
 nr_blank=40
 
+export FAILING_LINKS=""
+
 # generated using "nfbpf_compile '(ip && (ip[54] & 0xf0) == 0x30) ||
 #				  (ip6 && (ip6[74] & 0xf0) == 0x30)'"
 CBPF_MPTCP_SUBOPTION_ADD_ADDR="14,
@@ -63,6 +65,7 @@ init_partial()
 
 	check_invert=0
 	validate_checksum=$checksum
+	FAILING_LINKS=""
 
 	#  ns1              ns2
 	# ns1eth1    ns2eth1
@@ -1618,7 +1621,7 @@ link_failure_tests()
 	pm_nl_set_limits $ns1 0 2
 	pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
 	pm_nl_set_limits $ns2 1 2
-	export FAILING_LINKS="1"
+	FAILING_LINKS="1"
 	pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup
 	run_tests $ns1 $ns2 10.0.1.1 1
 	chk_join_nr "backup subflow unused, link failure" 2 2 2
@@ -1633,7 +1636,7 @@ link_failure_tests()
 	pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
 	pm_nl_set_limits $ns2 1 2
 	pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup
-	export FAILING_LINKS="1 2"
+	FAILING_LINKS="1 2"
 	run_tests $ns1 $ns2 10.0.1.1 1
 	chk_join_nr "backup flow used, multi links fail" 2 2 2
 	chk_add_nr 1 1
@@ -1648,6 +1651,7 @@ link_failure_tests()
 	pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
 	pm_nl_set_limits $ns2 1 3
 	pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup
+	FAILING_LINKS="1 2"
 	run_tests $ns1 $ns2 10.0.1.1 2
 	chk_join_nr "backup flow used, bidi, link failure" 2 2 2
 	chk_add_nr 1 1
-- 
cgit 


From ae7bd9ccecc3495ff5c3692e100bdd9f0164e49d Mon Sep 17 00:00:00 2001
From: Matthieu Baerts <matthieu.baerts@tessares.net>
Date: Wed, 9 Mar 2022 11:16:30 -0800
Subject: selftests: mptcp: join: option to execute specific tests

Often, it is needed to run one specific test.

There are options to run subgroups of tests but when only one fails, no
need to run all the subgroup. So far, the solution was to edit the
script to comment the tests that are not needed but that's not ideal.

Now, it is possible to run one specific test by giving the ID of the
tests that are going to be validated, e.g.

  ./mptcp_join.sh 36 37

This is cleaner and saves time.

Technically, the reset* functions now return 0 if the test can be
executed. This naturally creates sections per test in the code which is
also helpful to understand what a test is exactly doing.

Suggested-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 1629 ++++++++++++-----------
 1 file changed, 877 insertions(+), 752 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 65590f965e4d..a3f6c790765b 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -20,6 +20,7 @@ validate_checksum=0
 init=0
 
 declare -A all_tests
+declare -a only_tests
 TEST_COUNT=0
 nr_blank=40
 
@@ -149,8 +150,30 @@ cleanup()
 	cleanup_partial
 }
 
+skip_test()
+{
+	if [ "${#only_tests[@]}" -eq 0 ]; then
+		return 1
+	fi
+
+	local i
+	for i in "${only_tests[@]}"; do
+		if [ "${TEST_COUNT}" -eq "${i}" ]; then
+			return 1
+		fi
+	done
+
+	return 0
+}
+
 reset()
 {
+	TEST_COUNT=$((TEST_COUNT+1))
+
+	if skip_test; then
+		return 1
+	fi
+
 	if [ "${init}" != "1" ]; then
 		init
 	else
@@ -158,11 +181,13 @@ reset()
 	fi
 
 	init_partial
+
+	return 0
 }
 
 reset_with_cookies()
 {
-	reset
+	reset || return 1
 
 	for netns in "$ns1" "$ns2";do
 		ip netns exec $netns sysctl -q net.ipv4.tcp_syncookies=2
@@ -179,7 +204,7 @@ reset_with_add_addr_timeout()
 		tables="ip6tables"
 	fi
 
-	reset
+	reset || return 1
 
 	ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1
 	ip netns exec $ns2 $tables -A OUTPUT -p tcp \
@@ -194,7 +219,7 @@ reset_with_checksum()
 	local ns1_enable=$1
 	local ns2_enable=$2
 
-	reset
+	reset || return 1
 
 	ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=$ns1_enable
 	ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=$ns2_enable
@@ -207,7 +232,7 @@ reset_with_allow_join_id0()
 	local ns1_enable=$1
 	local ns2_enable=$2
 
-	reset
+	reset || return 1
 
 	ip netns exec $ns1 sysctl -q net.mptcp.allow_join_initial_addr_port=$ns1_enable
 	ip netns exec $ns2 sysctl -q net.mptcp.allow_join_initial_addr_port=$ns2_enable
@@ -520,8 +545,7 @@ do_transfer()
 	speed="$9"
 	sflags="${10}"
 
-	port=$((10000+$TEST_COUNT))
-	TEST_COUNT=$((TEST_COUNT+1))
+	port=$((10000+$TEST_COUNT-1))
 
 	:> "$cout"
 	:> "$sout"
@@ -1381,888 +1405,968 @@ wait_attempt_fail()
 
 subflows_tests()
 {
-	reset
-	run_tests $ns1 $ns2 10.0.1.1
-	chk_join_nr "no JOIN" "0" "0" "0"
+	if reset; then
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr "no JOIN" 0 0 0
+	fi
 
 	# subflow limited by client
-	reset
-	pm_nl_set_limits $ns1 0 0
-	pm_nl_set_limits $ns2 0 0
-	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
-	run_tests $ns1 $ns2 10.0.1.1
-	chk_join_nr "single subflow, limited by client" 0 0 0
+	if reset; then
+		pm_nl_set_limits $ns1 0 0
+		pm_nl_set_limits $ns2 0 0
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr "single subflow, limited by client" 0 0 0
+	fi
 
 	# subflow limited by server
-	reset
-	pm_nl_set_limits $ns1 0 0
-	pm_nl_set_limits $ns2 0 1
-	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
-	run_tests $ns1 $ns2 10.0.1.1
-	chk_join_nr "single subflow, limited by server" 1 1 0
+	if reset; then
+		pm_nl_set_limits $ns1 0 0
+		pm_nl_set_limits $ns2 0 1
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr "single subflow, limited by server" 1 1 0
+	fi
 
 	# subflow
-	reset
-	pm_nl_set_limits $ns1 0 1
-	pm_nl_set_limits $ns2 0 1
-	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
-	run_tests $ns1 $ns2 10.0.1.1
-	chk_join_nr "single subflow" 1 1 1
+	if reset; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr "single subflow" 1 1 1
+	fi
 
 	# multiple subflows
-	reset
-	pm_nl_set_limits $ns1 0 2
-	pm_nl_set_limits $ns2 0 2
-	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
-	pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
-	run_tests $ns1 $ns2 10.0.1.1
-	chk_join_nr "multiple subflows" 2 2 2
+	if reset; then
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 0 2
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr "multiple subflows" 2 2 2
+	fi
 
 	# multiple subflows limited by server
-	reset
-	pm_nl_set_limits $ns1 0 1
-	pm_nl_set_limits $ns2 0 2
-	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
-	pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
-	run_tests $ns1 $ns2 10.0.1.1
-	chk_join_nr "multiple subflows, limited by server" 2 2 1
+	if reset; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 2
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr "multiple subflows, limited by server" 2 2 1
+	fi
 
 	# single subflow, dev
-	reset
-	pm_nl_set_limits $ns1 0 1
-	pm_nl_set_limits $ns2 0 1
-	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow dev ns2eth3
-	run_tests $ns1 $ns2 10.0.1.1
-	chk_join_nr "single subflow, dev" 1 1 1
+	if reset; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow dev ns2eth3
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr "single subflow, dev" 1 1 1
+	fi
 }
 
 subflows_error_tests()
 {
 	# If a single subflow is configured, and matches the MPC src
 	# address, no additional subflow should be created
-	reset
-	pm_nl_set_limits $ns1 0 1
-	pm_nl_set_limits $ns2 0 1
-	pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow
-	run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
-	chk_join_nr "no MPC reuse with single endpoint" 0 0 0
+	if reset; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
+		chk_join_nr "no MPC reuse with single endpoint" 0 0 0
+	fi
 
 	# multiple subflows, with subflow creation error
-	reset
-	pm_nl_set_limits $ns1 0 2
-	pm_nl_set_limits $ns2 0 2
-	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
-	pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
-	ip netns exec $ns1 iptables -A INPUT -s 10.0.3.2 -p tcp -j REJECT
-	run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
-	chk_join_nr "multi subflows, with failing subflow" 1 1 1
+	if reset; then
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 0 2
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
+		ip netns exec $ns1 iptables -A INPUT -s 10.0.3.2 -p tcp -j REJECT
+		run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
+		chk_join_nr "multi subflows, with failing subflow" 1 1 1
+	fi
 
 	# multiple subflows, with subflow timeout on MPJ
-	reset
-	pm_nl_set_limits $ns1 0 2
-	pm_nl_set_limits $ns2 0 2
-	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
-	pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
-	ip netns exec $ns1 iptables -A INPUT -s 10.0.3.2 -p tcp -j DROP
-	run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
-	chk_join_nr "multi subflows, with subflow timeout" 1 1 1
+	if reset; then
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 0 2
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
+		ip netns exec $ns1 iptables -A INPUT -s 10.0.3.2 -p tcp -j DROP
+		run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
+		chk_join_nr "multi subflows, with subflow timeout" 1 1 1
+	fi
 
 	# multiple subflows, check that the endpoint corresponding to
 	# closed subflow (due to reset) is not reused if additional
 	# subflows are added later
-	reset
-	pm_nl_set_limits $ns1 0 1
-	pm_nl_set_limits $ns2 0 1
-	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
-	ip netns exec $ns1 iptables -A INPUT -s 10.0.3.2 -p tcp -j REJECT
-	run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow &
-
-	# updates in the child shell do not have any effect here, we
-	# need to bump the test counter for the above case
-	TEST_COUNT=$((TEST_COUNT+1))
-
-	# mpj subflow will be in TW after the reset
-	wait_attempt_fail $ns2
-	pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
-	wait
-
-	# additional subflow could be created only if the PM select
-	# the later endpoint, skipping the already used one
-	chk_join_nr "multi subflows, fair usage on close" 1 1 1
+	if reset; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		ip netns exec $ns1 iptables -A INPUT -s 10.0.3.2 -p tcp -j REJECT
+		run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow &
+
+		# mpj subflow will be in TW after the reset
+		wait_attempt_fail $ns2
+		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
+		wait
+
+		# additional subflow could be created only if the PM select
+		# the later endpoint, skipping the already used one
+		chk_join_nr "multi subflows, fair usage on close" 1 1 1
+	fi
 }
 
 signal_address_tests()
 {
 	# add_address, unused
-	reset
-	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
-	run_tests $ns1 $ns2 10.0.1.1
-	chk_join_nr "unused signal address" 0 0 0
-	chk_add_nr 1 1
+	if reset; then
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr "unused signal address" 0 0 0
+		chk_add_nr 1 1
+	fi
 
 	# accept and use add_addr
-	reset
-	pm_nl_set_limits $ns1 0 1
-	pm_nl_set_limits $ns2 1 1
-	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
-	run_tests $ns1 $ns2 10.0.1.1
-	chk_join_nr "signal address" 1 1 1
-	chk_add_nr 1 1
+	if reset; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 1 1
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr "signal address" 1 1 1
+		chk_add_nr 1 1
+	fi
 
 	# accept and use add_addr with an additional subflow
 	# note: signal address in server ns and local addresses in client ns must
 	# belong to different subnets or one of the listed local address could be
 	# used for 'add_addr' subflow
-	reset
-	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
-	pm_nl_set_limits $ns1 0 2
-	pm_nl_set_limits $ns2 1 2
-	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
-	run_tests $ns1 $ns2 10.0.1.1
-	chk_join_nr "subflow and signal" 2 2 2
-	chk_add_nr 1 1
+	if reset; then
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 1 2
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr "subflow and signal" 2 2 2
+		chk_add_nr 1 1
+	fi
 
 	# accept and use add_addr with additional subflows
-	reset
-	pm_nl_set_limits $ns1 0 3
-	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
-	pm_nl_set_limits $ns2 1 3
-	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
-	pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
-	run_tests $ns1 $ns2 10.0.1.1
-	chk_join_nr "multiple subflows and signal" 3 3 3
-	chk_add_nr 1 1
+	if reset; then
+		pm_nl_set_limits $ns1 0 3
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_set_limits $ns2 1 3
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr "multiple subflows and signal" 3 3 3
+		chk_add_nr 1 1
+	fi
 
 	# signal addresses
-	reset
-	pm_nl_set_limits $ns1 3 3
-	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
-	pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
-	pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
-	pm_nl_set_limits $ns2 3 3
-	run_tests $ns1 $ns2 10.0.1.1
-	chk_join_nr "signal addresses" 3 3 3
-	chk_add_nr 3 3
+	if reset; then
+		pm_nl_set_limits $ns1 3 3
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
+		pm_nl_set_limits $ns2 3 3
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr "signal addresses" 3 3 3
+		chk_add_nr 3 3
+	fi
 
 	# signal invalid addresses
-	reset
-	pm_nl_set_limits $ns1 3 3
-	pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
-	pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
-	pm_nl_add_endpoint $ns1 10.0.14.1 flags signal
-	pm_nl_set_limits $ns2 3 3
-	run_tests $ns1 $ns2 10.0.1.1
-	chk_join_nr "signal invalid addresses" 1 1 1
-	chk_add_nr 3 3
+	if reset; then
+		pm_nl_set_limits $ns1 3 3
+		pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.14.1 flags signal
+		pm_nl_set_limits $ns2 3 3
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr "signal invalid addresses" 1 1 1
+		chk_add_nr 3 3
+	fi
 
 	# signal addresses race test
-	reset
-	pm_nl_set_limits $ns1 4 4
-	pm_nl_set_limits $ns2 4 4
-	pm_nl_add_endpoint $ns1 10.0.1.1 flags signal
-	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
-	pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
-	pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
-	pm_nl_add_endpoint $ns2 10.0.1.2 flags signal
-	pm_nl_add_endpoint $ns2 10.0.2.2 flags signal
-	pm_nl_add_endpoint $ns2 10.0.3.2 flags signal
-	pm_nl_add_endpoint $ns2 10.0.4.2 flags signal
-
-	# the peer could possibly miss some addr notification, allow retransmission
-	ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1
-	run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
-	chk_join_nr "signal addresses race test" 3 3 3
-
-	# the server will not signal the address terminating
-	# the MPC subflow
-	chk_add_nr 3 3
+	if reset; then
+		pm_nl_set_limits $ns1 4 4
+		pm_nl_set_limits $ns2 4 4
+		pm_nl_add_endpoint $ns1 10.0.1.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
+		pm_nl_add_endpoint $ns2 10.0.1.2 flags signal
+		pm_nl_add_endpoint $ns2 10.0.2.2 flags signal
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags signal
+		pm_nl_add_endpoint $ns2 10.0.4.2 flags signal
+
+		# the peer could possibly miss some addr notification, allow retransmission
+		ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1
+		run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
+		chk_join_nr "signal addresses race test" 3 3 3
+
+		# the server will not signal the address terminating
+		# the MPC subflow
+		chk_add_nr 3 3
+	fi
 }
 
 link_failure_tests()
 {
 	# accept and use add_addr with additional subflows and link loss
-	reset
-
-	# without any b/w limit each veth could spool the packets and get
-	# them acked at xmit time, so that the corresponding subflow will
-	# have almost always no outstanding pkts, the scheduler will pick
-	# always the first subflow and we will have hard time testing
-	# active backup and link switch-over.
-	# Let's set some arbitrary (low) virtual link limits.
-	init_shapers
-	pm_nl_set_limits $ns1 0 3
-	pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
-	pm_nl_set_limits $ns2 1 3
-	pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow
-	pm_nl_add_endpoint $ns2 10.0.4.2 dev ns2eth4 flags subflow
-	run_tests $ns1 $ns2 10.0.1.1 1
-	chk_join_nr "multiple flows, signal, link failure" 3 3 3
-	chk_add_nr 1 1
-	chk_stale_nr $ns2 1 5 1
+	if reset; then
+		# without any b/w limit each veth could spool the packets and get
+		# them acked at xmit time, so that the corresponding subflow will
+		# have almost always no outstanding pkts, the scheduler will pick
+		# always the first subflow and we will have hard time testing
+		# active backup and link switch-over.
+		# Let's set some arbitrary (low) virtual link limits.
+		init_shapers
+		pm_nl_set_limits $ns1 0 3
+		pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
+		pm_nl_set_limits $ns2 1 3
+		pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.4.2 dev ns2eth4 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1 1
+		chk_join_nr "multiple flows, signal, link failure" 3 3 3
+		chk_add_nr 1 1
+		chk_stale_nr $ns2 1 5 1
+	fi
 
 	# accept and use add_addr with additional subflows and link loss
 	# for bidirectional transfer
-	reset
-	init_shapers
-	pm_nl_set_limits $ns1 0 3
-	pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
-	pm_nl_set_limits $ns2 1 3
-	pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow
-	pm_nl_add_endpoint $ns2 10.0.4.2 dev ns2eth4 flags subflow
-	run_tests $ns1 $ns2 10.0.1.1 2
-	chk_join_nr "multi flows, signal, bidi, link fail" 3 3 3
-	chk_add_nr 1 1
-	chk_stale_nr $ns2 1 -1 1
+	if reset; then
+		init_shapers
+		pm_nl_set_limits $ns1 0 3
+		pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
+		pm_nl_set_limits $ns2 1 3
+		pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.4.2 dev ns2eth4 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1 2
+		chk_join_nr "multi flows, signal, bidi, link fail" 3 3 3
+		chk_add_nr 1 1
+		chk_stale_nr $ns2 1 -1 1
+	fi
 
 	# 2 subflows plus 1 backup subflow with a lossy link, backup
 	# will never be used
-	reset
-	init_shapers
-	pm_nl_set_limits $ns1 0 2
-	pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
-	pm_nl_set_limits $ns2 1 2
-	FAILING_LINKS="1"
-	pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup
-	run_tests $ns1 $ns2 10.0.1.1 1
-	chk_join_nr "backup subflow unused, link failure" 2 2 2
-	chk_add_nr 1 1
-	chk_link_usage $ns2 ns2eth3 $cinsent 0
+	if reset; then
+		init_shapers
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
+		pm_nl_set_limits $ns2 1 2
+		FAILING_LINKS="1"
+		pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup
+		run_tests $ns1 $ns2 10.0.1.1 1
+		chk_join_nr "backup subflow unused, link failure" 2 2 2
+		chk_add_nr 1 1
+		chk_link_usage $ns2 ns2eth3 $cinsent 0
+	fi
 
 	# 2 lossy links after half transfer, backup will get half of
 	# the traffic
-	reset
-	init_shapers
-	pm_nl_set_limits $ns1 0 2
-	pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
-	pm_nl_set_limits $ns2 1 2
-	pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup
-	FAILING_LINKS="1 2"
-	run_tests $ns1 $ns2 10.0.1.1 1
-	chk_join_nr "backup flow used, multi links fail" 2 2 2
-	chk_add_nr 1 1
-	chk_stale_nr $ns2 2 4 2
-	chk_link_usage $ns2 ns2eth3 $cinsent 50
+	if reset; then
+		init_shapers
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
+		pm_nl_set_limits $ns2 1 2
+		pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup
+		FAILING_LINKS="1 2"
+		run_tests $ns1 $ns2 10.0.1.1 1
+		chk_join_nr "backup flow used, multi links fail" 2 2 2
+		chk_add_nr 1 1
+		chk_stale_nr $ns2 2 4 2
+		chk_link_usage $ns2 ns2eth3 $cinsent 50
+	fi
 
 	# use a backup subflow with the first subflow on a lossy link
 	# for bidirectional transfer
-	reset
-	init_shapers
-	pm_nl_set_limits $ns1 0 2
-	pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
-	pm_nl_set_limits $ns2 1 3
-	pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup
-	FAILING_LINKS="1 2"
-	run_tests $ns1 $ns2 10.0.1.1 2
-	chk_join_nr "backup flow used, bidi, link failure" 2 2 2
-	chk_add_nr 1 1
-	chk_stale_nr $ns2 1 -1 2
-	chk_link_usage $ns2 ns2eth3 $cinsent 50
+	if reset; then
+		init_shapers
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
+		pm_nl_set_limits $ns2 1 3
+		pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup
+		FAILING_LINKS="1 2"
+		run_tests $ns1 $ns2 10.0.1.1 2
+		chk_join_nr "backup flow used, bidi, link failure" 2 2 2
+		chk_add_nr 1 1
+		chk_stale_nr $ns2 1 -1 2
+		chk_link_usage $ns2 ns2eth3 $cinsent 50
+	fi
 }
 
 add_addr_timeout_tests()
 {
 	# add_addr timeout
-	reset_with_add_addr_timeout
-	pm_nl_set_limits $ns1 0 1
-	pm_nl_set_limits $ns2 1 1
-	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
-	run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
-	chk_join_nr "signal address, ADD_ADDR timeout" 1 1 1
-	chk_add_nr 4 0
+	if reset_with_add_addr_timeout; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 1 1
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
+		chk_join_nr "signal address, ADD_ADDR timeout" 1 1 1
+		chk_add_nr 4 0
+	fi
 
 	# add_addr timeout IPv6
-	reset_with_add_addr_timeout 6
-	pm_nl_set_limits $ns1 0 1
-	pm_nl_set_limits $ns2 1 1
-	pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
-	run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
-	chk_join_nr "signal address, ADD_ADDR6 timeout" 1 1 1
-	chk_add_nr 4 0
+	if reset_with_add_addr_timeout 6; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 1 1
+		pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
+		run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
+		chk_join_nr "signal address, ADD_ADDR6 timeout" 1 1 1
+		chk_add_nr 4 0
+	fi
 
 	# signal addresses timeout
-	reset_with_add_addr_timeout
-	pm_nl_set_limits $ns1 2 2
-	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
-	pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
-	pm_nl_set_limits $ns2 2 2
-	run_tests $ns1 $ns2 10.0.1.1 0 0 0 speed_10
-	chk_join_nr "signal addresses, ADD_ADDR timeout" 2 2 2
-	chk_add_nr 8 0
+	if reset_with_add_addr_timeout; then
+		pm_nl_set_limits $ns1 2 2
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+		pm_nl_set_limits $ns2 2 2
+		run_tests $ns1 $ns2 10.0.1.1 0 0 0 speed_10
+		chk_join_nr "signal addresses, ADD_ADDR timeout" 2 2 2
+		chk_add_nr 8 0
+	fi
 
 	# signal invalid addresses timeout
-	reset_with_add_addr_timeout
-	pm_nl_set_limits $ns1 2 2
-	pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
-	pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
-	pm_nl_set_limits $ns2 2 2
-	run_tests $ns1 $ns2 10.0.1.1 0 0 0 speed_10
-	chk_join_nr "invalid address, ADD_ADDR timeout" 1 1 1
-	chk_add_nr 8 0
+	if reset_with_add_addr_timeout; then
+		pm_nl_set_limits $ns1 2 2
+		pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+		pm_nl_set_limits $ns2 2 2
+		run_tests $ns1 $ns2 10.0.1.1 0 0 0 speed_10
+		chk_join_nr "invalid address, ADD_ADDR timeout" 1 1 1
+		chk_add_nr 8 0
+	fi
 }
 
 remove_tests()
 {
 	# single subflow, remove
-	reset
-	pm_nl_set_limits $ns1 0 1
-	pm_nl_set_limits $ns2 0 1
-	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
-	run_tests $ns1 $ns2 10.0.1.1 0 0 -1 slow
-	chk_join_nr "remove single subflow" 1 1 1
-	chk_rm_nr 1 1
+	if reset; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1 0 0 -1 slow
+		chk_join_nr "remove single subflow" 1 1 1
+		chk_rm_nr 1 1
+	fi
 
 	# multiple subflows, remove
-	reset
-	pm_nl_set_limits $ns1 0 2
-	pm_nl_set_limits $ns2 0 2
-	pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
-	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
-	run_tests $ns1 $ns2 10.0.1.1 0 0 -2 slow
-	chk_join_nr "remove multiple subflows" 2 2 2
-	chk_rm_nr 2 2
+	if reset; then
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 0 2
+		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1 0 0 -2 slow
+		chk_join_nr "remove multiple subflows" 2 2 2
+		chk_rm_nr 2 2
+	fi
 
 	# single address, remove
-	reset
-	pm_nl_set_limits $ns1 0 1
-	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
-	pm_nl_set_limits $ns2 1 1
-	run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow
-	chk_join_nr "remove single address" 1 1 1
-	chk_add_nr 1 1
-	chk_rm_nr 1 1 invert
+	if reset; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_set_limits $ns2 1 1
+		run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow
+		chk_join_nr "remove single address" 1 1 1
+		chk_add_nr 1 1
+		chk_rm_nr 1 1 invert
+	fi
 
 	# subflow and signal, remove
-	reset
-	pm_nl_set_limits $ns1 0 2
-	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
-	pm_nl_set_limits $ns2 1 2
-	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
-	run_tests $ns1 $ns2 10.0.1.1 0 -1 -1 slow
-	chk_join_nr "remove subflow and signal" 2 2 2
-	chk_add_nr 1 1
-	chk_rm_nr 1 1
+	if reset; then
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_set_limits $ns2 1 2
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1 0 -1 -1 slow
+		chk_join_nr "remove subflow and signal" 2 2 2
+		chk_add_nr 1 1
+		chk_rm_nr 1 1
+	fi
 
 	# subflows and signal, remove
-	reset
-	pm_nl_set_limits $ns1 0 3
-	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
-	pm_nl_set_limits $ns2 1 3
-	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
-	pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
-	run_tests $ns1 $ns2 10.0.1.1 0 -1 -2 slow
-	chk_join_nr "remove subflows and signal" 3 3 3
-	chk_add_nr 1 1
-	chk_rm_nr 2 2
+	if reset; then
+		pm_nl_set_limits $ns1 0 3
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_set_limits $ns2 1 3
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1 0 -1 -2 slow
+		chk_join_nr "remove subflows and signal" 3 3 3
+		chk_add_nr 1 1
+		chk_rm_nr 2 2
+	fi
 
 	# addresses remove
-	reset
-	pm_nl_set_limits $ns1 3 3
-	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250
-	pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
-	pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
-	pm_nl_set_limits $ns2 3 3
-	run_tests $ns1 $ns2 10.0.1.1 0 -3 0 slow
-	chk_join_nr "remove addresses" 3 3 3
-	chk_add_nr 3 3
-	chk_rm_nr 3 3 invert
+	if reset; then
+		pm_nl_set_limits $ns1 3 3
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250
+		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
+		pm_nl_set_limits $ns2 3 3
+		run_tests $ns1 $ns2 10.0.1.1 0 -3 0 slow
+		chk_join_nr "remove addresses" 3 3 3
+		chk_add_nr 3 3
+		chk_rm_nr 3 3 invert
+	fi
 
 	# invalid addresses remove
-	reset
-	pm_nl_set_limits $ns1 3 3
-	pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
-	pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
-	pm_nl_add_endpoint $ns1 10.0.14.1 flags signal
-	pm_nl_set_limits $ns2 3 3
-	run_tests $ns1 $ns2 10.0.1.1 0 -3 0 slow
-	chk_join_nr "remove invalid addresses" 1 1 1
-	chk_add_nr 3 3
-	chk_rm_nr 3 1 invert
+	if reset; then
+		pm_nl_set_limits $ns1 3 3
+		pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.14.1 flags signal
+		pm_nl_set_limits $ns2 3 3
+		run_tests $ns1 $ns2 10.0.1.1 0 -3 0 slow
+		chk_join_nr "remove invalid addresses" 1 1 1
+		chk_add_nr 3 3
+		chk_rm_nr 3 1 invert
+	fi
 
 	# subflows and signal, flush
-	reset
-	pm_nl_set_limits $ns1 0 3
-	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
-	pm_nl_set_limits $ns2 1 3
-	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
-	pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
-	run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow
-	chk_join_nr "flush subflows and signal" 3 3 3
-	chk_add_nr 1 1
-	chk_rm_nr 1 3 invert simult
+	if reset; then
+		pm_nl_set_limits $ns1 0 3
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_set_limits $ns2 1 3
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow
+		chk_join_nr "flush subflows and signal" 3 3 3
+		chk_add_nr 1 1
+		chk_rm_nr 1 3 invert simult
+	fi
 
 	# subflows flush
-	reset
-	pm_nl_set_limits $ns1 3 3
-	pm_nl_set_limits $ns2 3 3
-	pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow id 150
-	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
-	pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
-	run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow
-	chk_join_nr "flush subflows" 3 3 3
-	chk_rm_nr 0 3 simult
+	if reset; then
+		pm_nl_set_limits $ns1 3 3
+		pm_nl_set_limits $ns2 3 3
+		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow id 150
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow
+		chk_join_nr "flush subflows" 3 3 3
+		chk_rm_nr 0 3 simult
+	fi
 
 	# addresses flush
-	reset
-	pm_nl_set_limits $ns1 3 3
-	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250
-	pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
-	pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
-	pm_nl_set_limits $ns2 3 3
-	run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow
-	chk_join_nr "flush addresses" 3 3 3
-	chk_add_nr 3 3
-	chk_rm_nr 3 3 invert simult
+	if reset; then
+		pm_nl_set_limits $ns1 3 3
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250
+		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
+		pm_nl_set_limits $ns2 3 3
+		run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow
+		chk_join_nr "flush addresses" 3 3 3
+		chk_add_nr 3 3
+		chk_rm_nr 3 3 invert simult
+	fi
 
 	# invalid addresses flush
-	reset
-	pm_nl_set_limits $ns1 3 3
-	pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
-	pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
-	pm_nl_add_endpoint $ns1 10.0.14.1 flags signal
-	pm_nl_set_limits $ns2 3 3
-	run_tests $ns1 $ns2 10.0.1.1 0 -8 0 slow
-	chk_join_nr "flush invalid addresses" 1 1 1
-	chk_add_nr 3 3
-	chk_rm_nr 3 1 invert
+	if reset; then
+		pm_nl_set_limits $ns1 3 3
+		pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.14.1 flags signal
+		pm_nl_set_limits $ns2 3 3
+		run_tests $ns1 $ns2 10.0.1.1 0 -8 0 slow
+		chk_join_nr "flush invalid addresses" 1 1 1
+		chk_add_nr 3 3
+		chk_rm_nr 3 1 invert
+	fi
 
 	# remove id 0 subflow
-	reset
-	pm_nl_set_limits $ns1 0 1
-	pm_nl_set_limits $ns2 0 1
-	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
-	run_tests $ns1 $ns2 10.0.1.1 0 0 -9 slow
-	chk_join_nr "remove id 0 subflow" 1 1 1
-	chk_rm_nr 1 1
+	if reset; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1 0 0 -9 slow
+		chk_join_nr "remove id 0 subflow" 1 1 1
+		chk_rm_nr 1 1
+	fi
 
 	# remove id 0 address
-	reset
-	pm_nl_set_limits $ns1 0 1
-	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
-	pm_nl_set_limits $ns2 1 1
-	run_tests $ns1 $ns2 10.0.1.1 0 -9 0 slow
-	chk_join_nr "remove id 0 address" 1 1 1
-	chk_add_nr 1 1
-	chk_rm_nr 1 1 invert
+	if reset; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_set_limits $ns2 1 1
+		run_tests $ns1 $ns2 10.0.1.1 0 -9 0 slow
+		chk_join_nr "remove id 0 address" 1 1 1
+		chk_add_nr 1 1
+		chk_rm_nr 1 1 invert
+	fi
 }
 
 add_tests()
 {
 	# add single subflow
-	reset
-	pm_nl_set_limits $ns1 0 1
-	pm_nl_set_limits $ns2 0 1
-	run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow
-	chk_join_nr "add single subflow" 1 1 1
+	if reset; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow
+		chk_join_nr "add single subflow" 1 1 1
+	fi
 
 	# add signal address
-	reset
-	pm_nl_set_limits $ns1 0 1
-	pm_nl_set_limits $ns2 1 1
-	run_tests $ns1 $ns2 10.0.1.1 0 1 0 slow
-	chk_join_nr "add signal address" 1 1 1
-	chk_add_nr 1 1
+	if reset; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 1 1
+		run_tests $ns1 $ns2 10.0.1.1 0 1 0 slow
+		chk_join_nr "add signal address" 1 1 1
+		chk_add_nr 1 1
+	fi
 
 	# add multiple subflows
-	reset
-	pm_nl_set_limits $ns1 0 2
-	pm_nl_set_limits $ns2 0 2
-	run_tests $ns1 $ns2 10.0.1.1 0 0 2 slow
-	chk_join_nr "add multiple subflows" 2 2 2
+	if reset; then
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 0 2
+		run_tests $ns1 $ns2 10.0.1.1 0 0 2 slow
+		chk_join_nr "add multiple subflows" 2 2 2
+	fi
 
 	# add multiple subflows IPv6
-	reset
-	pm_nl_set_limits $ns1 0 2
-	pm_nl_set_limits $ns2 0 2
-	run_tests $ns1 $ns2 dead:beef:1::1 0 0 2 slow
-	chk_join_nr "add multiple subflows IPv6" 2 2 2
+	if reset; then
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 0 2
+		run_tests $ns1 $ns2 dead:beef:1::1 0 0 2 slow
+		chk_join_nr "add multiple subflows IPv6" 2 2 2
+	fi
 
 	# add multiple addresses IPv6
-	reset
-	pm_nl_set_limits $ns1 0 2
-	pm_nl_set_limits $ns2 2 2
-	run_tests $ns1 $ns2 dead:beef:1::1 0 2 0 slow
-	chk_join_nr "add multiple addresses IPv6" 2 2 2
-	chk_add_nr 2 2
+	if reset; then
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 2 2
+		run_tests $ns1 $ns2 dead:beef:1::1 0 2 0 slow
+		chk_join_nr "add multiple addresses IPv6" 2 2 2
+		chk_add_nr 2 2
+	fi
 }
 
 ipv6_tests()
 {
 	# subflow IPv6
-	reset
-	pm_nl_set_limits $ns1 0 1
-	pm_nl_set_limits $ns2 0 1
-	pm_nl_add_endpoint $ns2 dead:beef:3::2 dev ns2eth3 flags subflow
-	run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
-	chk_join_nr "single subflow IPv6" 1 1 1
+	if reset; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		pm_nl_add_endpoint $ns2 dead:beef:3::2 dev ns2eth3 flags subflow
+		run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
+		chk_join_nr "single subflow IPv6" 1 1 1
+	fi
 
 	# add_address, unused IPv6
-	reset
-	pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
-	run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
-	chk_join_nr "unused signal address IPv6" 0 0 0
-	chk_add_nr 1 1
+	if reset; then
+		pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
+		run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
+		chk_join_nr "unused signal address IPv6" 0 0 0
+		chk_add_nr 1 1
+	fi
 
 	# signal address IPv6
-	reset
-	pm_nl_set_limits $ns1 0 1
-	pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
-	pm_nl_set_limits $ns2 1 1
-	run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
-	chk_join_nr "single address IPv6" 1 1 1
-	chk_add_nr 1 1
+	if reset; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
+		pm_nl_set_limits $ns2 1 1
+		run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
+		chk_join_nr "single address IPv6" 1 1 1
+		chk_add_nr 1 1
+	fi
 
 	# single address IPv6, remove
-	reset
-	pm_nl_set_limits $ns1 0 1
-	pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
-	pm_nl_set_limits $ns2 1 1
-	run_tests $ns1 $ns2 dead:beef:1::1 0 -1 0 slow
-	chk_join_nr "remove single address IPv6" 1 1 1
-	chk_add_nr 1 1
-	chk_rm_nr 1 1 invert
+	if reset; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
+		pm_nl_set_limits $ns2 1 1
+		run_tests $ns1 $ns2 dead:beef:1::1 0 -1 0 slow
+		chk_join_nr "remove single address IPv6" 1 1 1
+		chk_add_nr 1 1
+		chk_rm_nr 1 1 invert
+	fi
 
 	# subflow and signal IPv6, remove
-	reset
-	pm_nl_set_limits $ns1 0 2
-	pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
-	pm_nl_set_limits $ns2 1 2
-	pm_nl_add_endpoint $ns2 dead:beef:3::2 dev ns2eth3 flags subflow
-	run_tests $ns1 $ns2 dead:beef:1::1 0 -1 -1 slow
-	chk_join_nr "remove subflow and signal IPv6" 2 2 2
-	chk_add_nr 1 1
-	chk_rm_nr 1 1
+	if reset; then
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
+		pm_nl_set_limits $ns2 1 2
+		pm_nl_add_endpoint $ns2 dead:beef:3::2 dev ns2eth3 flags subflow
+		run_tests $ns1 $ns2 dead:beef:1::1 0 -1 -1 slow
+		chk_join_nr "remove subflow and signal IPv6" 2 2 2
+		chk_add_nr 1 1
+		chk_rm_nr 1 1
+	fi
 }
 
 v4mapped_tests()
 {
 	# subflow IPv4-mapped to IPv4-mapped
-	reset
-	pm_nl_set_limits $ns1 0 1
-	pm_nl_set_limits $ns2 0 1
-	pm_nl_add_endpoint $ns2 "::ffff:10.0.3.2" flags subflow
-	run_tests $ns1 $ns2 "::ffff:10.0.1.1"
-	chk_join_nr "single subflow IPv4-mapped" 1 1 1
+	if reset; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		pm_nl_add_endpoint $ns2 "::ffff:10.0.3.2" flags subflow
+		run_tests $ns1 $ns2 "::ffff:10.0.1.1"
+		chk_join_nr "single subflow IPv4-mapped" 1 1 1
+	fi
 
 	# signal address IPv4-mapped with IPv4-mapped sk
-	reset
-	pm_nl_set_limits $ns1 0 1
-	pm_nl_set_limits $ns2 1 1
-	pm_nl_add_endpoint $ns1 "::ffff:10.0.2.1" flags signal
-	run_tests $ns1 $ns2 "::ffff:10.0.1.1"
-	chk_join_nr "signal address IPv4-mapped" 1 1 1
-	chk_add_nr 1 1
+	if reset; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 1 1
+		pm_nl_add_endpoint $ns1 "::ffff:10.0.2.1" flags signal
+		run_tests $ns1 $ns2 "::ffff:10.0.1.1"
+		chk_join_nr "signal address IPv4-mapped" 1 1 1
+		chk_add_nr 1 1
+	fi
 
 	# subflow v4-map-v6
-	reset
-	pm_nl_set_limits $ns1 0 1
-	pm_nl_set_limits $ns2 0 1
-	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
-	run_tests $ns1 $ns2 "::ffff:10.0.1.1"
-	chk_join_nr "single subflow v4-map-v6" 1 1 1
+	if reset; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 "::ffff:10.0.1.1"
+		chk_join_nr "single subflow v4-map-v6" 1 1 1
+	fi
 
 	# signal address v4-map-v6
-	reset
-	pm_nl_set_limits $ns1 0 1
-	pm_nl_set_limits $ns2 1 1
-	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
-	run_tests $ns1 $ns2 "::ffff:10.0.1.1"
-	chk_join_nr "signal address v4-map-v6" 1 1 1
-	chk_add_nr 1 1
+	if reset; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 1 1
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		run_tests $ns1 $ns2 "::ffff:10.0.1.1"
+		chk_join_nr "signal address v4-map-v6" 1 1 1
+		chk_add_nr 1 1
+	fi
 
 	# subflow v6-map-v4
-	reset
-	pm_nl_set_limits $ns1 0 1
-	pm_nl_set_limits $ns2 0 1
-	pm_nl_add_endpoint $ns2 "::ffff:10.0.3.2" flags subflow
-	run_tests $ns1 $ns2 10.0.1.1
-	chk_join_nr "single subflow v6-map-v4" 1 1 1
+	if reset; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		pm_nl_add_endpoint $ns2 "::ffff:10.0.3.2" flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr "single subflow v6-map-v4" 1 1 1
+	fi
 
 	# signal address v6-map-v4
-	reset
-	pm_nl_set_limits $ns1 0 1
-	pm_nl_set_limits $ns2 1 1
-	pm_nl_add_endpoint $ns1 "::ffff:10.0.2.1" flags signal
-	run_tests $ns1 $ns2 10.0.1.1
-	chk_join_nr "signal address v6-map-v4" 1 1 1
-	chk_add_nr 1 1
+	if reset; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 1 1
+		pm_nl_add_endpoint $ns1 "::ffff:10.0.2.1" flags signal
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr "signal address v6-map-v4" 1 1 1
+		chk_add_nr 1 1
+	fi
 
 	# no subflow IPv6 to v4 address
-	reset
-	pm_nl_set_limits $ns1 0 1
-	pm_nl_set_limits $ns2 0 1
-	pm_nl_add_endpoint $ns2 dead:beef:2::2 flags subflow
-	run_tests $ns1 $ns2 10.0.1.1
-	chk_join_nr "no JOIN with diff families v4-v6" 0 0 0
+	if reset; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		pm_nl_add_endpoint $ns2 dead:beef:2::2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr "no JOIN with diff families v4-v6" 0 0 0
+	fi
 
 	# no subflow IPv6 to v4 address even if v6 has a valid v4 at the end
-	reset
-	pm_nl_set_limits $ns1 0 1
-	pm_nl_set_limits $ns2 0 1
-	pm_nl_add_endpoint $ns2 dead:beef:2::10.0.3.2 flags subflow
-	run_tests $ns1 $ns2 10.0.1.1
-	chk_join_nr "no JOIN with diff families v4-v6-2" 0 0 0
+	if reset; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		pm_nl_add_endpoint $ns2 dead:beef:2::10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr "no JOIN with diff families v4-v6-2" 0 0 0
+	fi
 
 	# no subflow IPv4 to v6 address, no need to slow down too then
-	reset
-	pm_nl_set_limits $ns1 0 1
-	pm_nl_set_limits $ns2 0 1
-	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
-	run_tests $ns1 $ns2 dead:beef:1::1
-	chk_join_nr "no JOIN with diff families v6-v4" 0 0 0
+	if reset; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 dead:beef:1::1
+		chk_join_nr "no JOIN with diff families v6-v4" 0 0 0
+	fi
 }
 
 backup_tests()
 {
 	# single subflow, backup
-	reset
-	pm_nl_set_limits $ns1 0 1
-	pm_nl_set_limits $ns2 0 1
-	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup
-	run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow nobackup
-	chk_join_nr "single subflow, backup" 1 1 1
-	chk_prio_nr 0 1
+	if reset; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup
+		run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow nobackup
+		chk_join_nr "single subflow, backup" 1 1 1
+		chk_prio_nr 0 1
+	fi
 
 	# single address, backup
-	reset
-	pm_nl_set_limits $ns1 0 1
-	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
-	pm_nl_set_limits $ns2 1 1
-	run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup
-	chk_join_nr "single address, backup" 1 1 1
-	chk_add_nr 1 1
-	chk_prio_nr 1 1
+	if reset; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_set_limits $ns2 1 1
+		run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup
+		chk_join_nr "single address, backup" 1 1 1
+		chk_add_nr 1 1
+		chk_prio_nr 1 1
+	fi
 
 	# single address with port, backup
-	reset
-	pm_nl_set_limits $ns1 0 1
-	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
-	pm_nl_set_limits $ns2 1 1
-	run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup
-	chk_join_nr "single address with port, backup" 1 1 1
-	chk_add_nr 1 1
-	chk_prio_nr 1 1
+	if reset; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+		pm_nl_set_limits $ns2 1 1
+		run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup
+		chk_join_nr "single address with port, backup" 1 1 1
+		chk_add_nr 1 1
+		chk_prio_nr 1 1
+	fi
 }
 
 add_addr_ports_tests()
 {
 	# signal address with port
-	reset
-	pm_nl_set_limits $ns1 0 1
-	pm_nl_set_limits $ns2 1 1
-	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
-	run_tests $ns1 $ns2 10.0.1.1
-	chk_join_nr "signal address with port" 1 1 1
-	chk_add_nr 1 1 1
+	if reset; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 1 1
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr "signal address with port" 1 1 1
+		chk_add_nr 1 1 1
+	fi
 
 	# subflow and signal with port
-	reset
-	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
-	pm_nl_set_limits $ns1 0 2
-	pm_nl_set_limits $ns2 1 2
-	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
-	run_tests $ns1 $ns2 10.0.1.1
-	chk_join_nr "subflow and signal with port" 2 2 2
-	chk_add_nr 1 1 1
+	if reset; then
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 1 2
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr "subflow and signal with port" 2 2 2
+		chk_add_nr 1 1 1
+	fi
 
 	# single address with port, remove
-	reset
-	pm_nl_set_limits $ns1 0 1
-	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
-	pm_nl_set_limits $ns2 1 1
-	run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow
-	chk_join_nr "remove single address with port" 1 1 1
-	chk_add_nr 1 1 1
-	chk_rm_nr 1 1 invert
+	if reset; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+		pm_nl_set_limits $ns2 1 1
+		run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow
+		chk_join_nr "remove single address with port" 1 1 1
+		chk_add_nr 1 1 1
+		chk_rm_nr 1 1 invert
+	fi
 
 	# subflow and signal with port, remove
-	reset
-	pm_nl_set_limits $ns1 0 2
-	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
-	pm_nl_set_limits $ns2 1 2
-	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
-	run_tests $ns1 $ns2 10.0.1.1 0 -1 -1 slow
-	chk_join_nr "remove subflow and signal with port" 2 2 2
-	chk_add_nr 1 1 1
-	chk_rm_nr 1 1
+	if reset; then
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+		pm_nl_set_limits $ns2 1 2
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1 0 -1 -1 slow
+		chk_join_nr "remove subflow and signal with port" 2 2 2
+		chk_add_nr 1 1 1
+		chk_rm_nr 1 1
+	fi
 
 	# subflows and signal with port, flush
-	reset
-	pm_nl_set_limits $ns1 0 3
-	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
-	pm_nl_set_limits $ns2 1 3
-	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
-	pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
-	run_tests $ns1 $ns2 10.0.1.1 0 -8 -2 slow
-	chk_join_nr "flush subflows and signal with port" 3 3 3
-	chk_add_nr 1 1
-	chk_rm_nr 1 3 invert simult
+	if reset; then
+		pm_nl_set_limits $ns1 0 3
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+		pm_nl_set_limits $ns2 1 3
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1 0 -8 -2 slow
+		chk_join_nr "flush subflows and signal with port" 3 3 3
+		chk_add_nr 1 1
+		chk_rm_nr 1 3 invert simult
+	fi
 
 	# multiple addresses with port
-	reset
-	pm_nl_set_limits $ns1 2 2
-	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
-	pm_nl_add_endpoint $ns1 10.0.3.1 flags signal port 10100
-	pm_nl_set_limits $ns2 2 2
-	run_tests $ns1 $ns2 10.0.1.1
-	chk_join_nr "multiple addresses with port" 2 2 2
-	chk_add_nr 2 2 2
+	if reset; then
+		pm_nl_set_limits $ns1 2 2
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal port 10100
+		pm_nl_set_limits $ns2 2 2
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr "multiple addresses with port" 2 2 2
+		chk_add_nr 2 2 2
+	fi
 
 	# multiple addresses with ports
-	reset
-	pm_nl_set_limits $ns1 2 2
-	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
-	pm_nl_add_endpoint $ns1 10.0.3.1 flags signal port 10101
-	pm_nl_set_limits $ns2 2 2
-	run_tests $ns1 $ns2 10.0.1.1
-	chk_join_nr "multiple addresses with ports" 2 2 2
-	chk_add_nr 2 2 2
+	if reset; then
+		pm_nl_set_limits $ns1 2 2
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal port 10101
+		pm_nl_set_limits $ns2 2 2
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr "multiple addresses with ports" 2 2 2
+		chk_add_nr 2 2 2
+	fi
 }
 
 syncookies_tests()
 {
 	# single subflow, syncookies
-	reset_with_cookies
-	pm_nl_set_limits $ns1 0 1
-	pm_nl_set_limits $ns2 0 1
-	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
-	run_tests $ns1 $ns2 10.0.1.1
-	chk_join_nr "single subflow with syn cookies" 1 1 1
+	if reset_with_cookies; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr "single subflow with syn cookies" 1 1 1
+	fi
 
 	# multiple subflows with syn cookies
-	reset_with_cookies
-	pm_nl_set_limits $ns1 0 2
-	pm_nl_set_limits $ns2 0 2
-	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
-	pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
-	run_tests $ns1 $ns2 10.0.1.1
-	chk_join_nr "multiple subflows with syn cookies" 2 2 2
+	if reset_with_cookies; then
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 0 2
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr "multiple subflows with syn cookies" 2 2 2
+	fi
 
 	# multiple subflows limited by server
-	reset_with_cookies
-	pm_nl_set_limits $ns1 0 1
-	pm_nl_set_limits $ns2 0 2
-	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
-	pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
-	run_tests $ns1 $ns2 10.0.1.1
-	chk_join_nr "subflows limited by server w cookies" 2 1 1
+	if reset_with_cookies; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 2
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr "subflows limited by server w cookies" 2 1 1
+	fi
 
 	# test signal address with cookies
-	reset_with_cookies
-	pm_nl_set_limits $ns1 0 1
-	pm_nl_set_limits $ns2 1 1
-	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
-	run_tests $ns1 $ns2 10.0.1.1
-	chk_join_nr "signal address with syn cookies" 1 1 1
-	chk_add_nr 1 1
+	if reset_with_cookies; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 1 1
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr "signal address with syn cookies" 1 1 1
+		chk_add_nr 1 1
+	fi
 
 	# test cookie with subflow and signal
-	reset_with_cookies
-	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
-	pm_nl_set_limits $ns1 0 2
-	pm_nl_set_limits $ns2 1 2
-	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
-	run_tests $ns1 $ns2 10.0.1.1
-	chk_join_nr "subflow and signal w cookies" 2 2 2
-	chk_add_nr 1 1
+	if reset_with_cookies; then
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 1 2
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr "subflow and signal w cookies" 2 2 2
+		chk_add_nr 1 1
+	fi
 
 	# accept and use add_addr with additional subflows
-	reset_with_cookies
-	pm_nl_set_limits $ns1 0 3
-	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
-	pm_nl_set_limits $ns2 1 3
-	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
-	pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
-	run_tests $ns1 $ns2 10.0.1.1
-	chk_join_nr "subflows and signal w. cookies" 3 3 3
-	chk_add_nr 1 1
+	if reset_with_cookies; then
+		pm_nl_set_limits $ns1 0 3
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_set_limits $ns2 1 3
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr "subflows and signal w. cookies" 3 3 3
+		chk_add_nr 1 1
+	fi
 }
 
 checksum_tests()
 {
 	# checksum test 0 0
-	reset_with_checksum 0 0
-	pm_nl_set_limits $ns1 0 1
-	pm_nl_set_limits $ns2 0 1
-	run_tests $ns1 $ns2 10.0.1.1
-	chk_join_nr "checksum test 0 0" 0 0 0
+	if reset_with_checksum 0 0; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr "checksum test 0 0" 0 0 0
+	fi
 
 	# checksum test 1 1
-	reset_with_checksum 1 1
-	pm_nl_set_limits $ns1 0 1
-	pm_nl_set_limits $ns2 0 1
-	run_tests $ns1 $ns2 10.0.1.1
-	chk_join_nr "checksum test 1 1" 0 0 0
+	if reset_with_checksum 1 1; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr "checksum test 1 1" 0 0 0
+	fi
 
 	# checksum test 0 1
-	reset_with_checksum 0 1
-	pm_nl_set_limits $ns1 0 1
-	pm_nl_set_limits $ns2 0 1
-	run_tests $ns1 $ns2 10.0.1.1
-	chk_join_nr "checksum test 0 1" 0 0 0
+	if reset_with_checksum 0 1; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr "checksum test 0 1" 0 0 0
+	fi
 
 	# checksum test 1 0
-	reset_with_checksum 1 0
-	pm_nl_set_limits $ns1 0 1
-	pm_nl_set_limits $ns2 0 1
-	run_tests $ns1 $ns2 10.0.1.1
-	chk_join_nr "checksum test 1 0" 0 0 0
+	if reset_with_checksum 1 0; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr "checksum test 1 0" 0 0 0
+	fi
 }
 
 deny_join_id0_tests()
 {
 	# subflow allow join id0 ns1
-	reset_with_allow_join_id0 1 0
-	pm_nl_set_limits $ns1 1 1
-	pm_nl_set_limits $ns2 1 1
-	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
-	run_tests $ns1 $ns2 10.0.1.1
-	chk_join_nr "single subflow allow join id0 ns1" 1 1 1
+	if reset_with_allow_join_id0 1 0; then
+		pm_nl_set_limits $ns1 1 1
+		pm_nl_set_limits $ns2 1 1
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr "single subflow allow join id0 ns1" 1 1 1
+	fi
 
 	# subflow allow join id0 ns2
-	reset_with_allow_join_id0 0 1
-	pm_nl_set_limits $ns1 1 1
-	pm_nl_set_limits $ns2 1 1
-	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
-	run_tests $ns1 $ns2 10.0.1.1
-	chk_join_nr "single subflow allow join id0 ns2" 0 0 0
+	if reset_with_allow_join_id0 0 1; then
+		pm_nl_set_limits $ns1 1 1
+		pm_nl_set_limits $ns2 1 1
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr "single subflow allow join id0 ns2" 0 0 0
+	fi
 
 	# signal address allow join id0 ns1
 	# ADD_ADDRs are not affected by allow_join_id0 value.
-	reset_with_allow_join_id0 1 0
-	pm_nl_set_limits $ns1 1 1
-	pm_nl_set_limits $ns2 1 1
-	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
-	run_tests $ns1 $ns2 10.0.1.1
-	chk_join_nr "signal address allow join id0 ns1" 1 1 1
-	chk_add_nr 1 1
+	if reset_with_allow_join_id0 1 0; then
+		pm_nl_set_limits $ns1 1 1
+		pm_nl_set_limits $ns2 1 1
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr "signal address allow join id0 ns1" 1 1 1
+		chk_add_nr 1 1
+	fi
 
 	# signal address allow join id0 ns2
 	# ADD_ADDRs are not affected by allow_join_id0 value.
-	reset_with_allow_join_id0 0 1
-	pm_nl_set_limits $ns1 1 1
-	pm_nl_set_limits $ns2 1 1
-	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
-	run_tests $ns1 $ns2 10.0.1.1
-	chk_join_nr "signal address allow join id0 ns2" 1 1 1
-	chk_add_nr 1 1
+	if reset_with_allow_join_id0 0 1; then
+		pm_nl_set_limits $ns1 1 1
+		pm_nl_set_limits $ns2 1 1
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr "signal address allow join id0 ns2" 1 1 1
+		chk_add_nr 1 1
+	fi
 
 	# subflow and address allow join id0 ns1
-	reset_with_allow_join_id0 1 0
-	pm_nl_set_limits $ns1 2 2
-	pm_nl_set_limits $ns2 2 2
-	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
-	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
-	run_tests $ns1 $ns2 10.0.1.1
-	chk_join_nr "subflow and address allow join id0 1" 2 2 2
+	if reset_with_allow_join_id0 1 0; then
+		pm_nl_set_limits $ns1 2 2
+		pm_nl_set_limits $ns2 2 2
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr "subflow and address allow join id0 1" 2 2 2
+	fi
 
 	# subflow and address allow join id0 ns2
-	reset_with_allow_join_id0 0 1
-	pm_nl_set_limits $ns1 2 2
-	pm_nl_set_limits $ns2 2 2
-	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
-	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
-	run_tests $ns1 $ns2 10.0.1.1
-	chk_join_nr "subflow and address allow join id0 2" 1 1 1
+	if reset_with_allow_join_id0 0 1; then
+		pm_nl_set_limits $ns1 2 2
+		pm_nl_set_limits $ns2 2 2
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr "subflow and address allow join id0 2" 1 1 1
+	fi
 }
 
 fullmesh_tests()
@@ -2270,119 +2374,128 @@ fullmesh_tests()
 	# fullmesh 1
 	# 2 fullmesh addrs in ns2, added before the connection,
 	# 1 non-fullmesh addr in ns1, added during the connection.
-	reset
-	pm_nl_set_limits $ns1 0 4
-	pm_nl_set_limits $ns2 1 4
-	pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,fullmesh
-	pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,fullmesh
-	run_tests $ns1 $ns2 10.0.1.1 0 1 0 slow
-	chk_join_nr "fullmesh test 2x1" 4 4 4
-	chk_add_nr 1 1
+	if reset; then
+		pm_nl_set_limits $ns1 0 4
+		pm_nl_set_limits $ns2 1 4
+		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,fullmesh
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,fullmesh
+		run_tests $ns1 $ns2 10.0.1.1 0 1 0 slow
+		chk_join_nr "fullmesh test 2x1" 4 4 4
+		chk_add_nr 1 1
+	fi
 
 	# fullmesh 2
 	# 1 non-fullmesh addr in ns1, added before the connection,
 	# 1 fullmesh addr in ns2, added during the connection.
-	reset
-	pm_nl_set_limits $ns1 1 3
-	pm_nl_set_limits $ns2 1 3
-	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
-	run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_1 slow
-	chk_join_nr "fullmesh test 1x1" 3 3 3
-	chk_add_nr 1 1
+	if reset; then
+		pm_nl_set_limits $ns1 1 3
+		pm_nl_set_limits $ns2 1 3
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_1 slow
+		chk_join_nr "fullmesh test 1x1" 3 3 3
+		chk_add_nr 1 1
+	fi
 
 	# fullmesh 3
 	# 1 non-fullmesh addr in ns1, added before the connection,
 	# 2 fullmesh addrs in ns2, added during the connection.
-	reset
-	pm_nl_set_limits $ns1 2 5
-	pm_nl_set_limits $ns2 1 5
-	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
-	run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow
-	chk_join_nr "fullmesh test 1x2" 5 5 5
-	chk_add_nr 1 1
+	if reset; then
+		pm_nl_set_limits $ns1 2 5
+		pm_nl_set_limits $ns2 1 5
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow
+		chk_join_nr "fullmesh test 1x2" 5 5 5
+		chk_add_nr 1 1
+	fi
 
 	# fullmesh 4
 	# 1 non-fullmesh addr in ns1, added before the connection,
 	# 2 fullmesh addrs in ns2, added during the connection,
 	# limit max_subflows to 4.
-	reset
-	pm_nl_set_limits $ns1 2 4
-	pm_nl_set_limits $ns2 1 4
-	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
-	run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow
-	chk_join_nr "fullmesh test 1x2, limited" 4 4 4
-	chk_add_nr 1 1
+	if reset; then
+		pm_nl_set_limits $ns1 2 4
+		pm_nl_set_limits $ns2 1 4
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow
+		chk_join_nr "fullmesh test 1x2, limited" 4 4 4
+		chk_add_nr 1 1
+	fi
 
 	# set fullmesh flag
-	reset
-	pm_nl_set_limits $ns1 4 4
-	pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow
-	pm_nl_set_limits $ns2 4 4
-	run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow fullmesh
-	chk_join_nr "set fullmesh flag test" 2 2 2
-	chk_rm_nr 0 1
+	if reset; then
+		pm_nl_set_limits $ns1 4 4
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow
+		pm_nl_set_limits $ns2 4 4
+		run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow fullmesh
+		chk_join_nr "set fullmesh flag test" 2 2 2
+		chk_rm_nr 0 1
+	fi
 
 	# set nofullmesh flag
-	reset
-	pm_nl_set_limits $ns1 4 4
-	pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow,fullmesh
-	pm_nl_set_limits $ns2 4 4
-	run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_1 slow nofullmesh
-	chk_join_nr "set nofullmesh flag test" 2 2 2
-	chk_rm_nr 0 1
+	if reset; then
+		pm_nl_set_limits $ns1 4 4
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow,fullmesh
+		pm_nl_set_limits $ns2 4 4
+		run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_1 slow nofullmesh
+		chk_join_nr "set nofullmesh flag test" 2 2 2
+		chk_rm_nr 0 1
+	fi
 
 	# set backup,fullmesh flags
-	reset
-	pm_nl_set_limits $ns1 4 4
-	pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow
-	pm_nl_set_limits $ns2 4 4
-	run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow backup,fullmesh
-	chk_join_nr "set backup,fullmesh flags test" 2 2 2
-	chk_prio_nr 0 1
-	chk_rm_nr 0 1
+	if reset; then
+		pm_nl_set_limits $ns1 4 4
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow
+		pm_nl_set_limits $ns2 4 4
+		run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow backup,fullmesh
+		chk_join_nr "set backup,fullmesh flags test" 2 2 2
+		chk_prio_nr 0 1
+		chk_rm_nr 0 1
+	fi
 
 	# set nobackup,nofullmesh flags
-	reset
-	pm_nl_set_limits $ns1 4 4
-	pm_nl_set_limits $ns2 4 4
-	pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup,fullmesh
-	run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow nobackup,nofullmesh
-	chk_join_nr "set nobackup,nofullmesh flags test" 2 2 2
-	chk_prio_nr 0 1
-	chk_rm_nr 0 1
+	if reset; then
+		pm_nl_set_limits $ns1 4 4
+		pm_nl_set_limits $ns2 4 4
+		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup,fullmesh
+		run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow nobackup,nofullmesh
+		chk_join_nr "set nobackup,nofullmesh flags test" 2 2 2
+		chk_prio_nr 0 1
+		chk_rm_nr 0 1
+	fi
 }
 
 fastclose_tests()
 {
-	reset
-	run_tests $ns1 $ns2 10.0.1.1 1024 0 fastclose_2
-	chk_join_nr "fastclose test" 0 0 0
-	chk_fclose_nr 1 1
-	chk_rst_nr 1 1 invert
+	if reset; then
+		run_tests $ns1 $ns2 10.0.1.1 1024 0 fastclose_2
+		chk_join_nr "fastclose test" 0 0 0
+		chk_fclose_nr 1 1
+		chk_rst_nr 1 1 invert
+	fi
 }
 
 implicit_tests()
 {
 	# userspace pm type prevents add_addr
-	reset
-	pm_nl_set_limits $ns1 2 2
-	pm_nl_set_limits $ns2 2 2
-	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
-	run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow &
-
-	wait_mpj $ns1
-	TEST_COUNT=$((TEST_COUNT + 1))
-	pm_nl_check_endpoint "implicit EP" "creation" \
-		$ns2 10.0.2.2 id 1 flags implicit
-
-	pm_nl_add_endpoint $ns2 10.0.2.2 id 33
-	pm_nl_check_endpoint "" "ID change is prevented" \
-		$ns2 10.0.2.2 id 1 flags implicit
-
-	pm_nl_add_endpoint $ns2 10.0.2.2 flags signal
-	pm_nl_check_endpoint "" "modif is allowed" \
-		$ns2 10.0.2.2 id 1 flags signal
-	wait
+	if reset; then
+		pm_nl_set_limits $ns1 2 2
+		pm_nl_set_limits $ns2 2 2
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow &
+
+		wait_mpj $ns1
+		pm_nl_check_endpoint "implicit EP" "creation" \
+			$ns2 10.0.2.2 id 1 flags implicit
+
+		pm_nl_add_endpoint $ns2 10.0.2.2 id 33
+		pm_nl_check_endpoint "" "ID change is prevented" \
+			$ns2 10.0.2.2 id 1 flags implicit
+
+		pm_nl_add_endpoint $ns2 10.0.2.2 flags signal
+		pm_nl_check_endpoint "" "modif is allowed" \
+			$ns2 10.0.2.2 id 1 flags signal
+		wait
+	fi
 }
 
 # [$1: error message]
@@ -2405,6 +2518,8 @@ usage()
 	echo "  -i use ip mptcp"
 	echo "  -h help"
 
+	echo "[test ids]"
+
 	exit ${ret}
 }
 
@@ -2465,6 +2580,16 @@ while getopts "${all_tests_args}cCih" opt; do
 	esac
 done
 
+shift $((OPTIND - 1))
+
+for arg in "${@}"; do
+	if [[ "${arg}" =~ ^[0-9]+$ ]]; then
+		only_tests+=("${arg}")
+	else
+		usage "Unknown argument: ${arg}"
+	fi
+done
+
 if [ ${#tests[@]} -eq 0 ]; then
 	tests=("${all_tests_names[@]}")
 fi
-- 
cgit 


From c7d49c033de057b88398915b0eb2df2484dc8166 Mon Sep 17 00:00:00 2001
From: Matthieu Baerts <matthieu.baerts@tessares.net>
Date: Wed, 9 Mar 2022 11:16:31 -0800
Subject: selftests: mptcp: join: alt. to exec specific tests

Running a specific test by giving the ID is often what we want: the CI
reports an issue with the Nth test, it is reproducible with:

  ./mptcp_join.sh N

But this might not work when there is a need to find which commit has
introduced a regression making a test unstable: failing from time to
time. Indeed, a specific test is not attached to one ID: the ID is in
fact a counter. It means the same test can have a different ID if other
tests have been added/removed before this unstable one.

Remembering the current test can also help listing failed tests at the
end.

Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 446 ++++++++++++------------
 1 file changed, 232 insertions(+), 214 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index a3f6c790765b..64261c3ca320 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -20,8 +20,10 @@ validate_checksum=0
 init=0
 
 declare -A all_tests
-declare -a only_tests
+declare -a only_tests_ids
+declare -a only_tests_names
 TEST_COUNT=0
+TEST_NAME=""
 nr_blank=40
 
 export FAILING_LINKS=""
@@ -152,22 +154,30 @@ cleanup()
 
 skip_test()
 {
-	if [ "${#only_tests[@]}" -eq 0 ]; then
+	if [ "${#only_tests_ids[@]}" -eq 0 ] && [ "${#only_tests_names[@]}" -eq 0 ]; then
 		return 1
 	fi
 
 	local i
-	for i in "${only_tests[@]}"; do
+	for i in "${only_tests_ids[@]}"; do
 		if [ "${TEST_COUNT}" -eq "${i}" ]; then
 			return 1
 		fi
 	done
+	for i in "${only_tests_names[@]}"; do
+		if [ "${TEST_NAME}" = "${i}" ]; then
+			return 1
+		fi
+	done
 
 	return 0
 }
 
+# $1: test name
 reset()
 {
+	TEST_NAME="${1}"
+
 	TEST_COUNT=$((TEST_COUNT+1))
 
 	if skip_test; then
@@ -185,27 +195,29 @@ reset()
 	return 0
 }
 
+# $1: test name
 reset_with_cookies()
 {
-	reset || return 1
+	reset "${1}" || return 1
 
 	for netns in "$ns1" "$ns2";do
 		ip netns exec $netns sysctl -q net.ipv4.tcp_syncookies=2
 	done
 }
 
+# $1: test name
 reset_with_add_addr_timeout()
 {
-	local ip="${1:-4}"
+	local ip="${2:-4}"
 	local tables
 
+	reset "${1}" || return 1
+
 	tables="iptables"
 	if [ $ip -eq 6 ]; then
 		tables="ip6tables"
 	fi
 
-	reset || return 1
-
 	ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1
 	ip netns exec $ns2 $tables -A OUTPUT -p tcp \
 		-m tcp --tcp-option 30 \
@@ -214,12 +226,13 @@ reset_with_add_addr_timeout()
 		-j DROP
 }
 
+# $1: test name
 reset_with_checksum()
 {
 	local ns1_enable=$1
 	local ns2_enable=$2
 
-	reset || return 1
+	reset "checksum test ${1} ${2}" || return 1
 
 	ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=$ns1_enable
 	ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=$ns2_enable
@@ -229,10 +242,10 @@ reset_with_checksum()
 
 reset_with_allow_join_id0()
 {
-	local ns1_enable=$1
-	local ns2_enable=$2
+	local ns1_enable=$2
+	local ns2_enable=$3
 
-	reset || return 1
+	reset "${1}" || return 1
 
 	ip netns exec $ns1 sysctl -q net.mptcp.allow_join_initial_addr_port=$ns1_enable
 	ip netns exec $ns2 sysctl -q net.mptcp.allow_join_initial_addr_port=$ns2_enable
@@ -461,7 +474,7 @@ pm_nl_change_endpoint()
 pm_nl_check_endpoint()
 {
 	local line expected_line
-	local title="$1"
+	local need_title=$1
 	local msg="$2"
 	local ns=$3
 	local addr=$4
@@ -473,8 +486,8 @@ pm_nl_check_endpoint()
 	local _id
 	local id
 
-	if [ -n "${title}" ]; then
-		printf "%03u %-36s %s" "${TEST_COUNT}" "${title}" "${msg}"
+	if [ "${need_title}" = 1 ]; then
+		printf "%03u %-36s %s" "${TEST_COUNT}" "${TEST_NAME}" "${msg}"
 	else
 		printf "%-${nr_blank}s %s" " " "${msg}"
 	fi
@@ -1036,19 +1049,24 @@ chk_rst_nr()
 
 chk_join_nr()
 {
-	local msg="$1"
-	local syn_nr=$2
-	local syn_ack_nr=$3
-	local ack_nr=$4
-	local csum_ns1=${5:-0}
-	local csum_ns2=${6:-0}
-	local fail_nr=${7:-0}
-	local rst_nr=${8:-0}
+	local syn_nr=$1
+	local syn_ack_nr=$2
+	local ack_nr=$3
+	local csum_ns1=${4:-0}
+	local csum_ns2=${5:-0}
+	local fail_nr=${6:-0}
+	local rst_nr=${7:-0}
+	local corrupted_pkts=${8:-0}
 	local count
 	local dump_stats
 	local with_cookie
+	local title="${TEST_NAME}"
+
+	if [ "${corrupted_pkts}" -gt 0 ]; then
+		title+=": ${corrupted_pkts} corrupted pkts"
+	fi
 
-	printf "%03u %-36s %s" "$TEST_COUNT" "$msg" "syn"
+	printf "%03u %-36s %s" "${TEST_COUNT}" "${title}" "syn"
 	count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinSynRx | awk '{print $2}'`
 	[ -z "$count" ] && count=0
 	if [ "$count" != "$syn_nr" ]; then
@@ -1405,65 +1423,65 @@ wait_attempt_fail()
 
 subflows_tests()
 {
-	if reset; then
+	if reset "no JOIN"; then
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr "no JOIN" 0 0 0
+		chk_join_nr 0 0 0
 	fi
 
 	# subflow limited by client
-	if reset; then
+	if reset "single subflow, limited by client"; then
 		pm_nl_set_limits $ns1 0 0
 		pm_nl_set_limits $ns2 0 0
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr "single subflow, limited by client" 0 0 0
+		chk_join_nr 0 0 0
 	fi
 
 	# subflow limited by server
-	if reset; then
+	if reset "single subflow, limited by server"; then
 		pm_nl_set_limits $ns1 0 0
 		pm_nl_set_limits $ns2 0 1
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr "single subflow, limited by server" 1 1 0
+		chk_join_nr 1 1 0
 	fi
 
 	# subflow
-	if reset; then
+	if reset "single subflow"; then
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_set_limits $ns2 0 1
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr "single subflow" 1 1 1
+		chk_join_nr 1 1 1
 	fi
 
 	# multiple subflows
-	if reset; then
+	if reset "multiple subflows"; then
 		pm_nl_set_limits $ns1 0 2
 		pm_nl_set_limits $ns2 0 2
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr "multiple subflows" 2 2 2
+		chk_join_nr 2 2 2
 	fi
 
 	# multiple subflows limited by server
-	if reset; then
+	if reset "multiple subflows, limited by server"; then
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_set_limits $ns2 0 2
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr "multiple subflows, limited by server" 2 2 1
+		chk_join_nr 2 2 1
 	fi
 
 	# single subflow, dev
-	if reset; then
+	if reset "single subflow, dev"; then
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_set_limits $ns2 0 1
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow dev ns2eth3
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr "single subflow, dev" 1 1 1
+		chk_join_nr 1 1 1
 	fi
 }
 
@@ -1471,40 +1489,40 @@ subflows_error_tests()
 {
 	# If a single subflow is configured, and matches the MPC src
 	# address, no additional subflow should be created
-	if reset; then
+	if reset "no MPC reuse with single endpoint"; then
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_set_limits $ns2 0 1
 		pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow
 		run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
-		chk_join_nr "no MPC reuse with single endpoint" 0 0 0
+		chk_join_nr 0 0 0
 	fi
 
 	# multiple subflows, with subflow creation error
-	if reset; then
+	if reset "multi subflows, with failing subflow"; then
 		pm_nl_set_limits $ns1 0 2
 		pm_nl_set_limits $ns2 0 2
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
 		ip netns exec $ns1 iptables -A INPUT -s 10.0.3.2 -p tcp -j REJECT
 		run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
-		chk_join_nr "multi subflows, with failing subflow" 1 1 1
+		chk_join_nr 1 1 1
 	fi
 
 	# multiple subflows, with subflow timeout on MPJ
-	if reset; then
+	if reset "multi subflows, with subflow timeout"; then
 		pm_nl_set_limits $ns1 0 2
 		pm_nl_set_limits $ns2 0 2
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
 		ip netns exec $ns1 iptables -A INPUT -s 10.0.3.2 -p tcp -j DROP
 		run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
-		chk_join_nr "multi subflows, with subflow timeout" 1 1 1
+		chk_join_nr 1 1 1
 	fi
 
 	# multiple subflows, check that the endpoint corresponding to
 	# closed subflow (due to reset) is not reused if additional
 	# subflows are added later
-	if reset; then
+	if reset "multi subflows, fair usage on close"; then
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_set_limits $ns2 0 1
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
@@ -1518,27 +1536,27 @@ subflows_error_tests()
 
 		# additional subflow could be created only if the PM select
 		# the later endpoint, skipping the already used one
-		chk_join_nr "multi subflows, fair usage on close" 1 1 1
+		chk_join_nr 1 1 1
 	fi
 }
 
 signal_address_tests()
 {
 	# add_address, unused
-	if reset; then
+	if reset "unused signal address"; then
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr "unused signal address" 0 0 0
+		chk_join_nr 0 0 0
 		chk_add_nr 1 1
 	fi
 
 	# accept and use add_addr
-	if reset; then
+	if reset "signal address"; then
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_set_limits $ns2 1 1
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr "signal address" 1 1 1
+		chk_join_nr 1 1 1
 		chk_add_nr 1 1
 	fi
 
@@ -1546,54 +1564,54 @@ signal_address_tests()
 	# note: signal address in server ns and local addresses in client ns must
 	# belong to different subnets or one of the listed local address could be
 	# used for 'add_addr' subflow
-	if reset; then
+	if reset "subflow and signal"; then
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
 		pm_nl_set_limits $ns1 0 2
 		pm_nl_set_limits $ns2 1 2
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr "subflow and signal" 2 2 2
+		chk_join_nr 2 2 2
 		chk_add_nr 1 1
 	fi
 
 	# accept and use add_addr with additional subflows
-	if reset; then
+	if reset "multiple subflows and signal"; then
 		pm_nl_set_limits $ns1 0 3
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
 		pm_nl_set_limits $ns2 1 3
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr "multiple subflows and signal" 3 3 3
+		chk_join_nr 3 3 3
 		chk_add_nr 1 1
 	fi
 
 	# signal addresses
-	if reset; then
+	if reset "signal addresses"; then
 		pm_nl_set_limits $ns1 3 3
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
 		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
 		pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
 		pm_nl_set_limits $ns2 3 3
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr "signal addresses" 3 3 3
+		chk_join_nr 3 3 3
 		chk_add_nr 3 3
 	fi
 
 	# signal invalid addresses
-	if reset; then
+	if reset "signal invalid addresses"; then
 		pm_nl_set_limits $ns1 3 3
 		pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
 		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
 		pm_nl_add_endpoint $ns1 10.0.14.1 flags signal
 		pm_nl_set_limits $ns2 3 3
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr "signal invalid addresses" 1 1 1
+		chk_join_nr 1 1 1
 		chk_add_nr 3 3
 	fi
 
 	# signal addresses race test
-	if reset; then
+	if reset "signal addresses race test"; then
 		pm_nl_set_limits $ns1 4 4
 		pm_nl_set_limits $ns2 4 4
 		pm_nl_add_endpoint $ns1 10.0.1.1 flags signal
@@ -1608,7 +1626,7 @@ signal_address_tests()
 		# the peer could possibly miss some addr notification, allow retransmission
 		ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1
 		run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
-		chk_join_nr "signal addresses race test" 3 3 3
+		chk_join_nr 3 3 3
 
 		# the server will not signal the address terminating
 		# the MPC subflow
@@ -1619,7 +1637,7 @@ signal_address_tests()
 link_failure_tests()
 {
 	# accept and use add_addr with additional subflows and link loss
-	if reset; then
+	if reset "multiple flows, signal, link failure"; then
 		# without any b/w limit each veth could spool the packets and get
 		# them acked at xmit time, so that the corresponding subflow will
 		# have almost always no outstanding pkts, the scheduler will pick
@@ -1633,14 +1651,14 @@ link_failure_tests()
 		pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow
 		pm_nl_add_endpoint $ns2 10.0.4.2 dev ns2eth4 flags subflow
 		run_tests $ns1 $ns2 10.0.1.1 1
-		chk_join_nr "multiple flows, signal, link failure" 3 3 3
+		chk_join_nr 3 3 3
 		chk_add_nr 1 1
 		chk_stale_nr $ns2 1 5 1
 	fi
 
 	# accept and use add_addr with additional subflows and link loss
 	# for bidirectional transfer
-	if reset; then
+	if reset "multi flows, signal, bidi, link fail"; then
 		init_shapers
 		pm_nl_set_limits $ns1 0 3
 		pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
@@ -1648,14 +1666,14 @@ link_failure_tests()
 		pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow
 		pm_nl_add_endpoint $ns2 10.0.4.2 dev ns2eth4 flags subflow
 		run_tests $ns1 $ns2 10.0.1.1 2
-		chk_join_nr "multi flows, signal, bidi, link fail" 3 3 3
+		chk_join_nr 3 3 3
 		chk_add_nr 1 1
 		chk_stale_nr $ns2 1 -1 1
 	fi
 
 	# 2 subflows plus 1 backup subflow with a lossy link, backup
 	# will never be used
-	if reset; then
+	if reset "backup subflow unused, link failure"; then
 		init_shapers
 		pm_nl_set_limits $ns1 0 2
 		pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
@@ -1663,14 +1681,14 @@ link_failure_tests()
 		FAILING_LINKS="1"
 		pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup
 		run_tests $ns1 $ns2 10.0.1.1 1
-		chk_join_nr "backup subflow unused, link failure" 2 2 2
+		chk_join_nr 2 2 2
 		chk_add_nr 1 1
 		chk_link_usage $ns2 ns2eth3 $cinsent 0
 	fi
 
 	# 2 lossy links after half transfer, backup will get half of
 	# the traffic
-	if reset; then
+	if reset "backup flow used, multi links fail"; then
 		init_shapers
 		pm_nl_set_limits $ns1 0 2
 		pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
@@ -1678,7 +1696,7 @@ link_failure_tests()
 		pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup
 		FAILING_LINKS="1 2"
 		run_tests $ns1 $ns2 10.0.1.1 1
-		chk_join_nr "backup flow used, multi links fail" 2 2 2
+		chk_join_nr 2 2 2
 		chk_add_nr 1 1
 		chk_stale_nr $ns2 2 4 2
 		chk_link_usage $ns2 ns2eth3 $cinsent 50
@@ -1686,7 +1704,7 @@ link_failure_tests()
 
 	# use a backup subflow with the first subflow on a lossy link
 	# for bidirectional transfer
-	if reset; then
+	if reset "backup flow used, bidi, link failure"; then
 		init_shapers
 		pm_nl_set_limits $ns1 0 2
 		pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
@@ -1694,7 +1712,7 @@ link_failure_tests()
 		pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup
 		FAILING_LINKS="1 2"
 		run_tests $ns1 $ns2 10.0.1.1 2
-		chk_join_nr "backup flow used, bidi, link failure" 2 2 2
+		chk_join_nr 2 2 2
 		chk_add_nr 1 1
 		chk_stale_nr $ns2 1 -1 2
 		chk_link_usage $ns2 ns2eth3 $cinsent 50
@@ -1704,44 +1722,44 @@ link_failure_tests()
 add_addr_timeout_tests()
 {
 	# add_addr timeout
-	if reset_with_add_addr_timeout; then
+	if reset_with_add_addr_timeout "signal address, ADD_ADDR timeout"; then
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_set_limits $ns2 1 1
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
 		run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
-		chk_join_nr "signal address, ADD_ADDR timeout" 1 1 1
+		chk_join_nr 1 1 1
 		chk_add_nr 4 0
 	fi
 
 	# add_addr timeout IPv6
-	if reset_with_add_addr_timeout 6; then
+	if reset_with_add_addr_timeout "signal address, ADD_ADDR6 timeout" 6; then
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_set_limits $ns2 1 1
 		pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
 		run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
-		chk_join_nr "signal address, ADD_ADDR6 timeout" 1 1 1
+		chk_join_nr 1 1 1
 		chk_add_nr 4 0
 	fi
 
 	# signal addresses timeout
-	if reset_with_add_addr_timeout; then
+	if reset_with_add_addr_timeout "signal addresses, ADD_ADDR timeout"; then
 		pm_nl_set_limits $ns1 2 2
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
 		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
 		pm_nl_set_limits $ns2 2 2
 		run_tests $ns1 $ns2 10.0.1.1 0 0 0 speed_10
-		chk_join_nr "signal addresses, ADD_ADDR timeout" 2 2 2
+		chk_join_nr 2 2 2
 		chk_add_nr 8 0
 	fi
 
 	# signal invalid addresses timeout
-	if reset_with_add_addr_timeout; then
+	if reset_with_add_addr_timeout "invalid address, ADD_ADDR timeout"; then
 		pm_nl_set_limits $ns1 2 2
 		pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
 		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
 		pm_nl_set_limits $ns2 2 2
 		run_tests $ns1 $ns2 10.0.1.1 0 0 0 speed_10
-		chk_join_nr "invalid address, ADD_ADDR timeout" 1 1 1
+		chk_join_nr 1 1 1
 		chk_add_nr 8 0
 	fi
 }
@@ -1749,156 +1767,156 @@ add_addr_timeout_tests()
 remove_tests()
 {
 	# single subflow, remove
-	if reset; then
+	if reset "remove single subflow"; then
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_set_limits $ns2 0 1
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		run_tests $ns1 $ns2 10.0.1.1 0 0 -1 slow
-		chk_join_nr "remove single subflow" 1 1 1
+		chk_join_nr 1 1 1
 		chk_rm_nr 1 1
 	fi
 
 	# multiple subflows, remove
-	if reset; then
+	if reset "remove multiple subflows"; then
 		pm_nl_set_limits $ns1 0 2
 		pm_nl_set_limits $ns2 0 2
 		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		run_tests $ns1 $ns2 10.0.1.1 0 0 -2 slow
-		chk_join_nr "remove multiple subflows" 2 2 2
+		chk_join_nr 2 2 2
 		chk_rm_nr 2 2
 	fi
 
 	# single address, remove
-	if reset; then
+	if reset "remove single address"; then
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
 		pm_nl_set_limits $ns2 1 1
 		run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow
-		chk_join_nr "remove single address" 1 1 1
+		chk_join_nr 1 1 1
 		chk_add_nr 1 1
 		chk_rm_nr 1 1 invert
 	fi
 
 	# subflow and signal, remove
-	if reset; then
+	if reset "remove subflow and signal"; then
 		pm_nl_set_limits $ns1 0 2
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
 		pm_nl_set_limits $ns2 1 2
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		run_tests $ns1 $ns2 10.0.1.1 0 -1 -1 slow
-		chk_join_nr "remove subflow and signal" 2 2 2
+		chk_join_nr 2 2 2
 		chk_add_nr 1 1
 		chk_rm_nr 1 1
 	fi
 
 	# subflows and signal, remove
-	if reset; then
+	if reset "remove subflows and signal"; then
 		pm_nl_set_limits $ns1 0 3
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
 		pm_nl_set_limits $ns2 1 3
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
 		run_tests $ns1 $ns2 10.0.1.1 0 -1 -2 slow
-		chk_join_nr "remove subflows and signal" 3 3 3
+		chk_join_nr 3 3 3
 		chk_add_nr 1 1
 		chk_rm_nr 2 2
 	fi
 
 	# addresses remove
-	if reset; then
+	if reset "remove addresses"; then
 		pm_nl_set_limits $ns1 3 3
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250
 		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
 		pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
 		pm_nl_set_limits $ns2 3 3
 		run_tests $ns1 $ns2 10.0.1.1 0 -3 0 slow
-		chk_join_nr "remove addresses" 3 3 3
+		chk_join_nr 3 3 3
 		chk_add_nr 3 3
 		chk_rm_nr 3 3 invert
 	fi
 
 	# invalid addresses remove
-	if reset; then
+	if reset "remove invalid addresses"; then
 		pm_nl_set_limits $ns1 3 3
 		pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
 		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
 		pm_nl_add_endpoint $ns1 10.0.14.1 flags signal
 		pm_nl_set_limits $ns2 3 3
 		run_tests $ns1 $ns2 10.0.1.1 0 -3 0 slow
-		chk_join_nr "remove invalid addresses" 1 1 1
+		chk_join_nr 1 1 1
 		chk_add_nr 3 3
 		chk_rm_nr 3 1 invert
 	fi
 
 	# subflows and signal, flush
-	if reset; then
+	if reset "flush subflows and signal"; then
 		pm_nl_set_limits $ns1 0 3
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
 		pm_nl_set_limits $ns2 1 3
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
 		run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow
-		chk_join_nr "flush subflows and signal" 3 3 3
+		chk_join_nr 3 3 3
 		chk_add_nr 1 1
 		chk_rm_nr 1 3 invert simult
 	fi
 
 	# subflows flush
-	if reset; then
+	if reset "flush subflows"; then
 		pm_nl_set_limits $ns1 3 3
 		pm_nl_set_limits $ns2 3 3
 		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow id 150
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
 		run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow
-		chk_join_nr "flush subflows" 3 3 3
+		chk_join_nr 3 3 3
 		chk_rm_nr 0 3 simult
 	fi
 
 	# addresses flush
-	if reset; then
+	if reset "flush addresses"; then
 		pm_nl_set_limits $ns1 3 3
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250
 		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
 		pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
 		pm_nl_set_limits $ns2 3 3
 		run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow
-		chk_join_nr "flush addresses" 3 3 3
+		chk_join_nr 3 3 3
 		chk_add_nr 3 3
 		chk_rm_nr 3 3 invert simult
 	fi
 
 	# invalid addresses flush
-	if reset; then
+	if reset "flush invalid addresses"; then
 		pm_nl_set_limits $ns1 3 3
 		pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
 		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
 		pm_nl_add_endpoint $ns1 10.0.14.1 flags signal
 		pm_nl_set_limits $ns2 3 3
 		run_tests $ns1 $ns2 10.0.1.1 0 -8 0 slow
-		chk_join_nr "flush invalid addresses" 1 1 1
+		chk_join_nr 1 1 1
 		chk_add_nr 3 3
 		chk_rm_nr 3 1 invert
 	fi
 
 	# remove id 0 subflow
-	if reset; then
+	if reset "remove id 0 subflow"; then
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_set_limits $ns2 0 1
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		run_tests $ns1 $ns2 10.0.1.1 0 0 -9 slow
-		chk_join_nr "remove id 0 subflow" 1 1 1
+		chk_join_nr 1 1 1
 		chk_rm_nr 1 1
 	fi
 
 	# remove id 0 address
-	if reset; then
+	if reset "remove id 0 address"; then
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
 		pm_nl_set_limits $ns2 1 1
 		run_tests $ns1 $ns2 10.0.1.1 0 -9 0 slow
-		chk_join_nr "remove id 0 address" 1 1 1
+		chk_join_nr 1 1 1
 		chk_add_nr 1 1
 		chk_rm_nr 1 1 invert
 	fi
@@ -1907,44 +1925,44 @@ remove_tests()
 add_tests()
 {
 	# add single subflow
-	if reset; then
+	if reset "add single subflow"; then
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_set_limits $ns2 0 1
 		run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow
-		chk_join_nr "add single subflow" 1 1 1
+		chk_join_nr 1 1 1
 	fi
 
 	# add signal address
-	if reset; then
+	if reset "add signal address"; then
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_set_limits $ns2 1 1
 		run_tests $ns1 $ns2 10.0.1.1 0 1 0 slow
-		chk_join_nr "add signal address" 1 1 1
+		chk_join_nr 1 1 1
 		chk_add_nr 1 1
 	fi
 
 	# add multiple subflows
-	if reset; then
+	if reset "add multiple subflows"; then
 		pm_nl_set_limits $ns1 0 2
 		pm_nl_set_limits $ns2 0 2
 		run_tests $ns1 $ns2 10.0.1.1 0 0 2 slow
-		chk_join_nr "add multiple subflows" 2 2 2
+		chk_join_nr 2 2 2
 	fi
 
 	# add multiple subflows IPv6
-	if reset; then
+	if reset "add multiple subflows IPv6"; then
 		pm_nl_set_limits $ns1 0 2
 		pm_nl_set_limits $ns2 0 2
 		run_tests $ns1 $ns2 dead:beef:1::1 0 0 2 slow
-		chk_join_nr "add multiple subflows IPv6" 2 2 2
+		chk_join_nr 2 2 2
 	fi
 
 	# add multiple addresses IPv6
-	if reset; then
+	if reset "add multiple addresses IPv6"; then
 		pm_nl_set_limits $ns1 0 2
 		pm_nl_set_limits $ns2 2 2
 		run_tests $ns1 $ns2 dead:beef:1::1 0 2 0 slow
-		chk_join_nr "add multiple addresses IPv6" 2 2 2
+		chk_join_nr 2 2 2
 		chk_add_nr 2 2
 	fi
 }
@@ -1952,51 +1970,51 @@ add_tests()
 ipv6_tests()
 {
 	# subflow IPv6
-	if reset; then
+	if reset "single subflow IPv6"; then
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_set_limits $ns2 0 1
 		pm_nl_add_endpoint $ns2 dead:beef:3::2 dev ns2eth3 flags subflow
 		run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
-		chk_join_nr "single subflow IPv6" 1 1 1
+		chk_join_nr 1 1 1
 	fi
 
 	# add_address, unused IPv6
-	if reset; then
+	if reset "unused signal address IPv6"; then
 		pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
 		run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
-		chk_join_nr "unused signal address IPv6" 0 0 0
+		chk_join_nr 0 0 0
 		chk_add_nr 1 1
 	fi
 
 	# signal address IPv6
-	if reset; then
+	if reset "single address IPv6"; then
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
 		pm_nl_set_limits $ns2 1 1
 		run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
-		chk_join_nr "single address IPv6" 1 1 1
+		chk_join_nr 1 1 1
 		chk_add_nr 1 1
 	fi
 
 	# single address IPv6, remove
-	if reset; then
+	if reset "remove single address IPv6"; then
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
 		pm_nl_set_limits $ns2 1 1
 		run_tests $ns1 $ns2 dead:beef:1::1 0 -1 0 slow
-		chk_join_nr "remove single address IPv6" 1 1 1
+		chk_join_nr 1 1 1
 		chk_add_nr 1 1
 		chk_rm_nr 1 1 invert
 	fi
 
 	# subflow and signal IPv6, remove
-	if reset; then
+	if reset "remove subflow and signal IPv6"; then
 		pm_nl_set_limits $ns1 0 2
 		pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
 		pm_nl_set_limits $ns2 1 2
 		pm_nl_add_endpoint $ns2 dead:beef:3::2 dev ns2eth3 flags subflow
 		run_tests $ns1 $ns2 dead:beef:1::1 0 -1 -1 slow
-		chk_join_nr "remove subflow and signal IPv6" 2 2 2
+		chk_join_nr 2 2 2
 		chk_add_nr 1 1
 		chk_rm_nr 1 1
 	fi
@@ -2005,120 +2023,120 @@ ipv6_tests()
 v4mapped_tests()
 {
 	# subflow IPv4-mapped to IPv4-mapped
-	if reset; then
+	if reset "single subflow IPv4-mapped"; then
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_set_limits $ns2 0 1
 		pm_nl_add_endpoint $ns2 "::ffff:10.0.3.2" flags subflow
 		run_tests $ns1 $ns2 "::ffff:10.0.1.1"
-		chk_join_nr "single subflow IPv4-mapped" 1 1 1
+		chk_join_nr 1 1 1
 	fi
 
 	# signal address IPv4-mapped with IPv4-mapped sk
-	if reset; then
+	if reset "signal address IPv4-mapped"; then
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_set_limits $ns2 1 1
 		pm_nl_add_endpoint $ns1 "::ffff:10.0.2.1" flags signal
 		run_tests $ns1 $ns2 "::ffff:10.0.1.1"
-		chk_join_nr "signal address IPv4-mapped" 1 1 1
+		chk_join_nr 1 1 1
 		chk_add_nr 1 1
 	fi
 
 	# subflow v4-map-v6
-	if reset; then
+	if reset "single subflow v4-map-v6"; then
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_set_limits $ns2 0 1
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		run_tests $ns1 $ns2 "::ffff:10.0.1.1"
-		chk_join_nr "single subflow v4-map-v6" 1 1 1
+		chk_join_nr 1 1 1
 	fi
 
 	# signal address v4-map-v6
-	if reset; then
+	if reset "signal address v4-map-v6"; then
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_set_limits $ns2 1 1
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
 		run_tests $ns1 $ns2 "::ffff:10.0.1.1"
-		chk_join_nr "signal address v4-map-v6" 1 1 1
+		chk_join_nr 1 1 1
 		chk_add_nr 1 1
 	fi
 
 	# subflow v6-map-v4
-	if reset; then
+	if reset "single subflow v6-map-v4"; then
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_set_limits $ns2 0 1
 		pm_nl_add_endpoint $ns2 "::ffff:10.0.3.2" flags subflow
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr "single subflow v6-map-v4" 1 1 1
+		chk_join_nr 1 1 1
 	fi
 
 	# signal address v6-map-v4
-	if reset; then
+	if reset "signal address v6-map-v4"; then
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_set_limits $ns2 1 1
 		pm_nl_add_endpoint $ns1 "::ffff:10.0.2.1" flags signal
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr "signal address v6-map-v4" 1 1 1
+		chk_join_nr 1 1 1
 		chk_add_nr 1 1
 	fi
 
 	# no subflow IPv6 to v4 address
-	if reset; then
+	if reset "no JOIN with diff families v4-v6"; then
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_set_limits $ns2 0 1
 		pm_nl_add_endpoint $ns2 dead:beef:2::2 flags subflow
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr "no JOIN with diff families v4-v6" 0 0 0
+		chk_join_nr 0 0 0
 	fi
 
 	# no subflow IPv6 to v4 address even if v6 has a valid v4 at the end
-	if reset; then
+	if reset "no JOIN with diff families v4-v6-2"; then
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_set_limits $ns2 0 1
 		pm_nl_add_endpoint $ns2 dead:beef:2::10.0.3.2 flags subflow
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr "no JOIN with diff families v4-v6-2" 0 0 0
+		chk_join_nr 0 0 0
 	fi
 
 	# no subflow IPv4 to v6 address, no need to slow down too then
-	if reset; then
+	if reset "no JOIN with diff families v6-v4"; then
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_set_limits $ns2 0 1
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		run_tests $ns1 $ns2 dead:beef:1::1
-		chk_join_nr "no JOIN with diff families v6-v4" 0 0 0
+		chk_join_nr 0 0 0
 	fi
 }
 
 backup_tests()
 {
 	# single subflow, backup
-	if reset; then
+	if reset "single subflow, backup"; then
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_set_limits $ns2 0 1
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup
 		run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow nobackup
-		chk_join_nr "single subflow, backup" 1 1 1
+		chk_join_nr 1 1 1
 		chk_prio_nr 0 1
 	fi
 
 	# single address, backup
-	if reset; then
+	if reset "single address, backup"; then
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
 		pm_nl_set_limits $ns2 1 1
 		run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup
-		chk_join_nr "single address, backup" 1 1 1
+		chk_join_nr 1 1 1
 		chk_add_nr 1 1
 		chk_prio_nr 1 1
 	fi
 
 	# single address with port, backup
-	if reset; then
+	if reset "single address with port, backup"; then
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
 		pm_nl_set_limits $ns2 1 1
 		run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup
-		chk_join_nr "single address with port, backup" 1 1 1
+		chk_join_nr 1 1 1
 		chk_add_nr 1 1
 		chk_prio_nr 1 1
 	fi
@@ -2127,81 +2145,81 @@ backup_tests()
 add_addr_ports_tests()
 {
 	# signal address with port
-	if reset; then
+	if reset "signal address with port"; then
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_set_limits $ns2 1 1
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr "signal address with port" 1 1 1
+		chk_join_nr 1 1 1
 		chk_add_nr 1 1 1
 	fi
 
 	# subflow and signal with port
-	if reset; then
+	if reset "subflow and signal with port"; then
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
 		pm_nl_set_limits $ns1 0 2
 		pm_nl_set_limits $ns2 1 2
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr "subflow and signal with port" 2 2 2
+		chk_join_nr 2 2 2
 		chk_add_nr 1 1 1
 	fi
 
 	# single address with port, remove
-	if reset; then
+	if reset "remove single address with port"; then
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
 		pm_nl_set_limits $ns2 1 1
 		run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow
-		chk_join_nr "remove single address with port" 1 1 1
+		chk_join_nr 1 1 1
 		chk_add_nr 1 1 1
 		chk_rm_nr 1 1 invert
 	fi
 
 	# subflow and signal with port, remove
-	if reset; then
+	if reset "remove subflow and signal with port"; then
 		pm_nl_set_limits $ns1 0 2
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
 		pm_nl_set_limits $ns2 1 2
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		run_tests $ns1 $ns2 10.0.1.1 0 -1 -1 slow
-		chk_join_nr "remove subflow and signal with port" 2 2 2
+		chk_join_nr 2 2 2
 		chk_add_nr 1 1 1
 		chk_rm_nr 1 1
 	fi
 
 	# subflows and signal with port, flush
-	if reset; then
+	if reset "flush subflows and signal with port"; then
 		pm_nl_set_limits $ns1 0 3
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
 		pm_nl_set_limits $ns2 1 3
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
 		run_tests $ns1 $ns2 10.0.1.1 0 -8 -2 slow
-		chk_join_nr "flush subflows and signal with port" 3 3 3
+		chk_join_nr 3 3 3
 		chk_add_nr 1 1
 		chk_rm_nr 1 3 invert simult
 	fi
 
 	# multiple addresses with port
-	if reset; then
+	if reset "multiple addresses with port"; then
 		pm_nl_set_limits $ns1 2 2
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
 		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal port 10100
 		pm_nl_set_limits $ns2 2 2
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr "multiple addresses with port" 2 2 2
+		chk_join_nr 2 2 2
 		chk_add_nr 2 2 2
 	fi
 
 	# multiple addresses with ports
-	if reset; then
+	if reset "multiple addresses with ports"; then
 		pm_nl_set_limits $ns1 2 2
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
 		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal port 10101
 		pm_nl_set_limits $ns2 2 2
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr "multiple addresses with ports" 2 2 2
+		chk_join_nr 2 2 2
 		chk_add_nr 2 2 2
 	fi
 }
@@ -2209,64 +2227,64 @@ add_addr_ports_tests()
 syncookies_tests()
 {
 	# single subflow, syncookies
-	if reset_with_cookies; then
+	if reset_with_cookies "single subflow with syn cookies"; then
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_set_limits $ns2 0 1
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr "single subflow with syn cookies" 1 1 1
+		chk_join_nr 1 1 1
 	fi
 
 	# multiple subflows with syn cookies
-	if reset_with_cookies; then
+	if reset_with_cookies "multiple subflows with syn cookies"; then
 		pm_nl_set_limits $ns1 0 2
 		pm_nl_set_limits $ns2 0 2
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr "multiple subflows with syn cookies" 2 2 2
+		chk_join_nr 2 2 2
 	fi
 
 	# multiple subflows limited by server
-	if reset_with_cookies; then
+	if reset_with_cookies "subflows limited by server w cookies"; then
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_set_limits $ns2 0 2
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr "subflows limited by server w cookies" 2 1 1
+		chk_join_nr 2 1 1
 	fi
 
 	# test signal address with cookies
-	if reset_with_cookies; then
+	if reset_with_cookies "signal address with syn cookies"; then
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_set_limits $ns2 1 1
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr "signal address with syn cookies" 1 1 1
+		chk_join_nr 1 1 1
 		chk_add_nr 1 1
 	fi
 
 	# test cookie with subflow and signal
-	if reset_with_cookies; then
+	if reset_with_cookies "subflow and signal w cookies"; then
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
 		pm_nl_set_limits $ns1 0 2
 		pm_nl_set_limits $ns2 1 2
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr "subflow and signal w cookies" 2 2 2
+		chk_join_nr 2 2 2
 		chk_add_nr 1 1
 	fi
 
 	# accept and use add_addr with additional subflows
-	if reset_with_cookies; then
+	if reset_with_cookies "subflows and signal w. cookies"; then
 		pm_nl_set_limits $ns1 0 3
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
 		pm_nl_set_limits $ns2 1 3
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr "subflows and signal w. cookies" 3 3 3
+		chk_join_nr 3 3 3
 		chk_add_nr 1 1
 	fi
 }
@@ -2278,7 +2296,7 @@ checksum_tests()
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_set_limits $ns2 0 1
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr "checksum test 0 0" 0 0 0
+		chk_join_nr 0 0 0
 	fi
 
 	# checksum test 1 1
@@ -2286,7 +2304,7 @@ checksum_tests()
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_set_limits $ns2 0 1
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr "checksum test 1 1" 0 0 0
+		chk_join_nr 0 0 0
 	fi
 
 	# checksum test 0 1
@@ -2294,7 +2312,7 @@ checksum_tests()
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_set_limits $ns2 0 1
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr "checksum test 0 1" 0 0 0
+		chk_join_nr 0 0 0
 	fi
 
 	# checksum test 1 0
@@ -2302,70 +2320,70 @@ checksum_tests()
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_set_limits $ns2 0 1
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr "checksum test 1 0" 0 0 0
+		chk_join_nr 0 0 0
 	fi
 }
 
 deny_join_id0_tests()
 {
 	# subflow allow join id0 ns1
-	if reset_with_allow_join_id0 1 0; then
+	if reset_with_allow_join_id0 "single subflow allow join id0 ns1" 1 0; then
 		pm_nl_set_limits $ns1 1 1
 		pm_nl_set_limits $ns2 1 1
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr "single subflow allow join id0 ns1" 1 1 1
+		chk_join_nr 1 1 1
 	fi
 
 	# subflow allow join id0 ns2
-	if reset_with_allow_join_id0 0 1; then
+	if reset_with_allow_join_id0 "single subflow allow join id0 ns2" 0 1; then
 		pm_nl_set_limits $ns1 1 1
 		pm_nl_set_limits $ns2 1 1
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr "single subflow allow join id0 ns2" 0 0 0
+		chk_join_nr 0 0 0
 	fi
 
 	# signal address allow join id0 ns1
 	# ADD_ADDRs are not affected by allow_join_id0 value.
-	if reset_with_allow_join_id0 1 0; then
+	if reset_with_allow_join_id0 "signal address allow join id0 ns1" 1 0; then
 		pm_nl_set_limits $ns1 1 1
 		pm_nl_set_limits $ns2 1 1
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr "signal address allow join id0 ns1" 1 1 1
+		chk_join_nr 1 1 1
 		chk_add_nr 1 1
 	fi
 
 	# signal address allow join id0 ns2
 	# ADD_ADDRs are not affected by allow_join_id0 value.
-	if reset_with_allow_join_id0 0 1; then
+	if reset_with_allow_join_id0 "signal address allow join id0 ns2" 0 1; then
 		pm_nl_set_limits $ns1 1 1
 		pm_nl_set_limits $ns2 1 1
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr "signal address allow join id0 ns2" 1 1 1
+		chk_join_nr 1 1 1
 		chk_add_nr 1 1
 	fi
 
 	# subflow and address allow join id0 ns1
-	if reset_with_allow_join_id0 1 0; then
+	if reset_with_allow_join_id0 "subflow and address allow join id0 1" 1 0; then
 		pm_nl_set_limits $ns1 2 2
 		pm_nl_set_limits $ns2 2 2
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr "subflow and address allow join id0 1" 2 2 2
+		chk_join_nr 2 2 2
 	fi
 
 	# subflow and address allow join id0 ns2
-	if reset_with_allow_join_id0 0 1; then
+	if reset_with_allow_join_id0 "subflow and address allow join id0 2" 0 1; then
 		pm_nl_set_limits $ns1 2 2
 		pm_nl_set_limits $ns2 2 2
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr "subflow and address allow join id0 2" 1 1 1
+		chk_join_nr 1 1 1
 	fi
 }
 
@@ -2374,37 +2392,37 @@ fullmesh_tests()
 	# fullmesh 1
 	# 2 fullmesh addrs in ns2, added before the connection,
 	# 1 non-fullmesh addr in ns1, added during the connection.
-	if reset; then
+	if reset "fullmesh test 2x1"; then
 		pm_nl_set_limits $ns1 0 4
 		pm_nl_set_limits $ns2 1 4
 		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,fullmesh
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,fullmesh
 		run_tests $ns1 $ns2 10.0.1.1 0 1 0 slow
-		chk_join_nr "fullmesh test 2x1" 4 4 4
+		chk_join_nr 4 4 4
 		chk_add_nr 1 1
 	fi
 
 	# fullmesh 2
 	# 1 non-fullmesh addr in ns1, added before the connection,
 	# 1 fullmesh addr in ns2, added during the connection.
-	if reset; then
+	if reset "fullmesh test 1x1"; then
 		pm_nl_set_limits $ns1 1 3
 		pm_nl_set_limits $ns2 1 3
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
 		run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_1 slow
-		chk_join_nr "fullmesh test 1x1" 3 3 3
+		chk_join_nr 3 3 3
 		chk_add_nr 1 1
 	fi
 
 	# fullmesh 3
 	# 1 non-fullmesh addr in ns1, added before the connection,
 	# 2 fullmesh addrs in ns2, added during the connection.
-	if reset; then
+	if reset "fullmesh test 1x2"; then
 		pm_nl_set_limits $ns1 2 5
 		pm_nl_set_limits $ns2 1 5
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
 		run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow
-		chk_join_nr "fullmesh test 1x2" 5 5 5
+		chk_join_nr 5 5 5
 		chk_add_nr 1 1
 	fi
 
@@ -2412,53 +2430,53 @@ fullmesh_tests()
 	# 1 non-fullmesh addr in ns1, added before the connection,
 	# 2 fullmesh addrs in ns2, added during the connection,
 	# limit max_subflows to 4.
-	if reset; then
+	if reset "fullmesh test 1x2, limited"; then
 		pm_nl_set_limits $ns1 2 4
 		pm_nl_set_limits $ns2 1 4
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
 		run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow
-		chk_join_nr "fullmesh test 1x2, limited" 4 4 4
+		chk_join_nr 4 4 4
 		chk_add_nr 1 1
 	fi
 
 	# set fullmesh flag
-	if reset; then
+	if reset "set fullmesh flag test"; then
 		pm_nl_set_limits $ns1 4 4
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow
 		pm_nl_set_limits $ns2 4 4
 		run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow fullmesh
-		chk_join_nr "set fullmesh flag test" 2 2 2
+		chk_join_nr 2 2 2
 		chk_rm_nr 0 1
 	fi
 
 	# set nofullmesh flag
-	if reset; then
+	if reset "set nofullmesh flag test"; then
 		pm_nl_set_limits $ns1 4 4
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow,fullmesh
 		pm_nl_set_limits $ns2 4 4
 		run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_1 slow nofullmesh
-		chk_join_nr "set nofullmesh flag test" 2 2 2
+		chk_join_nr 2 2 2
 		chk_rm_nr 0 1
 	fi
 
 	# set backup,fullmesh flags
-	if reset; then
+	if reset "set backup,fullmesh flags test"; then
 		pm_nl_set_limits $ns1 4 4
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow
 		pm_nl_set_limits $ns2 4 4
 		run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow backup,fullmesh
-		chk_join_nr "set backup,fullmesh flags test" 2 2 2
+		chk_join_nr 2 2 2
 		chk_prio_nr 0 1
 		chk_rm_nr 0 1
 	fi
 
 	# set nobackup,nofullmesh flags
-	if reset; then
+	if reset "set nobackup,nofullmesh flags test"; then
 		pm_nl_set_limits $ns1 4 4
 		pm_nl_set_limits $ns2 4 4
 		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup,fullmesh
 		run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow nobackup,nofullmesh
-		chk_join_nr "set nobackup,nofullmesh flags test" 2 2 2
+		chk_join_nr 2 2 2
 		chk_prio_nr 0 1
 		chk_rm_nr 0 1
 	fi
@@ -2466,9 +2484,9 @@ fullmesh_tests()
 
 fastclose_tests()
 {
-	if reset; then
+	if reset "fastclose test"; then
 		run_tests $ns1 $ns2 10.0.1.1 1024 0 fastclose_2
-		chk_join_nr "fastclose test" 0 0 0
+		chk_join_nr 0 0 0
 		chk_fclose_nr 1 1
 		chk_rst_nr 1 1 invert
 	fi
@@ -2477,22 +2495,22 @@ fastclose_tests()
 implicit_tests()
 {
 	# userspace pm type prevents add_addr
-	if reset; then
+	if reset "implicit EP"; then
 		pm_nl_set_limits $ns1 2 2
 		pm_nl_set_limits $ns2 2 2
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
 		run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow &
 
 		wait_mpj $ns1
-		pm_nl_check_endpoint "implicit EP" "creation" \
+		pm_nl_check_endpoint 1 "creation" \
 			$ns2 10.0.2.2 id 1 flags implicit
 
 		pm_nl_add_endpoint $ns2 10.0.2.2 id 33
-		pm_nl_check_endpoint "" "ID change is prevented" \
+		pm_nl_check_endpoint 0 "ID change is prevented" \
 			$ns2 10.0.2.2 id 1 flags implicit
 
 		pm_nl_add_endpoint $ns2 10.0.2.2 flags signal
-		pm_nl_check_endpoint "" "modif is allowed" \
+		pm_nl_check_endpoint 0 "modif is allowed" \
 			$ns2 10.0.2.2 id 1 flags signal
 		wait
 	fi
@@ -2518,7 +2536,7 @@ usage()
 	echo "  -i use ip mptcp"
 	echo "  -h help"
 
-	echo "[test ids]"
+	echo "[test ids|names]"
 
 	exit ${ret}
 }
@@ -2584,9 +2602,9 @@ shift $((OPTIND - 1))
 
 for arg in "${@}"; do
 	if [[ "${arg}" =~ ^[0-9]+$ ]]; then
-		only_tests+=("${arg}")
+		only_tests_ids+=("${arg}")
 	else
-		usage "Unknown argument: ${arg}"
+		only_tests_names+=("${arg}")
 	fi
 done
 
-- 
cgit 


From 39aab88242a8ea63a32d3b199e01292e555e03c7 Mon Sep 17 00:00:00 2001
From: Matthieu Baerts <matthieu.baerts@tessares.net>
Date: Wed, 9 Mar 2022 11:16:32 -0800
Subject: selftests: mptcp: join: list failure at the end

With ~100 tests, it helps to have this summary at the end not to scroll
to find which one has failed.

It is especially interseting when looking at the output produced by the
CI where the kernel logs from the serial are mixed together.

Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 85 ++++++++++++++++---------
 1 file changed, 55 insertions(+), 30 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 64261c3ca320..d3038922a0d2 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -22,6 +22,7 @@ init=0
 declare -A all_tests
 declare -a only_tests_ids
 declare -a only_tests_names
+declare -A failed_tests
 TEST_COUNT=0
 TEST_NAME=""
 nr_blank=40
@@ -251,6 +252,21 @@ reset_with_allow_join_id0()
 	ip netns exec $ns2 sysctl -q net.mptcp.allow_join_initial_addr_port=$ns2_enable
 }
 
+fail_test()
+{
+	ret=1
+	failed_tests[${TEST_COUNT}]="${TEST_NAME}"
+}
+
+get_failed_tests_ids()
+{
+	# sorted
+	local i
+	for i in "${!failed_tests[@]}"; do
+		echo "${i}"
+	done | sort -n
+}
+
 print_file_err()
 {
 	ls -l "$1" 1>&2
@@ -272,7 +288,7 @@ check_transfer()
 			echo "[ FAIL ] $what does not match (in, out):"
 			print_file_err "$in"
 			print_file_err "$out"
-			ret=1
+			fail_test
 
 			return 1
 		else
@@ -292,7 +308,7 @@ do_ping()
 	ip netns exec ${connector_ns} ping -q -c 1 $connect_addr >/dev/null
 	if [ $? -ne 0 ] ; then
 		echo "$listener_ns -> $connect_addr connectivity [ FAIL ]" 1>&2
-		ret=1
+		fail_test
 	fi
 }
 
@@ -541,7 +557,7 @@ pm_nl_check_endpoint()
 		echo "[ ok ]"
 	else
 		echo "[fail] expected '$expected_line' found '$line'"
-		ret=1
+		fail_test
 	fi
 }
 
@@ -795,7 +811,7 @@ do_transfer()
 		cat /tmp/${connector_ns}.out
 
 		cat "$capout"
-		ret=1
+		fail_test
 		return 1
 	fi
 
@@ -920,7 +936,7 @@ chk_csum_nr()
 	if [ "$count" != $csum_ns1 -a $allow_multi_errors_ns1 -eq 0 ] ||
 	   [ "$count" -lt $csum_ns1 -a $allow_multi_errors_ns1 -eq 1 ]; then
 		echo "[fail] got $count data checksum error[s] expected $csum_ns1"
-		ret=1
+		fail_test
 		dump_stats=1
 	else
 		echo -n "[ ok ]"
@@ -931,7 +947,7 @@ chk_csum_nr()
 	if [ "$count" != $csum_ns2 -a $allow_multi_errors_ns2 -eq 0 ] ||
 	   [ "$count" -lt $csum_ns2 -a $allow_multi_errors_ns2 -eq 1 ]; then
 		echo "[fail] got $count data checksum error[s] expected $csum_ns2"
-		ret=1
+		fail_test
 		dump_stats=1
 	else
 		echo "[ ok ]"
@@ -951,7 +967,7 @@ chk_fail_nr()
 	[ -z "$count" ] && count=0
 	if [ "$count" != "$fail_tx" ]; then
 		echo "[fail] got $count MP_FAIL[s] TX expected $fail_tx"
-		ret=1
+		fail_test
 		dump_stats=1
 	else
 		echo -n "[ ok ]"
@@ -962,7 +978,7 @@ chk_fail_nr()
 	[ -z "$count" ] && count=0
 	if [ "$count" != "$fail_rx" ]; then
 		echo "[fail] got $count MP_FAIL[s] RX expected $fail_rx"
-		ret=1
+		fail_test
 		dump_stats=1
 	else
 		echo "[ ok ]"
@@ -983,7 +999,7 @@ chk_fclose_nr()
 	[ -z "$count" ] && count=0
 	if [ "$count" != "$fclose_tx" ]; then
 		echo "[fail] got $count MP_FASTCLOSE[s] TX expected $fclose_tx"
-		ret=1
+		fail_test
 		dump_stats=1
 	else
 		echo -n "[ ok ]"
@@ -994,7 +1010,7 @@ chk_fclose_nr()
 	[ -z "$count" ] && count=0
 	if [ "$count" != "$fclose_rx" ]; then
 		echo "[fail] got $count MP_FASTCLOSE[s] RX expected $fclose_rx"
-		ret=1
+		fail_test
 		dump_stats=1
 	else
 		echo "[ ok ]"
@@ -1025,7 +1041,7 @@ chk_rst_nr()
 	[ -z "$count" ] && count=0
 	if [ "$count" != "$rst_tx" ]; then
 		echo "[fail] got $count MP_RST[s] TX expected $rst_tx"
-		ret=1
+		fail_test
 		dump_stats=1
 	else
 		echo -n "[ ok ]"
@@ -1036,7 +1052,7 @@ chk_rst_nr()
 	[ -z "$count" ] && count=0
 	if [ "$count" != "$rst_rx" ]; then
 		echo "[fail] got $count MP_RST[s] RX expected $rst_rx"
-		ret=1
+		fail_test
 		dump_stats=1
 	else
 		echo -n "[ ok ]"
@@ -1071,7 +1087,7 @@ chk_join_nr()
 	[ -z "$count" ] && count=0
 	if [ "$count" != "$syn_nr" ]; then
 		echo "[fail] got $count JOIN[s] syn expected $syn_nr"
-		ret=1
+		fail_test
 		dump_stats=1
 	else
 		echo -n "[ ok ]"
@@ -1089,7 +1105,7 @@ chk_join_nr()
 			echo -n "[ ok ]"
 		else
 			echo "[fail] got $count JOIN[s] synack expected $syn_ack_nr"
-			ret=1
+			fail_test
 			dump_stats=1
 		fi
 	else
@@ -1101,7 +1117,7 @@ chk_join_nr()
 	[ -z "$count" ] && count=0
 	if [ "$count" != "$ack_nr" ]; then
 		echo "[fail] got $count JOIN[s] ack expected $ack_nr"
-		ret=1
+		fail_test
 		dump_stats=1
 	else
 		echo "[ ok ]"
@@ -1141,7 +1157,7 @@ chk_stale_nr()
 		echo "[fail] got $stale_nr stale[s] $recover_nr recover[s], " \
 		     " expected stale in range [$stale_min..$stale_max]," \
 		     " stale-recover delta $stale_delta "
-		ret=1
+		fail_test
 		dump_stats=1
 	else
 		echo "[ ok ]"
@@ -1178,7 +1194,7 @@ chk_add_nr()
 	# add addrs options, due to retransmissions
 	if [ "$count" != "$add_nr" ] && [ "$timeout" -gt 1 -o "$count" -lt "$add_nr" ]; then
 		echo "[fail] got $count ADD_ADDR[s] expected $add_nr"
-		ret=1
+		fail_test
 		dump_stats=1
 	else
 		echo -n "[ ok ]"
@@ -1189,7 +1205,7 @@ chk_add_nr()
 	[ -z "$count" ] && count=0
 	if [ "$count" != "$echo_nr" ]; then
 		echo "[fail] got $count ADD_ADDR echo[s] expected $echo_nr"
-		ret=1
+		fail_test
 		dump_stats=1
 	else
 		echo -n "[ ok ]"
@@ -1201,7 +1217,7 @@ chk_add_nr()
 		[ -z "$count" ] && count=0
 		if [ "$count" != "$port_nr" ]; then
 			echo "[fail] got $count ADD_ADDR[s] with a port-number expected $port_nr"
-			ret=1
+			fail_test
 			dump_stats=1
 		else
 			echo "[ ok ]"
@@ -1214,7 +1230,7 @@ chk_add_nr()
 		if [ "$count" != "$syn_nr" ]; then
 			echo "[fail] got $count JOIN[s] syn with a different \
 				port-number expected $syn_nr"
-			ret=1
+			fail_test
 			dump_stats=1
 		else
 			echo -n "[ ok ]"
@@ -1227,7 +1243,7 @@ chk_add_nr()
 		if [ "$count" != "$syn_ack_nr" ]; then
 			echo "[fail] got $count JOIN[s] synack with a different \
 				port-number expected $syn_ack_nr"
-			ret=1
+			fail_test
 			dump_stats=1
 		else
 			echo -n "[ ok ]"
@@ -1240,7 +1256,7 @@ chk_add_nr()
 		if [ "$count" != "$ack_nr" ]; then
 			echo "[fail] got $count JOIN[s] ack with a different \
 				port-number expected $ack_nr"
-			ret=1
+			fail_test
 			dump_stats=1
 		else
 			echo "[ ok ]"
@@ -1253,7 +1269,7 @@ chk_add_nr()
 		if [ "$count" != "$mis_syn_nr" ]; then
 			echo "[fail] got $count JOIN[s] syn with a mismatched \
 				port-number expected $mis_syn_nr"
-			ret=1
+			fail_test
 			dump_stats=1
 		else
 			echo -n "[ ok ]"
@@ -1266,7 +1282,7 @@ chk_add_nr()
 		if [ "$count" != "$mis_ack_nr" ]; then
 			echo "[fail] got $count JOIN[s] ack with a mismatched \
 				port-number expected $mis_ack_nr"
-			ret=1
+			fail_test
 			dump_stats=1
 		else
 			echo "[ ok ]"
@@ -1311,7 +1327,7 @@ chk_rm_nr()
 	[ -z "$count" ] && count=0
 	if [ "$count" != "$rm_addr_nr" ]; then
 		echo "[fail] got $count RM_ADDR[s] expected $rm_addr_nr"
-		ret=1
+		fail_test
 		dump_stats=1
 	else
 		echo -n "[ ok ]"
@@ -1334,14 +1350,14 @@ chk_rm_nr()
 			echo "[ ok ] $suffix"
 		else
 			echo "[fail] got $count RM_SUBFLOW[s] expected in range [$rm_subflow_nr:$((rm_subflow_nr*2))]"
-			ret=1
+			fail_test
 			dump_stats=1
 		fi
 		return
 	fi
 	if [ "$count" != "$rm_subflow_nr" ]; then
 		echo "[fail] got $count RM_SUBFLOW[s] expected $rm_subflow_nr"
-		ret=1
+		fail_test
 		dump_stats=1
 	else
 		echo -n "[ ok ]"
@@ -1364,7 +1380,7 @@ chk_prio_nr()
 	[ -z "$count" ] && count=0
 	if [ "$count" != "$mp_prio_nr_tx" ]; then
 		echo "[fail] got $count MP_PRIO[s] TX expected $mp_prio_nr_tx"
-		ret=1
+		fail_test
 		dump_stats=1
 	else
 		echo -n "[ ok ]"
@@ -1375,7 +1391,7 @@ chk_prio_nr()
 	[ -z "$count" ] && count=0
 	if [ "$count" != "$mp_prio_nr_rx" ]; then
 		echo "[fail] got $count MP_PRIO[s] RX expected $mp_prio_nr_rx"
-		ret=1
+		fail_test
 		dump_stats=1
 	else
 		echo "[ ok ]"
@@ -1399,7 +1415,7 @@ chk_link_usage()
 	if [ $tx_rate -lt $((expected_rate - $tolerance)) -o \
 	     $tx_rate -gt $((expected_rate + $tolerance)) ]; then
 		echo "[fail] got $tx_rate% usage, expected $expected_rate%"
-		ret=1
+		fail_test
 	else
 		echo "[ ok ]"
 	fi
@@ -2616,4 +2632,13 @@ for subtests in "${tests[@]}"; do
 	"${subtests}"
 done
 
+if [ ${ret} -ne 0 ]; then
+	echo
+	echo "${#failed_tests[@]} failure(s) has(ve) been detected:"
+	for i in $(get_failed_tests_ids); do
+		echo -e "\t- ${i}: ${failed_tests[${i}]}"
+	done
+	echo
+fi
+
 exit $ret
-- 
cgit 


From 3469d72f135afa38599e3e0262be10108e1413dd Mon Sep 17 00:00:00 2001
From: Matthieu Baerts <matthieu.baerts@tessares.net>
Date: Wed, 9 Mar 2022 11:16:33 -0800
Subject: selftests: mptcp: join: helper to filter TCP

This is more readable and reduces duplicated commands.

This might also be useful to add v6 support and switch to nftables.

Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index d3038922a0d2..5223f2a752b9 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -561,6 +561,15 @@ pm_nl_check_endpoint()
 	fi
 }
 
+filter_tcp_from()
+{
+	local ns="${1}"
+	local src="${2}"
+	local target="${3}"
+
+	ip netns exec "${ns}" iptables -A INPUT -s "${src}" -p tcp -j "${target}"
+}
+
 do_transfer()
 {
 	listener_ns="$1"
@@ -1519,7 +1528,7 @@ subflows_error_tests()
 		pm_nl_set_limits $ns2 0 2
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
-		ip netns exec $ns1 iptables -A INPUT -s 10.0.3.2 -p tcp -j REJECT
+		filter_tcp_from $ns1 10.0.3.2 REJECT
 		run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
 		chk_join_nr 1 1 1
 	fi
@@ -1530,7 +1539,7 @@ subflows_error_tests()
 		pm_nl_set_limits $ns2 0 2
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
-		ip netns exec $ns1 iptables -A INPUT -s 10.0.3.2 -p tcp -j DROP
+		filter_tcp_from $ns1 10.0.3.2 DROP
 		run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
 		chk_join_nr 1 1 1
 	fi
@@ -1542,7 +1551,7 @@ subflows_error_tests()
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_set_limits $ns2 0 1
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
-		ip netns exec $ns1 iptables -A INPUT -s 10.0.3.2 -p tcp -j REJECT
+		filter_tcp_from $ns1 10.0.3.2 REJECT
 		run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow &
 
 		# mpj subflow will be in TW after the reset
-- 
cgit 


From 1e777bd818bd6417dbdb97676bd9ca259eada8a4 Mon Sep 17 00:00:00 2001
From: Matthieu Baerts <matthieu.baerts@tessares.net>
Date: Wed, 9 Mar 2022 11:16:34 -0800
Subject: selftests: mptcp: join: clarify local/global vars

Some vars are redefined in different places. Best to avoid this
classical Bash pitfall where variables are accidentally overridden by
other functions because the proper scope has not been defined.

Most issues are with loops: typically 'i' is used in for-loops but if it
is not global, calling a function from a for-loop also doing a for-loop
with the same non local 'i' variable causes troubles because the first
'i' will be assigned to another value. To prevent such issues, the
iterator variable is now declared as local just before the loop. If it
is always done like this, issues are avoided.

To distinct between local and non local variables, all non local ones
are defined at the beginning of the script. The others are now defined
with the "local" keyword.

Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 135 +++++++++++++++---------
 1 file changed, 83 insertions(+), 52 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 5223f2a752b9..d8dc36fcdb56 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -9,6 +9,9 @@ cin=""
 cinfail=""
 cinsent=""
 cout=""
+capout=""
+ns1=""
+ns2=""
 ksft_skip=4
 timeout_poll=30
 timeout_test=$((timeout_poll * 2 + 1))
@@ -51,12 +54,14 @@ init_partial()
 {
 	capout=$(mktemp)
 
+	local rndh
 	rndh=$(mktemp -u XXXXXX)
 
 	ns1="ns1-$rndh"
 	ns2="ns2-$rndh"
 
-	for netns in "$ns1" "$ns2";do
+	local netns
+	for netns in "$ns1" "$ns2"; do
 		ip netns add $netns || exit $ksft_skip
 		ip -net $netns link set lo up
 		ip netns exec $netns sysctl -q net.mptcp.enabled=1
@@ -77,6 +82,7 @@ init_partial()
 	# ns1eth3    ns2eth3
 	# ns1eth4    ns2eth4
 
+	local i
 	for i in `seq 1 4`; do
 		ip link add ns1eth$i netns "$ns1" type veth peer name ns2eth$i netns "$ns2"
 		ip -net "$ns1" addr add 10.0.$i.1/24 dev ns1eth$i
@@ -95,6 +101,7 @@ init_partial()
 
 init_shapers()
 {
+	local i
 	for i in `seq 1 4`; do
 		tc -n $ns1 qdisc add dev ns1eth$i root netem rate 20mbit delay 1
 		tc -n $ns2 qdisc add dev ns2eth$i root netem rate 20mbit delay 1
@@ -105,6 +112,7 @@ cleanup_partial()
 {
 	rm -f "$capout"
 
+	local netns
 	for netns in "$ns1" "$ns2"; do
 		ip netns del $netns
 		rm -f /tmp/$netns.{nstat,out}
@@ -201,7 +209,8 @@ reset_with_cookies()
 {
 	reset "${1}" || return 1
 
-	for netns in "$ns1" "$ns2";do
+	local netns
+	for netns in "$ns1" "$ns2"; do
 		ip netns exec $netns sysctl -q net.ipv4.tcp_syncookies=2
 	done
 }
@@ -276,10 +285,11 @@ print_file_err()
 
 check_transfer()
 {
-	in=$1
-	out=$2
-	what=$3
+	local in=$1
+	local out=$2
+	local what=$3
 
+	local line
 	cmp -l "$in" "$out" | while read line; do
 		local arr=($line)
 
@@ -301,9 +311,9 @@ check_transfer()
 
 do_ping()
 {
-	listener_ns="$1"
-	connector_ns="$2"
-	connect_addr="$3"
+	local listener_ns="$1"
+	local connector_ns="$2"
+	local connect_addr="$3"
 
 	ip netns exec ${connector_ns} ping -q -c 1 $connect_addr >/dev/null
 	if [ $? -ne 0 ] ; then
@@ -314,15 +324,16 @@ do_ping()
 
 link_failure()
 {
-	ns="$1"
+	local ns="$1"
 
 	if [ -z "$FAILING_LINKS" ]; then
 		l=$((RANDOM%4))
 		FAILING_LINKS=$((l+1))
 	fi
 
+	local l
 	for l in $FAILING_LINKS; do
-		veth="ns1eth$l"
+		local veth="ns1eth$l"
 		ip -net "$ns" link set "$veth" down
 	done
 }
@@ -339,9 +350,10 @@ wait_local_port_listen()
 	local listener_ns="${1}"
 	local port="${2}"
 
-	local port_hex i
-
+	local port_hex
 	port_hex="$(printf "%04X" "${port}")"
+
+	local i
 	for i in $(seq 10); do
 		ip netns exec "${listener_ns}" cat /proc/net/tcp* | \
 			awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" &&
@@ -352,7 +364,7 @@ wait_local_port_listen()
 
 rm_addr_count()
 {
-	ns=${1}
+	local ns=${1}
 
 	ip netns exec ${ns} nstat -as | grep MPTcpExtRmAddr | awk '{print $2}'
 }
@@ -363,8 +375,8 @@ wait_rm_addr()
 	local ns="${1}"
 	local old_cnt="${2}"
 	local cnt
-	local i
 
+	local i
 	for i in $(seq 10); do
 		cnt=$(rm_addr_count ${ns})
 		[ "$cnt" = "${old_cnt}" ] || break
@@ -404,12 +416,13 @@ pm_nl_add_endpoint()
 {
 	local ns=$1
 	local addr=$2
-	local flags
-	local port
-	local dev
-	local id
+	local flags _flags
+	local port _port
+	local dev _dev
+	local id _id
 	local nr=2
 
+	local p
 	for p in $@
 	do
 		if [ $p = "flags" ]; then
@@ -572,24 +585,26 @@ filter_tcp_from()
 
 do_transfer()
 {
-	listener_ns="$1"
-	connector_ns="$2"
-	cl_proto="$3"
-	srv_proto="$4"
-	connect_addr="$5"
-	test_link_fail="$6"
-	addr_nr_ns1="$7"
-	addr_nr_ns2="$8"
-	speed="$9"
-	sflags="${10}"
-
-	port=$((10000+$TEST_COUNT-1))
+	local listener_ns="$1"
+	local connector_ns="$2"
+	local cl_proto="$3"
+	local srv_proto="$4"
+	local connect_addr="$5"
+	local test_link_fail="$6"
+	local addr_nr_ns1="$7"
+	local addr_nr_ns2="$8"
+	local speed="$9"
+	local sflags="${10}"
+
+	local port=$((10000 + TEST_COUNT - 1))
+	local cappid
 
 	:> "$cout"
 	:> "$sout"
 	:> "$capout"
 
 	if [ $capture -eq 1 ]; then
+		local capuser
 		if [ -z $SUDO_USER ] ; then
 			capuser=""
 		else
@@ -643,7 +658,7 @@ do_transfer()
 				./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
 					$extra_args ${local_addr} < "$sin" > "$sout" &
 	fi
-	spid=$!
+	local spid=$!
 
 	wait_local_port_listen "${listener_ns}" "${port}"
 
@@ -666,15 +681,16 @@ do_transfer()
 					./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
 						$extra_args $connect_addr > "$cout" &
 	fi
-	cpid=$!
+	local cpid=$!
 
 	# let the mptcp subflow be established in background before
 	# do endpoint manipulation
 	[ $addr_nr_ns1 = "0" -a $addr_nr_ns2 = "0" ] || sleep 1
 
 	if [ $addr_nr_ns1 -gt 0 ]; then
+		local counter=2
+		local add_nr_ns1
 		let add_nr_ns1=addr_nr_ns1
-		counter=2
 		while [ $add_nr_ns1 -gt 0 ]; do
 			local addr
 			if is_v6 "${connect_addr}"; then
@@ -687,13 +703,16 @@ do_transfer()
 			let add_nr_ns1-=1
 		done
 	elif [ $addr_nr_ns1 -lt 0 ]; then
+		local rm_nr_ns1
 		let rm_nr_ns1=-addr_nr_ns1
 		if [ $rm_nr_ns1 -lt 8 ]; then
-			counter=0
+			local counter=0
+			local line
 			pm_nl_show_endpoints ${listener_ns} | while read line; do
 				local arr=($line)
 				local nr=0
 
+				local i
 				for i in ${arr[@]}; do
 					if [ $i = "id" ]; then
 						if [ $counter -eq $rm_nr_ns1 ]; then
@@ -715,7 +734,7 @@ do_transfer()
 		fi
 	fi
 
-	flags="subflow"
+	local flags="subflow"
 	if [[ "${addr_nr_ns2}" = "fullmesh_"* ]]; then
 		flags="${flags},fullmesh"
 		addr_nr_ns2=${addr_nr_ns2:9}
@@ -726,8 +745,9 @@ do_transfer()
 	[ $addr_nr_ns1 -gt 0 -a $addr_nr_ns2 -lt 0 ] && sleep 1
 
 	if [ $addr_nr_ns2 -gt 0 ]; then
+		local add_nr_ns2
 		let add_nr_ns2=addr_nr_ns2
-		counter=3
+		local counter=3
 		while [ $add_nr_ns2 -gt 0 ]; do
 			local addr
 			if is_v6 "${connect_addr}"; then
@@ -740,18 +760,21 @@ do_transfer()
 			let add_nr_ns2-=1
 		done
 	elif [ $addr_nr_ns2 -lt 0 ]; then
-		let rm_nr_ns2=-addr_nr_ns2
+		local rm_nr_ns2
 		if [ $rm_nr_ns2 -lt 8 ]; then
-			counter=0
+			local counter=0
+			local line
 			pm_nl_show_endpoints ${connector_ns} | while read line; do
 				local arr=($line)
 				local nr=0
 
+				local i
 				for i in ${arr[@]}; do
 					if [ $i = "id" ]; then
 						if [ $counter -eq $rm_nr_ns2 ]; then
 							break
 						fi
+						local id rm_addr
 						# rm_addr are serialized, allow the previous one to
 						# complete
 						id=${arr[$nr+1]}
@@ -778,12 +801,16 @@ do_transfer()
 
 	if [ ! -z $sflags ]; then
 		sleep 1
+
+		local netns
 		for netns in "$ns1" "$ns2"; do
+			local line
 			pm_nl_show_endpoints $netns | while read line; do
 				local arr=($line)
 				local nr=0
 				local id
 
+				local i
 				for i in ${arr[@]}; do
 					if [ $i = "id" ]; then
 						id=${arr[$nr+1]}
@@ -796,9 +823,9 @@ do_transfer()
 	fi
 
 	wait $cpid
-	retc=$?
+	local retc=$?
 	wait $spid
-	rets=$?
+	local rets=$?
 
 	if [ $capture -eq 1 ]; then
 	    sleep 1
@@ -848,9 +875,9 @@ do_transfer()
 
 make_file()
 {
-	name=$1
-	who=$2
-	size=$3
+	local name=$1
+	local who=$2
+	local size=$3
 
 	dd if=/dev/urandom of="$name" bs=1024 count=$size 2> /dev/null
 	echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name"
@@ -860,14 +887,16 @@ make_file()
 
 run_tests()
 {
-	listener_ns="$1"
-	connector_ns="$2"
-	connect_addr="$3"
-	test_linkfail="${4:-0}"
-	addr_nr_ns1="${5:-0}"
-	addr_nr_ns2="${6:-0}"
-	speed="${7:-fast}"
-	sflags="${8:-""}"
+	local listener_ns="$1"
+	local connector_ns="$2"
+	local connect_addr="$3"
+	local test_linkfail="${4:-0}"
+	local addr_nr_ns1="${5:-0}"
+	local addr_nr_ns2="${6:-0}"
+	local speed="${7:-fast}"
+	local sflags="${8:-""}"
+
+	local size
 
 	# The values above 2 are reused to make test files
 	# with the given sizes (KB)
@@ -1437,7 +1466,9 @@ wait_attempt_fail()
 	local ns=$1
 
 	while [ $time -lt $timeout_ms ]; do
-		local cnt=$(ip netns exec $ns nstat -as TcpAttemptFails | grep TcpAttemptFails | awk '{print $2}')
+		local cnt
+
+		cnt=$(ip netns exec $ns nstat -as TcpAttemptFails | grep TcpAttemptFails | awk '{print $2}')
 
 		[ "$cnt" = 1 ] && return 1
 		time=$((time + 100))
-- 
cgit 


From 4bfadd7120a1c5485a9994822242a2a163dc0814 Mon Sep 17 00:00:00 2001
From: Matthieu Baerts <matthieu.baerts@tessares.net>
Date: Wed, 9 Mar 2022 11:16:35 -0800
Subject: selftests: mptcp: join: avoid backquotes

As explained on ShellCheck's wiki [1], it is recommended to avoid
backquotes `...` in favour of parenthesis $(...):

> Backtick command substitution `...` is legacy syntax with several
> issues.
>
> - It has a series of undefined behaviors related to quoting in POSIX.
> - It imposes a custom escaping mode with surprising results.
> - It's exceptionally hard to nest.
>
> $(...) command substitution has none of these problems, and is
> therefore strongly encouraged.

[1] https://www.shellcheck.net/wiki/SC2006

Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 66 +++++++++++++------------
 1 file changed, 34 insertions(+), 32 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index d8dc36fcdb56..f5391d027af2 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -83,7 +83,7 @@ init_partial()
 	# ns1eth4    ns2eth4
 
 	local i
-	for i in `seq 1 4`; do
+	for i in $(seq 1 4); do
 		ip link add ns1eth$i netns "$ns1" type veth peer name ns2eth$i netns "$ns2"
 		ip -net "$ns1" addr add 10.0.$i.1/24 dev ns1eth$i
 		ip -net "$ns1" addr add dead:beef:$i::1/64 dev ns1eth$i nodad
@@ -102,7 +102,7 @@ init_partial()
 init_shapers()
 {
 	local i
-	for i in `seq 1 4`; do
+	for i in $(seq 1 4); do
 		tc -n $ns1 qdisc add dev ns1eth$i root netem rate 20mbit delay 1
 		tc -n $ns2 qdisc add dev ns2eth$i root netem rate 20mbit delay 1
 	done
@@ -969,7 +969,7 @@ chk_csum_nr()
 	fi
 
 	printf "%-${nr_blank}s %s" " " "sum"
-	count=`ip netns exec $ns1 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}'`
+	count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}')
 	[ -z "$count" ] && count=0
 	if [ "$count" != $csum_ns1 -a $allow_multi_errors_ns1 -eq 0 ] ||
 	   [ "$count" -lt $csum_ns1 -a $allow_multi_errors_ns1 -eq 1 ]; then
@@ -980,7 +980,7 @@ chk_csum_nr()
 		echo -n "[ ok ]"
 	fi
 	echo -n " - csum  "
-	count=`ip netns exec $ns2 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}'`
+	count=$(ip netns exec $ns2 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}')
 	[ -z "$count" ] && count=0
 	if [ "$count" != $csum_ns2 -a $allow_multi_errors_ns2 -eq 0 ] ||
 	   [ "$count" -lt $csum_ns2 -a $allow_multi_errors_ns2 -eq 1 ]; then
@@ -1001,7 +1001,7 @@ chk_fail_nr()
 	local dump_stats
 
 	printf "%-${nr_blank}s %s" " " "ftx"
-	count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPFailTx | awk '{print $2}'`
+	count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMPFailTx | awk '{print $2}')
 	[ -z "$count" ] && count=0
 	if [ "$count" != "$fail_tx" ]; then
 		echo "[fail] got $count MP_FAIL[s] TX expected $fail_tx"
@@ -1012,7 +1012,7 @@ chk_fail_nr()
 	fi
 
 	echo -n " - failrx"
-	count=`ip netns exec $ns2 nstat -as | grep MPTcpExtMPFailRx | awk '{print $2}'`
+	count=$(ip netns exec $ns2 nstat -as | grep MPTcpExtMPFailRx | awk '{print $2}')
 	[ -z "$count" ] && count=0
 	if [ "$count" != "$fail_rx" ]; then
 		echo "[fail] got $count MP_FAIL[s] RX expected $fail_rx"
@@ -1121,7 +1121,7 @@ chk_join_nr()
 	fi
 
 	printf "%03u %-36s %s" "${TEST_COUNT}" "${title}" "syn"
-	count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinSynRx | awk '{print $2}'`
+	count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinSynRx | awk '{print $2}')
 	[ -z "$count" ] && count=0
 	if [ "$count" != "$syn_nr" ]; then
 		echo "[fail] got $count JOIN[s] syn expected $syn_nr"
@@ -1132,8 +1132,8 @@ chk_join_nr()
 	fi
 
 	echo -n " - synack"
-	with_cookie=`ip netns exec $ns2 sysctl -n net.ipv4.tcp_syncookies`
-	count=`ip netns exec $ns2 nstat -as | grep MPTcpExtMPJoinSynAckRx | awk '{print $2}'`
+	with_cookie=$(ip netns exec $ns2 sysctl -n net.ipv4.tcp_syncookies)
+	count=$(ip netns exec $ns2 nstat -as | grep MPTcpExtMPJoinSynAckRx | awk '{print $2}')
 	[ -z "$count" ] && count=0
 	if [ "$count" != "$syn_ack_nr" ]; then
 		# simult connections exceeding the limit with cookie enabled could go up to
@@ -1151,7 +1151,7 @@ chk_join_nr()
 	fi
 
 	echo -n " - ack"
-	count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinAckRx | awk '{print $2}'`
+	count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinAckRx | awk '{print $2}')
 	[ -z "$count" ] && count=0
 	if [ "$count" != "$ack_nr" ]; then
 		echo "[fail] got $count JOIN[s] ack expected $ack_nr"
@@ -1184,9 +1184,9 @@ chk_stale_nr()
 	local recover_nr
 
 	printf "%-${nr_blank}s %-18s" " " "stale"
-	stale_nr=`ip netns exec $ns nstat -as | grep MPTcpExtSubflowStale | awk '{print $2}'`
+	stale_nr=$(ip netns exec $ns nstat -as | grep MPTcpExtSubflowStale | awk '{print $2}')
 	[ -z "$stale_nr" ] && stale_nr=0
-	recover_nr=`ip netns exec $ns nstat -as | grep MPTcpExtSubflowRecover | awk '{print $2}'`
+	recover_nr=$(ip netns exec $ns nstat -as | grep MPTcpExtSubflowRecover | awk '{print $2}')
 	[ -z "$recover_nr" ] && recover_nr=0
 
 	if [ $stale_nr -lt $stale_min ] ||
@@ -1222,10 +1222,10 @@ chk_add_nr()
 	local dump_stats
 	local timeout
 
-	timeout=`ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout`
+	timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout)
 
 	printf "%-${nr_blank}s %s" " " "add"
-	count=`ip netns exec $ns2 nstat -as MPTcpExtAddAddr | grep MPTcpExtAddAddr | awk '{print $2}'`
+	count=$(ip netns exec $ns2 nstat -as MPTcpExtAddAddr | grep MPTcpExtAddAddr | awk '{print $2}')
 	[ -z "$count" ] && count=0
 
 	# if the test configured a short timeout tolerate greater then expected
@@ -1239,7 +1239,7 @@ chk_add_nr()
 	fi
 
 	echo -n " - echo  "
-	count=`ip netns exec $ns1 nstat -as | grep MPTcpExtEchoAdd | awk '{print $2}'`
+	count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtEchoAdd | awk '{print $2}')
 	[ -z "$count" ] && count=0
 	if [ "$count" != "$echo_nr" ]; then
 		echo "[fail] got $count ADD_ADDR echo[s] expected $echo_nr"
@@ -1251,7 +1251,7 @@ chk_add_nr()
 
 	if [ $port_nr -gt 0 ]; then
 		echo -n " - pt "
-		count=`ip netns exec $ns2 nstat -as | grep MPTcpExtPortAdd | awk '{print $2}'`
+		count=$(ip netns exec $ns2 nstat -as | grep MPTcpExtPortAdd | awk '{print $2}')
 		[ -z "$count" ] && count=0
 		if [ "$count" != "$port_nr" ]; then
 			echo "[fail] got $count ADD_ADDR[s] with a port-number expected $port_nr"
@@ -1262,8 +1262,8 @@ chk_add_nr()
 		fi
 
 		printf "%-${nr_blank}s %s" " " "syn"
-		count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinPortSynRx |
-			awk '{print $2}'`
+		count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinPortSynRx |
+			awk '{print $2}')
 		[ -z "$count" ] && count=0
 		if [ "$count" != "$syn_nr" ]; then
 			echo "[fail] got $count JOIN[s] syn with a different \
@@ -1275,8 +1275,8 @@ chk_add_nr()
 		fi
 
 		echo -n " - synack"
-		count=`ip netns exec $ns2 nstat -as | grep MPTcpExtMPJoinPortSynAckRx |
-			awk '{print $2}'`
+		count=$(ip netns exec $ns2 nstat -as | grep MPTcpExtMPJoinPortSynAckRx |
+			awk '{print $2}')
 		[ -z "$count" ] && count=0
 		if [ "$count" != "$syn_ack_nr" ]; then
 			echo "[fail] got $count JOIN[s] synack with a different \
@@ -1288,8 +1288,8 @@ chk_add_nr()
 		fi
 
 		echo -n " - ack"
-		count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinPortAckRx |
-			awk '{print $2}'`
+		count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinPortAckRx |
+			awk '{print $2}')
 		[ -z "$count" ] && count=0
 		if [ "$count" != "$ack_nr" ]; then
 			echo "[fail] got $count JOIN[s] ack with a different \
@@ -1301,8 +1301,8 @@ chk_add_nr()
 		fi
 
 		printf "%-${nr_blank}s %s" " " "syn"
-		count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMismatchPortSynRx |
-			awk '{print $2}'`
+		count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMismatchPortSynRx |
+			awk '{print $2}')
 		[ -z "$count" ] && count=0
 		if [ "$count" != "$mis_syn_nr" ]; then
 			echo "[fail] got $count JOIN[s] syn with a mismatched \
@@ -1314,8 +1314,8 @@ chk_add_nr()
 		fi
 
 		echo -n " - ack   "
-		count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMismatchPortAckRx |
-			awk '{print $2}'`
+		count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMismatchPortAckRx |
+			awk '{print $2}')
 		[ -z "$count" ] && count=0
 		if [ "$count" != "$mis_ack_nr" ]; then
 			echo "[fail] got $count JOIN[s] ack with a mismatched \
@@ -1361,7 +1361,7 @@ chk_rm_nr()
 	fi
 
 	printf "%-${nr_blank}s %s" " " "rm "
-	count=`ip netns exec $addr_ns nstat -as | grep MPTcpExtRmAddr | awk '{print $2}'`
+	count=$(ip netns exec $addr_ns nstat -as | grep MPTcpExtRmAddr | awk '{print $2}')
 	[ -z "$count" ] && count=0
 	if [ "$count" != "$rm_addr_nr" ]; then
 		echo "[fail] got $count RM_ADDR[s] expected $rm_addr_nr"
@@ -1372,7 +1372,7 @@ chk_rm_nr()
 	fi
 
 	echo -n " - rmsf  "
-	count=`ip netns exec $subflow_ns nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}'`
+	count=$(ip netns exec $subflow_ns nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}')
 	[ -z "$count" ] && count=0
 	if [ -n "$simult" ]; then
 		local cnt=$(ip netns exec $addr_ns nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}')
@@ -1414,7 +1414,7 @@ chk_prio_nr()
 	local dump_stats
 
 	printf "%-${nr_blank}s %s" " " "ptx"
-	count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPPrioTx | awk '{print $2}'`
+	count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMPPrioTx | awk '{print $2}')
 	[ -z "$count" ] && count=0
 	if [ "$count" != "$mp_prio_nr_tx" ]; then
 		echo "[fail] got $count MP_PRIO[s] TX expected $mp_prio_nr_tx"
@@ -1425,7 +1425,7 @@ chk_prio_nr()
 	fi
 
 	echo -n " - prx   "
-	count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPPrioRx | awk '{print $2}'`
+	count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMPPrioRx | awk '{print $2}')
 	[ -z "$count" ] && count=0
 	if [ "$count" != "$mp_prio_nr_rx" ]; then
 		echo "[fail] got $count MP_PRIO[s] RX expected $mp_prio_nr_rx"
@@ -1444,8 +1444,10 @@ chk_link_usage()
 	local link=$2
 	local out=$3
 	local expected_rate=$4
-	local tx_link=`ip netns exec $ns cat /sys/class/net/$link/statistics/tx_bytes`
-	local tx_total=`ls -l $out | awk '{print $5}'`
+
+	local tx_link tx_total
+	tx_link=$(ip netns exec $ns cat /sys/class/net/$link/statistics/tx_bytes)
+	tx_total=$(ls -l $out | awk '{print $5}')
 	local tx_rate=$((tx_link * 100 / $tx_total))
 	local tolerance=5
 
-- 
cgit 


From d8d0830205302545b29c5a0eed5336d64c7b580d Mon Sep 17 00:00:00 2001
From: Matthieu Baerts <matthieu.baerts@tessares.net>
Date: Wed, 9 Mar 2022 11:16:36 -0800
Subject: selftests: mptcp: join: make it shellcheck compliant

This fixes a few issues reported by ShellCheck:

- SC2068: Double quote array expansions to avoid re-splitting elements.
- SC2206: Quote to prevent word splitting/globbing, or split robustly
          with mapfile or read -a.
- SC2166: Prefer [ p ] && [ q ] as [ p -a q ] is not well defined.
- SC2155: Declare and assign separately to avoid masking return values.
- SC2162: read without -r will mangle backslashes.
- SC2219: Instead of 'let expr', prefer (( expr )) .
- SC2181: Check exit code directly with e.g. 'if mycmd;', not indirectly
          with $?.
- SC2236: Use -n instead of ! -z.
- SC2004: $/${} is unnecessary on arithmetic variables.
- SC2012: Use find instead of ls to better handle non-alphanumeric
          filenames.
- SC2002: Useless cat. Consider 'cmd < file | ..' or 'cmd file | ..'
          instead.

SC2086 (Double quotes to prevent globbing and word splitting) is ignored
because it is controlled for the moment and there are too many to
change.

While at it, also fixed the alignment in one comment.

Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 126 +++++++++++++-----------
 1 file changed, 66 insertions(+), 60 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index f5391d027af2..7314257d248a 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -1,6 +1,11 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+# Double quotes to prevent globbing and word splitting is recommended in new
+# code but we accept it, especially because there were too many before having
+# address all other issues detected by shellcheck.
+#shellcheck disable=SC2086
+
 ret=0
 sin=""
 sinfail=""
@@ -76,7 +81,7 @@ init_partial()
 	validate_checksum=$checksum
 	FAILING_LINKS=""
 
-	#  ns1              ns2
+	#  ns1         ns2
 	# ns1eth1    ns2eth1
 	# ns1eth2    ns2eth2
 	# ns1eth3    ns2eth3
@@ -288,12 +293,11 @@ check_transfer()
 	local in=$1
 	local out=$2
 	local what=$3
+	local i a b
 
 	local line
-	cmp -l "$in" "$out" | while read line; do
-		local arr=($line)
-
-		let sum=0${arr[1]}+0${arr[2]}
+	cmp -l "$in" "$out" | while read -r i a b; do
+		local sum=$((0${a} + 0${b}))
 		if [ $check_invert -eq 0 ] || [ $sum -ne $((0xff)) ]; then
 			echo "[ FAIL ] $what does not match (in, out):"
 			print_file_err "$in"
@@ -302,7 +306,7 @@ check_transfer()
 
 			return 1
 		else
-			echo "$what has inverted byte at ${arr[0]}"
+			echo "$what has inverted byte at ${i}"
 		fi
 	done
 
@@ -315,8 +319,7 @@ do_ping()
 	local connector_ns="$2"
 	local connect_addr="$3"
 
-	ip netns exec ${connector_ns} ping -q -c 1 $connect_addr >/dev/null
-	if [ $? -ne 0 ] ; then
+	if ! ip netns exec ${connector_ns} ping -q -c 1 $connect_addr >/dev/null; then
 		echo "$listener_ns -> $connect_addr connectivity [ FAIL ]" 1>&2
 		fail_test
 	fi
@@ -423,26 +426,26 @@ pm_nl_add_endpoint()
 	local nr=2
 
 	local p
-	for p in $@
+	for p in "${@}"
 	do
 		if [ $p = "flags" ]; then
 			eval _flags=\$"$nr"
-			[ ! -z $_flags ]; flags="flags $_flags"
+			[ -n "$_flags" ]; flags="flags $_flags"
 		fi
 		if [ $p = "dev" ]; then
 			eval _dev=\$"$nr"
-			[ ! -z $_dev ]; dev="dev $_dev"
+			[ -n "$_dev" ]; dev="dev $_dev"
 		fi
 		if [ $p = "id" ]; then
 			eval _id=\$"$nr"
-			[ ! -z $_id ]; id="id $_id"
+			[ -n "$_id" ]; id="id $_id"
 		fi
 		if [ $p = "port" ]; then
 			eval _port=\$"$nr"
-			[ ! -z $_port ]; port="port $_port"
+			[ -n "$_port" ]; port="port $_port"
 		fi
 
-		let nr+=1
+		nr=$((nr + 1))
 	done
 
 	if [ $ip_mptcp -eq 1 ]; then
@@ -525,18 +528,18 @@ pm_nl_check_endpoint()
 	while [ -n "$1" ]; do
 		if [ $1 = "flags" ]; then
 			_flags=$2
-			[ ! -z $_flags ]; flags="flags $_flags"
+			[ -n "$_flags" ]; flags="flags $_flags"
 			shift
 		elif [ $1 = "dev" ]; then
-			[ ! -z $2 ]; dev="dev $1"
+			[ -n "$2" ]; dev="dev $1"
 			shift
 		elif [ $1 = "id" ]; then
 			_id=$2
-			[ ! -z $_id ]; id="id $_id"
+			[ -n "$_id" ]; id="id $_id"
 			shift
 		elif [ $1 = "port" ]; then
 			_port=$2
-			[ ! -z $_port ]; port=" port $_port"
+			[ -n "$_port" ]; port=" port $_port"
 			shift
 		fi
 
@@ -675,7 +678,7 @@ do_transfer()
 					./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
 						$extra_args $connect_addr > "$cout" &
 	else
-		cat "$cinfail" | tee "$cinsent" | \
+		tee "$cinsent" < "$cinfail" | \
 			timeout ${timeout_test} \
 				ip netns exec ${connector_ns} \
 					./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
@@ -685,12 +688,13 @@ do_transfer()
 
 	# let the mptcp subflow be established in background before
 	# do endpoint manipulation
-	[ $addr_nr_ns1 = "0" -a $addr_nr_ns2 = "0" ] || sleep 1
+	if [ $addr_nr_ns1 != "0" ] || [ $addr_nr_ns2 != "0" ]; then
+		sleep 1
+	fi
 
 	if [ $addr_nr_ns1 -gt 0 ]; then
 		local counter=2
-		local add_nr_ns1
-		let add_nr_ns1=addr_nr_ns1
+		local add_nr_ns1=${addr_nr_ns1}
 		while [ $add_nr_ns1 -gt 0 ]; do
 			local addr
 			if is_v6 "${connect_addr}"; then
@@ -699,21 +703,21 @@ do_transfer()
 				addr="10.0.$counter.1"
 			fi
 			pm_nl_add_endpoint $ns1 $addr flags signal
-			let counter+=1
-			let add_nr_ns1-=1
+			counter=$((counter + 1))
+			add_nr_ns1=$((add_nr_ns1 - 1))
 		done
 	elif [ $addr_nr_ns1 -lt 0 ]; then
-		local rm_nr_ns1
-		let rm_nr_ns1=-addr_nr_ns1
+		local rm_nr_ns1=$((-addr_nr_ns1))
 		if [ $rm_nr_ns1 -lt 8 ]; then
 			local counter=0
 			local line
-			pm_nl_show_endpoints ${listener_ns} | while read line; do
+			pm_nl_show_endpoints ${listener_ns} | while read -r line; do
+				# shellcheck disable=SC2206 # we do want to split per word
 				local arr=($line)
 				local nr=0
 
 				local i
-				for i in ${arr[@]}; do
+				for i in "${arr[@]}"; do
 					if [ $i = "id" ]; then
 						if [ $counter -eq $rm_nr_ns1 ]; then
 							break
@@ -722,9 +726,9 @@ do_transfer()
 						rm_addr=$(rm_addr_count ${connector_ns})
 						pm_nl_del_endpoint ${listener_ns} $id
 						wait_rm_addr ${connector_ns} ${rm_addr}
-						let counter+=1
+						counter=$((counter + 1))
 					fi
-					let nr+=1
+					nr=$((nr + 1))
 				done
 			done
 		elif [ $rm_nr_ns1 -eq 8 ]; then
@@ -742,11 +746,10 @@ do_transfer()
 
 	# if newly added endpoints must be deleted, give the background msk
 	# some time to created them
-	[ $addr_nr_ns1 -gt 0 -a $addr_nr_ns2 -lt 0 ] && sleep 1
+	[ $addr_nr_ns1 -gt 0 ] && [ $addr_nr_ns2 -lt 0 ] && sleep 1
 
 	if [ $addr_nr_ns2 -gt 0 ]; then
-		local add_nr_ns2
-		let add_nr_ns2=addr_nr_ns2
+		local add_nr_ns2=${addr_nr_ns2}
 		local counter=3
 		while [ $add_nr_ns2 -gt 0 ]; do
 			local addr
@@ -756,20 +759,21 @@ do_transfer()
 				addr="10.0.$counter.2"
 			fi
 			pm_nl_add_endpoint $ns2 $addr flags $flags
-			let counter+=1
-			let add_nr_ns2-=1
+			counter=$((counter + 1))
+			add_nr_ns2=$((add_nr_ns2 - 1))
 		done
 	elif [ $addr_nr_ns2 -lt 0 ]; then
-		local rm_nr_ns2
+		local rm_nr_ns2=$((-addr_nr_ns2))
 		if [ $rm_nr_ns2 -lt 8 ]; then
 			local counter=0
 			local line
-			pm_nl_show_endpoints ${connector_ns} | while read line; do
+			pm_nl_show_endpoints ${connector_ns} | while read -r line; do
+				# shellcheck disable=SC2206 # we do want to split per word
 				local arr=($line)
 				local nr=0
 
 				local i
-				for i in ${arr[@]}; do
+				for i in "${arr[@]}"; do
 					if [ $i = "id" ]; then
 						if [ $counter -eq $rm_nr_ns2 ]; then
 							break
@@ -781,9 +785,9 @@ do_transfer()
 						rm_addr=$(rm_addr_count ${listener_ns})
 						pm_nl_del_endpoint ${connector_ns} $id
 						wait_rm_addr ${listener_ns} ${rm_addr}
-						let counter+=1
+						counter=$((counter + 1))
 					fi
-					let nr+=1
+					nr=$((nr + 1))
 				done
 			done
 		elif [ $rm_nr_ns2 -eq 8 ]; then
@@ -799,23 +803,24 @@ do_transfer()
 		fi
 	fi
 
-	if [ ! -z $sflags ]; then
+	if [ -n "${sflags}" ]; then
 		sleep 1
 
 		local netns
 		for netns in "$ns1" "$ns2"; do
 			local line
-			pm_nl_show_endpoints $netns | while read line; do
+			pm_nl_show_endpoints $netns | while read -r line; do
+				# shellcheck disable=SC2206 # we do want to split per word
 				local arr=($line)
 				local nr=0
 				local id
 
 				local i
-				for i in ${arr[@]}; do
+				for i in "${arr[@]}"; do
 					if [ $i = "id" ]; then
 						id=${arr[$nr+1]}
 					fi
-					let nr+=1
+					nr=$((nr + 1))
 				done
 				pm_nl_change_endpoint $netns $id $sflags
 			done
@@ -909,14 +914,14 @@ run_tests()
 		make_file "$cinfail" "client" $size
 	# create the input file for the failure test when
 	# the first failure test run
-	elif [ "$test_linkfail" -ne 0 -a -z "$cinfail" ]; then
+	elif [ "$test_linkfail" -ne 0 ] && [ -z "$cinfail" ]; then
 		# the client file must be considerably larger
 		# of the maximum expected cwin value, or the
 		# link utilization will be not predicable
 		size=$((RANDOM%2))
 		size=$((size+1))
 		size=$((size*8192))
-		size=$((size + ( $RANDOM % 8192) ))
+		size=$((size + ( RANDOM % 8192) ))
 
 		cinfail=$(mktemp)
 		make_file "$cinfail" "client" $size
@@ -929,7 +934,7 @@ run_tests()
 			sinfail=$(mktemp)
 		fi
 		make_file "$sinfail" "server" $size
-	elif [ "$test_linkfail" -eq 2 -a -z "$sinfail" ]; then
+	elif [ "$test_linkfail" -eq 2 ] && [ -z "$sinfail" ]; then
 		size=$((RANDOM%16))
 		size=$((size+1))
 		size=$((size*2048))
@@ -971,8 +976,8 @@ chk_csum_nr()
 	printf "%-${nr_blank}s %s" " " "sum"
 	count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}')
 	[ -z "$count" ] && count=0
-	if [ "$count" != $csum_ns1 -a $allow_multi_errors_ns1 -eq 0 ] ||
-	   [ "$count" -lt $csum_ns1 -a $allow_multi_errors_ns1 -eq 1 ]; then
+	if { [ "$count" != $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 0 ]; } ||
+	   { [ "$count" -lt $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 1 ]; }; then
 		echo "[fail] got $count data checksum error[s] expected $csum_ns1"
 		fail_test
 		dump_stats=1
@@ -982,8 +987,8 @@ chk_csum_nr()
 	echo -n " - csum  "
 	count=$(ip netns exec $ns2 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}')
 	[ -z "$count" ] && count=0
-	if [ "$count" != $csum_ns2 -a $allow_multi_errors_ns2 -eq 0 ] ||
-	   [ "$count" -lt $csum_ns2 -a $allow_multi_errors_ns2 -eq 1 ]; then
+	if { [ "$count" != $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 0 ]; } ||
+	   { [ "$count" -lt $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 1 ]; }; then
 		echo "[fail] got $count data checksum error[s] expected $csum_ns2"
 		fail_test
 		dump_stats=1
@@ -1190,8 +1195,8 @@ chk_stale_nr()
 	[ -z "$recover_nr" ] && recover_nr=0
 
 	if [ $stale_nr -lt $stale_min ] ||
-	   [ $stale_max -gt 0 -a $stale_nr -gt $stale_max ] ||
-	   [ $((stale_nr - $recover_nr)) -ne $stale_delta ]; then
+	   { [ $stale_max -gt 0 ] && [ $stale_nr -gt $stale_max ]; } ||
+	   [ $((stale_nr - recover_nr)) -ne $stale_delta ]; then
 		echo "[fail] got $stale_nr stale[s] $recover_nr recover[s], " \
 		     " expected stale in range [$stale_min..$stale_max]," \
 		     " stale-recover delta $stale_delta "
@@ -1230,7 +1235,7 @@ chk_add_nr()
 
 	# if the test configured a short timeout tolerate greater then expected
 	# add addrs options, due to retransmissions
-	if [ "$count" != "$add_nr" ] && [ "$timeout" -gt 1 -o "$count" -lt "$add_nr" ]; then
+	if [ "$count" != "$add_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_nr" ]; }; then
 		echo "[fail] got $count ADD_ADDR[s] expected $add_nr"
 		fail_test
 		dump_stats=1
@@ -1375,8 +1380,9 @@ chk_rm_nr()
 	count=$(ip netns exec $subflow_ns nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}')
 	[ -z "$count" ] && count=0
 	if [ -n "$simult" ]; then
-		local cnt=$(ip netns exec $addr_ns nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}')
-		local suffix
+		local cnt suffix
+
+		cnt=$(ip netns exec $addr_ns nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}')
 
 		# in case of simult flush, the subflow removal count on each side is
 		# unreliable
@@ -1447,13 +1453,13 @@ chk_link_usage()
 
 	local tx_link tx_total
 	tx_link=$(ip netns exec $ns cat /sys/class/net/$link/statistics/tx_bytes)
-	tx_total=$(ls -l $out | awk '{print $5}')
-	local tx_rate=$((tx_link * 100 / $tx_total))
+	tx_total=$(stat --format=%s $out)
+	local tx_rate=$((tx_link * 100 / tx_total))
 	local tolerance=5
 
 	printf "%-${nr_blank}s %-18s" " " "link usage"
-	if [ $tx_rate -lt $((expected_rate - $tolerance)) -o \
-	     $tx_rate -gt $((expected_rate + $tolerance)) ]; then
+	if [ $tx_rate -lt $((expected_rate - tolerance)) ] || \
+	   [ $tx_rate -gt $((expected_rate + tolerance)) ]; then
 		echo "[fail] got $tx_rate% usage, expected $expected_rate%"
 		fail_test
 	else
-- 
cgit 


From 3daf0896f3f958b48d7747e96dd57a6b10745b76 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 9 Mar 2022 01:05:15 -0800
Subject: bpf: selftests: Update tests after s/delivery_time/tstamp/ change in
 bpf.h

The previous patch made the follow changes:
- s/delivery_time_type/tstamp_type/
- s/bpf_skb_set_delivery_time/bpf_skb_set_tstamp/
- BPF_SKB_DELIVERY_TIME_* to BPF_SKB_TSTAMP_*

This patch is to change the test_tc_dtime.c to reflect the above.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20220309090515.3712742-1-kafai@fb.com
---
 tools/testing/selftests/bpf/progs/test_tc_dtime.c | 38 +++++++++++------------
 1 file changed, 19 insertions(+), 19 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/test_tc_dtime.c b/tools/testing/selftests/bpf/progs/test_tc_dtime.c
index 9d9e8e17b8a0..06f300d06dbd 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_dtime.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_dtime.c
@@ -174,13 +174,13 @@ int egress_host(struct __sk_buff *skb)
 		return TC_ACT_OK;
 
 	if (skb_proto(skb_type) == IPPROTO_TCP) {
-		if (skb->delivery_time_type == BPF_SKB_DELIVERY_TIME_MONO &&
+		if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO &&
 		    skb->tstamp)
 			inc_dtimes(EGRESS_ENDHOST);
 		else
 			inc_errs(EGRESS_ENDHOST);
 	} else {
-		if (skb->delivery_time_type == BPF_SKB_DELIVERY_TIME_UNSPEC &&
+		if (skb->tstamp_type == BPF_SKB_TSTAMP_UNSPEC &&
 		    skb->tstamp)
 			inc_dtimes(EGRESS_ENDHOST);
 		else
@@ -204,7 +204,7 @@ int ingress_host(struct __sk_buff *skb)
 	if (!skb_type)
 		return TC_ACT_OK;
 
-	if (skb->delivery_time_type == BPF_SKB_DELIVERY_TIME_MONO &&
+	if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO &&
 	    skb->tstamp == EGRESS_FWDNS_MAGIC)
 		inc_dtimes(INGRESS_ENDHOST);
 	else
@@ -226,7 +226,7 @@ int ingress_fwdns_prio100(struct __sk_buff *skb)
 		return TC_ACT_OK;
 
 	/* delivery_time is only available to the ingress
-	 * if the tc-bpf checks the skb->delivery_time_type.
+	 * if the tc-bpf checks the skb->tstamp_type.
 	 */
 	if (skb->tstamp == EGRESS_ENDHOST_MAGIC)
 		inc_errs(INGRESS_FWDNS_P100);
@@ -250,7 +250,7 @@ int egress_fwdns_prio100(struct __sk_buff *skb)
 		return TC_ACT_OK;
 
 	/* delivery_time is always available to egress even
-	 * the tc-bpf did not use the delivery_time_type.
+	 * the tc-bpf did not use the tstamp_type.
 	 */
 	if (skb->tstamp == INGRESS_FWDNS_MAGIC)
 		inc_dtimes(EGRESS_FWDNS_P100);
@@ -278,9 +278,9 @@ int ingress_fwdns_prio101(struct __sk_buff *skb)
 	if (skb_proto(skb_type) == IPPROTO_UDP)
 		expected_dtime = 0;
 
-	if (skb->delivery_time_type) {
+	if (skb->tstamp_type) {
 		if (fwdns_clear_dtime() ||
-		    skb->delivery_time_type != BPF_SKB_DELIVERY_TIME_MONO ||
+		    skb->tstamp_type != BPF_SKB_TSTAMP_DELIVERY_MONO ||
 		    skb->tstamp != expected_dtime)
 			inc_errs(INGRESS_FWDNS_P101);
 		else
@@ -290,14 +290,14 @@ int ingress_fwdns_prio101(struct __sk_buff *skb)
 			inc_errs(INGRESS_FWDNS_P101);
 	}
 
-	if (skb->delivery_time_type == BPF_SKB_DELIVERY_TIME_MONO) {
+	if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO) {
 		skb->tstamp = INGRESS_FWDNS_MAGIC;
 	} else {
-		if (bpf_skb_set_delivery_time(skb, INGRESS_FWDNS_MAGIC,
-					      BPF_SKB_DELIVERY_TIME_MONO))
+		if (bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC,
+				       BPF_SKB_TSTAMP_DELIVERY_MONO))
 			inc_errs(SET_DTIME);
-		if (!bpf_skb_set_delivery_time(skb, INGRESS_FWDNS_MAGIC,
-					       BPF_SKB_DELIVERY_TIME_UNSPEC))
+		if (!bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC,
+					BPF_SKB_TSTAMP_UNSPEC))
 			inc_errs(SET_DTIME);
 	}
 
@@ -320,9 +320,9 @@ int egress_fwdns_prio101(struct __sk_buff *skb)
 		/* Should have handled in prio100 */
 		return TC_ACT_SHOT;
 
-	if (skb->delivery_time_type) {
+	if (skb->tstamp_type) {
 		if (fwdns_clear_dtime() ||
-		    skb->delivery_time_type != BPF_SKB_DELIVERY_TIME_MONO ||
+		    skb->tstamp_type != BPF_SKB_TSTAMP_DELIVERY_MONO ||
 		    skb->tstamp != INGRESS_FWDNS_MAGIC)
 			inc_errs(EGRESS_FWDNS_P101);
 		else
@@ -332,14 +332,14 @@ int egress_fwdns_prio101(struct __sk_buff *skb)
 			inc_errs(EGRESS_FWDNS_P101);
 	}
 
-	if (skb->delivery_time_type == BPF_SKB_DELIVERY_TIME_MONO) {
+	if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO) {
 		skb->tstamp = EGRESS_FWDNS_MAGIC;
 	} else {
-		if (bpf_skb_set_delivery_time(skb, EGRESS_FWDNS_MAGIC,
-					      BPF_SKB_DELIVERY_TIME_MONO))
+		if (bpf_skb_set_tstamp(skb, EGRESS_FWDNS_MAGIC,
+				       BPF_SKB_TSTAMP_DELIVERY_MONO))
 			inc_errs(SET_DTIME);
-		if (!bpf_skb_set_delivery_time(skb, EGRESS_FWDNS_MAGIC,
-					       BPF_SKB_DELIVERY_TIME_UNSPEC))
+		if (!bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC,
+					BPF_SKB_TSTAMP_UNSPEC))
 			inc_errs(SET_DTIME);
 	}
 
-- 
cgit 


From 2746de3c53d64436a5a565e87d74b65d82ab6ac7 Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Wed, 2 Mar 2022 12:13:59 +0100
Subject: selftests/bpf: Move sample generation code to ima_test_common()

Move sample generator code to ima_test_common() so that the new function
can be called by multiple LSM hooks.

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220302111404.193900-5-roberto.sassu@huawei.com
---
 tools/testing/selftests/bpf/progs/ima.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/ima.c b/tools/testing/selftests/bpf/progs/ima.c
index 96060ff4ffc6..b5a0de50d1b4 100644
--- a/tools/testing/selftests/bpf/progs/ima.c
+++ b/tools/testing/selftests/bpf/progs/ima.c
@@ -18,8 +18,7 @@ struct {
 
 char _license[] SEC("license") = "GPL";
 
-SEC("lsm.s/bprm_committed_creds")
-void BPF_PROG(ima, struct linux_binprm *bprm)
+static void ima_test_common(struct file *file)
 {
 	u64 ima_hash = 0;
 	u64 *sample;
@@ -28,7 +27,7 @@ void BPF_PROG(ima, struct linux_binprm *bprm)
 
 	pid = bpf_get_current_pid_tgid() >> 32;
 	if (pid == monitored_pid) {
-		ret = bpf_ima_inode_hash(bprm->file->f_inode, &ima_hash,
+		ret = bpf_ima_inode_hash(file->f_inode, &ima_hash,
 					 sizeof(ima_hash));
 		if (ret < 0 || ima_hash == 0)
 			return;
@@ -43,3 +42,9 @@ void BPF_PROG(ima, struct linux_binprm *bprm)
 
 	return;
 }
+
+SEC("lsm.s/bprm_committed_creds")
+void BPF_PROG(bprm_committed_creds, struct linux_binprm *bprm)
+{
+	ima_test_common(bprm->file);
+}
-- 
cgit 


From 27a77d0d460cdeec57fda2bb6c4f8820ab6e8b38 Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Wed, 2 Mar 2022 12:14:00 +0100
Subject: selftests/bpf: Add test for bpf_ima_file_hash()

Add new test to ensure that bpf_ima_file_hash() returns the digest of the
executed files.

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220302111404.193900-6-roberto.sassu@huawei.com
---
 tools/testing/selftests/bpf/prog_tests/test_ima.c | 43 ++++++++++++++++++++---
 tools/testing/selftests/bpf/progs/ima.c           | 10 ++++--
 2 files changed, 47 insertions(+), 6 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/test_ima.c b/tools/testing/selftests/bpf/prog_tests/test_ima.c
index 97d8a6f84f4a..acc911ba63fd 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_ima.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_ima.c
@@ -13,6 +13,8 @@
 
 #include "ima.skel.h"
 
+#define MAX_SAMPLES 2
+
 static int run_measured_process(const char *measured_dir, u32 *monitored_pid)
 {
 	int child_pid, child_status;
@@ -32,14 +34,25 @@ static int run_measured_process(const char *measured_dir, u32 *monitored_pid)
 	return -EINVAL;
 }
 
-static u64 ima_hash_from_bpf;
+static u64 ima_hash_from_bpf[MAX_SAMPLES];
+static int ima_hash_from_bpf_idx;
 
 static int process_sample(void *ctx, void *data, size_t len)
 {
-	ima_hash_from_bpf = *((u64 *)data);
+	if (ima_hash_from_bpf_idx >= MAX_SAMPLES)
+		return -ENOSPC;
+
+	ima_hash_from_bpf[ima_hash_from_bpf_idx++] = *((u64 *)data);
 	return 0;
 }
 
+static void test_init(struct ima__bss *bss)
+{
+	ima_hash_from_bpf_idx = 0;
+
+	bss->use_ima_file_hash = false;
+}
+
 void test_test_ima(void)
 {
 	char measured_dir_template[] = "/tmp/ima_measuredXXXXXX";
@@ -72,13 +85,35 @@ void test_test_ima(void)
 	if (CHECK(err, "failed to run command", "%s, errno = %d\n", cmd, errno))
 		goto close_clean;
 
+	/*
+	 * Test #1
+	 * - Goal: obtain a sample with the bpf_ima_inode_hash() helper
+	 * - Expected result:  1 sample (/bin/true)
+	 */
+	test_init(skel->bss);
 	err = run_measured_process(measured_dir, &skel->bss->monitored_pid);
-	if (CHECK(err, "run_measured_process", "err = %d\n", err))
+	if (CHECK(err, "run_measured_process #1", "err = %d\n", err))
 		goto close_clean;
 
 	err = ring_buffer__consume(ringbuf);
 	ASSERT_EQ(err, 1, "num_samples_or_err");
-	ASSERT_NEQ(ima_hash_from_bpf, 0, "ima_hash");
+	ASSERT_NEQ(ima_hash_from_bpf[0], 0, "ima_hash");
+
+	/*
+	 * Test #2
+	 * - Goal: obtain samples with the bpf_ima_file_hash() helper
+	 * - Expected result: 2 samples (./ima_setup.sh, /bin/true)
+	 */
+	test_init(skel->bss);
+	skel->bss->use_ima_file_hash = true;
+	err = run_measured_process(measured_dir, &skel->bss->monitored_pid);
+	if (CHECK(err, "run_measured_process #2", "err = %d\n", err))
+		goto close_clean;
+
+	err = ring_buffer__consume(ringbuf);
+	ASSERT_EQ(err, 2, "num_samples_or_err");
+	ASSERT_NEQ(ima_hash_from_bpf[0], 0, "ima_hash");
+	ASSERT_NEQ(ima_hash_from_bpf[1], 0, "ima_hash");
 
 close_clean:
 	snprintf(cmd, sizeof(cmd), "./ima_setup.sh cleanup %s", measured_dir);
diff --git a/tools/testing/selftests/bpf/progs/ima.c b/tools/testing/selftests/bpf/progs/ima.c
index b5a0de50d1b4..e0b073dcfb5d 100644
--- a/tools/testing/selftests/bpf/progs/ima.c
+++ b/tools/testing/selftests/bpf/progs/ima.c
@@ -18,6 +18,8 @@ struct {
 
 char _license[] SEC("license") = "GPL";
 
+bool use_ima_file_hash;
+
 static void ima_test_common(struct file *file)
 {
 	u64 ima_hash = 0;
@@ -27,8 +29,12 @@ static void ima_test_common(struct file *file)
 
 	pid = bpf_get_current_pid_tgid() >> 32;
 	if (pid == monitored_pid) {
-		ret = bpf_ima_inode_hash(file->f_inode, &ima_hash,
-					 sizeof(ima_hash));
+		if (!use_ima_file_hash)
+			ret = bpf_ima_inode_hash(file->f_inode, &ima_hash,
+						 sizeof(ima_hash));
+		else
+			ret = bpf_ima_file_hash(file, &ima_hash,
+						sizeof(ima_hash));
 		if (ret < 0 || ima_hash == 0)
 			return;
 
-- 
cgit 


From 91e8fa254dbd0890c34286acdc12e96412305840 Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Wed, 2 Mar 2022 12:14:01 +0100
Subject: selftests/bpf: Check if the digest is refreshed after a file write

Verify that bpf_ima_inode_hash() returns a non-fresh digest after a file
write, and that bpf_ima_file_hash() returns a fresh digest. Verification is
done by requesting the digest from the bprm_creds_for_exec hook, called
before ima_bprm_check().

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220302111404.193900-7-roberto.sassu@huawei.com
---
 tools/testing/selftests/bpf/ima_setup.sh          | 24 +++++++-
 tools/testing/selftests/bpf/prog_tests/test_ima.c | 72 ++++++++++++++++++++++-
 tools/testing/selftests/bpf/progs/ima.c           | 11 ++++
 3 files changed, 103 insertions(+), 4 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/ima_setup.sh b/tools/testing/selftests/bpf/ima_setup.sh
index 8e62581113a3..a3de1cd43ba0 100755
--- a/tools/testing/selftests/bpf/ima_setup.sh
+++ b/tools/testing/selftests/bpf/ima_setup.sh
@@ -12,7 +12,7 @@ LOG_FILE="$(mktemp /tmp/ima_setup.XXXX.log)"
 
 usage()
 {
-	echo "Usage: $0 <setup|cleanup|run> <existing_tmp_dir>"
+	echo "Usage: $0 <setup|cleanup|run|modify-bin|restore-bin> <existing_tmp_dir>"
 	exit 1
 }
 
@@ -77,6 +77,24 @@ run()
 	exec "${copied_bin_path}"
 }
 
+modify_bin()
+{
+	local tmp_dir="$1"
+	local mount_dir="${tmp_dir}/mnt"
+	local copied_bin_path="${mount_dir}/$(basename ${TEST_BINARY})"
+
+	echo "mod" >> "${copied_bin_path}"
+}
+
+restore_bin()
+{
+	local tmp_dir="$1"
+	local mount_dir="${tmp_dir}/mnt"
+	local copied_bin_path="${mount_dir}/$(basename ${TEST_BINARY})"
+
+	truncate -s -4 "${copied_bin_path}"
+}
+
 catch()
 {
 	local exit_code="$1"
@@ -105,6 +123,10 @@ main()
 		cleanup "${tmp_dir}"
 	elif [[ "${action}" == "run" ]]; then
 		run "${tmp_dir}"
+	elif [[ "${action}" == "modify-bin" ]]; then
+		modify_bin "${tmp_dir}"
+	elif [[ "${action}" == "restore-bin" ]]; then
+		restore_bin "${tmp_dir}"
 	else
 		echo "Unknown action: ${action}"
 		exit 1
diff --git a/tools/testing/selftests/bpf/prog_tests/test_ima.c b/tools/testing/selftests/bpf/prog_tests/test_ima.c
index acc911ba63fd..a0cfba0b4273 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_ima.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_ima.c
@@ -13,16 +13,17 @@
 
 #include "ima.skel.h"
 
-#define MAX_SAMPLES 2
+#define MAX_SAMPLES 4
 
-static int run_measured_process(const char *measured_dir, u32 *monitored_pid)
+static int _run_measured_process(const char *measured_dir, u32 *monitored_pid,
+				 const char *cmd)
 {
 	int child_pid, child_status;
 
 	child_pid = fork();
 	if (child_pid == 0) {
 		*monitored_pid = getpid();
-		execlp("./ima_setup.sh", "./ima_setup.sh", "run", measured_dir,
+		execlp("./ima_setup.sh", "./ima_setup.sh", cmd, measured_dir,
 		       NULL);
 		exit(errno);
 
@@ -34,6 +35,11 @@ static int run_measured_process(const char *measured_dir, u32 *monitored_pid)
 	return -EINVAL;
 }
 
+static int run_measured_process(const char *measured_dir, u32 *monitored_pid)
+{
+	return _run_measured_process(measured_dir, monitored_pid, "run");
+}
+
 static u64 ima_hash_from_bpf[MAX_SAMPLES];
 static int ima_hash_from_bpf_idx;
 
@@ -51,6 +57,7 @@ static void test_init(struct ima__bss *bss)
 	ima_hash_from_bpf_idx = 0;
 
 	bss->use_ima_file_hash = false;
+	bss->enable_bprm_creds_for_exec = false;
 }
 
 void test_test_ima(void)
@@ -58,6 +65,7 @@ void test_test_ima(void)
 	char measured_dir_template[] = "/tmp/ima_measuredXXXXXX";
 	struct ring_buffer *ringbuf = NULL;
 	const char *measured_dir;
+	u64 bin_true_sample;
 	char cmd[256];
 
 	int err, duration = 0;
@@ -114,6 +122,64 @@ void test_test_ima(void)
 	ASSERT_EQ(err, 2, "num_samples_or_err");
 	ASSERT_NEQ(ima_hash_from_bpf[0], 0, "ima_hash");
 	ASSERT_NEQ(ima_hash_from_bpf[1], 0, "ima_hash");
+	bin_true_sample = ima_hash_from_bpf[1];
+
+	/*
+	 * Test #3
+	 * - Goal: confirm that bpf_ima_inode_hash() returns a non-fresh digest
+	 * - Expected result: 2 samples (/bin/true: non-fresh, fresh)
+	 */
+	test_init(skel->bss);
+
+	err = _run_measured_process(measured_dir, &skel->bss->monitored_pid,
+				    "modify-bin");
+	if (CHECK(err, "modify-bin #3", "err = %d\n", err))
+		goto close_clean;
+
+	skel->bss->enable_bprm_creds_for_exec = true;
+	err = run_measured_process(measured_dir, &skel->bss->monitored_pid);
+	if (CHECK(err, "run_measured_process #3", "err = %d\n", err))
+		goto close_clean;
+
+	err = ring_buffer__consume(ringbuf);
+	ASSERT_EQ(err, 2, "num_samples_or_err");
+	ASSERT_NEQ(ima_hash_from_bpf[0], 0, "ima_hash");
+	ASSERT_NEQ(ima_hash_from_bpf[1], 0, "ima_hash");
+	ASSERT_EQ(ima_hash_from_bpf[0], bin_true_sample, "sample_equal_or_err");
+	/* IMA refreshed the digest. */
+	ASSERT_NEQ(ima_hash_from_bpf[1], bin_true_sample,
+		   "sample_different_or_err");
+
+	/*
+	 * Test #4
+	 * - Goal: verify that bpf_ima_file_hash() returns a fresh digest
+	 * - Expected result: 4 samples (./ima_setup.sh: fresh, fresh;
+	 *                               /bin/true: fresh, fresh)
+	 */
+	test_init(skel->bss);
+	skel->bss->use_ima_file_hash = true;
+	skel->bss->enable_bprm_creds_for_exec = true;
+	err = run_measured_process(measured_dir, &skel->bss->monitored_pid);
+	if (CHECK(err, "run_measured_process #4", "err = %d\n", err))
+		goto close_clean;
+
+	err = ring_buffer__consume(ringbuf);
+	ASSERT_EQ(err, 4, "num_samples_or_err");
+	ASSERT_NEQ(ima_hash_from_bpf[0], 0, "ima_hash");
+	ASSERT_NEQ(ima_hash_from_bpf[1], 0, "ima_hash");
+	ASSERT_NEQ(ima_hash_from_bpf[2], 0, "ima_hash");
+	ASSERT_NEQ(ima_hash_from_bpf[3], 0, "ima_hash");
+	ASSERT_NEQ(ima_hash_from_bpf[2], bin_true_sample,
+		   "sample_different_or_err");
+	ASSERT_EQ(ima_hash_from_bpf[3], ima_hash_from_bpf[2],
+		  "sample_equal_or_err");
+
+	skel->bss->use_ima_file_hash = false;
+	skel->bss->enable_bprm_creds_for_exec = false;
+	err = _run_measured_process(measured_dir, &skel->bss->monitored_pid,
+				    "restore-bin");
+	if (CHECK(err, "restore-bin #3", "err = %d\n", err))
+		goto close_clean;
 
 close_clean:
 	snprintf(cmd, sizeof(cmd), "./ima_setup.sh cleanup %s", measured_dir);
diff --git a/tools/testing/selftests/bpf/progs/ima.c b/tools/testing/selftests/bpf/progs/ima.c
index e0b073dcfb5d..9633e5f2453d 100644
--- a/tools/testing/selftests/bpf/progs/ima.c
+++ b/tools/testing/selftests/bpf/progs/ima.c
@@ -19,6 +19,7 @@ struct {
 char _license[] SEC("license") = "GPL";
 
 bool use_ima_file_hash;
+bool enable_bprm_creds_for_exec;
 
 static void ima_test_common(struct file *file)
 {
@@ -54,3 +55,13 @@ void BPF_PROG(bprm_committed_creds, struct linux_binprm *bprm)
 {
 	ima_test_common(bprm->file);
 }
+
+SEC("lsm.s/bprm_creds_for_exec")
+int BPF_PROG(bprm_creds_for_exec, struct linux_binprm *bprm)
+{
+	if (!enable_bprm_creds_for_exec)
+		return 0;
+
+	ima_test_common(bprm->file);
+	return 0;
+}
-- 
cgit 


From e6dcf7bbf37c9ae72b0bc3a09d5f91dd1f5c19e1 Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Wed, 2 Mar 2022 12:14:03 +0100
Subject: selftests/bpf: Add test for bpf_lsm_kernel_read_file()

Test the ability of bpf_lsm_kernel_read_file() to call the sleepable
functions bpf_ima_inode_hash() or bpf_ima_file_hash() to obtain a
measurement of a loaded IMA policy.

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220302111404.193900-9-roberto.sassu@huawei.com
---
 tools/testing/selftests/bpf/ima_setup.sh          | 13 ++++++++++++-
 tools/testing/selftests/bpf/prog_tests/test_ima.c | 19 +++++++++++++++++++
 tools/testing/selftests/bpf/progs/ima.c           | 18 ++++++++++++++++++
 3 files changed, 49 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/ima_setup.sh b/tools/testing/selftests/bpf/ima_setup.sh
index a3de1cd43ba0..8ecead4ccad0 100755
--- a/tools/testing/selftests/bpf/ima_setup.sh
+++ b/tools/testing/selftests/bpf/ima_setup.sh
@@ -12,7 +12,7 @@ LOG_FILE="$(mktemp /tmp/ima_setup.XXXX.log)"
 
 usage()
 {
-	echo "Usage: $0 <setup|cleanup|run|modify-bin|restore-bin> <existing_tmp_dir>"
+	echo "Usage: $0 <setup|cleanup|run|modify-bin|restore-bin|load-policy> <existing_tmp_dir>"
 	exit 1
 }
 
@@ -51,6 +51,7 @@ setup()
 
 	ensure_mount_securityfs
 	echo "measure func=BPRM_CHECK fsuuid=${mount_uuid}" > ${IMA_POLICY_FILE}
+	echo "measure func=BPRM_CHECK fsuuid=${mount_uuid}" > ${mount_dir}/policy_test
 }
 
 cleanup() {
@@ -95,6 +96,14 @@ restore_bin()
 	truncate -s -4 "${copied_bin_path}"
 }
 
+load_policy()
+{
+	local tmp_dir="$1"
+	local mount_dir="${tmp_dir}/mnt"
+
+	echo ${mount_dir}/policy_test > ${IMA_POLICY_FILE} 2> /dev/null
+}
+
 catch()
 {
 	local exit_code="$1"
@@ -127,6 +136,8 @@ main()
 		modify_bin "${tmp_dir}"
 	elif [[ "${action}" == "restore-bin" ]]; then
 		restore_bin "${tmp_dir}"
+	elif [[ "${action}" == "load-policy" ]]; then
+		load_policy "${tmp_dir}"
 	else
 		echo "Unknown action: ${action}"
 		exit 1
diff --git a/tools/testing/selftests/bpf/prog_tests/test_ima.c b/tools/testing/selftests/bpf/prog_tests/test_ima.c
index a0cfba0b4273..b13a141c4220 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_ima.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_ima.c
@@ -58,6 +58,7 @@ static void test_init(struct ima__bss *bss)
 
 	bss->use_ima_file_hash = false;
 	bss->enable_bprm_creds_for_exec = false;
+	bss->enable_kernel_read_file = false;
 }
 
 void test_test_ima(void)
@@ -181,6 +182,24 @@ void test_test_ima(void)
 	if (CHECK(err, "restore-bin #3", "err = %d\n", err))
 		goto close_clean;
 
+	/*
+	 * Test #5
+	 * - Goal: obtain a sample from the kernel_read_file hook
+	 * - Expected result: 2 samples (./ima_setup.sh, policy_test)
+	 */
+	test_init(skel->bss);
+	skel->bss->use_ima_file_hash = true;
+	skel->bss->enable_kernel_read_file = true;
+	err = _run_measured_process(measured_dir, &skel->bss->monitored_pid,
+				    "load-policy");
+	if (CHECK(err, "run_measured_process #5", "err = %d\n", err))
+		goto close_clean;
+
+	err = ring_buffer__consume(ringbuf);
+	ASSERT_EQ(err, 2, "num_samples_or_err");
+	ASSERT_NEQ(ima_hash_from_bpf[0], 0, "ima_hash");
+	ASSERT_NEQ(ima_hash_from_bpf[1], 0, "ima_hash");
+
 close_clean:
 	snprintf(cmd, sizeof(cmd), "./ima_setup.sh cleanup %s", measured_dir);
 	err = system(cmd);
diff --git a/tools/testing/selftests/bpf/progs/ima.c b/tools/testing/selftests/bpf/progs/ima.c
index 9633e5f2453d..e3ce943c5c3d 100644
--- a/tools/testing/selftests/bpf/progs/ima.c
+++ b/tools/testing/selftests/bpf/progs/ima.c
@@ -20,6 +20,7 @@ char _license[] SEC("license") = "GPL";
 
 bool use_ima_file_hash;
 bool enable_bprm_creds_for_exec;
+bool enable_kernel_read_file;
 
 static void ima_test_common(struct file *file)
 {
@@ -65,3 +66,20 @@ int BPF_PROG(bprm_creds_for_exec, struct linux_binprm *bprm)
 	ima_test_common(bprm->file);
 	return 0;
 }
+
+SEC("lsm.s/kernel_read_file")
+int BPF_PROG(kernel_read_file, struct file *file, enum kernel_read_file_id id,
+	     bool contents)
+{
+	if (!enable_kernel_read_file)
+		return 0;
+
+	if (!contents)
+		return 0;
+
+	if (id != READING_POLICY)
+		return 0;
+
+	ima_test_common(file);
+	return 0;
+}
-- 
cgit 


From 7bae42b68d7f070a346fde4c7c1ce182f2284933 Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Wed, 2 Mar 2022 12:14:04 +0100
Subject: selftests/bpf: Check that bpf_kernel_read_file() denies reading IMA
 policy

Check that bpf_kernel_read_file() denies the reading of an IMA policy, by
ensuring that ima_setup.sh exits with an error.

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220302111404.193900-10-roberto.sassu@huawei.com
---
 tools/testing/selftests/bpf/prog_tests/test_ima.c | 17 +++++++++++++++++
 tools/testing/selftests/bpf/progs/ima.c           | 18 ++++++++++++++++++
 2 files changed, 35 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/test_ima.c b/tools/testing/selftests/bpf/prog_tests/test_ima.c
index b13a141c4220..b13feceb38f1 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_ima.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_ima.c
@@ -59,6 +59,7 @@ static void test_init(struct ima__bss *bss)
 	bss->use_ima_file_hash = false;
 	bss->enable_bprm_creds_for_exec = false;
 	bss->enable_kernel_read_file = false;
+	bss->test_deny = false;
 }
 
 void test_test_ima(void)
@@ -200,6 +201,22 @@ void test_test_ima(void)
 	ASSERT_NEQ(ima_hash_from_bpf[0], 0, "ima_hash");
 	ASSERT_NEQ(ima_hash_from_bpf[1], 0, "ima_hash");
 
+	/*
+	 * Test #6
+	 * - Goal: ensure that the kernel_read_file hook denies an operation
+	 * - Expected result: 0 samples
+	 */
+	test_init(skel->bss);
+	skel->bss->enable_kernel_read_file = true;
+	skel->bss->test_deny = true;
+	err = _run_measured_process(measured_dir, &skel->bss->monitored_pid,
+				    "load-policy");
+	if (CHECK(!err, "run_measured_process #6", "err = %d\n", err))
+		goto close_clean;
+
+	err = ring_buffer__consume(ringbuf);
+	ASSERT_EQ(err, 0, "num_samples_or_err");
+
 close_clean:
 	snprintf(cmd, sizeof(cmd), "./ima_setup.sh cleanup %s", measured_dir);
 	err = system(cmd);
diff --git a/tools/testing/selftests/bpf/progs/ima.c b/tools/testing/selftests/bpf/progs/ima.c
index e3ce943c5c3d..e16a2c208481 100644
--- a/tools/testing/selftests/bpf/progs/ima.c
+++ b/tools/testing/selftests/bpf/progs/ima.c
@@ -21,6 +21,7 @@ char _license[] SEC("license") = "GPL";
 bool use_ima_file_hash;
 bool enable_bprm_creds_for_exec;
 bool enable_kernel_read_file;
+bool test_deny;
 
 static void ima_test_common(struct file *file)
 {
@@ -51,6 +52,17 @@ static void ima_test_common(struct file *file)
 	return;
 }
 
+static int ima_test_deny(void)
+{
+	u32 pid;
+
+	pid = bpf_get_current_pid_tgid() >> 32;
+	if (pid == monitored_pid && test_deny)
+		return -EPERM;
+
+	return 0;
+}
+
 SEC("lsm.s/bprm_committed_creds")
 void BPF_PROG(bprm_committed_creds, struct linux_binprm *bprm)
 {
@@ -71,6 +83,8 @@ SEC("lsm.s/kernel_read_file")
 int BPF_PROG(kernel_read_file, struct file *file, enum kernel_read_file_id id,
 	     bool contents)
 {
+	int ret;
+
 	if (!enable_kernel_read_file)
 		return 0;
 
@@ -80,6 +94,10 @@ int BPF_PROG(kernel_read_file, struct file *file, enum kernel_read_file_id id,
 	if (id != READING_POLICY)
 		return 0;
 
+	ret = ima_test_deny();
+	if (ret < 0)
+		return ret;
+
 	ima_test_common(file);
 	return 0;
 }
-- 
cgit 


From c09df4bd3a915079eec5e77160366225a28699a2 Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen <toke@redhat.com>
Date: Thu, 10 Mar 2022 23:56:21 +0100
Subject: selftests/bpf: Add a test for maximum packet size in xdp_do_redirect
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This adds an extra test to the xdp_do_redirect selftest for XDP live packet
mode, which verifies that the maximum permissible packet size is accepted
without any errors, and that a too big packet is correctly rejected.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20220310225621.53374-2-toke@redhat.com
---
 .../selftests/bpf/prog_tests/xdp_do_redirect.c     | 24 ++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
index 9926b07e38c8..a50971c6cf4a 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
@@ -62,6 +62,28 @@ static int attach_tc_prog(struct bpf_tc_hook *hook, int fd)
 	return 0;
 }
 
+/* The maximum permissible size is: PAGE_SIZE - sizeof(struct xdp_page_head) -
+ * sizeof(struct skb_shared_info) - XDP_PACKET_HEADROOM = 3368 bytes
+ */
+#define MAX_PKT_SIZE 3368
+static void test_max_pkt_size(int fd)
+{
+	char data[MAX_PKT_SIZE + 1] = {};
+	int err;
+	DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+			    .data_in = &data,
+			    .data_size_in = MAX_PKT_SIZE,
+			    .flags = BPF_F_TEST_XDP_LIVE_FRAMES,
+			    .repeat = 1,
+		);
+	err = bpf_prog_test_run_opts(fd, &opts);
+	ASSERT_OK(err, "prog_run_max_size");
+
+	opts.data_size_in += 1;
+	err = bpf_prog_test_run_opts(fd, &opts);
+	ASSERT_EQ(err, -EINVAL, "prog_run_too_big");
+}
+
 #define NUM_PKTS 10000
 void test_xdp_do_redirect(void)
 {
@@ -167,6 +189,8 @@ void test_xdp_do_redirect(void)
 	ASSERT_EQ(skel->bss->pkts_seen_zero, 2, "pkt_count_zero");
 	ASSERT_EQ(skel->bss->pkts_seen_tc, NUM_PKTS - 2, "pkt_count_tc");
 
+	test_max_pkt_size(bpf_program__fd(skel->progs.xdp_count_pkts));
+
 out_tc:
 	bpf_tc_hook_destroy(&tc_hook);
 out:
-- 
cgit 


From d3b351f65bf42ccda1f686de3ccb21ea1a0c4f5a Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Thu, 10 Mar 2022 16:37:21 -0800
Subject: selftests/bpf: Fix a clang compilation error for send_signal.c

Building selftests/bpf with latest clang compiler (clang15 built
from source), I hit the following compilation error:

  /.../prog_tests/send_signal.c:43:16: error: variable 'j' set but not used [-Werror,-Wunused-but-set-variable]
                  volatile int j = 0;
                               ^
  1 error generated.

The problem also exists with clang13 and clang14. clang12 is okay.

In send_signal.c, we have the following code ...

  volatile int j = 0;
  [...]
  for (int i = 0; i < 100000000 && !sigusr1_received; i++)
    j /= i + 1;

... to burn CPU cycles so bpf_send_signal() helper can be tested
in NMI mode.

Slightly changing 'j /= i + 1' to 'j /= i + j + 1' or 'j++' can
fix the problem. Further investigation indicated this should be
a clang bug ([1]). The upstream fix will be proposed later. But it
is a good idea to workaround the issue to unblock people who build
kernel/selftests with clang.

  [1] https://discourse.llvm.org/t/strange-clang-unused-but-set-variable-error-with-volatile-variables/60841

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20220311003721.2177170-1-yhs@fb.com
---
 tools/testing/selftests/bpf/prog_tests/send_signal.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c
index def50f1c5c31..d71226e34c34 100644
--- a/tools/testing/selftests/bpf/prog_tests/send_signal.c
+++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c
@@ -65,7 +65,7 @@ static void test_send_signal_common(struct perf_event_attr *attr,
 
 		/* wait a little for signal handler */
 		for (int i = 0; i < 100000000 && !sigusr1_received; i++)
-			j /= i + 1;
+			j /= i + j + 1;
 
 		buf[0] = sigusr1_received ? '2' : '0';
 		ASSERT_EQ(sigusr1_received, 1, "sigusr1_received");
-- 
cgit 


From f8669f1d6a86a6b17104ceca9340ded280307ac1 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 9 Mar 2022 19:49:26 -0800
Subject: nvdimm/blk: Delete the block-aperture window driver

Block Aperture Window support was an attempt to layer an error model
over PMEM for platforms that did not support machine-check-recovery.
However, it was abandoned before it ever shipped, and only ever existed
in the ACPI specification. Meanwhile Linux has carried a large pile of
dead code for non-shipping infrastructure. For years it has been off to
the side out of the way, but now CXL and recent directions with DAX
support have the potential to collide with this code.

In preparation for adding discontiguous namespace support, a
pre-requisite for the nvdimm subsystem to replace device-mapper for
striping + concatenation use cases, delete BLK aperture support.

On the obscure chance that some hardware vendor shipped support for this
mode, note that the driver will still keep BLK space reserved in the
label area. So an end user in this case would still have the opportunity
to report the regression to get BLK-mode support restored without
risking the data they have on that device.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/164688416668.2879318.16903178375774275120.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 Documentation/driver-api/nvdimm/nvdimm.rst | 406 +++++++----------------------
 drivers/nvdimm/Kconfig                     |  25 +-
 drivers/nvdimm/Makefile                    |   3 -
 drivers/nvdimm/blk.c                       | 335 ------------------------
 tools/testing/nvdimm/Kbuild                |   4 -
 tools/testing/nvdimm/config_check.c        |   1 -
 6 files changed, 92 insertions(+), 682 deletions(-)
 delete mode 100644 drivers/nvdimm/blk.c

(limited to 'tools/testing')

diff --git a/Documentation/driver-api/nvdimm/nvdimm.rst b/Documentation/driver-api/nvdimm/nvdimm.rst
index 1d8302b89bd4..7917f6471092 100644
--- a/Documentation/driver-api/nvdimm/nvdimm.rst
+++ b/Documentation/driver-api/nvdimm/nvdimm.rst
@@ -14,10 +14,8 @@ Version 13
 	Overview
 	    Supporting Documents
 	    Git Trees
-	LIBNVDIMM PMEM and BLK
-	Why BLK?
-	    PMEM vs BLK
-	        BLK-REGIONs, PMEM-REGIONs, Atomic Sectors, and DAX
+	LIBNVDIMM PMEM
+	        PMEM-REGIONs, Atomic Sectors, and DAX
 	Example NVDIMM Platform
 	LIBNVDIMM Kernel Device Model and LIBNDCTL Userspace API
 	    LIBNDCTL: Context
@@ -53,19 +51,12 @@ PMEM:
   block device composed of PMEM is capable of DAX.  A PMEM address range
   may span an interleave of several DIMMs.
 
-BLK:
-  A set of one or more programmable memory mapped apertures provided
-  by a DIMM to access its media.  This indirection precludes the
-  performance benefit of interleaving, but enables DIMM-bounded failure
-  modes.
-
 DPA:
   DIMM Physical Address, is a DIMM-relative offset.  With one DIMM in
   the system there would be a 1:1 system-physical-address:DPA association.
   Once more DIMMs are added a memory controller interleave must be
   decoded to determine the DPA associated with a given
-  system-physical-address.  BLK capacity always has a 1:1 relationship
-  with a single-DIMM's DPA range.
+  system-physical-address.
 
 DAX:
   File system extensions to bypass the page cache and block layer to
@@ -84,30 +75,30 @@ BTT:
   Block Translation Table: Persistent memory is byte addressable.
   Existing software may have an expectation that the power-fail-atomicity
   of writes is at least one sector, 512 bytes.  The BTT is an indirection
-  table with atomic update semantics to front a PMEM/BLK block device
+  table with atomic update semantics to front a PMEM block device
   driver and present arbitrary atomic sector sizes.
 
 LABEL:
   Metadata stored on a DIMM device that partitions and identifies
-  (persistently names) storage between PMEM and BLK.  It also partitions
-  BLK storage to host BTTs with different parameters per BLK-partition.
-  Note that traditional partition tables, GPT/MBR, are layered on top of a
-  BLK or PMEM device.
+  (persistently names) capacity allocated to different PMEM namespaces. It
+  also indicates whether an address abstraction like a BTT is applied to
+  the namepsace.  Note that traditional partition tables, GPT/MBR, are
+  layered on top of a PMEM namespace, or an address abstraction like BTT
+  if present, but partition support is deprecated going forward.
 
 
 Overview
 ========
 
-The LIBNVDIMM subsystem provides support for three types of NVDIMMs, namely,
-PMEM, BLK, and NVDIMM devices that can simultaneously support both PMEM
-and BLK mode access.  These three modes of operation are described by
-the "NVDIMM Firmware Interface Table" (NFIT) in ACPI 6.  While the LIBNVDIMM
-implementation is generic and supports pre-NFIT platforms, it was guided
-by the superset of capabilities need to support this ACPI 6 definition
-for NVDIMM resources.  The bulk of the kernel implementation is in place
-to handle the case where DPA accessible via PMEM is aliased with DPA
-accessible via BLK.  When that occurs a LABEL is needed to reserve DPA
-for exclusive access via one mode a time.
+The LIBNVDIMM subsystem provides support for PMEM described by platform
+firmware or a device driver. On ACPI based systems the platform firmware
+conveys persistent memory resource via the ACPI NFIT "NVDIMM Firmware
+Interface Table" in ACPI 6. While the LIBNVDIMM subsystem implementation
+is generic and supports pre-NFIT platforms, it was guided by the
+superset of capabilities need to support this ACPI 6 definition for
+NVDIMM resources. The original implementation supported the
+block-window-aperture capability described in the NFIT, but that support
+has since been abandoned and never shipped in a product.
 
 Supporting Documents
 --------------------
@@ -125,107 +116,38 @@ Git Trees
 ---------
 
 LIBNVDIMM:
-	https://git.kernel.org/cgit/linux/kernel/git/djbw/nvdimm.git
+	https://git.kernel.org/cgit/linux/kernel/git/nvdimm/nvdimm.git
 LIBNDCTL:
 	https://github.com/pmem/ndctl.git
-PMEM:
-	https://github.com/01org/prd
 
 
-LIBNVDIMM PMEM and BLK
-======================
+LIBNVDIMM PMEM
+==============
 
 Prior to the arrival of the NFIT, non-volatile memory was described to a
 system in various ad-hoc ways.  Usually only the bare minimum was
 provided, namely, a single system-physical-address range where writes
 are expected to be durable after a system power loss.  Now, the NFIT
 specification standardizes not only the description of PMEM, but also
-BLK and platform message-passing entry points for control and
-configuration.
-
-For each NVDIMM access method (PMEM, BLK), LIBNVDIMM provides a block
-device driver:
-
-    1. PMEM (nd_pmem.ko): Drives a system-physical-address range.  This
-       range is contiguous in system memory and may be interleaved (hardware
-       memory controller striped) across multiple DIMMs.  When interleaved the
-       platform may optionally provide details of which DIMMs are participating
-       in the interleave.
-
-       Note that while LIBNVDIMM describes system-physical-address ranges that may
-       alias with BLK access as ND_NAMESPACE_PMEM ranges and those without
-       alias as ND_NAMESPACE_IO ranges, to the nd_pmem driver there is no
-       distinction.  The different device-types are an implementation detail
-       that userspace can exploit to implement policies like "only interface
-       with address ranges from certain DIMMs".  It is worth noting that when
-       aliasing is present and a DIMM lacks a label, then no block device can
-       be created by default as userspace needs to do at least one allocation
-       of DPA to the PMEM range.  In contrast ND_NAMESPACE_IO ranges, once
-       registered, can be immediately attached to nd_pmem.
-
-    2. BLK (nd_blk.ko): This driver performs I/O using a set of platform
-       defined apertures.  A set of apertures will access just one DIMM.
-       Multiple windows (apertures) allow multiple concurrent accesses, much like
-       tagged-command-queuing, and would likely be used by different threads or
-       different CPUs.
-
-       The NFIT specification defines a standard format for a BLK-aperture, but
-       the spec also allows for vendor specific layouts, and non-NFIT BLK
-       implementations may have other designs for BLK I/O.  For this reason
-       "nd_blk" calls back into platform-specific code to perform the I/O.
-
-       One such implementation is defined in the "Driver Writer's Guide" and "DSM
-       Interface Example".
-
-
-Why BLK?
-========
+platform message-passing entry points for control and configuration.
+
+PMEM (nd_pmem.ko): Drives a system-physical-address range.  This range is
+contiguous in system memory and may be interleaved (hardware memory controller
+striped) across multiple DIMMs.  When interleaved the platform may optionally
+provide details of which DIMMs are participating in the interleave.
+
+It is worth noting that when the labeling capability is detected (a EFI
+namespace label index block is found), then no block device is created
+by default as userspace needs to do at least one allocation of DPA to
+the PMEM range.  In contrast ND_NAMESPACE_IO ranges, once registered,
+can be immediately attached to nd_pmem. This latter mode is called
+label-less or "legacy".
+
+PMEM-REGIONs, Atomic Sectors, and DAX
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-While PMEM provides direct byte-addressable CPU-load/store access to
-NVDIMM storage, it does not provide the best system RAS (recovery,
-availability, and serviceability) model.  An access to a corrupted
-system-physical-address address causes a CPU exception while an access
-to a corrupted address through an BLK-aperture causes that block window
-to raise an error status in a register.  The latter is more aligned with
-the standard error model that host-bus-adapter attached disks present.
-
-Also, if an administrator ever wants to replace a memory it is easier to
-service a system at DIMM module boundaries.  Compare this to PMEM where
-data could be interleaved in an opaque hardware specific manner across
-several DIMMs.
-
-PMEM vs BLK
------------
-
-BLK-apertures solve these RAS problems, but their presence is also the
-major contributing factor to the complexity of the ND subsystem.  They
-complicate the implementation because PMEM and BLK alias in DPA space.
-Any given DIMM's DPA-range may contribute to one or more
-system-physical-address sets of interleaved DIMMs, *and* may also be
-accessed in its entirety through its BLK-aperture.  Accessing a DPA
-through a system-physical-address while simultaneously accessing the
-same DPA through a BLK-aperture has undefined results.  For this reason,
-DIMMs with this dual interface configuration include a DSM function to
-store/retrieve a LABEL.  The LABEL effectively partitions the DPA-space
-into exclusive system-physical-address and BLK-aperture accessible
-regions.  For simplicity a DIMM is allowed a PMEM "region" per each
-interleave set in which it is a member.  The remaining DPA space can be
-carved into an arbitrary number of BLK devices with discontiguous
-extents.
-
-BLK-REGIONs, PMEM-REGIONs, Atomic Sectors, and DAX
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-One of the few
-reasons to allow multiple BLK namespaces per REGION is so that each
-BLK-namespace can be configured with a BTT with unique atomic sector
-sizes.  While a PMEM device can host a BTT the LABEL specification does
-not provide for a sector size to be specified for a PMEM namespace.
-
-This is due to the expectation that the primary usage model for PMEM is
-via DAX, and the BTT is incompatible with DAX.  However, for the cases
-where an application or filesystem still needs atomic sector update
-guarantees it can register a BTT on a PMEM device or partition.  See
+For the cases where an application or filesystem still needs atomic sector
+update guarantees it can register a BTT on a PMEM device or partition.  See
 LIBNVDIMM/NDCTL: Block Translation Table "btt"
 
 
@@ -236,51 +158,40 @@ For the remainder of this document the following diagram will be
 referenced for any example sysfs layouts::
 
 
-                               (a)               (b)           DIMM   BLK-REGION
+                               (a)               (b)           DIMM
             +-------------------+--------+--------+--------+
-  +------+  |       pm0.0       | blk2.0 | pm1.0  | blk2.1 |    0      region2
+  +------+  |       pm0.0       |  free  | pm1.0  |  free  |    0
   | imc0 +--+- - - region0- - - +--------+        +--------+
-  +--+---+  |       pm0.0       | blk3.0 | pm1.0  | blk3.1 |    1      region3
+  +--+---+  |       pm0.0       |  free  | pm1.0  |  free  |    1
      |      +-------------------+--------v        v--------+
   +--+---+                               |                 |
   | cpu0 |                                     region1
   +--+---+                               |                 |
      |      +----------------------------^        ^--------+
-  +--+---+  |           blk4.0           | pm1.0  | blk4.0 |    2      region4
+  +--+---+  |           free             | pm1.0  |  free  |    2
   | imc1 +--+----------------------------|        +--------+
-  +------+  |           blk5.0           | pm1.0  | blk5.0 |    3      region5
+  +------+  |           free             | pm1.0  |  free  |    3
             +----------------------------+--------+--------+
 
 In this platform we have four DIMMs and two memory controllers in one
-socket.  Each unique interface (BLK or PMEM) to DPA space is identified
-by a region device with a dynamically assigned id (REGION0 - REGION5).
+socket.  Each PMEM interleave set is identified by a region device with
+a dynamically assigned id.
 
     1. The first portion of DIMM0 and DIMM1 are interleaved as REGION0. A
        single PMEM namespace is created in the REGION0-SPA-range that spans most
        of DIMM0 and DIMM1 with a user-specified name of "pm0.0". Some of that
-       interleaved system-physical-address range is reclaimed as BLK-aperture
-       accessed space starting at DPA-offset (a) into each DIMM.  In that
-       reclaimed space we create two BLK-aperture "namespaces" from REGION2 and
-       REGION3 where "blk2.0" and "blk3.0" are just human readable names that
-       could be set to any user-desired name in the LABEL.
+       interleaved system-physical-address range is left free for
+       another PMEM namespace to be defined.
 
     2. In the last portion of DIMM0 and DIMM1 we have an interleaved
        system-physical-address range, REGION1, that spans those two DIMMs as
        well as DIMM2 and DIMM3.  Some of REGION1 is allocated to a PMEM namespace
-       named "pm1.0", the rest is reclaimed in 4 BLK-aperture namespaces (for
-       each DIMM in the interleave set), "blk2.1", "blk3.1", "blk4.0", and
-       "blk5.0".
-
-    3. The portion of DIMM2 and DIMM3 that do not participate in the REGION1
-       interleaved system-physical-address range (i.e. the DPA address past
-       offset (b) are also included in the "blk4.0" and "blk5.0" namespaces.
-       Note, that this example shows that BLK-aperture namespaces don't need to
-       be contiguous in DPA-space.
+       named "pm1.0".
 
     This bus is provided by the kernel under the device
     /sys/devices/platform/nfit_test.0 when the nfit_test.ko module from
-    tools/testing/nvdimm is loaded.  This not only test LIBNVDIMM but the
-    acpi_nfit.ko driver as well.
+    tools/testing/nvdimm is loaded. This module is a unit test for
+    LIBNVDIMM and the  acpi_nfit.ko driver.
 
 
 LIBNVDIMM Kernel Device Model and LIBNDCTL Userspace API
@@ -469,17 +380,14 @@ identified by an "nfit_handle" a 32-bit value where:
 LIBNVDIMM/LIBNDCTL: Region
 --------------------------
 
-A generic REGION device is registered for each PMEM range or BLK-aperture
-set.  Per the example there are 6 regions: 2 PMEM and 4 BLK-aperture
-sets on the "nfit_test.0" bus.  The primary role of regions are to be a
-container of "mappings".  A mapping is a tuple of <DIMM,
-DPA-start-offset, length>.
+A generic REGION device is registered for each PMEM interleave-set /
+range. Per the example there are 2 PMEM regions on the "nfit_test.0"
+bus. The primary role of regions are to be a container of "mappings".  A
+mapping is a tuple of <DIMM, DPA-start-offset, length>.
 
-LIBNVDIMM provides a built-in driver for these REGION devices.  This driver
-is responsible for reconciling the aliased DPA mappings across all
-regions, parsing the LABEL, if present, and then emitting NAMESPACE
-devices with the resolved/exclusive DPA-boundaries for the nd_pmem or
-nd_blk device driver to consume.
+LIBNVDIMM provides a built-in driver for REGION devices.  This driver
+is responsible for all parsing LABELs, if present, and then emitting NAMESPACE
+devices for the nd_pmem driver to consume.
 
 In addition to the generic attributes of "mapping"s, "interleave_ways"
 and "size" the REGION device also exports some convenience attributes.
@@ -493,8 +401,6 @@ LIBNVDIMM: region::
 
 	struct nd_region *nvdimm_pmem_region_create(struct nvdimm_bus *nvdimm_bus,
 			struct nd_region_desc *ndr_desc);
-	struct nd_region *nvdimm_blk_region_create(struct nvdimm_bus *nvdimm_bus,
-			struct nd_region_desc *ndr_desc);
 
 ::
 
@@ -527,8 +433,9 @@ LIBNDCTL: region enumeration example
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 Sample region retrieval routines based on NFIT-unique data like
-"spa_index" (interleave set id) for PMEM and "nfit_handle" (dimm id) for
-BLK::
+"spa_index" (interleave set id).
+
+::
 
 	static struct ndctl_region *get_pmem_region_by_spa_index(struct ndctl_bus *bus,
 			unsigned int spa_index)
@@ -544,139 +451,23 @@ BLK::
 		return NULL;
 	}
 
-	static struct ndctl_region *get_blk_region_by_dimm_handle(struct ndctl_bus *bus,
-			unsigned int handle)
-	{
-		struct ndctl_region *region;
-
-		ndctl_region_foreach(bus, region) {
-			struct ndctl_mapping *map;
-
-			if (ndctl_region_get_type(region) != ND_DEVICE_REGION_BLOCK)
-				continue;
-			ndctl_mapping_foreach(region, map) {
-				struct ndctl_dimm *dimm = ndctl_mapping_get_dimm(map);
-
-				if (ndctl_dimm_get_handle(dimm) == handle)
-					return region;
-			}
-		}
-		return NULL;
-	}
-
-
-Why Not Encode the Region Type into the Region Name?
-----------------------------------------------------
-
-At first glance it seems since NFIT defines just PMEM and BLK interface
-types that we should simply name REGION devices with something derived
-from those type names.  However, the ND subsystem explicitly keeps the
-REGION name generic and expects userspace to always consider the
-region-attributes for four reasons:
-
-    1. There are already more than two REGION and "namespace" types.  For
-       PMEM there are two subtypes.  As mentioned previously we have PMEM where
-       the constituent DIMM devices are known and anonymous PMEM.  For BLK
-       regions the NFIT specification already anticipates vendor specific
-       implementations.  The exact distinction of what a region contains is in
-       the region-attributes not the region-name or the region-devtype.
-
-    2. A region with zero child-namespaces is a possible configuration.  For
-       example, the NFIT allows for a DCR to be published without a
-       corresponding BLK-aperture.  This equates to a DIMM that can only accept
-       control/configuration messages, but no i/o through a descendant block
-       device.  Again, this "type" is advertised in the attributes ('mappings'
-       == 0) and the name does not tell you much.
-
-    3. What if a third major interface type arises in the future?  Outside
-       of vendor specific implementations, it's not difficult to envision a
-       third class of interface type beyond BLK and PMEM.  With a generic name
-       for the REGION level of the device-hierarchy old userspace
-       implementations can still make sense of new kernel advertised
-       region-types.  Userspace can always rely on the generic region
-       attributes like "mappings", "size", etc and the expected child devices
-       named "namespace".  This generic format of the device-model hierarchy
-       allows the LIBNVDIMM and LIBNDCTL implementations to be more uniform and
-       future-proof.
-
-    4. There are more robust mechanisms for determining the major type of a
-       region than a device name.  See the next section, How Do I Determine the
-       Major Type of a Region?
-
-How Do I Determine the Major Type of a Region?
-----------------------------------------------
-
-Outside of the blanket recommendation of "use libndctl", or simply
-looking at the kernel header (/usr/include/linux/ndctl.h) to decode the
-"nstype" integer attribute, here are some other options.
-
-1. module alias lookup
-^^^^^^^^^^^^^^^^^^^^^^
-
-    The whole point of region/namespace device type differentiation is to
-    decide which block-device driver will attach to a given LIBNVDIMM namespace.
-    One can simply use the modalias to lookup the resulting module.  It's
-    important to note that this method is robust in the presence of a
-    vendor-specific driver down the road.  If a vendor-specific
-    implementation wants to supplant the standard nd_blk driver it can with
-    minimal impact to the rest of LIBNVDIMM.
-
-    In fact, a vendor may also want to have a vendor-specific region-driver
-    (outside of nd_region).  For example, if a vendor defined its own LABEL
-    format it would need its own region driver to parse that LABEL and emit
-    the resulting namespaces.  The output from module resolution is more
-    accurate than a region-name or region-devtype.
-
-2. udev
-^^^^^^^
-
-    The kernel "devtype" is registered in the udev database::
-
-	# udevadm info --path=/devices/platform/nfit_test.0/ndbus0/region0
-	P: /devices/platform/nfit_test.0/ndbus0/region0
-	E: DEVPATH=/devices/platform/nfit_test.0/ndbus0/region0
-	E: DEVTYPE=nd_pmem
-	E: MODALIAS=nd:t2
-	E: SUBSYSTEM=nd
-
-	# udevadm info --path=/devices/platform/nfit_test.0/ndbus0/region4
-	P: /devices/platform/nfit_test.0/ndbus0/region4
-	E: DEVPATH=/devices/platform/nfit_test.0/ndbus0/region4
-	E: DEVTYPE=nd_blk
-	E: MODALIAS=nd:t3
-	E: SUBSYSTEM=nd
-
-    ...and is available as a region attribute, but keep in mind that the
-    "devtype" does not indicate sub-type variations and scripts should
-    really be understanding the other attributes.
-
-3. type specific attributes
-^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-    As it currently stands a BLK-aperture region will never have a
-    "nfit/spa_index" attribute, but neither will a non-NFIT PMEM region.  A
-    BLK region with a "mappings" value of 0 is, as mentioned above, a DIMM
-    that does not allow I/O.  A PMEM region with a "mappings" value of zero
-    is a simple system-physical-address range.
-
 
 LIBNVDIMM/LIBNDCTL: Namespace
 -----------------------------
 
-A REGION, after resolving DPA aliasing and LABEL specified boundaries,
-surfaces one or more "namespace" devices.  The arrival of a "namespace"
-device currently triggers either the nd_blk or nd_pmem driver to load
-and register a disk/block device.
+A REGION, after resolving DPA aliasing and LABEL specified boundaries, surfaces
+one or more "namespace" devices.  The arrival of a "namespace" device currently
+triggers the nd_pmem driver to load and register a disk/block device.
 
 LIBNVDIMM: namespace
 ^^^^^^^^^^^^^^^^^^^^
 
-Here is a sample layout from the three major types of NAMESPACE where
-namespace0.0 represents DIMM-info-backed PMEM (note that it has a 'uuid'
-attribute), namespace2.0 represents a BLK namespace (note it has a
-'sector_size' attribute) that, and namespace6.0 represents an anonymous
-PMEM namespace (note that has no 'uuid' attribute due to not support a
-LABEL)::
+Here is a sample layout from the 2 major types of NAMESPACE where namespace0.0
+represents DIMM-info-backed PMEM (note that it has a 'uuid' attribute), and
+namespace1.0 represents an anonymous PMEM namespace (note that has no 'uuid'
+attribute due to not support a LABEL)
+
+::
 
 	/sys/devices/platform/nfit_test.0/ndbus0/region0/namespace0.0
 	|-- alt_name
@@ -691,20 +482,7 @@ LABEL)::
 	|-- type
 	|-- uevent
 	`-- uuid
-	/sys/devices/platform/nfit_test.0/ndbus0/region2/namespace2.0
-	|-- alt_name
-	|-- devtype
-	|-- dpa_extents
-	|-- force_raw
-	|-- modalias
-	|-- numa_node
-	|-- sector_size
-	|-- size
-	|-- subsystem -> ../../../../../../bus/nd
-	|-- type
-	|-- uevent
-	`-- uuid
-	/sys/devices/platform/nfit_test.1/ndbus1/region6/namespace6.0
+	/sys/devices/platform/nfit_test.1/ndbus1/region1/namespace1.0
 	|-- block
 	|   `-- pmem0
 	|-- devtype
@@ -786,9 +564,9 @@ Why the Term "namespace"?
 LIBNVDIMM/LIBNDCTL: Block Translation Table "btt"
 -------------------------------------------------
 
-A BTT (design document: https://pmem.io/2014/09/23/btt.html) is a stacked
-block device driver that fronts either the whole block device or a
-partition of a block device emitted by either a PMEM or BLK NAMESPACE.
+A BTT (design document: https://pmem.io/2014/09/23/btt.html) is a
+personality driver for a namespace that fronts entire namespace as an
+'address abstraction'.
 
 LIBNVDIMM: btt layout
 ^^^^^^^^^^^^^^^^^^^^^
@@ -815,7 +593,9 @@ LIBNDCTL: btt creation example
 Similar to namespaces an idle BTT device is automatically created per
 region.  Each time this "seed" btt device is configured and enabled a new
 seed is created.  Creating a BTT configuration involves two steps of
-finding and idle BTT and assigning it to consume a PMEM or BLK namespace::
+finding and idle BTT and assigning it to consume a namespace.
+
+::
 
 	static struct ndctl_btt *get_idle_btt(struct ndctl_region *region)
 	{
@@ -863,25 +643,15 @@ For the given example above, here is the view of the objects as seen by the
 LIBNDCTL API::
 
               +---+
-              |CTX|    +---------+   +--------------+  +---------------+
-              +-+-+  +-> REGION0 +---> NAMESPACE0.0 +--> PMEM8 "pm0.0" |
-                |    | +---------+   +--------------+  +---------------+
-  +-------+     |    | +---------+   +--------------+  +---------------+
-  | DIMM0 <-+   |    +-> REGION1 +---> NAMESPACE1.0 +--> PMEM6 "pm1.0" |
-  +-------+ |   |    | +---------+   +--------------+  +---------------+
+              |CTX|
+              +-+-+
+                |
+  +-------+     |
+  | DIMM0 <-+   |      +---------+   +--------------+  +---------------+
+  +-------+ |   |    +-> REGION0 +---> NAMESPACE0.0 +--> PMEM8 "pm0.0" |
   | DIMM1 <-+ +-v--+ | +---------+   +--------------+  +---------------+
-  +-------+ +-+BUS0+---> REGION2 +-+-> NAMESPACE2.0 +--> ND6  "blk2.0" |
-  | DIMM2 <-+ +----+ | +---------+ | +--------------+  +----------------------+
-  +-------+ |        |             +-> NAMESPACE2.1 +--> ND5  "blk2.1" | BTT2 |
-  | DIMM3 <-+        |               +--------------+  +----------------------+
-  +-------+          | +---------+   +--------------+  +---------------+
-                     +-> REGION3 +-+-> NAMESPACE3.0 +--> ND4  "blk3.0" |
-                     | +---------+ | +--------------+  +----------------------+
-                     |             +-> NAMESPACE3.1 +--> ND3  "blk3.1" | BTT1 |
-                     |               +--------------+  +----------------------+
-                     | +---------+   +--------------+  +---------------+
-                     +-> REGION4 +---> NAMESPACE4.0 +--> ND2  "blk4.0" |
-                     | +---------+   +--------------+  +---------------+
-                     | +---------+   +--------------+  +----------------------+
-                     +-> REGION5 +---> NAMESPACE5.0 +--> ND1  "blk5.0" | BTT0 |
-                       +---------+   +--------------+  +---------------+------+
+  +-------+ +-+BUS0+-| +---------+   +--------------+  +----------------------+
+  | DIMM2 <-+ +----+ +-> REGION1 +---> NAMESPACE1.0 +--> PMEM6 "pm1.0" | BTT1 |
+  +-------+ |        | +---------+   +--------------+  +---------------+------+
+  | DIMM3 <-+
+  +-------+
diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig
index 347fe7afa583..5a29046e3319 100644
--- a/drivers/nvdimm/Kconfig
+++ b/drivers/nvdimm/Kconfig
@@ -10,12 +10,9 @@ menuconfig LIBNVDIMM
 	  ACPI-6-NFIT defined resources.  On platforms that define an
 	  NFIT, or otherwise can discover NVDIMM resources, a libnvdimm
 	  bus is registered to advertise PMEM (persistent memory)
-	  namespaces (/dev/pmemX) and BLK (sliding mmio window(s))
-	  namespaces (/dev/ndblkX.Y). A PMEM namespace refers to a
+	  namespaces (/dev/pmemX). A PMEM namespace refers to a
 	  memory resource that may span multiple DIMMs and support DAX
-	  (see CONFIG_DAX).  A BLK namespace refers to an NVDIMM control
-	  region which exposes an mmio register set for windowed access
-	  mode to non-volatile memory.
+	  (see CONFIG_DAX).
 
 if LIBNVDIMM
 
@@ -38,19 +35,6 @@ config BLK_DEV_PMEM
 
 	  Say Y if you want to use an NVDIMM
 
-config ND_BLK
-	tristate "BLK: Block data window (aperture) device support"
-	default LIBNVDIMM
-	select ND_BTT if BTT
-	help
-	  Support NVDIMMs, or other devices, that implement a BLK-mode
-	  access capability.  BLK-mode access uses memory-mapped-i/o
-	  apertures to access persistent media.
-
-	  Say Y if your platform firmware emits an ACPI.NFIT table
-	  (CONFIG_ACPI_NFIT), or otherwise exposes BLK-mode
-	  capabilities.
-
 config ND_CLAIM
 	bool
 
@@ -67,9 +51,8 @@ config BTT
 	  applications that rely on sector writes not being torn (a
 	  guarantee that typical disks provide) can continue to do so.
 	  The BTT manifests itself as an alternate personality for an
-	  NVDIMM namespace, i.e. a namespace can be in raw mode (pmemX,
-	  ndblkX.Y, etc...), or 'sectored' mode, (pmemXs, ndblkX.Ys,
-	  etc...).
+	  NVDIMM namespace, i.e. a namespace can be in raw mode pmemX,
+	  or 'sectored' mode.
 
 	  Select Y if unsure
 
diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile
index 25dba6095612..3fb806748716 100644
--- a/drivers/nvdimm/Makefile
+++ b/drivers/nvdimm/Makefile
@@ -2,7 +2,6 @@
 obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o
 obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o
 obj-$(CONFIG_ND_BTT) += nd_btt.o
-obj-$(CONFIG_ND_BLK) += nd_blk.o
 obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o
 obj-$(CONFIG_OF_PMEM) += of_pmem.o
 obj-$(CONFIG_VIRTIO_PMEM) += virtio_pmem.o nd_virtio.o
@@ -11,8 +10,6 @@ nd_pmem-y := pmem.o
 
 nd_btt-y := btt.o
 
-nd_blk-y := blk.o
-
 nd_e820-y := e820.o
 
 libnvdimm-y := core.o
diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
deleted file mode 100644
index 228c33b8d1d6..000000000000
--- a/drivers/nvdimm/blk.c
+++ /dev/null
@@ -1,335 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * NVDIMM Block Window Driver
- * Copyright (c) 2014, Intel Corporation.
- */
-
-#include <linux/blkdev.h>
-#include <linux/fs.h>
-#include <linux/genhd.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/nd.h>
-#include <linux/sizes.h>
-#include "nd.h"
-
-static u32 nsblk_meta_size(struct nd_namespace_blk *nsblk)
-{
-	return nsblk->lbasize - ((nsblk->lbasize >= 4096) ? 4096 : 512);
-}
-
-static u32 nsblk_internal_lbasize(struct nd_namespace_blk *nsblk)
-{
-	return roundup(nsblk->lbasize, INT_LBASIZE_ALIGNMENT);
-}
-
-static u32 nsblk_sector_size(struct nd_namespace_blk *nsblk)
-{
-	return nsblk->lbasize - nsblk_meta_size(nsblk);
-}
-
-static resource_size_t to_dev_offset(struct nd_namespace_blk *nsblk,
-				resource_size_t ns_offset, unsigned int len)
-{
-	int i;
-
-	for (i = 0; i < nsblk->num_resources; i++) {
-		if (ns_offset < resource_size(nsblk->res[i])) {
-			if (ns_offset + len > resource_size(nsblk->res[i])) {
-				dev_WARN_ONCE(&nsblk->common.dev, 1,
-					"illegal request\n");
-				return SIZE_MAX;
-			}
-			return nsblk->res[i]->start + ns_offset;
-		}
-		ns_offset -= resource_size(nsblk->res[i]);
-	}
-
-	dev_WARN_ONCE(&nsblk->common.dev, 1, "request out of range\n");
-	return SIZE_MAX;
-}
-
-static struct nd_blk_region *to_ndbr(struct nd_namespace_blk *nsblk)
-{
-	struct nd_region *nd_region;
-	struct device *parent;
-
-	parent = nsblk->common.dev.parent;
-	nd_region = container_of(parent, struct nd_region, dev);
-	return container_of(nd_region, struct nd_blk_region, nd_region);
-}
-
-#ifdef CONFIG_BLK_DEV_INTEGRITY
-static int nd_blk_rw_integrity(struct nd_namespace_blk *nsblk,
-		struct bio_integrity_payload *bip, u64 lba, int rw)
-{
-	struct nd_blk_region *ndbr = to_ndbr(nsblk);
-	unsigned int len = nsblk_meta_size(nsblk);
-	resource_size_t	dev_offset, ns_offset;
-	u32 internal_lbasize, sector_size;
-	int err = 0;
-
-	internal_lbasize = nsblk_internal_lbasize(nsblk);
-	sector_size = nsblk_sector_size(nsblk);
-	ns_offset = lba * internal_lbasize + sector_size;
-	dev_offset = to_dev_offset(nsblk, ns_offset, len);
-	if (dev_offset == SIZE_MAX)
-		return -EIO;
-
-	while (len) {
-		unsigned int cur_len;
-		struct bio_vec bv;
-		void *iobuf;
-
-		bv = bvec_iter_bvec(bip->bip_vec, bip->bip_iter);
-		/*
-		 * The 'bv' obtained from bvec_iter_bvec has its .bv_len and
-		 * .bv_offset already adjusted for iter->bi_bvec_done, and we
-		 * can use those directly
-		 */
-
-		cur_len = min(len, bv.bv_len);
-		iobuf = kmap_atomic(bv.bv_page);
-		err = ndbr->do_io(ndbr, dev_offset, iobuf + bv.bv_offset,
-				cur_len, rw);
-		kunmap_atomic(iobuf);
-		if (err)
-			return err;
-
-		len -= cur_len;
-		dev_offset += cur_len;
-		if (!bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len))
-			return -EIO;
-	}
-
-	return err;
-}
-
-#else /* CONFIG_BLK_DEV_INTEGRITY */
-static int nd_blk_rw_integrity(struct nd_namespace_blk *nsblk,
-		struct bio_integrity_payload *bip, u64 lba, int rw)
-{
-	return 0;
-}
-#endif
-
-static int nsblk_do_bvec(struct nd_namespace_blk *nsblk,
-		struct bio_integrity_payload *bip, struct page *page,
-		unsigned int len, unsigned int off, int rw, sector_t sector)
-{
-	struct nd_blk_region *ndbr = to_ndbr(nsblk);
-	resource_size_t	dev_offset, ns_offset;
-	u32 internal_lbasize, sector_size;
-	int err = 0;
-	void *iobuf;
-	u64 lba;
-
-	internal_lbasize = nsblk_internal_lbasize(nsblk);
-	sector_size = nsblk_sector_size(nsblk);
-	while (len) {
-		unsigned int cur_len;
-
-		/*
-		 * If we don't have an integrity payload, we don't have to
-		 * split the bvec into sectors, as this would cause unnecessary
-		 * Block Window setup/move steps. the do_io routine is capable
-		 * of handling len <= PAGE_SIZE.
-		 */
-		cur_len = bip ? min(len, sector_size) : len;
-
-		lba = div_u64(sector << SECTOR_SHIFT, sector_size);
-		ns_offset = lba * internal_lbasize;
-		dev_offset = to_dev_offset(nsblk, ns_offset, cur_len);
-		if (dev_offset == SIZE_MAX)
-			return -EIO;
-
-		iobuf = kmap_atomic(page);
-		err = ndbr->do_io(ndbr, dev_offset, iobuf + off, cur_len, rw);
-		kunmap_atomic(iobuf);
-		if (err)
-			return err;
-
-		if (bip) {
-			err = nd_blk_rw_integrity(nsblk, bip, lba, rw);
-			if (err)
-				return err;
-		}
-		len -= cur_len;
-		off += cur_len;
-		sector += sector_size >> SECTOR_SHIFT;
-	}
-
-	return err;
-}
-
-static void nd_blk_submit_bio(struct bio *bio)
-{
-	struct bio_integrity_payload *bip;
-	struct nd_namespace_blk *nsblk = bio->bi_bdev->bd_disk->private_data;
-	struct bvec_iter iter;
-	unsigned long start;
-	struct bio_vec bvec;
-	int err = 0, rw;
-	bool do_acct;
-
-	if (!bio_integrity_prep(bio))
-		return;
-
-	bip = bio_integrity(bio);
-	rw = bio_data_dir(bio);
-	do_acct = blk_queue_io_stat(bio->bi_bdev->bd_disk->queue);
-	if (do_acct)
-		start = bio_start_io_acct(bio);
-	bio_for_each_segment(bvec, bio, iter) {
-		unsigned int len = bvec.bv_len;
-
-		BUG_ON(len > PAGE_SIZE);
-		err = nsblk_do_bvec(nsblk, bip, bvec.bv_page, len,
-				bvec.bv_offset, rw, iter.bi_sector);
-		if (err) {
-			dev_dbg(&nsblk->common.dev,
-					"io error in %s sector %lld, len %d,\n",
-					(rw == READ) ? "READ" : "WRITE",
-					(unsigned long long) iter.bi_sector, len);
-			bio->bi_status = errno_to_blk_status(err);
-			break;
-		}
-	}
-	if (do_acct)
-		bio_end_io_acct(bio, start);
-
-	bio_endio(bio);
-}
-
-static int nsblk_rw_bytes(struct nd_namespace_common *ndns,
-		resource_size_t offset, void *iobuf, size_t n, int rw,
-		unsigned long flags)
-{
-	struct nd_namespace_blk *nsblk = to_nd_namespace_blk(&ndns->dev);
-	struct nd_blk_region *ndbr = to_ndbr(nsblk);
-	resource_size_t	dev_offset;
-
-	dev_offset = to_dev_offset(nsblk, offset, n);
-
-	if (unlikely(offset + n > nsblk->size)) {
-		dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n");
-		return -EFAULT;
-	}
-
-	if (dev_offset == SIZE_MAX)
-		return -EIO;
-
-	return ndbr->do_io(ndbr, dev_offset, iobuf, n, rw);
-}
-
-static const struct block_device_operations nd_blk_fops = {
-	.owner = THIS_MODULE,
-	.submit_bio =  nd_blk_submit_bio,
-};
-
-static void nd_blk_release_disk(void *disk)
-{
-	del_gendisk(disk);
-	blk_cleanup_disk(disk);
-}
-
-static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
-{
-	struct device *dev = &nsblk->common.dev;
-	resource_size_t available_disk_size;
-	struct gendisk *disk;
-	u64 internal_nlba;
-	int rc;
-
-	internal_nlba = div_u64(nsblk->size, nsblk_internal_lbasize(nsblk));
-	available_disk_size = internal_nlba * nsblk_sector_size(nsblk);
-
-	disk = blk_alloc_disk(NUMA_NO_NODE);
-	if (!disk)
-		return -ENOMEM;
-
-	disk->fops		= &nd_blk_fops;
-	disk->private_data	= nsblk;
-	nvdimm_namespace_disk_name(&nsblk->common, disk->disk_name);
-
-	blk_queue_max_hw_sectors(disk->queue, UINT_MAX);
-	blk_queue_logical_block_size(disk->queue, nsblk_sector_size(nsblk));
-	blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
-
-	if (nsblk_meta_size(nsblk)) {
-		rc = nd_integrity_init(disk, nsblk_meta_size(nsblk));
-
-		if (rc)
-			goto out_before_devm_err;
-	}
-
-	set_capacity(disk, available_disk_size >> SECTOR_SHIFT);
-	rc = device_add_disk(dev, disk, NULL);
-	if (rc)
-		goto out_before_devm_err;
-
-	/* nd_blk_release_disk() is called if this fails */
-	if (devm_add_action_or_reset(dev, nd_blk_release_disk, disk))
-		return -ENOMEM;
-
-	nvdimm_check_and_set_ro(disk);
-	return 0;
-
-out_before_devm_err:
-	blk_cleanup_disk(disk);
-	return rc;
-}
-
-static int nd_blk_probe(struct device *dev)
-{
-	struct nd_namespace_common *ndns;
-	struct nd_namespace_blk *nsblk;
-
-	ndns = nvdimm_namespace_common_probe(dev);
-	if (IS_ERR(ndns))
-		return PTR_ERR(ndns);
-
-	nsblk = to_nd_namespace_blk(&ndns->dev);
-	nsblk->size = nvdimm_namespace_capacity(ndns);
-	dev_set_drvdata(dev, nsblk);
-
-	ndns->rw_bytes = nsblk_rw_bytes;
-	if (is_nd_btt(dev))
-		return nvdimm_namespace_attach_btt(ndns);
-	else if (nd_btt_probe(dev, ndns) == 0) {
-		/* we'll come back as btt-blk */
-		return -ENXIO;
-	} else
-		return nsblk_attach_disk(nsblk);
-}
-
-static void nd_blk_remove(struct device *dev)
-{
-	if (is_nd_btt(dev))
-		nvdimm_namespace_detach_btt(to_nd_btt(dev));
-}
-
-static struct nd_device_driver nd_blk_driver = {
-	.probe = nd_blk_probe,
-	.remove = nd_blk_remove,
-	.drv = {
-		.name = "nd_blk",
-	},
-	.type = ND_DRIVER_NAMESPACE_BLK,
-};
-
-static int __init nd_blk_init(void)
-{
-	return nd_driver_register(&nd_blk_driver);
-}
-
-static void __exit nd_blk_exit(void)
-{
-	driver_unregister(&nd_blk_driver.drv);
-}
-
-MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_BLK);
-module_init(nd_blk_init);
-module_exit(nd_blk_exit);
diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild
index c57d9e9d4480..5eb5c23b062f 100644
--- a/tools/testing/nvdimm/Kbuild
+++ b/tools/testing/nvdimm/Kbuild
@@ -27,7 +27,6 @@ ccflags-y += -I$(srctree)/drivers/acpi/nfit/
 obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o
 obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o
 obj-$(CONFIG_ND_BTT) += nd_btt.o
-obj-$(CONFIG_ND_BLK) += nd_blk.o
 obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o
 obj-$(CONFIG_ACPI_NFIT) += nfit.o
 ifeq ($(CONFIG_DAX),m)
@@ -50,9 +49,6 @@ nd_pmem-y += config_check.o
 nd_btt-y := $(NVDIMM_SRC)/btt.o
 nd_btt-y += config_check.o
 
-nd_blk-y := $(NVDIMM_SRC)/blk.o
-nd_blk-y += config_check.o
-
 nd_e820-y := $(NVDIMM_SRC)/e820.o
 nd_e820-y += config_check.o
 
diff --git a/tools/testing/nvdimm/config_check.c b/tools/testing/nvdimm/config_check.c
index 3e3a5f518864..baed75e2ccbc 100644
--- a/tools/testing/nvdimm/config_check.c
+++ b/tools/testing/nvdimm/config_check.c
@@ -11,7 +11,6 @@ void check(void)
 	BUILD_BUG_ON(!IS_MODULE(CONFIG_BLK_DEV_PMEM));
 	BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BTT));
 	BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_PFN));
-	BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BLK));
 	if (IS_ENABLED(CONFIG_ACPI_NFIT))
 		BUILD_BUG_ON(!IS_MODULE(CONFIG_ACPI_NFIT));
 	BUILD_BUG_ON(!IS_MODULE(CONFIG_DEV_DAX));
-- 
cgit 


From a4b96046a8823a796b28e713ecc8ec535f5e4607 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 9 Mar 2022 19:49:42 -0800
Subject: ACPI: NFIT: Remove block aperture support

Delete the code to parse interleave-descriptor-tables and coordinate I/O
through a BLK aperture.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/164688418240.2879318.400185926874596938.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/acpi/nfit/core.c         | 376 ---------------------------------------
 drivers/acpi/nfit/nfit.h         |   6 -
 tools/testing/nvdimm/test/nfit.c |  23 ---
 3 files changed, 405 deletions(-)

(limited to 'tools/testing')

diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index e5d7f2bda13f..bea6a219fddd 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -999,80 +999,6 @@ static void *add_table(struct acpi_nfit_desc *acpi_desc,
 	return table + hdr->length;
 }
 
-static void nfit_mem_find_spa_bdw(struct acpi_nfit_desc *acpi_desc,
-		struct nfit_mem *nfit_mem)
-{
-	u32 device_handle = __to_nfit_memdev(nfit_mem)->device_handle;
-	u16 dcr = nfit_mem->dcr->region_index;
-	struct nfit_spa *nfit_spa;
-
-	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
-		u16 range_index = nfit_spa->spa->range_index;
-		int type = nfit_spa_type(nfit_spa->spa);
-		struct nfit_memdev *nfit_memdev;
-
-		if (type != NFIT_SPA_BDW)
-			continue;
-
-		list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
-			if (nfit_memdev->memdev->range_index != range_index)
-				continue;
-			if (nfit_memdev->memdev->device_handle != device_handle)
-				continue;
-			if (nfit_memdev->memdev->region_index != dcr)
-				continue;
-
-			nfit_mem->spa_bdw = nfit_spa->spa;
-			return;
-		}
-	}
-
-	dev_dbg(acpi_desc->dev, "SPA-BDW not found for SPA-DCR %d\n",
-			nfit_mem->spa_dcr->range_index);
-	nfit_mem->bdw = NULL;
-}
-
-static void nfit_mem_init_bdw(struct acpi_nfit_desc *acpi_desc,
-		struct nfit_mem *nfit_mem, struct acpi_nfit_system_address *spa)
-{
-	u16 dcr = __to_nfit_memdev(nfit_mem)->region_index;
-	struct nfit_memdev *nfit_memdev;
-	struct nfit_bdw *nfit_bdw;
-	struct nfit_idt *nfit_idt;
-	u16 idt_idx, range_index;
-
-	list_for_each_entry(nfit_bdw, &acpi_desc->bdws, list) {
-		if (nfit_bdw->bdw->region_index != dcr)
-			continue;
-		nfit_mem->bdw = nfit_bdw->bdw;
-		break;
-	}
-
-	if (!nfit_mem->bdw)
-		return;
-
-	nfit_mem_find_spa_bdw(acpi_desc, nfit_mem);
-
-	if (!nfit_mem->spa_bdw)
-		return;
-
-	range_index = nfit_mem->spa_bdw->range_index;
-	list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
-		if (nfit_memdev->memdev->range_index != range_index ||
-				nfit_memdev->memdev->region_index != dcr)
-			continue;
-		nfit_mem->memdev_bdw = nfit_memdev->memdev;
-		idt_idx = nfit_memdev->memdev->interleave_index;
-		list_for_each_entry(nfit_idt, &acpi_desc->idts, list) {
-			if (nfit_idt->idt->interleave_index != idt_idx)
-				continue;
-			nfit_mem->idt_bdw = nfit_idt->idt;
-			break;
-		}
-		break;
-	}
-}
-
 static int __nfit_mem_init(struct acpi_nfit_desc *acpi_desc,
 		struct acpi_nfit_system_address *spa)
 {
@@ -1189,7 +1115,6 @@ static int __nfit_mem_init(struct acpi_nfit_desc *acpi_desc,
 				nfit_mem->idt_dcr = nfit_idt->idt;
 				break;
 			}
-			nfit_mem_init_bdw(acpi_desc, nfit_mem, spa);
 		} else if (type == NFIT_SPA_PM) {
 			/*
 			 * A single dimm may belong to multiple SPA-PM
@@ -1532,8 +1457,6 @@ static int num_nvdimm_formats(struct nvdimm *nvdimm)
 
 	if (nfit_mem->memdev_pmem)
 		formats++;
-	if (nfit_mem->memdev_bdw)
-		formats++;
 	return formats;
 }
 
@@ -2079,11 +2002,6 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
 			continue;
 		}
 
-		if (nfit_mem->bdw && nfit_mem->memdev_pmem) {
-			set_bit(NDD_ALIASING, &flags);
-			set_bit(NDD_LABELING, &flags);
-		}
-
 		/* collate flags across all memdevs for this dimm */
 		list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
 			struct acpi_nfit_memory_map *dimm_memdev;
@@ -2429,272 +2347,6 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc,
 	return 0;
 }
 
-static u64 to_interleave_offset(u64 offset, struct nfit_blk_mmio *mmio)
-{
-	struct acpi_nfit_interleave *idt = mmio->idt;
-	u32 sub_line_offset, line_index, line_offset;
-	u64 line_no, table_skip_count, table_offset;
-
-	line_no = div_u64_rem(offset, mmio->line_size, &sub_line_offset);
-	table_skip_count = div_u64_rem(line_no, mmio->num_lines, &line_index);
-	line_offset = idt->line_offset[line_index]
-		* mmio->line_size;
-	table_offset = table_skip_count * mmio->table_size;
-
-	return mmio->base_offset + line_offset + table_offset + sub_line_offset;
-}
-
-static u32 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw)
-{
-	struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR];
-	u64 offset = nfit_blk->stat_offset + mmio->size * bw;
-	const u32 STATUS_MASK = 0x80000037;
-
-	if (mmio->num_lines)
-		offset = to_interleave_offset(offset, mmio);
-
-	return readl(mmio->addr.base + offset) & STATUS_MASK;
-}
-
-static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw,
-		resource_size_t dpa, unsigned int len, unsigned int write)
-{
-	u64 cmd, offset;
-	struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR];
-
-	enum {
-		BCW_OFFSET_MASK = (1ULL << 48)-1,
-		BCW_LEN_SHIFT = 48,
-		BCW_LEN_MASK = (1ULL << 8) - 1,
-		BCW_CMD_SHIFT = 56,
-	};
-
-	cmd = (dpa >> L1_CACHE_SHIFT) & BCW_OFFSET_MASK;
-	len = len >> L1_CACHE_SHIFT;
-	cmd |= ((u64) len & BCW_LEN_MASK) << BCW_LEN_SHIFT;
-	cmd |= ((u64) write) << BCW_CMD_SHIFT;
-
-	offset = nfit_blk->cmd_offset + mmio->size * bw;
-	if (mmio->num_lines)
-		offset = to_interleave_offset(offset, mmio);
-
-	writeq(cmd, mmio->addr.base + offset);
-	nvdimm_flush(nfit_blk->nd_region, NULL);
-
-	if (nfit_blk->dimm_flags & NFIT_BLK_DCR_LATCH)
-		readq(mmio->addr.base + offset);
-}
-
-static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
-		resource_size_t dpa, void *iobuf, size_t len, int rw,
-		unsigned int lane)
-{
-	struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW];
-	unsigned int copied = 0;
-	u64 base_offset;
-	int rc;
-
-	base_offset = nfit_blk->bdw_offset + dpa % L1_CACHE_BYTES
-		+ lane * mmio->size;
-	write_blk_ctl(nfit_blk, lane, dpa, len, rw);
-	while (len) {
-		unsigned int c;
-		u64 offset;
-
-		if (mmio->num_lines) {
-			u32 line_offset;
-
-			offset = to_interleave_offset(base_offset + copied,
-					mmio);
-			div_u64_rem(offset, mmio->line_size, &line_offset);
-			c = min_t(size_t, len, mmio->line_size - line_offset);
-		} else {
-			offset = base_offset + nfit_blk->bdw_offset;
-			c = len;
-		}
-
-		if (rw)
-			memcpy_flushcache(mmio->addr.aperture + offset, iobuf + copied, c);
-		else {
-			if (nfit_blk->dimm_flags & NFIT_BLK_READ_FLUSH)
-				arch_invalidate_pmem((void __force *)
-					mmio->addr.aperture + offset, c);
-
-			memcpy(iobuf + copied, mmio->addr.aperture + offset, c);
-		}
-
-		copied += c;
-		len -= c;
-	}
-
-	if (rw)
-		nvdimm_flush(nfit_blk->nd_region, NULL);
-
-	rc = read_blk_stat(nfit_blk, lane) ? -EIO : 0;
-	return rc;
-}
-
-static int acpi_nfit_blk_region_do_io(struct nd_blk_region *ndbr,
-		resource_size_t dpa, void *iobuf, u64 len, int rw)
-{
-	struct nfit_blk *nfit_blk = nd_blk_region_provider_data(ndbr);
-	struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW];
-	struct nd_region *nd_region = nfit_blk->nd_region;
-	unsigned int lane, copied = 0;
-	int rc = 0;
-
-	lane = nd_region_acquire_lane(nd_region);
-	while (len) {
-		u64 c = min(len, mmio->size);
-
-		rc = acpi_nfit_blk_single_io(nfit_blk, dpa + copied,
-				iobuf + copied, c, rw, lane);
-		if (rc)
-			break;
-
-		copied += c;
-		len -= c;
-	}
-	nd_region_release_lane(nd_region, lane);
-
-	return rc;
-}
-
-static int nfit_blk_init_interleave(struct nfit_blk_mmio *mmio,
-		struct acpi_nfit_interleave *idt, u16 interleave_ways)
-{
-	if (idt) {
-		mmio->num_lines = idt->line_count;
-		mmio->line_size = idt->line_size;
-		if (interleave_ways == 0)
-			return -ENXIO;
-		mmio->table_size = mmio->num_lines * interleave_ways
-			* mmio->line_size;
-	}
-
-	return 0;
-}
-
-static int acpi_nfit_blk_get_flags(struct nvdimm_bus_descriptor *nd_desc,
-		struct nvdimm *nvdimm, struct nfit_blk *nfit_blk)
-{
-	struct nd_cmd_dimm_flags flags;
-	int rc;
-
-	memset(&flags, 0, sizeof(flags));
-	rc = nd_desc->ndctl(nd_desc, nvdimm, ND_CMD_DIMM_FLAGS, &flags,
-			sizeof(flags), NULL);
-
-	if (rc >= 0 && flags.status == 0)
-		nfit_blk->dimm_flags = flags.flags;
-	else if (rc == -ENOTTY) {
-		/* fall back to a conservative default */
-		nfit_blk->dimm_flags = NFIT_BLK_DCR_LATCH | NFIT_BLK_READ_FLUSH;
-		rc = 0;
-	} else
-		rc = -ENXIO;
-
-	return rc;
-}
-
-static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
-		struct device *dev)
-{
-	struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
-	struct nd_blk_region *ndbr = to_nd_blk_region(dev);
-	struct nfit_blk_mmio *mmio;
-	struct nfit_blk *nfit_blk;
-	struct nfit_mem *nfit_mem;
-	struct nvdimm *nvdimm;
-	int rc;
-
-	nvdimm = nd_blk_region_to_dimm(ndbr);
-	nfit_mem = nvdimm_provider_data(nvdimm);
-	if (!nfit_mem || !nfit_mem->dcr || !nfit_mem->bdw) {
-		dev_dbg(dev, "missing%s%s%s\n",
-				nfit_mem ? "" : " nfit_mem",
-				(nfit_mem && nfit_mem->dcr) ? "" : " dcr",
-				(nfit_mem && nfit_mem->bdw) ? "" : " bdw");
-		return -ENXIO;
-	}
-
-	nfit_blk = devm_kzalloc(dev, sizeof(*nfit_blk), GFP_KERNEL);
-	if (!nfit_blk)
-		return -ENOMEM;
-	nd_blk_region_set_provider_data(ndbr, nfit_blk);
-	nfit_blk->nd_region = to_nd_region(dev);
-
-	/* map block aperture memory */
-	nfit_blk->bdw_offset = nfit_mem->bdw->offset;
-	mmio = &nfit_blk->mmio[BDW];
-	mmio->addr.base = devm_nvdimm_memremap(dev, nfit_mem->spa_bdw->address,
-			nfit_mem->spa_bdw->length, nd_blk_memremap_flags(ndbr));
-	if (!mmio->addr.base) {
-		dev_dbg(dev, "%s failed to map bdw\n",
-				nvdimm_name(nvdimm));
-		return -ENOMEM;
-	}
-	mmio->size = nfit_mem->bdw->size;
-	mmio->base_offset = nfit_mem->memdev_bdw->region_offset;
-	mmio->idt = nfit_mem->idt_bdw;
-	mmio->spa = nfit_mem->spa_bdw;
-	rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_bdw,
-			nfit_mem->memdev_bdw->interleave_ways);
-	if (rc) {
-		dev_dbg(dev, "%s failed to init bdw interleave\n",
-				nvdimm_name(nvdimm));
-		return rc;
-	}
-
-	/* map block control memory */
-	nfit_blk->cmd_offset = nfit_mem->dcr->command_offset;
-	nfit_blk->stat_offset = nfit_mem->dcr->status_offset;
-	mmio = &nfit_blk->mmio[DCR];
-	mmio->addr.base = devm_nvdimm_ioremap(dev, nfit_mem->spa_dcr->address,
-			nfit_mem->spa_dcr->length);
-	if (!mmio->addr.base) {
-		dev_dbg(dev, "%s failed to map dcr\n",
-				nvdimm_name(nvdimm));
-		return -ENOMEM;
-	}
-	mmio->size = nfit_mem->dcr->window_size;
-	mmio->base_offset = nfit_mem->memdev_dcr->region_offset;
-	mmio->idt = nfit_mem->idt_dcr;
-	mmio->spa = nfit_mem->spa_dcr;
-	rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_dcr,
-			nfit_mem->memdev_dcr->interleave_ways);
-	if (rc) {
-		dev_dbg(dev, "%s failed to init dcr interleave\n",
-				nvdimm_name(nvdimm));
-		return rc;
-	}
-
-	rc = acpi_nfit_blk_get_flags(nd_desc, nvdimm, nfit_blk);
-	if (rc < 0) {
-		dev_dbg(dev, "%s failed get DIMM flags\n",
-				nvdimm_name(nvdimm));
-		return rc;
-	}
-
-	if (nvdimm_has_flush(nfit_blk->nd_region) < 0)
-		dev_warn(dev, "unable to guarantee persistence of writes\n");
-
-	if (mmio->line_size == 0)
-		return 0;
-
-	if ((u32) nfit_blk->cmd_offset % mmio->line_size
-			+ 8 > mmio->line_size) {
-		dev_dbg(dev, "cmd_offset crosses interleave boundary\n");
-		return -ENXIO;
-	} else if ((u32) nfit_blk->stat_offset % mmio->line_size
-			+ 8 > mmio->line_size) {
-		dev_dbg(dev, "stat_offset crosses interleave boundary\n");
-		return -ENXIO;
-	}
-
-	return 0;
-}
-
 static int ars_get_cap(struct acpi_nfit_desc *acpi_desc,
 		struct nd_cmd_ars_cap *cmd, struct nfit_spa *nfit_spa)
 {
@@ -2911,9 +2563,6 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
 	struct nvdimm *nvdimm = acpi_nfit_dimm_by_handle(acpi_desc,
 			memdev->device_handle);
 	struct acpi_nfit_system_address *spa = nfit_spa->spa;
-	struct nd_blk_region_desc *ndbr_desc;
-	struct nfit_mem *nfit_mem;
-	int rc;
 
 	if (!nvdimm) {
 		dev_err(acpi_desc->dev, "spa%d dimm: %#x not found\n",
@@ -2928,30 +2577,6 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
 		mapping->start = memdev->address;
 		mapping->size = memdev->region_size;
 		break;
-	case NFIT_SPA_DCR:
-		nfit_mem = nvdimm_provider_data(nvdimm);
-		if (!nfit_mem || !nfit_mem->bdw) {
-			dev_dbg(acpi_desc->dev, "spa%d %s missing bdw\n",
-					spa->range_index, nvdimm_name(nvdimm));
-			break;
-		}
-
-		mapping->size = nfit_mem->bdw->capacity;
-		mapping->start = nfit_mem->bdw->start_address;
-		ndr_desc->num_lanes = nfit_mem->bdw->windows;
-		ndr_desc->mapping = mapping;
-		ndr_desc->num_mappings = 1;
-		ndbr_desc = to_blk_region_desc(ndr_desc);
-		ndbr_desc->enable = acpi_nfit_blk_region_enable;
-		ndbr_desc->do_io = acpi_desc->blk_do_io;
-		rc = acpi_nfit_init_interleave_set(acpi_desc, ndr_desc, spa);
-		if (rc)
-			return rc;
-		nfit_spa->nd_region = nvdimm_blk_region_create(acpi_desc->nvdimm_bus,
-				ndr_desc);
-		if (!nfit_spa->nd_region)
-			return -ENOMEM;
-		break;
 	}
 
 	return 0;
@@ -3635,7 +3260,6 @@ void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev)
 
 	dev_set_drvdata(dev, acpi_desc);
 	acpi_desc->dev = dev;
-	acpi_desc->blk_do_io = acpi_nfit_blk_region_do_io;
 	nd_desc = &acpi_desc->nd_desc;
 	nd_desc->provider_name = "ACPI.NFIT";
 	nd_desc->module = THIS_MODULE;
diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h
index c674f3df9be7..50882bdbeb96 100644
--- a/drivers/acpi/nfit/nfit.h
+++ b/drivers/acpi/nfit/nfit.h
@@ -208,13 +208,9 @@ struct nfit_mem {
 	struct nvdimm *nvdimm;
 	struct acpi_nfit_memory_map *memdev_dcr;
 	struct acpi_nfit_memory_map *memdev_pmem;
-	struct acpi_nfit_memory_map *memdev_bdw;
 	struct acpi_nfit_control_region *dcr;
-	struct acpi_nfit_data_region *bdw;
 	struct acpi_nfit_system_address *spa_dcr;
-	struct acpi_nfit_system_address *spa_bdw;
 	struct acpi_nfit_interleave *idt_dcr;
-	struct acpi_nfit_interleave *idt_bdw;
 	struct kernfs_node *flags_attr;
 	struct nfit_flush *nfit_flush;
 	struct list_head list;
@@ -266,8 +262,6 @@ struct acpi_nfit_desc {
 	unsigned long family_dsm_mask[NVDIMM_BUS_FAMILY_MAX + 1];
 	unsigned int platform_cap;
 	unsigned int scrub_tmo;
-	int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
-			void *iobuf, u64 len, int rw);
 	enum nvdimm_fwa_state fwa_state;
 	enum nvdimm_fwa_capability fwa_cap;
 	int fwa_count;
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 0bc91ffee257..65dbdda3a054 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -2842,28 +2842,6 @@ static void nfit_test1_setup(struct nfit_test *t)
 	set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
 }
 
-static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa,
-		void *iobuf, u64 len, int rw)
-{
-	struct nfit_blk *nfit_blk = ndbr->blk_provider_data;
-	struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW];
-	struct nd_region *nd_region = &ndbr->nd_region;
-	unsigned int lane;
-
-	lane = nd_region_acquire_lane(nd_region);
-	if (rw)
-		memcpy(mmio->addr.base + dpa, iobuf, len);
-	else {
-		memcpy(iobuf, mmio->addr.base + dpa, len);
-
-		/* give us some some coverage of the arch_invalidate_pmem() API */
-		arch_invalidate_pmem(mmio->addr.base + dpa, len);
-	}
-	nd_region_release_lane(nd_region, lane);
-
-	return 0;
-}
-
 static unsigned long nfit_ctl_handle;
 
 union acpi_object *result;
@@ -3219,7 +3197,6 @@ static int nfit_test_probe(struct platform_device *pdev)
 	nfit_test->setup(nfit_test);
 	acpi_desc = &nfit_test->acpi_desc;
 	acpi_nfit_desc_init(acpi_desc, &pdev->dev);
-	acpi_desc->blk_do_io = nfit_test_blk_do_io;
 	nd_desc = &acpi_desc->nd_desc;
 	nd_desc->provider_name = NULL;
 	nd_desc->module = THIS_MODULE;
-- 
cgit 


From 3b6c6c039707f6bb7c64af2aa82a437fabb93aee Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 9 Mar 2022 19:49:48 -0800
Subject: nvdimm/region: Delete nd_blk_region infrastructure

Now that the nd_namespace_blk infrastructure is removed, delete all the
region machinery to coordinate provisioning aliased capacity between
PMEM and BLK.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/164688418803.2879318.1302315202397235855.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/acpi/nfit/core.c           |  11 +-
 drivers/nvdimm/bus.c               |   2 -
 drivers/nvdimm/dimm_devs.c         | 204 +++----------------------------------
 drivers/nvdimm/label.c             |   6 +-
 drivers/nvdimm/label.h             |   2 +-
 drivers/nvdimm/namespace_devs.c    | 127 +++--------------------
 drivers/nvdimm/nd-core.h           |  24 +----
 drivers/nvdimm/nd.h                |  12 ---
 drivers/nvdimm/region.c            |  31 ++----
 drivers/nvdimm/region_devs.c       | 158 ++++------------------------
 include/linux/libnvdimm.h          |  24 -----
 include/uapi/linux/ndctl.h         |   2 -
 tools/testing/nvdimm/test/ndtest.c |  67 +-----------
 13 files changed, 66 insertions(+), 604 deletions(-)

(limited to 'tools/testing')

diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index bea6a219fddd..fe61f617a943 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -2036,10 +2036,6 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
 			cmd_mask |= nfit_mem->dsm_mask & NVDIMM_STANDARD_CMDMASK;
 		}
 
-		/* Quirk to ignore LOCAL for labels on HYPERV DIMMs */
-		if (nfit_mem->family == NVDIMM_FAMILY_HYPERV)
-			set_bit(NDD_NOBLK, &flags);
-
 		if (test_bit(NFIT_MEM_LSR, &nfit_mem->flags)) {
 			set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask);
 			set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask);
@@ -2602,8 +2598,7 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
 {
 	static struct nd_mapping_desc mappings[ND_MAX_MAPPINGS];
 	struct acpi_nfit_system_address *spa = nfit_spa->spa;
-	struct nd_blk_region_desc ndbr_desc;
-	struct nd_region_desc *ndr_desc;
+	struct nd_region_desc *ndr_desc, _ndr_desc;
 	struct nfit_memdev *nfit_memdev;
 	struct nvdimm_bus *nvdimm_bus;
 	struct resource res;
@@ -2619,10 +2614,10 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
 
 	memset(&res, 0, sizeof(res));
 	memset(&mappings, 0, sizeof(mappings));
-	memset(&ndbr_desc, 0, sizeof(ndbr_desc));
+	memset(&_ndr_desc, 0, sizeof(_ndr_desc));
 	res.start = spa->address;
 	res.end = res.start + spa->length - 1;
-	ndr_desc = &ndbr_desc.ndr_desc;
+	ndr_desc = &_ndr_desc;
 	ndr_desc->res = &res;
 	ndr_desc->provider_data = nfit_spa;
 	ndr_desc->attr_groups = acpi_nfit_region_attribute_groups;
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 9dc7f3edd42b..a4b5f637e599 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -35,8 +35,6 @@ static int to_nd_device_type(struct device *dev)
 		return ND_DEVICE_DIMM;
 	else if (is_memory(dev))
 		return ND_DEVICE_REGION_PMEM;
-	else if (is_nd_blk(dev))
-		return ND_DEVICE_REGION_BLK;
 	else if (is_nd_dax(dev))
 		return ND_DEVICE_DAX_PMEM;
 	else if (is_nd_region(dev->parent))
diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c
index dc7449a40003..ee507eed42b5 100644
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@@ -18,10 +18,6 @@
 
 static DEFINE_IDA(dimm_ida);
 
-static bool noblk;
-module_param(noblk, bool, 0444);
-MODULE_PARM_DESC(noblk, "force disable BLK / local alias support");
-
 /*
  * Retrieve bus and dimm handle and return if this bus supports
  * get_config_data commands
@@ -211,22 +207,6 @@ struct nvdimm *to_nvdimm(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(to_nvdimm);
 
-struct nvdimm *nd_blk_region_to_dimm(struct nd_blk_region *ndbr)
-{
-	struct nd_region *nd_region = &ndbr->nd_region;
-	struct nd_mapping *nd_mapping = &nd_region->mapping[0];
-
-	return nd_mapping->nvdimm;
-}
-EXPORT_SYMBOL_GPL(nd_blk_region_to_dimm);
-
-unsigned long nd_blk_memremap_flags(struct nd_blk_region *ndbr)
-{
-	/* pmem mapping properties are private to libnvdimm */
-	return ARCH_MEMREMAP_PMEM;
-}
-EXPORT_SYMBOL_GPL(nd_blk_memremap_flags);
-
 struct nvdimm_drvdata *to_ndd(struct nd_mapping *nd_mapping)
 {
 	struct nvdimm *nvdimm = nd_mapping->nvdimm;
@@ -312,8 +292,7 @@ static ssize_t flags_show(struct device *dev,
 {
 	struct nvdimm *nvdimm = to_nvdimm(dev);
 
-	return sprintf(buf, "%s%s%s\n",
-			test_bit(NDD_ALIASING, &nvdimm->flags) ? "alias " : "",
+	return sprintf(buf, "%s%s\n",
 			test_bit(NDD_LABELING, &nvdimm->flags) ? "label " : "",
 			test_bit(NDD_LOCKED, &nvdimm->flags) ? "lock " : "");
 }
@@ -612,8 +591,6 @@ struct nvdimm *__nvdimm_create(struct nvdimm_bus *nvdimm_bus,
 
 	nvdimm->dimm_id = dimm_id;
 	nvdimm->provider_data = provider_data;
-	if (noblk)
-		flags |= 1 << NDD_NOBLK;
 	nvdimm->flags = flags;
 	nvdimm->cmd_mask = cmd_mask;
 	nvdimm->num_flush = num_flush;
@@ -726,133 +703,6 @@ static unsigned long dpa_align(struct nd_region *nd_region)
 	return nd_region->align / nd_region->ndr_mappings;
 }
 
-int alias_dpa_busy(struct device *dev, void *data)
-{
-	resource_size_t map_end, blk_start, new;
-	struct blk_alloc_info *info = data;
-	struct nd_mapping *nd_mapping;
-	struct nd_region *nd_region;
-	struct nvdimm_drvdata *ndd;
-	struct resource *res;
-	unsigned long align;
-	int i;
-
-	if (!is_memory(dev))
-		return 0;
-
-	nd_region = to_nd_region(dev);
-	for (i = 0; i < nd_region->ndr_mappings; i++) {
-		nd_mapping  = &nd_region->mapping[i];
-		if (nd_mapping->nvdimm == info->nd_mapping->nvdimm)
-			break;
-	}
-
-	if (i >= nd_region->ndr_mappings)
-		return 0;
-
-	ndd = to_ndd(nd_mapping);
-	map_end = nd_mapping->start + nd_mapping->size - 1;
-	blk_start = nd_mapping->start;
-
-	/*
-	 * In the allocation case ->res is set to free space that we are
-	 * looking to validate against PMEM aliasing collision rules
-	 * (i.e. BLK is allocated after all aliased PMEM).
-	 */
-	if (info->res) {
-		if (info->res->start >= nd_mapping->start
-				&& info->res->start < map_end)
-			/* pass */;
-		else
-			return 0;
-	}
-
- retry:
-	/*
-	 * Find the free dpa from the end of the last pmem allocation to
-	 * the end of the interleave-set mapping.
-	 */
-	align = dpa_align(nd_region);
-	if (!align)
-		return 0;
-
-	for_each_dpa_resource(ndd, res) {
-		resource_size_t start, end;
-
-		if (strncmp(res->name, "pmem", 4) != 0)
-			continue;
-
-		start = ALIGN_DOWN(res->start, align);
-		end = ALIGN(res->end + 1, align) - 1;
-		if ((start >= blk_start && start < map_end)
-				|| (end >= blk_start && end <= map_end)) {
-			new = max(blk_start, min(map_end, end) + 1);
-			if (new != blk_start) {
-				blk_start = new;
-				goto retry;
-			}
-		}
-	}
-
-	/* update the free space range with the probed blk_start */
-	if (info->res && blk_start > info->res->start) {
-		info->res->start = max(info->res->start, blk_start);
-		if (info->res->start > info->res->end)
-			info->res->end = info->res->start - 1;
-		return 1;
-	}
-
-	info->available -= blk_start - nd_mapping->start;
-
-	return 0;
-}
-
-/**
- * nd_blk_available_dpa - account the unused dpa of BLK region
- * @nd_mapping: container of dpa-resource-root + labels
- *
- * Unlike PMEM, BLK namespaces can occupy discontiguous DPA ranges, but
- * we arrange for them to never start at an lower dpa than the last
- * PMEM allocation in an aliased region.
- */
-resource_size_t nd_blk_available_dpa(struct nd_region *nd_region)
-{
-	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev);
-	struct nd_mapping *nd_mapping = &nd_region->mapping[0];
-	struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
-	struct blk_alloc_info info = {
-		.nd_mapping = nd_mapping,
-		.available = nd_mapping->size,
-		.res = NULL,
-	};
-	struct resource *res;
-	unsigned long align;
-
-	if (!ndd)
-		return 0;
-
-	device_for_each_child(&nvdimm_bus->dev, &info, alias_dpa_busy);
-
-	/* now account for busy blk allocations in unaliased dpa */
-	align = dpa_align(nd_region);
-	if (!align)
-		return 0;
-	for_each_dpa_resource(ndd, res) {
-		resource_size_t start, end, size;
-
-		if (strncmp(res->name, "blk", 3) != 0)
-			continue;
-		start = ALIGN_DOWN(res->start, align);
-		end = ALIGN(res->end + 1, align) - 1;
-		size = end - start + 1;
-		if (size >= info.available)
-			return 0;
-		info.available -= size;
-	}
-
-	return info.available;
-}
-
 /**
  * nd_pmem_max_contiguous_dpa - For the given dimm+region, return the max
  *			   contiguous unallocated dpa range.
@@ -900,24 +750,16 @@ resource_size_t nd_pmem_max_contiguous_dpa(struct nd_region *nd_region,
  * nd_pmem_available_dpa - for the given dimm+region account unallocated dpa
  * @nd_mapping: container of dpa-resource-root + labels
  * @nd_region: constrain available space check to this reference region
- * @overlap: calculate available space assuming this level of overlap
  *
  * Validate that a PMEM label, if present, aligns with the start of an
- * interleave set and truncate the available size at the lowest BLK
- * overlap point.
- *
- * The expectation is that this routine is called multiple times as it
- * probes for the largest BLK encroachment for any single member DIMM of
- * the interleave set.  Once that value is determined the PMEM-limit for
- * the set can be established.
+ * interleave set.
  */
 resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
-		struct nd_mapping *nd_mapping, resource_size_t *overlap)
+				      struct nd_mapping *nd_mapping)
 {
-	resource_size_t map_start, map_end, busy = 0, available, blk_start;
 	struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+	resource_size_t map_start, map_end, busy = 0;
 	struct resource *res;
-	const char *reason;
 	unsigned long align;
 
 	if (!ndd)
@@ -929,46 +771,28 @@ resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
 
 	map_start = nd_mapping->start;
 	map_end = map_start + nd_mapping->size - 1;
-	blk_start = max(map_start, map_end + 1 - *overlap);
 	for_each_dpa_resource(ndd, res) {
 		resource_size_t start, end;
 
 		start = ALIGN_DOWN(res->start, align);
 		end = ALIGN(res->end + 1, align) - 1;
 		if (start >= map_start && start < map_end) {
-			if (strncmp(res->name, "blk", 3) == 0)
-				blk_start = min(blk_start,
-						max(map_start, start));
-			else if (end > map_end) {
-				reason = "misaligned to iset";
-				goto err;
-			} else
-				busy += end - start + 1;
+			if (end > map_end) {
+				nd_dbg_dpa(nd_region, ndd, res,
+					   "misaligned to iset\n");
+				return 0;
+			}
+			busy += end - start + 1;
 		} else if (end >= map_start && end <= map_end) {
-			if (strncmp(res->name, "blk", 3) == 0) {
-				/*
-				 * If a BLK allocation overlaps the start of
-				 * PMEM the entire interleave set may now only
-				 * be used for BLK.
-				 */
-				blk_start = map_start;
-			} else
-				busy += end - start + 1;
+			busy += end - start + 1;
 		} else if (map_start > start && map_start < end) {
 			/* total eclipse of the mapping */
 			busy += nd_mapping->size;
-			blk_start = map_start;
 		}
 	}
 
-	*overlap = map_end + 1 - blk_start;
-	available = blk_start - map_start;
-	if (busy < available)
-		return ALIGN_DOWN(available - busy, align);
-	return 0;
-
- err:
-	nd_dbg_dpa(nd_region, ndd, res, "%s\n", reason);
+	if (busy < nd_mapping->size)
+		return ALIGN_DOWN(nd_mapping->size - busy, align);
 	return 0;
 }
 
@@ -999,7 +823,7 @@ struct resource *nvdimm_allocate_dpa(struct nvdimm_drvdata *ndd,
 /**
  * nvdimm_allocated_dpa - sum up the dpa currently allocated to this label_id
  * @nvdimm: container of dpa-resource-root + labels
- * @label_id: dpa resource name of the form {pmem|blk}-<human readable uuid>
+ * @label_id: dpa resource name of the form pmem-<human readable uuid>
  */
 resource_size_t nvdimm_allocated_dpa(struct nvdimm_drvdata *ndd,
 		struct nd_label_id *label_id)
diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c
index 8c972bcb2ac3..082253a3a956 100644
--- a/drivers/nvdimm/label.c
+++ b/drivers/nvdimm/label.c
@@ -334,8 +334,7 @@ char *nd_label_gen_id(struct nd_label_id *label_id, const uuid_t *uuid,
 {
 	if (!label_id || !uuid)
 		return NULL;
-	snprintf(label_id->id, ND_LABEL_ID_SIZE, "%s-%pUb",
-			flags & NSLABEL_FLAG_LOCAL ? "blk" : "pmem", uuid);
+	snprintf(label_id->id, ND_LABEL_ID_SIZE, "pmem-%pUb", uuid);
 	return label_id->id;
 }
 
@@ -406,7 +405,6 @@ int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd)
 		return 0; /* no label, nothing to reserve */
 
 	for_each_clear_bit_le(slot, free, nslot) {
-		struct nvdimm *nvdimm = to_nvdimm(ndd->dev);
 		struct nd_namespace_label *nd_label;
 		struct nd_region *nd_region = NULL;
 		struct nd_label_id label_id;
@@ -421,8 +419,6 @@ int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd)
 
 		nsl_get_uuid(ndd, nd_label, &label_uuid);
 		flags = nsl_get_flags(ndd, nd_label);
-		if (test_bit(NDD_NOBLK, &nvdimm->flags))
-			flags &= ~NSLABEL_FLAG_LOCAL;
 		nd_label_gen_id(&label_id, &label_uuid, flags);
 		res = nvdimm_allocate_dpa(ndd, &label_id,
 					  nsl_get_dpa(ndd, nd_label),
diff --git a/drivers/nvdimm/label.h b/drivers/nvdimm/label.h
index 198ef1df298b..0650fb4b9821 100644
--- a/drivers/nvdimm/label.h
+++ b/drivers/nvdimm/label.h
@@ -193,7 +193,7 @@ struct nd_namespace_label {
 
 /**
  * struct nd_label_id - identifier string for dpa allocation
- * @id: "{blk|pmem}-<namespace uuid>"
+ * @id: "pmem-<namespace uuid>"
  */
 struct nd_label_id {
 	char id[ND_LABEL_ID_SIZE];
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index d1c190b02657..62b83b2e26e3 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -297,13 +297,11 @@ static int scan_free(struct nd_region *nd_region,
 		struct nd_mapping *nd_mapping, struct nd_label_id *label_id,
 		resource_size_t n)
 {
-	bool is_blk = strncmp(label_id->id, "blk", 3) == 0;
 	struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
 	int rc = 0;
 
 	while (n) {
 		struct resource *res, *last;
-		resource_size_t new_start;
 
 		last = NULL;
 		for_each_dpa_resource(ndd, res)
@@ -321,16 +319,7 @@ static int scan_free(struct nd_region *nd_region,
 			continue;
 		}
 
-		/*
-		 * Keep BLK allocations relegated to high DPA as much as
-		 * possible
-		 */
-		if (is_blk)
-			new_start = res->start + n;
-		else
-			new_start = res->start;
-
-		rc = adjust_resource(res, new_start, resource_size(res) - n);
+		rc = adjust_resource(res, res->start, resource_size(res) - n);
 		if (rc == 0)
 			res->flags |= DPA_RESOURCE_ADJUSTED;
 		nd_dbg_dpa(nd_region, ndd, res, "shrink %d\n", rc);
@@ -372,20 +361,12 @@ static resource_size_t init_dpa_allocation(struct nd_label_id *label_id,
 		struct nd_region *nd_region, struct nd_mapping *nd_mapping,
 		resource_size_t n)
 {
-	bool is_blk = strncmp(label_id->id, "blk", 3) == 0;
 	struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
-	resource_size_t first_dpa;
 	struct resource *res;
 	int rc = 0;
 
-	/* allocate blk from highest dpa first */
-	if (is_blk)
-		first_dpa = nd_mapping->start + nd_mapping->size - n;
-	else
-		first_dpa = nd_mapping->start;
-
 	/* first resource allocation for this label-id or dimm */
-	res = nvdimm_allocate_dpa(ndd, label_id, first_dpa, n);
+	res = nvdimm_allocate_dpa(ndd, label_id, nd_mapping->start, n);
 	if (!res)
 		rc = -EBUSY;
 
@@ -416,7 +397,6 @@ static void space_valid(struct nd_region *nd_region, struct nvdimm_drvdata *ndd,
 		resource_size_t n, struct resource *valid)
 {
 	bool is_reserve = strcmp(label_id->id, "pmem-reserve") == 0;
-	bool is_pmem = strncmp(label_id->id, "pmem", 4) == 0;
 	unsigned long align;
 
 	align = nd_region->align / nd_region->ndr_mappings;
@@ -429,21 +409,6 @@ static void space_valid(struct nd_region *nd_region, struct nvdimm_drvdata *ndd,
 	if (is_reserve)
 		return;
 
-	if (!is_pmem) {
-		struct nd_mapping *nd_mapping = &nd_region->mapping[0];
-		struct nvdimm_bus *nvdimm_bus;
-		struct blk_alloc_info info = {
-			.nd_mapping = nd_mapping,
-			.available = nd_mapping->size,
-			.res = valid,
-		};
-
-		WARN_ON(!is_nd_blk(&nd_region->dev));
-		nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev);
-		device_for_each_child(&nvdimm_bus->dev, &info, alias_dpa_busy);
-		return;
-	}
-
 	/* allocation needs to be contiguous, so this is all or nothing */
 	if (resource_size(valid) < n)
 		goto invalid;
@@ -471,7 +436,6 @@ static resource_size_t scan_allocate(struct nd_region *nd_region,
 		resource_size_t n)
 {
 	resource_size_t mapping_end = nd_mapping->start + nd_mapping->size - 1;
-	bool is_pmem = strncmp(label_id->id, "pmem", 4) == 0;
 	struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
 	struct resource *res, *exist = NULL, valid;
 	const resource_size_t to_allocate = n;
@@ -569,10 +533,6 @@ static resource_size_t scan_allocate(struct nd_region *nd_region,
 		}
 
 		if (strcmp(action, "allocate") == 0) {
-			/* BLK allocate bottom up */
-			if (!is_pmem)
-				valid.start += available - allocate;
-
 			new_res = nvdimm_allocate_dpa(ndd, label_id,
 					valid.start, allocate);
 			if (!new_res)
@@ -608,12 +568,7 @@ static resource_size_t scan_allocate(struct nd_region *nd_region,
 			return 0;
 	}
 
-	/*
-	 * If we allocated nothing in the BLK case it may be because we are in
-	 * an initial "pmem-reserve pass".  Only do an initial BLK allocation
-	 * when none of the DPA space is reserved.
-	 */
-	if ((is_pmem || !ndd->dpa.child) && n == to_allocate)
+	if (n == to_allocate)
 		return init_dpa_allocation(label_id, nd_region, nd_mapping, n);
 	return n;
 }
@@ -672,7 +627,7 @@ int __reserve_free_pmem(struct device *dev, void *data)
 		if (nd_mapping->nvdimm != nvdimm)
 			continue;
 
-		n = nd_pmem_available_dpa(nd_region, nd_mapping, &rem);
+		n = nd_pmem_available_dpa(nd_region, nd_mapping);
 		if (n == 0)
 			return 0;
 		rem = scan_allocate(nd_region, nd_mapping, &label_id, n);
@@ -697,19 +652,6 @@ void release_free_pmem(struct nvdimm_bus *nvdimm_bus,
 			nvdimm_free_dpa(ndd, res);
 }
 
-static int reserve_free_pmem(struct nvdimm_bus *nvdimm_bus,
-		struct nd_mapping *nd_mapping)
-{
-	struct nvdimm *nvdimm = nd_mapping->nvdimm;
-	int rc;
-
-	rc = device_for_each_child(&nvdimm_bus->dev, nvdimm,
-			__reserve_free_pmem);
-	if (rc)
-		release_free_pmem(nvdimm_bus, nd_mapping);
-	return rc;
-}
-
 /**
  * grow_dpa_allocation - for each dimm allocate n bytes for @label_id
  * @nd_region: the set of dimms to allocate @n more bytes from
@@ -726,37 +668,14 @@ static int reserve_free_pmem(struct nvdimm_bus *nvdimm_bus,
 static int grow_dpa_allocation(struct nd_region *nd_region,
 		struct nd_label_id *label_id, resource_size_t n)
 {
-	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev);
-	bool is_pmem = strncmp(label_id->id, "pmem", 4) == 0;
 	int i;
 
 	for (i = 0; i < nd_region->ndr_mappings; i++) {
 		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
 		resource_size_t rem = n;
-		int rc, j;
-
-		/*
-		 * In the BLK case try once with all unallocated PMEM
-		 * reserved, and once without
-		 */
-		for (j = is_pmem; j < 2; j++) {
-			bool blk_only = j == 0;
-
-			if (blk_only) {
-				rc = reserve_free_pmem(nvdimm_bus, nd_mapping);
-				if (rc)
-					return rc;
-			}
-			rem = scan_allocate(nd_region, nd_mapping,
-					label_id, rem);
-			if (blk_only)
-				release_free_pmem(nvdimm_bus, nd_mapping);
-
-			/* try again and allow encroachments into PMEM */
-			if (rem == 0)
-				break;
-		}
+		int rc;
 
+		rem = scan_allocate(nd_region, nd_mapping, label_id, rem);
 		dev_WARN_ONCE(&nd_region->dev, rem,
 				"allocation underrun: %#llx of %#llx bytes\n",
 				(unsigned long long) n - rem,
@@ -869,8 +788,8 @@ static ssize_t __size_store(struct device *dev, unsigned long long val)
 		ndd = to_ndd(nd_mapping);
 
 		/*
-		 * All dimms in an interleave set, or the base dimm for a blk
-		 * region, need to be enabled for the size to be changed.
+		 * All dimms in an interleave set, need to be enabled
+		 * for the size to be changed.
 		 */
 		if (!ndd)
 			return -ENXIO;
@@ -1169,9 +1088,6 @@ static ssize_t resource_show(struct device *dev,
 }
 static DEVICE_ATTR_ADMIN_RO(resource);
 
-static const unsigned long blk_lbasize_supported[] = { 512, 520, 528,
-	4096, 4104, 4160, 4224, 0 };
-
 static const unsigned long pmem_lbasize_supported[] = { 512, 4096, 0 };
 
 static ssize_t sector_size_show(struct device *dev,
@@ -1823,10 +1739,7 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region,
 	/*
 	 * Fix up each mapping's 'labels' to have the validated pmem label for
 	 * that position at labels[0], and NULL at labels[1].  In the process,
-	 * check that the namespace aligns with interleave-set.  We know
-	 * that it does not overlap with any blk namespaces by virtue of
-	 * the dimm being enabled (i.e. nd_label_reserve_dpa()
-	 * succeeded).
+	 * check that the namespace aligns with interleave-set.
 	 */
 	nsl_get_uuid(ndd, nd_label, &uuid);
 	rc = select_pmem_id(nd_region, &uuid);
@@ -1931,8 +1844,7 @@ void nd_region_create_ns_seed(struct nd_region *nd_region)
 	 * disabled until memory becomes available
 	 */
 	if (!nd_region->ns_seed)
-		dev_err(&nd_region->dev, "failed to create %s namespace\n",
-				is_nd_blk(&nd_region->dev) ? "blk" : "pmem");
+		dev_err(&nd_region->dev, "failed to create namespace\n");
 	else
 		nd_device_register(nd_region->ns_seed);
 }
@@ -2028,16 +1940,9 @@ static struct device **scan_labels(struct nd_region *nd_region)
 	list_for_each_entry_safe(label_ent, e, &nd_mapping->labels, list) {
 		struct nd_namespace_label *nd_label = label_ent->label;
 		struct device **__devs;
-		u32 flags;
 
 		if (!nd_label)
 			continue;
-		flags = nsl_get_flags(ndd, nd_label);
-		if (is_nd_blk(&nd_region->dev)
-				== !!(flags & NSLABEL_FLAG_LOCAL))
-			/* pass, region matches label type */;
-		else
-			continue;
 
 		/* skip labels that describe extents outside of the region */
 		if (nsl_get_dpa(ndd, nd_label) < nd_mapping->start ||
@@ -2073,9 +1978,8 @@ static struct device **scan_labels(struct nd_region *nd_region)
 
 	}
 
-	dev_dbg(&nd_region->dev, "discovered %d %s namespace%s\n",
-			count, is_nd_blk(&nd_region->dev)
-			? "blk" : "pmem", count == 1 ? "" : "s");
+	dev_dbg(&nd_region->dev, "discovered %d namespace%s\n", count,
+		count == 1 ? "" : "s");
 
 	if (count == 0) {
 		struct nd_namespace_pmem *nspm;
@@ -2226,12 +2130,6 @@ static int init_active_labels(struct nd_region *nd_region)
 			if (!label_ent)
 				break;
 			label = nd_label_active(ndd, j);
-			if (test_bit(NDD_NOBLK, &nvdimm->flags)) {
-				u32 flags = nsl_get_flags(ndd, label);
-
-				flags &= ~NSLABEL_FLAG_LOCAL;
-				nsl_set_flags(ndd, label, flags);
-			}
 			label_ent->label = label;
 
 			mutex_lock(&nd_mapping->lock);
@@ -2275,7 +2173,6 @@ int nd_region_register_namespaces(struct nd_region *nd_region, int *err)
 		devs = create_namespace_io(nd_region);
 		break;
 	case ND_DEVICE_NAMESPACE_PMEM:
-	case ND_DEVICE_NAMESPACE_BLK:
 		devs = create_namespaces(nd_region);
 		break;
 	default:
diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h
index e4af0719cf33..17febf9ac911 100644
--- a/drivers/nvdimm/nd-core.h
+++ b/drivers/nvdimm/nd-core.h
@@ -82,30 +82,12 @@ static inline void nvdimm_security_overwrite_query(struct work_struct *work)
 }
 #endif
 
-/**
- * struct blk_alloc_info - tracking info for BLK dpa scanning
- * @nd_mapping: blk region mapping boundaries
- * @available: decremented in alias_dpa_busy as aliased PMEM is scanned
- * @busy: decremented in blk_dpa_busy to account for ranges already
- * 	  handled by alias_dpa_busy
- * @res: alias_dpa_busy interprets this a free space range that needs to
- * 	 be truncated to the valid BLK allocation starting DPA, blk_dpa_busy
- * 	 treats it as a busy range that needs the aliased PMEM ranges
- * 	 truncated.
- */
-struct blk_alloc_info {
-	struct nd_mapping *nd_mapping;
-	resource_size_t available, busy;
-	struct resource *res;
-};
-
 bool is_nvdimm(struct device *dev);
 bool is_nd_pmem(struct device *dev);
 bool is_nd_volatile(struct device *dev);
-bool is_nd_blk(struct device *dev);
 static inline bool is_nd_region(struct device *dev)
 {
-	return is_nd_pmem(dev) || is_nd_blk(dev) || is_nd_volatile(dev);
+	return is_nd_pmem(dev) || is_nd_volatile(dev);
 }
 static inline bool is_memory(struct device *dev)
 {
@@ -142,14 +124,12 @@ resource_size_t nd_pmem_max_contiguous_dpa(struct nd_region *nd_region,
 					   struct nd_mapping *nd_mapping);
 resource_size_t nd_region_allocatable_dpa(struct nd_region *nd_region);
 resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
-		struct nd_mapping *nd_mapping, resource_size_t *overlap);
-resource_size_t nd_blk_available_dpa(struct nd_region *nd_region);
+				      struct nd_mapping *nd_mapping);
 resource_size_t nd_region_available_dpa(struct nd_region *nd_region);
 int nd_region_conflict(struct nd_region *nd_region, resource_size_t start,
 		resource_size_t size);
 resource_size_t nvdimm_allocated_dpa(struct nvdimm_drvdata *ndd,
 		struct nd_label_id *label_id);
-int alias_dpa_busy(struct device *dev, void *data);
 int nvdimm_num_label_slots(struct nvdimm_drvdata *ndd);
 void get_ndd(struct nvdimm_drvdata *ndd);
 resource_size_t __nvdimm_namespace_capacity(struct nd_namespace_common *ndns);
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 8391bf2729bc..ec5219680092 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -295,9 +295,6 @@ static inline const u8 *nsl_uuid_raw(struct nvdimm_drvdata *ndd,
 	return nd_label->efi.uuid;
 }
 
-bool nsl_validate_blk_isetcookie(struct nvdimm_drvdata *ndd,
-				 struct nd_namespace_label *nd_label,
-				 u64 isetcookie);
 bool nsl_validate_type_guid(struct nvdimm_drvdata *ndd,
 			    struct nd_namespace_label *nd_label, guid_t *guid);
 enum nvdimm_claim_class nsl_get_claim_class(struct nvdimm_drvdata *ndd,
@@ -437,14 +434,6 @@ static inline bool nsl_validate_nlabel(struct nd_region *nd_region,
 	return nsl_get_nlabel(ndd, nd_label) == nd_region->ndr_mappings;
 }
 
-struct nd_blk_region {
-	int (*enable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
-	int (*do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
-			void *iobuf, u64 len, int rw);
-	void *blk_provider_data;
-	struct nd_region nd_region;
-};
-
 /*
  * Lookup next in the repeating sequence of 01, 10, and 11.
  */
@@ -672,7 +661,6 @@ static inline int nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
 	return -ENXIO;
 }
 #endif
-int nd_blk_region_init(struct nd_region *nd_region);
 int nd_region_activate(struct nd_region *nd_region);
 static inline bool is_bad_pmem(struct badblocks *bb, sector_t sector,
 		unsigned int len)
diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c
index e0c34120df37..188560b1c110 100644
--- a/drivers/nvdimm/region.c
+++ b/drivers/nvdimm/region.c
@@ -15,6 +15,10 @@ static int nd_region_probe(struct device *dev)
 	static unsigned long once;
 	struct nd_region_data *ndrd;
 	struct nd_region *nd_region = to_nd_region(dev);
+	struct range range = {
+		.start = nd_region->ndr_start,
+		.end = nd_region->ndr_start + nd_region->ndr_size - 1,
+	};
 
 	if (nd_region->num_lanes > num_online_cpus()
 			&& nd_region->num_lanes < num_possible_cpus()
@@ -30,25 +34,13 @@ static int nd_region_probe(struct device *dev)
 	if (rc)
 		return rc;
 
-	rc = nd_blk_region_init(nd_region);
-	if (rc)
-		return rc;
-
-	if (is_memory(&nd_region->dev)) {
-		struct range range = {
-			.start = nd_region->ndr_start,
-			.end = nd_region->ndr_start + nd_region->ndr_size - 1,
-		};
-
-		if (devm_init_badblocks(dev, &nd_region->bb))
-			return -ENODEV;
-		nd_region->bb_state = sysfs_get_dirent(nd_region->dev.kobj.sd,
-						       "badblocks");
-		if (!nd_region->bb_state)
-			dev_warn(&nd_region->dev,
-					"'badblocks' notification disabled\n");
-		nvdimm_badblocks_populate(nd_region, &nd_region->bb, &range);
-	}
+	if (devm_init_badblocks(dev, &nd_region->bb))
+		return -ENODEV;
+	nd_region->bb_state =
+		sysfs_get_dirent(nd_region->dev.kobj.sd, "badblocks");
+	if (!nd_region->bb_state)
+		dev_warn(dev, "'badblocks' notification disabled\n");
+	nvdimm_badblocks_populate(nd_region, &nd_region->bb, &range);
 
 	rc = nd_region_register_namespaces(nd_region, &err);
 	if (rc < 0)
@@ -158,4 +150,3 @@ void nd_region_exit(void)
 }
 
 MODULE_ALIAS_ND_DEVICE(ND_DEVICE_REGION_PMEM);
-MODULE_ALIAS_ND_DEVICE(ND_DEVICE_REGION_BLK);
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index 70ad891a76ba..0cb274c2b508 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -134,10 +134,7 @@ static void nd_region_release(struct device *dev)
 	}
 	free_percpu(nd_region->lane);
 	memregion_free(nd_region->id);
-	if (is_nd_blk(dev))
-		kfree(to_nd_blk_region(dev));
-	else
-		kfree(nd_region);
+	kfree(nd_region);
 }
 
 struct nd_region *to_nd_region(struct device *dev)
@@ -157,33 +154,12 @@ struct device *nd_region_dev(struct nd_region *nd_region)
 }
 EXPORT_SYMBOL_GPL(nd_region_dev);
 
-struct nd_blk_region *to_nd_blk_region(struct device *dev)
-{
-	struct nd_region *nd_region = to_nd_region(dev);
-
-	WARN_ON(!is_nd_blk(dev));
-	return container_of(nd_region, struct nd_blk_region, nd_region);
-}
-EXPORT_SYMBOL_GPL(to_nd_blk_region);
-
 void *nd_region_provider_data(struct nd_region *nd_region)
 {
 	return nd_region->provider_data;
 }
 EXPORT_SYMBOL_GPL(nd_region_provider_data);
 
-void *nd_blk_region_provider_data(struct nd_blk_region *ndbr)
-{
-	return ndbr->blk_provider_data;
-}
-EXPORT_SYMBOL_GPL(nd_blk_region_provider_data);
-
-void nd_blk_region_set_provider_data(struct nd_blk_region *ndbr, void *data)
-{
-	ndbr->blk_provider_data = data;
-}
-EXPORT_SYMBOL_GPL(nd_blk_region_set_provider_data);
-
 /**
  * nd_region_to_nstype() - region to an integer namespace type
  * @nd_region: region-device to interrogate
@@ -208,8 +184,6 @@ int nd_region_to_nstype(struct nd_region *nd_region)
 			return ND_DEVICE_NAMESPACE_PMEM;
 		else
 			return ND_DEVICE_NAMESPACE_IO;
-	} else if (is_nd_blk(&nd_region->dev)) {
-		return ND_DEVICE_NAMESPACE_BLK;
 	}
 
 	return 0;
@@ -332,14 +306,12 @@ static DEVICE_ATTR_RO(set_cookie);
 
 resource_size_t nd_region_available_dpa(struct nd_region *nd_region)
 {
-	resource_size_t blk_max_overlap = 0, available, overlap;
+	resource_size_t available;
 	int i;
 
 	WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev));
 
- retry:
 	available = 0;
-	overlap = blk_max_overlap;
 	for (i = 0; i < nd_region->ndr_mappings; i++) {
 		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
 		struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
@@ -348,15 +320,7 @@ resource_size_t nd_region_available_dpa(struct nd_region *nd_region)
 		if (!ndd)
 			return 0;
 
-		if (is_memory(&nd_region->dev)) {
-			available += nd_pmem_available_dpa(nd_region,
-					nd_mapping, &overlap);
-			if (overlap > blk_max_overlap) {
-				blk_max_overlap = overlap;
-				goto retry;
-			}
-		} else if (is_nd_blk(&nd_region->dev))
-			available += nd_blk_available_dpa(nd_region);
+		available += nd_pmem_available_dpa(nd_region, nd_mapping);
 	}
 
 	return available;
@@ -364,26 +328,17 @@ resource_size_t nd_region_available_dpa(struct nd_region *nd_region)
 
 resource_size_t nd_region_allocatable_dpa(struct nd_region *nd_region)
 {
-	resource_size_t available = 0;
+	resource_size_t avail = 0;
 	int i;
 
-	if (is_memory(&nd_region->dev))
-		available = PHYS_ADDR_MAX;
-
 	WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev));
 	for (i = 0; i < nd_region->ndr_mappings; i++) {
 		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
 
-		if (is_memory(&nd_region->dev))
-			available = min(available,
-					nd_pmem_max_contiguous_dpa(nd_region,
-								   nd_mapping));
-		else if (is_nd_blk(&nd_region->dev))
-			available += nd_blk_available_dpa(nd_region);
+		avail = min_not_zero(avail, nd_pmem_max_contiguous_dpa(
+						    nd_region, nd_mapping));
 	}
-	if (is_memory(&nd_region->dev))
-		return available * nd_region->ndr_mappings;
-	return available;
+	return avail * nd_region->ndr_mappings;
 }
 
 static ssize_t available_size_show(struct device *dev,
@@ -693,9 +648,8 @@ static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n)
 			&& a != &dev_attr_available_size.attr)
 		return a->mode;
 
-	if ((type == ND_DEVICE_NAMESPACE_PMEM
-				|| type == ND_DEVICE_NAMESPACE_BLK)
-			&& a == &dev_attr_available_size.attr)
+	if (type == ND_DEVICE_NAMESPACE_PMEM &&
+	    a == &dev_attr_available_size.attr)
 		return a->mode;
 	else if (is_memory(dev) && nd_set)
 		return a->mode;
@@ -828,12 +782,6 @@ static const struct attribute_group *nd_region_attribute_groups[] = {
 	NULL,
 };
 
-static const struct device_type nd_blk_device_type = {
-	.name = "nd_blk",
-	.release = nd_region_release,
-	.groups = nd_region_attribute_groups,
-};
-
 static const struct device_type nd_pmem_device_type = {
 	.name = "nd_pmem",
 	.release = nd_region_release,
@@ -851,11 +799,6 @@ bool is_nd_pmem(struct device *dev)
 	return dev ? dev->type == &nd_pmem_device_type : false;
 }
 
-bool is_nd_blk(struct device *dev)
-{
-	return dev ? dev->type == &nd_blk_device_type : false;
-}
-
 bool is_nd_volatile(struct device *dev)
 {
 	return dev ? dev->type == &nd_volatile_device_type : false;
@@ -929,22 +872,6 @@ void nd_region_advance_seeds(struct nd_region *nd_region, struct device *dev)
 	nvdimm_bus_unlock(dev);
 }
 
-int nd_blk_region_init(struct nd_region *nd_region)
-{
-	struct device *dev = &nd_region->dev;
-	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
-
-	if (!is_nd_blk(dev))
-		return 0;
-
-	if (nd_region->ndr_mappings < 1) {
-		dev_dbg(dev, "invalid BLK region\n");
-		return -ENXIO;
-	}
-
-	return to_nd_blk_region(dev)->enable(nvdimm_bus, dev);
-}
-
 /**
  * nd_region_acquire_lane - allocate and lock a lane
  * @nd_region: region id and number of lanes possible
@@ -1007,24 +934,10 @@ EXPORT_SYMBOL(nd_region_release_lane);
 static unsigned long default_align(struct nd_region *nd_region)
 {
 	unsigned long align;
-	int i, mappings;
 	u32 remainder;
+	int mappings;
 
-	if (is_nd_blk(&nd_region->dev))
-		align = PAGE_SIZE;
-	else
-		align = MEMREMAP_COMPAT_ALIGN_MAX;
-
-	for (i = 0; i < nd_region->ndr_mappings; i++) {
-		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
-		struct nvdimm *nvdimm = nd_mapping->nvdimm;
-
-		if (test_bit(NDD_ALIASING, &nvdimm->flags)) {
-			align = MEMREMAP_COMPAT_ALIGN_MAX;
-			break;
-		}
-	}
-
+	align = MEMREMAP_COMPAT_ALIGN_MAX;
 	if (nd_region->ndr_size < MEMREMAP_COMPAT_ALIGN_MAX)
 		align = PAGE_SIZE;
 
@@ -1042,7 +955,6 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
 {
 	struct nd_region *nd_region;
 	struct device *dev;
-	void *region_buf;
 	unsigned int i;
 	int ro = 0;
 
@@ -1060,36 +972,13 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
 		if (test_bit(NDD_UNARMED, &nvdimm->flags))
 			ro = 1;
 
-		if (test_bit(NDD_NOBLK, &nvdimm->flags)
-				&& dev_type == &nd_blk_device_type) {
-			dev_err(&nvdimm_bus->dev, "%s: %s mapping%d is not BLK capable\n",
-					caller, dev_name(&nvdimm->dev), i);
-			return NULL;
-		}
 	}
 
-	if (dev_type == &nd_blk_device_type) {
-		struct nd_blk_region_desc *ndbr_desc;
-		struct nd_blk_region *ndbr;
-
-		ndbr_desc = to_blk_region_desc(ndr_desc);
-		ndbr = kzalloc(sizeof(*ndbr) + sizeof(struct nd_mapping)
-				* ndr_desc->num_mappings,
-				GFP_KERNEL);
-		if (ndbr) {
-			nd_region = &ndbr->nd_region;
-			ndbr->enable = ndbr_desc->enable;
-			ndbr->do_io = ndbr_desc->do_io;
-		}
-		region_buf = ndbr;
-	} else {
-		nd_region = kzalloc(struct_size(nd_region, mapping,
-						ndr_desc->num_mappings),
-				    GFP_KERNEL);
-		region_buf = nd_region;
-	}
+	nd_region =
+		kzalloc(struct_size(nd_region, mapping, ndr_desc->num_mappings),
+			GFP_KERNEL);
 
-	if (!region_buf)
+	if (!nd_region)
 		return NULL;
 	nd_region->id = memregion_alloc(GFP_KERNEL);
 	if (nd_region->id < 0)
@@ -1153,7 +1042,7 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
  err_percpu:
 	memregion_free(nd_region->id);
  err_id:
-	kfree(region_buf);
+	kfree(nd_region);
 	return NULL;
 }
 
@@ -1166,17 +1055,6 @@ struct nd_region *nvdimm_pmem_region_create(struct nvdimm_bus *nvdimm_bus,
 }
 EXPORT_SYMBOL_GPL(nvdimm_pmem_region_create);
 
-struct nd_region *nvdimm_blk_region_create(struct nvdimm_bus *nvdimm_bus,
-		struct nd_region_desc *ndr_desc)
-{
-	if (ndr_desc->num_mappings > 1)
-		return NULL;
-	ndr_desc->num_lanes = min(ndr_desc->num_lanes, ND_MAX_LANES);
-	return nd_region_create(nvdimm_bus, ndr_desc, &nd_blk_device_type,
-			__func__);
-}
-EXPORT_SYMBOL_GPL(nvdimm_blk_region_create);
-
 struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus,
 		struct nd_region_desc *ndr_desc)
 {
@@ -1201,7 +1079,7 @@ int nvdimm_flush(struct nd_region *nd_region, struct bio *bio)
 }
 /**
  * nvdimm_flush - flush any posted write queues between the cpu and pmem media
- * @nd_region: blk or interleaved pmem region
+ * @nd_region: interleaved pmem region
  */
 int generic_nvdimm_flush(struct nd_region *nd_region)
 {
@@ -1234,7 +1112,7 @@ EXPORT_SYMBOL_GPL(nvdimm_flush);
 
 /**
  * nvdimm_has_flush - determine write flushing requirements
- * @nd_region: blk or interleaved pmem region
+ * @nd_region: interleaved pmem region
  *
  * Returns 1 if writes require flushing
  * Returns 0 if writes do not require flushing
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 7074aa9af525..0d61e07b6827 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -25,8 +25,6 @@ struct badrange {
 };
 
 enum {
-	/* when a dimm supports both PMEM and BLK access a label is required */
-	NDD_ALIASING = 0,
 	/* unarmed memory devices may not persist writes */
 	NDD_UNARMED = 1,
 	/* locked memory devices should not be accessed */
@@ -35,8 +33,6 @@ enum {
 	NDD_SECURITY_OVERWRITE = 3,
 	/*  tracking whether or not there is a pending device reference */
 	NDD_WORK_PENDING = 4,
-	/* ignore / filter NSLABEL_FLAG_LOCAL for this DIMM, i.e. no aliasing */
-	NDD_NOBLK = 5,
 	/* dimm supports namespace labels */
 	NDD_LABELING = 6,
 
@@ -140,21 +136,6 @@ static inline void __iomem *devm_nvdimm_ioremap(struct device *dev,
 }
 
 struct nvdimm_bus;
-struct module;
-struct nd_blk_region;
-struct nd_blk_region_desc {
-	int (*enable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
-	int (*do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
-			void *iobuf, u64 len, int rw);
-	struct nd_region_desc ndr_desc;
-};
-
-static inline struct nd_blk_region_desc *to_blk_region_desc(
-		struct nd_region_desc *ndr_desc)
-{
-	return container_of(ndr_desc, struct nd_blk_region_desc, ndr_desc);
-
-}
 
 /*
  * Note that separate bits for locked + unlocked are defined so that
@@ -257,7 +238,6 @@ struct nvdimm_bus *nvdimm_to_bus(struct nvdimm *nvdimm);
 struct nvdimm *to_nvdimm(struct device *dev);
 struct nd_region *to_nd_region(struct device *dev);
 struct device *nd_region_dev(struct nd_region *nd_region);
-struct nd_blk_region *to_nd_blk_region(struct device *dev);
 struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus);
 struct device *to_nvdimm_bus_dev(struct nvdimm_bus *nvdimm_bus);
 const char *nvdimm_name(struct nvdimm *nvdimm);
@@ -295,10 +275,6 @@ struct nd_region *nvdimm_blk_region_create(struct nvdimm_bus *nvdimm_bus,
 struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus,
 		struct nd_region_desc *ndr_desc);
 void *nd_region_provider_data(struct nd_region *nd_region);
-void *nd_blk_region_provider_data(struct nd_blk_region *ndbr);
-void nd_blk_region_set_provider_data(struct nd_blk_region *ndbr, void *data);
-struct nvdimm *nd_blk_region_to_dimm(struct nd_blk_region *ndbr);
-unsigned long nd_blk_memremap_flags(struct nd_blk_region *ndbr);
 unsigned int nd_region_acquire_lane(struct nd_region *nd_region);
 void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane);
 u64 nd_fletcher64(void *addr, size_t len, bool le);
diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h
index 8cf1e4884fd5..17e02b64ea2e 100644
--- a/include/uapi/linux/ndctl.h
+++ b/include/uapi/linux/ndctl.h
@@ -189,7 +189,6 @@ static inline const char *nvdimm_cmd_name(unsigned cmd)
 #define ND_DEVICE_REGION_BLK 3      /* nd_region: (parent of BLK namespaces) */
 #define ND_DEVICE_NAMESPACE_IO 4    /* legacy persistent memory */
 #define ND_DEVICE_NAMESPACE_PMEM 5  /* PMEM namespace (may alias with BLK) */
-#define ND_DEVICE_NAMESPACE_BLK 6   /* BLK namespace (may alias with PMEM) */
 #define ND_DEVICE_DAX_PMEM 7        /* Device DAX interface to pmem */
 
 enum nd_driver_flags {
@@ -198,7 +197,6 @@ enum nd_driver_flags {
 	ND_DRIVER_REGION_BLK      = 1 << ND_DEVICE_REGION_BLK,
 	ND_DRIVER_NAMESPACE_IO    = 1 << ND_DEVICE_NAMESPACE_IO,
 	ND_DRIVER_NAMESPACE_PMEM  = 1 << ND_DEVICE_NAMESPACE_PMEM,
-	ND_DRIVER_NAMESPACE_BLK   = 1 << ND_DEVICE_NAMESPACE_BLK,
 	ND_DRIVER_DAX_PMEM	  = 1 << ND_DEVICE_DAX_PMEM,
 };
 
diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c
index 3ca7c32e9362..4d1a947367f9 100644
--- a/tools/testing/nvdimm/test/ndtest.c
+++ b/tools/testing/nvdimm/test/ndtest.c
@@ -338,62 +338,6 @@ static int ndtest_ctl(struct nvdimm_bus_descriptor *nd_desc,
 	return 0;
 }
 
-static int ndtest_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa,
-		void *iobuf, u64 len, int rw)
-{
-	struct ndtest_dimm *dimm = ndbr->blk_provider_data;
-	struct ndtest_blk_mmio *mmio = dimm->mmio;
-	struct nd_region *nd_region = &ndbr->nd_region;
-	unsigned int lane;
-
-	if (!mmio)
-		return -ENOMEM;
-
-	lane = nd_region_acquire_lane(nd_region);
-	if (rw)
-		memcpy(mmio->base + dpa, iobuf, len);
-	else {
-		memcpy(iobuf, mmio->base + dpa, len);
-		arch_invalidate_pmem(mmio->base + dpa, len);
-	}
-
-	nd_region_release_lane(nd_region, lane);
-
-	return 0;
-}
-
-static int ndtest_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
-				    struct device *dev)
-{
-	struct nd_blk_region *ndbr = to_nd_blk_region(dev);
-	struct nvdimm *nvdimm;
-	struct ndtest_dimm *dimm;
-	struct ndtest_blk_mmio *mmio;
-
-	nvdimm = nd_blk_region_to_dimm(ndbr);
-	dimm = nvdimm_provider_data(nvdimm);
-
-	nd_blk_region_set_provider_data(ndbr, dimm);
-	dimm->blk_region = to_nd_region(dev);
-
-	mmio = devm_kzalloc(dev, sizeof(struct ndtest_blk_mmio), GFP_KERNEL);
-	if (!mmio)
-		return -ENOMEM;
-
-	mmio->base = (void __iomem *) devm_nvdimm_memremap(
-		dev, dimm->address, 12, nd_blk_memremap_flags(ndbr));
-	if (!mmio->base) {
-		dev_err(dev, "%s failed to map blk dimm\n", nvdimm_name(nvdimm));
-		return -ENOMEM;
-	}
-	mmio->size = dimm->size;
-	mmio->base_offset = 0;
-
-	dimm->mmio = mmio;
-
-	return 0;
-}
-
 static struct nfit_test_resource *ndtest_resource_lookup(resource_size_t addr)
 {
 	int i;
@@ -523,17 +467,16 @@ static int ndtest_create_region(struct ndtest_priv *p,
 				struct ndtest_region *region)
 {
 	struct nd_mapping_desc mappings[NDTEST_MAX_MAPPING];
-	struct nd_blk_region_desc ndbr_desc;
+	struct nd_region_desc *ndr_desc, _ndr_desc;
 	struct nd_interleave_set *nd_set;
-	struct nd_region_desc *ndr_desc;
 	struct resource res;
 	int i, ndimm = region->mapping[0].dimm;
 	u64 uuid[2];
 
 	memset(&res, 0, sizeof(res));
 	memset(&mappings, 0, sizeof(mappings));
-	memset(&ndbr_desc, 0, sizeof(ndbr_desc));
-	ndr_desc = &ndbr_desc.ndr_desc;
+	memset(&_ndr_desc, 0, sizeof(_ndr_desc));
+	ndr_desc = &_ndr_desc;
 
 	if (!ndtest_alloc_resource(p, region->size, &res.start))
 		return -ENOMEM;
@@ -857,10 +800,8 @@ static int ndtest_dimm_register(struct ndtest_priv *priv,
 	struct device *dev = &priv->pdev.dev;
 	unsigned long dimm_flags = dimm->flags;
 
-	if (dimm->num_formats > 1) {
-		set_bit(NDD_ALIASING, &dimm_flags);
+	if (dimm->num_formats > 1)
 		set_bit(NDD_LABELING, &dimm_flags);
-	}
 
 	if (dimm->flags & PAPR_PMEM_UNARMED_MASK)
 		set_bit(NDD_UNARMED, &dimm_flags);
-- 
cgit 


From 102e4a8e12fda992803adec51be65e8d1089d4db Mon Sep 17 00:00:00 2001
From: Victor Nogueira <victor@mojatatu.com>
Date: Fri, 11 Mar 2022 12:29:42 -0300
Subject: selftests: tc-testing: Increase timeout in tdc config file

Some tests, such as Test d052: Add 1M filters with the same action, may
not work with a small timeout value.

Increase timeout to 24 seconds.

Signed-off-by: Victor Nogueira <victor@mojatatu.com>
Acked-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/tc-testing/tdc_config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/tc-testing/tdc_config.py b/tools/testing/selftests/tc-testing/tdc_config.py
index ea04f04c173e..ccb0f06ef9e3 100644
--- a/tools/testing/selftests/tc-testing/tdc_config.py
+++ b/tools/testing/selftests/tc-testing/tdc_config.py
@@ -21,7 +21,7 @@ NAMES = {
           'BATCH_FILE': './batch.txt',
           'BATCH_DIR': 'tmp',
           # Length of time in seconds to wait before terminating a command
-          'TIMEOUT': 12,
+          'TIMEOUT': 24,
           # Name of the namespace to use
           'NS': 'tcut',
           # Directory containing eBPF test programs
-- 
cgit 


From 70e2f9f0390570077a3904f39c994b348ca6778d Mon Sep 17 00:00:00 2001
From: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
Date: Tue, 8 Mar 2022 13:58:37 +0100
Subject: KVM: s390: selftests: Split memop tests

Split success case/copy test from error test, making them independent.
This means they do not share state and are easier to understand.
Also, new test can be added in the same manner without affecting the old
ones. In order to make that simpler, introduce functionality for the
setup of commonly used variables.

Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
Link: https://lore.kernel.org/r/20220308125841.3271721-2-scgl@linux.ibm.com
Signed-off-by: Christian Borntraeger <borntraeger@linux.ibm.com>
---
 tools/testing/selftests/kvm/s390x/memop.c | 137 ++++++++++++++++++------------
 1 file changed, 82 insertions(+), 55 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c
index d19c3ffdea3f..b9b673acb766 100644
--- a/tools/testing/selftests/kvm/s390x/memop.c
+++ b/tools/testing/selftests/kvm/s390x/memop.c
@@ -18,71 +18,82 @@
 static uint8_t mem1[65536];
 static uint8_t mem2[65536];
 
-static void guest_code(void)
+struct test_default {
+	struct kvm_vm *kvm_vm;
+	struct kvm_run *run;
+	int size;
+};
+
+static struct test_default test_default_init(void *guest_code)
 {
-	int i;
+	struct test_default t;
 
-	for (;;) {
-		for (i = 0; i < sizeof(mem2); i++)
-			mem2[i] = mem1[i];
-		GUEST_SYNC(0);
-	}
+	t.size = min((size_t)kvm_check_cap(KVM_CAP_S390_MEM_OP), sizeof(mem1));
+	t.kvm_vm = vm_create_default(VCPU_ID, 0, guest_code);
+	t.run = vcpu_state(t.kvm_vm, VCPU_ID);
+	return t;
 }
 
-int main(int argc, char *argv[])
+static void guest_copy(void)
 {
-	struct kvm_vm *vm;
-	struct kvm_run *run;
-	struct kvm_s390_mem_op ksmo;
-	int rv, i, maxsize;
-
-	setbuf(stdout, NULL);	/* Tell stdout not to buffer its content */
-
-	maxsize = kvm_check_cap(KVM_CAP_S390_MEM_OP);
-	if (!maxsize) {
-		print_skip("CAP_S390_MEM_OP not supported");
-		exit(KSFT_SKIP);
-	}
-	if (maxsize > sizeof(mem1))
-		maxsize = sizeof(mem1);
+	memcpy(&mem2, &mem1, sizeof(mem2));
+	GUEST_SYNC(0);
+}
 
-	/* Create VM */
-	vm = vm_create_default(VCPU_ID, 0, guest_code);
-	run = vcpu_state(vm, VCPU_ID);
+static void test_copy(void)
+{
+	struct test_default t = test_default_init(guest_copy);
+	struct kvm_s390_mem_op ksmo;
+	int i;
 
 	for (i = 0; i < sizeof(mem1); i++)
 		mem1[i] = i * i + i;
 
 	/* Set the first array */
-	ksmo.gaddr = addr_gva2gpa(vm, (uintptr_t)mem1);
+	ksmo.gaddr = addr_gva2gpa(t.kvm_vm, (uintptr_t)mem1);
 	ksmo.flags = 0;
-	ksmo.size = maxsize;
+	ksmo.size = t.size;
 	ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
 	ksmo.buf = (uintptr_t)mem1;
 	ksmo.ar = 0;
-	vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+	vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
 
 	/* Let the guest code copy the first array to the second */
-	vcpu_run(vm, VCPU_ID);
-	TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
+	vcpu_run(t.kvm_vm, VCPU_ID);
+	TEST_ASSERT(t.run->exit_reason == KVM_EXIT_S390_SIEIC,
 		    "Unexpected exit reason: %u (%s)\n",
-		    run->exit_reason,
-		    exit_reason_str(run->exit_reason));
+		    t.run->exit_reason,
+		    exit_reason_str(t.run->exit_reason));
 
 	memset(mem2, 0xaa, sizeof(mem2));
 
 	/* Get the second array */
 	ksmo.gaddr = (uintptr_t)mem2;
 	ksmo.flags = 0;
-	ksmo.size = maxsize;
+	ksmo.size = t.size;
 	ksmo.op = KVM_S390_MEMOP_LOGICAL_READ;
 	ksmo.buf = (uintptr_t)mem2;
 	ksmo.ar = 0;
-	vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+	vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
 
-	TEST_ASSERT(!memcmp(mem1, mem2, maxsize),
+	TEST_ASSERT(!memcmp(mem1, mem2, t.size),
 		    "Memory contents do not match!");
 
+	kvm_vm_free(t.kvm_vm);
+}
+
+static void guest_idle(void)
+{
+	for (;;)
+		GUEST_SYNC(0);
+}
+
+static void test_errors(void)
+{
+	struct test_default t = test_default_init(guest_idle);
+	struct kvm_s390_mem_op ksmo;
+	int rv;
+
 	/* Check error conditions - first bad size: */
 	ksmo.gaddr = (uintptr_t)mem1;
 	ksmo.flags = 0;
@@ -90,7 +101,7 @@ int main(int argc, char *argv[])
 	ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
 	ksmo.buf = (uintptr_t)mem1;
 	ksmo.ar = 0;
-	rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+	rv = _vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
 	TEST_ASSERT(rv == -1 && errno == E2BIG, "ioctl allows insane sizes");
 
 	/* Zero size: */
@@ -100,65 +111,65 @@ int main(int argc, char *argv[])
 	ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
 	ksmo.buf = (uintptr_t)mem1;
 	ksmo.ar = 0;
-	rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+	rv = _vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
 	TEST_ASSERT(rv == -1 && (errno == EINVAL || errno == ENOMEM),
 		    "ioctl allows 0 as size");
 
 	/* Bad flags: */
 	ksmo.gaddr = (uintptr_t)mem1;
 	ksmo.flags = -1;
-	ksmo.size = maxsize;
+	ksmo.size = t.size;
 	ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
 	ksmo.buf = (uintptr_t)mem1;
 	ksmo.ar = 0;
-	rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+	rv = _vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
 	TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows all flags");
 
 	/* Bad operation: */
 	ksmo.gaddr = (uintptr_t)mem1;
 	ksmo.flags = 0;
-	ksmo.size = maxsize;
+	ksmo.size = t.size;
 	ksmo.op = -1;
 	ksmo.buf = (uintptr_t)mem1;
 	ksmo.ar = 0;
-	rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+	rv = _vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
 	TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows bad operations");
 
 	/* Bad guest address: */
 	ksmo.gaddr = ~0xfffUL;
 	ksmo.flags = KVM_S390_MEMOP_F_CHECK_ONLY;
-	ksmo.size = maxsize;
+	ksmo.size = t.size;
 	ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
 	ksmo.buf = (uintptr_t)mem1;
 	ksmo.ar = 0;
-	rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+	rv = _vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
 	TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory access");
 
 	/* Bad host address: */
 	ksmo.gaddr = (uintptr_t)mem1;
 	ksmo.flags = 0;
-	ksmo.size = maxsize;
+	ksmo.size = t.size;
 	ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
 	ksmo.buf = 0;
 	ksmo.ar = 0;
-	rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+	rv = _vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
 	TEST_ASSERT(rv == -1 && errno == EFAULT,
 		    "ioctl does not report bad host memory address");
 
 	/* Bad access register: */
-	run->psw_mask &= ~(3UL << (63 - 17));
-	run->psw_mask |= 1UL << (63 - 17);  /* Enable AR mode */
-	vcpu_run(vm, VCPU_ID);              /* To sync new state to SIE block */
+	t.run->psw_mask &= ~(3UL << (63 - 17));
+	t.run->psw_mask |= 1UL << (63 - 17);  /* Enable AR mode */
+	vcpu_run(t.kvm_vm, VCPU_ID);              /* To sync new state to SIE block */
 	ksmo.gaddr = (uintptr_t)mem1;
 	ksmo.flags = 0;
-	ksmo.size = maxsize;
+	ksmo.size = t.size;
 	ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
 	ksmo.buf = (uintptr_t)mem1;
 	ksmo.ar = 17;
-	rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+	rv = _vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
 	TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows ARs > 15");
-	run->psw_mask &= ~(3UL << (63 - 17));   /* Disable AR mode */
-	vcpu_run(vm, VCPU_ID);                  /* Run to sync new state */
+	t.run->psw_mask &= ~(3UL << (63 - 17));   /* Disable AR mode */
+	vcpu_run(t.kvm_vm, VCPU_ID);                  /* Run to sync new state */
 
 	/* Check that the SIDA calls are rejected for non-protected guests */
 	ksmo.gaddr = 0;
@@ -167,15 +178,31 @@ int main(int argc, char *argv[])
 	ksmo.op = KVM_S390_MEMOP_SIDA_READ;
 	ksmo.buf = (uintptr_t)mem1;
 	ksmo.sida_offset = 0x1c0;
-	rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+	rv = _vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
 	TEST_ASSERT(rv == -1 && errno == EINVAL,
 		    "ioctl does not reject SIDA_READ in non-protected mode");
 	ksmo.op = KVM_S390_MEMOP_SIDA_WRITE;
-	rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+	rv = _vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
 	TEST_ASSERT(rv == -1 && errno == EINVAL,
 		    "ioctl does not reject SIDA_WRITE in non-protected mode");
 
-	kvm_vm_free(vm);
+	kvm_vm_free(t.kvm_vm);
+}
+
+int main(int argc, char *argv[])
+{
+	int memop_cap;
+
+	setbuf(stdout, NULL);	/* Tell stdout not to buffer its content */
+
+	memop_cap = kvm_check_cap(KVM_CAP_S390_MEM_OP);
+	if (!memop_cap) {
+		print_skip("CAP_S390_MEM_OP not supported");
+		exit(KSFT_SKIP);
+	}
+
+	test_copy();
+	test_errors();
 
 	return 0;
 }
-- 
cgit 


From 4eb562ab99c427bcfb94d39bf54a44919ccbb64c Mon Sep 17 00:00:00 2001
From: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
Date: Tue, 8 Mar 2022 13:58:38 +0100
Subject: KVM: s390: selftests: Add macro as abstraction for MEM_OP

In order to achieve good test coverage we need to be able to invoke the
MEM_OP ioctl with all possible parametrizations.
However, for a given test, we want to be concise and not specify a long
list of default values for parameters not relevant for the test, so the
readers attention is not needlessly diverted.
Add a macro that enables this and convert the existing test to use it.
The macro emulates named arguments and hides some of the ioctl's
redundancy, e.g. sets the key flag if an access key is specified.

Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
Link: https://lore.kernel.org/r/20220308125841.3271721-3-scgl@linux.ibm.com
Signed-off-by: Christian Borntraeger <borntraeger@linux.ibm.com>
---
 tools/testing/selftests/kvm/s390x/memop.c | 272 ++++++++++++++++++++++--------
 1 file changed, 197 insertions(+), 75 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c
index b9b673acb766..e2ad3d70bae4 100644
--- a/tools/testing/selftests/kvm/s390x/memop.c
+++ b/tools/testing/selftests/kvm/s390x/memop.c
@@ -13,6 +13,188 @@
 #include "test_util.h"
 #include "kvm_util.h"
 
+enum mop_target {
+	LOGICAL,
+	SIDA,
+	ABSOLUTE,
+	INVALID,
+};
+
+enum mop_access_mode {
+	READ,
+	WRITE,
+};
+
+struct mop_desc {
+	uintptr_t gaddr;
+	uintptr_t gaddr_v;
+	uint64_t set_flags;
+	unsigned int f_check : 1;
+	unsigned int f_inject : 1;
+	unsigned int f_key : 1;
+	unsigned int _gaddr_v : 1;
+	unsigned int _set_flags : 1;
+	unsigned int _sida_offset : 1;
+	unsigned int _ar : 1;
+	uint32_t size;
+	enum mop_target target;
+	enum mop_access_mode mode;
+	void *buf;
+	uint32_t sida_offset;
+	uint8_t ar;
+	uint8_t key;
+};
+
+static struct kvm_s390_mem_op ksmo_from_desc(struct mop_desc desc)
+{
+	struct kvm_s390_mem_op ksmo = {
+		.gaddr = (uintptr_t)desc.gaddr,
+		.size = desc.size,
+		.buf = ((uintptr_t)desc.buf),
+		.reserved = "ignored_ignored_ignored_ignored"
+	};
+
+	switch (desc.target) {
+	case LOGICAL:
+		if (desc.mode == READ)
+			ksmo.op = KVM_S390_MEMOP_LOGICAL_READ;
+		if (desc.mode == WRITE)
+			ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
+		break;
+	case SIDA:
+		if (desc.mode == READ)
+			ksmo.op = KVM_S390_MEMOP_SIDA_READ;
+		if (desc.mode == WRITE)
+			ksmo.op = KVM_S390_MEMOP_SIDA_WRITE;
+		break;
+	case ABSOLUTE:
+		if (desc.mode == READ)
+			ksmo.op = KVM_S390_MEMOP_ABSOLUTE_READ;
+		if (desc.mode == WRITE)
+			ksmo.op = KVM_S390_MEMOP_ABSOLUTE_WRITE;
+		break;
+	case INVALID:
+		ksmo.op = -1;
+	}
+	if (desc.f_check)
+		ksmo.flags |= KVM_S390_MEMOP_F_CHECK_ONLY;
+	if (desc.f_inject)
+		ksmo.flags |= KVM_S390_MEMOP_F_INJECT_EXCEPTION;
+	if (desc._set_flags)
+		ksmo.flags = desc.set_flags;
+	if (desc.f_key) {
+		ksmo.flags |= KVM_S390_MEMOP_F_SKEY_PROTECTION;
+		ksmo.key = desc.key;
+	}
+	if (desc._ar)
+		ksmo.ar = desc.ar;
+	else
+		ksmo.ar = 0;
+	if (desc._sida_offset)
+		ksmo.sida_offset = desc.sida_offset;
+
+	return ksmo;
+}
+
+/* vcpu dummy id signifying that vm instead of vcpu ioctl is to occur */
+const uint32_t VM_VCPU_ID = (uint32_t)-1;
+
+struct test_vcpu {
+	struct kvm_vm *vm;
+	uint32_t id;
+};
+
+#define PRINT_MEMOP false
+static void print_memop(uint32_t vcpu_id, const struct kvm_s390_mem_op *ksmo)
+{
+	if (!PRINT_MEMOP)
+		return;
+
+	if (vcpu_id == VM_VCPU_ID)
+		printf("vm memop(");
+	else
+		printf("vcpu memop(");
+	switch (ksmo->op) {
+	case KVM_S390_MEMOP_LOGICAL_READ:
+		printf("LOGICAL, READ, ");
+		break;
+	case KVM_S390_MEMOP_LOGICAL_WRITE:
+		printf("LOGICAL, WRITE, ");
+		break;
+	case KVM_S390_MEMOP_SIDA_READ:
+		printf("SIDA, READ, ");
+		break;
+	case KVM_S390_MEMOP_SIDA_WRITE:
+		printf("SIDA, WRITE, ");
+		break;
+	case KVM_S390_MEMOP_ABSOLUTE_READ:
+		printf("ABSOLUTE, READ, ");
+		break;
+	case KVM_S390_MEMOP_ABSOLUTE_WRITE:
+		printf("ABSOLUTE, WRITE, ");
+		break;
+	}
+	printf("gaddr=%llu, size=%u, buf=%llu, ar=%u, key=%u",
+	       ksmo->gaddr, ksmo->size, ksmo->buf, ksmo->ar, ksmo->key);
+	if (ksmo->flags & KVM_S390_MEMOP_F_CHECK_ONLY)
+		printf(", CHECK_ONLY");
+	if (ksmo->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION)
+		printf(", INJECT_EXCEPTION");
+	if (ksmo->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION)
+		printf(", SKEY_PROTECTION");
+	puts(")");
+}
+
+static void memop_ioctl(struct test_vcpu vcpu, struct kvm_s390_mem_op *ksmo)
+{
+	if (vcpu.id == VM_VCPU_ID)
+		vm_ioctl(vcpu.vm, KVM_S390_MEM_OP, ksmo);
+	else
+		vcpu_ioctl(vcpu.vm, vcpu.id, KVM_S390_MEM_OP, ksmo);
+}
+
+static int err_memop_ioctl(struct test_vcpu vcpu, struct kvm_s390_mem_op *ksmo)
+{
+	if (vcpu.id == VM_VCPU_ID)
+		return _vm_ioctl(vcpu.vm, KVM_S390_MEM_OP, ksmo);
+	else
+		return _vcpu_ioctl(vcpu.vm, vcpu.id, KVM_S390_MEM_OP, ksmo);
+}
+
+#define MEMOP(err, vcpu_p, mop_target_p, access_mode_p, buf_p, size_p, ...)	\
+({										\
+	struct test_vcpu __vcpu = (vcpu_p);					\
+	struct mop_desc __desc = {						\
+		.target = (mop_target_p),					\
+		.mode = (access_mode_p),					\
+		.buf = (buf_p),							\
+		.size = (size_p),						\
+		__VA_ARGS__							\
+	};									\
+	struct kvm_s390_mem_op __ksmo;						\
+										\
+	if (__desc._gaddr_v) {							\
+		if (__desc.target == ABSOLUTE)					\
+			__desc.gaddr = addr_gva2gpa(__vcpu.vm, __desc.gaddr_v);	\
+		else								\
+			__desc.gaddr = __desc.gaddr_v;				\
+	}									\
+	__ksmo = ksmo_from_desc(__desc);					\
+	print_memop(__vcpu.id, &__ksmo);					\
+	err##memop_ioctl(__vcpu, &__ksmo);					\
+})
+
+#define MOP(...) MEMOP(, __VA_ARGS__)
+#define ERR_MOP(...) MEMOP(err_, __VA_ARGS__)
+
+#define GADDR(a) .gaddr = ((uintptr_t)a)
+#define GADDR_V(v) ._gaddr_v = 1, .gaddr_v = ((uintptr_t)v)
+#define CHECK_ONLY .f_check = 1
+#define SET_FLAGS(f) ._set_flags = 1, .set_flags = (f)
+#define SIDA_OFFSET(o) ._sida_offset = 1, .sida_offset = (o)
+#define AR(a) ._ar = 1, .ar = (a)
+#define KEY(a) .f_key = 1, .key = (a)
+
 #define VCPU_ID 1
 
 static uint8_t mem1[65536];
@@ -20,6 +202,7 @@ static uint8_t mem2[65536];
 
 struct test_default {
 	struct kvm_vm *kvm_vm;
+	struct test_vcpu vcpu;
 	struct kvm_run *run;
 	int size;
 };
@@ -30,6 +213,7 @@ static struct test_default test_default_init(void *guest_code)
 
 	t.size = min((size_t)kvm_check_cap(KVM_CAP_S390_MEM_OP), sizeof(mem1));
 	t.kvm_vm = vm_create_default(VCPU_ID, 0, guest_code);
+	t.vcpu = (struct test_vcpu) { t.kvm_vm, VCPU_ID };
 	t.run = vcpu_state(t.kvm_vm, VCPU_ID);
 	return t;
 }
@@ -43,20 +227,14 @@ static void guest_copy(void)
 static void test_copy(void)
 {
 	struct test_default t = test_default_init(guest_copy);
-	struct kvm_s390_mem_op ksmo;
 	int i;
 
 	for (i = 0; i < sizeof(mem1); i++)
 		mem1[i] = i * i + i;
 
 	/* Set the first array */
-	ksmo.gaddr = addr_gva2gpa(t.kvm_vm, (uintptr_t)mem1);
-	ksmo.flags = 0;
-	ksmo.size = t.size;
-	ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
-	ksmo.buf = (uintptr_t)mem1;
-	ksmo.ar = 0;
-	vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+	MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size,
+	    GADDR(addr_gva2gpa(t.kvm_vm, (uintptr_t)mem1)));
 
 	/* Let the guest code copy the first array to the second */
 	vcpu_run(t.kvm_vm, VCPU_ID);
@@ -68,13 +246,7 @@ static void test_copy(void)
 	memset(mem2, 0xaa, sizeof(mem2));
 
 	/* Get the second array */
-	ksmo.gaddr = (uintptr_t)mem2;
-	ksmo.flags = 0;
-	ksmo.size = t.size;
-	ksmo.op = KVM_S390_MEMOP_LOGICAL_READ;
-	ksmo.buf = (uintptr_t)mem2;
-	ksmo.ar = 0;
-	vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+	MOP(t.vcpu, LOGICAL, READ, mem2, t.size, GADDR_V(mem2));
 
 	TEST_ASSERT(!memcmp(mem1, mem2, t.size),
 		    "Memory contents do not match!");
@@ -91,68 +263,31 @@ static void guest_idle(void)
 static void test_errors(void)
 {
 	struct test_default t = test_default_init(guest_idle);
-	struct kvm_s390_mem_op ksmo;
 	int rv;
 
-	/* Check error conditions - first bad size: */
-	ksmo.gaddr = (uintptr_t)mem1;
-	ksmo.flags = 0;
-	ksmo.size = -1;
-	ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
-	ksmo.buf = (uintptr_t)mem1;
-	ksmo.ar = 0;
-	rv = _vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+	/* Bad size: */
+	rv = ERR_MOP(t.vcpu, LOGICAL, WRITE, mem1, -1, GADDR_V(mem1));
 	TEST_ASSERT(rv == -1 && errno == E2BIG, "ioctl allows insane sizes");
 
 	/* Zero size: */
-	ksmo.gaddr = (uintptr_t)mem1;
-	ksmo.flags = 0;
-	ksmo.size = 0;
-	ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
-	ksmo.buf = (uintptr_t)mem1;
-	ksmo.ar = 0;
-	rv = _vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+	rv = ERR_MOP(t.vcpu, LOGICAL, WRITE, mem1, 0, GADDR_V(mem1));
 	TEST_ASSERT(rv == -1 && (errno == EINVAL || errno == ENOMEM),
 		    "ioctl allows 0 as size");
 
 	/* Bad flags: */
-	ksmo.gaddr = (uintptr_t)mem1;
-	ksmo.flags = -1;
-	ksmo.size = t.size;
-	ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
-	ksmo.buf = (uintptr_t)mem1;
-	ksmo.ar = 0;
-	rv = _vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+	rv = ERR_MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), SET_FLAGS(-1));
 	TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows all flags");
 
 	/* Bad operation: */
-	ksmo.gaddr = (uintptr_t)mem1;
-	ksmo.flags = 0;
-	ksmo.size = t.size;
-	ksmo.op = -1;
-	ksmo.buf = (uintptr_t)mem1;
-	ksmo.ar = 0;
-	rv = _vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+	rv = ERR_MOP(t.vcpu, INVALID, WRITE, mem1, t.size, GADDR_V(mem1));
 	TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows bad operations");
 
 	/* Bad guest address: */
-	ksmo.gaddr = ~0xfffUL;
-	ksmo.flags = KVM_S390_MEMOP_F_CHECK_ONLY;
-	ksmo.size = t.size;
-	ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
-	ksmo.buf = (uintptr_t)mem1;
-	ksmo.ar = 0;
-	rv = _vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+	rv = ERR_MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR((void *)~0xfffUL), CHECK_ONLY);
 	TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory access");
 
 	/* Bad host address: */
-	ksmo.gaddr = (uintptr_t)mem1;
-	ksmo.flags = 0;
-	ksmo.size = t.size;
-	ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
-	ksmo.buf = 0;
-	ksmo.ar = 0;
-	rv = _vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+	rv = ERR_MOP(t.vcpu, LOGICAL, WRITE, 0, t.size, GADDR_V(mem1));
 	TEST_ASSERT(rv == -1 && errno == EFAULT,
 		    "ioctl does not report bad host memory address");
 
@@ -160,29 +295,16 @@ static void test_errors(void)
 	t.run->psw_mask &= ~(3UL << (63 - 17));
 	t.run->psw_mask |= 1UL << (63 - 17);  /* Enable AR mode */
 	vcpu_run(t.kvm_vm, VCPU_ID);              /* To sync new state to SIE block */
-	ksmo.gaddr = (uintptr_t)mem1;
-	ksmo.flags = 0;
-	ksmo.size = t.size;
-	ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
-	ksmo.buf = (uintptr_t)mem1;
-	ksmo.ar = 17;
-	rv = _vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+	rv = ERR_MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), AR(17));
 	TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows ARs > 15");
 	t.run->psw_mask &= ~(3UL << (63 - 17));   /* Disable AR mode */
 	vcpu_run(t.kvm_vm, VCPU_ID);                  /* Run to sync new state */
 
 	/* Check that the SIDA calls are rejected for non-protected guests */
-	ksmo.gaddr = 0;
-	ksmo.flags = 0;
-	ksmo.size = 8;
-	ksmo.op = KVM_S390_MEMOP_SIDA_READ;
-	ksmo.buf = (uintptr_t)mem1;
-	ksmo.sida_offset = 0x1c0;
-	rv = _vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+	rv = ERR_MOP(t.vcpu, SIDA, READ, mem1, 8, GADDR(0), SIDA_OFFSET(0x1c0));
 	TEST_ASSERT(rv == -1 && errno == EINVAL,
 		    "ioctl does not reject SIDA_READ in non-protected mode");
-	ksmo.op = KVM_S390_MEMOP_SIDA_WRITE;
-	rv = _vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+	rv = ERR_MOP(t.vcpu, SIDA, WRITE, mem1, 8, GADDR(0), SIDA_OFFSET(0x1c0));
 	TEST_ASSERT(rv == -1 && errno == EINVAL,
 		    "ioctl does not reject SIDA_WRITE in non-protected mode");
 
-- 
cgit 


From c4816a1b7fed3c000b37ad6516f65aad8bc5fba6 Mon Sep 17 00:00:00 2001
From: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
Date: Tue, 8 Mar 2022 13:58:39 +0100
Subject: KVM: s390: selftests: Add named stages for memop test

The stages synchronize guest and host execution.
This helps the reader and constraits the execution of the test -- if the
observed staging differs from the expected the test fails.

Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
Link: https://lore.kernel.org/r/20220308125841.3271721-4-scgl@linux.ibm.com
Signed-off-by: Christian Borntraeger <borntraeger@linux.ibm.com>
---
 tools/testing/selftests/kvm/s390x/memop.c | 44 +++++++++++++++++++++++--------
 1 file changed, 33 insertions(+), 11 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c
index e2ad3d70bae4..88ce2d670ed5 100644
--- a/tools/testing/selftests/kvm/s390x/memop.c
+++ b/tools/testing/selftests/kvm/s390x/memop.c
@@ -218,10 +218,32 @@ static struct test_default test_default_init(void *guest_code)
 	return t;
 }
 
+enum stage {
+	/* Synced state set by host, e.g. DAT */
+	STAGE_INITED,
+	/* Guest did nothing */
+	STAGE_IDLED,
+	/* Guest copied memory (locations up to test case) */
+	STAGE_COPIED,
+};
+
+#define HOST_SYNC(vcpu_p, stage)					\
+({									\
+	struct test_vcpu __vcpu = (vcpu_p);				\
+	struct ucall uc;						\
+	int __stage = (stage);						\
+									\
+	vcpu_run(__vcpu.vm, __vcpu.id);					\
+	get_ucall(__vcpu.vm, __vcpu.id, &uc);				\
+	ASSERT_EQ(uc.cmd, UCALL_SYNC);					\
+	ASSERT_EQ(uc.args[1], __stage);					\
+})									\
+
 static void guest_copy(void)
 {
+	GUEST_SYNC(STAGE_INITED);
 	memcpy(&mem2, &mem1, sizeof(mem2));
-	GUEST_SYNC(0);
+	GUEST_SYNC(STAGE_COPIED);
 }
 
 static void test_copy(void)
@@ -232,16 +254,13 @@ static void test_copy(void)
 	for (i = 0; i < sizeof(mem1); i++)
 		mem1[i] = i * i + i;
 
+	HOST_SYNC(t.vcpu, STAGE_INITED);
+
 	/* Set the first array */
-	MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size,
-	    GADDR(addr_gva2gpa(t.kvm_vm, (uintptr_t)mem1)));
+	MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1));
 
 	/* Let the guest code copy the first array to the second */
-	vcpu_run(t.kvm_vm, VCPU_ID);
-	TEST_ASSERT(t.run->exit_reason == KVM_EXIT_S390_SIEIC,
-		    "Unexpected exit reason: %u (%s)\n",
-		    t.run->exit_reason,
-		    exit_reason_str(t.run->exit_reason));
+	HOST_SYNC(t.vcpu, STAGE_COPIED);
 
 	memset(mem2, 0xaa, sizeof(mem2));
 
@@ -256,8 +275,9 @@ static void test_copy(void)
 
 static void guest_idle(void)
 {
+	GUEST_SYNC(STAGE_INITED); /* for consistency's sake */
 	for (;;)
-		GUEST_SYNC(0);
+		GUEST_SYNC(STAGE_IDLED);
 }
 
 static void test_errors(void)
@@ -265,6 +285,8 @@ static void test_errors(void)
 	struct test_default t = test_default_init(guest_idle);
 	int rv;
 
+	HOST_SYNC(t.vcpu, STAGE_INITED);
+
 	/* Bad size: */
 	rv = ERR_MOP(t.vcpu, LOGICAL, WRITE, mem1, -1, GADDR_V(mem1));
 	TEST_ASSERT(rv == -1 && errno == E2BIG, "ioctl allows insane sizes");
@@ -294,11 +316,11 @@ static void test_errors(void)
 	/* Bad access register: */
 	t.run->psw_mask &= ~(3UL << (63 - 17));
 	t.run->psw_mask |= 1UL << (63 - 17);  /* Enable AR mode */
-	vcpu_run(t.kvm_vm, VCPU_ID);              /* To sync new state to SIE block */
+	HOST_SYNC(t.vcpu, STAGE_IDLED); /* To sync new state to SIE block */
 	rv = ERR_MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), AR(17));
 	TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows ARs > 15");
 	t.run->psw_mask &= ~(3UL << (63 - 17));   /* Disable AR mode */
-	vcpu_run(t.kvm_vm, VCPU_ID);                  /* Run to sync new state */
+	HOST_SYNC(t.vcpu, STAGE_IDLED); /* Run to sync new state */
 
 	/* Check that the SIDA calls are rejected for non-protected guests */
 	rv = ERR_MOP(t.vcpu, SIDA, READ, mem1, 8, GADDR(0), SIDA_OFFSET(0x1c0));
-- 
cgit 


From 1bb873495a9ebbc8f6dd54a110b2c639e225c782 Mon Sep 17 00:00:00 2001
From: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
Date: Tue, 8 Mar 2022 13:58:40 +0100
Subject: KVM: s390: selftests: Add more copy memop tests

Do not just test the actual copy, but also that success is indicated
when using the check only flag.
Add copy test with storage key checking enabled, including tests for
storage and fetch protection override.
These test cover both logical vcpu ioctls as well as absolute vm ioctls.

Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
Link: https://lore.kernel.org/r/20220308125841.3271721-5-scgl@linux.ibm.com
Signed-off-by: Christian Borntraeger <borntraeger@linux.ibm.com>
---
 tools/testing/selftests/kvm/s390x/memop.c | 243 ++++++++++++++++++++++++++++--
 1 file changed, 230 insertions(+), 13 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c
index 88ce2d670ed5..42282663b38b 100644
--- a/tools/testing/selftests/kvm/s390x/memop.c
+++ b/tools/testing/selftests/kvm/s390x/memop.c
@@ -195,13 +195,21 @@ static int err_memop_ioctl(struct test_vcpu vcpu, struct kvm_s390_mem_op *ksmo)
 #define AR(a) ._ar = 1, .ar = (a)
 #define KEY(a) .f_key = 1, .key = (a)
 
+#define CHECK_N_DO(f, ...) ({ f(__VA_ARGS__, CHECK_ONLY); f(__VA_ARGS__); })
+
 #define VCPU_ID 1
+#define PAGE_SHIFT 12
+#define PAGE_SIZE (1ULL << PAGE_SHIFT)
+#define PAGE_MASK (~(PAGE_SIZE - 1))
+#define CR0_FETCH_PROTECTION_OVERRIDE	(1UL << (63 - 38))
+#define CR0_STORAGE_PROTECTION_OVERRIDE	(1UL << (63 - 39))
 
 static uint8_t mem1[65536];
 static uint8_t mem2[65536];
 
 struct test_default {
 	struct kvm_vm *kvm_vm;
+	struct test_vcpu vm;
 	struct test_vcpu vcpu;
 	struct kvm_run *run;
 	int size;
@@ -213,6 +221,7 @@ static struct test_default test_default_init(void *guest_code)
 
 	t.size = min((size_t)kvm_check_cap(KVM_CAP_S390_MEM_OP), sizeof(mem1));
 	t.kvm_vm = vm_create_default(VCPU_ID, 0, guest_code);
+	t.vm = (struct test_vcpu) { t.kvm_vm, VM_VCPU_ID };
 	t.vcpu = (struct test_vcpu) { t.kvm_vm, VCPU_ID };
 	t.run = vcpu_state(t.kvm_vm, VCPU_ID);
 	return t;
@@ -223,6 +232,8 @@ enum stage {
 	STAGE_INITED,
 	/* Guest did nothing */
 	STAGE_IDLED,
+	/* Guest set storage keys (specifics up to test case) */
+	STAGE_SKEYS_SET,
 	/* Guest copied memory (locations up to test case) */
 	STAGE_COPIED,
 };
@@ -239,6 +250,47 @@ enum stage {
 	ASSERT_EQ(uc.args[1], __stage);					\
 })									\
 
+static void prepare_mem12(void)
+{
+	int i;
+
+	for (i = 0; i < sizeof(mem1); i++)
+		mem1[i] = rand();
+	memset(mem2, 0xaa, sizeof(mem2));
+}
+
+#define ASSERT_MEM_EQ(p1, p2, size) \
+	TEST_ASSERT(!memcmp(p1, p2, size), "Memory contents do not match!")
+
+#define DEFAULT_WRITE_READ(copy_cpu, mop_cpu, mop_target_p, size, ...)		\
+({										\
+	struct test_vcpu __copy_cpu = (copy_cpu), __mop_cpu = (mop_cpu);	\
+	enum mop_target __target = (mop_target_p);				\
+	uint32_t __size = (size);						\
+										\
+	prepare_mem12();							\
+	CHECK_N_DO(MOP, __mop_cpu, __target, WRITE, mem1, __size,		\
+			GADDR_V(mem1), ##__VA_ARGS__);				\
+	HOST_SYNC(__copy_cpu, STAGE_COPIED);					\
+	CHECK_N_DO(MOP, __mop_cpu, __target, READ, mem2, __size,		\
+			GADDR_V(mem2), ##__VA_ARGS__);				\
+	ASSERT_MEM_EQ(mem1, mem2, __size);					\
+})
+
+#define DEFAULT_READ(copy_cpu, mop_cpu, mop_target_p, size, ...)		\
+({										\
+	struct test_vcpu __copy_cpu = (copy_cpu), __mop_cpu = (mop_cpu);	\
+	enum mop_target __target = (mop_target_p);				\
+	uint32_t __size = (size);						\
+										\
+	prepare_mem12();							\
+	CHECK_N_DO(MOP, __mop_cpu, __target, WRITE, mem1, __size,		\
+			GADDR_V(mem1));						\
+	HOST_SYNC(__copy_cpu, STAGE_COPIED);					\
+	CHECK_N_DO(MOP, __mop_cpu, __target, READ, mem2, __size, ##__VA_ARGS__);\
+	ASSERT_MEM_EQ(mem1, mem2, __size);					\
+})
+
 static void guest_copy(void)
 {
 	GUEST_SYNC(STAGE_INITED);
@@ -249,30 +301,186 @@ static void guest_copy(void)
 static void test_copy(void)
 {
 	struct test_default t = test_default_init(guest_copy);
-	int i;
 
-	for (i = 0; i < sizeof(mem1); i++)
-		mem1[i] = i * i + i;
+	HOST_SYNC(t.vcpu, STAGE_INITED);
+
+	DEFAULT_WRITE_READ(t.vcpu, t.vcpu, LOGICAL, t.size);
+
+	kvm_vm_free(t.kvm_vm);
+}
+
+static void set_storage_key_range(void *addr, size_t len, uint8_t key)
+{
+	uintptr_t _addr, abs, i;
+	int not_mapped = 0;
+
+	_addr = (uintptr_t)addr;
+	for (i = _addr & PAGE_MASK; i < _addr + len; i += PAGE_SIZE) {
+		abs = i;
+		asm volatile (
+			       "lra	%[abs], 0(0,%[abs])\n"
+			"	jz	0f\n"
+			"	llill	%[not_mapped],1\n"
+			"	j	1f\n"
+			"0:	sske	%[key], %[abs]\n"
+			"1:"
+			: [abs] "+&a" (abs), [not_mapped] "+r" (not_mapped)
+			: [key] "r" (key)
+			: "cc"
+		);
+		GUEST_ASSERT_EQ(not_mapped, 0);
+	}
+}
+
+static void guest_copy_key(void)
+{
+	set_storage_key_range(mem1, sizeof(mem1), 0x90);
+	set_storage_key_range(mem2, sizeof(mem2), 0x90);
+	GUEST_SYNC(STAGE_SKEYS_SET);
+
+	for (;;) {
+		memcpy(&mem2, &mem1, sizeof(mem2));
+		GUEST_SYNC(STAGE_COPIED);
+	}
+}
+
+static void test_copy_key(void)
+{
+	struct test_default t = test_default_init(guest_copy_key);
+
+	HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+	/* vm, no key */
+	DEFAULT_WRITE_READ(t.vcpu, t.vm, ABSOLUTE, t.size);
+
+	/* vm/vcpu, machting key or key 0 */
+	DEFAULT_WRITE_READ(t.vcpu, t.vcpu, LOGICAL, t.size, KEY(0));
+	DEFAULT_WRITE_READ(t.vcpu, t.vcpu, LOGICAL, t.size, KEY(9));
+	DEFAULT_WRITE_READ(t.vcpu, t.vm, ABSOLUTE, t.size, KEY(0));
+	DEFAULT_WRITE_READ(t.vcpu, t.vm, ABSOLUTE, t.size, KEY(9));
+	/*
+	 * There used to be different code paths for key handling depending on
+	 * if the region crossed a page boundary.
+	 * There currently are not, but the more tests the merrier.
+	 */
+	DEFAULT_WRITE_READ(t.vcpu, t.vcpu, LOGICAL, 1, KEY(0));
+	DEFAULT_WRITE_READ(t.vcpu, t.vcpu, LOGICAL, 1, KEY(9));
+	DEFAULT_WRITE_READ(t.vcpu, t.vm, ABSOLUTE, 1, KEY(0));
+	DEFAULT_WRITE_READ(t.vcpu, t.vm, ABSOLUTE, 1, KEY(9));
+
+	/* vm/vcpu, mismatching keys on read, but no fetch protection */
+	DEFAULT_READ(t.vcpu, t.vcpu, LOGICAL, t.size, GADDR_V(mem2), KEY(2));
+	DEFAULT_READ(t.vcpu, t.vm, ABSOLUTE, t.size, GADDR_V(mem1), KEY(2));
+
+	kvm_vm_free(t.kvm_vm);
+}
+
+static void guest_copy_key_fetch_prot(void)
+{
+	/*
+	 * For some reason combining the first sync with override enablement
+	 * results in an exception when calling HOST_SYNC.
+	 */
+	GUEST_SYNC(STAGE_INITED);
+	/* Storage protection override applies to both store and fetch. */
+	set_storage_key_range(mem1, sizeof(mem1), 0x98);
+	set_storage_key_range(mem2, sizeof(mem2), 0x98);
+	GUEST_SYNC(STAGE_SKEYS_SET);
+
+	for (;;) {
+		memcpy(&mem2, &mem1, sizeof(mem2));
+		GUEST_SYNC(STAGE_COPIED);
+	}
+}
+
+static void test_copy_key_storage_prot_override(void)
+{
+	struct test_default t = test_default_init(guest_copy_key_fetch_prot);
 
 	HOST_SYNC(t.vcpu, STAGE_INITED);
+	t.run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE;
+	t.run->kvm_dirty_regs = KVM_SYNC_CRS;
+	HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
 
-	/* Set the first array */
-	MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1));
+	/* vcpu, mismatching keys, storage protection override in effect */
+	DEFAULT_WRITE_READ(t.vcpu, t.vcpu, LOGICAL, t.size, KEY(2));
 
-	/* Let the guest code copy the first array to the second */
-	HOST_SYNC(t.vcpu, STAGE_COPIED);
+	kvm_vm_free(t.kvm_vm);
+}
 
-	memset(mem2, 0xaa, sizeof(mem2));
+static void test_copy_key_fetch_prot(void)
+{
+	struct test_default t = test_default_init(guest_copy_key_fetch_prot);
 
-	/* Get the second array */
-	MOP(t.vcpu, LOGICAL, READ, mem2, t.size, GADDR_V(mem2));
+	HOST_SYNC(t.vcpu, STAGE_INITED);
+	HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
 
-	TEST_ASSERT(!memcmp(mem1, mem2, t.size),
-		    "Memory contents do not match!");
+	/* vm/vcpu, matching key, fetch protection in effect */
+	DEFAULT_READ(t.vcpu, t.vcpu, LOGICAL, t.size, GADDR_V(mem2), KEY(9));
+	DEFAULT_READ(t.vcpu, t.vm, ABSOLUTE, t.size, GADDR_V(mem2), KEY(9));
 
 	kvm_vm_free(t.kvm_vm);
 }
 
+const uint64_t last_page_addr = -PAGE_SIZE;
+
+static void guest_copy_key_fetch_prot_override(void)
+{
+	int i;
+	char *page_0 = 0;
+
+	GUEST_SYNC(STAGE_INITED);
+	set_storage_key_range(0, PAGE_SIZE, 0x18);
+	set_storage_key_range((void *)last_page_addr, PAGE_SIZE, 0x0);
+	asm volatile ("sske %[key],%[addr]\n" :: [addr] "r"(0), [key] "r"(0x18) : "cc");
+	GUEST_SYNC(STAGE_SKEYS_SET);
+
+	for (;;) {
+		for (i = 0; i < PAGE_SIZE; i++)
+			page_0[i] = mem1[i];
+		GUEST_SYNC(STAGE_COPIED);
+	}
+}
+
+static void test_copy_key_fetch_prot_override(void)
+{
+	struct test_default t = test_default_init(guest_copy_key_fetch_prot_override);
+	vm_vaddr_t guest_0_page, guest_last_page;
+
+	guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0);
+	guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr);
+	if (guest_0_page != 0 || guest_last_page != last_page_addr) {
+		print_skip("did not allocate guest pages at required positions");
+		goto out;
+	}
+
+	HOST_SYNC(t.vcpu, STAGE_INITED);
+	t.run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE;
+	t.run->kvm_dirty_regs = KVM_SYNC_CRS;
+	HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+	/* vcpu, mismatching keys on fetch, fetch protection override applies */
+	prepare_mem12();
+	MOP(t.vcpu, LOGICAL, WRITE, mem1, PAGE_SIZE, GADDR_V(mem1));
+	HOST_SYNC(t.vcpu, STAGE_COPIED);
+	CHECK_N_DO(MOP, t.vcpu, LOGICAL, READ, mem2, 2048, GADDR_V(guest_0_page), KEY(2));
+	ASSERT_MEM_EQ(mem1, mem2, 2048);
+
+	/*
+	 * vcpu, mismatching keys on fetch, fetch protection override applies,
+	 * wraparound
+	 */
+	prepare_mem12();
+	MOP(t.vcpu, LOGICAL, WRITE, mem1, 2 * PAGE_SIZE, GADDR_V(guest_last_page));
+	HOST_SYNC(t.vcpu, STAGE_COPIED);
+	CHECK_N_DO(MOP, t.vcpu, LOGICAL, READ, mem2, PAGE_SIZE + 2048,
+		   GADDR_V(guest_last_page), KEY(2));
+	ASSERT_MEM_EQ(mem1, mem2, 2048);
+
+out:
+	kvm_vm_free(t.kvm_vm);
+}
+
 static void guest_idle(void)
 {
 	GUEST_SYNC(STAGE_INITED); /* for consistency's sake */
@@ -335,17 +543,26 @@ static void test_errors(void)
 
 int main(int argc, char *argv[])
 {
-	int memop_cap;
+	int memop_cap, extension_cap;
 
 	setbuf(stdout, NULL);	/* Tell stdout not to buffer its content */
 
 	memop_cap = kvm_check_cap(KVM_CAP_S390_MEM_OP);
+	extension_cap = kvm_check_cap(KVM_CAP_S390_MEM_OP_EXTENSION);
 	if (!memop_cap) {
 		print_skip("CAP_S390_MEM_OP not supported");
 		exit(KSFT_SKIP);
 	}
 
 	test_copy();
+	if (extension_cap > 0) {
+		test_copy_key();
+		test_copy_key_storage_prot_override();
+		test_copy_key_fetch_prot();
+		test_copy_key_fetch_prot_override();
+	} else {
+		print_skip("storage key memop extension not supported");
+	}
 	test_errors();
 
 	return 0;
-- 
cgit 


From 3bcc372c9865bec3ab9bfcf30b2426cf68bc18af Mon Sep 17 00:00:00 2001
From: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
Date: Tue, 8 Mar 2022 13:58:41 +0100
Subject: KVM: s390: selftests: Add error memop tests

Test that errors occur if key protection disallows access, including
tests for storage and fetch protection override. Perform tests for both
logical vcpu and absolute vm ioctls.
Also extend the existing tests to the vm ioctl.

Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
Link: https://lore.kernel.org/r/20220308125841.3271721-6-scgl@linux.ibm.com
Signed-off-by: Christian Borntraeger <borntraeger@linux.ibm.com>
---
 tools/testing/selftests/kvm/s390x/memop.c | 137 +++++++++++++++++++++++++++---
 1 file changed, 124 insertions(+), 13 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c
index 42282663b38b..b04c2c1b3c30 100644
--- a/tools/testing/selftests/kvm/s390x/memop.c
+++ b/tools/testing/selftests/kvm/s390x/memop.c
@@ -422,6 +422,46 @@ static void test_copy_key_fetch_prot(void)
 	kvm_vm_free(t.kvm_vm);
 }
 
+#define ERR_PROT_MOP(...)							\
+({										\
+	int rv;									\
+										\
+	rv = ERR_MOP(__VA_ARGS__);						\
+	TEST_ASSERT(rv == 4, "Should result in protection exception");		\
+})
+
+static void test_errors_key(void)
+{
+	struct test_default t = test_default_init(guest_copy_key_fetch_prot);
+
+	HOST_SYNC(t.vcpu, STAGE_INITED);
+	HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+	/* vm/vcpu, mismatching keys, fetch protection in effect */
+	CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2));
+	CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, t.size, GADDR_V(mem2), KEY(2));
+	CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2));
+	CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, t.size, GADDR_V(mem2), KEY(2));
+
+	kvm_vm_free(t.kvm_vm);
+}
+
+static void test_errors_key_storage_prot_override(void)
+{
+	struct test_default t = test_default_init(guest_copy_key_fetch_prot);
+
+	HOST_SYNC(t.vcpu, STAGE_INITED);
+	t.run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE;
+	t.run->kvm_dirty_regs = KVM_SYNC_CRS;
+	HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+	/* vm, mismatching keys, storage protection override not applicable to vm */
+	CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2));
+	CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, t.size, GADDR_V(mem2), KEY(2));
+
+	kvm_vm_free(t.kvm_vm);
+}
+
 const uint64_t last_page_addr = -PAGE_SIZE;
 
 static void guest_copy_key_fetch_prot_override(void)
@@ -481,6 +521,58 @@ out:
 	kvm_vm_free(t.kvm_vm);
 }
 
+static void test_errors_key_fetch_prot_override_not_enabled(void)
+{
+	struct test_default t = test_default_init(guest_copy_key_fetch_prot_override);
+	vm_vaddr_t guest_0_page, guest_last_page;
+
+	guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0);
+	guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr);
+	if (guest_0_page != 0 || guest_last_page != last_page_addr) {
+		print_skip("did not allocate guest pages at required positions");
+		goto out;
+	}
+	HOST_SYNC(t.vcpu, STAGE_INITED);
+	HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+	/* vcpu, mismatching keys on fetch, fetch protection override not enabled */
+	CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, 2048, GADDR_V(0), KEY(2));
+
+out:
+	kvm_vm_free(t.kvm_vm);
+}
+
+static void test_errors_key_fetch_prot_override_enabled(void)
+{
+	struct test_default t = test_default_init(guest_copy_key_fetch_prot_override);
+	vm_vaddr_t guest_0_page, guest_last_page;
+
+	guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0);
+	guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr);
+	if (guest_0_page != 0 || guest_last_page != last_page_addr) {
+		print_skip("did not allocate guest pages at required positions");
+		goto out;
+	}
+	HOST_SYNC(t.vcpu, STAGE_INITED);
+	t.run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE;
+	t.run->kvm_dirty_regs = KVM_SYNC_CRS;
+	HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+	/*
+	 * vcpu, mismatching keys on fetch,
+	 * fetch protection override does not apply because memory range acceeded
+	 */
+	CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, 2048 + 1, GADDR_V(0), KEY(2));
+	CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, PAGE_SIZE + 2048 + 1,
+		   GADDR_V(guest_last_page), KEY(2));
+	/* vm, fetch protected override does not apply */
+	CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, 2048, GADDR(0), KEY(2));
+	CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, 2048, GADDR_V(guest_0_page), KEY(2));
+
+out:
+	kvm_vm_free(t.kvm_vm);
+}
+
 static void guest_idle(void)
 {
 	GUEST_SYNC(STAGE_INITED); /* for consistency's sake */
@@ -488,39 +580,54 @@ static void guest_idle(void)
 		GUEST_SYNC(STAGE_IDLED);
 }
 
-static void test_errors(void)
+static void _test_errors_common(struct test_vcpu vcpu, enum mop_target target, int size)
 {
-	struct test_default t = test_default_init(guest_idle);
 	int rv;
 
-	HOST_SYNC(t.vcpu, STAGE_INITED);
-
 	/* Bad size: */
-	rv = ERR_MOP(t.vcpu, LOGICAL, WRITE, mem1, -1, GADDR_V(mem1));
+	rv = ERR_MOP(vcpu, target, WRITE, mem1, -1, GADDR_V(mem1));
 	TEST_ASSERT(rv == -1 && errno == E2BIG, "ioctl allows insane sizes");
 
 	/* Zero size: */
-	rv = ERR_MOP(t.vcpu, LOGICAL, WRITE, mem1, 0, GADDR_V(mem1));
+	rv = ERR_MOP(vcpu, target, WRITE, mem1, 0, GADDR_V(mem1));
 	TEST_ASSERT(rv == -1 && (errno == EINVAL || errno == ENOMEM),
 		    "ioctl allows 0 as size");
 
 	/* Bad flags: */
-	rv = ERR_MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), SET_FLAGS(-1));
+	rv = ERR_MOP(vcpu, target, WRITE, mem1, size, GADDR_V(mem1), SET_FLAGS(-1));
 	TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows all flags");
 
-	/* Bad operation: */
-	rv = ERR_MOP(t.vcpu, INVALID, WRITE, mem1, t.size, GADDR_V(mem1));
-	TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows bad operations");
-
 	/* Bad guest address: */
-	rv = ERR_MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR((void *)~0xfffUL), CHECK_ONLY);
+	rv = ERR_MOP(vcpu, target, WRITE, mem1, size, GADDR((void *)~0xfffUL), CHECK_ONLY);
 	TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory access");
 
 	/* Bad host address: */
-	rv = ERR_MOP(t.vcpu, LOGICAL, WRITE, 0, t.size, GADDR_V(mem1));
+	rv = ERR_MOP(vcpu, target, WRITE, 0, size, GADDR_V(mem1));
 	TEST_ASSERT(rv == -1 && errno == EFAULT,
 		    "ioctl does not report bad host memory address");
 
+	/* Bad key: */
+	rv = ERR_MOP(vcpu, target, WRITE, mem1, size, GADDR_V(mem1), KEY(17));
+	TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows invalid key");
+}
+
+static void test_errors(void)
+{
+	struct test_default t = test_default_init(guest_idle);
+	int rv;
+
+	HOST_SYNC(t.vcpu, STAGE_INITED);
+
+	_test_errors_common(t.vcpu, LOGICAL, t.size);
+	_test_errors_common(t.vm, ABSOLUTE, t.size);
+
+	/* Bad operation: */
+	rv = ERR_MOP(t.vcpu, INVALID, WRITE, mem1, t.size, GADDR_V(mem1));
+	TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows bad operations");
+	/* virtual addresses are not translated when passing INVALID */
+	rv = ERR_MOP(t.vm, INVALID, WRITE, mem1, PAGE_SIZE, GADDR(0));
+	TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows bad operations");
+
 	/* Bad access register: */
 	t.run->psw_mask &= ~(3UL << (63 - 17));
 	t.run->psw_mask |= 1UL << (63 - 17);  /* Enable AR mode */
@@ -560,6 +667,10 @@ int main(int argc, char *argv[])
 		test_copy_key_storage_prot_override();
 		test_copy_key_fetch_prot();
 		test_copy_key_fetch_prot_override();
+		test_errors_key();
+		test_errors_key_storage_prot_override();
+		test_errors_key_fetch_prot_override_not_enabled();
+		test_errors_key_fetch_prot_override_enabled();
 	} else {
 		print_skip("storage key memop extension not supported");
 	}
-- 
cgit 


From 9b18942e9993aef24bdeb294adf1d48c305188bd Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Mon, 14 Mar 2022 15:01:16 +0100
Subject: selftests: netdevsim: hw_stats_l3: Add a new test

Add a test that verifies basic UAPI contracts, netdevsim operation,
rollbacks after partial enablement in core, and UAPI notifications.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 .../selftests/drivers/net/netdevsim/hw_stats_l3.sh | 421 +++++++++++++++++++++
 tools/testing/selftests/net/forwarding/lib.sh      |  60 +++
 2 files changed, 481 insertions(+)
 create mode 100755 tools/testing/selftests/drivers/net/netdevsim/hw_stats_l3.sh

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/drivers/net/netdevsim/hw_stats_l3.sh b/tools/testing/selftests/drivers/net/netdevsim/hw_stats_l3.sh
new file mode 100755
index 000000000000..fe1898402987
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/hw_stats_l3.sh
@@ -0,0 +1,421 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	l3_reporting_test
+	l3_fail_next_test
+	l3_counter_test
+	l3_rollback_test
+	l3_monitor_test
+"
+
+NETDEVSIM_PATH=/sys/bus/netdevsim/
+DEV_ADDR_1=1337
+DEV_ADDR_2=1057
+DEV_ADDR_3=5417
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+
+DUMMY_IFINDEX=
+
+DEV_ADDR()
+{
+	local n=$1; shift
+	local var=DEV_ADDR_$n
+
+	echo ${!var}
+}
+
+DEV()
+{
+	echo netdevsim$(DEV_ADDR $1)
+}
+
+DEVLINK_DEV()
+{
+	echo netdevsim/$(DEV $1)
+}
+
+SYSFS_NET_DIR()
+{
+	echo /sys/bus/netdevsim/devices/$(DEV $1)/net/
+}
+
+DEBUGFS_DIR()
+{
+	echo /sys/kernel/debug/netdevsim/$(DEV $1)/
+}
+
+nsim_add()
+{
+	local n=$1; shift
+
+	echo "$(DEV_ADDR $n) 1" > ${NETDEVSIM_PATH}/new_device
+	while [ ! -d $(SYSFS_NET_DIR $n) ] ; do :; done
+}
+
+nsim_reload()
+{
+	local n=$1; shift
+	local ns=$1; shift
+
+	devlink dev reload $(DEVLINK_DEV $n) netns $ns
+
+	if [ $? -ne 0 ]; then
+		echo "Failed to reload $(DEV $n) into netns \"testns1\""
+		exit 1
+	fi
+
+}
+
+nsim_del()
+{
+	local n=$1; shift
+
+	echo "$(DEV_ADDR $n)" > ${NETDEVSIM_PATH}/del_device
+}
+
+nsim_hwstats_toggle()
+{
+	local action=$1; shift
+	local instance=$1; shift
+	local netdev=$1; shift
+	local type=$1; shift
+
+	local ifindex=$($IP -j link show dev $netdev | jq '.[].ifindex')
+
+	echo $ifindex > $(DEBUGFS_DIR $instance)/hwstats/$type/$action
+}
+
+nsim_hwstats_enable()
+{
+	nsim_hwstats_toggle enable_ifindex "$@"
+}
+
+nsim_hwstats_disable()
+{
+	nsim_hwstats_toggle disable_ifindex "$@"
+}
+
+nsim_hwstats_fail_next_enable()
+{
+	nsim_hwstats_toggle fail_next_enable "$@"
+}
+
+setup_prepare()
+{
+	modprobe netdevsim &> /dev/null
+	nsim_add 1
+	nsim_add 2
+	nsim_add 3
+
+	ip netns add testns1
+
+	if [ $? -ne 0 ]; then
+		echo "Failed to add netns \"testns1\""
+		exit 1
+	fi
+
+	nsim_reload 1 testns1
+	nsim_reload 2 testns1
+	nsim_reload 3 testns1
+
+	IP="ip -n testns1"
+
+	$IP link add name dummy1 type dummy
+	$IP link set dev dummy1 up
+	DUMMY_IFINDEX=$($IP -j link show dev dummy1 | jq '.[].ifindex')
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	$IP link del name dummy1
+	ip netns del testns1
+	nsim_del 3
+	nsim_del 2
+	nsim_del 1
+	modprobe -r netdevsim &> /dev/null
+}
+
+netdev_hwstats_used()
+{
+	local netdev=$1; shift
+	local type=$1; shift
+
+	$IP -j stats show dev "$netdev" group offload subgroup hw_stats_info |
+	    jq '.[].info.l3_stats.used'
+}
+
+netdev_check_used()
+{
+	local netdev=$1; shift
+	local type=$1; shift
+
+	[[ $(netdev_hwstats_used $netdev $type) == "true" ]]
+}
+
+netdev_check_unused()
+{
+	local netdev=$1; shift
+	local type=$1; shift
+
+	[[ $(netdev_hwstats_used $netdev $type) == "false" ]]
+}
+
+netdev_hwstats_request()
+{
+	local netdev=$1; shift
+	local type=$1; shift
+
+	$IP -j stats show dev "$netdev" group offload subgroup hw_stats_info |
+	    jq ".[].info.${type}_stats.request"
+}
+
+netdev_check_requested()
+{
+	local netdev=$1; shift
+	local type=$1; shift
+
+	[[ $(netdev_hwstats_request $netdev $type) == "true" ]]
+}
+
+netdev_check_unrequested()
+{
+	local netdev=$1; shift
+	local type=$1; shift
+
+	[[ $(netdev_hwstats_request $netdev $type) == "false" ]]
+}
+
+reporting_test()
+{
+	local type=$1; shift
+	local instance=1
+
+	RET=0
+
+	[[ -n $(netdev_hwstats_used dummy1 $type) ]]
+	check_err $? "$type stats not reported"
+
+	netdev_check_unused dummy1 $type
+	check_err $? "$type stats reported as used before either device or netdevsim request"
+
+	nsim_hwstats_enable $instance dummy1 $type
+	netdev_check_unused dummy1 $type
+	check_err $? "$type stats reported as used before device request"
+	netdev_check_unrequested dummy1 $type
+	check_err $? "$type stats reported as requested before device request"
+
+	$IP stats set dev dummy1 ${type}_stats on
+	netdev_check_used dummy1 $type
+	check_err $? "$type stats reported as not used after both device and netdevsim request"
+	netdev_check_requested dummy1 $type
+	check_err $? "$type stats reported as not requested after device request"
+
+	nsim_hwstats_disable $instance dummy1 $type
+	netdev_check_unused dummy1 $type
+	check_err $? "$type stats reported as used after netdevsim request withdrawn"
+
+	nsim_hwstats_enable $instance dummy1 $type
+	netdev_check_used dummy1 $type
+	check_err $? "$type stats reported as not used after netdevsim request reenabled"
+
+	$IP stats set dev dummy1 ${type}_stats off
+	netdev_check_unused dummy1 $type
+	check_err $? "$type stats reported as used after device request withdrawn"
+	netdev_check_unrequested dummy1 $type
+	check_err $? "$type stats reported as requested after device request withdrawn"
+
+	nsim_hwstats_disable $instance dummy1 $type
+	netdev_check_unused dummy1 $type
+	check_err $? "$type stats reported as used after both requests withdrawn"
+
+	log_test "Reporting of $type stats usage"
+}
+
+l3_reporting_test()
+{
+	reporting_test l3
+}
+
+__fail_next_test()
+{
+	local instance=$1; shift
+	local type=$1; shift
+
+	RET=0
+
+	netdev_check_unused dummy1 $type
+	check_err $? "$type stats reported as used before either device or netdevsim request"
+
+	nsim_hwstats_enable $instance dummy1 $type
+	nsim_hwstats_fail_next_enable $instance dummy1 $type
+	netdev_check_unused dummy1 $type
+	check_err $? "$type stats reported as used before device request"
+	netdev_check_unrequested dummy1 $type
+	check_err $? "$type stats reported as requested before device request"
+
+	$IP stats set dev dummy1 ${type}_stats on 2>/dev/null
+	check_fail $? "$type stats request not bounced as it should have been"
+	netdev_check_unused dummy1 $type
+	check_err $? "$type stats reported as used after bounce"
+	netdev_check_unrequested dummy1 $type
+	check_err $? "$type stats reported as requested after bounce"
+
+	$IP stats set dev dummy1 ${type}_stats on
+	check_err $? "$type stats request failed when it shouldn't have"
+	netdev_check_used dummy1 $type
+	check_err $? "$type stats reported as not used after both device and netdevsim request"
+	netdev_check_requested dummy1 $type
+	check_err $? "$type stats reported as not requested after device request"
+
+	$IP stats set dev dummy1 ${type}_stats off
+	nsim_hwstats_disable $instance dummy1 $type
+
+	log_test "Injected failure of $type stats enablement (netdevsim #$instance)"
+}
+
+fail_next_test()
+{
+	__fail_next_test 1 "$@"
+	__fail_next_test 2 "$@"
+	__fail_next_test 3 "$@"
+}
+
+l3_fail_next_test()
+{
+	fail_next_test l3
+}
+
+get_hwstat()
+{
+	local netdev=$1; shift
+	local type=$1; shift
+	local selector=$1; shift
+
+	$IP -j stats show dev $netdev group offload subgroup ${type}_stats |
+		  jq ".[0].stats64.${selector}"
+}
+
+counter_test()
+{
+	local type=$1; shift
+	local instance=1
+
+	RET=0
+
+	nsim_hwstats_enable $instance dummy1 $type
+	$IP stats set dev dummy1 ${type}_stats on
+	netdev_check_used dummy1 $type
+	check_err $? "$type stats reported as not used after both device and netdevsim request"
+
+	# Netdevsim counts 10pps on ingress. We should see maybe a couple
+	# packets, unless things take a reeealy long time.
+	local pkts=$(get_hwstat dummy1 l3 rx.packets)
+	((pkts < 10))
+	check_err $? "$type stats show >= 10 packets after first enablement"
+
+	sleep 2
+
+	local pkts=$(get_hwstat dummy1 l3 rx.packets)
+	((pkts >= 20))
+	check_err $? "$type stats show < 20 packets after 2s passed"
+
+	$IP stats set dev dummy1 ${type}_stats off
+
+	sleep 2
+
+	$IP stats set dev dummy1 ${type}_stats on
+	local pkts=$(get_hwstat dummy1 l3 rx.packets)
+	((pkts < 10))
+	check_err $? "$type stats show >= 10 packets after second enablement"
+
+	$IP stats set dev dummy1 ${type}_stats off
+	nsim_hwstats_fail_next_enable $instance dummy1 $type
+	$IP stats set dev dummy1 ${type}_stats on 2>/dev/null
+	check_fail $? "$type stats request not bounced as it should have been"
+
+	sleep 2
+
+	$IP stats set dev dummy1 ${type}_stats on
+	local pkts=$(get_hwstat dummy1 l3 rx.packets)
+	((pkts < 10))
+	check_err $? "$type stats show >= 10 packets after post-fail enablement"
+
+	$IP stats set dev dummy1 ${type}_stats off
+
+	log_test "Counter values in $type stats"
+}
+
+l3_counter_test()
+{
+	counter_test l3
+}
+
+rollback_test()
+{
+	local type=$1; shift
+
+	RET=0
+
+	nsim_hwstats_enable 1 dummy1 l3
+	nsim_hwstats_enable 2 dummy1 l3
+	nsim_hwstats_enable 3 dummy1 l3
+
+	# The three netdevsim instances are registered in order of their number
+	# one after another. It is reasonable to expect that whatever
+	# notifications take place hit no. 2 in between hitting nos. 1 and 3,
+	# whatever the actual order. This allows us to test that a fail caused
+	# by no. 2 does not leave the system in a partial state, and rolls
+	# everything back.
+
+	nsim_hwstats_fail_next_enable 2 dummy1 l3
+	$IP stats set dev dummy1 ${type}_stats on 2>/dev/null
+	check_fail $? "$type stats request not bounced as it should have been"
+
+	netdev_check_unused dummy1 $type
+	check_err $? "$type stats reported as used after bounce"
+	netdev_check_unrequested dummy1 $type
+	check_err $? "$type stats reported as requested after bounce"
+
+	sleep 2
+
+	$IP stats set dev dummy1 ${type}_stats on
+	check_err $? "$type stats request not upheld as it should have been"
+
+	local pkts=$(get_hwstat dummy1 l3 rx.packets)
+	((pkts < 10))
+	check_err $? "$type stats show $pkts packets after post-fail enablement"
+
+	$IP stats set dev dummy1 ${type}_stats off
+
+	nsim_hwstats_disable 3 dummy1 l3
+	nsim_hwstats_disable 2 dummy1 l3
+	nsim_hwstats_disable 1 dummy1 l3
+
+	log_test "Failure in $type stats enablement rolled back"
+}
+
+l3_rollback_test()
+{
+	rollback_test l3
+}
+
+l3_monitor_test()
+{
+	hw_stats_monitor_test dummy1 l3		   \
+		"nsim_hwstats_enable 1 dummy1 l3"  \
+		"nsim_hwstats_disable 1 dummy1 l3" \
+		"$IP"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 159afc7f0979..664b9ecaf228 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -1498,3 +1498,63 @@ brmcast_check_sg_state()
 		check_err_fail $should_fail $? "Entry $src has blocked flag"
 	done
 }
+
+start_ip_monitor()
+{
+	local mtype=$1; shift
+	local ip=${1-ip}; shift
+
+	# start the monitor in the background
+	tmpfile=`mktemp /var/run/nexthoptestXXX`
+	mpid=`($ip monitor $mtype > $tmpfile & echo $!) 2>/dev/null`
+	sleep 0.2
+	echo "$mpid $tmpfile"
+}
+
+stop_ip_monitor()
+{
+	local mpid=$1; shift
+	local tmpfile=$1; shift
+	local el=$1; shift
+	local what=$1; shift
+
+	sleep 0.2
+	kill $mpid
+	local lines=`grep '^\w' $tmpfile | wc -l`
+	test $lines -eq $el
+	check_err $? "$what: $lines lines of events, expected $el"
+	rm -rf $tmpfile
+}
+
+hw_stats_monitor_test()
+{
+	local dev=$1; shift
+	local type=$1; shift
+	local make_suitable=$1; shift
+	local make_unsuitable=$1; shift
+	local ip=${1-ip}; shift
+
+	RET=0
+
+	# Expect a notification about enablement.
+	local ipmout=$(start_ip_monitor stats "$ip")
+	$ip stats set dev $dev ${type}_stats on
+	stop_ip_monitor $ipmout 1 "${type}_stats enablement"
+
+	# Expect a notification about offload.
+	local ipmout=$(start_ip_monitor stats "$ip")
+	$make_suitable
+	stop_ip_monitor $ipmout 1 "${type}_stats installation"
+
+	# Expect a notification about loss of offload.
+	local ipmout=$(start_ip_monitor stats "$ip")
+	$make_unsuitable
+	stop_ip_monitor $ipmout 1 "${type}_stats deinstallation"
+
+	# Expect a notification about disablement
+	local ipmout=$(start_ip_monitor stats "$ip")
+	$ip stats set dev $dev ${type}_stats off
+	stop_ip_monitor $ipmout 1 "${type}_stats disablement"
+
+	log_test "${type}_stats notifications"
+}
-- 
cgit 


From ed2ae69c40537ac3250a318f344722fef0c4f67c Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Mon, 14 Mar 2022 15:01:17 +0100
Subject: selftests: mlxsw: hw_stats_l3: Add a new test

Add a test that verifies that UAPI notifications are emitted, as mlxsw
installs and deinstalls HW counters for the L3 offload xstats.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 .../selftests/drivers/net/mlxsw/hw_stats_l3.sh     | 31 ++++++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100755 tools/testing/selftests/drivers/net/mlxsw/hw_stats_l3.sh

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/hw_stats_l3.sh b/tools/testing/selftests/drivers/net/mlxsw/hw_stats_l3.sh
new file mode 100755
index 000000000000..941ba4c485c9
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/hw_stats_l3.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	l3_monitor_test
+"
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+
+swp=$NETIF_NO_CABLE
+
+cleanup()
+{
+	pre_cleanup
+}
+
+l3_monitor_test()
+{
+	hw_stats_monitor_test $swp l3		    \
+		"ip addr add dev $swp 192.0.2.1/28" \
+		"ip addr del dev $swp 192.0.2.1/28"
+}
+
+trap cleanup EXIT
+
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit 


From f98d6dd1e79d4b04c2e13e91a9348473cfa805a6 Mon Sep 17 00:00:00 2001
From: Guo Zhengkui <guozhengkui@vivo.com>
Date: Tue, 15 Mar 2022 21:01:26 +0800
Subject: selftests/bpf: Clean up array_size.cocci warnings

Clean up the array_size.cocci warnings under tools/testing/selftests/bpf/:

Use `ARRAY_SIZE(arr)` instead of forms like `sizeof(arr)/sizeof(arr[0])`.

tools/testing/selftests/bpf/test_cgroup_storage.c uses ARRAY_SIZE() defined
in tools/include/linux/kernel.h (sys/sysinfo.h -> linux/kernel.h), while
others use ARRAY_SIZE() in bpf_util.h.

Signed-off-by: Guo Zhengkui <guozhengkui@vivo.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20220315130143.2403-1-guozhengkui@vivo.com
---
 tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c | 2 +-
 tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c      | 2 +-
 tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c   | 2 +-
 tools/testing/selftests/bpf/prog_tests/global_data.c              | 6 +++---
 tools/testing/selftests/bpf/prog_tests/obj_name.c                 | 2 +-
 tools/testing/selftests/bpf/test_cgroup_storage.c                 | 2 +-
 tools/testing/selftests/bpf/test_lru_map.c                        | 4 ++--
 tools/testing/selftests/bpf/test_sock_addr.c                      | 6 +++---
 tools/testing/selftests/bpf/test_sockmap.c                        | 4 ++--
 9 files changed, 15 insertions(+), 15 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c
index 858916d11e2e..9367bd2f0ae1 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c
@@ -14,7 +14,7 @@ static int prog_load(void)
 		BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = 1 */
 		BPF_EXIT_INSN(),
 	};
-	size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
+	size_t insns_cnt = ARRAY_SIZE(prog);
 
 	return bpf_test_load_program(BPF_PROG_TYPE_CGROUP_SKB,
 			       prog, insns_cnt, "GPL", 0,
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
index 38b3c47293da..db0b7bac78d1 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
@@ -63,7 +63,7 @@ static int prog_load_cnt(int verdict, int val)
 		BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
 		BPF_EXIT_INSN(),
 	};
-	size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
+	size_t insns_cnt = ARRAY_SIZE(prog);
 	int ret;
 
 	ret = bpf_test_load_program(BPF_PROG_TYPE_CGROUP_SKB,
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c
index 356547e849e2..9421a5b7f4e1 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c
@@ -16,7 +16,7 @@ static int prog_load(int verdict)
 		BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
 		BPF_EXIT_INSN(),
 	};
-	size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
+	size_t insns_cnt = ARRAY_SIZE(prog);
 
 	return bpf_test_load_program(BPF_PROG_TYPE_CGROUP_SKB,
 			       prog, insns_cnt, "GPL", 0,
diff --git a/tools/testing/selftests/bpf/prog_tests/global_data.c b/tools/testing/selftests/bpf/prog_tests/global_data.c
index 6fb3d3155c35..027685858925 100644
--- a/tools/testing/selftests/bpf/prog_tests/global_data.c
+++ b/tools/testing/selftests/bpf/prog_tests/global_data.c
@@ -29,7 +29,7 @@ static void test_global_data_number(struct bpf_object *obj, __u32 duration)
 		{ "relocate .rodata reference", 10, ~0 },
 	};
 
-	for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
+	for (i = 0; i < ARRAY_SIZE(tests); i++) {
 		err = bpf_map_lookup_elem(map_fd, &tests[i].key, &num);
 		CHECK(err || num != tests[i].num, tests[i].name,
 		      "err %d result %llx expected %llx\n",
@@ -58,7 +58,7 @@ static void test_global_data_string(struct bpf_object *obj, __u32 duration)
 		{ "relocate .bss reference",    4, "\0\0hello" },
 	};
 
-	for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
+	for (i = 0; i < ARRAY_SIZE(tests); i++) {
 		err = bpf_map_lookup_elem(map_fd, &tests[i].key, str);
 		CHECK(err || memcmp(str, tests[i].str, sizeof(str)),
 		      tests[i].name, "err %d result \'%s\' expected \'%s\'\n",
@@ -92,7 +92,7 @@ static void test_global_data_struct(struct bpf_object *obj, __u32 duration)
 		{ "relocate .data reference",   3, { 41, 0xeeeeefef, 0x2111111111111111ULL, } },
 	};
 
-	for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
+	for (i = 0; i < ARRAY_SIZE(tests); i++) {
 		err = bpf_map_lookup_elem(map_fd, &tests[i].key, &val);
 		CHECK(err || memcmp(&val, &tests[i].val, sizeof(val)),
 		      tests[i].name, "err %d result { %u, %u, %llu } expected { %u, %u, %llu }\n",
diff --git a/tools/testing/selftests/bpf/prog_tests/obj_name.c b/tools/testing/selftests/bpf/prog_tests/obj_name.c
index 6194b776a28b..7093edca6e08 100644
--- a/tools/testing/selftests/bpf/prog_tests/obj_name.c
+++ b/tools/testing/selftests/bpf/prog_tests/obj_name.c
@@ -20,7 +20,7 @@ void test_obj_name(void)
 	__u32 duration = 0;
 	int i;
 
-	for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
+	for (i = 0; i < ARRAY_SIZE(tests); i++) {
 		size_t name_len = strlen(tests[i].name) + 1;
 		union bpf_attr attr;
 		size_t ncopy;
diff --git a/tools/testing/selftests/bpf/test_cgroup_storage.c b/tools/testing/selftests/bpf/test_cgroup_storage.c
index 5b8314cd77fd..d6a1be4d8020 100644
--- a/tools/testing/selftests/bpf/test_cgroup_storage.c
+++ b/tools/testing/selftests/bpf/test_cgroup_storage.c
@@ -36,7 +36,7 @@ int main(int argc, char **argv)
 		BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
 		BPF_EXIT_INSN(),
 	};
-	size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
+	size_t insns_cnt = ARRAY_SIZE(prog);
 	int error = EXIT_FAILURE;
 	int map_fd, percpu_map_fd, prog_fd, cgroup_fd;
 	struct bpf_cgroup_storage_key key;
diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
index 6e6235185a86..563bbe18c172 100644
--- a/tools/testing/selftests/bpf/test_lru_map.c
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -878,11 +878,11 @@ int main(int argc, char **argv)
 	assert(nr_cpus != -1);
 	printf("nr_cpus:%d\n\n", nr_cpus);
 
-	for (f = 0; f < sizeof(map_flags) / sizeof(*map_flags); f++) {
+	for (f = 0; f < ARRAY_SIZE(map_flags); f++) {
 		unsigned int tgt_free = (map_flags[f] & BPF_F_NO_COMMON_LRU) ?
 			PERCPU_FREE_TARGET : LOCAL_FREE_TARGET;
 
-		for (t = 0; t < sizeof(map_types) / sizeof(*map_types); t++) {
+		for (t = 0; t < ARRAY_SIZE(map_types); t++) {
 			test_lru_sanity0(map_types[t], map_flags[f]);
 			test_lru_sanity1(map_types[t], map_flags[f], tgt_free);
 			test_lru_sanity2(map_types[t], map_flags[f], tgt_free);
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
index f0c8d05ba6d1..f3d5d7ac6505 100644
--- a/tools/testing/selftests/bpf/test_sock_addr.c
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -723,7 +723,7 @@ static int xmsg_ret_only_prog_load(const struct sock_addr_test *test,
 		BPF_MOV64_IMM(BPF_REG_0, rc),
 		BPF_EXIT_INSN(),
 	};
-	return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
+	return load_insns(test, insns, ARRAY_SIZE(insns));
 }
 
 static int sendmsg_allow_prog_load(const struct sock_addr_test *test)
@@ -795,7 +795,7 @@ static int sendmsg4_rw_asm_prog_load(const struct sock_addr_test *test)
 		BPF_EXIT_INSN(),
 	};
 
-	return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
+	return load_insns(test, insns, ARRAY_SIZE(insns));
 }
 
 static int recvmsg4_rw_c_prog_load(const struct sock_addr_test *test)
@@ -858,7 +858,7 @@ static int sendmsg6_rw_dst_asm_prog_load(const struct sock_addr_test *test,
 		BPF_EXIT_INSN(),
 	};
 
-	return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
+	return load_insns(test, insns, ARRAY_SIZE(insns));
 }
 
 static int sendmsg6_rw_asm_prog_load(const struct sock_addr_test *test)
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 1ba7e7346afb..dfb4f5c0fcb9 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -1786,7 +1786,7 @@ static int populate_progs(char *bpf_file)
 		i++;
 	}
 
-	for (i = 0; i < sizeof(map_fd)/sizeof(int); i++) {
+	for (i = 0; i < ARRAY_SIZE(map_fd); i++) {
 		maps[i] = bpf_object__find_map_by_name(obj, map_names[i]);
 		map_fd[i] = bpf_map__fd(maps[i]);
 		if (map_fd[i] < 0) {
@@ -1867,7 +1867,7 @@ static int __test_selftests(int cg_fd, struct sockmap_options *opt)
 	}
 
 	/* Tests basic commands and APIs */
-	for (i = 0; i < sizeof(test)/sizeof(struct _test); i++) {
+	for (i = 0; i < ARRAY_SIZE(test); i++) {
 		struct _test t = test[i];
 
 		if (check_whitelist(&t, opt) != 0)
-- 
cgit 


From 40867d74c374b235e14d839f3a77f26684feefe5 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@kernel.org>
Date: Mon, 14 Mar 2022 14:45:51 -0600
Subject: net: Add l3mdev index to flow struct and avoid oif reset for port
 devices

The fundamental premise of VRF and l3mdev core code is binding a socket
to a device (l3mdev or netdev with an L3 domain) to indicate L3 scope.
Legacy code resets flowi_oif to the l3mdev losing any original port
device binding. Ben (among others) has demonstrated use cases where the
original port device binding is important and needs to be retained.
This patch handles that by adding a new entry to the common flow struct
that can indicate the l3mdev index for later rule and table matching
avoiding the need to reset flowi_oif.

In addition to allowing more use cases that require port device binds,
this patch brings a few datapath simplications:

1. l3mdev_fib_rule_match is only called when walking fib rules and
   always after l3mdev_update_flow. That allows an optimization to bail
   early for non-VRF type uses cases when flowi_l3mdev is not set. Also,
   only that index needs to be checked for the FIB table id.

2. l3mdev_update_flow can be called with flowi_oif set to a l3mdev
   (e.g., VRF) device. By resetting flowi_oif only for this case the
   FLOWI_FLAG_SKIP_NH_OIF flag is not longer needed and can be removed,
   removing several checks in the datapath. The flowi_iif path can be
   simplified to only be called if the it is not loopback (loopback can
   not be assigned to an L3 domain) and the l3mdev index is not already
   set.

3. Avoid another device lookup in the output path when the fib lookup
   returns a reject failure.

Note: 2 functional tests for local traffic with reject fib rules are
updated to reflect the new direct failure at FIB lookup time for ping
rather than the failure on packet path. The current code fails like this:

    HINT: Fails since address on vrf device is out of device scope
    COMMAND: ip netns exec ns-A ping -c1 -w1 -I eth1 172.16.3.1
    ping: Warning: source address might be selected on device other than: eth1
    PING 172.16.3.1 (172.16.3.1) from 172.16.3.1 eth1: 56(84) bytes of data.

    --- 172.16.3.1 ping statistics ---
    1 packets transmitted, 0 received, 100% packet loss, time 0ms

where the test now directly fails:

    HINT: Fails since address on vrf device is out of device scope
    COMMAND: ip netns exec ns-A ping -c1 -w1 -I eth1 172.16.3.1
    ping: connect: No route to host

Signed-off-by: David Ahern <dsahern@kernel.org>
Tested-by: Ben Greear <greearb@candelatech.com>
Link: https://lore.kernel.org/r/20220314204551.16369-1-dsahern@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/vrf.c                         |  7 +++--
 include/net/flow.h                        |  6 ++++-
 net/ipv4/fib_frontend.c                   |  7 +++--
 net/ipv4/fib_semantics.c                  |  2 +-
 net/ipv4/fib_trie.c                       |  7 ++---
 net/ipv4/route.c                          |  4 +--
 net/ipv4/xfrm4_policy.c                   |  4 +--
 net/ipv6/ip6_output.c                     |  3 +--
 net/ipv6/route.c                          | 12 ---------
 net/ipv6/xfrm6_policy.c                   |  3 +--
 net/l3mdev/l3mdev.c                       | 43 ++++++++++++-------------------
 tools/testing/selftests/net/fcnal-test.sh |  2 +-
 12 files changed, 37 insertions(+), 63 deletions(-)

(limited to 'tools/testing')

diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 714cafcf6c6c..85e362461d71 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -472,14 +472,13 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
 
 	memset(&fl6, 0, sizeof(fl6));
 	/* needed to match OIF rule */
-	fl6.flowi6_oif = dev->ifindex;
+	fl6.flowi6_l3mdev = dev->ifindex;
 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
 	fl6.daddr = iph->daddr;
 	fl6.saddr = iph->saddr;
 	fl6.flowlabel = ip6_flowinfo(iph);
 	fl6.flowi6_mark = skb->mark;
 	fl6.flowi6_proto = iph->nexthdr;
-	fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF;
 
 	dst = ip6_dst_lookup_flow(net, NULL, &fl6, NULL);
 	if (IS_ERR(dst) || dst == dst_null)
@@ -551,10 +550,10 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
 
 	memset(&fl4, 0, sizeof(fl4));
 	/* needed to match OIF rule */
-	fl4.flowi4_oif = vrf_dev->ifindex;
+	fl4.flowi4_l3mdev = vrf_dev->ifindex;
 	fl4.flowi4_iif = LOOPBACK_IFINDEX;
 	fl4.flowi4_tos = RT_TOS(ip4h->tos);
-	fl4.flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_SKIP_NH_OIF;
+	fl4.flowi4_flags = FLOWI_FLAG_ANYSRC;
 	fl4.flowi4_proto = ip4h->protocol;
 	fl4.daddr = ip4h->daddr;
 	fl4.saddr = ip4h->saddr;
diff --git a/include/net/flow.h b/include/net/flow.h
index 58beb16a49b8..987bd511d652 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -29,6 +29,7 @@ struct flowi_tunnel {
 struct flowi_common {
 	int	flowic_oif;
 	int	flowic_iif;
+	int     flowic_l3mdev;
 	__u32	flowic_mark;
 	__u8	flowic_tos;
 	__u8	flowic_scope;
@@ -36,7 +37,6 @@ struct flowi_common {
 	__u8	flowic_flags;
 #define FLOWI_FLAG_ANYSRC		0x01
 #define FLOWI_FLAG_KNOWN_NH		0x02
-#define FLOWI_FLAG_SKIP_NH_OIF		0x04
 	__u32	flowic_secid;
 	kuid_t  flowic_uid;
 	struct flowi_tunnel flowic_tun_key;
@@ -70,6 +70,7 @@ struct flowi4 {
 	struct flowi_common	__fl_common;
 #define flowi4_oif		__fl_common.flowic_oif
 #define flowi4_iif		__fl_common.flowic_iif
+#define flowi4_l3mdev		__fl_common.flowic_l3mdev
 #define flowi4_mark		__fl_common.flowic_mark
 #define flowi4_tos		__fl_common.flowic_tos
 #define flowi4_scope		__fl_common.flowic_scope
@@ -102,6 +103,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif,
 {
 	fl4->flowi4_oif = oif;
 	fl4->flowi4_iif = LOOPBACK_IFINDEX;
+	fl4->flowi4_l3mdev = 0;
 	fl4->flowi4_mark = mark;
 	fl4->flowi4_tos = tos;
 	fl4->flowi4_scope = scope;
@@ -132,6 +134,7 @@ struct flowi6 {
 	struct flowi_common	__fl_common;
 #define flowi6_oif		__fl_common.flowic_oif
 #define flowi6_iif		__fl_common.flowic_iif
+#define flowi6_l3mdev		__fl_common.flowic_l3mdev
 #define flowi6_mark		__fl_common.flowic_mark
 #define flowi6_scope		__fl_common.flowic_scope
 #define flowi6_proto		__fl_common.flowic_proto
@@ -177,6 +180,7 @@ struct flowi {
 	} u;
 #define flowi_oif	u.__fl_common.flowic_oif
 #define flowi_iif	u.__fl_common.flowic_iif
+#define flowi_l3mdev	u.__fl_common.flowic_l3mdev
 #define flowi_mark	u.__fl_common.flowic_mark
 #define flowi_tos	u.__fl_common.flowic_tos
 #define flowi_scope	u.__fl_common.flowic_scope
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 7408051632ac..af8209f912ab 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -291,7 +291,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
 		bool vmark = in_dev && IN_DEV_SRC_VMARK(in_dev);
 		struct flowi4 fl4 = {
 			.flowi4_iif = LOOPBACK_IFINDEX,
-			.flowi4_oif = l3mdev_master_ifindex_rcu(dev),
+			.flowi4_l3mdev = l3mdev_master_ifindex_rcu(dev),
 			.daddr = ip_hdr(skb)->saddr,
 			.flowi4_tos = ip_hdr(skb)->tos & IPTOS_RT_MASK,
 			.flowi4_scope = scope,
@@ -353,9 +353,8 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
 	bool dev_match;
 
 	fl4.flowi4_oif = 0;
-	fl4.flowi4_iif = l3mdev_master_ifindex_rcu(dev);
-	if (!fl4.flowi4_iif)
-		fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX;
+	fl4.flowi4_l3mdev = l3mdev_master_ifindex_rcu(dev);
+	fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX;
 	fl4.daddr = src;
 	fl4.saddr = dst;
 	fl4.flowi4_tos = tos;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index c5a29703185a..cc8e84ef2ae4 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -2234,7 +2234,7 @@ void fib_select_multipath(struct fib_result *res, int hash)
 void fib_select_path(struct net *net, struct fib_result *res,
 		     struct flowi4 *fl4, const struct sk_buff *skb)
 {
-	if (fl4->flowi4_oif && !(fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF))
+	if (fl4->flowi4_oif)
 		goto check_saddr;
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 2af2b99e0bea..fb0e49c36c2e 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1429,11 +1429,8 @@ bool fib_lookup_good_nhc(const struct fib_nh_common *nhc, int fib_flags,
 	    !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE))
 		return false;
 
-	if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) {
-		if (flp->flowi4_oif &&
-		    flp->flowi4_oif != nhc->nhc_oif)
-			return false;
-	}
+	if (flp->flowi4_oif && flp->flowi4_oif != nhc->nhc_oif)
+		return false;
 
 	return true;
 }
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f444f5983405..63f3256a407d 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2263,6 +2263,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	/*
 	 *	Now we are ready to route packet.
 	 */
+	fl4.flowi4_l3mdev = 0;
 	fl4.flowi4_oif = 0;
 	fl4.flowi4_iif = dev->ifindex;
 	fl4.flowi4_mark = skb->mark;
@@ -2738,8 +2739,7 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
 		res->fi = NULL;
 		res->table = NULL;
 		if (fl4->flowi4_oif &&
-		    (ipv4_is_multicast(fl4->daddr) ||
-		    !netif_index_is_l3_master(net, fl4->flowi4_oif))) {
+		    (ipv4_is_multicast(fl4->daddr) || !fl4->flowi4_l3mdev)) {
 			/* Apparently, routing tables are wrong. Assume,
 			 * that the destination is on link.
 			 *
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 9e83bcb6bc99..6fde0b184791 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -28,13 +28,11 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
 	memset(fl4, 0, sizeof(*fl4));
 	fl4->daddr = daddr->a4;
 	fl4->flowi4_tos = tos;
-	fl4->flowi4_oif = l3mdev_master_ifindex_by_index(net, oif);
+	fl4->flowi4_l3mdev = l3mdev_master_ifindex_by_index(net, oif);
 	fl4->flowi4_mark = mark;
 	if (saddr)
 		fl4->saddr = saddr->a4;
 
-	fl4->flowi4_flags = FLOWI_FLAG_SKIP_NH_OIF;
-
 	rt = __ip_route_output_key(net, fl4);
 	if (!IS_ERR(rt))
 		return &rt->dst;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index e69fac576970..a76fba3dd47a 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1035,8 +1035,7 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
 #ifdef CONFIG_IPV6_SUBTREES
 	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
 #endif
-	   (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
-	      (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
+	   (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
 		dst_release(dst);
 		dst = NULL;
 	}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 6188712f24b0..2fa10e60cccd 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1209,9 +1209,6 @@ INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_lookup(struct net *net,
 	struct fib6_node *fn;
 	struct rt6_info *rt;
 
-	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
-		flags &= ~RT6_LOOKUP_F_IFACE;
-
 	rcu_read_lock();
 	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 restart:
@@ -2181,9 +2178,6 @@ int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif,
 	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 	saved_fn = fn;
 
-	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
-		oif = 0;
-
 redo_rt6_select:
 	rt6_select(net, fn, oif, res, strict);
 	if (res->f6i == net->ipv6.fib6_null_entry) {
@@ -3058,12 +3052,6 @@ INDIRECT_CALLABLE_SCOPE struct rt6_info *__ip6_route_redirect(struct net *net,
 	struct fib6_info *rt;
 	struct fib6_node *fn;
 
-	/* l3mdev_update_flow overrides oif if the device is enslaved; in
-	 * this case we must match on the real ingress device, so reset it
-	 */
-	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
-		fl6->flowi6_oif = skb->dev->ifindex;
-
 	/* Get the "current" route for this destination and
 	 * check if the redirect has come from appropriate router.
 	 *
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 55bb2cbae13d..e64e427a51cf 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -33,8 +33,7 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif,
 	int err;
 
 	memset(&fl6, 0, sizeof(fl6));
-	fl6.flowi6_oif = l3mdev_master_ifindex_by_index(net, oif);
-	fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF;
+	fl6.flowi6_l3mdev = l3mdev_master_ifindex_by_index(net, oif);
 	fl6.flowi6_mark = mark;
 	memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr));
 	if (saddr)
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
index 17927966abb3..4eb8892fb2ff 100644
--- a/net/l3mdev/l3mdev.c
+++ b/net/l3mdev/l3mdev.c
@@ -250,25 +250,19 @@ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
 	struct net_device *dev;
 	int rc = 0;
 
-	rcu_read_lock();
+	/* update flow ensures flowi_l3mdev is set when relevant */
+	if (!fl->flowi_l3mdev)
+		return 0;
 
-	dev = dev_get_by_index_rcu(net, fl->flowi_oif);
-	if (dev && netif_is_l3_master(dev) &&
-	    dev->l3mdev_ops->l3mdev_fib_table) {
-		arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev);
-		rc = 1;
-		goto out;
-	}
+	rcu_read_lock();
 
-	dev = dev_get_by_index_rcu(net, fl->flowi_iif);
+	dev = dev_get_by_index_rcu(net, fl->flowi_l3mdev);
 	if (dev && netif_is_l3_master(dev) &&
 	    dev->l3mdev_ops->l3mdev_fib_table) {
 		arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev);
 		rc = 1;
-		goto out;
 	}
 
-out:
 	rcu_read_unlock();
 
 	return rc;
@@ -277,31 +271,28 @@ out:
 void l3mdev_update_flow(struct net *net, struct flowi *fl)
 {
 	struct net_device *dev;
-	int ifindex;
 
 	rcu_read_lock();
 
 	if (fl->flowi_oif) {
 		dev = dev_get_by_index_rcu(net, fl->flowi_oif);
 		if (dev) {
-			ifindex = l3mdev_master_ifindex_rcu(dev);
-			if (ifindex) {
-				fl->flowi_oif = ifindex;
-				fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF;
-				goto out;
-			}
+			if (!fl->flowi_l3mdev)
+				fl->flowi_l3mdev = l3mdev_master_ifindex_rcu(dev);
+
+			/* oif set to L3mdev directs lookup to its table;
+			 * reset to avoid oif match in fib_lookup
+			 */
+			if (netif_is_l3_master(dev))
+				fl->flowi_oif = 0;
+			goto out;
 		}
 	}
 
-	if (fl->flowi_iif) {
+	if (fl->flowi_iif > LOOPBACK_IFINDEX && !fl->flowi_l3mdev) {
 		dev = dev_get_by_index_rcu(net, fl->flowi_iif);
-		if (dev) {
-			ifindex = l3mdev_master_ifindex_rcu(dev);
-			if (ifindex) {
-				fl->flowi_iif = ifindex;
-				fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF;
-			}
-		}
+		if (dev)
+			fl->flowi_l3mdev = l3mdev_master_ifindex_rcu(dev);
 	}
 
 out:
diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
index 3f4c8cfe7aca..47c4d4b4a44a 100755
--- a/tools/testing/selftests/net/fcnal-test.sh
+++ b/tools/testing/selftests/net/fcnal-test.sh
@@ -750,7 +750,7 @@ ipv4_ping_vrf()
 		log_start
 		show_hint "Fails since address on vrf device is out of device scope"
 		run_cmd ping -c1 -w1 -I ${NSA_DEV} ${a}
-		log_test_addr ${a} $? 1 "ping local, device bind"
+		log_test_addr ${a} $? 2 "ping local, device bind"
 	done
 
 	#
-- 
cgit 


From 663af70aabb7c9b6bd5e1c1cdeb44e7025a4f855 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 16 Mar 2022 10:38:23 -0700
Subject: bpf: selftests: Add helpers to directly use the capget and capset
 syscall

After upgrading to the newer libcap (>= 2.60),
the libcap commit aca076443591 ("Make cap_t operations thread safe.")
added a "__u8 mutex;" to the "struct _cap_struct".  It caused a few byte
shift that breaks the assumption made in the "struct libcap" definition
in test_verifier.c.

The bpf selftest usage only needs to enable and disable the effective
caps of the running task.  It is easier to directly syscall the
capget and capset instead.  It can also remove the libcap
library dependency.

The cap_helpers.{c,h} is added.  One __u64 is used for all CAP_*
bits instead of two __u32.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20220316173823.2036955-1-kafai@fb.com
---
 tools/testing/selftests/bpf/cap_helpers.c | 67 +++++++++++++++++++++++++++++++
 tools/testing/selftests/bpf/cap_helpers.h | 19 +++++++++
 2 files changed, 86 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/cap_helpers.c
 create mode 100644 tools/testing/selftests/bpf/cap_helpers.h

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/cap_helpers.c b/tools/testing/selftests/bpf/cap_helpers.c
new file mode 100644
index 000000000000..d5ac507401d7
--- /dev/null
+++ b/tools/testing/selftests/bpf/cap_helpers.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "cap_helpers.h"
+
+/* Avoid including <sys/capability.h> from the libcap-devel package,
+ * so directly declare them here and use them from glibc.
+ */
+int capget(cap_user_header_t header, cap_user_data_t data);
+int capset(cap_user_header_t header, const cap_user_data_t data);
+
+int cap_enable_effective(__u64 caps, __u64 *old_caps)
+{
+	struct __user_cap_data_struct data[_LINUX_CAPABILITY_U32S_3];
+	struct __user_cap_header_struct hdr = {
+		.version = _LINUX_CAPABILITY_VERSION_3,
+	};
+	__u32 cap0 = caps;
+	__u32 cap1 = caps >> 32;
+	int err;
+
+	err = capget(&hdr, data);
+	if (err)
+		return err;
+
+	if (old_caps)
+		*old_caps = (__u64)(data[1].effective) << 32 | data[0].effective;
+
+	if ((data[0].effective & cap0) == cap0 &&
+	    (data[1].effective & cap1) == cap1)
+		return 0;
+
+	data[0].effective |= cap0;
+	data[1].effective |= cap1;
+	err = capset(&hdr, data);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+int cap_disable_effective(__u64 caps, __u64 *old_caps)
+{
+	struct __user_cap_data_struct data[_LINUX_CAPABILITY_U32S_3];
+	struct __user_cap_header_struct hdr = {
+		.version = _LINUX_CAPABILITY_VERSION_3,
+	};
+	__u32 cap0 = caps;
+	__u32 cap1 = caps >> 32;
+	int err;
+
+	err = capget(&hdr, data);
+	if (err)
+		return err;
+
+	if (old_caps)
+		*old_caps = (__u64)(data[1].effective) << 32 | data[0].effective;
+
+	if (!(data[0].effective & cap0) && !(data[1].effective & cap1))
+		return 0;
+
+	data[0].effective &= ~cap0;
+	data[1].effective &= ~cap1;
+	err = capset(&hdr, data);
+	if (err)
+		return err;
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/cap_helpers.h b/tools/testing/selftests/bpf/cap_helpers.h
new file mode 100644
index 000000000000..6d163530cb0f
--- /dev/null
+++ b/tools/testing/selftests/bpf/cap_helpers.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __CAP_HELPERS_H
+#define __CAP_HELPERS_H
+
+#include <linux/types.h>
+#include <linux/capability.h>
+
+#ifndef CAP_PERFMON
+#define CAP_PERFMON		38
+#endif
+
+#ifndef CAP_BPF
+#define CAP_BPF			39
+#endif
+
+int cap_enable_effective(__u64 caps, __u64 *old_caps);
+int cap_disable_effective(__u64 caps, __u64 *old_caps);
+
+#endif
-- 
cgit 


From b1c2768a82b9cd149f54e335de38ea1212f47b07 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 16 Mar 2022 10:38:29 -0700
Subject: bpf: selftests: Remove libcap usage from test_verifier

This patch removes the libcap usage from test_verifier.
The cap_*_effective() helpers added in the earlier patch are
used instead.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20220316173829.2038682-1-kafai@fb.com
---
 tools/testing/selftests/bpf/Makefile        |  3 +-
 tools/testing/selftests/bpf/test_verifier.c | 88 ++++++++---------------------
 2 files changed, 27 insertions(+), 64 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index fe12b4f5fe20..1c6e55740019 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -195,6 +195,7 @@ $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(BPFOBJ)
 CGROUP_HELPERS	:= $(OUTPUT)/cgroup_helpers.o
 TESTING_HELPERS	:= $(OUTPUT)/testing_helpers.o
 TRACE_HELPERS	:= $(OUTPUT)/trace_helpers.o
+CAP_HELPERS	:= $(OUTPUT)/cap_helpers.o
 
 $(OUTPUT)/test_dev_cgroup: $(CGROUP_HELPERS) $(TESTING_HELPERS)
 $(OUTPUT)/test_skb_cgroup_id_user: $(CGROUP_HELPERS) $(TESTING_HELPERS)
@@ -211,7 +212,7 @@ $(OUTPUT)/test_lirc_mode2_user: $(TESTING_HELPERS)
 $(OUTPUT)/xdping: $(TESTING_HELPERS)
 $(OUTPUT)/flow_dissector_load: $(TESTING_HELPERS)
 $(OUTPUT)/test_maps: $(TESTING_HELPERS)
-$(OUTPUT)/test_verifier: $(TESTING_HELPERS)
+$(OUTPUT)/test_verifier: $(TESTING_HELPERS) $(CAP_HELPERS)
 
 BPFTOOL ?= $(DEFAULT_BPFTOOL)
 $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile)    \
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 92e3465fbae8..a2cd236c32eb 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -22,8 +22,6 @@
 #include <limits.h>
 #include <assert.h>
 
-#include <sys/capability.h>
-
 #include <linux/unistd.h>
 #include <linux/filter.h>
 #include <linux/bpf_perf_event.h>
@@ -42,6 +40,7 @@
 #  define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 1
 # endif
 #endif
+#include "cap_helpers.h"
 #include "bpf_rand.h"
 #include "bpf_util.h"
 #include "test_btf.h"
@@ -62,6 +61,10 @@
 #define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS	(1 << 0)
 #define F_LOAD_WITH_STRICT_ALIGNMENT		(1 << 1)
 
+/* need CAP_BPF, CAP_NET_ADMIN, CAP_PERFMON to load progs */
+#define ADMIN_CAPS (1ULL << CAP_NET_ADMIN |	\
+		    1ULL << CAP_PERFMON |	\
+		    1ULL << CAP_BPF)
 #define UNPRIV_SYSCTL "kernel/unprivileged_bpf_disabled"
 static bool unpriv_disabled = false;
 static int skips;
@@ -973,47 +976,19 @@ struct libcap {
 
 static int set_admin(bool admin)
 {
-	cap_t caps;
-	/* need CAP_BPF, CAP_NET_ADMIN, CAP_PERFMON to load progs */
-	const cap_value_t cap_net_admin = CAP_NET_ADMIN;
-	const cap_value_t cap_sys_admin = CAP_SYS_ADMIN;
-	struct libcap *cap;
-	int ret = -1;
-
-	caps = cap_get_proc();
-	if (!caps) {
-		perror("cap_get_proc");
-		return -1;
-	}
-	cap = (struct libcap *)caps;
-	if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_sys_admin, CAP_CLEAR)) {
-		perror("cap_set_flag clear admin");
-		goto out;
-	}
-	if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_net_admin,
-				admin ? CAP_SET : CAP_CLEAR)) {
-		perror("cap_set_flag set_or_clear net");
-		goto out;
-	}
-	/* libcap is likely old and simply ignores CAP_BPF and CAP_PERFMON,
-	 * so update effective bits manually
-	 */
+	int err;
+
 	if (admin) {
-		cap->data[1].effective |= 1 << (38 /* CAP_PERFMON */ - 32);
-		cap->data[1].effective |= 1 << (39 /* CAP_BPF */ - 32);
+		err = cap_enable_effective(ADMIN_CAPS, NULL);
+		if (err)
+			perror("cap_enable_effective(ADMIN_CAPS)");
 	} else {
-		cap->data[1].effective &= ~(1 << (38 - 32));
-		cap->data[1].effective &= ~(1 << (39 - 32));
-	}
-	if (cap_set_proc(caps)) {
-		perror("cap_set_proc");
-		goto out;
+		err = cap_disable_effective(ADMIN_CAPS, NULL);
+		if (err)
+			perror("cap_disable_effective(ADMIN_CAPS)");
 	}
-	ret = 0;
-out:
-	if (cap_free(caps))
-		perror("cap_free");
-	return ret;
+
+	return err;
 }
 
 static int do_prog_test_run(int fd_prog, bool unpriv, uint32_t expected_val,
@@ -1291,31 +1266,18 @@ fail_log:
 
 static bool is_admin(void)
 {
-	cap_flag_value_t net_priv = CAP_CLEAR;
-	bool perfmon_priv = false;
-	bool bpf_priv = false;
-	struct libcap *cap;
-	cap_t caps;
-
-#ifdef CAP_IS_SUPPORTED
-	if (!CAP_IS_SUPPORTED(CAP_SETFCAP)) {
-		perror("cap_get_flag");
-		return false;
-	}
-#endif
-	caps = cap_get_proc();
-	if (!caps) {
-		perror("cap_get_proc");
+	__u64 caps;
+
+	/* The test checks for finer cap as CAP_NET_ADMIN,
+	 * CAP_PERFMON, and CAP_BPF instead of CAP_SYS_ADMIN.
+	 * Thus, disable CAP_SYS_ADMIN at the beginning.
+	 */
+	if (cap_disable_effective(1ULL << CAP_SYS_ADMIN, &caps)) {
+		perror("cap_disable_effective(CAP_SYS_ADMIN)");
 		return false;
 	}
-	cap = (struct libcap *)caps;
-	bpf_priv = cap->data[1].effective & (1 << (39/* CAP_BPF */ - 32));
-	perfmon_priv = cap->data[1].effective & (1 << (38/* CAP_PERFMON */ - 32));
-	if (cap_get_flag(caps, CAP_NET_ADMIN, CAP_EFFECTIVE, &net_priv))
-		perror("cap_get_flag NET");
-	if (cap_free(caps))
-		perror("cap_free");
-	return bpf_priv && perfmon_priv && net_priv == CAP_SET;
+
+	return (caps & ADMIN_CAPS) == ADMIN_CAPS;
 }
 
 static void get_unpriv_disabled()
-- 
cgit 


From 82cb2b30773e3ccbbd2ed4427d52a91862d4db6d Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 16 Mar 2022 10:38:35 -0700
Subject: bpf: selftests: Remove libcap usage from test_progs

This patch removes the libcap usage from test_progs.
bind_perm.c is the only user.  cap_*_effective() helpers added in the
earlier patch are directly used instead.

No other selftest binary is using libcap, so '-lcap' is also removed
from the Makefile.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Stanislav Fomichev <sdf@google.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20220316173835.2039334-1-kafai@fb.com
---
 tools/testing/selftests/bpf/Makefile               |  5 ++-
 tools/testing/selftests/bpf/prog_tests/bind_perm.c | 44 ++++------------------
 2 files changed, 11 insertions(+), 38 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 1c6e55740019..11f5883636c3 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -25,7 +25,7 @@ CFLAGS += -g -O0 -rdynamic -Wall -Werror $(GENFLAGS) $(SAN_CFLAGS)	\
 	  -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR)		\
 	  -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT)
 LDFLAGS += $(SAN_CFLAGS)
-LDLIBS += -lcap -lelf -lz -lrt -lpthread
+LDLIBS += -lelf -lz -lrt -lpthread
 
 # Silence some warnings when compiled with clang
 ifneq ($(LLVM),)
@@ -480,7 +480,8 @@ TRUNNER_TESTS_DIR := prog_tests
 TRUNNER_BPF_PROGS_DIR := progs
 TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c	\
 			 network_helpers.c testing_helpers.c		\
-			 btf_helpers.c flow_dissector_load.h
+			 btf_helpers.c flow_dissector_load.h		\
+			 cap_helpers.c
 TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko	\
 		       ima_setup.sh					\
 		       $(wildcard progs/btf_dump_test_case_*.c)
diff --git a/tools/testing/selftests/bpf/prog_tests/bind_perm.c b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
index eac71fbb24ce..a1766a298bb7 100644
--- a/tools/testing/selftests/bpf/prog_tests/bind_perm.c
+++ b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
@@ -4,9 +4,9 @@
 #include <stdlib.h>
 #include <sys/types.h>
 #include <sys/socket.h>
-#include <sys/capability.h>
 
 #include "test_progs.h"
+#include "cap_helpers.h"
 #include "bind_perm.skel.h"
 
 static int duration;
@@ -49,41 +49,11 @@ close_socket:
 		close(fd);
 }
 
-bool cap_net_bind_service(cap_flag_value_t flag)
-{
-	const cap_value_t cap_net_bind_service = CAP_NET_BIND_SERVICE;
-	cap_flag_value_t original_value;
-	bool was_effective = false;
-	cap_t caps;
-
-	caps = cap_get_proc();
-	if (CHECK(!caps, "cap_get_proc", "errno %d", errno))
-		goto free_caps;
-
-	if (CHECK(cap_get_flag(caps, CAP_NET_BIND_SERVICE, CAP_EFFECTIVE,
-			       &original_value),
-		  "cap_get_flag", "errno %d", errno))
-		goto free_caps;
-
-	was_effective = (original_value == CAP_SET);
-
-	if (CHECK(cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_net_bind_service,
-			       flag),
-		  "cap_set_flag", "errno %d", errno))
-		goto free_caps;
-
-	if (CHECK(cap_set_proc(caps), "cap_set_proc", "errno %d", errno))
-		goto free_caps;
-
-free_caps:
-	CHECK(cap_free(caps), "cap_free", "errno %d", errno);
-	return was_effective;
-}
-
 void test_bind_perm(void)
 {
-	bool cap_was_effective;
+	const __u64 net_bind_svc_cap = 1ULL << CAP_NET_BIND_SERVICE;
 	struct bind_perm *skel;
+	__u64 old_caps = 0;
 	int cgroup_fd;
 
 	if (create_netns())
@@ -105,7 +75,8 @@ void test_bind_perm(void)
 	if (!ASSERT_OK_PTR(skel, "bind_v6_prog"))
 		goto close_skeleton;
 
-	cap_was_effective = cap_net_bind_service(CAP_CLEAR);
+	ASSERT_OK(cap_disable_effective(net_bind_svc_cap, &old_caps),
+		  "cap_disable_effective");
 
 	try_bind(AF_INET, 110, EACCES);
 	try_bind(AF_INET6, 110, EACCES);
@@ -113,8 +84,9 @@ void test_bind_perm(void)
 	try_bind(AF_INET, 111, 0);
 	try_bind(AF_INET6, 111, 0);
 
-	if (cap_was_effective)
-		cap_net_bind_service(CAP_SET);
+	if (old_caps & net_bind_svc_cap)
+		ASSERT_OK(cap_enable_effective(net_bind_svc_cap, NULL),
+			  "cap_enable_effective");
 
 close_skeleton:
 	bind_perm__destroy(skel);
-- 
cgit 


From ad13baf4569152b00de11949b8c93aaa83c1243f Mon Sep 17 00:00:00 2001
From: Hou Tao <houtao1@huawei.com>
Date: Wed, 9 Mar 2022 20:33:21 +0800
Subject: selftests/bpf: Test subprog jit when toggle bpf_jit_harden repeatedly

When bpf_jit_harden is toggled between 0 and 2, subprog jit may fail
due to inconsistent twice read values of bpf_jit_harden during jit. So
add a test to ensure the problem is fixed.

Signed-off-by: Hou Tao <houtao1@huawei.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220309123321.2400262-5-houtao1@huawei.com
---
 tools/testing/selftests/bpf/prog_tests/subprogs.c | 77 ++++++++++++++++++++---
 1 file changed, 68 insertions(+), 9 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/subprogs.c b/tools/testing/selftests/bpf/prog_tests/subprogs.c
index 3f3d2ac4dd57..903f35a9e62e 100644
--- a/tools/testing/selftests/bpf/prog_tests/subprogs.c
+++ b/tools/testing/selftests/bpf/prog_tests/subprogs.c
@@ -1,32 +1,83 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
 #include <test_progs.h>
-#include <time.h>
 #include "test_subprogs.skel.h"
 #include "test_subprogs_unused.skel.h"
 
-static int duration;
+struct toggler_ctx {
+	int fd;
+	bool stop;
+};
 
-void test_subprogs(void)
+static void *toggle_jit_harden(void *arg)
+{
+	struct toggler_ctx *ctx = arg;
+	char two = '2';
+	char zero = '0';
+
+	while (!ctx->stop) {
+		lseek(ctx->fd, SEEK_SET, 0);
+		write(ctx->fd, &two, sizeof(two));
+		lseek(ctx->fd, SEEK_SET, 0);
+		write(ctx->fd, &zero, sizeof(zero));
+	}
+
+	return NULL;
+}
+
+static void test_subprogs_with_jit_harden_toggling(void)
+{
+	struct toggler_ctx ctx;
+	pthread_t toggler;
+	int err;
+	unsigned int i, loop = 10;
+
+	ctx.fd = open("/proc/sys/net/core/bpf_jit_harden", O_RDWR);
+	if (!ASSERT_GE(ctx.fd, 0, "open bpf_jit_harden"))
+		return;
+
+	ctx.stop = false;
+	err = pthread_create(&toggler, NULL, toggle_jit_harden, &ctx);
+	if (!ASSERT_OK(err, "new toggler"))
+		goto out;
+
+	/* Make toggler thread to run */
+	usleep(1);
+
+	for (i = 0; i < loop; i++) {
+		struct test_subprogs *skel = test_subprogs__open_and_load();
+
+		if (!ASSERT_OK_PTR(skel, "skel open"))
+			break;
+		test_subprogs__destroy(skel);
+	}
+
+	ctx.stop = true;
+	pthread_join(toggler, NULL);
+out:
+	close(ctx.fd);
+}
+
+static void test_subprogs_alone(void)
 {
 	struct test_subprogs *skel;
 	struct test_subprogs_unused *skel2;
 	int err;
 
 	skel = test_subprogs__open_and_load();
-	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
 		return;
 
 	err = test_subprogs__attach(skel);
-	if (CHECK(err, "skel_attach", "failed to attach skeleton: %d\n", err))
+	if (!ASSERT_OK(err, "skel attach"))
 		goto cleanup;
 
 	usleep(1);
 
-	CHECK(skel->bss->res1 != 12, "res1", "got %d, exp %d\n", skel->bss->res1, 12);
-	CHECK(skel->bss->res2 != 17, "res2", "got %d, exp %d\n", skel->bss->res2, 17);
-	CHECK(skel->bss->res3 != 19, "res3", "got %d, exp %d\n", skel->bss->res3, 19);
-	CHECK(skel->bss->res4 != 36, "res4", "got %d, exp %d\n", skel->bss->res4, 36);
+	ASSERT_EQ(skel->bss->res1, 12, "res1");
+	ASSERT_EQ(skel->bss->res2, 17, "res2");
+	ASSERT_EQ(skel->bss->res3, 19, "res3");
+	ASSERT_EQ(skel->bss->res4, 36, "res4");
 
 	skel2 = test_subprogs_unused__open_and_load();
 	ASSERT_OK_PTR(skel2, "unused_progs_skel");
@@ -35,3 +86,11 @@ void test_subprogs(void)
 cleanup:
 	test_subprogs__destroy(skel);
 }
+
+void test_subprogs(void)
+{
+	if (test__start_subtest("subprogs_alone"))
+		test_subprogs_alone();
+	if (test__start_subtest("subprogs_and_jit_harden"))
+		test_subprogs_with_jit_harden_toggling();
+}
-- 
cgit 


From 1abea24af42c35c6eb537e4402836e2cde2a5b13 Mon Sep 17 00:00:00 2001
From: Guo Zhengkui <guozhengkui@vivo.com>
Date: Wed, 16 Mar 2022 17:28:57 +0800
Subject: selftests: net: fix array_size.cocci warning

Fix array_size.cocci warning in tools/testing/selftests/net.

Use `ARRAY_SIZE(arr)` instead of forms like `sizeof(arr)/sizeof(arr[0])`.

It has been tested with gcc (Debian 8.3.0-6) 8.3.0.

Signed-off-by: Guo Zhengkui <guozhengkui@vivo.com>
Link: https://lore.kernel.org/r/20220316092858.9398-1-guozhengkui@vivo.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 tools/testing/selftests/net/cmsg_sender.c  | 4 +++-
 tools/testing/selftests/net/psock_fanout.c | 5 +++--
 tools/testing/selftests/net/toeplitz.c     | 6 ++++--
 3 files changed, 10 insertions(+), 5 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c
index aed7845c08a8..bc2162909a1a 100644
--- a/tools/testing/selftests/net/cmsg_sender.c
+++ b/tools/testing/selftests/net/cmsg_sender.c
@@ -16,6 +16,8 @@
 #include <linux/udp.h>
 #include <sys/socket.h>
 
+#include "../kselftest.h"
+
 enum {
 	ERN_SUCCESS = 0,
 	/* Well defined errors, callers may depend on these */
@@ -318,7 +320,7 @@ static const char *cs_ts_info2str(unsigned int info)
 		[SCM_TSTAMP_ACK]	= "ACK",
 	};
 
-	if (info < sizeof(names) / sizeof(names[0]))
+	if (info < ARRAY_SIZE(names))
 		return names[info];
 	return "unknown";
 }
diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c
index 3653d6468c67..1a736f700be4 100644
--- a/tools/testing/selftests/net/psock_fanout.c
+++ b/tools/testing/selftests/net/psock_fanout.c
@@ -53,6 +53,7 @@
 #include <unistd.h>
 
 #include "psock_lib.h"
+#include "../kselftest.h"
 
 #define RING_NUM_FRAMES			20
 
@@ -117,7 +118,7 @@ static void sock_fanout_set_cbpf(int fd)
 	struct sock_fprog bpf_prog;
 
 	bpf_prog.filter = bpf_filter;
-	bpf_prog.len = sizeof(bpf_filter) / sizeof(struct sock_filter);
+	bpf_prog.len = ARRAY_SIZE(bpf_filter);
 
 	if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT_DATA, &bpf_prog,
 		       sizeof(bpf_prog))) {
@@ -162,7 +163,7 @@ static void sock_fanout_set_ebpf(int fd)
 	memset(&attr, 0, sizeof(attr));
 	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
 	attr.insns = (unsigned long) prog;
-	attr.insn_cnt = sizeof(prog) / sizeof(prog[0]);
+	attr.insn_cnt = ARRAY_SIZE(prog);
 	attr.license = (unsigned long) "GPL";
 	attr.log_buf = (unsigned long) log_buf,
 	attr.log_size = sizeof(log_buf),
diff --git a/tools/testing/selftests/net/toeplitz.c b/tools/testing/selftests/net/toeplitz.c
index c5489341cfb8..90026a27eac0 100644
--- a/tools/testing/selftests/net/toeplitz.c
+++ b/tools/testing/selftests/net/toeplitz.c
@@ -52,6 +52,8 @@
 #include <sys/types.h>
 #include <unistd.h>
 
+#include "../kselftest.h"
+
 #define TOEPLITZ_KEY_MIN_LEN	40
 #define TOEPLITZ_KEY_MAX_LEN	60
 
@@ -295,7 +297,7 @@ static void __set_filter(int fd, int off_proto, uint8_t proto, int off_dport)
 	struct sock_fprog prog = {};
 
 	prog.filter = filter;
-	prog.len = sizeof(filter) / sizeof(struct sock_filter);
+	prog.len = ARRAY_SIZE(filter);
 	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
 		error(1, errno, "setsockopt filter");
 }
@@ -324,7 +326,7 @@ static void set_filter_null(int fd)
 	struct sock_fprog prog = {};
 
 	prog.filter = filter;
-	prog.len = sizeof(filter) / sizeof(struct sock_filter);
+	prog.len = ARRAY_SIZE(filter);
 	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
 		error(1, errno, "setsockopt filter");
 }
-- 
cgit 


From e0999c8e590935b13dd598a6480685eae9c1b3c5 Mon Sep 17 00:00:00 2001
From: Kaixi Fan <fankaixi.li@bytedance.com>
Date: Mon, 14 Mar 2022 00:41:16 +0800
Subject: selftests/bpf: Fix tunnel remote IP comments

In namespace at_ns0, the IP address of tnl dev is 10.1.1.100 which is the
overlay IP, and the ip address of veth0 is 172.16.1.100 which is the vtep
IP. When doing 'ping 10.1.1.100' from root namespace, the remote_ip should
be 172.16.1.100.

Fixes: 933a741e3b82 ("selftests/bpf: bpf tunnel test.")
Signed-off-by: Kaixi Fan <fankaixi.li@bytedance.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20220313164116.5889-1-fankaixi.li@bytedance.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_tunnel.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh
index ca1372924023..2817d9948d59 100755
--- a/tools/testing/selftests/bpf/test_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tunnel.sh
@@ -39,7 +39,7 @@
 # from root namespace, the following operations happen:
 # 1) Route lookup shows 10.1.1.100/24 belongs to tnl dev, fwd to tnl dev.
 # 2) Tnl device's egress BPF program is triggered and set the tunnel metadata,
-#    with remote_ip=172.16.1.200 and others.
+#    with remote_ip=172.16.1.100 and others.
 # 3) Outer tunnel header is prepended and route the packet to veth1's egress
 # 4) veth0's ingress queue receive the tunneled packet at namespace at_ns0
 # 5) Tunnel protocol handler, ex: vxlan_rcv, decap the packet
-- 
cgit 


From f7a11eeccb11185437f4da1c80b66b857d1e906f Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 16 Mar 2022 13:24:16 +0100
Subject: selftests/bpf: Add kprobe_multi attach test

Adding kprobe_multi attach test that uses new fprobe interface to
attach kprobe program to multiple functions.

The test is attaching programs to bpf_fentry_test* functions and
uses single trampoline program bpf_prog_test_run to trigger
bpf_fentry_test* functions.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220316122419.933957-11-jolsa@kernel.org
---
 .../selftests/bpf/prog_tests/kprobe_multi_test.c   | 141 +++++++++++++++++++++
 tools/testing/selftests/bpf/progs/kprobe_multi.c   |  95 ++++++++++++++
 tools/testing/selftests/bpf/trace_helpers.c        |   7 +
 3 files changed, 243 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
 create mode 100644 tools/testing/selftests/bpf/progs/kprobe_multi.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
new file mode 100644
index 000000000000..ded6b8c8ec05
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "kprobe_multi.skel.h"
+#include "trace_helpers.h"
+
+static void kprobe_multi_test_run(struct kprobe_multi *skel)
+{
+	LIBBPF_OPTS(bpf_test_run_opts, topts);
+	int err, prog_fd;
+
+	prog_fd = bpf_program__fd(skel->progs.trigger);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
+	ASSERT_OK(err, "test_run");
+	ASSERT_EQ(topts.retval, 0, "test_run");
+
+	ASSERT_EQ(skel->bss->kprobe_test1_result, 1, "kprobe_test1_result");
+	ASSERT_EQ(skel->bss->kprobe_test2_result, 1, "kprobe_test2_result");
+	ASSERT_EQ(skel->bss->kprobe_test3_result, 1, "kprobe_test3_result");
+	ASSERT_EQ(skel->bss->kprobe_test4_result, 1, "kprobe_test4_result");
+	ASSERT_EQ(skel->bss->kprobe_test5_result, 1, "kprobe_test5_result");
+	ASSERT_EQ(skel->bss->kprobe_test6_result, 1, "kprobe_test6_result");
+	ASSERT_EQ(skel->bss->kprobe_test7_result, 1, "kprobe_test7_result");
+	ASSERT_EQ(skel->bss->kprobe_test8_result, 1, "kprobe_test8_result");
+
+	ASSERT_EQ(skel->bss->kretprobe_test1_result, 1, "kretprobe_test1_result");
+	ASSERT_EQ(skel->bss->kretprobe_test2_result, 1, "kretprobe_test2_result");
+	ASSERT_EQ(skel->bss->kretprobe_test3_result, 1, "kretprobe_test3_result");
+	ASSERT_EQ(skel->bss->kretprobe_test4_result, 1, "kretprobe_test4_result");
+	ASSERT_EQ(skel->bss->kretprobe_test5_result, 1, "kretprobe_test5_result");
+	ASSERT_EQ(skel->bss->kretprobe_test6_result, 1, "kretprobe_test6_result");
+	ASSERT_EQ(skel->bss->kretprobe_test7_result, 1, "kretprobe_test7_result");
+	ASSERT_EQ(skel->bss->kretprobe_test8_result, 1, "kretprobe_test8_result");
+}
+
+static void test_skel_api(void)
+{
+	struct kprobe_multi *skel = NULL;
+	int err;
+
+	skel = kprobe_multi__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "kprobe_multi__open_and_load"))
+		goto cleanup;
+
+	skel->bss->pid = getpid();
+	err = kprobe_multi__attach(skel);
+	if (!ASSERT_OK(err, "kprobe_multi__attach"))
+		goto cleanup;
+
+	kprobe_multi_test_run(skel);
+
+cleanup:
+	kprobe_multi__destroy(skel);
+}
+
+static void test_link_api(struct bpf_link_create_opts *opts)
+{
+	int prog_fd, link1_fd = -1, link2_fd = -1;
+	struct kprobe_multi *skel = NULL;
+
+	skel = kprobe_multi__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "fentry_raw_skel_load"))
+		goto cleanup;
+
+	skel->bss->pid = getpid();
+	prog_fd = bpf_program__fd(skel->progs.test_kprobe);
+	link1_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_KPROBE_MULTI, opts);
+	if (!ASSERT_GE(link1_fd, 0, "link_fd"))
+		goto cleanup;
+
+	opts->kprobe_multi.flags = BPF_F_KPROBE_MULTI_RETURN;
+	prog_fd = bpf_program__fd(skel->progs.test_kretprobe);
+	link2_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_KPROBE_MULTI, opts);
+	if (!ASSERT_GE(link2_fd, 0, "link_fd"))
+		goto cleanup;
+
+	kprobe_multi_test_run(skel);
+
+cleanup:
+	if (link1_fd != -1)
+		close(link1_fd);
+	if (link2_fd != -1)
+		close(link2_fd);
+	kprobe_multi__destroy(skel);
+}
+
+#define GET_ADDR(__sym, __addr) ({					\
+	__addr = ksym_get_addr(__sym);					\
+	if (!ASSERT_NEQ(__addr, 0, "kallsyms load failed for " #__sym))	\
+		return;							\
+})
+
+static void test_link_api_addrs(void)
+{
+	LIBBPF_OPTS(bpf_link_create_opts, opts);
+	unsigned long long addrs[8];
+
+	GET_ADDR("bpf_fentry_test1", addrs[0]);
+	GET_ADDR("bpf_fentry_test2", addrs[1]);
+	GET_ADDR("bpf_fentry_test3", addrs[2]);
+	GET_ADDR("bpf_fentry_test4", addrs[3]);
+	GET_ADDR("bpf_fentry_test5", addrs[4]);
+	GET_ADDR("bpf_fentry_test6", addrs[5]);
+	GET_ADDR("bpf_fentry_test7", addrs[6]);
+	GET_ADDR("bpf_fentry_test8", addrs[7]);
+
+	opts.kprobe_multi.addrs = (const unsigned long*) addrs;
+	opts.kprobe_multi.cnt = ARRAY_SIZE(addrs);
+	test_link_api(&opts);
+}
+
+static void test_link_api_syms(void)
+{
+	LIBBPF_OPTS(bpf_link_create_opts, opts);
+	const char *syms[8] = {
+		"bpf_fentry_test1",
+		"bpf_fentry_test2",
+		"bpf_fentry_test3",
+		"bpf_fentry_test4",
+		"bpf_fentry_test5",
+		"bpf_fentry_test6",
+		"bpf_fentry_test7",
+		"bpf_fentry_test8",
+	};
+
+	opts.kprobe_multi.syms = syms;
+	opts.kprobe_multi.cnt = ARRAY_SIZE(syms);
+	test_link_api(&opts);
+}
+
+void test_kprobe_multi_test(void)
+{
+	if (!ASSERT_OK(load_kallsyms(), "load_kallsyms"))
+		return;
+
+	if (test__start_subtest("skel_api"))
+		test_skel_api();
+	if (test__start_subtest("link_api_addrs"))
+		test_link_api_syms();
+	if (test__start_subtest("link_api_syms"))
+		test_link_api_addrs();
+}
diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi.c b/tools/testing/selftests/bpf/progs/kprobe_multi.c
new file mode 100644
index 000000000000..6616c082c8c2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kprobe_multi.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <stdbool.h>
+
+char _license[] SEC("license") = "GPL";
+
+extern const void bpf_fentry_test1 __ksym;
+extern const void bpf_fentry_test2 __ksym;
+extern const void bpf_fentry_test3 __ksym;
+extern const void bpf_fentry_test4 __ksym;
+extern const void bpf_fentry_test5 __ksym;
+extern const void bpf_fentry_test6 __ksym;
+extern const void bpf_fentry_test7 __ksym;
+extern const void bpf_fentry_test8 __ksym;
+
+int pid = 0;
+
+__u64 kprobe_test1_result = 0;
+__u64 kprobe_test2_result = 0;
+__u64 kprobe_test3_result = 0;
+__u64 kprobe_test4_result = 0;
+__u64 kprobe_test5_result = 0;
+__u64 kprobe_test6_result = 0;
+__u64 kprobe_test7_result = 0;
+__u64 kprobe_test8_result = 0;
+
+__u64 kretprobe_test1_result = 0;
+__u64 kretprobe_test2_result = 0;
+__u64 kretprobe_test3_result = 0;
+__u64 kretprobe_test4_result = 0;
+__u64 kretprobe_test5_result = 0;
+__u64 kretprobe_test6_result = 0;
+__u64 kretprobe_test7_result = 0;
+__u64 kretprobe_test8_result = 0;
+
+static void kprobe_multi_check(void *ctx, bool is_return)
+{
+	if (bpf_get_current_pid_tgid() >> 32 != pid)
+		return;
+
+	__u64 addr = bpf_get_func_ip(ctx);
+
+#define SET(__var, __addr) ({			\
+	if ((const void *) addr == __addr) 	\
+		__var = 1;			\
+})
+
+	if (is_return) {
+		SET(kretprobe_test1_result, &bpf_fentry_test1);
+		SET(kretprobe_test2_result, &bpf_fentry_test2);
+		SET(kretprobe_test3_result, &bpf_fentry_test3);
+		SET(kretprobe_test4_result, &bpf_fentry_test4);
+		SET(kretprobe_test5_result, &bpf_fentry_test5);
+		SET(kretprobe_test6_result, &bpf_fentry_test6);
+		SET(kretprobe_test7_result, &bpf_fentry_test7);
+		SET(kretprobe_test8_result, &bpf_fentry_test8);
+	} else {
+		SET(kprobe_test1_result, &bpf_fentry_test1);
+		SET(kprobe_test2_result, &bpf_fentry_test2);
+		SET(kprobe_test3_result, &bpf_fentry_test3);
+		SET(kprobe_test4_result, &bpf_fentry_test4);
+		SET(kprobe_test5_result, &bpf_fentry_test5);
+		SET(kprobe_test6_result, &bpf_fentry_test6);
+		SET(kprobe_test7_result, &bpf_fentry_test7);
+		SET(kprobe_test8_result, &bpf_fentry_test8);
+	}
+
+#undef SET
+}
+
+/*
+ * No tests in here, just to trigger 'bpf_fentry_test*'
+ * through tracing test_run
+ */
+SEC("fentry/bpf_modify_return_test")
+int BPF_PROG(trigger)
+{
+	return 0;
+}
+
+SEC("kprobe.multi/bpf_fentry_tes??")
+int test_kprobe(struct pt_regs *ctx)
+{
+	kprobe_multi_check(ctx, false);
+	return 0;
+}
+
+SEC("kretprobe.multi/bpf_fentry_test*")
+int test_kretprobe(struct pt_regs *ctx)
+{
+	kprobe_multi_check(ctx, true);
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index ca6abae9b09c..3d6217e3aff7 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -34,6 +34,13 @@ int load_kallsyms(void)
 	if (!f)
 		return -ENOENT;
 
+	/*
+	 * This is called/used from multiplace places,
+	 * load symbols just once.
+	 */
+	if (sym_cnt)
+		return 0;
+
 	while (fgets(buf, sizeof(buf), f)) {
 		if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3)
 			break;
-- 
cgit 


From 2c6401c966ae1fbe07eb3b8a07256dc41b596928 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 16 Mar 2022 13:24:17 +0100
Subject: selftests/bpf: Add kprobe_multi bpf_cookie test

Adding bpf_cookie test for programs attached by kprobe_multi links.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220316122419.933957-12-jolsa@kernel.org
---
 .../testing/selftests/bpf/prog_tests/bpf_cookie.c  | 109 +++++++++++++++++++++
 tools/testing/selftests/bpf/progs/kprobe_multi.c   |  41 ++++----
 2 files changed, 131 insertions(+), 19 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
index 0612e79a9281..6671d4dc0b5d 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
@@ -7,6 +7,7 @@
 #include <unistd.h>
 #include <test_progs.h>
 #include "test_bpf_cookie.skel.h"
+#include "kprobe_multi.skel.h"
 
 /* uprobe attach point */
 static void trigger_func(void)
@@ -63,6 +64,112 @@ cleanup:
 	bpf_link__destroy(retlink2);
 }
 
+static void kprobe_multi_test_run(struct kprobe_multi *skel)
+{
+	LIBBPF_OPTS(bpf_test_run_opts, topts);
+	int err, prog_fd;
+
+	prog_fd = bpf_program__fd(skel->progs.trigger);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
+	ASSERT_OK(err, "test_run");
+	ASSERT_EQ(topts.retval, 0, "test_run");
+
+	ASSERT_EQ(skel->bss->kprobe_test1_result, 1, "kprobe_test1_result");
+	ASSERT_EQ(skel->bss->kprobe_test2_result, 1, "kprobe_test2_result");
+	ASSERT_EQ(skel->bss->kprobe_test3_result, 1, "kprobe_test3_result");
+	ASSERT_EQ(skel->bss->kprobe_test4_result, 1, "kprobe_test4_result");
+	ASSERT_EQ(skel->bss->kprobe_test5_result, 1, "kprobe_test5_result");
+	ASSERT_EQ(skel->bss->kprobe_test6_result, 1, "kprobe_test6_result");
+	ASSERT_EQ(skel->bss->kprobe_test7_result, 1, "kprobe_test7_result");
+	ASSERT_EQ(skel->bss->kprobe_test8_result, 1, "kprobe_test8_result");
+
+	ASSERT_EQ(skel->bss->kretprobe_test1_result, 1, "kretprobe_test1_result");
+	ASSERT_EQ(skel->bss->kretprobe_test2_result, 1, "kretprobe_test2_result");
+	ASSERT_EQ(skel->bss->kretprobe_test3_result, 1, "kretprobe_test3_result");
+	ASSERT_EQ(skel->bss->kretprobe_test4_result, 1, "kretprobe_test4_result");
+	ASSERT_EQ(skel->bss->kretprobe_test5_result, 1, "kretprobe_test5_result");
+	ASSERT_EQ(skel->bss->kretprobe_test6_result, 1, "kretprobe_test6_result");
+	ASSERT_EQ(skel->bss->kretprobe_test7_result, 1, "kretprobe_test7_result");
+	ASSERT_EQ(skel->bss->kretprobe_test8_result, 1, "kretprobe_test8_result");
+}
+
+static void kprobe_multi_link_api_subtest(void)
+{
+	int prog_fd, link1_fd = -1, link2_fd = -1;
+	struct kprobe_multi *skel = NULL;
+	LIBBPF_OPTS(bpf_link_create_opts, opts);
+	unsigned long long addrs[8];
+	__u64 cookies[8];
+
+	if (!ASSERT_OK(load_kallsyms(), "load_kallsyms"))
+		goto cleanup;
+
+	skel = kprobe_multi__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "fentry_raw_skel_load"))
+		goto cleanup;
+
+	skel->bss->pid = getpid();
+	skel->bss->test_cookie = true;
+
+#define GET_ADDR(__sym, __addr) ({				\
+	__addr = ksym_get_addr(__sym);				\
+	if (!ASSERT_NEQ(__addr, 0, "ksym_get_addr " #__sym))	\
+		goto cleanup;					\
+})
+
+	GET_ADDR("bpf_fentry_test1", addrs[0]);
+	GET_ADDR("bpf_fentry_test2", addrs[1]);
+	GET_ADDR("bpf_fentry_test3", addrs[2]);
+	GET_ADDR("bpf_fentry_test4", addrs[3]);
+	GET_ADDR("bpf_fentry_test5", addrs[4]);
+	GET_ADDR("bpf_fentry_test6", addrs[5]);
+	GET_ADDR("bpf_fentry_test7", addrs[6]);
+	GET_ADDR("bpf_fentry_test8", addrs[7]);
+
+#undef GET_ADDR
+
+	cookies[0] = 1;
+	cookies[1] = 2;
+	cookies[2] = 3;
+	cookies[3] = 4;
+	cookies[4] = 5;
+	cookies[5] = 6;
+	cookies[6] = 7;
+	cookies[7] = 8;
+
+	opts.kprobe_multi.addrs = (const unsigned long *) &addrs;
+	opts.kprobe_multi.cnt = ARRAY_SIZE(addrs);
+	opts.kprobe_multi.cookies = (const __u64 *) &cookies;
+	prog_fd = bpf_program__fd(skel->progs.test_kprobe);
+
+	link1_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_KPROBE_MULTI, &opts);
+	if (!ASSERT_GE(link1_fd, 0, "link1_fd"))
+		goto cleanup;
+
+	cookies[0] = 8;
+	cookies[1] = 7;
+	cookies[2] = 6;
+	cookies[3] = 5;
+	cookies[4] = 4;
+	cookies[5] = 3;
+	cookies[6] = 2;
+	cookies[7] = 1;
+
+	opts.kprobe_multi.flags = BPF_F_KPROBE_MULTI_RETURN;
+	prog_fd = bpf_program__fd(skel->progs.test_kretprobe);
+
+	link2_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_KPROBE_MULTI, &opts);
+	if (!ASSERT_GE(link2_fd, 0, "link2_fd"))
+		goto cleanup;
+
+	kprobe_multi_test_run(skel);
+
+cleanup:
+	close(link1_fd);
+	close(link2_fd);
+	kprobe_multi__destroy(skel);
+}
+
 static void uprobe_subtest(struct test_bpf_cookie *skel)
 {
 	DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts);
@@ -249,6 +356,8 @@ void test_bpf_cookie(void)
 
 	if (test__start_subtest("kprobe"))
 		kprobe_subtest(skel);
+	if (test__start_subtest("multi_kprobe_link_api"))
+		kprobe_multi_link_api_subtest();
 	if (test__start_subtest("uprobe"))
 		uprobe_subtest(skel);
 	if (test__start_subtest("tracepoint"))
diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi.c b/tools/testing/selftests/bpf/progs/kprobe_multi.c
index 6616c082c8c2..af27d2c6fce8 100644
--- a/tools/testing/selftests/bpf/progs/kprobe_multi.c
+++ b/tools/testing/selftests/bpf/progs/kprobe_multi.c
@@ -16,6 +16,7 @@ extern const void bpf_fentry_test7 __ksym;
 extern const void bpf_fentry_test8 __ksym;
 
 int pid = 0;
+bool test_cookie = false;
 
 __u64 kprobe_test1_result = 0;
 __u64 kprobe_test2_result = 0;
@@ -40,31 +41,33 @@ static void kprobe_multi_check(void *ctx, bool is_return)
 	if (bpf_get_current_pid_tgid() >> 32 != pid)
 		return;
 
+	__u64 cookie = test_cookie ? bpf_get_attach_cookie(ctx) : 0;
 	__u64 addr = bpf_get_func_ip(ctx);
 
-#define SET(__var, __addr) ({			\
-	if ((const void *) addr == __addr) 	\
-		__var = 1;			\
+#define SET(__var, __addr, __cookie) ({			\
+	if (((const void *) addr == __addr) &&		\
+	     (!test_cookie || (cookie == __cookie)))	\
+		__var = 1;				\
 })
 
 	if (is_return) {
-		SET(kretprobe_test1_result, &bpf_fentry_test1);
-		SET(kretprobe_test2_result, &bpf_fentry_test2);
-		SET(kretprobe_test3_result, &bpf_fentry_test3);
-		SET(kretprobe_test4_result, &bpf_fentry_test4);
-		SET(kretprobe_test5_result, &bpf_fentry_test5);
-		SET(kretprobe_test6_result, &bpf_fentry_test6);
-		SET(kretprobe_test7_result, &bpf_fentry_test7);
-		SET(kretprobe_test8_result, &bpf_fentry_test8);
+		SET(kretprobe_test1_result, &bpf_fentry_test1, 8);
+		SET(kretprobe_test2_result, &bpf_fentry_test2, 7);
+		SET(kretprobe_test3_result, &bpf_fentry_test3, 6);
+		SET(kretprobe_test4_result, &bpf_fentry_test4, 5);
+		SET(kretprobe_test5_result, &bpf_fentry_test5, 4);
+		SET(kretprobe_test6_result, &bpf_fentry_test6, 3);
+		SET(kretprobe_test7_result, &bpf_fentry_test7, 2);
+		SET(kretprobe_test8_result, &bpf_fentry_test8, 1);
 	} else {
-		SET(kprobe_test1_result, &bpf_fentry_test1);
-		SET(kprobe_test2_result, &bpf_fentry_test2);
-		SET(kprobe_test3_result, &bpf_fentry_test3);
-		SET(kprobe_test4_result, &bpf_fentry_test4);
-		SET(kprobe_test5_result, &bpf_fentry_test5);
-		SET(kprobe_test6_result, &bpf_fentry_test6);
-		SET(kprobe_test7_result, &bpf_fentry_test7);
-		SET(kprobe_test8_result, &bpf_fentry_test8);
+		SET(kprobe_test1_result, &bpf_fentry_test1, 1);
+		SET(kprobe_test2_result, &bpf_fentry_test2, 2);
+		SET(kprobe_test3_result, &bpf_fentry_test3, 3);
+		SET(kprobe_test4_result, &bpf_fentry_test4, 4);
+		SET(kprobe_test5_result, &bpf_fentry_test5, 5);
+		SET(kprobe_test6_result, &bpf_fentry_test6, 6);
+		SET(kprobe_test7_result, &bpf_fentry_test7, 7);
+		SET(kprobe_test8_result, &bpf_fentry_test8, 8);
 	}
 
 #undef SET
-- 
cgit 


From 9271a0c7ae7a914782759d8fe22ef28fffab82fe Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 16 Mar 2022 13:24:18 +0100
Subject: selftests/bpf: Add attach test for
 bpf_program__attach_kprobe_multi_opts

Adding tests for bpf_program__attach_kprobe_multi_opts function,
that test attach with pattern, symbols and addrs.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220316122419.933957-13-jolsa@kernel.org
---
 .../selftests/bpf/prog_tests/kprobe_multi_test.c   | 204 +++++++++++++++++++--
 1 file changed, 193 insertions(+), 11 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
index ded6b8c8ec05..b9876b55fc0c 100644
--- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
@@ -3,7 +3,7 @@
 #include "kprobe_multi.skel.h"
 #include "trace_helpers.h"
 
-static void kprobe_multi_test_run(struct kprobe_multi *skel)
+static void kprobe_multi_test_run(struct kprobe_multi *skel, bool test_return)
 {
 	LIBBPF_OPTS(bpf_test_run_opts, topts);
 	int err, prog_fd;
@@ -22,14 +22,16 @@ static void kprobe_multi_test_run(struct kprobe_multi *skel)
 	ASSERT_EQ(skel->bss->kprobe_test7_result, 1, "kprobe_test7_result");
 	ASSERT_EQ(skel->bss->kprobe_test8_result, 1, "kprobe_test8_result");
 
-	ASSERT_EQ(skel->bss->kretprobe_test1_result, 1, "kretprobe_test1_result");
-	ASSERT_EQ(skel->bss->kretprobe_test2_result, 1, "kretprobe_test2_result");
-	ASSERT_EQ(skel->bss->kretprobe_test3_result, 1, "kretprobe_test3_result");
-	ASSERT_EQ(skel->bss->kretprobe_test4_result, 1, "kretprobe_test4_result");
-	ASSERT_EQ(skel->bss->kretprobe_test5_result, 1, "kretprobe_test5_result");
-	ASSERT_EQ(skel->bss->kretprobe_test6_result, 1, "kretprobe_test6_result");
-	ASSERT_EQ(skel->bss->kretprobe_test7_result, 1, "kretprobe_test7_result");
-	ASSERT_EQ(skel->bss->kretprobe_test8_result, 1, "kretprobe_test8_result");
+	if (test_return) {
+		ASSERT_EQ(skel->bss->kretprobe_test1_result, 1, "kretprobe_test1_result");
+		ASSERT_EQ(skel->bss->kretprobe_test2_result, 1, "kretprobe_test2_result");
+		ASSERT_EQ(skel->bss->kretprobe_test3_result, 1, "kretprobe_test3_result");
+		ASSERT_EQ(skel->bss->kretprobe_test4_result, 1, "kretprobe_test4_result");
+		ASSERT_EQ(skel->bss->kretprobe_test5_result, 1, "kretprobe_test5_result");
+		ASSERT_EQ(skel->bss->kretprobe_test6_result, 1, "kretprobe_test6_result");
+		ASSERT_EQ(skel->bss->kretprobe_test7_result, 1, "kretprobe_test7_result");
+		ASSERT_EQ(skel->bss->kretprobe_test8_result, 1, "kretprobe_test8_result");
+	}
 }
 
 static void test_skel_api(void)
@@ -46,7 +48,7 @@ static void test_skel_api(void)
 	if (!ASSERT_OK(err, "kprobe_multi__attach"))
 		goto cleanup;
 
-	kprobe_multi_test_run(skel);
+	kprobe_multi_test_run(skel, true);
 
 cleanup:
 	kprobe_multi__destroy(skel);
@@ -73,7 +75,7 @@ static void test_link_api(struct bpf_link_create_opts *opts)
 	if (!ASSERT_GE(link2_fd, 0, "link_fd"))
 		goto cleanup;
 
-	kprobe_multi_test_run(skel);
+	kprobe_multi_test_run(skel, true);
 
 cleanup:
 	if (link1_fd != -1)
@@ -127,6 +129,178 @@ static void test_link_api_syms(void)
 	test_link_api(&opts);
 }
 
+static void
+test_attach_api(const char *pattern, struct bpf_kprobe_multi_opts *opts)
+{
+	struct bpf_link *link1 = NULL, *link2 = NULL;
+	struct kprobe_multi *skel = NULL;
+
+	skel = kprobe_multi__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "fentry_raw_skel_load"))
+		goto cleanup;
+
+	skel->bss->pid = getpid();
+	link1 = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe,
+						      pattern, opts);
+	if (!ASSERT_OK_PTR(link1, "bpf_program__attach_kprobe_multi_opts"))
+		goto cleanup;
+
+	if (opts) {
+		opts->retprobe = true;
+		link2 = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kretprobe,
+							      pattern, opts);
+		if (!ASSERT_OK_PTR(link2, "bpf_program__attach_kprobe_multi_opts"))
+			goto cleanup;
+	}
+
+	kprobe_multi_test_run(skel, !!opts);
+
+cleanup:
+	bpf_link__destroy(link2);
+	bpf_link__destroy(link1);
+	kprobe_multi__destroy(skel);
+}
+
+static void test_attach_api_pattern(void)
+{
+	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+
+	test_attach_api("bpf_fentry_test*", &opts);
+	test_attach_api("bpf_fentry_test?", NULL);
+}
+
+static void test_attach_api_addrs(void)
+{
+	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+	unsigned long long addrs[8];
+
+	GET_ADDR("bpf_fentry_test1", addrs[0]);
+	GET_ADDR("bpf_fentry_test2", addrs[1]);
+	GET_ADDR("bpf_fentry_test3", addrs[2]);
+	GET_ADDR("bpf_fentry_test4", addrs[3]);
+	GET_ADDR("bpf_fentry_test5", addrs[4]);
+	GET_ADDR("bpf_fentry_test6", addrs[5]);
+	GET_ADDR("bpf_fentry_test7", addrs[6]);
+	GET_ADDR("bpf_fentry_test8", addrs[7]);
+
+	opts.addrs = (const unsigned long *) addrs;
+	opts.cnt = ARRAY_SIZE(addrs);
+	test_attach_api(NULL, &opts);
+}
+
+static void test_attach_api_syms(void)
+{
+	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+	const char *syms[8] = {
+		"bpf_fentry_test1",
+		"bpf_fentry_test2",
+		"bpf_fentry_test3",
+		"bpf_fentry_test4",
+		"bpf_fentry_test5",
+		"bpf_fentry_test6",
+		"bpf_fentry_test7",
+		"bpf_fentry_test8",
+	};
+
+	opts.syms = syms;
+	opts.cnt = ARRAY_SIZE(syms);
+	test_attach_api(NULL, &opts);
+}
+
+static void test_attach_api_fails(void)
+{
+	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+	struct kprobe_multi *skel = NULL;
+	struct bpf_link *link = NULL;
+	unsigned long long addrs[2];
+	const char *syms[2] = {
+		"bpf_fentry_test1",
+		"bpf_fentry_test2",
+	};
+	__u64 cookies[2];
+
+	addrs[0] = ksym_get_addr("bpf_fentry_test1");
+	addrs[1] = ksym_get_addr("bpf_fentry_test2");
+
+	if (!ASSERT_FALSE(!addrs[0] || !addrs[1], "ksym_get_addr"))
+		goto cleanup;
+
+	skel = kprobe_multi__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "fentry_raw_skel_load"))
+		goto cleanup;
+
+	skel->bss->pid = getpid();
+
+	/* fail_1 - pattern and opts NULL */
+	link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe,
+						     NULL, NULL);
+	if (!ASSERT_ERR_PTR(link, "fail_1"))
+		goto cleanup;
+
+	if (!ASSERT_EQ(libbpf_get_error(link), -EINVAL, "fail_1_error"))
+		goto cleanup;
+
+	/* fail_2 - both addrs and syms set */
+	opts.addrs = (const unsigned long *) addrs;
+	opts.syms = syms;
+	opts.cnt = ARRAY_SIZE(syms);
+	opts.cookies = NULL;
+
+	link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe,
+						     NULL, &opts);
+	if (!ASSERT_ERR_PTR(link, "fail_2"))
+		goto cleanup;
+
+	if (!ASSERT_EQ(libbpf_get_error(link), -EINVAL, "fail_2_error"))
+		goto cleanup;
+
+	/* fail_3 - pattern and addrs set */
+	opts.addrs = (const unsigned long *) addrs;
+	opts.syms = NULL;
+	opts.cnt = ARRAY_SIZE(syms);
+	opts.cookies = NULL;
+
+	link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe,
+						     "ksys_*", &opts);
+	if (!ASSERT_ERR_PTR(link, "fail_3"))
+		goto cleanup;
+
+	if (!ASSERT_EQ(libbpf_get_error(link), -EINVAL, "fail_3_error"))
+		goto cleanup;
+
+	/* fail_4 - pattern and cnt set */
+	opts.addrs = NULL;
+	opts.syms = NULL;
+	opts.cnt = ARRAY_SIZE(syms);
+	opts.cookies = NULL;
+
+	link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe,
+						     "ksys_*", &opts);
+	if (!ASSERT_ERR_PTR(link, "fail_4"))
+		goto cleanup;
+
+	if (!ASSERT_EQ(libbpf_get_error(link), -EINVAL, "fail_4_error"))
+		goto cleanup;
+
+	/* fail_5 - pattern and cookies */
+	opts.addrs = NULL;
+	opts.syms = NULL;
+	opts.cnt = 0;
+	opts.cookies = cookies;
+
+	link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe,
+						     "ksys_*", &opts);
+	if (!ASSERT_ERR_PTR(link, "fail_5"))
+		goto cleanup;
+
+	if (!ASSERT_EQ(libbpf_get_error(link), -EINVAL, "fail_5_error"))
+		goto cleanup;
+
+cleanup:
+	bpf_link__destroy(link);
+	kprobe_multi__destroy(skel);
+}
+
 void test_kprobe_multi_test(void)
 {
 	if (!ASSERT_OK(load_kallsyms(), "load_kallsyms"))
@@ -138,4 +312,12 @@ void test_kprobe_multi_test(void)
 		test_link_api_syms();
 	if (test__start_subtest("link_api_syms"))
 		test_link_api_addrs();
+	if (test__start_subtest("attach_api_pattern"))
+		test_attach_api_pattern();
+	if (test__start_subtest("attach_api_addrs"))
+		test_attach_api_addrs();
+	if (test__start_subtest("attach_api_syms"))
+		test_attach_api_syms();
+	if (test__start_subtest("attach_api_fails"))
+		test_attach_api_fails();
 }
-- 
cgit 


From 318c812cebfcfdf42f254e6c1e6490a46e7714f8 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 16 Mar 2022 13:24:19 +0100
Subject: selftests/bpf: Add cookie test for
 bpf_program__attach_kprobe_multi_opts

Adding bpf_cookie test for programs attached by
bpf_program__attach_kprobe_multi_opts API.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220316122419.933957-14-jolsa@kernel.org
---
 .../testing/selftests/bpf/prog_tests/bpf_cookie.c  | 68 ++++++++++++++++++++++
 1 file changed, 68 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
index 6671d4dc0b5d..923a6139b2d8 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
@@ -170,6 +170,72 @@ cleanup:
 	kprobe_multi__destroy(skel);
 }
 
+static void kprobe_multi_attach_api_subtest(void)
+{
+	struct bpf_link *link1 = NULL, *link2 = NULL;
+	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+	LIBBPF_OPTS(bpf_test_run_opts, topts);
+	struct kprobe_multi *skel = NULL;
+	const char *syms[8] = {
+		"bpf_fentry_test1",
+		"bpf_fentry_test2",
+		"bpf_fentry_test3",
+		"bpf_fentry_test4",
+		"bpf_fentry_test5",
+		"bpf_fentry_test6",
+		"bpf_fentry_test7",
+		"bpf_fentry_test8",
+	};
+	__u64 cookies[8];
+
+	skel = kprobe_multi__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "fentry_raw_skel_load"))
+		goto cleanup;
+
+	skel->bss->pid = getpid();
+	skel->bss->test_cookie = true;
+
+	cookies[0] = 1;
+	cookies[1] = 2;
+	cookies[2] = 3;
+	cookies[3] = 4;
+	cookies[4] = 5;
+	cookies[5] = 6;
+	cookies[6] = 7;
+	cookies[7] = 8;
+
+	opts.syms = syms;
+	opts.cnt = ARRAY_SIZE(syms);
+	opts.cookies = cookies;
+
+	link1 = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe,
+						      NULL, &opts);
+	if (!ASSERT_OK_PTR(link1, "bpf_program__attach_kprobe_multi_opts"))
+		goto cleanup;
+
+	cookies[0] = 8;
+	cookies[1] = 7;
+	cookies[2] = 6;
+	cookies[3] = 5;
+	cookies[4] = 4;
+	cookies[5] = 3;
+	cookies[6] = 2;
+	cookies[7] = 1;
+
+	opts.retprobe = true;
+
+	link2 = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kretprobe,
+						      NULL, &opts);
+	if (!ASSERT_OK_PTR(link2, "bpf_program__attach_kprobe_multi_opts"))
+		goto cleanup;
+
+	kprobe_multi_test_run(skel);
+
+cleanup:
+	bpf_link__destroy(link2);
+	bpf_link__destroy(link1);
+	kprobe_multi__destroy(skel);
+}
 static void uprobe_subtest(struct test_bpf_cookie *skel)
 {
 	DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts);
@@ -358,6 +424,8 @@ void test_bpf_cookie(void)
 		kprobe_subtest(skel);
 	if (test__start_subtest("multi_kprobe_link_api"))
 		kprobe_multi_link_api_subtest();
+	if (test__start_subtest("multi_kprobe_attach_api"))
+		kprobe_multi_attach_api_subtest();
 	if (test__start_subtest("uprobe"))
 		uprobe_subtest(skel);
 	if (test__start_subtest("tracepoint"))
-- 
cgit 


From 3cccbaa0332169d4ff05587062a7ed528aeddb60 Mon Sep 17 00:00:00 2001
From: Delyan Kratunov <delyank@fb.com>
Date: Wed, 16 Mar 2022 23:37:31 +0000
Subject: selftests/bpf: Test subskeleton functionality

This patch changes the selftests/bpf Makefile to also generate
a subskel.h for every skel.h it would have normally generated.

Separately, it also introduces a new subskeleton test which tests
library objects, externs, weak symbols, kconfigs, and user maps.

Signed-off-by: Delyan Kratunov <delyank@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/1bd24956940bbbfe169bb34f7f87b11df52ef011.1647473511.git.delyank@fb.com
---
 tools/testing/selftests/bpf/.gitignore             |  1 +
 tools/testing/selftests/bpf/Makefile               | 12 +++-
 .../testing/selftests/bpf/prog_tests/subskeleton.c | 78 ++++++++++++++++++++++
 .../testing/selftests/bpf/progs/test_subskeleton.c | 28 ++++++++
 .../selftests/bpf/progs/test_subskeleton_lib.c     | 61 +++++++++++++++++
 .../selftests/bpf/progs/test_subskeleton_lib2.c    | 16 +++++
 6 files changed, 194 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/subskeleton.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_subskeleton.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_subskeleton_lib.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_subskeleton_lib2.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index a7eead8820a0..595565eb68c0 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -31,6 +31,7 @@ test_tcp_check_syncookie_user
 test_sysctl
 xdping
 test_cpp
+*.subskel.h
 *.skel.h
 *.lskel.h
 /no_alu32
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 11f5883636c3..3820608faf57 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -327,7 +327,13 @@ endef
 SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c
 
 LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h		\
-		linked_vars.skel.h linked_maps.skel.h
+		linked_vars.skel.h linked_maps.skel.h 			\
+		test_subskeleton.skel.h test_subskeleton_lib.skel.h
+
+# In the subskeleton case, we want the test_subskeleton_lib.subskel.h file
+# but that's created as a side-effect of the skel.h generation.
+test_subskeleton.skel.h-deps := test_subskeleton_lib2.o test_subskeleton_lib.o test_subskeleton.o
+test_subskeleton_lib.skel.h-deps := test_subskeleton_lib2.o test_subskeleton_lib.o
 
 LSKELS := kfunc_call_test.c fentry_test.c fexit_test.c fexit_sleep.c \
 	test_ringbuf.c atomics.c trace_printk.c trace_vprintk.c \
@@ -405,6 +411,7 @@ $(TRUNNER_BPF_SKELS): %.skel.h: %.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
 	$(Q)$$(BPFTOOL) gen object $$(<:.o=.linked3.o) $$(<:.o=.linked2.o)
 	$(Q)diff $$(<:.o=.linked2.o) $$(<:.o=.linked3.o)
 	$(Q)$$(BPFTOOL) gen skeleton $$(<:.o=.linked3.o) name $$(notdir $$(<:.o=)) > $$@
+	$(Q)$$(BPFTOOL) gen subskeleton $$(<:.o=.linked3.o) name $$(notdir $$(<:.o=)) > $$(@:.skel.h=.subskel.h)
 
 $(TRUNNER_BPF_LSKELS): %.lskel.h: %.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
 	$$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@)
@@ -422,6 +429,7 @@ $(TRUNNER_BPF_SKELS_LINKED): $(TRUNNER_BPF_OBJS) $(BPFTOOL) | $(TRUNNER_OUTPUT)
 	$(Q)diff $$(@:.skel.h=.linked2.o) $$(@:.skel.h=.linked3.o)
 	$$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@)
 	$(Q)$$(BPFTOOL) gen skeleton $$(@:.skel.h=.linked3.o) name $$(notdir $$(@:.skel.h=)) > $$@
+	$(Q)$$(BPFTOOL) gen subskeleton $$(@:.skel.h=.linked3.o) name $$(notdir $$(@:.skel.h=)) > $$(@:.skel.h=.subskel.h)
 endif
 
 # ensure we set up tests.h header generation rule just once
@@ -559,6 +567,6 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \
 EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR)	\
 	prog_tests/tests.h map_tests/tests.h verifier/tests.h		\
 	feature bpftool							\
-	$(addprefix $(OUTPUT)/,*.o *.skel.h *.lskel.h no_alu32 bpf_gcc bpf_testmod.ko)
+	$(addprefix $(OUTPUT)/,*.o *.skel.h *.lskel.h *.subskel.h no_alu32 bpf_gcc bpf_testmod.ko)
 
 .PHONY: docs docs-clean
diff --git a/tools/testing/selftests/bpf/prog_tests/subskeleton.c b/tools/testing/selftests/bpf/prog_tests/subskeleton.c
new file mode 100644
index 000000000000..9c31b7004f9c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/subskeleton.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include "test_subskeleton.skel.h"
+#include "test_subskeleton_lib.subskel.h"
+
+static void subskeleton_lib_setup(struct bpf_object *obj)
+{
+	struct test_subskeleton_lib *lib = test_subskeleton_lib__open(obj);
+
+	if (!ASSERT_OK_PTR(lib, "open subskeleton"))
+		return;
+
+	*lib->rodata.var1 = 1;
+	*lib->data.var2 = 2;
+	lib->bss.var3->var3_1 = 3;
+	lib->bss.var3->var3_2 = 4;
+
+	test_subskeleton_lib__destroy(lib);
+}
+
+static int subskeleton_lib_subresult(struct bpf_object *obj)
+{
+	struct test_subskeleton_lib *lib = test_subskeleton_lib__open(obj);
+	int result;
+
+	if (!ASSERT_OK_PTR(lib, "open subskeleton"))
+		return -EINVAL;
+
+	result = *lib->bss.libout1;
+	ASSERT_EQ(result, 1 + 2 + 3 + 4 + 5 + 6, "lib subresult");
+
+	ASSERT_OK_PTR(lib->progs.lib_perf_handler, "lib_perf_handler");
+	ASSERT_STREQ(bpf_program__name(lib->progs.lib_perf_handler),
+		     "lib_perf_handler", "program name");
+
+	ASSERT_OK_PTR(lib->maps.map1, "map1");
+	ASSERT_STREQ(bpf_map__name(lib->maps.map1), "map1", "map name");
+
+	ASSERT_EQ(*lib->data.var5, 5, "__weak var5");
+	ASSERT_EQ(*lib->data.var6, 6, "extern var6");
+	ASSERT_TRUE(*lib->kconfig.CONFIG_BPF_SYSCALL, "CONFIG_BPF_SYSCALL");
+
+	test_subskeleton_lib__destroy(lib);
+	return result;
+}
+
+void test_subskeleton(void)
+{
+	int err, result;
+	struct test_subskeleton *skel;
+
+	skel = test_subskeleton__open();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	skel->rodata->rovar1 = 10;
+	skel->rodata->var1 = 1;
+	subskeleton_lib_setup(skel->obj);
+
+	err = test_subskeleton__load(skel);
+	if (!ASSERT_OK(err, "skel_load"))
+		goto cleanup;
+
+	err = test_subskeleton__attach(skel);
+	if (!ASSERT_OK(err, "skel_attach"))
+		goto cleanup;
+
+	/* trigger tracepoint */
+	usleep(1);
+
+	result = subskeleton_lib_subresult(skel->obj) * 10;
+	ASSERT_EQ(skel->bss->out1, result, "unexpected calculation");
+
+cleanup:
+	test_subskeleton__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_subskeleton.c b/tools/testing/selftests/bpf/progs/test_subskeleton.c
new file mode 100644
index 000000000000..006417974372
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_subskeleton.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) Meta Platforms, Inc. and affiliates. */
+
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+/* volatile to force a read, compiler may assume 0 otherwise */
+const volatile int rovar1;
+int out1;
+
+/* Override weak symbol in test_subskeleton_lib */
+int var5 = 5;
+
+extern volatile bool CONFIG_BPF_SYSCALL __kconfig;
+
+extern int lib_routine(void);
+
+SEC("raw_tp/sys_enter")
+int handler1(const void *ctx)
+{
+	(void) CONFIG_BPF_SYSCALL;
+
+	out1 = lib_routine() * rovar1;
+	return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_subskeleton_lib.c b/tools/testing/selftests/bpf/progs/test_subskeleton_lib.c
new file mode 100644
index 000000000000..ecfafe812c36
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_subskeleton_lib.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) Meta Platforms, Inc. and affiliates. */
+
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+/* volatile to force a read */
+const volatile int var1;
+volatile int var2 = 1;
+struct {
+	int var3_1;
+	__s64 var3_2;
+} var3;
+int libout1;
+
+extern volatile bool CONFIG_BPF_SYSCALL __kconfig;
+
+int var4[4];
+
+__weak int var5 SEC(".data");
+
+/* Fully contained within library extern-and-definition */
+extern int var6;
+
+int var7 SEC(".data.custom");
+
+int (*fn_ptr)(void);
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, __u32);
+	__type(value, __u32);
+	__uint(max_entries, 16);
+} map1 SEC(".maps");
+
+extern struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, __u32);
+	__type(value, __u32);
+	__uint(max_entries, 16);
+} map2 SEC(".maps");
+
+int lib_routine(void)
+{
+	__u32 key = 1, value = 2;
+
+	(void) CONFIG_BPF_SYSCALL;
+	bpf_map_update_elem(&map2, &key, &value, BPF_ANY);
+
+	libout1 = var1 + var2 + var3.var3_1 + var3.var3_2 + var5 + var6;
+	return libout1;
+}
+
+SEC("perf_event")
+int lib_perf_handler(struct pt_regs *ctx)
+{
+	return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_subskeleton_lib2.c b/tools/testing/selftests/bpf/progs/test_subskeleton_lib2.c
new file mode 100644
index 000000000000..80238486b7ce
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_subskeleton_lib2.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+int var6 = 6;
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, __u32);
+	__type(value, __u32);
+	__uint(max_entries, 16);
+} map2 SEC(".maps");
+
+char LICENSE[] SEC("license") = "GPL";
-- 
cgit 


From d9a232d435dcc966738b0f414a86f7edf4f4c8c4 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Date: Thu, 17 Mar 2022 12:08:09 +0900
Subject: af_unix: Support POLLPRI for OOB.

The commit 314001f0bf92 ("af_unix: Add OOB support") introduced OOB for
AF_UNIX, but it lacks some changes for POLLPRI.  Let's add the missing
piece.

In the selftest, normal datagrams are sent followed by OOB data, so this
commit replaces `POLLIN | POLLPRI` with just `POLLPRI` in the first test
case.

Fixes: 314001f0bf92 ("af_unix: Add OOB support")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c                                  | 4 ++++
 tools/testing/selftests/net/af_unix/test_unix_oob.c | 6 +++---
 2 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 0c37e5595aae..1e7ed5829ed5 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -3137,6 +3137,10 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa
 		mask |= EPOLLIN | EPOLLRDNORM;
 	if (sk_is_readable(sk))
 		mask |= EPOLLIN | EPOLLRDNORM;
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+	if (READ_ONCE(unix_sk(sk)->oob_skb))
+		mask |= EPOLLPRI;
+#endif
 
 	/* Connection-based need to check for termination and startup */
 	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c
index 3dece8b29253..b57e91e1c3f2 100644
--- a/tools/testing/selftests/net/af_unix/test_unix_oob.c
+++ b/tools/testing/selftests/net/af_unix/test_unix_oob.c
@@ -218,10 +218,10 @@ main(int argc, char **argv)
 
 	/* Test 1:
 	 * veriyf that SIGURG is
-	 * delivered and 63 bytes are
-	 * read and oob is '@'
+	 * delivered, 63 bytes are
+	 * read, oob is '@', and POLLPRI works.
 	 */
-	wait_for_data(pfd, POLLIN | POLLPRI);
+	wait_for_data(pfd, POLLPRI);
 	read_oob(pfd, &oob);
 	len = read_data(pfd, buf, 1024);
 	if (!signal_recvd || len != 63 || oob != '@') {
-- 
cgit 


From a4c9fe0ed4a13e25e43fcd44d9f89bc19ba8fbb7 Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Thu, 17 Mar 2022 12:39:17 +0100
Subject: selftests/bpf: Fix error reporting from sock_fields programs

The helper macro that records an error in BPF programs that exercise sock
fields access has been inadvertently broken by adaptation work that
happened in commit b18c1f0aa477 ("bpf: selftest: Adapt sock_fields test to
use skel and global variables").

BPF_NOEXIST flag cannot be used to update BPF_MAP_TYPE_ARRAY. The operation
always fails with -EEXIST, which in turn means the error never gets
recorded, and the checks for errors always pass.

Revert the change in update flags.

Fixes: b18c1f0aa477 ("bpf: selftest: Adapt sock_fields test to use skel and global variables")
Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20220317113920.1068535-2-jakub@cloudflare.com
---
 tools/testing/selftests/bpf/progs/test_sock_fields.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields.c b/tools/testing/selftests/bpf/progs/test_sock_fields.c
index 246f1f001813..3e2e3ee51cc9 100644
--- a/tools/testing/selftests/bpf/progs/test_sock_fields.c
+++ b/tools/testing/selftests/bpf/progs/test_sock_fields.c
@@ -114,7 +114,7 @@ static void tpcpy(struct bpf_tcp_sock *dst,
 
 #define RET_LOG() ({						\
 	linum = __LINE__;					\
-	bpf_map_update_elem(&linum_map, &linum_idx, &linum, BPF_NOEXIST);	\
+	bpf_map_update_elem(&linum_map, &linum_idx, &linum, BPF_ANY);	\
 	return CG_OK;						\
 })
 
-- 
cgit 


From 2d2202ba858c112b03f84d546e260c61425831a1 Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Thu, 17 Mar 2022 12:39:18 +0100
Subject: selftests/bpf: Check dst_port only on the client socket

cgroup_skb/egress programs which sock_fields test installs process packets
flying in both directions, from the client to the server, and in reverse
direction.

Recently added dst_port check relies on the fact that destination
port (remote peer port) of the socket which sends the packet is known ahead
of time. This holds true only for the client socket, which connects to the
known server port.

Filter out any traffic that is not egressing from the client socket in the
BPF program that tests reading the dst_port.

Fixes: 8f50f16ff39d ("selftests/bpf: Extend verifier and bpf_sock tests for dst_port loads")
Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20220317113920.1068535-3-jakub@cloudflare.com
---
 tools/testing/selftests/bpf/progs/test_sock_fields.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields.c b/tools/testing/selftests/bpf/progs/test_sock_fields.c
index 3e2e3ee51cc9..43b31aa1fcf7 100644
--- a/tools/testing/selftests/bpf/progs/test_sock_fields.c
+++ b/tools/testing/selftests/bpf/progs/test_sock_fields.c
@@ -281,6 +281,10 @@ int read_sk_dst_port(struct __sk_buff *skb)
 	if (!sk)
 		RET_LOG();
 
+	/* Ignore everything but the SYN from the client socket */
+	if (sk->state != BPF_TCP_SYN_SENT)
+		return CG_OK;
+
 	if (!sk_dst_port__load_word(sk))
 		RET_LOG();
 	if (!sk_dst_port__load_half(sk))
-- 
cgit 


From e06b5bbcf3f107fb5e148714efe2decfb3b4e0ac Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Thu, 17 Mar 2022 12:39:19 +0100
Subject: selftests/bpf: Use constants for socket states in sock_fields test

Replace magic numbers in BPF code with constants from bpf.h, so that they
don't require an explanation in the comments.

Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20220317113920.1068535-4-jakub@cloudflare.com
---
 tools/testing/selftests/bpf/progs/test_sock_fields.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields.c b/tools/testing/selftests/bpf/progs/test_sock_fields.c
index 43b31aa1fcf7..43a17fdef226 100644
--- a/tools/testing/selftests/bpf/progs/test_sock_fields.c
+++ b/tools/testing/selftests/bpf/progs/test_sock_fields.c
@@ -134,11 +134,11 @@ int egress_read_sock_fields(struct __sk_buff *skb)
 	if (!sk)
 		RET_LOG();
 
-	/* Not the testing egress traffic or
-	 * TCP_LISTEN (10) socket will be copied at the ingress side.
+	/* Not testing the egress traffic or the listening socket,
+	 * which are covered by the cgroup_skb/ingress test program.
 	 */
 	if (sk->family != AF_INET6 || !is_loopback6(sk->src_ip6) ||
-	    sk->state == 10)
+	    sk->state == BPF_TCP_LISTEN)
 		return CG_OK;
 
 	if (sk->src_port == bpf_ntohs(srv_sa6.sin6_port)) {
@@ -232,8 +232,8 @@ int ingress_read_sock_fields(struct __sk_buff *skb)
 	    sk->src_port != bpf_ntohs(srv_sa6.sin6_port))
 		return CG_OK;
 
-	/* Only interested in TCP_LISTEN */
-	if (sk->state != 10)
+	/* Only interested in the listening socket */
+	if (sk->state != BPF_TCP_LISTEN)
 		return CG_OK;
 
 	/* It must be a fullsock for cgroup_skb/ingress prog */
-- 
cgit 


From deb59400464468352b4d7827420e04f1add94726 Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Thu, 17 Mar 2022 12:39:20 +0100
Subject: selftests/bpf: Fix test for 4-byte load from dst_port on big-endian

The check for 4-byte load from dst_port offset into bpf_sock is failing on
big-endian architecture - s390. The bpf access converter rewrites the
4-byte load to a 2-byte load from sock_common at skc_dport offset, as shown
below.

  * s390 / llvm-objdump -S --no-show-raw-insn

  00000000000002a0 <sk_dst_port__load_word>:
        84:       r1 = *(u32 *)(r1 + 48)
        85:       w0 = 1
        86:       if w1 == 51966 goto +1 <LBB5_2>
        87:       w0 = 0
  00000000000002c0 <LBB5_2>:
        88:       exit

  * s390 / bpftool prog dump xlated

  _Bool sk_dst_port__load_word(struct bpf_sock * sk):
    35: (69) r1 = *(u16 *)(r1 +12)
    36: (bc) w1 = w1
    37: (b4) w0 = 1
    38: (16) if w1 == 0xcafe goto pc+1
    39: (b4) w0 = 0
    40: (95) exit

  * x86_64 / llvm-objdump -S --no-show-raw-insn

  00000000000002a0 <sk_dst_port__load_word>:
        84:       r1 = *(u32 *)(r1 + 48)
        85:       w0 = 1
        86:       if w1 == 65226 goto +1 <LBB5_2>
        87:       w0 = 0
  00000000000002c0 <LBB5_2>:
        88:       exit

  * x86_64 / bpftool prog dump xlated

  _Bool sk_dst_port__load_word(struct bpf_sock * sk):
    33: (69) r1 = *(u16 *)(r1 +12)
    34: (b4) w0 = 1
    35: (16) if w1 == 0xfeca goto pc+1
    36: (b4) w0 = 0
    37: (95) exit

This leads to surprises if we treat the destination register contents as a
32-bit value, ignoring the fact that in reality it contains a 16-bit value.

On little-endian the register contents reflect the bpf_sock struct
definition, where the lower 16-bits contain the port number:

	struct bpf_sock {
		...
		__be16 dst_port;	/* offset 48 */
		__u16 :16;
		...
	};

However, on big-endian the register contents suggest that field the layout
of bpf_sock struct is as so:

	struct bpf_sock {
		...
		__u16 :16;		/* offset 48 */
		__be16 dst_port;
		...
	};

Account for this quirky access conversion in the test case exercising the
4-byte load by treating the result as 16-bit wide.

Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20220317113920.1068535-5-jakub@cloudflare.com
---
 tools/testing/selftests/bpf/progs/test_sock_fields.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields.c b/tools/testing/selftests/bpf/progs/test_sock_fields.c
index 43a17fdef226..9f4b8f9f1181 100644
--- a/tools/testing/selftests/bpf/progs/test_sock_fields.c
+++ b/tools/testing/selftests/bpf/progs/test_sock_fields.c
@@ -251,10 +251,16 @@ int ingress_read_sock_fields(struct __sk_buff *skb)
 	return CG_OK;
 }
 
+/*
+ * NOTE: 4-byte load from bpf_sock at dst_port offset is quirky. It
+ * gets rewritten by the access converter to a 2-byte load for
+ * backward compatibility. Treating the load result as a be16 value
+ * makes the code portable across little- and big-endian platforms.
+ */
 static __noinline bool sk_dst_port__load_word(struct bpf_sock *sk)
 {
 	__u32 *word = (__u32 *)&sk->dst_port;
-	return word[0] == bpf_htonl(0xcafe0000);
+	return word[0] == bpf_htons(0xcafe);
 }
 
 static __noinline bool sk_dst_port__load_half(struct bpf_sock *sk)
-- 
cgit 


From efb3719f4ab0c4646ce2fe16c610e6a9bb0e1b08 Mon Sep 17 00:00:00 2001
From: Krasnov Arseniy Vladimirovich <AVKrasnov@sberdevices.ru>
Date: Thu, 17 Mar 2022 08:31:49 +0000
Subject: af_vsock: SOCK_SEQPACKET receive timeout test

Test for receive timeout check: connection is established,
receiver sets timeout, but sender does nothing. Receiver's
'read()' call must return EAGAIN.

Signed-off-by: Krasnov Arseniy Vladimirovich <AVKrasnov@sberdevices.ru>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/vsock/vsock_test.c | 84 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 84 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index 2a3638c0a008..5b8561b80914 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -16,6 +16,7 @@
 #include <linux/kernel.h>
 #include <sys/types.h>
 #include <sys/socket.h>
+#include <time.h>
 
 #include "timeout.h"
 #include "control.h"
@@ -391,6 +392,84 @@ static void test_seqpacket_msg_trunc_server(const struct test_opts *opts)
 	close(fd);
 }
 
+static time_t current_nsec(void)
+{
+	struct timespec ts;
+
+	if (clock_gettime(CLOCK_REALTIME, &ts)) {
+		perror("clock_gettime(3) failed");
+		exit(EXIT_FAILURE);
+	}
+
+	return (ts.tv_sec * 1000000000ULL) + ts.tv_nsec;
+}
+
+#define RCVTIMEO_TIMEOUT_SEC 1
+#define READ_OVERHEAD_NSEC 250000000 /* 0.25 sec */
+
+static void test_seqpacket_timeout_client(const struct test_opts *opts)
+{
+	int fd;
+	struct timeval tv;
+	char dummy;
+	time_t read_enter_ns;
+	time_t read_overhead_ns;
+
+	fd = vsock_seqpacket_connect(opts->peer_cid, 1234);
+	if (fd < 0) {
+		perror("connect");
+		exit(EXIT_FAILURE);
+	}
+
+	tv.tv_sec = RCVTIMEO_TIMEOUT_SEC;
+	tv.tv_usec = 0;
+
+	if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (void *)&tv, sizeof(tv)) == -1) {
+		perror("setsockopt 'SO_RCVTIMEO'");
+		exit(EXIT_FAILURE);
+	}
+
+	read_enter_ns = current_nsec();
+
+	if (read(fd, &dummy, sizeof(dummy)) != -1) {
+		fprintf(stderr,
+			"expected 'dummy' read(2) failure\n");
+		exit(EXIT_FAILURE);
+	}
+
+	if (errno != EAGAIN) {
+		perror("EAGAIN expected");
+		exit(EXIT_FAILURE);
+	}
+
+	read_overhead_ns = current_nsec() - read_enter_ns -
+			1000000000ULL * RCVTIMEO_TIMEOUT_SEC;
+
+	if (read_overhead_ns > READ_OVERHEAD_NSEC) {
+		fprintf(stderr,
+			"too much time in read(2), %lu > %i ns\n",
+			read_overhead_ns, READ_OVERHEAD_NSEC);
+		exit(EXIT_FAILURE);
+	}
+
+	control_writeln("WAITDONE");
+	close(fd);
+}
+
+static void test_seqpacket_timeout_server(const struct test_opts *opts)
+{
+	int fd;
+
+	fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL);
+	if (fd < 0) {
+		perror("accept");
+		exit(EXIT_FAILURE);
+	}
+
+	control_expectln("WAITDONE");
+	close(fd);
+}
+
 static struct test_case test_cases[] = {
 	{
 		.name = "SOCK_STREAM connection reset",
@@ -431,6 +510,11 @@ static struct test_case test_cases[] = {
 		.run_client = test_seqpacket_msg_trunc_client,
 		.run_server = test_seqpacket_msg_trunc_server,
 	},
+	{
+		.name = "SOCK_SEQPACKET timeout",
+		.run_client = test_seqpacket_timeout_client,
+		.run_server = test_seqpacket_timeout_server,
+	},
 	{},
 };
 
-- 
cgit 


From e89600ebeeb14d18c0b062837a84196f72542830 Mon Sep 17 00:00:00 2001
From: Krasnov Arseniy Vladimirovich <AVKrasnov@sberdevices.ru>
Date: Thu, 17 Mar 2022 08:33:23 +0000
Subject: af_vsock: SOCK_SEQPACKET broken buffer test

Add test where sender sends two message, each with own
data pattern. Reader tries to read first to broken buffer:
it has three pages size, but middle page is unmapped. Then,
reader tries to read second message to valid buffer. Test
checks, that uncopied part of first message was dropped
and thus not copied as part of second message.

Signed-off-by: Krasnov Arseniy Vladimirovich <AVKrasnov@sberdevices.ru>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/vsock/vsock_test.c | 131 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 131 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index 5b8561b80914..dc577461afc2 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -17,6 +17,7 @@
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <time.h>
+#include <sys/mman.h>
 
 #include "timeout.h"
 #include "control.h"
@@ -470,6 +471,131 @@ static void test_seqpacket_timeout_server(const struct test_opts *opts)
 	close(fd);
 }
 
+#define BUF_PATTERN_1 'a'
+#define BUF_PATTERN_2 'b'
+
+static void test_seqpacket_invalid_rec_buffer_client(const struct test_opts *opts)
+{
+	int fd;
+	unsigned char *buf1;
+	unsigned char *buf2;
+	int buf_size = getpagesize() * 3;
+
+	fd = vsock_seqpacket_connect(opts->peer_cid, 1234);
+	if (fd < 0) {
+		perror("connect");
+		exit(EXIT_FAILURE);
+	}
+
+	buf1 = malloc(buf_size);
+	if (!buf1) {
+		perror("'malloc()' for 'buf1'");
+		exit(EXIT_FAILURE);
+	}
+
+	buf2 = malloc(buf_size);
+	if (!buf2) {
+		perror("'malloc()' for 'buf2'");
+		exit(EXIT_FAILURE);
+	}
+
+	memset(buf1, BUF_PATTERN_1, buf_size);
+	memset(buf2, BUF_PATTERN_2, buf_size);
+
+	if (send(fd, buf1, buf_size, 0) != buf_size) {
+		perror("send failed");
+		exit(EXIT_FAILURE);
+	}
+
+	if (send(fd, buf2, buf_size, 0) != buf_size) {
+		perror("send failed");
+		exit(EXIT_FAILURE);
+	}
+
+	close(fd);
+}
+
+static void test_seqpacket_invalid_rec_buffer_server(const struct test_opts *opts)
+{
+	int fd;
+	unsigned char *broken_buf;
+	unsigned char *valid_buf;
+	int page_size = getpagesize();
+	int buf_size = page_size * 3;
+	ssize_t res;
+	int prot = PROT_READ | PROT_WRITE;
+	int flags = MAP_PRIVATE | MAP_ANONYMOUS;
+	int i;
+
+	fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL);
+	if (fd < 0) {
+		perror("accept");
+		exit(EXIT_FAILURE);
+	}
+
+	/* Setup first buffer. */
+	broken_buf = mmap(NULL, buf_size, prot, flags, -1, 0);
+	if (broken_buf == MAP_FAILED) {
+		perror("mmap for 'broken_buf'");
+		exit(EXIT_FAILURE);
+	}
+
+	/* Unmap "hole" in buffer. */
+	if (munmap(broken_buf + page_size, page_size)) {
+		perror("'broken_buf' setup");
+		exit(EXIT_FAILURE);
+	}
+
+	valid_buf = mmap(NULL, buf_size, prot, flags, -1, 0);
+	if (valid_buf == MAP_FAILED) {
+		perror("mmap for 'valid_buf'");
+		exit(EXIT_FAILURE);
+	}
+
+	/* Try to fill buffer with unmapped middle. */
+	res = read(fd, broken_buf, buf_size);
+	if (res != -1) {
+		fprintf(stderr,
+			"expected 'broken_buf' read(2) failure, got %zi\n",
+			res);
+		exit(EXIT_FAILURE);
+	}
+
+	if (errno != ENOMEM) {
+		perror("unexpected errno of 'broken_buf'");
+		exit(EXIT_FAILURE);
+	}
+
+	/* Try to fill valid buffer. */
+	res = read(fd, valid_buf, buf_size);
+	if (res < 0) {
+		perror("unexpected 'valid_buf' read(2) failure");
+		exit(EXIT_FAILURE);
+	}
+
+	if (res != buf_size) {
+		fprintf(stderr,
+			"invalid 'valid_buf' read(2), expected %i, got %zi\n",
+			buf_size, res);
+		exit(EXIT_FAILURE);
+	}
+
+	for (i = 0; i < buf_size; i++) {
+		if (valid_buf[i] != BUF_PATTERN_2) {
+			fprintf(stderr,
+				"invalid pattern for 'valid_buf' at %i, expected %hhX, got %hhX\n",
+				i, BUF_PATTERN_2, valid_buf[i]);
+			exit(EXIT_FAILURE);
+		}
+	}
+
+	/* Unmap buffers. */
+	munmap(broken_buf, page_size);
+	munmap(broken_buf + page_size * 2, page_size);
+	munmap(valid_buf, buf_size);
+	close(fd);
+}
+
 static struct test_case test_cases[] = {
 	{
 		.name = "SOCK_STREAM connection reset",
@@ -515,6 +641,11 @@ static struct test_case test_cases[] = {
 		.run_client = test_seqpacket_timeout_client,
 		.run_server = test_seqpacket_timeout_server,
 	},
+	{
+		.name = "SOCK_SEQPACKET invalid receive buffer",
+		.run_client = test_seqpacket_invalid_rec_buffer_client,
+		.run_server = test_seqpacket_invalid_rec_buffer_server,
+	},
 	{},
 };
 
-- 
cgit 


From ec730c3e1f0e3a80612a9be2beb00e2b4f93fe70 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Thu, 17 Mar 2022 13:45:11 +0100
Subject: selftest: net: Test IPv4 PMTU exceptions with DSCP and ECN

Add two tests to pmtu.sh, for verifying that PMTU exceptions get
properly created for routes that don't belong to the main table.

A fib-rule based on the packet's DSCP field is used to jump to the
correct table. ECN shouldn't interfere with this process, so each test
has two components: one that only sets DSCP and one that sets both DSCP
and ECN.

One of the test triggers PMTU exceptions using ICMP Echo Requests, the
other using UDP packets (to test different handlers in the kernel).

A few adjustments are necessary in the rest of the script to allow
policy routing scenarios:

  * Add global variable rt_table that allows setup_routing_*() to
    add routes to a specific routing table. By default rt_table is set
    to "main", so existing tests don't need to be modified.

  * Another global variable, policy_mark, is used to define which
    dsfield value is used for policy routing. This variable has no
    effect on tests that don't use policy routing.

  * The UDP version of the test uses socat. So cleanup() now also need
    to kill socat PIDs.

  * route_get_dst_pmtu_from_exception() and route_get_dst_exception()
    now take an optional third argument specifying the dsfield. If
    not specified, 0 is used, so existing users don't need to be
    modified.

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/pmtu.sh | 141 +++++++++++++++++++++++++++++++++++-
 1 file changed, 137 insertions(+), 4 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 694732e4b344..736e358dc549 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -26,6 +26,15 @@
 # - pmtu_ipv6
 #	Same as pmtu_ipv4, except for locked PMTU tests, using IPv6
 #
+# - pmtu_ipv4_dscp_icmp_exception
+#	Set up the same network topology as pmtu_ipv4, but use non-default
+#	routing table in A. A fib-rule is used to jump to this routing table
+#	based on DSCP. Send ICMPv4 packets with the expected DSCP value and
+#	verify that ECN doesn't interfere with the creation of PMTU exceptions.
+#
+# - pmtu_ipv4_dscp_udp_exception
+#	Same as pmtu_ipv4_dscp_icmp_exception, but use UDP instead of ICMP.
+#
 # - pmtu_ipv4_vxlan4_exception
 #	Set up the same network topology as pmtu_ipv4, create a VXLAN tunnel
 #	over IPv4 between A and B, routed via R1. On the link between R1 and B,
@@ -203,6 +212,8 @@ which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
 tests="
 	pmtu_ipv4_exception		ipv4: PMTU exceptions			1
 	pmtu_ipv6_exception		ipv6: PMTU exceptions			1
+	pmtu_ipv4_dscp_icmp_exception	ICMPv4 with DSCP and ECN: PMTU exceptions	1
+	pmtu_ipv4_dscp_udp_exception	UDPv4 with DSCP and ECN: PMTU exceptions	1
 	pmtu_ipv4_vxlan4_exception	IPv4 over vxlan4: PMTU exceptions	1
 	pmtu_ipv6_vxlan4_exception	IPv6 over vxlan4: PMTU exceptions	1
 	pmtu_ipv4_vxlan6_exception	IPv4 over vxlan6: PMTU exceptions	1
@@ -323,6 +334,9 @@ routes_nh="
 	B	6	default			61
 "
 
+policy_mark=0x04
+rt_table=main
+
 veth4_a_addr="192.168.1.1"
 veth4_b_addr="192.168.1.2"
 veth4_c_addr="192.168.2.10"
@@ -346,6 +360,7 @@ dummy6_mask="64"
 err_buf=
 tcpdump_pids=
 nettest_pids=
+socat_pids=
 
 err() {
 	err_buf="${err_buf}${1}
@@ -723,7 +738,7 @@ setup_routing_old() {
 
 		ns_name="$(nsname ${ns})"
 
-		ip -n ${ns_name} route add ${addr} via ${gw}
+		ip -n "${ns_name}" route add "${addr}" table "${rt_table}" via "${gw}"
 
 		ns=""; addr=""; gw=""
 	done
@@ -753,7 +768,7 @@ setup_routing_new() {
 
 		ns_name="$(nsname ${ns})"
 
-		ip -n ${ns_name} -${fam} route add ${addr} nhid ${nhid}
+		ip -n "${ns_name}" -"${fam}" route add "${addr}" table "${rt_table}" nhid "${nhid}"
 
 		ns=""; fam=""; addr=""; nhid=""
 	done
@@ -798,6 +813,24 @@ setup_routing() {
 	return 0
 }
 
+setup_policy_routing() {
+	setup_routing
+
+	ip -netns "${NS_A}" -4 rule add dsfield "${policy_mark}" \
+		table "${rt_table}"
+
+	# Set the IPv4 Don't Fragment bit with tc, since socat doesn't seem to
+	# have an option do to it.
+	tc -netns "${NS_A}" qdisc replace dev veth_A-R1 root prio
+	tc -netns "${NS_A}" qdisc replace dev veth_A-R2 root prio
+	tc -netns "${NS_A}" filter add dev veth_A-R1                      \
+		protocol ipv4 flower ip_proto udp                         \
+		action pedit ex munge ip df set 0x40 pipe csum ip and udp
+	tc -netns "${NS_A}" filter add dev veth_A-R2                      \
+		protocol ipv4 flower ip_proto udp                         \
+		action pedit ex munge ip df set 0x40 pipe csum ip and udp
+}
+
 setup_bridge() {
 	run_cmd ${ns_a} ip link add br0 type bridge || return $ksft_skip
 	run_cmd ${ns_a} ip link set br0 up
@@ -903,6 +936,11 @@ cleanup() {
 	done
 	nettest_pids=
 
+	for pid in ${socat_pids}; do
+		kill "${pid}"
+	done
+	socat_pids=
+
 	for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do
 		ip netns del ${n} 2> /dev/null
 	done
@@ -950,15 +988,21 @@ link_get_mtu() {
 route_get_dst_exception() {
 	ns_cmd="${1}"
 	dst="${2}"
+	dsfield="${3}"
 
-	${ns_cmd} ip route get "${dst}"
+	if [ -z "${dsfield}" ]; then
+		dsfield=0
+	fi
+
+	${ns_cmd} ip route get "${dst}" dsfield "${dsfield}"
 }
 
 route_get_dst_pmtu_from_exception() {
 	ns_cmd="${1}"
 	dst="${2}"
+	dsfield="${3}"
 
-	mtu_parse "$(route_get_dst_exception "${ns_cmd}" ${dst})"
+	mtu_parse "$(route_get_dst_exception "${ns_cmd}" "${dst}" "${dsfield}")"
 }
 
 check_pmtu_value() {
@@ -1068,6 +1112,95 @@ test_pmtu_ipv6_exception() {
 	test_pmtu_ipvX 6
 }
 
+test_pmtu_ipv4_dscp_icmp_exception() {
+	rt_table=100
+
+	setup namespaces policy_routing || return $ksft_skip
+	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
+	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
+	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
+	      "${ns_r2}" veth_R2-B    "${ns_b}"  veth_B-R2
+
+	# Set up initial MTU values
+	mtu "${ns_a}"  veth_A-R1 2000
+	mtu "${ns_r1}" veth_R1-A 2000
+	mtu "${ns_r1}" veth_R1-B 1400
+	mtu "${ns_b}"  veth_B-R1 1400
+
+	mtu "${ns_a}"  veth_A-R2 2000
+	mtu "${ns_r2}" veth_R2-A 2000
+	mtu "${ns_r2}" veth_R2-B 1500
+	mtu "${ns_b}"  veth_B-R2 1500
+
+	len=$((2000 - 20 - 8)) # Fills MTU of veth_A-R1
+
+	dst1="${prefix4}.${b_r1}.1"
+	dst2="${prefix4}.${b_r2}.1"
+
+	# Create route exceptions
+	dsfield=${policy_mark} # No ECN bit set (Not-ECT)
+	run_cmd "${ns_a}" ping -q -M want -Q "${dsfield}" -c 1 -w 1 -s "${len}" "${dst1}"
+
+	dsfield=$(printf "%#x" $((policy_mark + 0x02))) # ECN=2 (ECT(0))
+	run_cmd "${ns_a}" ping -q -M want -Q "${dsfield}" -c 1 -w 1 -s "${len}" "${dst2}"
+
+	# Check that exceptions have been created with the correct PMTU
+	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")"
+	check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
+
+	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")"
+	check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1
+}
+
+test_pmtu_ipv4_dscp_udp_exception() {
+	rt_table=100
+
+	if ! which socat > /dev/null 2>&1; then
+		echo "'socat' command not found; skipping tests"
+		return $ksft_skip
+	fi
+
+	setup namespaces policy_routing || return $ksft_skip
+	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
+	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
+	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
+	      "${ns_r2}" veth_R2-B    "${ns_b}"  veth_B-R2
+
+	# Set up initial MTU values
+	mtu "${ns_a}"  veth_A-R1 2000
+	mtu "${ns_r1}" veth_R1-A 2000
+	mtu "${ns_r1}" veth_R1-B 1400
+	mtu "${ns_b}"  veth_B-R1 1400
+
+	mtu "${ns_a}"  veth_A-R2 2000
+	mtu "${ns_r2}" veth_R2-A 2000
+	mtu "${ns_r2}" veth_R2-B 1500
+	mtu "${ns_b}"  veth_B-R2 1500
+
+	len=$((2000 - 20 - 8)) # Fills MTU of veth_A-R1
+
+	dst1="${prefix4}.${b_r1}.1"
+	dst2="${prefix4}.${b_r2}.1"
+
+	# Create route exceptions
+	run_cmd_bg "${ns_b}" socat UDP-LISTEN:50000 OPEN:/dev/null,wronly=1
+	socat_pids="${socat_pids} $!"
+
+	dsfield=${policy_mark} # No ECN bit set (Not-ECT)
+	run_cmd "${ns_a}" socat OPEN:/dev/zero,rdonly=1,readbytes="${len}" \
+		UDP:"${dst1}":50000,tos="${dsfield}"
+
+	dsfield=$(printf "%#x" $((policy_mark + 0x02))) # ECN=2 (ECT(0))
+	run_cmd "${ns_a}" socat OPEN:/dev/zero,rdonly=1,readbytes="${len}" \
+		UDP:"${dst2}":50000,tos="${dsfield}"
+
+	# Check that exceptions have been created with the correct PMTU
+	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")"
+	check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
+	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")"
+	check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1
+}
+
 test_pmtu_ipvX_over_vxlanY_or_geneveY_exception() {
 	type=${1}
 	family=${2}
-- 
cgit 


From 0e790cbb1af97473d3ea53616f8584a71f80fc3b Mon Sep 17 00:00:00 2001
From: Joanne Koong <joannelkoong@gmail.com>
Date: Thu, 17 Mar 2022 21:55:53 -0700
Subject: selftests/bpf: Test for associating multiple elements with the local
 storage

This patch adds a few calls to the existing local storage selftest to
test that we can associate multiple elements with the local storage.

The sleepable program's call to bpf_sk_storage_get with sk_storage_map2
will lead to an allocation of a new selem under the GFP_KERNEL flag.

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220318045553.3091807-3-joannekoong@fb.com
---
 tools/testing/selftests/bpf/progs/local_storage.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c
index 9b1f9b75d5c2..19423ed862e3 100644
--- a/tools/testing/selftests/bpf/progs/local_storage.c
+++ b/tools/testing/selftests/bpf/progs/local_storage.c
@@ -36,6 +36,13 @@ struct {
 	__type(value, struct local_storage);
 } sk_storage_map SEC(".maps");
 
+struct {
+	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC | BPF_F_CLONE);
+	__type(key, int);
+	__type(value, struct local_storage);
+} sk_storage_map2 SEC(".maps");
+
 struct {
 	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
 	__uint(map_flags, BPF_F_NO_PREALLOC);
@@ -115,7 +122,19 @@ int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address,
 	if (storage->value != DUMMY_STORAGE_VALUE)
 		sk_storage_result = -1;
 
+	/* This tests that we can associate multiple elements
+	 * with the local storage.
+	 */
+	storage = bpf_sk_storage_get(&sk_storage_map2, sock->sk, 0,
+				     BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (!storage)
+		return 0;
+
 	err = bpf_sk_storage_delete(&sk_storage_map, sock->sk);
+	if (err)
+		return 0;
+
+	err = bpf_sk_storage_delete(&sk_storage_map2, sock->sk);
 	if (!err)
 		sk_storage_result = err;
 
-- 
cgit 


From 3c69611b8926f8e74fcf76bd97ae0e5dafbeb26a Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Sat, 19 Mar 2022 19:33:55 +0100
Subject: selftests/bpf: Fix u8 narrow load checks for bpf_sk_lookup
 remote_port

In commit 9a69e2b385f4 ("bpf: Make remote_port field in struct
bpf_sk_lookup 16-bit wide") ->remote_port field changed from __u32 to
__be16.

However, narrow load tests which exercise 1-byte sized loads from
offsetof(struct bpf_sk_lookup, remote_port) were not adopted to reflect the
change.

As a result, on little-endian we continue testing loads from addresses:

 - (__u8 *)&ctx->remote_port + 3
 - (__u8 *)&ctx->remote_port + 4

which map to the zero padding following the remote_port field, and don't
break the tests because there is no observable change.

While on big-endian, we observe breakage because tests expect to see zeros
for values loaded from:

 - (__u8 *)&ctx->remote_port - 1
 - (__u8 *)&ctx->remote_port - 2

Above addresses map to ->remote_ip6 field, which precedes ->remote_port,
and are populated during the bpf_sk_lookup IPv6 tests.

Unsurprisingly, on s390x we observe:

  #136/38 sk_lookup/narrow access to ctx v4:OK
  #136/39 sk_lookup/narrow access to ctx v6:FAIL

Fix it by removing the checks for 1-byte loads from offsets outside of the
->remote_port field.

Fixes: 9a69e2b385f4 ("bpf: Make remote_port field in struct bpf_sk_lookup 16-bit wide")
Suggested-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20220319183356.233666-3-jakub@cloudflare.com
---
 tools/testing/selftests/bpf/progs/test_sk_lookup.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup.c b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
index bf5b7caefdd0..38b7a1fe67b6 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_lookup.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
@@ -413,8 +413,7 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx)
 
 	/* Narrow loads from remote_port field. Expect SRC_PORT. */
 	if (LSB(ctx->remote_port, 0) != ((SRC_PORT >> 0) & 0xff) ||
-	    LSB(ctx->remote_port, 1) != ((SRC_PORT >> 8) & 0xff) ||
-	    LSB(ctx->remote_port, 2) != 0 || LSB(ctx->remote_port, 3) != 0)
+	    LSB(ctx->remote_port, 1) != ((SRC_PORT >> 8) & 0xff))
 		return SK_DROP;
 	if (LSW(ctx->remote_port, 0) != SRC_PORT)
 		return SK_DROP;
-- 
cgit 


From ce5236800116d18ac2c06c58f73930406e3ea4be Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Sat, 19 Mar 2022 19:33:56 +0100
Subject: selftests/bpf: Fix test for 4-byte load from remote_port on
 big-endian

The context access converter rewrites the 4-byte load from
bpf_sk_lookup->remote_port to a 2-byte load from bpf_sk_lookup_kern
structure.

It means that we cannot treat the destination register contents as a 32-bit
value, or the code will not be portable across big- and little-endian
architectures.

This is exactly the same case as with 4-byte loads from bpf_sock->dst_port
so follow the approach outlined in [1] and treat the register contents as a
16-bit value in the test.

[1]: https://lore.kernel.org/bpf/20220317113920.1068535-5-jakub@cloudflare.com/

Fixes: 2ed0dc5937d3 ("selftests/bpf: Cover 4-byte load from remote_port in bpf_sk_lookup")
Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20220319183356.233666-4-jakub@cloudflare.com
---
 tools/testing/selftests/bpf/progs/test_sk_lookup.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup.c b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
index 38b7a1fe67b6..6058dcb11b36 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_lookup.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
@@ -418,9 +418,15 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx)
 	if (LSW(ctx->remote_port, 0) != SRC_PORT)
 		return SK_DROP;
 
-	/* Load from remote_port field with zero padding (backward compatibility) */
+	/*
+	 * NOTE: 4-byte load from bpf_sk_lookup at remote_port offset
+	 * is quirky. It gets rewritten by the access converter to a
+	 * 2-byte load for backward compatibility. Treating the load
+	 * result as a be16 value makes the code portable across
+	 * little- and big-endian platforms.
+	 */
 	val_u32 = *(__u32 *)&ctx->remote_port;
-	if (val_u32 != bpf_htonl(bpf_ntohs(SRC_PORT) << 16))
+	if (val_u32 != SRC_PORT)
 		return SK_DROP;
 
 	/* Narrow loads from local_port field. Expect DST_PORT. */
-- 
cgit 


From e1cc1f39981b06ba22a0c92e800e9fd8ba59d2d3 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 14 Mar 2022 11:20:42 -0700
Subject: selftests/bpf: Test skipping stacktrace

Add a test case for stacktrace with skip > 0 using a small sized
buffer.  It didn't support skipping entries greater than or equal to
the size of buffer and filled the skipped part with 0.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20220314182042.71025-2-namhyung@kernel.org
---
 .../selftests/bpf/prog_tests/stacktrace_map_skip.c | 63 ++++++++++++++++++++
 .../selftests/bpf/progs/stacktrace_map_skip.c      | 68 ++++++++++++++++++++++
 2 files changed, 131 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
 create mode 100644 tools/testing/selftests/bpf/progs/stacktrace_map_skip.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
new file mode 100644
index 000000000000..1932b1e0685c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "stacktrace_map_skip.skel.h"
+
+#define TEST_STACK_DEPTH  2
+
+void test_stacktrace_map_skip(void)
+{
+	struct stacktrace_map_skip *skel;
+	int stackid_hmap_fd, stackmap_fd, stack_amap_fd;
+	int err, stack_trace_len;
+
+	skel = stacktrace_map_skip__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+		return;
+
+	/* find map fds */
+	stackid_hmap_fd = bpf_map__fd(skel->maps.stackid_hmap);
+	if (!ASSERT_GE(stackid_hmap_fd, 0, "stackid_hmap fd"))
+		goto out;
+
+	stackmap_fd = bpf_map__fd(skel->maps.stackmap);
+	if (!ASSERT_GE(stackmap_fd, 0, "stackmap fd"))
+		goto out;
+
+	stack_amap_fd = bpf_map__fd(skel->maps.stack_amap);
+	if (!ASSERT_GE(stack_amap_fd, 0, "stack_amap fd"))
+		goto out;
+
+	skel->bss->pid = getpid();
+
+	err = stacktrace_map_skip__attach(skel);
+	if (!ASSERT_OK(err, "skel_attach"))
+		goto out;
+
+	/* give some time for bpf program run */
+	sleep(1);
+
+	/* disable stack trace collection */
+	skel->bss->control = 1;
+
+	/* for every element in stackid_hmap, we can find a corresponding one
+	 * in stackmap, and vise versa.
+	 */
+	err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
+	if (!ASSERT_OK(err, "compare_map_keys stackid_hmap vs. stackmap"))
+		goto out;
+
+	err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
+	if (!ASSERT_OK(err, "compare_map_keys stackmap vs. stackid_hmap"))
+		goto out;
+
+	stack_trace_len = TEST_STACK_DEPTH * sizeof(__u64);
+	err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len);
+	if (!ASSERT_OK(err, "compare_stack_ips stackmap vs. stack_amap"))
+		goto out;
+
+	if (!ASSERT_EQ(skel->bss->failed, 0, "skip_failed"))
+		goto out;
+
+out:
+	stacktrace_map_skip__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/stacktrace_map_skip.c b/tools/testing/selftests/bpf/progs/stacktrace_map_skip.c
new file mode 100644
index 000000000000..2eb297df3dd6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/stacktrace_map_skip.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+#define TEST_STACK_DEPTH	2
+#define TEST_MAX_ENTRIES	16384
+
+typedef __u64 stack_trace_t[TEST_STACK_DEPTH];
+
+struct {
+	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
+	__uint(max_entries, TEST_MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, stack_trace_t);
+} stackmap SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, TEST_MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u32);
+} stackid_hmap SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, TEST_MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, stack_trace_t);
+} stack_amap SEC(".maps");
+
+int pid = 0;
+int control = 0;
+int failed = 0;
+
+SEC("tracepoint/sched/sched_switch")
+int oncpu(struct trace_event_raw_sched_switch *ctx)
+{
+	__u32 max_len = TEST_STACK_DEPTH * sizeof(__u64);
+	__u32 key = 0, val = 0;
+	__u64 *stack_p;
+
+	if (pid != (bpf_get_current_pid_tgid() >> 32))
+		return 0;
+
+	if (control)
+		return 0;
+
+	/* it should allow skipping whole buffer size entries */
+	key = bpf_get_stackid(ctx, &stackmap, TEST_STACK_DEPTH);
+	if ((int)key >= 0) {
+		/* The size of stackmap and stack_amap should be the same */
+		bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
+		stack_p = bpf_map_lookup_elem(&stack_amap, &key);
+		if (stack_p) {
+			bpf_get_stack(ctx, stack_p, max_len, TEST_STACK_DEPTH);
+			/* it wrongly skipped all the entries and filled zero */
+			if (stack_p[0] == 0)
+				failed = 1;
+		}
+	} else {
+		/* old kernel doesn't support skipping that many entries */
+		failed = 2;
+	}
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit 


From ec80906b0fbd7be11e3e960813b977b1ffe5f8fe Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Mon, 21 Mar 2022 10:41:49 +0800
Subject: selftests/bpf/test_lirc_mode2.sh: Exit with proper code

When test_lirc_mode2_user exec failed, the test report failed but still
exit with 0. Fix it by exiting with an error code.

Another issue is for the LIRCDEV checking. With bash -n, we need to quote
the variable, or it will always be true. So if test_lirc_mode2_user was
not run, just exit with skip code.

Fixes: 6bdd533cee9a ("bpf: add selftest for lirc_mode2 type program")
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20220321024149.157861-1-liuhangbin@gmail.com
---
 tools/testing/selftests/bpf/test_lirc_mode2.sh | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/test_lirc_mode2.sh b/tools/testing/selftests/bpf/test_lirc_mode2.sh
index ec4e15948e40..5252b91f48a1 100755
--- a/tools/testing/selftests/bpf/test_lirc_mode2.sh
+++ b/tools/testing/selftests/bpf/test_lirc_mode2.sh
@@ -3,6 +3,7 @@
 
 # Kselftest framework requirement - SKIP code is 4.
 ksft_skip=4
+ret=$ksft_skip
 
 msg="skip all tests:"
 if [ $UID != 0 ]; then
@@ -25,7 +26,7 @@ do
 	fi
 done
 
-if [ -n $LIRCDEV ];
+if [ -n "$LIRCDEV" ];
 then
 	TYPE=lirc_mode2
 	./test_lirc_mode2_user $LIRCDEV $INPUTDEV
@@ -36,3 +37,5 @@ then
 		echo -e ${GREEN}"PASS: $TYPE"${NC}
 	fi
 fi
+
+exit $ret
-- 
cgit 


From 72e725887413f031fa72d27fea5795450bab1940 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Sat, 24 Jul 2021 23:43:58 -0400
Subject: selftests/vm/transhuge-stress: Support file-backed PMD folios

Add a -f <filename> option to test PMD folios on files

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 tools/testing/selftests/vm/transhuge-stress.c | 35 ++++++++++++++++++---------
 1 file changed, 24 insertions(+), 11 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/transhuge-stress.c b/tools/testing/selftests/vm/transhuge-stress.c
index 5e4c036f6ad3..a03cb3fce1f6 100644
--- a/tools/testing/selftests/vm/transhuge-stress.c
+++ b/tools/testing/selftests/vm/transhuge-stress.c
@@ -26,15 +26,17 @@
 #define PAGEMAP_PFN(ent)	((ent) & ((1ull << 55) - 1))
 
 int pagemap_fd;
+int backing_fd = -1;
+int mmap_flags = MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE;
+#define PROT_RW (PROT_READ | PROT_WRITE)
 
 int64_t allocate_transhuge(void *ptr)
 {
 	uint64_t ent[2];
 
 	/* drop pmd */
-	if (mmap(ptr, HPAGE_SIZE, PROT_READ | PROT_WRITE,
-				MAP_FIXED | MAP_ANONYMOUS |
-				MAP_NORESERVE | MAP_PRIVATE, -1, 0) != ptr)
+	if (mmap(ptr, HPAGE_SIZE, PROT_RW, MAP_FIXED | mmap_flags,
+		 backing_fd, 0) != ptr)
 		errx(2, "mmap transhuge");
 
 	if (madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE))
@@ -60,6 +62,8 @@ int main(int argc, char **argv)
 	size_t ram, len;
 	void *ptr, *p;
 	struct timespec a, b;
+	int i = 0;
+	char *name = NULL;
 	double s;
 	uint8_t *map;
 	size_t map_len;
@@ -69,13 +73,23 @@ int main(int argc, char **argv)
 		ram = SIZE_MAX / 4;
 	else
 		ram *= sysconf(_SC_PAGESIZE);
+	len = ram;
+
+	while (++i < argc) {
+		if (!strcmp(argv[i], "-h"))
+			errx(1, "usage: %s [size in MiB]", argv[0]);
+		else if (!strcmp(argv[i], "-f"))
+			name = argv[++i];
+		else
+			len = atoll(argv[i]) << 20;
+	}
 
-	if (argc == 1)
-		len = ram;
-	else if (!strcmp(argv[1], "-h"))
-		errx(1, "usage: %s [size in MiB]", argv[0]);
-	else
-		len = atoll(argv[1]) << 20;
+	if (name) {
+		backing_fd = open(name, O_RDWR);
+		if (backing_fd == -1)
+			errx(2, "open %s", name);
+		mmap_flags = MAP_SHARED;
+	}
 
 	warnx("allocate %zd transhuge pages, using %zd MiB virtual memory"
 	      " and %zd MiB of ram", len >> HPAGE_SHIFT, len >> 20,
@@ -86,8 +100,7 @@ int main(int argc, char **argv)
 		err(2, "open pagemap");
 
 	len -= len % HPAGE_SIZE;
-	ptr = mmap(NULL, len + HPAGE_SIZE, PROT_READ | PROT_WRITE,
-			MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE, -1, 0);
+	ptr = mmap(NULL, len + HPAGE_SIZE, PROT_RW, mmap_flags, backing_fd, 0);
 	if (ptr == MAP_FAILED)
 		err(2, "initial mmap");
 	ptr += HPAGE_SIZE - (uintptr_t)ptr % HPAGE_SIZE;
-- 
cgit 


From 94f19e1ec38ff67311b176f16f87685a8e110161 Mon Sep 17 00:00:00 2001
From: Guo Zhengkui <guozhengkui@vivo.com>
Date: Sat, 19 Mar 2022 15:37:30 +0800
Subject: selftests: net: change fprintf format specifiers

`cur64`, `start64` and `ts_delta` are int64_t. Change format
specifiers in fprintf from `"%lu"` to `"%" PRId64` to adapt
to 32-bit and 64-bit systems.

Signed-off-by: Guo Zhengkui <guozhengkui@vivo.com>
Link: https://lore.kernel.org/r/20220319073730.5235-1-guozhengkui@vivo.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/txtimestamp.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c
index fabb1d555ee5..10f2fde3686b 100644
--- a/tools/testing/selftests/net/txtimestamp.c
+++ b/tools/testing/selftests/net/txtimestamp.c
@@ -161,7 +161,7 @@ static void validate_timestamp(struct timespec *cur, int min_delay)
 	max_delay = min_delay + cfg_delay_tolerance_usec;
 
 	if (cur64 < start64 + min_delay || cur64 > start64 + max_delay) {
-		fprintf(stderr, "ERROR: %lu us expected between %d and %d\n",
+		fprintf(stderr, "ERROR: %" PRId64 " us expected between %d and %d\n",
 				cur64 - start64, min_delay, max_delay);
 		test_failed = true;
 	}
@@ -170,9 +170,9 @@ static void validate_timestamp(struct timespec *cur, int min_delay)
 static void __print_ts_delta_formatted(int64_t ts_delta)
 {
 	if (cfg_print_nsec)
-		fprintf(stderr, "%lu ns", ts_delta);
+		fprintf(stderr, "%" PRId64 " ns", ts_delta);
 	else
-		fprintf(stderr, "%lu us", ts_delta / NSEC_PER_USEC);
+		fprintf(stderr, "%" PRId64 " us", ts_delta / NSEC_PER_USEC);
 }
 
 static void __print_timestamp(const char *name, struct timespec *cur,
-- 
cgit 


From 917b149ac3d5c8b3a582559b9779ee29d69fad78 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Mon, 21 Mar 2022 19:51:01 +0200
Subject: selftests: forwarding: Disable learning before link up

Disable learning before bringing the bridge port up in order to avoid
the FDB being populated and the test failing.

Before:

 # ./bridge_locked_port.sh
 RTNETLINK answers: File exists
 TEST: Locked port ipv4                                              [FAIL]
         Ping worked after locking port, but before adding FDB entry
 TEST: Locked port ipv6                                              [ OK ]
 TEST: Locked port vlan                                              [ OK ]

After:

 # ./bridge_locked_port.sh
 TEST: Locked port ipv4                                              [ OK ]
 TEST: Locked port ipv6                                              [ OK ]
 TEST: Locked port vlan                                              [ OK ]

Fixes: b2b681a41251 ("selftests: forwarding: tests of locked port feature")
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 tools/testing/selftests/net/forwarding/bridge_locked_port.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/forwarding/bridge_locked_port.sh b/tools/testing/selftests/net/forwarding/bridge_locked_port.sh
index 6e98efa6d371..67ce59bb3555 100755
--- a/tools/testing/selftests/net/forwarding/bridge_locked_port.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_locked_port.sh
@@ -41,11 +41,11 @@ switch_create()
 	ip link set dev $swp1 master br0
 	ip link set dev $swp2 master br0
 
+	bridge link set dev $swp1 learning off
+
 	ip link set dev br0 up
 	ip link set dev $swp1 up
 	ip link set dev $swp2 up
-
-	bridge link set dev $swp1 learning off
 }
 
 switch_destroy()
-- 
cgit 


From f70f5f1a8ffff4c943182aa80c600aeec1b9b001 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Mon, 21 Mar 2022 19:51:02 +0200
Subject: selftests: forwarding: Use same VRF for port and VLAN upper

The test creates a separate VRF for the VLAN upper, but does not destroy
it during cleanup, resulting in "RTNETLINK answers: File exists" errors.

Fix by using the same VRF for the port and its VLAN upper. This is OK
since their IP addresses do not overlap.

Before:

 # ./bridge_locked_port.sh
 TEST: Locked port ipv4                                              [ OK ]
 TEST: Locked port ipv6                                              [ OK ]
 TEST: Locked port vlan                                              [ OK ]

 # ./bridge_locked_port.sh
 RTNETLINK answers: File exists
 RTNETLINK answers: File exists
 RTNETLINK answers: File exists
 RTNETLINK answers: File exists
 RTNETLINK answers: File exists
 RTNETLINK answers: File exists
 TEST: Locked port ipv4                                              [ OK ]
 TEST: Locked port ipv6                                              [ OK ]
 TEST: Locked port vlan                                              [ OK ]

After:

 # ./bridge_locked_port.sh
 TEST: Locked port ipv4                                              [ OK ]
 TEST: Locked port ipv6                                              [ OK ]
 TEST: Locked port vlan                                              [ OK ]

 # ./bridge_locked_port.sh
 TEST: Locked port ipv4                                              [ OK ]
 TEST: Locked port ipv6                                              [ OK ]
 TEST: Locked port vlan                                              [ OK ]

Fixes: b2b681a41251 ("selftests: forwarding: tests of locked port feature")
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 tools/testing/selftests/net/forwarding/bridge_locked_port.sh | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/forwarding/bridge_locked_port.sh b/tools/testing/selftests/net/forwarding/bridge_locked_port.sh
index 67ce59bb3555..5b02b6b60ce7 100755
--- a/tools/testing/selftests/net/forwarding/bridge_locked_port.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_locked_port.sh
@@ -9,9 +9,7 @@ source lib.sh
 h1_create()
 {
 	simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
-	vrf_create "vrf-vlan-h1"
-	ip link set dev vrf-vlan-h1 up
-	vlan_create $h1 100 vrf-vlan-h1 198.51.100.1/24
+	vlan_create $h1 100 v$h1 198.51.100.1/24
 }
 
 h1_destroy()
@@ -23,9 +21,7 @@ h1_destroy()
 h2_create()
 {
 	simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64
-	vrf_create "vrf-vlan-h2"
-	ip link set dev vrf-vlan-h2 up
-	vlan_create $h2 100 vrf-vlan-h2 198.51.100.2/24
+	vlan_create $h2 100 v$h2 198.51.100.2/24
 }
 
 h2_destroy()
-- 
cgit 


From 7f0059b58f0257d895fafd2f2e3afe3bbdf21e64 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Mon, 21 Mar 2022 21:54:19 -0700
Subject: selftests/bpf: Fix kprobe_multi test.

When compiler emits endbr insn the function address could
be different than what bpf_get_func_ip() reports.
This is a short term workaround.
bpf_get_func_ip() will be fixed later.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/progs/kprobe_multi.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi.c b/tools/testing/selftests/bpf/progs/kprobe_multi.c
index af27d2c6fce8..600be50800f8 100644
--- a/tools/testing/selftests/bpf/progs/kprobe_multi.c
+++ b/tools/testing/selftests/bpf/progs/kprobe_multi.c
@@ -36,13 +36,15 @@ __u64 kretprobe_test6_result = 0;
 __u64 kretprobe_test7_result = 0;
 __u64 kretprobe_test8_result = 0;
 
+extern bool CONFIG_X86_KERNEL_IBT __kconfig __weak;
+
 static void kprobe_multi_check(void *ctx, bool is_return)
 {
 	if (bpf_get_current_pid_tgid() >> 32 != pid)
 		return;
 
 	__u64 cookie = test_cookie ? bpf_get_attach_cookie(ctx) : 0;
-	__u64 addr = bpf_get_func_ip(ctx);
+	__u64 addr = bpf_get_func_ip(ctx) - (CONFIG_X86_KERNEL_IBT ? 4 : 0);
 
 #define SET(__var, __addr, __cookie) ({			\
 	if (((const void *) addr == __addr) &&		\
-- 
cgit 


From 6d1a6f464efd596779d1b272b3dc8170c5fa189f Mon Sep 17 00:00:00 2001
From: Vincent Chen <vincent.chen@sifive.com>
Date: Tue, 8 Mar 2022 16:32:53 +0800
Subject: rseq/selftests: Add support for RISC-V

Add support for RISC-V in the rseq selftests, which covers both
64-bit and 32-bit ISA with little endian mode.

Signed-off-by: Vincent Chen <vincent.chen@sifive.com>
Tested-by: Eric Lin <eric.lin@sifive.com>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 tools/testing/selftests/rseq/param_test.c |  23 +
 tools/testing/selftests/rseq/rseq-riscv.h | 677 ++++++++++++++++++++++++++++++
 tools/testing/selftests/rseq/rseq.h       |   2 +
 3 files changed, 702 insertions(+)
 create mode 100644 tools/testing/selftests/rseq/rseq-riscv.h

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c
index 699ad5f93c34..0a6b8eafd444 100644
--- a/tools/testing/selftests/rseq/param_test.c
+++ b/tools/testing/selftests/rseq/param_test.c
@@ -207,6 +207,29 @@ unsigned int yield_mod_cnt, nr_abort;
 	"addiu " INJECT_ASM_REG ", -1\n\t" \
 	"bnez " INJECT_ASM_REG ", 222b\n\t" \
 	"333:\n\t"
+#elif defined(__riscv)
+
+#define RSEQ_INJECT_INPUT \
+	, [loop_cnt_1]"m"(loop_cnt[1]) \
+	, [loop_cnt_2]"m"(loop_cnt[2]) \
+	, [loop_cnt_3]"m"(loop_cnt[3]) \
+	, [loop_cnt_4]"m"(loop_cnt[4]) \
+	, [loop_cnt_5]"m"(loop_cnt[5]) \
+	, [loop_cnt_6]"m"(loop_cnt[6])
+
+#define INJECT_ASM_REG	"t1"
+
+#define RSEQ_INJECT_CLOBBER \
+	, INJECT_ASM_REG
+
+#define RSEQ_INJECT_ASM(n)					\
+	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t"		\
+	"beqz " INJECT_ASM_REG ", 333f\n\t"			\
+	"222:\n\t"						\
+	"addi  " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t"	\
+	"bnez " INJECT_ASM_REG ", 222b\n\t"			\
+	"333:\n\t"
+
 
 #else
 #error unsupported target
diff --git a/tools/testing/selftests/rseq/rseq-riscv.h b/tools/testing/selftests/rseq/rseq-riscv.h
new file mode 100644
index 000000000000..b86642f90d7f
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-riscv.h
@@ -0,0 +1,677 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Select the instruction "csrw mhartid, x0" as the RSEQ_SIG. Unlike
+ * other architectures, the ebreak instruction has no immediate field for
+ * distinguishing purposes. Hence, ebreak is not suitable as RSEQ_SIG.
+ * "csrw mhartid, x0" can also satisfy the RSEQ requirement because it
+ * is an uncommon instruction and will raise an illegal instruction
+ * exception when executed in all modes.
+ */
+#include <endian.h>
+
+#if defined(__BYTE_ORDER) ? (__BYTE_ORDER == __LITTLE_ENDIAN) : defined(__LITTLE_ENDIAN)
+#define RSEQ_SIG   0xf1401073  /* csrr mhartid, x0 */
+#else
+#error "Currently, RSEQ only supports Little-Endian version"
+#endif
+
+#if __riscv_xlen == 64
+#define __REG_SEL(a, b)	a
+#elif __riscv_xlen == 32
+#define __REG_SEL(a, b)	b
+#endif
+
+#define REG_L	__REG_SEL("ld ", "lw ")
+#define REG_S	__REG_SEL("sd ", "sw ")
+
+#define RISCV_FENCE(p, s) \
+	__asm__ __volatile__ ("fence " #p "," #s : : : "memory")
+#define rseq_smp_mb()	RISCV_FENCE(rw, rw)
+#define rseq_smp_rmb()	RISCV_FENCE(r, r)
+#define rseq_smp_wmb()	RISCV_FENCE(w, w)
+#define RSEQ_ASM_TMP_REG_1	"t6"
+#define RSEQ_ASM_TMP_REG_2	"t5"
+#define RSEQ_ASM_TMP_REG_3	"t4"
+#define RSEQ_ASM_TMP_REG_4	"t3"
+
+#define rseq_smp_load_acquire(p)					\
+__extension__ ({							\
+	__typeof(*(p)) ____p1 = RSEQ_READ_ONCE(*(p));			\
+	RISCV_FENCE(r, rw)						\
+	____p1;								\
+})
+
+#define rseq_smp_acquire__after_ctrl_dep()	rseq_smp_rmb()
+
+#define rseq_smp_store_release(p, v)					\
+do {									\
+	RISCV_FENCE(rw, w);						\
+	RSEQ_WRITE_ONCE(*(p), v);						\
+} while (0)
+
+#ifdef RSEQ_SKIP_FASTPATH
+#include "rseq-skip.h"
+#else /* !RSEQ_SKIP_FASTPATH */
+
+#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, start_ip,	\
+				post_commit_offset, abort_ip)		\
+	".pushsection	__rseq_cs, \"aw\"\n"				\
+	".balign	32\n"						\
+	__rseq_str(label) ":\n"						\
+	".long	" __rseq_str(version) ", " __rseq_str(flags) "\n"	\
+	".quad	" __rseq_str(start_ip) ", "				\
+			  __rseq_str(post_commit_offset) ", "		\
+			  __rseq_str(abort_ip) "\n"			\
+	".popsection\n\t"						\
+	".pushsection __rseq_cs_ptr_array, \"aw\"\n"			\
+	".quad " __rseq_str(label) "b\n"				\
+	".popsection\n"
+
+#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \
+	__RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip,		 \
+				((post_commit_ip) - (start_ip)), abort_ip)
+
+/*
+ * Exit points of a rseq critical section consist of all instructions outside
+ * of the critical section where a critical section can either branch to or
+ * reach through the normal course of its execution. The abort IP and the
+ * post-commit IP are already part of the __rseq_cs section and should not be
+ * explicitly defined as additional exit points. Knowing all exit points is
+ * useful to assist debuggers stepping over the critical section.
+ */
+#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip)			\
+	".pushsection __rseq_exit_point_array, \"aw\"\n"		\
+	".quad " __rseq_str(start_ip) ", " __rseq_str(exit_ip) "\n"	\
+	".popsection\n"
+
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs)		\
+	RSEQ_INJECT_ASM(1)						\
+	"la	"RSEQ_ASM_TMP_REG_1 ", " __rseq_str(cs_label) "\n"	\
+	REG_S	RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(rseq_cs) "]\n"	\
+	__rseq_str(label) ":\n"
+
+#define RSEQ_ASM_DEFINE_ABORT(label, abort_label)			\
+	"j	222f\n"							\
+	".balign	4\n"						\
+	".long "	__rseq_str(RSEQ_SIG) "\n"			\
+	__rseq_str(label) ":\n"						\
+	"j	%l[" __rseq_str(abort_label) "]\n"			\
+	"222:\n"
+
+#define RSEQ_ASM_OP_STORE(value, var)					\
+	REG_S	"%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n"
+
+#define RSEQ_ASM_OP_CMPEQ(var, expect, label)				\
+	REG_L	RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n"		\
+	"bne	"RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ,"	\
+		  __rseq_str(label) "\n"
+
+#define RSEQ_ASM_OP_CMPEQ32(var, expect, label)				\
+	"lw	"RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n"	\
+	"bne	"RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ,"	\
+		  __rseq_str(label) "\n"
+
+#define RSEQ_ASM_OP_CMPNE(var, expect, label)				\
+	REG_L	RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n"		\
+	"beq	"RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ,"	\
+		  __rseq_str(label) "\n"
+
+#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label)		\
+	RSEQ_INJECT_ASM(2)						\
+	RSEQ_ASM_OP_CMPEQ32(current_cpu_id, cpu_id, label)
+
+#define RSEQ_ASM_OP_R_LOAD(var)						\
+	REG_L	RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n"
+
+#define RSEQ_ASM_OP_R_STORE(var)					\
+	REG_S	RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n"
+
+#define RSEQ_ASM_OP_R_LOAD_OFF(offset)					\
+	"add	"RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(offset) "], "	\
+		 RSEQ_ASM_TMP_REG_1 "\n"				\
+	REG_L	RSEQ_ASM_TMP_REG_1 ", (" RSEQ_ASM_TMP_REG_1 ")\n"
+
+#define RSEQ_ASM_OP_R_ADD(count)					\
+	"add	"RSEQ_ASM_TMP_REG_1 ", " RSEQ_ASM_TMP_REG_1		\
+		", %[" __rseq_str(count) "]\n"
+
+#define RSEQ_ASM_OP_FINAL_STORE(value, var, post_commit_label)		\
+	RSEQ_ASM_OP_STORE(value, var)					\
+	__rseq_str(post_commit_label) ":\n"
+
+#define RSEQ_ASM_OP_FINAL_STORE_RELEASE(value, var, post_commit_label)	\
+	"fence	rw, w\n"						\
+	RSEQ_ASM_OP_STORE(value, var)					\
+	__rseq_str(post_commit_label) ":\n"
+
+#define RSEQ_ASM_OP_R_FINAL_STORE(var, post_commit_label)		\
+	REG_S	RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n"		\
+	__rseq_str(post_commit_label) ":\n"
+
+#define RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len)				\
+	"beqz	%[" __rseq_str(len) "], 333f\n"				\
+	"mv	" RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(len) "]\n"	\
+	"mv	" RSEQ_ASM_TMP_REG_2 ", %[" __rseq_str(src) "]\n"	\
+	"mv	" RSEQ_ASM_TMP_REG_3 ", %[" __rseq_str(dst) "]\n"	\
+	"222:\n"							\
+	"lb	" RSEQ_ASM_TMP_REG_4 ", 0(" RSEQ_ASM_TMP_REG_2 ")\n"	\
+	"sb	" RSEQ_ASM_TMP_REG_4 ", 0(" RSEQ_ASM_TMP_REG_3 ")\n"	\
+	"addi	" RSEQ_ASM_TMP_REG_1 ", " RSEQ_ASM_TMP_REG_1 ", -1\n"	\
+	"addi	" RSEQ_ASM_TMP_REG_2 ", " RSEQ_ASM_TMP_REG_2 ", 1\n"	\
+	"addi	" RSEQ_ASM_TMP_REG_3 ", " RSEQ_ASM_TMP_REG_3 ", 1\n"	\
+	"bnez	" RSEQ_ASM_TMP_REG_1 ", 222b\n"				\
+	"333:\n"
+
+#define RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, post_commit_label)		\
+	"mv	" RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(ptr) "]\n"	\
+	RSEQ_ASM_OP_R_ADD(off)						\
+	REG_L	  RSEQ_ASM_TMP_REG_1 ", 0(" RSEQ_ASM_TMP_REG_1 ")\n"	\
+	RSEQ_ASM_OP_R_ADD(inc)						\
+	__rseq_str(post_commit_label) ":\n"
+
+static inline __always_inline
+int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]")
+#ifdef RSEQ_COMPARE_TWICE
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]")
+#endif
+				  RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+				  RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+				  RSEQ_INJECT_ASM(3)
+				  RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]")
+				  RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+				  RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
+				  RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]")
+#endif
+				  RSEQ_ASM_OP_FINAL_STORE(newv, v, 3)
+				  RSEQ_INJECT_ASM(5)
+				  RSEQ_ASM_DEFINE_ABORT(4, abort)
+				  : /* gcc asm goto does not allow outputs */
+				  : [cpu_id]		"r" (cpu),
+				    [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+				    [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+				    [v]			"m" (*v),
+				    [expect]		"r" (expect),
+				    [newv]		"r" (newv)
+				    RSEQ_INJECT_INPUT
+				  : "memory", RSEQ_ASM_TMP_REG_1
+				    RSEQ_INJECT_CLOBBER
+				  : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+				    , error1, error2
+#endif
+	);
+
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __always_inline
+int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+			       off_t voffp, intptr_t *load, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]")
+#ifdef RSEQ_COMPARE_TWICE
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]")
+#endif
+				  RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+				  RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+				  RSEQ_INJECT_ASM(3)
+				  RSEQ_ASM_OP_CMPNE(v, expectnot, "%l[cmpfail]")
+				  RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+				  RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
+				  RSEQ_ASM_OP_CMPNE(v, expectnot, "%l[error2]")
+#endif
+				  RSEQ_ASM_OP_R_LOAD(v)
+				  RSEQ_ASM_OP_R_STORE(load)
+				  RSEQ_ASM_OP_R_LOAD_OFF(voffp)
+				  RSEQ_ASM_OP_R_FINAL_STORE(v, 3)
+				  RSEQ_INJECT_ASM(5)
+				  RSEQ_ASM_DEFINE_ABORT(4, abort)
+				  : /* gcc asm goto does not allow outputs */
+				  : [cpu_id]		"r" (cpu),
+				    [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+				    [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+				    [v]			"m" (*v),
+				    [expectnot]		"r" (expectnot),
+				    [load]		"m" (*load),
+				    [voffp]		"r" (voffp)
+				    RSEQ_INJECT_INPUT
+				  : "memory", RSEQ_ASM_TMP_REG_1
+				    RSEQ_INJECT_CLOBBER
+				  : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+				    , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __always_inline
+int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+#ifdef RSEQ_COMPARE_TWICE
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
+#endif
+				  RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+				  RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+				  RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+				  RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
+#endif
+				  RSEQ_ASM_OP_R_LOAD(v)
+				  RSEQ_ASM_OP_R_ADD(count)
+				  RSEQ_ASM_OP_R_FINAL_STORE(v, 3)
+				  RSEQ_INJECT_ASM(4)
+				  RSEQ_ASM_DEFINE_ABORT(4, abort)
+				  : /* gcc asm goto does not allow outputs */
+				  : [cpu_id]		"r" (cpu),
+				    [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+				    [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+				    [v]			"m" (*v),
+				    [count]		"r" (count)
+				    RSEQ_INJECT_INPUT
+				  : "memory", RSEQ_ASM_TMP_REG_1
+				    RSEQ_INJECT_CLOBBER
+				  : abort
+#ifdef RSEQ_COMPARE_TWICE
+				    , error1
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __always_inline
+int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+				 intptr_t *v2, intptr_t newv2,
+				 intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]")
+#ifdef RSEQ_COMPARE_TWICE
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]")
+#endif
+				  RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+				  RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+				  RSEQ_INJECT_ASM(3)
+				  RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]")
+				  RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+				  RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
+				  RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]")
+#endif
+				  RSEQ_ASM_OP_STORE(newv2, v2)
+				  RSEQ_INJECT_ASM(5)
+				  RSEQ_ASM_OP_FINAL_STORE(newv, v, 3)
+				  RSEQ_INJECT_ASM(6)
+				  RSEQ_ASM_DEFINE_ABORT(4, abort)
+				  : /* gcc asm goto does not allow outputs */
+				  : [cpu_id]		"r" (cpu),
+				    [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+				    [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+				    [expect]		"r" (expect),
+				    [v]			"m" (*v),
+				    [newv]		"r" (newv),
+				    [v2]			"m" (*v2),
+				    [newv2]		"r" (newv2)
+				    RSEQ_INJECT_INPUT
+				  : "memory", RSEQ_ASM_TMP_REG_1
+				    RSEQ_INJECT_CLOBBER
+				  : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+				    , error1, error2
+#endif
+	);
+
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __always_inline
+int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+					 intptr_t *v2, intptr_t newv2,
+					 intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]")
+#ifdef RSEQ_COMPARE_TWICE
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]")
+#endif
+				  RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+				  RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+				  RSEQ_INJECT_ASM(3)
+				  RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]")
+				  RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+				  RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
+				  RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]")
+#endif
+				  RSEQ_ASM_OP_STORE(newv2, v2)
+				  RSEQ_INJECT_ASM(5)
+				  RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3)
+				  RSEQ_INJECT_ASM(6)
+				  RSEQ_ASM_DEFINE_ABORT(4, abort)
+				  : /* gcc asm goto does not allow outputs */
+				  : [cpu_id]		"r" (cpu),
+				    [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+				    [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+				    [expect]		"r" (expect),
+				    [v]			"m" (*v),
+				    [newv]		"r" (newv),
+				    [v2]			"m" (*v2),
+				    [newv2]		"r" (newv2)
+				    RSEQ_INJECT_INPUT
+				  : "memory", RSEQ_ASM_TMP_REG_1
+				    RSEQ_INJECT_CLOBBER
+				  : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+				    , error1, error2
+#endif
+	);
+
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __always_inline
+int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+			      intptr_t *v2, intptr_t expect2,
+			      intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]")
+#ifdef RSEQ_COMPARE_TWICE
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]")
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error3]")
+#endif
+				  RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+				  RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+				  RSEQ_INJECT_ASM(3)
+				  RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]")
+				  RSEQ_INJECT_ASM(4)
+				  RSEQ_ASM_OP_CMPEQ(v2, expect2, "%l[cmpfail]")
+				  RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+				  RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
+				  RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]")
+				  RSEQ_ASM_OP_CMPEQ(v2, expect2, "%l[error3]")
+#endif
+				  RSEQ_ASM_OP_FINAL_STORE(newv, v, 3)
+				  RSEQ_INJECT_ASM(6)
+				  RSEQ_ASM_DEFINE_ABORT(4, abort)
+				  : /* gcc asm goto does not allow outputs */
+				  : [cpu_id]		"r" (cpu),
+				    [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+				    [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+				    [v]			"m" (*v),
+				    [expect]		"r" (expect),
+				    [v2]			"m" (*v2),
+				    [expect2]		"r" (expect2),
+				    [newv]		"r" (newv)
+				    RSEQ_INJECT_INPUT
+				  : "memory", RSEQ_ASM_TMP_REG_1
+				    RSEQ_INJECT_CLOBBER
+				  : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+				    , error1, error2, error3
+#endif
+	);
+
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+error3:
+	rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+static inline __always_inline
+int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+				 void *dst, void *src, size_t len,
+				 intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+	__asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]")
+#ifdef RSEQ_COMPARE_TWICE
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]")
+#endif
+				  RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+				  RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+				  RSEQ_INJECT_ASM(3)
+				  RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]")
+				  RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+				  RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
+				  RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]")
+#endif
+				  RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len)
+				  RSEQ_INJECT_ASM(5)
+				  RSEQ_ASM_OP_FINAL_STORE(newv, v, 3)
+				  RSEQ_INJECT_ASM(6)
+				  RSEQ_ASM_DEFINE_ABORT(4, abort)
+				  : /* gcc asm goto does not allow outputs */
+				  : [cpu_id]		"r" (cpu),
+				    [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+				    [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+				    [expect]		"r" (expect),
+				    [v]			"m" (*v),
+				    [newv]		"r" (newv),
+				    [dst]			"r" (dst),
+				    [src]			"r" (src),
+				    [len]			"r" (len)
+				    RSEQ_INJECT_INPUT
+				  : "memory", RSEQ_ASM_TMP_REG_1, RSEQ_ASM_TMP_REG_2,
+				    RSEQ_ASM_TMP_REG_3, RSEQ_ASM_TMP_REG_4
+				    RSEQ_INJECT_CLOBBER
+				  : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+				    , error1, error2
+#endif
+	);
+
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __always_inline
+int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+					 void *dst, void *src, size_t len,
+					 intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]")
+#ifdef RSEQ_COMPARE_TWICE
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]")
+#endif
+				  RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+				  RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+				  RSEQ_INJECT_ASM(3)
+				  RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]")
+				  RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+				  RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
+				  RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]")
+#endif
+				  RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len)
+				  RSEQ_INJECT_ASM(5)
+				  RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3)
+				  RSEQ_INJECT_ASM(6)
+				  RSEQ_ASM_DEFINE_ABORT(4, abort)
+				  : /* gcc asm goto does not allow outputs */
+				  : [cpu_id]		"r" (cpu),
+				    [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+				    [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+				    [expect]		"r" (expect),
+				    [v]			"m" (*v),
+				    [newv]		"r" (newv),
+				    [dst]			"r" (dst),
+				    [src]			"r" (src),
+				    [len]			"r" (len)
+				    RSEQ_INJECT_INPUT
+				  : "memory", RSEQ_ASM_TMP_REG_1, RSEQ_ASM_TMP_REG_2,
+				    RSEQ_ASM_TMP_REG_3, RSEQ_ASM_TMP_REG_4
+				    RSEQ_INJECT_CLOBBER
+				  : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+				    , error1, error2
+#endif
+	);
+
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+#define RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
+
+/*
+ *   pval = *(ptr+off)
+ *  *pval += inc;
+ */
+static inline __always_inline
+int rseq_offset_deref_addv(intptr_t *ptr, off_t off, intptr_t inc, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+#ifdef RSEQ_COMPARE_TWICE
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
+#endif
+				  RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+				  RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+				  RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+				  RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
+#endif
+				  RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, 3)
+				  RSEQ_INJECT_ASM(4)
+				  RSEQ_ASM_DEFINE_ABORT(4, abort)
+				  : /* gcc asm goto does not allow outputs */
+				  : [cpu_id]		"r" (cpu),
+				    [current_cpu_id]      "m" (__rseq_abi.cpu_id),
+				    [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+				    [ptr]			"r" (ptr),
+				    [off]			"er" (off),
+				    [inc]			"er" (inc)
+				    RSEQ_INJECT_INPUT
+				  : "memory", RSEQ_ASM_TMP_REG_1
+				    RSEQ_INJECT_CLOBBER
+				  : abort
+#ifdef RSEQ_COMPARE_TWICE
+				    , error1
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+#endif /* !RSEQ_SKIP_FASTPATH */
diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h
index 3f63eb362b92..72efb6d3d84e 100644
--- a/tools/testing/selftests/rseq/rseq.h
+++ b/tools/testing/selftests/rseq/rseq.h
@@ -79,6 +79,8 @@ extern int __rseq_handled;
 #include <rseq-mips.h>
 #elif defined(__s390__)
 #include <rseq-s390.h>
+#elif defined(__riscv)
+#include <rseq-riscv.h>
 #else
 #error unsupported target
 #endif
-- 
cgit 


From 6323ec54b4504070c1e001242e3e912837b3ae3a Mon Sep 17 00:00:00 2001
From: Shakeel Butt <shakeelb@google.com>
Date: Tue, 22 Mar 2022 14:40:25 -0700
Subject: selftests: memcg: test high limit for single entry allocation

Test the enforcement of memory.high limit for large amount of memory
allocation within a single kernel entry.  There are valid use-cases
where the application can trigger large amount of memory allocation
within a single syscall e.g.  mlock() or mmap(MAP_POPULATE).

Make sure memory.high limit enforcement works for such use-cases.

Link: https://lkml.kernel.org/r/20220211064917.2028469-4-shakeelb@google.com
Signed-off-by: Shakeel Butt <shakeelb@google.com>
Reviewed-by: Roman Gushchin <guro@fb.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Chris Down <chris@chrisdown.name>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/cgroup/cgroup_util.c     | 15 ++++-
 tools/testing/selftests/cgroup/cgroup_util.h     |  1 +
 tools/testing/selftests/cgroup/test_memcontrol.c | 78 ++++++++++++++++++++++++
 3 files changed, 91 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c
index 0cf7e90c0052..dbaa7aabbb4a 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.c
+++ b/tools/testing/selftests/cgroup/cgroup_util.c
@@ -583,7 +583,7 @@ int clone_into_cgroup_run_wait(const char *cgroup)
 	return 0;
 }
 
-int cg_prepare_for_wait(const char *cgroup)
+static int __prepare_for_wait(const char *cgroup, const char *filename)
 {
 	int fd, ret = -1;
 
@@ -591,8 +591,7 @@ int cg_prepare_for_wait(const char *cgroup)
 	if (fd == -1)
 		return fd;
 
-	ret = inotify_add_watch(fd, cg_control(cgroup, "cgroup.events"),
-				IN_MODIFY);
+	ret = inotify_add_watch(fd, cg_control(cgroup, filename), IN_MODIFY);
 	if (ret == -1) {
 		close(fd);
 		fd = -1;
@@ -601,6 +600,16 @@ int cg_prepare_for_wait(const char *cgroup)
 	return fd;
 }
 
+int cg_prepare_for_wait(const char *cgroup)
+{
+	return __prepare_for_wait(cgroup, "cgroup.events");
+}
+
+int memcg_prepare_for_wait(const char *cgroup)
+{
+	return __prepare_for_wait(cgroup, "memory.events");
+}
+
 int cg_wait_for(int fd)
 {
 	int ret = -1;
diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h
index 4f66d10626d2..628738532ac9 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.h
+++ b/tools/testing/selftests/cgroup/cgroup_util.h
@@ -55,4 +55,5 @@ extern int clone_reap(pid_t pid, int options);
 extern int clone_into_cgroup_run_wait(const char *cgroup);
 extern int dirfd_open_opath(const char *dir);
 extern int cg_prepare_for_wait(const char *cgroup);
+extern int memcg_prepare_for_wait(const char *cgroup);
 extern int cg_wait_for(int fd);
diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
index c19a97dd02d4..36ccf2322e21 100644
--- a/tools/testing/selftests/cgroup/test_memcontrol.c
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -16,6 +16,7 @@
 #include <netinet/in.h>
 #include <netdb.h>
 #include <errno.h>
+#include <sys/mman.h>
 
 #include "../kselftest.h"
 #include "cgroup_util.h"
@@ -628,6 +629,82 @@ cleanup:
 	return ret;
 }
 
+static int alloc_anon_mlock(const char *cgroup, void *arg)
+{
+	size_t size = (size_t)arg;
+	void *buf;
+
+	buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
+		   0, 0);
+	if (buf == MAP_FAILED)
+		return -1;
+
+	mlock(buf, size);
+	munmap(buf, size);
+	return 0;
+}
+
+/*
+ * This test checks that memory.high is able to throttle big single shot
+ * allocation i.e. large allocation within one kernel entry.
+ */
+static int test_memcg_high_sync(const char *root)
+{
+	int ret = KSFT_FAIL, pid, fd = -1;
+	char *memcg;
+	long pre_high, pre_max;
+	long post_high, post_max;
+
+	memcg = cg_name(root, "memcg_test");
+	if (!memcg)
+		goto cleanup;
+
+	if (cg_create(memcg))
+		goto cleanup;
+
+	pre_high = cg_read_key_long(memcg, "memory.events", "high ");
+	pre_max = cg_read_key_long(memcg, "memory.events", "max ");
+	if (pre_high < 0 || pre_max < 0)
+		goto cleanup;
+
+	if (cg_write(memcg, "memory.swap.max", "0"))
+		goto cleanup;
+
+	if (cg_write(memcg, "memory.high", "30M"))
+		goto cleanup;
+
+	if (cg_write(memcg, "memory.max", "140M"))
+		goto cleanup;
+
+	fd = memcg_prepare_for_wait(memcg);
+	if (fd < 0)
+		goto cleanup;
+
+	pid = cg_run_nowait(memcg, alloc_anon_mlock, (void *)MB(200));
+	if (pid < 0)
+		goto cleanup;
+
+	cg_wait_for(fd);
+
+	post_high = cg_read_key_long(memcg, "memory.events", "high ");
+	post_max = cg_read_key_long(memcg, "memory.events", "max ");
+	if (post_high < 0 || post_max < 0)
+		goto cleanup;
+
+	if (pre_high == post_high || pre_max != post_max)
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	if (fd >= 0)
+		close(fd);
+	cg_destroy(memcg);
+	free(memcg);
+
+	return ret;
+}
+
 /*
  * This test checks that memory.max limits the amount of
  * memory which can be consumed by either anonymous memory
@@ -1180,6 +1257,7 @@ struct memcg_test {
 	T(test_memcg_min),
 	T(test_memcg_low),
 	T(test_memcg_high),
+	T(test_memcg_high_sync),
 	T(test_memcg_max),
 	T(test_memcg_oom_events),
 	T(test_memcg_swap_max),
-- 
cgit 


From ef696f93ed9778d570bd5ac58414421cdd4f1aab Mon Sep 17 00:00:00 2001
From: Guillaume Tucker <guillaume.tucker@collabora.com>
Date: Tue, 22 Mar 2022 14:41:44 -0700
Subject: selftests, x86: fix how check_cc.sh is being invoked

The $(CC) variable used in Makefiles could contain several arguments
such as "ccache gcc".  These need to be passed as a single string to
check_cc.sh, otherwise only the first argument will be used as the
compiler command.  Without quotes, the $(CC) variable is passed as
distinct arguments which causes the script to fail to build trivial
programs.

Fix this by adding quotes around $(CC) when calling check_cc.sh to pass
the whole string as a single argument to the script even if it has
several words such as "ccache gcc".

Link: https://lkml.kernel.org/r/d0d460d7be0107a69e3c52477761a6fe694c1840.1646991629.git.guillaume.tucker@collabora.com
Fixes: e9886ace222e ("selftests, x86: Rework x86 target architecture detection")
Signed-off-by: Guillaume Tucker <guillaume.tucker@collabora.com>
Tested-by: "kernelci.org bot" <bot@kernelci.org>
Reviewed-by: Guenter Roeck <groeck@google.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Borislav Petkov <bp@suse.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/Makefile  | 6 +++---
 tools/testing/selftests/x86/Makefile | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index a14b5b800897..1530c3e0242e 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -51,9 +51,9 @@ TEST_GEN_FILES += split_huge_page_test
 TEST_GEN_FILES += ksm_tests
 
 ifeq ($(MACHINE),x86_64)
-CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_32bit_program.c -m32)
-CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_64bit_program.c)
-CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_program.c -no-pie)
+CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_32bit_program.c -m32)
+CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_64bit_program.c)
+CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_program.c -no-pie)
 
 TARGETS := protection_keys
 BINARIES_32 := $(TARGETS:%=%_32)
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 8a1f62ab3c8e..53df7d3893d3 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -6,9 +6,9 @@ include ../lib.mk
 .PHONY: all all_32 all_64 warn_32bit_failure clean
 
 UNAME_M := $(shell uname -m)
-CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32)
-CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
-CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie)
+CAN_BUILD_I386 := $(shell ./check_cc.sh "$(CC)" trivial_32bit_program.c -m32)
+CAN_BUILD_X86_64 := $(shell ./check_cc.sh "$(CC)" trivial_64bit_program.c)
+CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh "$(CC)" trivial_program.c -no-pie)
 
 TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
 			check_initial_reg_state sigreturn iopl ioperm \
-- 
cgit 


From b147c89cd429321a59147368378c8aba17c8480f Mon Sep 17 00:00:00 2001
From: Muchun Song <songmuchun@bytedance.com>
Date: Tue, 22 Mar 2022 14:45:09 -0700
Subject: selftests: vm: add a hugetlb test case

Since the head vmemmap page frame associated with each HugeTLB page is
reused, we should hide the PG_head flag of tail struct page from the
user.  Add a tese case to check whether it is work properly.  The test
steps are as follows.

  1) alloc 2MB hugeTLB
  2) get each page frame
  3) apply those APIs in each page frame
  4) Those APIs work completely the same as before.

Reading the flags of a page by /proc/kpageflags is done in
stable_page_flags(), which has invoked PageHead(), PageTail(),
PageCompound() and compound_head().

If those APIs work properly, the head page must have 15 and 17 bits set.
And tail pages must have 16 and 17 bits set but 15 bit unset.  Those
flags are checked in check_page_flags().

Link: https://lkml.kernel.org/r/20211101031651.75851-5-songmuchun@bytedance.com
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Reviewed-by: Barry Song <song.bao.hua@hisilicon.com>
Cc: Bodeddula Balasubramaniam <bodeddub@amazon.com>
Cc: Chen Huang <chenhuang5@huawei.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Fam Zheng <fam.zheng@bytedance.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Qi Zheng <zhengqi.arch@bytedance.com>
Cc: Xiongchun Duan <duanxiongchun@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/.gitignore         |   1 +
 tools/testing/selftests/vm/Makefile           |   1 +
 tools/testing/selftests/vm/hugepage-vmemmap.c | 144 ++++++++++++++++++++++++++
 tools/testing/selftests/vm/run_vmtests.sh     |  11 ++
 4 files changed, 157 insertions(+)
 create mode 100644 tools/testing/selftests/vm/hugepage-vmemmap.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index 2e7e86e85282..3b5faec3c04f 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -2,6 +2,7 @@
 hugepage-mmap
 hugepage-mremap
 hugepage-shm
+hugepage-vmemmap
 khugepaged
 map_hugetlb
 map_populate
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 1530c3e0242e..81040f01711f 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -33,6 +33,7 @@ TEST_GEN_FILES += hmm-tests
 TEST_GEN_FILES += hugepage-mmap
 TEST_GEN_FILES += hugepage-mremap
 TEST_GEN_FILES += hugepage-shm
+TEST_GEN_FILES += hugepage-vmemmap
 TEST_GEN_FILES += khugepaged
 TEST_GEN_FILES += madv_populate
 TEST_GEN_FILES += map_fixed_noreplace
diff --git a/tools/testing/selftests/vm/hugepage-vmemmap.c b/tools/testing/selftests/vm/hugepage-vmemmap.c
new file mode 100644
index 000000000000..557bdbd4f87e
--- /dev/null
+++ b/tools/testing/selftests/vm/hugepage-vmemmap.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A test case of using hugepage memory in a user application using the
+ * mmap system call with MAP_HUGETLB flag.  Before running this program
+ * make sure the administrator has allocated enough default sized huge
+ * pages to cover the 2 MB allocation.
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+
+#define MAP_LENGTH		(2UL * 1024 * 1024)
+
+#ifndef MAP_HUGETLB
+#define MAP_HUGETLB		0x40000	/* arch specific */
+#endif
+
+#define PAGE_SIZE		4096
+
+#define PAGE_COMPOUND_HEAD	(1UL << 15)
+#define PAGE_COMPOUND_TAIL	(1UL << 16)
+#define PAGE_HUGE		(1UL << 17)
+
+#define HEAD_PAGE_FLAGS		(PAGE_COMPOUND_HEAD | PAGE_HUGE)
+#define TAIL_PAGE_FLAGS		(PAGE_COMPOUND_TAIL | PAGE_HUGE)
+
+#define PM_PFRAME_BITS		55
+#define PM_PFRAME_MASK		~((1UL << PM_PFRAME_BITS) - 1)
+
+/*
+ * For ia64 architecture, Linux kernel reserves Region number 4 for hugepages.
+ * That means the addresses starting with 0x800000... will need to be
+ * specified.  Specifying a fixed address is not required on ppc64, i386
+ * or x86_64.
+ */
+#ifdef __ia64__
+#define MAP_ADDR		(void *)(0x8000000000000000UL)
+#define MAP_FLAGS		(MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_FIXED)
+#else
+#define MAP_ADDR		NULL
+#define MAP_FLAGS		(MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB)
+#endif
+
+static void write_bytes(char *addr, size_t length)
+{
+	unsigned long i;
+
+	for (i = 0; i < length; i++)
+		*(addr + i) = (char)i;
+}
+
+static unsigned long virt_to_pfn(void *addr)
+{
+	int fd;
+	unsigned long pagemap;
+
+	fd = open("/proc/self/pagemap", O_RDONLY);
+	if (fd < 0)
+		return -1UL;
+
+	lseek(fd, (unsigned long)addr / PAGE_SIZE * sizeof(pagemap), SEEK_SET);
+	read(fd, &pagemap, sizeof(pagemap));
+	close(fd);
+
+	return pagemap & ~PM_PFRAME_MASK;
+}
+
+static int check_page_flags(unsigned long pfn)
+{
+	int fd, i;
+	unsigned long pageflags;
+
+	fd = open("/proc/kpageflags", O_RDONLY);
+	if (fd < 0)
+		return -1;
+
+	lseek(fd, pfn * sizeof(pageflags), SEEK_SET);
+
+	read(fd, &pageflags, sizeof(pageflags));
+	if ((pageflags & HEAD_PAGE_FLAGS) != HEAD_PAGE_FLAGS) {
+		close(fd);
+		printf("Head page flags (%lx) is invalid\n", pageflags);
+		return -1;
+	}
+
+	/*
+	 * pages other than the first page must be tail and shouldn't be head;
+	 * this also verifies kernel has correctly set the fake page_head to tail
+	 * while hugetlb_free_vmemmap is enabled.
+	 */
+	for (i = 1; i < MAP_LENGTH / PAGE_SIZE; i++) {
+		read(fd, &pageflags, sizeof(pageflags));
+		if ((pageflags & TAIL_PAGE_FLAGS) != TAIL_PAGE_FLAGS ||
+		    (pageflags & HEAD_PAGE_FLAGS) == HEAD_PAGE_FLAGS) {
+			close(fd);
+			printf("Tail page flags (%lx) is invalid\n", pageflags);
+			return -1;
+		}
+	}
+
+	close(fd);
+
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	void *addr;
+	unsigned long pfn;
+
+	addr = mmap(MAP_ADDR, MAP_LENGTH, PROT_READ | PROT_WRITE, MAP_FLAGS, -1, 0);
+	if (addr == MAP_FAILED) {
+		perror("mmap");
+		exit(1);
+	}
+
+	/* Trigger allocation of HugeTLB page. */
+	write_bytes(addr, MAP_LENGTH);
+
+	pfn = virt_to_pfn(addr);
+	if (pfn == -1UL) {
+		munmap(addr, MAP_LENGTH);
+		perror("virt_to_pfn");
+		exit(1);
+	}
+
+	printf("Returned address is %p whose pfn is %lx\n", addr, pfn);
+
+	if (check_page_flags(pfn) < 0) {
+		munmap(addr, MAP_LENGTH);
+		perror("check_page_flags");
+		exit(1);
+	}
+
+	/* munmap() length of MAP_HUGETLB memory must be hugepage aligned */
+	if (munmap(addr, MAP_LENGTH)) {
+		perror("munmap");
+		exit(1);
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh
index 71d2dc198fc1..e10d50e0b8e8 100755
--- a/tools/testing/selftests/vm/run_vmtests.sh
+++ b/tools/testing/selftests/vm/run_vmtests.sh
@@ -120,6 +120,17 @@ else
 fi
 rm -f $mnt/huge_mremap
 
+echo "------------------------"
+echo "running hugepage-vmemmap"
+echo "------------------------"
+./hugepage-vmemmap
+if [ $? -ne 0 ]; then
+	echo "[FAIL]"
+	exitcode=1
+else
+	echo "[PASS]"
+fi
+
 echo "NOTE: The above hugetlb tests provide minimal coverage.  Use"
 echo "      https://github.com/libhugetlbfs/libhugetlbfs.git for"
 echo "      hugetlb regression testing."
-- 
cgit 


From d794103d52739f8e27b69c4895dbf5a5a7a805cc Mon Sep 17 00:00:00 2001
From: Guo Zhengkui <guozhengkui@vivo.com>
Date: Tue, 22 Mar 2022 14:45:35 -0700
Subject: userfaultfd/selftests: fix uninitialized_var.cocci warning

Fix following coccicheck warning:
tools/testing/selftests/vm/userfaultfd.c:556:23-24:
WARNING this kind of initialization is deprecated

`unsigned long page_nr = *(&page_nr)` has the same form of
uninitialized_var() macro. I remove the redundant assignement. It has
been tested with gcc (Debian 8.3.0-6) 8.3.0.

The patch which removed uninitialized_var() is:
https://lore.kernel.org/all/20121028102007.GA7547@gmail.com/ And there is
very few "/* GCC */" comments in the Linux kernel code now.

Link: https://lkml.kernel.org/r/20220304082333.9252-1-guozhengkui@vivo.com
Signed-off-by: Guo Zhengkui <guozhengkui@vivo.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/userfaultfd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 3fc1d2ee2948..29cc6f247a9e 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -540,7 +540,7 @@ static void continue_range(int ufd, __u64 start, __u64 len)
 static void *locking_thread(void *arg)
 {
 	unsigned long cpu = (unsigned long) arg;
-	unsigned long page_nr = *(&(page_nr)); /* uninitialized warning */
+	unsigned long page_nr;
 	unsigned long long count;
 
 	if (!(bounces & BOUNCE_RANDOM)) {
-- 
cgit 


From 40184e484def1bea48e6be8c80ea7f992b8df16a Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Tue, 22 Mar 2022 14:49:52 -0700
Subject: selftests/damon: add a test for DAMON sysfs interface

This commit adds a selftest for DAMON sysfs interface.  It tests the
functionality of 'nr' files and existence of files in each directory of
the hierarchy.

Link: https://lkml.kernel.org/r/20220228081314.5770-12-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Shuah Khan <skhan@linuxfoundation.org>
Cc: Xin Hao <xhao@linux.alibaba.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/damon/Makefile |   1 +
 tools/testing/selftests/damon/sysfs.sh | 306 +++++++++++++++++++++++++++++++++
 2 files changed, 307 insertions(+)
 create mode 100644 tools/testing/selftests/damon/sysfs.sh

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile
index 937d36ae9a69..0470c5f3e690 100644
--- a/tools/testing/selftests/damon/Makefile
+++ b/tools/testing/selftests/damon/Makefile
@@ -6,5 +6,6 @@ TEST_GEN_FILES += huge_count_read_write
 TEST_FILES = _chk_dependency.sh _debugfs_common.sh
 TEST_PROGS = debugfs_attrs.sh debugfs_schemes.sh debugfs_target_ids.sh
 TEST_PROGS += debugfs_empty_targets.sh debugfs_huge_count_read_write.sh
+TEST_PROGS += sysfs.sh
 
 include ../lib.mk
diff --git a/tools/testing/selftests/damon/sysfs.sh b/tools/testing/selftests/damon/sysfs.sh
new file mode 100644
index 000000000000..2e3ae77cb6db
--- /dev/null
+++ b/tools/testing/selftests/damon/sysfs.sh
@@ -0,0 +1,306 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest frmework requirement - SKIP code is 4.
+ksft_skip=4
+
+ensure_write_succ()
+{
+	file=$1
+	content=$2
+	reason=$3
+
+	if ! echo "$content" > "$file"
+	then
+		echo "writing $content to $file failed"
+		echo "expected success because $reason"
+		exit 1
+	fi
+}
+
+ensure_write_fail()
+{
+	file=$1
+	content=$2
+	reason=$3
+
+	if echo "$content" > "$file"
+	then
+		echo "writing $content to $file succeed ($fail_reason)"
+		echo "expected failure because $reason"
+		exit 1
+	fi
+}
+
+ensure_dir()
+{
+	dir=$1
+	to_ensure=$2
+	if [ "$to_ensure" = "exist" ] && [ ! -d "$dir" ]
+	then
+		echo "$dir dir is expected but not found"
+		exit 1
+	elif [ "$to_ensure" = "not_exist" ] && [ -d "$dir" ]
+	then
+		echo "$dir dir is not expected but found"
+		exit 1
+	fi
+}
+
+ensure_file()
+{
+	file=$1
+	to_ensure=$2
+	permission=$3
+	if [ "$to_ensure" = "exist" ]
+	then
+		if [ ! -f "$file" ]
+		then
+			echo "$file is expected but not found"
+			exit 1
+		fi
+		perm=$(stat -c "%a" "$file")
+		if [ ! "$perm" = "$permission" ]
+		then
+			echo "$file permission: expected $permission but $perm"
+			exit 1
+		fi
+	elif [ "$to_ensure" = "not_exist" ] && [ -f "$dir" ]
+	then
+		echo "$file is not expected but found"
+		exit 1
+	fi
+}
+
+test_range()
+{
+	range_dir=$1
+	ensure_dir "$range_dir" "exist"
+	ensure_file "$range_dir/min" "exist" 600
+	ensure_file "$range_dir/max" "exist" 600
+}
+
+test_stats()
+{
+	stats_dir=$1
+	ensure_dir "$stats_dir" "exist"
+	for f in nr_tried sz_tried nr_applied sz_applied qt_exceeds
+	do
+		ensure_file "$stats_dir/$f" "exist" "400"
+	done
+}
+
+test_watermarks()
+{
+	watermarks_dir=$1
+	ensure_dir "$watermarks_dir" "exist"
+	ensure_file "$watermarks_dir/metric" "exist" "600"
+	ensure_file "$watermarks_dir/interval_us" "exist" "600"
+	ensure_file "$watermarks_dir/high" "exist" "600"
+	ensure_file "$watermarks_dir/mid" "exist" "600"
+	ensure_file "$watermarks_dir/low" "exist" "600"
+}
+
+test_weights()
+{
+	weights_dir=$1
+	ensure_dir "$weights_dir" "exist"
+	ensure_file "$weights_dir/sz_permil" "exist" "600"
+	ensure_file "$weights_dir/nr_accesses_permil" "exist" "600"
+	ensure_file "$weights_dir/age_permil" "exist" "600"
+}
+
+test_quotas()
+{
+	quotas_dir=$1
+	ensure_dir "$quotas_dir" "exist"
+	ensure_file "$quotas_dir/ms" "exist" 600
+	ensure_file "$quotas_dir/bytes" "exist" 600
+	ensure_file "$quotas_dir/reset_interval_ms" "exist" 600
+	test_weights "$quotas_dir/weights"
+}
+
+test_access_pattern()
+{
+	access_pattern_dir=$1
+	ensure_dir "$access_pattern_dir" "exist"
+	test_range "$access_pattern_dir/age"
+	test_range "$access_pattern_dir/nr_accesses"
+	test_range "$access_pattern_dir/sz"
+}
+
+test_scheme()
+{
+	scheme_dir=$1
+	ensure_dir "$scheme_dir" "exist"
+	ensure_file "$scheme_dir/action" "exist" "600"
+	test_access_pattern "$scheme_dir/access_pattern"
+	test_quotas "$scheme_dir/quotas"
+	test_watermarks "$scheme_dir/watermarks"
+	test_stats "$scheme_dir/stats"
+}
+
+test_schemes()
+{
+	schemes_dir=$1
+	ensure_dir "$schemes_dir" "exist"
+	ensure_file "$schemes_dir/nr_schemes" "exist" 600
+
+	ensure_write_succ  "$schemes_dir/nr_schemes" "1" "valid input"
+	test_scheme "$schemes_dir/0"
+
+	ensure_write_succ  "$schemes_dir/nr_schemes" "2" "valid input"
+	test_scheme "$schemes_dir/0"
+	test_scheme "$schemes_dir/1"
+
+	ensure_write_succ "$schemes_dir/nr_schemes" "0" "valid input"
+	ensure_dir "$schemes_dir/0" "not_exist"
+	ensure_dir "$schemes_dir/1" "not_exist"
+}
+
+test_region()
+{
+	region_dir=$1
+	ensure_dir "$region_dir" "exist"
+	ensure_file "$region_dir/start" "exist" 600
+	ensure_file "$region_dir/end" "exist" 600
+}
+
+test_regions()
+{
+	regions_dir=$1
+	ensure_dir "$regions_dir" "exist"
+	ensure_file "$regions_dir/nr_regions" "exist" 600
+
+	ensure_write_succ  "$regions_dir/nr_regions" "1" "valid input"
+	test_region "$regions_dir/0"
+
+	ensure_write_succ  "$regions_dir/nr_regions" "2" "valid input"
+	test_region "$regions_dir/0"
+	test_region "$regions_dir/1"
+
+	ensure_write_succ "$regions_dir/nr_regions" "0" "valid input"
+	ensure_dir "$regions_dir/0" "not_exist"
+	ensure_dir "$regions_dir/1" "not_exist"
+}
+
+test_target()
+{
+	target_dir=$1
+	ensure_dir "$target_dir" "exist"
+	ensure_file "$target_dir/pid_target" "exist" "600"
+	test_regions "$target_dir/regions"
+}
+
+test_targets()
+{
+	targets_dir=$1
+	ensure_dir "$targets_dir" "exist"
+	ensure_file "$targets_dir/nr_targets" "exist" 600
+
+	ensure_write_succ  "$targets_dir/nr_targets" "1" "valid input"
+	test_target "$targets_dir/0"
+
+	ensure_write_succ  "$targets_dir/nr_targets" "2" "valid input"
+	test_target "$targets_dir/0"
+	test_target "$targets_dir/1"
+
+	ensure_write_succ "$targets_dir/nr_targets" "0" "valid input"
+	ensure_dir "$targets_dir/0" "not_exist"
+	ensure_dir "$targets_dir/1" "not_exist"
+}
+
+test_intervals()
+{
+	intervals_dir=$1
+	ensure_dir "$intervals_dir" "exist"
+	ensure_file "$intervals_dir/aggr_us" "exist" "600"
+	ensure_file "$intervals_dir/sample_us" "exist" "600"
+	ensure_file "$intervals_dir/update_us" "exist" "600"
+}
+
+test_monitoring_attrs()
+{
+	monitoring_attrs_dir=$1
+	ensure_dir "$monitoring_attrs_dir" "exist"
+	test_intervals "$monitoring_attrs_dir/intervals"
+	test_range "$monitoring_attrs_dir/nr_regions"
+}
+
+test_context()
+{
+	context_dir=$1
+	ensure_dir "$context_dir" "exist"
+	ensure_file "$context_dir/operations" "exist" 600
+	test_monitoring_attrs "$context_dir/monitoring_attrs"
+	test_targets "$context_dir/targets"
+	test_schemes "$context_dir/schemes"
+}
+
+test_contexts()
+{
+	contexts_dir=$1
+	ensure_dir "$contexts_dir" "exist"
+	ensure_file "$contexts_dir/nr_contexts" "exist" 600
+
+	ensure_write_succ  "$contexts_dir/nr_contexts" "1" "valid input"
+	test_context "$contexts_dir/0"
+
+	ensure_write_fail "$contexts_dir/nr_contexts" "2" "only 0/1 are supported"
+	test_context "$contexts_dir/0"
+
+	ensure_write_succ "$contexts_dir/nr_contexts" "0" "valid input"
+	ensure_dir "$contexts_dir/0" "not_exist"
+}
+
+test_kdamond()
+{
+	kdamond_dir=$1
+	ensure_dir "$kdamond_dir" "exist"
+	ensure_file "$kdamond_dir/state" "exist" "600"
+	ensure_file "$kdamond_dir/pid" "exist" 400
+	test_contexts "$kdamond_dir/contexts"
+}
+
+test_kdamonds()
+{
+	kdamonds_dir=$1
+	ensure_dir "$kdamonds_dir" "exist"
+
+	ensure_file "$kdamonds_dir/nr_kdamonds" "exist" "600"
+
+	ensure_write_succ  "$kdamonds_dir/nr_kdamonds" "1" "valid input"
+	test_kdamond "$kdamonds_dir/0"
+
+	ensure_write_succ  "$kdamonds_dir/nr_kdamonds" "2" "valid input"
+	test_kdamond "$kdamonds_dir/0"
+	test_kdamond "$kdamonds_dir/1"
+
+	ensure_write_succ "$kdamonds_dir/nr_kdamonds" "0" "valid input"
+	ensure_dir "$kdamonds_dir/0" "not_exist"
+	ensure_dir "$kdamonds_dir/1" "not_exist"
+}
+
+test_damon_sysfs()
+{
+	damon_sysfs=$1
+	if [ ! -d "$damon_sysfs" ]
+	then
+		echo "$damon_sysfs not found"
+		exit $ksft_skip
+	fi
+
+	test_kdamonds "$damon_sysfs/kdamonds"
+}
+
+check_dependencies()
+{
+	if [ $EUID -ne 0 ]
+	then
+		echo "Run as root"
+		exit $ksft_skip
+	fi
+}
+
+check_dependencies
+test_damon_sysfs "/sys/kernel/mm/damon/admin"
-- 
cgit 


From 20df737561484cb2d42e537663c03a7311d2b3c1 Mon Sep 17 00:00:00 2001
From: "Chang S. Bae" <chang.seok.bae@intel.com>
Date: Sat, 29 Jan 2022 09:36:47 -0800
Subject: selftests/x86/amx: Update the ARCH_REQ_XCOMP_PERM test

Update the arch_prctl test to check the permission bitmap whether the
requested feature is added as expected or not.

Every non-dynamic feature that is enabled is permitted already for use.
TILECFG is not dynamic feature. Ensure the bit is always on from
ARCH_GET_XCOMP_PERM.

Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20220129173647.27981-3-chang.seok.bae@intel.com
---
 tools/testing/selftests/x86/amx.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/x86/amx.c b/tools/testing/selftests/x86/amx.c
index 3615ef4a48bb..2189f0322d8b 100644
--- a/tools/testing/selftests/x86/amx.c
+++ b/tools/testing/selftests/x86/amx.c
@@ -368,9 +368,16 @@ static void req_xtiledata_perm(void)
 
 static void validate_req_xcomp_perm(enum expected_result exp)
 {
-	unsigned long bitmask;
+	unsigned long bitmask, expected_bitmask;
 	long rc;
 
+	rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_PERM, &bitmask);
+	if (rc) {
+		fatal_error("prctl(ARCH_GET_XCOMP_PERM) error: %ld", rc);
+	} else if (!(bitmask & XFEATURE_MASK_XTILECFG)) {
+		fatal_error("ARCH_GET_XCOMP_PERM returns XFEATURE_XTILECFG off.");
+	}
+
 	rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILEDATA);
 	if (exp == FAIL_EXPECTED) {
 		if (rc) {
@@ -383,10 +390,15 @@ static void validate_req_xcomp_perm(enum expected_result exp)
 		fatal_error("ARCH_REQ_XCOMP_PERM saw unexpected failure.\n");
 	}
 
+	expected_bitmask = bitmask | XFEATURE_MASK_XTILEDATA;
+
 	rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_PERM, &bitmask);
 	if (rc) {
 		fatal_error("prctl(ARCH_GET_XCOMP_PERM) error: %ld", rc);
-	} else if (bitmask & XFEATURE_MASK_XTILE) {
+	} else if (bitmask != expected_bitmask) {
+		fatal_error("ARCH_REQ_XCOMP_PERM set a wrong bitmask: %lx, expected: %lx.\n",
+			    bitmask, expected_bitmask);
+	} else {
 		printf("\tARCH_REQ_XCOMP_PERM is successful.\n");
 	}
 }
-- 
cgit 


From 90647d9d725068ad27d39f80d2b4a5150d041038 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Date: Thu, 24 Mar 2022 18:09:43 -0700
Subject: selftest/vm: add util.h and and move helper functions there

Avoid code duplication by adding util.h.  No functional change in this
patch.

Link: https://lkml.kernel.org/r/20220307054355.149820-1-aneesh.kumar@linux.ibm.com
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/ksm_tests.c        | 38 +--------------------
 tools/testing/selftests/vm/transhuge-stress.c | 41 ++---------------------
 tools/testing/selftests/vm/util.h             | 48 +++++++++++++++++++++++++++
 3 files changed, 52 insertions(+), 75 deletions(-)
 create mode 100644 tools/testing/selftests/vm/util.h

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/ksm_tests.c b/tools/testing/selftests/vm/ksm_tests.c
index 1436e1a9a3d3..fd85f15869d1 100644
--- a/tools/testing/selftests/vm/ksm_tests.c
+++ b/tools/testing/selftests/vm/ksm_tests.c
@@ -12,6 +12,7 @@
 
 #include "../kselftest.h"
 #include "../../../../include/vdso/time64.h"
+#include "util.h"
 
 #define KSM_SYSFS_PATH "/sys/kernel/mm/ksm/"
 #define KSM_FP(s) (KSM_SYSFS_PATH s)
@@ -22,15 +23,6 @@
 #define KSM_MERGE_ACROSS_NODES_DEFAULT true
 #define MB (1ul << 20)
 
-#define PAGE_SHIFT 12
-#define HPAGE_SHIFT 21
-
-#define PAGE_SIZE (1 << PAGE_SHIFT)
-#define HPAGE_SIZE (1 << HPAGE_SHIFT)
-
-#define PAGEMAP_PRESENT(ent)	(((ent) & (1ull << 63)) != 0)
-#define PAGEMAP_PFN(ent)	((ent) & ((1ull << 55) - 1))
-
 struct ksm_sysfs {
 	unsigned long max_page_sharing;
 	unsigned long merge_across_nodes;
@@ -456,34 +448,6 @@ err_out:
 	return KSFT_FAIL;
 }
 
-int64_t allocate_transhuge(void *ptr, int pagemap_fd)
-{
-	uint64_t ent[2];
-
-	/* drop pmd */
-	if (mmap(ptr, HPAGE_SIZE, PROT_READ | PROT_WRITE,
-				MAP_FIXED | MAP_ANONYMOUS |
-				MAP_NORESERVE | MAP_PRIVATE, -1, 0) != ptr)
-		errx(2, "mmap transhuge");
-
-	if (madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE))
-		err(2, "MADV_HUGEPAGE");
-
-	/* allocate transparent huge page */
-	*(volatile void **)ptr = ptr;
-
-	if (pread(pagemap_fd, ent, sizeof(ent),
-			(uintptr_t)ptr >> (PAGE_SHIFT - 3)) != sizeof(ent))
-		err(2, "read pagemap");
-
-	if (PAGEMAP_PRESENT(ent[0]) && PAGEMAP_PRESENT(ent[1]) &&
-	    PAGEMAP_PFN(ent[0]) + 1 == PAGEMAP_PFN(ent[1]) &&
-	    !(PAGEMAP_PFN(ent[0]) & ((1 << (HPAGE_SHIFT - PAGE_SHIFT)) - 1)))
-		return PAGEMAP_PFN(ent[0]);
-
-	return -1;
-}
-
 static int ksm_merge_hugepages_time(int mapping, int prot, int timeout, size_t map_size)
 {
 	void *map_ptr, *map_ptr_orig;
diff --git a/tools/testing/selftests/vm/transhuge-stress.c b/tools/testing/selftests/vm/transhuge-stress.c
index a03cb3fce1f6..e3f00adb1b82 100644
--- a/tools/testing/selftests/vm/transhuge-stress.c
+++ b/tools/testing/selftests/vm/transhuge-stress.c
@@ -15,48 +15,12 @@
 #include <fcntl.h>
 #include <string.h>
 #include <sys/mman.h>
+#include "util.h"
 
-#define PAGE_SHIFT 12
-#define HPAGE_SHIFT 21
-
-#define PAGE_SIZE (1 << PAGE_SHIFT)
-#define HPAGE_SIZE (1 << HPAGE_SHIFT)
-
-#define PAGEMAP_PRESENT(ent)	(((ent) & (1ull << 63)) != 0)
-#define PAGEMAP_PFN(ent)	((ent) & ((1ull << 55) - 1))
-
-int pagemap_fd;
 int backing_fd = -1;
 int mmap_flags = MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE;
 #define PROT_RW (PROT_READ | PROT_WRITE)
 
-int64_t allocate_transhuge(void *ptr)
-{
-	uint64_t ent[2];
-
-	/* drop pmd */
-	if (mmap(ptr, HPAGE_SIZE, PROT_RW, MAP_FIXED | mmap_flags,
-		 backing_fd, 0) != ptr)
-		errx(2, "mmap transhuge");
-
-	if (madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE))
-		err(2, "MADV_HUGEPAGE");
-
-	/* allocate transparent huge page */
-	*(volatile void **)ptr = ptr;
-
-	if (pread(pagemap_fd, ent, sizeof(ent),
-			(uintptr_t)ptr >> (PAGE_SHIFT - 3)) != sizeof(ent))
-		err(2, "read pagemap");
-
-	if (PAGEMAP_PRESENT(ent[0]) && PAGEMAP_PRESENT(ent[1]) &&
-	    PAGEMAP_PFN(ent[0]) + 1 == PAGEMAP_PFN(ent[1]) &&
-	    !(PAGEMAP_PFN(ent[0]) & ((1 << (HPAGE_SHIFT - PAGE_SHIFT)) - 1)))
-		return PAGEMAP_PFN(ent[0]);
-
-	return -1;
-}
-
 int main(int argc, char **argv)
 {
 	size_t ram, len;
@@ -67,6 +31,7 @@ int main(int argc, char **argv)
 	double s;
 	uint8_t *map;
 	size_t map_len;
+	int pagemap_fd;
 
 	ram = sysconf(_SC_PHYS_PAGES);
 	if (ram > SIZE_MAX / sysconf(_SC_PAGESIZE) / 4)
@@ -122,7 +87,7 @@ int main(int argc, char **argv)
 		for (p = ptr; p < ptr + len; p += HPAGE_SIZE) {
 			int64_t pfn;
 
-			pfn = allocate_transhuge(p);
+			pfn = allocate_transhuge(p, pagemap_fd);
 
 			if (pfn < 0) {
 				nr_failed++;
diff --git a/tools/testing/selftests/vm/util.h b/tools/testing/selftests/vm/util.h
new file mode 100644
index 000000000000..0f0a0f345d76
--- /dev/null
+++ b/tools/testing/selftests/vm/util.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __KSELFTEST_VM_UTIL_H
+#define __KSELFTEST_VM_UTIL_H
+
+#include <stdint.h>
+#include <sys/mman.h>
+#include <err.h>
+
+#define PAGE_SHIFT	12
+#define HPAGE_SHIFT	21
+
+#define PAGE_SIZE (1 << PAGE_SHIFT)
+#define HPAGE_SIZE (1 << HPAGE_SHIFT)
+
+#define PAGEMAP_PRESENT(ent)	(((ent) & (1ull << 63)) != 0)
+#define PAGEMAP_PFN(ent)	((ent) & ((1ull << 55) - 1))
+
+
+static inline int64_t allocate_transhuge(void *ptr, int pagemap_fd)
+{
+	uint64_t ent[2];
+
+	/* drop pmd */
+	if (mmap(ptr, HPAGE_SIZE, PROT_READ | PROT_WRITE,
+		 MAP_FIXED | MAP_ANONYMOUS |
+		 MAP_NORESERVE | MAP_PRIVATE, -1, 0) != ptr)
+		errx(2, "mmap transhuge");
+
+	if (madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE))
+		err(2, "MADV_HUGEPAGE");
+
+	/* allocate transparent huge page */
+	*(volatile void **)ptr = ptr;
+
+	if (pread(pagemap_fd, ent, sizeof(ent),
+		  (uintptr_t)ptr >> (PAGE_SHIFT - 3)) != sizeof(ent))
+		err(2, "read pagemap");
+
+	if (PAGEMAP_PRESENT(ent[0]) && PAGEMAP_PRESENT(ent[1]) &&
+	    PAGEMAP_PFN(ent[0]) + 1 == PAGEMAP_PFN(ent[1]) &&
+	    !(PAGEMAP_PFN(ent[0]) & ((1 << (HPAGE_SHIFT - PAGE_SHIFT)) - 1)))
+		return PAGEMAP_PFN(ent[0]);
+
+	return -1;
+}
+
+#endif
-- 
cgit 


From 6f6a841fb77df0bc6383133b22feb9b015380a55 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@kernel.org>
Date: Thu, 24 Mar 2022 18:09:46 -0700
Subject: selftest/vm: add helpers to detect PAGE_SIZE and PAGE_SHIFT

PAGE_SIZE is not 4096 in many configurations, particularly ppc64 uses 64K
pages in majority of cases.

Add helpers to detect PAGE_SIZE and PAGE_SHIFT dynamically.

Without this tests are broken w.r.t reading /proc/self/pagemap

    if (pread(pagemap_fd, ent, sizeof(ent),
              (uintptr_t)ptr >> (PAGE_SHIFT - 3)) != sizeof(ent))
              err(2, "read pagemap");

Link: https://lkml.kernel.org/r/20220307054355.149820-2-aneesh.kumar@linux.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/gup_test.c |  3 ++-
 tools/testing/selftests/vm/util.h     | 27 ++++++++++++++++++++++++---
 2 files changed, 26 insertions(+), 4 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/gup_test.c b/tools/testing/selftests/vm/gup_test.c
index fe043f67798b..cda837a14736 100644
--- a/tools/testing/selftests/vm/gup_test.c
+++ b/tools/testing/selftests/vm/gup_test.c
@@ -10,8 +10,9 @@
 #include <assert.h>
 #include "../../../../mm/gup_test.h"
 
+#include "util.h"
+
 #define MB (1UL << 20)
-#define PAGE_SIZE sysconf(_SC_PAGESIZE)
 
 /* Just the flags we need, copied from mm.h: */
 #define FOLL_WRITE	0x01	/* check pte is writable */
diff --git a/tools/testing/selftests/vm/util.h b/tools/testing/selftests/vm/util.h
index 0f0a0f345d76..b27d26199334 100644
--- a/tools/testing/selftests/vm/util.h
+++ b/tools/testing/selftests/vm/util.h
@@ -6,11 +6,32 @@
 #include <stdint.h>
 #include <sys/mman.h>
 #include <err.h>
+#include <string.h> /* ffsl() */
+#include <unistd.h> /* _SC_PAGESIZE */
 
-#define PAGE_SHIFT	12
-#define HPAGE_SHIFT	21
+static unsigned int __page_size;
+static unsigned int __page_shift;
 
-#define PAGE_SIZE (1 << PAGE_SHIFT)
+static inline unsigned int page_size(void)
+{
+	if (!__page_size)
+		__page_size = sysconf(_SC_PAGESIZE);
+	return __page_size;
+}
+
+static inline unsigned int page_shift(void)
+{
+	if (!__page_shift)
+		__page_shift = (ffsl(page_size()) - 1);
+	return __page_shift;
+}
+
+#define PAGE_SHIFT	(page_shift())
+#define PAGE_SIZE	(page_size())
+/*
+ * On ppc64 this will only work with radix 2M hugepage size
+ */
+#define HPAGE_SHIFT 21
 #define HPAGE_SIZE (1 << HPAGE_SHIFT)
 
 #define PAGEMAP_PRESENT(ent)	(((ent) & (1ull << 63)) != 0)
-- 
cgit 


From c4b6cb884011a5063722c15d14789d1ac5aad28a Mon Sep 17 00:00:00 2001
From: Mike Kravetz <mike.kravetz@oracle.com>
Date: Thu, 24 Mar 2022 18:13:21 -0700
Subject: selftests/vm: add hugetlb madvise MADV_DONTNEED MADV_REMOVE test

Now that MADV_DONTNEED support for hugetlb is enabled, add corresponding
tests.  MADV_REMOVE has been enabled for some time, but no tests exist so
add them as well.

Link: https://lkml.kernel.org/r/20220215002348.128823-3-mike.kravetz@oracle.com
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev>
Cc: Peter Xu <peterx@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/.gitignore        |   1 +
 tools/testing/selftests/vm/Makefile          |   1 +
 tools/testing/selftests/vm/hugetlb-madvise.c | 410 +++++++++++++++++++++++++++
 tools/testing/selftests/vm/run_vmtests.sh    |  12 +
 4 files changed, 424 insertions(+)
 create mode 100644 tools/testing/selftests/vm/hugetlb-madvise.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index 3b5faec3c04f..d7507f3c7c76 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -3,6 +3,7 @@ hugepage-mmap
 hugepage-mremap
 hugepage-shm
 hugepage-vmemmap
+hugetlb-madvise
 khugepaged
 map_hugetlb
 map_populate
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index fbf390b51c2f..04a49e876a46 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -30,6 +30,7 @@ LDLIBS = -lrt -lpthread
 TEST_GEN_FILES = compaction_test
 TEST_GEN_FILES += gup_test
 TEST_GEN_FILES += hmm-tests
+TEST_GEN_FILES += hugetlb-madvise
 TEST_GEN_FILES += hugepage-mmap
 TEST_GEN_FILES += hugepage-mremap
 TEST_GEN_FILES += hugepage-shm
diff --git a/tools/testing/selftests/vm/hugetlb-madvise.c b/tools/testing/selftests/vm/hugetlb-madvise.c
new file mode 100644
index 000000000000..6c6af40f5747
--- /dev/null
+++ b/tools/testing/selftests/vm/hugetlb-madvise.c
@@ -0,0 +1,410 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * hugepage-madvise:
+ *
+ * Basic functional testing of madvise MADV_DONTNEED and MADV_REMOVE
+ * on hugetlb mappings.
+ *
+ * Before running this test, make sure the administrator has pre-allocated
+ * at least MIN_FREE_PAGES hugetlb pages and they are free.  In addition,
+ * the test takes an argument that is the path to a file in a hugetlbfs
+ * filesystem.  Therefore, a hugetlbfs filesystem must be mounted on some
+ * directory.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#define __USE_GNU
+#include <fcntl.h>
+
+#define USAGE	"USAGE: %s <hugepagefile_name>\n"
+#define MIN_FREE_PAGES	20
+#define NR_HUGE_PAGES	10	/* common number of pages to map/allocate */
+
+#define validate_free_pages(exp_free)					\
+	do {								\
+		int fhp = get_free_hugepages();				\
+		if (fhp != (exp_free)) {				\
+			printf("Unexpected number of free huge "	\
+				"pages line %d\n", __LINE__);		\
+			exit(1);					\
+		}							\
+	} while (0)
+
+unsigned long huge_page_size;
+unsigned long base_page_size;
+
+/*
+ * default_huge_page_size copied from mlock2-tests.c
+ */
+unsigned long default_huge_page_size(void)
+{
+	unsigned long hps = 0;
+	char *line = NULL;
+	size_t linelen = 0;
+	FILE *f = fopen("/proc/meminfo", "r");
+
+	if (!f)
+		return 0;
+	while (getline(&line, &linelen, f) > 0) {
+		if (sscanf(line, "Hugepagesize:       %lu kB", &hps) == 1) {
+			hps <<= 10;
+			break;
+		}
+	}
+
+	free(line);
+	fclose(f);
+	return hps;
+}
+
+unsigned long get_free_hugepages(void)
+{
+	unsigned long fhp = 0;
+	char *line = NULL;
+	size_t linelen = 0;
+	FILE *f = fopen("/proc/meminfo", "r");
+
+	if (!f)
+		return fhp;
+	while (getline(&line, &linelen, f) > 0) {
+		if (sscanf(line, "HugePages_Free:      %lu", &fhp) == 1)
+			break;
+	}
+
+	free(line);
+	fclose(f);
+	return fhp;
+}
+
+void write_fault_pages(void *addr, unsigned long nr_pages)
+{
+	unsigned long i;
+
+	for (i = 0; i < nr_pages; i++)
+		*((unsigned long *)(addr + (i * huge_page_size))) = i;
+}
+
+void read_fault_pages(void *addr, unsigned long nr_pages)
+{
+	unsigned long i, tmp;
+
+	for (i = 0; i < nr_pages; i++)
+		tmp += *((unsigned long *)(addr + (i * huge_page_size)));
+}
+
+int main(int argc, char **argv)
+{
+	unsigned long free_hugepages;
+	void *addr, *addr2;
+	int fd;
+	int ret;
+
+	if (argc != 2) {
+		printf(USAGE, argv[0]);
+		exit(1);
+	}
+
+	huge_page_size = default_huge_page_size();
+	if (!huge_page_size) {
+		printf("Unable to determine huge page size, exiting!\n");
+		exit(1);
+	}
+	base_page_size = sysconf(_SC_PAGE_SIZE);
+	if (!huge_page_size) {
+		printf("Unable to determine base page size, exiting!\n");
+		exit(1);
+	}
+
+	free_hugepages = get_free_hugepages();
+	if (free_hugepages < MIN_FREE_PAGES) {
+		printf("Not enough free huge pages to test, exiting!\n");
+		exit(1);
+	}
+
+	fd = open(argv[1], O_CREAT | O_RDWR, 0755);
+	if (fd < 0) {
+		perror("Open failed");
+		exit(1);
+	}
+
+	/*
+	 * Test validity of MADV_DONTNEED addr and length arguments.  mmap
+	 * size is NR_HUGE_PAGES + 2.  One page at the beginning and end of
+	 * the mapping will be unmapped so we KNOW there is nothing mapped
+	 * there.
+	 */
+	addr = mmap(NULL, (NR_HUGE_PAGES + 2) * huge_page_size,
+			PROT_READ | PROT_WRITE,
+			MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+			-1, 0);
+	if (addr == MAP_FAILED) {
+		perror("mmap");
+		exit(1);
+	}
+	if (munmap(addr, huge_page_size) ||
+			munmap(addr + (NR_HUGE_PAGES + 1) * huge_page_size,
+				huge_page_size)) {
+		perror("munmap");
+		exit(1);
+	}
+	addr = addr + huge_page_size;
+
+	write_fault_pages(addr, NR_HUGE_PAGES);
+	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+	/* addr before mapping should fail */
+	ret = madvise(addr - base_page_size, NR_HUGE_PAGES * huge_page_size,
+		MADV_DONTNEED);
+	if (!ret) {
+		printf("Unexpected success of madvise call with invalid addr line %d\n",
+				__LINE__);
+			exit(1);
+	}
+
+	/* addr + length after mapping should fail */
+	ret = madvise(addr, (NR_HUGE_PAGES * huge_page_size) + base_page_size,
+		MADV_DONTNEED);
+	if (!ret) {
+		printf("Unexpected success of madvise call with invalid length line %d\n",
+				__LINE__);
+			exit(1);
+	}
+
+	(void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
+
+	/*
+	 * Test alignment of MADV_DONTNEED addr and length arguments
+	 */
+	addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
+			PROT_READ | PROT_WRITE,
+			MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+			-1, 0);
+	if (addr == MAP_FAILED) {
+		perror("mmap");
+		exit(1);
+	}
+	write_fault_pages(addr, NR_HUGE_PAGES);
+	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+	/* addr is not huge page size aligned and should fail */
+	ret = madvise(addr + base_page_size,
+			NR_HUGE_PAGES * huge_page_size - base_page_size,
+			MADV_DONTNEED);
+	if (!ret) {
+		printf("Unexpected success of madvise call with unaligned start address %d\n",
+				__LINE__);
+			exit(1);
+	}
+
+	/* addr + length should be aligned up to huge page size */
+	if (madvise(addr,
+			((NR_HUGE_PAGES - 1) * huge_page_size) + base_page_size,
+			MADV_DONTNEED)) {
+		perror("madvise");
+		exit(1);
+	}
+
+	/* should free all pages in mapping */
+	validate_free_pages(free_hugepages);
+
+	(void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
+
+	/*
+	 * Test MADV_DONTNEED on anonymous private mapping
+	 */
+	addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
+			PROT_READ | PROT_WRITE,
+			MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+			-1, 0);
+	if (addr == MAP_FAILED) {
+		perror("mmap");
+		exit(1);
+	}
+	write_fault_pages(addr, NR_HUGE_PAGES);
+	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+	if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
+		perror("madvise");
+		exit(1);
+	}
+
+	/* should free all pages in mapping */
+	validate_free_pages(free_hugepages);
+
+	(void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
+
+	/*
+	 * Test MADV_DONTNEED on private mapping of hugetlb file
+	 */
+	if (fallocate(fd, 0, 0, NR_HUGE_PAGES * huge_page_size)) {
+		perror("fallocate");
+		exit(1);
+	}
+	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+	addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
+			PROT_READ | PROT_WRITE,
+			MAP_PRIVATE, fd, 0);
+	if (addr == MAP_FAILED) {
+		perror("mmap");
+		exit(1);
+	}
+
+	/* read should not consume any pages */
+	read_fault_pages(addr, NR_HUGE_PAGES);
+	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+	/* madvise should not free any pages */
+	if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
+		perror("madvise");
+		exit(1);
+	}
+	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+	/* writes should allocate private pages */
+	write_fault_pages(addr, NR_HUGE_PAGES);
+	validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));
+
+	/* madvise should free private pages */
+	if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
+		perror("madvise");
+		exit(1);
+	}
+	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+	/* writes should allocate private pages */
+	write_fault_pages(addr, NR_HUGE_PAGES);
+	validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));
+
+	/*
+	 * The fallocate below certainly should free the pages associated
+	 * with the file.  However, pages in the private mapping are also
+	 * freed.  This is not the 'correct' behavior, but is expected
+	 * because this is how it has worked since the initial hugetlb
+	 * implementation.
+	 */
+	if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+					0, NR_HUGE_PAGES * huge_page_size)) {
+		perror("fallocate");
+		exit(1);
+	}
+	validate_free_pages(free_hugepages);
+
+	(void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
+
+	/*
+	 * Test MADV_DONTNEED on shared mapping of hugetlb file
+	 */
+	if (fallocate(fd, 0, 0, NR_HUGE_PAGES * huge_page_size)) {
+		perror("fallocate");
+		exit(1);
+	}
+	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+	addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
+			PROT_READ | PROT_WRITE,
+			MAP_SHARED, fd, 0);
+	if (addr == MAP_FAILED) {
+		perror("mmap");
+		exit(1);
+	}
+
+	/* write should not consume any pages */
+	write_fault_pages(addr, NR_HUGE_PAGES);
+	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+	/* madvise should not free any pages */
+	if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
+		perror("madvise");
+		exit(1);
+	}
+	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+	/*
+	 * Test MADV_REMOVE on shared mapping of hugetlb file
+	 *
+	 * madvise is same as hole punch and should free all pages.
+	 */
+	if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_REMOVE)) {
+		perror("madvise");
+		exit(1);
+	}
+	validate_free_pages(free_hugepages);
+	(void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
+
+	/*
+	 * Test MADV_REMOVE on shared and private mapping of hugetlb file
+	 */
+	if (fallocate(fd, 0, 0, NR_HUGE_PAGES * huge_page_size)) {
+		perror("fallocate");
+		exit(1);
+	}
+	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+	addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
+			PROT_READ | PROT_WRITE,
+			MAP_SHARED, fd, 0);
+	if (addr == MAP_FAILED) {
+		perror("mmap");
+		exit(1);
+	}
+
+	/* shared write should not consume any additional pages */
+	write_fault_pages(addr, NR_HUGE_PAGES);
+	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+	addr2 = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
+			PROT_READ | PROT_WRITE,
+			MAP_PRIVATE, fd, 0);
+	if (addr2 == MAP_FAILED) {
+		perror("mmap");
+		exit(1);
+	}
+
+	/* private read should not consume any pages */
+	read_fault_pages(addr2, NR_HUGE_PAGES);
+	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+	/* private write should consume additional pages */
+	write_fault_pages(addr2, NR_HUGE_PAGES);
+	validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));
+
+	/* madvise of shared mapping should not free any pages */
+	if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
+		perror("madvise");
+		exit(1);
+	}
+	validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));
+
+	/* madvise of private mapping should free private pages */
+	if (madvise(addr2, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
+		perror("madvise");
+		exit(1);
+	}
+	validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+	/* private write should consume additional pages again */
+	write_fault_pages(addr2, NR_HUGE_PAGES);
+	validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));
+
+	/*
+	 * madvise should free both file and private pages although this is
+	 * not correct.  private pages should not be freed, but this is
+	 * expected.  See comment associated with FALLOC_FL_PUNCH_HOLE call.
+	 */
+	if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_REMOVE)) {
+		perror("madvise");
+		exit(1);
+	}
+	validate_free_pages(free_hugepages);
+
+	(void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
+	(void)munmap(addr2, NR_HUGE_PAGES * huge_page_size);
+
+	close(fd);
+	unlink(argv[1]);
+	return 0;
+}
diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh
index e10d50e0b8e8..1948098f431d 100755
--- a/tools/testing/selftests/vm/run_vmtests.sh
+++ b/tools/testing/selftests/vm/run_vmtests.sh
@@ -131,6 +131,18 @@ else
 	echo "[PASS]"
 fi
 
+echo "-----------------------"
+echo "running hugetlb-madvise"
+echo "-----------------------"
+./hugetlb-madvise $mnt/madvise-test
+if [ $? -ne 0 ]; then
+	echo "[FAIL]"
+	exitcode=1
+else
+	echo "[PASS]"
+fi
+rm -f $mnt/madvise-test
+
 echo "NOTE: The above hugetlb tests provide minimal coverage.  Use"
 echo "      https://github.com/libhugetlbfs/libhugetlbfs.git for"
 echo "      hugetlb regression testing."
-- 
cgit 


From 9ae8f2b849f7991cb88ba20c39cb488d0a4f7916 Mon Sep 17 00:00:00 2001
From: Mike Kravetz <mike.kravetz@oracle.com>
Date: Thu, 24 Mar 2022 18:13:24 -0700
Subject: userfaultfd/selftests: enable hugetlb remap and remove event testing

With MADV_DONTNEED support added to hugetlb mappings, mremap testing can
also be enabled for hugetlb.

Modify the tests to use madvise MADV_DONTNEED and MADV_REMOVE instead of
fallocate hole puch for releasing hugetlb pages.

Link: https://lkml.kernel.org/r/20220215002348.128823-4-mike.kravetz@oracle.com
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: Axel Rasmussen <axelrasmussen@google.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev>
Cc: Peter Xu <peterx@redhat.com>
Cc: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/run_vmtests.sh |  3 +-
 tools/testing/selftests/vm/userfaultfd.c  | 69 ++++++++++++++++---------------
 2 files changed, 36 insertions(+), 36 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh
index 1948098f431d..3b265f140c25 100755
--- a/tools/testing/selftests/vm/run_vmtests.sh
+++ b/tools/testing/selftests/vm/run_vmtests.sh
@@ -208,14 +208,13 @@ echo "running userfaultfd_hugetlb"
 echo "---------------------------"
 # Test requires source and destination huge pages.  Size of source
 # (half_ufd_size_MB) is passed as argument to test.
-./userfaultfd hugetlb $half_ufd_size_MB 32 $mnt/ufd_test_file
+./userfaultfd hugetlb $half_ufd_size_MB 32
 if [ $? -ne 0 ]; then
 	echo "[FAIL]"
 	exitcode=1
 else
 	echo "[PASS]"
 fi
-rm -f $mnt/ufd_test_file
 
 echo "-------------------------"
 echo "running userfaultfd_shmem"
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 39403b8931de..92a4516f8f0d 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -89,7 +89,6 @@ static bool test_uffdio_minor = false;
 static bool map_shared;
 static int shm_fd;
 static int huge_fd;
-static char *huge_fd_off0;
 static unsigned long long *count_verify;
 static int uffd = -1;
 static int uffd_flags, finished, *pipefd;
@@ -128,9 +127,9 @@ const char *examples =
     "./userfaultfd anon 100 99999\n\n"
     "# Run share memory test on 1GiB region with 99 bounces:\n"
     "./userfaultfd shmem 1000 99\n\n"
-    "# Run hugetlb memory test on 256MiB region with 50 bounces (using /dev/hugepages/hugefile):\n"
-    "./userfaultfd hugetlb 256 50 /dev/hugepages/hugefile\n\n"
-    "# Run the same hugetlb test but using shmem:\n"
+    "# Run hugetlb memory test on 256MiB region with 50 bounces:\n"
+    "./userfaultfd hugetlb 256 50\n\n"
+    "# Run the same hugetlb test but using shared file:\n"
     "./userfaultfd hugetlb_shared 256 50 /dev/hugepages/hugefile\n\n"
     "# 10MiB-~6GiB 999 bounces anonymous test, "
     "continue forever unless an error triggers\n"
@@ -227,10 +226,13 @@ static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset)
 
 static void hugetlb_release_pages(char *rel_area)
 {
-	if (fallocate(huge_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
-		      rel_area == huge_fd_off0 ? 0 : nr_pages * page_size,
-		      nr_pages * page_size))
-		err("fallocate() failed");
+	if (!map_shared) {
+		if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED))
+			err("madvise(MADV_DONTNEED) failed");
+	} else {
+		if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE))
+			err("madvise(MADV_REMOVE) failed");
+	}
 }
 
 static void hugetlb_allocate_area(void **alloc_area)
@@ -238,26 +240,37 @@ static void hugetlb_allocate_area(void **alloc_area)
 	void *area_alias = NULL;
 	char **alloc_area_alias;
 
-	*alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
-			   (map_shared ? MAP_SHARED : MAP_PRIVATE) |
-			   MAP_HUGETLB |
-			   (*alloc_area == area_src ? 0 : MAP_NORESERVE),
-			   huge_fd, *alloc_area == area_src ? 0 :
-			   nr_pages * page_size);
+	if (!map_shared)
+		*alloc_area = mmap(NULL,
+			nr_pages * page_size,
+			PROT_READ | PROT_WRITE,
+			MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB |
+				(*alloc_area == area_src ? 0 : MAP_NORESERVE),
+			-1,
+			0);
+	else
+		*alloc_area = mmap(NULL,
+			nr_pages * page_size,
+			PROT_READ | PROT_WRITE,
+			MAP_SHARED |
+				(*alloc_area == area_src ? 0 : MAP_NORESERVE),
+			huge_fd,
+			*alloc_area == area_src ? 0 : nr_pages * page_size);
 	if (*alloc_area == MAP_FAILED)
 		err("mmap of hugetlbfs file failed");
 
 	if (map_shared) {
-		area_alias = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
-				  MAP_SHARED | MAP_HUGETLB,
-				  huge_fd, *alloc_area == area_src ? 0 :
-				  nr_pages * page_size);
+		area_alias = mmap(NULL,
+			nr_pages * page_size,
+			PROT_READ | PROT_WRITE,
+			MAP_SHARED,
+			huge_fd,
+			*alloc_area == area_src ? 0 : nr_pages * page_size);
 		if (area_alias == MAP_FAILED)
 			err("mmap of hugetlb file alias failed");
 	}
 
 	if (*alloc_area == area_src) {
-		huge_fd_off0 = *alloc_area;
 		alloc_area_alias = &area_src_alias;
 	} else {
 		alloc_area_alias = &area_dst_alias;
@@ -270,12 +283,7 @@ static void hugetlb_alias_mapping(__u64 *start, size_t len, unsigned long offset
 {
 	if (!map_shared)
 		return;
-	/*
-	 * We can't zap just the pagetable with hugetlbfs because
-	 * MADV_DONTEED won't work. So exercise -EEXIST on a alias
-	 * mapping where the pagetables are not established initially,
-	 * this way we'll exercise the -EEXEC at the fs level.
-	 */
+
 	*start = (unsigned long) area_dst_alias + offset;
 }
 
@@ -428,7 +436,6 @@ static void uffd_test_ctx_clear(void)
 		uffd = -1;
 	}
 
-	huge_fd_off0 = NULL;
 	munmap_area((void **)&area_src);
 	munmap_area((void **)&area_src_alias);
 	munmap_area((void **)&area_dst);
@@ -926,10 +933,7 @@ static int faulting_process(int signal_test)
 	struct sigaction act;
 	unsigned long signalled = 0;
 
-	if (test_type != TEST_HUGETLB)
-		split_nr_pages = (nr_pages + 1) / 2;
-	else
-		split_nr_pages = nr_pages;
+	split_nr_pages = (nr_pages + 1) / 2;
 
 	if (signal_test) {
 		sigbuf = &jbuf;
@@ -986,9 +990,6 @@ static int faulting_process(int signal_test)
 	if (signal_test)
 		return signalled != split_nr_pages;
 
-	if (test_type == TEST_HUGETLB)
-		return 0;
-
 	area_dst = mremap(area_dst, nr_pages * page_size,  nr_pages * page_size,
 			  MREMAP_MAYMOVE | MREMAP_FIXED, area_src);
 	if (area_dst == MAP_FAILED)
@@ -1676,7 +1677,7 @@ int main(int argc, char **argv)
 	}
 	nr_pages = nr_pages_per_cpu * nr_cpus;
 
-	if (test_type == TEST_HUGETLB) {
+	if (test_type == TEST_HUGETLB && map_shared) {
 		if (argc < 5)
 			usage();
 		huge_fd = open(argv[4], O_CREAT | O_RDWR, 0755);
-- 
cgit 


From 25fd2d41b505d0640bdfe67aa77c549de2d3c18a Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 24 Mar 2022 18:14:18 -0700
Subject: selftests: kselftest framework: provide "finished" helper

Instead of having each time that wants to use ksft_exit() have to figure
out the internals of kselftest.h, add the helper ksft_finished() that
makes sure the passes, xfails, and skips are equal to the test plan count.

Link: https://lkml.kernel.org/r/20220201013717.2464392-1-keescook@chromium.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/kselftest.h       | 10 ++++++++++
 tools/testing/selftests/vm/memfd_secret.c |  2 +-
 2 files changed, 11 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
index f1180987492c..b8f248018174 100644
--- a/tools/testing/selftests/kselftest.h
+++ b/tools/testing/selftests/kselftest.h
@@ -28,6 +28,7 @@
  *
  * When all tests are finished, clean up and exit the program with one of:
  *
+ *    ksft_finished();
  *    ksft_exit(condition);
  *    ksft_exit_pass();
  *    ksft_exit_fail();
@@ -235,6 +236,15 @@ static inline int ksft_exit_fail(void)
 		ksft_exit_fail();	\
 	} while (0)
 
+/**
+ * ksft_finished() - Exit selftest with success if all tests passed
+ */
+#define ksft_finished()			\
+	ksft_exit(ksft_plan ==		\
+		  ksft_cnt.ksft_pass +	\
+		  ksft_cnt.ksft_xfail +	\
+		  ksft_cnt.ksft_xskip)
+
 static inline int ksft_exit_fail_msg(const char *msg, ...)
 {
 	int saved_errno = errno;
diff --git a/tools/testing/selftests/vm/memfd_secret.c b/tools/testing/selftests/vm/memfd_secret.c
index 93e7e7ffed33..957b9e18c729 100644
--- a/tools/testing/selftests/vm/memfd_secret.c
+++ b/tools/testing/selftests/vm/memfd_secret.c
@@ -282,7 +282,7 @@ int main(int argc, char *argv[])
 
 	close(fd);
 
-	ksft_exit(!ksft_get_fail_cnt());
+	ksft_finished();
 }
 
 #else /* __NR_memfd_secret */
-- 
cgit 


From b50d3b46f84282d795ae3076111acb75ae1031f3 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Thu, 24 Mar 2022 22:05:14 +0200
Subject: selftests: test_vxlan_under_vrf: Fix broken test case

The purpose of the last test case is to test VXLAN encapsulation and
decapsulation when the underlay lookup takes place in a non-default VRF.
This is achieved by enslaving the physical device of the tunnel to a
VRF.

The binding of the VXLAN UDP socket to the VRF happens when the VXLAN
device itself is opened, not when its physical device is opened. This
was also mentioned in the cited commit ("tests that moving the underlay
from a VRF to another works when down/up the VXLAN interface"), but the
test did something else.

Fix it by reopening the VXLAN device instead of its physical device.

Before:

 # ./test_vxlan_under_vrf.sh
 Checking HV connectivity                                           [ OK ]
 Check VM connectivity through VXLAN (underlay in the default VRF)  [ OK ]
 Check VM connectivity through VXLAN (underlay in a VRF)            [FAIL]

After:

 # ./test_vxlan_under_vrf.sh
 Checking HV connectivity                                           [ OK ]
 Check VM connectivity through VXLAN (underlay in the default VRF)  [ OK ]
 Check VM connectivity through VXLAN (underlay in a VRF)            [ OK ]

Fixes: 03f1c26b1c56 ("test/net: Add script for VXLAN underlay in a VRF")
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://lore.kernel.org/r/20220324200514.1638326-1-idosch@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/test_vxlan_under_vrf.sh | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/test_vxlan_under_vrf.sh b/tools/testing/selftests/net/test_vxlan_under_vrf.sh
index ea5a7a808f12..1fd1250ebc66 100755
--- a/tools/testing/selftests/net/test_vxlan_under_vrf.sh
+++ b/tools/testing/selftests/net/test_vxlan_under_vrf.sh
@@ -120,11 +120,11 @@ echo "[ OK ]"
 
 # Move the underlay to a non-default VRF
 ip -netns hv-1 link set veth0 vrf vrf-underlay
-ip -netns hv-1 link set veth0 down
-ip -netns hv-1 link set veth0 up
+ip -netns hv-1 link set vxlan0 down
+ip -netns hv-1 link set vxlan0 up
 ip -netns hv-2 link set veth0 vrf vrf-underlay
-ip -netns hv-2 link set veth0 down
-ip -netns hv-2 link set veth0 up
+ip -netns hv-2 link set vxlan0 down
+ip -netns hv-2 link set vxlan0 up
 
 echo -n "Check VM connectivity through VXLAN (underlay in a VRF)            "
 ip netns exec vm-1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false)
-- 
cgit 


From 5c7e49be96ea24776a5b5a07c732c477294add00 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 25 Mar 2022 16:27:09 -0700
Subject: selftests: tls: skip cmsg_to_pipe tests with TLS=n

These are negative tests, testing TLS code rejects certain
operations. They won't pass without TLS enabled, pure TCP
accepts those operations.

Reported-by: Linux Kernel Functional Testing <lkft@linaro.org>
Fixes: d87d67fd61ef ("selftests: tls: test splicing cmsgs")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/tls.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 6e468e0f42f7..5d70b04c482c 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -683,6 +683,9 @@ TEST_F(tls, splice_cmsg_to_pipe)
 	char buf[10];
 	int p[2];
 
+	if (self->notls)
+		SKIP(return, "no TLS support");
+
 	ASSERT_GE(pipe(p), 0);
 	EXPECT_EQ(tls_send_cmsg(self->fd, 100, test_str, send_len, 0), 10);
 	EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, send_len, 0), -1);
@@ -703,6 +706,9 @@ TEST_F(tls, splice_dec_cmsg_to_pipe)
 	char buf[10];
 	int p[2];
 
+	if (self->notls)
+		SKIP(return, "no TLS support");
+
 	ASSERT_GE(pipe(p), 0);
 	EXPECT_EQ(tls_send_cmsg(self->fd, 100, test_str, send_len, 0), 10);
 	EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1);
-- 
cgit 


From d9142e1cf3bbdaf21337767114ecab26fe702d47 Mon Sep 17 00:00:00 2001
From: Naresh Kamboju <naresh.kamboju@linaro.org>
Date: Mon, 28 Mar 2022 19:16:50 +0530
Subject: selftests: net: Add tls config dependency for tls selftests

selftest net tls test cases need TLS=m without this the test hangs.
Enabling config TLS solves this problem and runs to complete.
  - CONFIG_TLS=m

Reported-by: Linux Kernel Functional Testing <lkft@linaro.org>
Signed-off-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/config | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index ead7963b9bf0..cecb921a0dbf 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -43,5 +43,6 @@ CONFIG_NET_ACT_TUNNEL_KEY=m
 CONFIG_NET_ACT_MIRRED=m
 CONFIG_BAREUDP=m
 CONFIG_IPV6_IOAM6_LWTUNNEL=y
+CONFIG_TLS=m
 CONFIG_CRYPTO_SM4=y
 CONFIG_AMT=m
-- 
cgit 


From 20695e9a9fd39103d1b0669470ae74030b7aa196 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 28 Mar 2022 14:29:04 -0700
Subject: Revert "selftests: net: Add tls config dependency for tls selftests"

This reverts commit d9142e1cf3bbdaf21337767114ecab26fe702d47.

The test is supposed to run cleanly with TLS is disabled,
to test compatibility with TCP behavior. I can't repro
the failure [1], the problem should be debugged rather
than papered over.

Link: https://lore.kernel.org/all/20220325161203.7000698c@kicinski-fedora-pc1c0hjn.dhcp.thefacebook.com/ [1]
Fixes: d9142e1cf3bb ("selftests: net: Add tls config dependency for tls selftests")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Link: https://lore.kernel.org/r/20220328212904.2685395-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/config | 1 -
 1 file changed, 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index cecb921a0dbf..ead7963b9bf0 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -43,6 +43,5 @@ CONFIG_NET_ACT_TUNNEL_KEY=m
 CONFIG_NET_ACT_MIRRED=m
 CONFIG_BAREUDP=m
 CONFIG_IPV6_IOAM6_LWTUNNEL=y
-CONFIG_TLS=m
 CONFIG_CRYPTO_SM4=y
 CONFIG_AMT=m
-- 
cgit 


From 99dea2c664d7bc7e4f6f6947182d0d365165a998 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 25 Mar 2022 15:56:43 -0700
Subject: selftests/bpf: fix selftest after random: Urandom_read tracepoint
 removal

14c174633f34 ("random: remove unused tracepoints") removed all the
tracepoints from drivers/char/random.c, one of which,
random:urandom_read, was used by stacktrace_build_id selftest to trigger
stack trace capture.

Fix breakage by switching to kprobing urandom_read() function.

Suggested-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20220325225643.2606-1-andrii@kernel.org
---
 tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
index 36a707e7c7a7..6c62bfb8bb6f 100644
--- a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
+++ b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
@@ -39,16 +39,8 @@ struct {
 	__type(value, stack_trace_t);
 } stack_amap SEC(".maps");
 
-/* taken from /sys/kernel/debug/tracing/events/random/urandom_read/format */
-struct random_urandom_args {
-	unsigned long long pad;
-	int got_bits;
-	int pool_left;
-	int input_left;
-};
-
-SEC("tracepoint/random/urandom_read")
-int oncpu(struct random_urandom_args *args)
+SEC("kprobe/urandom_read")
+int oncpu(struct pt_regs *args)
 {
 	__u32 max_len = sizeof(struct bpf_stack_build_id)
 			* PERF_MAX_STACK_DEPTH;
-- 
cgit 


From ccaff3d56acc47c257a99b2807b7c78a9467cf09 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Fri, 25 Mar 2022 13:03:04 -0700
Subject: selftests/bpf: Fix clang compilation errors

llvm upstream patch ([1]) added to issue warning for code like
  void test() {
    int j = 0;
    for (int i = 0; i < 1000; i++)
            j++;
    return;
  }

This triggered several errors in selftests/bpf build since
compilation flag -Werror is used.
  ...
  test_lpm_map.c:212:15: error: variable 'n_matches' set but not used [-Werror,-Wunused-but-set-variable]
        size_t i, j, n_matches, n_matches_after_delete, n_nodes, n_lookups;
                     ^
  test_lpm_map.c:212:26: error: variable 'n_matches_after_delete' set but not used [-Werror,-Wunused-but-set-variable]
        size_t i, j, n_matches, n_matches_after_delete, n_nodes, n_lookups;
                                ^
  ...
  prog_tests/get_stack_raw_tp.c:32:15: error: variable 'cnt' set but not used [-Werror,-Wunused-but-set-variable]
        static __u64 cnt;
                     ^
  ...

  For test_lpm_map.c, 'n_matches'/'n_matches_after_delete' are changed to be volatile
  in order to silent the warning. I didn't remove these two declarations since
  they are referenced in a commented code which might be used by people in certain
  cases. For get_stack_raw_tp.c, the variable 'cnt' is removed.

  [1] https://reviews.llvm.org/D122271

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220325200304.2915588-1-yhs@fb.com
---
 tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c | 3 ---
 tools/testing/selftests/bpf/test_lpm_map.c                | 3 ++-
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
index e834a01de16a..16048978a1ef 100644
--- a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
@@ -29,11 +29,8 @@ static void get_stack_print_output(void *ctx, int cpu, void *data, __u32 size)
 	 */
 	struct get_stack_trace_t e;
 	int i, num_stack;
-	static __u64 cnt;
 	struct ksym *ks;
 
-	cnt++;
-
 	memset(&e, 0, sizeof(e));
 	memcpy(&e, data, size <= sizeof(e) ? size : sizeof(e));
 
diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c
index baa3e3ecae82..aa294612e0a7 100644
--- a/tools/testing/selftests/bpf/test_lpm_map.c
+++ b/tools/testing/selftests/bpf/test_lpm_map.c
@@ -209,7 +209,8 @@ static void test_lpm_order(void)
 static void test_lpm_map(int keysize)
 {
 	LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC);
-	size_t i, j, n_matches, n_matches_after_delete, n_nodes, n_lookups;
+	volatile size_t n_matches, n_matches_after_delete;
+	size_t i, j, n_nodes, n_lookups;
 	struct tlpm_node *t, *list = NULL;
 	struct bpf_lpm_trie_key *key;
 	uint8_t *data, *value;
-- 
cgit 


From 2609f635a20d3691e7b5725edc3bdadb7bedf8fb Mon Sep 17 00:00:00 2001
From: Haowen Bai <baihaowen@meizu.com>
Date: Wed, 30 Mar 2022 09:59:48 +0800
Subject: selftests/bpf: Fix warning comparing pointer to 0

Avoid pointer type value compared with 0 to make code clear. Reported by
coccicheck:

  tools/testing/selftests/bpf/progs/map_ptr_kern.c:370:21-22:
  WARNING comparing pointer to 0
  tools/testing/selftests/bpf/progs/map_ptr_kern.c:397:21-22:
  WARNING comparing pointer to 0

Signed-off-by: Haowen Bai <baihaowen@meizu.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/1648605588-19269-1-git-send-email-baihaowen@meizu.com
---
 tools/testing/selftests/bpf/progs/map_ptr_kern.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
index b64df94ec476..db388f593d0a 100644
--- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c
+++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
@@ -367,7 +367,7 @@ static inline int check_array_of_maps(void)
 
 	VERIFY(check_default(&array_of_maps->map, map));
 	inner_map = bpf_map_lookup_elem(array_of_maps, &key);
-	VERIFY(inner_map != 0);
+	VERIFY(inner_map != NULL);
 	VERIFY(inner_map->map.max_entries == INNER_MAX_ENTRIES);
 
 	return 1;
@@ -394,7 +394,7 @@ static inline int check_hash_of_maps(void)
 
 	VERIFY(check_default(&hash_of_maps->map, map));
 	inner_map = bpf_map_lookup_elem(hash_of_maps, &key);
-	VERIFY(inner_map != 0);
+	VERIFY(inner_map != NULL);
 	VERIFY(inner_map->map.max_entries == INNER_MAX_ENTRIES);
 
 	return 1;
-- 
cgit 


From ca93ca23409b827b48a2fc0a692496d3f7b67944 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Tue, 29 Mar 2022 21:31:25 -0400
Subject: wireguard: selftests: simplify RNG seeding

The seed_rng() function was written to work across lots of old kernels,
back when WireGuard used a big compatibility layer. Now that things have
evolved, we can vastly simplify this, by just marking the RNG as seeded.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/wireguard/qemu/init.c | 26 +++++++-------------------
 1 file changed, 7 insertions(+), 19 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/wireguard/qemu/init.c b/tools/testing/selftests/wireguard/qemu/init.c
index c9698120ac9d..0b45055d9de0 100644
--- a/tools/testing/selftests/wireguard/qemu/init.c
+++ b/tools/testing/selftests/wireguard/qemu/init.c
@@ -56,26 +56,14 @@ static void print_banner(void)
 
 static void seed_rng(void)
 {
-	int fd;
-	struct {
-		int entropy_count;
-		int buffer_size;
-		unsigned char buffer[256];
-	} entropy = {
-		.entropy_count = sizeof(entropy.buffer) * 8,
-		.buffer_size = sizeof(entropy.buffer),
-		.buffer = "Adding real entropy is not actually important for these tests. Don't try this at home, kids!"
-	};
+	int bits = 256, fd;
 
-	if (mknod("/dev/urandom", S_IFCHR | 0644, makedev(1, 9)))
-		panic("mknod(/dev/urandom)");
-	fd = open("/dev/urandom", O_WRONLY);
+	pretty_message("[+] Fake seeding RNG...");
+	fd = open("/dev/random", O_WRONLY);
 	if (fd < 0)
-		panic("open(urandom)");
-	for (int i = 0; i < 256; ++i) {
-		if (ioctl(fd, RNDADDENTROPY, &entropy) < 0)
-			panic("ioctl(urandom)");
-	}
+		panic("open(random)");
+	if (ioctl(fd, RNDADDTOENTCNT, &bits) < 0)
+		panic("ioctl(RNDADDTOENTCNT)");
 	close(fd);
 }
 
@@ -270,10 +258,10 @@ static void check_leaks(void)
 
 int main(int argc, char *argv[])
 {
-	seed_rng();
 	ensure_console();
 	print_banner();
 	mount_filesystems();
+	seed_rng();
 	kmod_selftests();
 	enable_logging();
 	clear_leaks();
-- 
cgit 


From 0a210af6d0a0595fef566e7eeb072f10f37774be Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Tue, 29 Mar 2022 18:15:02 -0700
Subject: bpf: selftests: Test fentry tracing a struct_ops program

This patch tests attaching an fentry prog to a struct_ops prog.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20220330011502.2985292-1-kafai@fb.com
---
 .../selftests/bpf/prog_tests/dummy_st_ops.c        | 23 ++++++++++++++++++++++
 .../selftests/bpf/progs/trace_dummy_st_ops.c       | 21 ++++++++++++++++++++
 2 files changed, 44 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/trace_dummy_st_ops.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c b/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c
index 5aa52cc31dc2..c11832657d2b 100644
--- a/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c
+++ b/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c
@@ -2,6 +2,7 @@
 /* Copyright (C) 2021. Huawei Technologies Co., Ltd */
 #include <test_progs.h>
 #include "dummy_st_ops.skel.h"
+#include "trace_dummy_st_ops.skel.h"
 
 /* Need to keep consistent with definition in include/linux/bpf.h */
 struct bpf_dummy_ops_state {
@@ -56,6 +57,7 @@ static void test_dummy_init_ptr_arg(void)
 		.ctx_in = args,
 		.ctx_size_in = sizeof(args),
 	);
+	struct trace_dummy_st_ops *trace_skel;
 	struct dummy_st_ops *skel;
 	int fd, err;
 
@@ -64,12 +66,33 @@ static void test_dummy_init_ptr_arg(void)
 		return;
 
 	fd = bpf_program__fd(skel->progs.test_1);
+
+	trace_skel = trace_dummy_st_ops__open();
+	if (!ASSERT_OK_PTR(trace_skel, "trace_dummy_st_ops__open"))
+		goto done;
+
+	err = bpf_program__set_attach_target(trace_skel->progs.fentry_test_1,
+					     fd, "test_1");
+	if (!ASSERT_OK(err, "set_attach_target(fentry_test_1)"))
+		goto done;
+
+	err = trace_dummy_st_ops__load(trace_skel);
+	if (!ASSERT_OK(err, "load(trace_skel)"))
+		goto done;
+
+	err = trace_dummy_st_ops__attach(trace_skel);
+	if (!ASSERT_OK(err, "attach(trace_skel)"))
+		goto done;
+
 	err = bpf_prog_test_run_opts(fd, &attr);
 	ASSERT_OK(err, "test_run");
 	ASSERT_EQ(in_state.val, 0x5a, "test_ptr_ret");
 	ASSERT_EQ(attr.retval, exp_retval, "test_ret");
+	ASSERT_EQ(trace_skel->bss->val, exp_retval, "fentry_val");
 
+done:
 	dummy_st_ops__destroy(skel);
+	trace_dummy_st_ops__destroy(trace_skel);
 }
 
 static void test_dummy_multiple_args(void)
diff --git a/tools/testing/selftests/bpf/progs/trace_dummy_st_ops.c b/tools/testing/selftests/bpf/progs/trace_dummy_st_ops.c
new file mode 100644
index 000000000000..00a4be9d3074
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/trace_dummy_st_ops.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+int val = 0;
+
+SEC("fentry/test_1")
+int BPF_PROG(fentry_test_1, __u64 *st_ops_ctx)
+{
+	__u64 state;
+
+	/* Read the traced st_ops arg1 which is a pointer */
+	bpf_probe_read_kernel(&state, sizeof(__u64), (void *)st_ops_ctx);
+	/* Read state->val */
+	bpf_probe_read_kernel(&val, sizeof(__u32), (void *)state);
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit 


From e9c281928c24dfeb86b11c31b53757b6a127f8aa Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Fri, 4 Mar 2022 10:08:14 -0700
Subject: kbuild: Make $(LLVM) more flexible

The LLVM make variable allows a developer to quickly switch between the
GNU and LLVM tools. However, it does not handle versioned binaries, such
as the ones shipped by Debian, as LLVM=1 just defines the tool variables
with the unversioned binaries.

There was some discussion during the review of the patch that introduces
LLVM=1 around versioned binaries, ultimately coming to the conclusion
that developers can just add the folder that contains the unversioned
binaries to their PATH, as Debian's versioned suffixed binaries are
really just symlinks to the unversioned binaries in /usr/lib/llvm-#/bin:

$ realpath /usr/bin/clang-14
/usr/lib/llvm-14/bin/clang

$ PATH=/usr/lib/llvm-14/bin:$PATH make ... LLVM=1

However, that can be cumbersome to developers who are constantly testing
series with different toolchains and versions. It is simple enough to
support these versioned binaries directly in the Kbuild system by
allowing the developer to specify the version suffix with LLVM=, which
is shorter than the above suggestion:

$ make ... LLVM=-14

It does not change the meaning of LLVM=1 (which will continue to use
unversioned binaries) and it does not add too much additional complexity
to the existing $(LLVM) code, while allowing developers to quickly test
their series with different versions of the whole LLVM suite of tools.

Some developers may build LLVM from source but not add the binaries to
their PATH, as they may not want to use that toolchain systemwide.
Support those developers by allowing them to supply the directory that
the LLVM tools are available in, as it is no more complex to support
than the version suffix change above.

$ make ... LLVM=/path/to/llvm/

Update and reorder the documentation to reflect these new additions.
At the same time, notate that LLVM=0 is not the same as just omitting it
altogether, which has confused people in the past.

Link: https://lore.kernel.org/r/20200317215515.226917-1-ndesaulniers@google.com/
Link: https://lore.kernel.org/r/20220224151322.072632223@infradead.org/
Suggested-by: Masahiro Yamada <masahiroy@kernel.org>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 Documentation/kbuild/llvm.rst  | 31 +++++++++++++++++++++++++------
 Makefile                       | 26 ++++++++++++++++----------
 tools/scripts/Makefile.include | 22 ++++++++++++++--------
 tools/testing/selftests/lib.mk |  8 +++++++-
 4 files changed, 62 insertions(+), 25 deletions(-)

(limited to 'tools/testing')

diff --git a/Documentation/kbuild/llvm.rst b/Documentation/kbuild/llvm.rst
index d32616891dcf..b854bb413164 100644
--- a/Documentation/kbuild/llvm.rst
+++ b/Documentation/kbuild/llvm.rst
@@ -49,17 +49,36 @@ example: ::
 LLVM Utilities
 --------------
 
-LLVM has substitutes for GNU binutils utilities. Kbuild supports ``LLVM=1``
-to enable them. ::
-
-	make LLVM=1
-
-They can be enabled individually. The full list of the parameters: ::
+LLVM has substitutes for GNU binutils utilities. They can be enabled individually.
+The full list of supported make variables::
 
 	make CC=clang LD=ld.lld AR=llvm-ar NM=llvm-nm STRIP=llvm-strip \
 	  OBJCOPY=llvm-objcopy OBJDUMP=llvm-objdump READELF=llvm-readelf \
 	  HOSTCC=clang HOSTCXX=clang++ HOSTAR=llvm-ar HOSTLD=ld.lld
 
+To simplify the above command, Kbuild supports the ``LLVM`` variable::
+
+	make LLVM=1
+
+If your LLVM tools are not available in your PATH, you can supply their
+location using the LLVM variable with a trailing slash::
+
+	make LLVM=/path/to/llvm/
+
+which will use ``/path/to/llvm/clang``, ``/path/to/llvm/ld.lld``, etc.
+
+If your LLVM tools have a version suffix and you want to test with that
+explicit version rather than the unsuffixed executables like ``LLVM=1``, you
+can pass the suffix using the ``LLVM`` variable::
+
+	make LLVM=-14
+
+which will use ``clang-14``, ``ld.lld-14``, etc.
+
+``LLVM=0`` is not the same as omitting ``LLVM`` altogether, it will behave like
+``LLVM=1``. If you only wish to use certain LLVM utilities, use their respective
+make variables.
+
 The integrated assembler is enabled by default. You can pass ``LLVM_IAS=0`` to
 disable it.
 
diff --git a/Makefile b/Makefile
index a82095c69fdd..b3fb32fd3906 100644
--- a/Makefile
+++ b/Makefile
@@ -424,8 +424,14 @@ HOST_LFS_LDFLAGS := $(shell getconf LFS_LDFLAGS 2>/dev/null)
 HOST_LFS_LIBS := $(shell getconf LFS_LIBS 2>/dev/null)
 
 ifneq ($(LLVM),)
-HOSTCC	= clang
-HOSTCXX	= clang++
+ifneq ($(filter %/,$(LLVM)),)
+LLVM_PREFIX := $(LLVM)
+else ifneq ($(filter -%,$(LLVM)),)
+LLVM_SUFFIX := $(LLVM)
+endif
+
+HOSTCC	= $(LLVM_PREFIX)clang$(LLVM_SUFFIX)
+HOSTCXX	= $(LLVM_PREFIX)clang++$(LLVM_SUFFIX)
 else
 HOSTCC	= gcc
 HOSTCXX	= g++
@@ -444,14 +450,14 @@ KBUILD_HOSTLDLIBS   := $(HOST_LFS_LIBS) $(HOSTLDLIBS)
 # Make variables (CC, etc...)
 CPP		= $(CC) -E
 ifneq ($(LLVM),)
-CC		= clang
-LD		= ld.lld
-AR		= llvm-ar
-NM		= llvm-nm
-OBJCOPY		= llvm-objcopy
-OBJDUMP		= llvm-objdump
-READELF		= llvm-readelf
-STRIP		= llvm-strip
+CC		= $(LLVM_PREFIX)clang$(LLVM_SUFFIX)
+LD		= $(LLVM_PREFIX)ld.lld$(LLVM_SUFFIX)
+AR		= $(LLVM_PREFIX)llvm-ar$(LLVM_SUFFIX)
+NM		= $(LLVM_PREFIX)llvm-nm$(LLVM_SUFFIX)
+OBJCOPY		= $(LLVM_PREFIX)llvm-objcopy$(LLVM_SUFFIX)
+OBJDUMP		= $(LLVM_PREFIX)llvm-objdump$(LLVM_SUFFIX)
+READELF		= $(LLVM_PREFIX)llvm-readelf$(LLVM_SUFFIX)
+STRIP		= $(LLVM_PREFIX)llvm-strip$(LLVM_SUFFIX)
 else
 CC		= $(CROSS_COMPILE)gcc
 LD		= $(CROSS_COMPILE)ld
diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
index 79d102304470..b507a6a733f5 100644
--- a/tools/scripts/Makefile.include
+++ b/tools/scripts/Makefile.include
@@ -52,11 +52,17 @@ define allow-override
 endef
 
 ifneq ($(LLVM),)
-$(call allow-override,CC,clang)
-$(call allow-override,AR,llvm-ar)
-$(call allow-override,LD,ld.lld)
-$(call allow-override,CXX,clang++)
-$(call allow-override,STRIP,llvm-strip)
+ifneq ($(filter %/,$(LLVM)),)
+LLVM_PREFIX := $(LLVM)
+else ifneq ($(filter -%,$(LLVM)),)
+LLVM_SUFFIX := $(LLVM)
+endif
+
+$(call allow-override,CC,$(LLVM_PREFIX)clang$(LLVM_SUFFIX))
+$(call allow-override,AR,$(LLVM_PREFIX)llvm-ar$(LLVM_SUFFIX))
+$(call allow-override,LD,$(LLVM_PREFIX)ld.lld$(LLVM_SUFFIX))
+$(call allow-override,CXX,$(LLVM_PREFIX)clang++$(LLVM_SUFFIX))
+$(call allow-override,STRIP,$(LLVM_PREFIX)llvm-strip$(LLVM_SUFFIX))
 else
 # Allow setting various cross-compile vars or setting CROSS_COMPILE as a prefix.
 $(call allow-override,CC,$(CROSS_COMPILE)gcc)
@@ -69,9 +75,9 @@ endif
 CC_NO_CLANG := $(shell $(CC) -dM -E -x c /dev/null | grep -Fq "__clang__"; echo $$?)
 
 ifneq ($(LLVM),)
-HOSTAR  ?= llvm-ar
-HOSTCC  ?= clang
-HOSTLD  ?= ld.lld
+HOSTAR  ?= $(LLVM_PREFIX)llvm-ar$(LLVM_SUFFIX)
+HOSTCC  ?= $(LLVM_PREFIX)clang$(LLVM_SUFFIX)
+HOSTLD  ?= $(LLVM_PREFIX)ld.lld$(LLVM_SUFFIX)
 else
 HOSTAR  ?= ar
 HOSTCC  ?= gcc
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index a40add31a2e3..2a2d240cdc1b 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -1,7 +1,13 @@
 # This mimics the top-level Makefile. We do it explicitly here so that this
 # Makefile can operate with or without the kbuild infrastructure.
 ifneq ($(LLVM),)
-CC := clang
+ifneq ($(filter %/,$(LLVM)),)
+LLVM_PREFIX := $(LLVM)
+else ifneq ($(filter -%,$(LLVM)),)
+LLVM_SUFFIX := $(LLVM)
+endif
+
+CC := $(LLVM_PREFIX)clang$(LLVM_SUFFIX)
 else
 CC := $(CROSS_COMPILE)gcc
 endif
-- 
cgit 


From 392baa339c6a42a2cb088e5e5df2b59b8f89be24 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <razor@blackwall.org>
Date: Fri, 1 Apr 2022 10:33:43 +0300
Subject: selftests: net: add delete nexthop route warning test

Add a test which causes a WARNING on kernels which treat a
nexthop route like a normal route when comparing for deletion and a
device is specified. That is, a route is found but we hit a warning while
matching it. The warning is from fib_info_nh() in include/net/nexthop.h
because we run it on a fib_info with nexthop object. The call chain is:
 inet_rtm_delroute -> fib_table_delete -> fib_nh_match (called with a
nexthop fib_info and also with fc_oif set thus calling fib_info_nh on
the fib_info and triggering the warning).

Repro steps:
 $ ip nexthop add id 12 via 172.16.1.3 dev veth1
 $ ip route add 172.16.101.1/32 nhid 12
 $ ip route delete 172.16.101.1/32 dev veth1

Signed-off-by: Nikolay Aleksandrov <razor@blackwall.org>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/fib_nexthops.sh | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index d444ee6aa3cb..d8ede0c81ac1 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -1208,6 +1208,20 @@ ipv4_fcnal()
 	set +e
 	check_nexthop "dev veth1" ""
 	log_test $? 0 "Nexthops removed on admin down"
+
+	# nexthop route delete warning: route add with nhid and delete
+	# using device
+	run_cmd "$IP li set dev veth1 up"
+	run_cmd "$IP nexthop add id 12 via 172.16.1.3 dev veth1"
+	out1=`dmesg | grep "WARNING:.*fib_nh_match.*" | wc -l`
+	run_cmd "$IP route add 172.16.101.1/32 nhid 12"
+	run_cmd "$IP route delete 172.16.101.1/32 dev veth1"
+	out2=`dmesg | grep "WARNING:.*fib_nh_match.*" | wc -l`
+	[ $out1 -eq $out2 ]
+	rc=$?
+	log_test $rc 0 "Delete nexthop route warning"
+	run_cmd "$IP ip route delete 172.16.101.1/32 nhid 12"
+	run_cmd "$IP ip nexthop del id 12"
 }
 
 ipv4_grp_fcnal()
-- 
cgit 


From fe4625d8b0536c0098845065a6bb3fdeaa6ad940 Mon Sep 17 00:00:00 2001
From: Eyal Birger <eyal.birger@gmail.com>
Date: Tue, 29 Mar 2022 18:49:14 +0300
Subject: selftests/bpf: Remove unused variable from bpf_sk_assign test

Was never used in bpf_sk_assign_test(), and was removed from handle_{tcp,udp}()
in commit 0b9ad56b1ea6 ("selftests/bpf: Use SOCKMAP for server sockets in
bpf_sk_assign test").

Fixes: 0b9ad56b1ea6 ("selftests/bpf: Use SOCKMAP for server sockets in bpf_sk_assign test")
Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20220329154914.3718658-1-eyal.birger@gmail.com
---
 tools/testing/selftests/bpf/progs/test_sk_assign.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/test_sk_assign.c b/tools/testing/selftests/bpf/progs/test_sk_assign.c
index 02f79356d5eb..98c6493d9b91 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_assign.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_assign.c
@@ -89,7 +89,6 @@ get_tuple(struct __sk_buff *skb, bool *ipv4, bool *tcp)
 static inline int
 handle_udp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
 {
-	struct bpf_sock_tuple ln = {0};
 	struct bpf_sock *sk;
 	const int zero = 0;
 	size_t tuple_len;
@@ -121,7 +120,6 @@ assign:
 static inline int
 handle_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
 {
-	struct bpf_sock_tuple ln = {0};
 	struct bpf_sock *sk;
 	const int zero = 0;
 	size_t tuple_len;
@@ -161,7 +159,7 @@ assign:
 SEC("tc")
 int bpf_sk_assign_test(struct __sk_buff *skb)
 {
-	struct bpf_sock_tuple *tuple, ln = {0};
+	struct bpf_sock_tuple *tuple;
 	bool ipv4 = false;
 	bool tcp = false;
 	int tuple_len;
-- 
cgit 


From 891663ace74c23321a40f4bae13e45a1803d5e20 Mon Sep 17 00:00:00 2001
From: Yauheni Kaliuta <ykaliuta@redhat.com>
Date: Tue, 29 Mar 2022 11:11:00 +0300
Subject: bpf, test_offload.py: Skip base maps without names

The test fails:

  # ./test_offload.py
  [...]
  Test bpftool bound info reporting (own ns)...
  FAIL: 3 BPF maps loaded, expected 2
    File "/root/bpf-next/tools/testing/selftests/bpf/./test_offload.py", line 1177, in <module>
      check_dev_info(False, "")
    File "/root/bpf-next/tools/testing/selftests/bpf/./test_offload.py", line 645, in check_dev_info
      maps = bpftool_map_list(expected=2, ns=ns)
    File "/root/bpf-next/tools/testing/selftests/bpf/./test_offload.py", line 190, in bpftool_map_list
      fail(True, "%d BPF maps loaded, expected %d" %
    File "/root/bpf-next/tools/testing/selftests/bpf/./test_offload.py", line 86, in fail
      tb = "".join(traceback.extract_stack().format())

Some base maps do not have names and they cannot be added due to compatibility
with older kernels, see [0]. So, just skip the unnamed maps.

  [0] https://lore.kernel.org/bpf/CAEf4BzY66WPKQbDe74AKZ6nFtZjq5e+G3Ji2egcVytB9R6_sGQ@mail.gmail.com/

Signed-off-by: Yauheni Kaliuta <ykaliuta@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Quentin Monnet <quentin@isovalent.com>
Link: https://lore.kernel.org/bpf/20220329081100.9705-1-ykaliuta@redhat.com
---
 tools/testing/selftests/bpf/test_offload.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index edaffd43da83..6cd6ef9fc20b 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -184,7 +184,7 @@ def bpftool_prog_list(expected=None, ns=""):
 def bpftool_map_list(expected=None, ns=""):
     _, maps = bpftool("map show", JSON=True, ns=ns, fail=True)
     # Remove the base maps
-    maps = [m for m in maps if m not in base_maps and m.get('name') not in base_map_names]
+    maps = [m for m in maps if m not in base_maps and m.get('name') and m.get('name') not in base_map_names]
     if expected is not None:
         if len(maps) != expected:
             fail(True, "%d BPF maps loaded, expected %d" %
-- 
cgit 


From 6c2fa8b20d0cad3719b024ba0f25a50199a90c9a Mon Sep 17 00:00:00 2001
From: Oliver Upton <oupton@google.com>
Date: Wed, 16 Mar 2022 00:55:38 +0000
Subject: selftests: KVM: Test KVM_X86_QUIRK_FIX_HYPERCALL_INSN

Add a test that asserts KVM rewrites guest hypercall instructions to
match the running architecture (VMCALL on VMX, VMMCALL on SVM).
Additionally, test that with the quirk disabled, KVM no longer rewrites
guest instructions and instead injects a #UD.

Signed-off-by: Oliver Upton <oupton@google.com>
Message-Id: <20220316005538.2282772-3-oupton@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/.gitignore             |   1 +
 tools/testing/selftests/kvm/Makefile               |   1 +
 .../selftests/kvm/x86_64/fix_hypercall_test.c      | 170 +++++++++++++++++++++
 3 files changed, 172 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 9b67343dc4ab..1f1b6c978bf7 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -15,6 +15,7 @@
 /x86_64/debug_regs
 /x86_64/evmcs_test
 /x86_64/emulator_error_test
+/x86_64/fix_hypercall_test
 /x86_64/get_msr_index_features
 /x86_64/kvm_clock_test
 /x86_64/kvm_pv_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 04099f453b59..03662666b30d 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -48,6 +48,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test
 TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features
 TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
 TEST_GEN_PROGS_x86_64 += x86_64/emulator_error_test
+TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features
diff --git a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c b/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c
new file mode 100644
index 000000000000..1f5c32146f3d
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020, Google LLC.
+ *
+ * Tests for KVM paravirtual feature disablement
+ */
+#include <asm/kvm_para.h>
+#include <linux/kvm_para.h>
+#include <linux/stringify.h>
+#include <stdint.h>
+
+#include "apic.h"
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#define VCPU_ID 0
+
+static bool ud_expected;
+
+static void guest_ud_handler(struct ex_regs *regs)
+{
+	GUEST_ASSERT(ud_expected);
+	GUEST_DONE();
+}
+
+extern unsigned char svm_hypercall_insn;
+static uint64_t svm_do_sched_yield(uint8_t apic_id)
+{
+	uint64_t ret;
+
+	asm volatile("mov %1, %%rax\n\t"
+		     "mov %2, %%rbx\n\t"
+		     "svm_hypercall_insn:\n\t"
+		     "vmmcall\n\t"
+		     "mov %%rax, %0\n\t"
+		     : "=r"(ret)
+		     : "r"((uint64_t)KVM_HC_SCHED_YIELD), "r"((uint64_t)apic_id)
+		     : "rax", "rbx", "memory");
+
+	return ret;
+}
+
+extern unsigned char vmx_hypercall_insn;
+static uint64_t vmx_do_sched_yield(uint8_t apic_id)
+{
+	uint64_t ret;
+
+	asm volatile("mov %1, %%rax\n\t"
+		     "mov %2, %%rbx\n\t"
+		     "vmx_hypercall_insn:\n\t"
+		     "vmcall\n\t"
+		     "mov %%rax, %0\n\t"
+		     : "=r"(ret)
+		     : "r"((uint64_t)KVM_HC_SCHED_YIELD), "r"((uint64_t)apic_id)
+		     : "rax", "rbx", "memory");
+
+	return ret;
+}
+
+static void assert_hypercall_insn(unsigned char *exp_insn, unsigned char *obs_insn)
+{
+	uint32_t exp = 0, obs = 0;
+
+	memcpy(&exp, exp_insn, sizeof(exp));
+	memcpy(&obs, obs_insn, sizeof(obs));
+
+	GUEST_ASSERT_EQ(exp, obs);
+}
+
+static void guest_main(void)
+{
+	unsigned char *native_hypercall_insn, *hypercall_insn;
+	uint8_t apic_id;
+
+	apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID));
+
+	if (is_intel_cpu()) {
+		native_hypercall_insn = &vmx_hypercall_insn;
+		hypercall_insn = &svm_hypercall_insn;
+		svm_do_sched_yield(apic_id);
+	} else if (is_amd_cpu()) {
+		native_hypercall_insn = &svm_hypercall_insn;
+		hypercall_insn = &vmx_hypercall_insn;
+		vmx_do_sched_yield(apic_id);
+	} else {
+		GUEST_ASSERT(0);
+		/* unreachable */
+		return;
+	}
+
+	GUEST_ASSERT(!ud_expected);
+	assert_hypercall_insn(native_hypercall_insn, hypercall_insn);
+	GUEST_DONE();
+}
+
+static void setup_ud_vector(struct kvm_vm *vm)
+{
+	vm_init_descriptor_tables(vm);
+	vcpu_init_descriptor_tables(vm, VCPU_ID);
+	vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
+}
+
+static void enter_guest(struct kvm_vm *vm)
+{
+	struct kvm_run *run;
+	struct ucall uc;
+
+	run = vcpu_state(vm, VCPU_ID);
+
+	vcpu_run(vm, VCPU_ID);
+	switch (get_ucall(vm, VCPU_ID, &uc)) {
+	case UCALL_SYNC:
+		pr_info("%s: %016lx\n", (const char *)uc.args[2], uc.args[3]);
+		break;
+	case UCALL_DONE:
+		return;
+	case UCALL_ABORT:
+		TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], __FILE__, uc.args[1]);
+	default:
+		TEST_FAIL("Unhandled ucall: %ld\nexit_reason: %u (%s)",
+			  uc.cmd, run->exit_reason, exit_reason_str(run->exit_reason));
+	}
+}
+
+static void test_fix_hypercall(void)
+{
+	struct kvm_vm *vm;
+
+	vm = vm_create_default(VCPU_ID, 0, guest_main);
+	setup_ud_vector(vm);
+
+	ud_expected = false;
+	sync_global_to_guest(vm, ud_expected);
+
+	virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+
+	enter_guest(vm);
+}
+
+static void test_fix_hypercall_disabled(void)
+{
+	struct kvm_enable_cap cap = {0};
+	struct kvm_vm *vm;
+
+	vm = vm_create_default(VCPU_ID, 0, guest_main);
+	setup_ud_vector(vm);
+
+	cap.cap = KVM_CAP_DISABLE_QUIRKS2;
+	cap.args[0] = KVM_X86_QUIRK_FIX_HYPERCALL_INSN;
+	vm_enable_cap(vm, &cap);
+
+	ud_expected = true;
+	sync_global_to_guest(vm, ud_expected);
+
+	virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+
+	enter_guest(vm);
+}
+
+int main(void)
+{
+	if (!(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_FIX_HYPERCALL_INSN)) {
+		print_skip("KVM_X86_QUIRK_HYPERCALL_INSN not supported");
+		exit(KSFT_SKIP);
+	}
+
+	test_fix_hypercall();
+	test_fix_hypercall_disabled();
+}
-- 
cgit 


From 1a65105a5aba9f7c6ea68cf687e569d055a94685 Mon Sep 17 00:00:00 2001
From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Date: Thu, 3 Mar 2022 15:41:26 +0000
Subject: KVM: x86/xen: handle PV spinlocks slowpath

Add support for SCHEDOP_poll hypercall.

This implementation is optimized for polling for a single channel, which
is what Linux does. Polling for multiple channels is not especially
efficient (and has not been tested).

PV spinlocks slow path uses this hypercall, and explicitly crash if it's
not supported.

[ dwmw2: Rework to use kvm_vcpu_halt(), not supported for 32-bit guests ]

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20220303154127.202856-17-dwmw2@infradead.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h                    |   3 +
 arch/x86/kvm/xen.c                                 | 158 ++++++++++++++++++++-
 .../testing/selftests/kvm/x86_64/xen_shinfo_test.c |   6 +
 3 files changed, 165 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 998caf7a3ce9..5370744b789c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -619,6 +619,8 @@ struct kvm_vcpu_xen {
 	u64 timer_expires; /* In guest epoch */
 	atomic_t timer_pending;
 	struct hrtimer timer;
+	int poll_evtchn;
+	struct timer_list poll_timer;
 };
 
 struct kvm_vcpu_arch {
@@ -1032,6 +1034,7 @@ struct kvm_xen {
 	u8 upcall_vector;
 	struct gfn_to_pfn_cache shinfo_cache;
 	struct idr evtchn_ports;
+	unsigned long poll_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)];
 };
 
 enum kvm_irqchip_mode {
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 53232c9ff89c..7e7c8a5bff52 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -10,6 +10,7 @@
 #include "xen.h"
 #include "lapic.h"
 #include "hyperv.h"
+#include "lapic.h"
 
 #include <linux/eventfd.h>
 #include <linux/kvm_host.h>
@@ -954,9 +955,146 @@ static int kvm_xen_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
 	return kvm_xen_hypercall_set_result(vcpu, run->xen.u.hcall.result);
 }
 
-static bool kvm_xen_hcall_sched_op(struct kvm_vcpu *vcpu, int cmd, u64 param, u64 *r)
+static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports,
+			       evtchn_port_t *ports)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache;
+	unsigned long *pending_bits;
+	unsigned long flags;
+	bool ret = true;
+	int idx, i;
+
+	read_lock_irqsave(&gpc->lock, flags);
+	idx = srcu_read_lock(&kvm->srcu);
+	if (!kvm_gfn_to_pfn_cache_check(kvm, gpc, gpc->gpa, PAGE_SIZE))
+		goto out_rcu;
+
+	ret = false;
+	if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) {
+		struct shared_info *shinfo = gpc->khva;
+		pending_bits = (unsigned long *)&shinfo->evtchn_pending;
+	} else {
+		struct compat_shared_info *shinfo = gpc->khva;
+		pending_bits = (unsigned long *)&shinfo->evtchn_pending;
+	}
+
+	for (i = 0; i < nr_ports; i++) {
+		if (test_bit(ports[i], pending_bits)) {
+			ret = true;
+			break;
+		}
+	}
+
+ out_rcu:
+	srcu_read_unlock(&kvm->srcu, idx);
+	read_unlock_irqrestore(&gpc->lock, flags);
+
+	return ret;
+}
+
+static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode,
+				 u64 param, u64 *r)
+{
+	int idx, i;
+	struct sched_poll sched_poll;
+	evtchn_port_t port, *ports;
+	gpa_t gpa;
+
+	if (!longmode || !lapic_in_kernel(vcpu) ||
+	    !(vcpu->kvm->arch.xen_hvm_config.flags & KVM_XEN_HVM_CONFIG_EVTCHN_SEND))
+		return false;
+
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
+	gpa = kvm_mmu_gva_to_gpa_system(vcpu, param, NULL);
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+	if (!gpa || kvm_vcpu_read_guest(vcpu, gpa, &sched_poll,
+					sizeof(sched_poll))) {
+		*r = -EFAULT;
+		return true;
+	}
+
+	if (unlikely(sched_poll.nr_ports > 1)) {
+		/* Xen (unofficially) limits number of pollers to 128 */
+		if (sched_poll.nr_ports > 128) {
+			*r = -EINVAL;
+			return true;
+		}
+
+		ports = kmalloc_array(sched_poll.nr_ports,
+				      sizeof(*ports), GFP_KERNEL);
+		if (!ports) {
+			*r = -ENOMEM;
+			return true;
+		}
+	} else
+		ports = &port;
+
+	for (i = 0; i < sched_poll.nr_ports; i++) {
+		idx = srcu_read_lock(&vcpu->kvm->srcu);
+		gpa = kvm_mmu_gva_to_gpa_system(vcpu,
+						(gva_t)(sched_poll.ports + i),
+						NULL);
+		srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+		if (!gpa || kvm_vcpu_read_guest(vcpu, gpa,
+						&ports[i], sizeof(port))) {
+			*r = -EFAULT;
+			goto out;
+		}
+	}
+
+	if (sched_poll.nr_ports == 1)
+		vcpu->arch.xen.poll_evtchn = port;
+	else
+		vcpu->arch.xen.poll_evtchn = -1;
+
+	set_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.xen.poll_mask);
+
+	if (!wait_pending_event(vcpu, sched_poll.nr_ports, ports)) {
+		vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
+
+		if (sched_poll.timeout)
+			mod_timer(&vcpu->arch.xen.poll_timer,
+				  jiffies + nsecs_to_jiffies(sched_poll.timeout));
+
+		kvm_vcpu_halt(vcpu);
+
+		if (sched_poll.timeout)
+			del_timer(&vcpu->arch.xen.poll_timer);
+
+		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+		kvm_clear_request(KVM_REQ_UNHALT, vcpu);
+	}
+
+	vcpu->arch.xen.poll_evtchn = 0;
+	*r = 0;
+out:
+	/* Really, this is only needed in case of timeout */
+	clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.xen.poll_mask);
+
+	if (unlikely(sched_poll.nr_ports > 1))
+		kfree(ports);
+	return true;
+}
+
+static void cancel_evtchn_poll(struct timer_list *t)
+{
+	struct kvm_vcpu *vcpu = from_timer(vcpu, t, arch.xen.poll_timer);
+
+	kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
+	kvm_vcpu_kick(vcpu);
+}
+
+static bool kvm_xen_hcall_sched_op(struct kvm_vcpu *vcpu, bool longmode,
+				   int cmd, u64 param, u64 *r)
 {
 	switch (cmd) {
+	case SCHEDOP_poll:
+		if (kvm_xen_schedop_poll(vcpu, longmode, param, r))
+			return true;
+		fallthrough;
 	case SCHEDOP_yield:
 		kvm_vcpu_on_spin(vcpu, true);
 		*r = 0;
@@ -1121,7 +1259,8 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu)
 			handled = kvm_xen_hcall_evtchn_send(vcpu, params[1], &r);
 		break;
 	case __HYPERVISOR_sched_op:
-		handled = kvm_xen_hcall_sched_op(vcpu, params[0], params[1], &r);
+		handled = kvm_xen_hcall_sched_op(vcpu, longmode, params[0],
+						 params[1], &r);
 		break;
 	case __HYPERVISOR_vcpu_op:
 		handled = kvm_xen_hcall_vcpu_op(vcpu, longmode, params[0], params[1],
@@ -1168,6 +1307,17 @@ static inline int max_evtchn_port(struct kvm *kvm)
 		return COMPAT_EVTCHN_2L_NR_CHANNELS;
 }
 
+static void kvm_xen_check_poller(struct kvm_vcpu *vcpu, int port)
+{
+	int poll_evtchn = vcpu->arch.xen.poll_evtchn;
+
+	if ((poll_evtchn == port || poll_evtchn == -1) &&
+	    test_and_clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.xen.poll_mask)) {
+		kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
+		kvm_vcpu_kick(vcpu);
+	}
+}
+
 /*
  * The return value from this function is propagated to kvm_set_irq() API,
  * so it returns:
@@ -1235,6 +1385,7 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm)
 		rc = 0; /* It was already raised */
 	} else if (test_bit(xe->port, mask_bits)) {
 		rc = -ENOTCONN; /* Masked */
+		kvm_xen_check_poller(vcpu, xe->port);
 	} else {
 		rc = 1; /* Delivered to the bitmap in shared_info. */
 		/* Now switch to the vCPU's vcpu_info to set the index and pending_sel */
@@ -1665,6 +1816,8 @@ static bool kvm_xen_hcall_evtchn_send(struct kvm_vcpu *vcpu, u64 param, u64 *r)
 void kvm_xen_init_vcpu(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.xen.vcpu_id = vcpu->vcpu_idx;
+	vcpu->arch.xen.poll_evtchn = 0;
+	timer_setup(&vcpu->arch.xen.poll_timer, cancel_evtchn_poll, 0);
 }
 
 void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
@@ -1678,6 +1831,7 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
 				     &vcpu->arch.xen.vcpu_info_cache);
 	kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
 				     &vcpu->arch.xen.vcpu_time_info_cache);
+	del_timer_sync(&vcpu->arch.xen.poll_timer);
 }
 
 void kvm_xen_init_vm(struct kvm *kvm)
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
index 865e17146815..376c611443cd 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -233,6 +233,12 @@ int main(int argc, char *argv[])
 		.flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
 		.msr = XEN_HYPERCALL_MSR,
 	};
+
+	/* Let the kernel know that we *will* use it for sending all
+	 * event channels, which lets it intercept SCHEDOP_poll */
+	if (xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND)
+		hvmc.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
+
 	vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);
 
 	struct kvm_xen_hvm_attr lm = {
-- 
cgit 


From 25eaeebe710c8c3aa588ee0830155fefb2548c28 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Thu, 3 Mar 2022 15:41:27 +0000
Subject: KVM: x86/xen: Add self tests for KVM_XEN_HVM_CONFIG_EVTCHN_SEND

Test a combination of event channel send, poll and timer operations.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Message-Id: <20220303154127.202856-18-dwmw2@infradead.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../testing/selftests/kvm/x86_64/xen_shinfo_test.c | 333 ++++++++++++++++++++-
 1 file changed, 320 insertions(+), 13 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
index 376c611443cd..63b0ca7685b0 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -39,12 +39,36 @@
 
 #define EVTCHN_VECTOR	0x10
 
+#define EVTCHN_TEST1 15
+#define EVTCHN_TEST2 66
+#define EVTCHN_TIMER 13
+
 static struct kvm_vm *vm;
 
 #define XEN_HYPERCALL_MSR	0x40000000
 
 #define MIN_STEAL_TIME		50000
 
+#define __HYPERVISOR_set_timer_op	15
+#define __HYPERVISOR_sched_op		29
+#define __HYPERVISOR_event_channel_op	32
+
+#define SCHEDOP_poll			3
+
+#define EVTCHNOP_send			4
+
+#define EVTCHNSTAT_interdomain		2
+
+struct evtchn_send {
+	u32 port;
+};
+
+struct sched_poll {
+	u32 *ports;
+	unsigned int nr_ports;
+	u64 timeout;
+};
+
 struct pvclock_vcpu_time_info {
 	u32   version;
 	u32   pad0;
@@ -107,15 +131,25 @@ struct {
 	struct kvm_irq_routing_entry entries[2];
 } irq_routes;
 
+bool guest_saw_irq;
+
 static void evtchn_handler(struct ex_regs *regs)
 {
 	struct vcpu_info *vi = (void *)VCPU_INFO_VADDR;
 	vi->evtchn_upcall_pending = 0;
 	vi->evtchn_pending_sel = 0;
+	guest_saw_irq = true;
 
 	GUEST_SYNC(0x20);
 }
 
+static void guest_wait_for_irq(void)
+{
+	while (!guest_saw_irq)
+		__asm__ __volatile__ ("rep nop" : : : "memory");
+	guest_saw_irq = false;
+}
+
 static void guest_code(void)
 {
 	struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;
@@ -128,6 +162,8 @@ static void guest_code(void)
 	/* Trigger an interrupt injection */
 	GUEST_SYNC(0);
 
+	guest_wait_for_irq();
+
 	/* Test having the host set runstates manually */
 	GUEST_SYNC(RUNSTATE_runnable);
 	GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0);
@@ -168,14 +204,127 @@ static void guest_code(void)
 	/* Now deliver an *unmasked* interrupt */
 	GUEST_SYNC(8);
 
-	while (!si->evtchn_pending[1])
-		__asm__ __volatile__ ("rep nop" : : : "memory");
+	guest_wait_for_irq();
 
 	/* Change memslots and deliver an interrupt */
 	GUEST_SYNC(9);
 
-	for (;;)
-		__asm__ __volatile__ ("rep nop" : : : "memory");
+	guest_wait_for_irq();
+
+	/* Deliver event channel with KVM_XEN_HVM_EVTCHN_SEND */
+	GUEST_SYNC(10);
+
+	guest_wait_for_irq();
+
+	GUEST_SYNC(11);
+
+	/* Our turn. Deliver event channel (to ourselves) with
+	 * EVTCHNOP_send hypercall. */
+	unsigned long rax;
+	struct evtchn_send s = { .port = 127 };
+	__asm__ __volatile__ ("vmcall" :
+			      "=a" (rax) :
+			      "a" (__HYPERVISOR_event_channel_op),
+			      "D" (EVTCHNOP_send),
+			      "S" (&s));
+
+	GUEST_ASSERT(rax == 0);
+
+	guest_wait_for_irq();
+
+	GUEST_SYNC(12);
+
+	/* Deliver "outbound" event channel to an eventfd which
+	 * happens to be one of our own irqfds. */
+	s.port = 197;
+	__asm__ __volatile__ ("vmcall" :
+			      "=a" (rax) :
+			      "a" (__HYPERVISOR_event_channel_op),
+			      "D" (EVTCHNOP_send),
+			      "S" (&s));
+
+	GUEST_ASSERT(rax == 0);
+
+	guest_wait_for_irq();
+
+	GUEST_SYNC(13);
+
+	/* Set a timer 100ms in the future. */
+	__asm__ __volatile__ ("vmcall" :
+			      "=a" (rax) :
+			      "a" (__HYPERVISOR_set_timer_op),
+			      "D" (rs->state_entry_time + 100000000));
+	GUEST_ASSERT(rax == 0);
+
+	GUEST_SYNC(14);
+
+	/* Now wait for the timer */
+	guest_wait_for_irq();
+
+	GUEST_SYNC(15);
+
+	/* The host has 'restored' the timer. Just wait for it. */
+	guest_wait_for_irq();
+
+	GUEST_SYNC(16);
+
+	/* Poll for an event channel port which is already set */
+	u32 ports[1] = { EVTCHN_TIMER };
+	struct sched_poll p = {
+		.ports = ports,
+		.nr_ports = 1,
+		.timeout = 0,
+	};
+
+	__asm__ __volatile__ ("vmcall" :
+			      "=a" (rax) :
+			      "a" (__HYPERVISOR_sched_op),
+			      "D" (SCHEDOP_poll),
+			      "S" (&p));
+
+	GUEST_ASSERT(rax == 0);
+
+	GUEST_SYNC(17);
+
+	/* Poll for an unset port and wait for the timeout. */
+	p.timeout = 100000000;
+	__asm__ __volatile__ ("vmcall" :
+			      "=a" (rax) :
+			      "a" (__HYPERVISOR_sched_op),
+			      "D" (SCHEDOP_poll),
+			      "S" (&p));
+
+	GUEST_ASSERT(rax == 0);
+
+	GUEST_SYNC(18);
+
+	/* A timer will wake the masked port we're waiting on, while we poll */
+	p.timeout = 0;
+	__asm__ __volatile__ ("vmcall" :
+			      "=a" (rax) :
+			      "a" (__HYPERVISOR_sched_op),
+			      "D" (SCHEDOP_poll),
+			      "S" (&p));
+
+	GUEST_ASSERT(rax == 0);
+
+	GUEST_SYNC(19);
+
+	/* A timer wake an *unmasked* port which should wake us with an
+	 * actual interrupt, while we're polling on a different port. */
+	ports[0]++;
+	p.timeout = 0;
+	__asm__ __volatile__ ("vmcall" :
+			      "=a" (rax) :
+			      "a" (__HYPERVISOR_sched_op),
+			      "D" (SCHEDOP_poll),
+			      "S" (&p));
+
+	GUEST_ASSERT(rax == 0);
+
+	guest_wait_for_irq();
+
+	GUEST_SYNC(20);
 }
 
 static int cmp_timespec(struct timespec *a, struct timespec *b)
@@ -191,9 +340,13 @@ static int cmp_timespec(struct timespec *a, struct timespec *b)
 	else
 		return 0;
 }
+struct vcpu_info *vinfo;
 
 static void handle_alrm(int sig)
 {
+	if (vinfo)
+		printf("evtchn_upcall_pending 0x%x\n", vinfo->evtchn_upcall_pending);
+	vcpu_dump(stdout, vm, VCPU_ID, 0);
 	TEST_FAIL("IRQ delivery timed out");
 }
 
@@ -213,6 +366,7 @@ int main(int argc, char *argv[])
 
 	bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE);
 	bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL);
+	bool do_evtchn_tests = do_eventfd_tests && !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND);
 
 	clock_gettime(CLOCK_REALTIME, &min_ts);
 
@@ -236,7 +390,7 @@ int main(int argc, char *argv[])
 
 	/* Let the kernel know that we *will* use it for sending all
 	 * event channels, which lets it intercept SCHEDOP_poll */
-	if (xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND)
+	if (do_evtchn_tests)
 		hvmc.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
 
 	vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);
@@ -301,7 +455,7 @@ int main(int argc, char *argv[])
 
 		/* Unexpected, but not a KVM failure */
 		if (irq_fd[0] == -1 || irq_fd[1] == -1)
-			do_eventfd_tests = false;
+			do_evtchn_tests = do_eventfd_tests = false;
 	}
 
 	if (do_eventfd_tests) {
@@ -309,13 +463,13 @@ int main(int argc, char *argv[])
 
 		irq_routes.entries[0].gsi = 32;
 		irq_routes.entries[0].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
-		irq_routes.entries[0].u.xen_evtchn.port = 15;
+		irq_routes.entries[0].u.xen_evtchn.port = EVTCHN_TEST1;
 		irq_routes.entries[0].u.xen_evtchn.vcpu = VCPU_ID;
 		irq_routes.entries[0].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
 
 		irq_routes.entries[1].gsi = 33;
 		irq_routes.entries[1].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
-		irq_routes.entries[1].u.xen_evtchn.port = 66;
+		irq_routes.entries[1].u.xen_evtchn.port = EVTCHN_TEST2;
 		irq_routes.entries[1].u.xen_evtchn.vcpu = VCPU_ID;
 		irq_routes.entries[1].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
 
@@ -336,7 +490,39 @@ int main(int argc, char *argv[])
 		sigaction(SIGALRM, &sa, NULL);
 	}
 
-	struct vcpu_info *vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR);
+	struct kvm_xen_vcpu_attr tmr = {
+		.type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
+		.u.timer.port = EVTCHN_TIMER,
+		.u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
+		.u.timer.expires_ns = 0
+	};
+
+	if (do_evtchn_tests) {
+		struct kvm_xen_hvm_attr inj = {
+			.type = KVM_XEN_ATTR_TYPE_EVTCHN,
+			.u.evtchn.send_port = 127,
+			.u.evtchn.type = EVTCHNSTAT_interdomain,
+			.u.evtchn.flags = 0,
+			.u.evtchn.deliver.port.port = EVTCHN_TEST1,
+			.u.evtchn.deliver.port.vcpu = VCPU_ID + 1,
+			.u.evtchn.deliver.port.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
+		};
+		vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
+
+		/* Test migration to a different vCPU */
+		inj.u.evtchn.flags = KVM_XEN_EVTCHN_UPDATE;
+		inj.u.evtchn.deliver.port.vcpu = VCPU_ID;
+		vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
+
+		inj.u.evtchn.send_port = 197;
+		inj.u.evtchn.deliver.eventfd.port = 0;
+		inj.u.evtchn.deliver.eventfd.fd = irq_fd[1];
+		inj.u.evtchn.flags = 0;
+		vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
+
+		vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &tmr);
+	}
+	vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR);
 	vinfo->evtchn_upcall_pending = 0;
 
 	struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR);
@@ -429,7 +615,7 @@ int main(int argc, char *argv[])
 					goto done;
 				if (verbose)
 					printf("Testing masked event channel\n");
-				shinfo->evtchn_mask[0] = 0x8000;
+				shinfo->evtchn_mask[0] = 1UL << EVTCHN_TEST1;
 				eventfd_write(irq_fd[0], 1UL);
 				alarm(1);
 				break;
@@ -446,6 +632,9 @@ int main(int argc, char *argv[])
 				break;
 
 			case 9:
+				TEST_ASSERT(!evtchn_irq_expected,
+					    "Expected event channel IRQ but it didn't happen");
+				shinfo->evtchn_pending[1] = 0;
 				if (verbose)
 					printf("Testing event channel after memslot change\n");
 				vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
@@ -455,12 +644,129 @@ int main(int argc, char *argv[])
 				alarm(1);
 				break;
 
+			case 10:
+				TEST_ASSERT(!evtchn_irq_expected,
+					    "Expected event channel IRQ but it didn't happen");
+				if (!do_evtchn_tests)
+					goto done;
+
+				shinfo->evtchn_pending[0] = 0;
+				if (verbose)
+					printf("Testing injection with KVM_XEN_HVM_EVTCHN_SEND\n");
+
+				struct kvm_irq_routing_xen_evtchn e;
+				e.port = EVTCHN_TEST2;
+				e.vcpu = VCPU_ID;
+				e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
+
+				vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &e);
+				evtchn_irq_expected = true;
+				alarm(1);
+				break;
+
+			case 11:
+				TEST_ASSERT(!evtchn_irq_expected,
+					    "Expected event channel IRQ but it didn't happen");
+				shinfo->evtchn_pending[1] = 0;
+
+				if (verbose)
+					printf("Testing guest EVTCHNOP_send direct to evtchn\n");
+				evtchn_irq_expected = true;
+				alarm(1);
+				break;
+
+			case 12:
+				TEST_ASSERT(!evtchn_irq_expected,
+					    "Expected event channel IRQ but it didn't happen");
+				shinfo->evtchn_pending[0] = 0;
+
+				if (verbose)
+					printf("Testing guest EVTCHNOP_send to eventfd\n");
+				evtchn_irq_expected = true;
+				alarm(1);
+				break;
+
+			case 13:
+				TEST_ASSERT(!evtchn_irq_expected,
+					    "Expected event channel IRQ but it didn't happen");
+				shinfo->evtchn_pending[1] = 0;
+
+				if (verbose)
+					printf("Testing guest oneshot timer\n");
+				break;
+
+			case 14:
+				memset(&tmr, 0, sizeof(tmr));
+				tmr.type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
+				vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &tmr);
+				TEST_ASSERT(tmr.u.timer.port == EVTCHN_TIMER,
+					    "Timer port not returned");
+				TEST_ASSERT(tmr.u.timer.priority == KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
+					    "Timer priority not returned");
+				TEST_ASSERT(tmr.u.timer.expires_ns > rs->state_entry_time,
+					    "Timer expiry not returned");
+				evtchn_irq_expected = true;
+				alarm(1);
+				break;
+
+			case 15:
+				TEST_ASSERT(!evtchn_irq_expected,
+					    "Expected event channel IRQ but it didn't happen");
+				shinfo->evtchn_pending[0] = 0;
+
+				if (verbose)
+					printf("Testing restored oneshot timer\n");
+
+				tmr.u.timer.expires_ns = rs->state_entry_time + 100000000,
+				vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &tmr);
+				evtchn_irq_expected = true;
+				alarm(1);
+				break;
+
+			case 16:
+				TEST_ASSERT(!evtchn_irq_expected,
+					    "Expected event channel IRQ but it didn't happen");
+
+				if (verbose)
+					printf("Testing SCHEDOP_poll with already pending event\n");
+				shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 1UL << EVTCHN_TIMER;
+				alarm(1);
+				break;
+
+			case 17:
+				if (verbose)
+					printf("Testing SCHEDOP_poll timeout\n");
+				shinfo->evtchn_pending[0] = 0;
+				alarm(1);
+				break;
+
+			case 18:
+				if (verbose)
+					printf("Testing SCHEDOP_poll wake on masked event\n");
+
+				tmr.u.timer.expires_ns = rs->state_entry_time + 100000000,
+				vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &tmr);
+				break;
+
+			case 19:
+				shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 0;
+				if (verbose)
+					printf("Testing SCHEDOP_poll wake on unmasked event\n");
+
+				tmr.u.timer.expires_ns = rs->state_entry_time + 100000000,
+				vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &tmr);
+				evtchn_irq_expected = true;
+				break;
+
+			case 20:
+				TEST_ASSERT(!evtchn_irq_expected,
+					    "Expected event channel IRQ but it didn't happen");
+				shinfo->evtchn_pending[1] = 0;
+				goto done;
+
 			case 0x20:
 				TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ");
 				evtchn_irq_expected = false;
-				if (shinfo->evtchn_pending[1] &&
-				    shinfo->evtchn_pending[0])
-					goto done;
 				break;
 			}
 			break;
@@ -473,6 +779,7 @@ int main(int argc, char *argv[])
 	}
 
  done:
+	alarm(0);
 	clock_gettime(CLOCK_REALTIME, &max_ts);
 
 	/*
-- 
cgit 


From a29833e36b43b326e6371c181474119069d6073a Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Wed, 9 Mar 2022 14:38:35 +0000
Subject: KVM: x86/xen: Update self test for Xen PV timers

Add test cases for timers in the past, and reading the status of a timer
which has already fired.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Message-Id: <20220309143835.253911-3-dwmw2@infradead.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../testing/selftests/kvm/x86_64/xen_shinfo_test.c | 35 ++++++++++++++++++++--
 1 file changed, 32 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
index 63b0ca7685b0..d9d9d1deec45 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -325,6 +325,11 @@ static void guest_code(void)
 	guest_wait_for_irq();
 
 	GUEST_SYNC(20);
+
+	/* Timer should have fired already */
+	guest_wait_for_irq();
+
+	GUEST_SYNC(21);
 }
 
 static int cmp_timespec(struct timespec *a, struct timespec *b)
@@ -746,6 +751,7 @@ int main(int argc, char *argv[])
 
 				tmr.u.timer.expires_ns = rs->state_entry_time + 100000000,
 				vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &tmr);
+				alarm(1);
 				break;
 
 			case 19:
@@ -753,15 +759,38 @@ int main(int argc, char *argv[])
 				if (verbose)
 					printf("Testing SCHEDOP_poll wake on unmasked event\n");
 
-				tmr.u.timer.expires_ns = rs->state_entry_time + 100000000,
-				vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &tmr);
 				evtchn_irq_expected = true;
+				tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
+				vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &tmr);
+
+				/* Read it back and check the pending time is reported correctly */
+				tmr.u.timer.expires_ns = 0;
+				vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &tmr);
+				TEST_ASSERT(tmr.u.timer.expires_ns == rs->state_entry_time + 100000000,
+					    "Timer not reported pending");
+				alarm(1);
 				break;
 
 			case 20:
 				TEST_ASSERT(!evtchn_irq_expected,
 					    "Expected event channel IRQ but it didn't happen");
-				shinfo->evtchn_pending[1] = 0;
+				/* Read timer and check it is no longer pending */
+				vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &tmr);
+				TEST_ASSERT(!tmr.u.timer.expires_ns, "Timer still reported pending");
+
+				shinfo->evtchn_pending[0] = 0;
+				if (verbose)
+					printf("Testing timer in the past\n");
+
+				evtchn_irq_expected = true;
+				tmr.u.timer.expires_ns = rs->state_entry_time - 100000000ULL;
+				vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &tmr);
+				alarm(1);
+				break;
+
+			case 21:
+				TEST_ASSERT(!evtchn_irq_expected,
+					    "Expected event channel IRQ but it didn't happen");
 				goto done;
 
 			case 0x20:
-- 
cgit 


From e467b0de82b260a4ee2c3ea53ef76ac40259498f Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Fri, 25 Feb 2022 14:53:04 +0000
Subject: KVM: x86: Test case for TSC scaling and offset sync

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Message-Id: <20220225145304.36166-4-dwmw2@infradead.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/Makefile               |   1 +
 .../selftests/kvm/x86_64/tsc_scaling_sync.c        | 119 +++++++++++++++++++++
 2 files changed, 120 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 03662666b30d..c9cdbd248727 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -66,6 +66,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/state_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
 TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
 TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test
+TEST_GEN_PROGS_x86_64 += x86_64/tsc_scaling_sync
 TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
 TEST_GEN_PROGS_x86_64 += x86_64/userspace_io_test
 TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test
diff --git a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c b/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c
new file mode 100644
index 000000000000..f0083d8cfe98
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * svm_vmcall_test
+ *
+ * Copyright © 2021 Amazon.com, Inc. or its affiliates.
+ *
+ * Xen shared_info / pvclock testing
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#include <stdint.h>
+#include <time.h>
+#include <sched.h>
+#include <signal.h>
+#include <pthread.h>
+
+#define NR_TEST_VCPUS 20
+
+static struct kvm_vm *vm;
+pthread_spinlock_t create_lock;
+
+#define TEST_TSC_KHZ    2345678UL
+#define TEST_TSC_OFFSET 200000000
+
+uint64_t tsc_sync;
+static void guest_code(void)
+{
+	uint64_t start_tsc, local_tsc, tmp;
+
+	start_tsc = rdtsc();
+	do {
+		tmp = READ_ONCE(tsc_sync);
+		local_tsc = rdtsc();
+		WRITE_ONCE(tsc_sync, local_tsc);
+		if (unlikely(local_tsc < tmp))
+			GUEST_SYNC_ARGS(0, local_tsc, tmp, 0, 0);
+
+	} while (local_tsc - start_tsc < 5000 * TEST_TSC_KHZ);
+
+	GUEST_DONE();
+}
+
+
+static void *run_vcpu(void *_cpu_nr)
+{
+	unsigned long cpu = (unsigned long)_cpu_nr;
+	unsigned long failures = 0;
+	static bool first_cpu_done;
+
+	/* The kernel is fine, but vm_vcpu_add_default() needs locking */
+	pthread_spin_lock(&create_lock);
+
+	vm_vcpu_add_default(vm, cpu, guest_code);
+
+	if (!first_cpu_done) {
+		first_cpu_done = true;
+		vcpu_set_msr(vm, cpu, MSR_IA32_TSC, TEST_TSC_OFFSET);
+	}
+
+	pthread_spin_unlock(&create_lock);
+
+	for (;;) {
+		volatile struct kvm_run *run = vcpu_state(vm, cpu);
+                struct ucall uc;
+
+                vcpu_run(vm, cpu);
+                TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+                            "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
+                            run->exit_reason,
+                            exit_reason_str(run->exit_reason));
+
+                switch (get_ucall(vm, cpu, &uc)) {
+                case UCALL_DONE:
+			goto out;
+
+                case UCALL_SYNC:
+			printf("Guest %ld sync %lx %lx %ld\n", cpu, uc.args[2], uc.args[3], uc.args[2] - uc.args[3]);
+			failures++;
+			break;
+
+                default:
+                        TEST_FAIL("Unknown ucall %lu", uc.cmd);
+		}
+	}
+ out:
+	return (void *)failures;
+}
+
+int main(int argc, char *argv[])
+{
+        if (!kvm_check_cap(KVM_CAP_VM_TSC_CONTROL)) {
+		print_skip("KVM_CAP_VM_TSC_CONTROL not available");
+		exit(KSFT_SKIP);
+	}
+
+	vm = vm_create_default_with_vcpus(0, DEFAULT_STACK_PGS * NR_TEST_VCPUS, 0, guest_code, NULL);
+	vm_ioctl(vm, KVM_SET_TSC_KHZ, (void *) TEST_TSC_KHZ);
+
+	pthread_spin_init(&create_lock, PTHREAD_PROCESS_PRIVATE);
+	pthread_t cpu_threads[NR_TEST_VCPUS];
+	unsigned long cpu;
+	for (cpu = 0; cpu < NR_TEST_VCPUS; cpu++)
+		pthread_create(&cpu_threads[cpu], NULL, run_vcpu, (void *)cpu);
+
+	unsigned long failures = 0;
+	for (cpu = 0; cpu < NR_TEST_VCPUS; cpu++) {
+		void *this_cpu_failures;
+		pthread_join(cpu_threads[cpu], &this_cpu_failures);
+		failures += (unsigned long)this_cpu_failures;
+	}
+
+	TEST_ASSERT(!failures, "TSC sync failed");
+	pthread_spin_destroy(&create_lock);
+	kvm_vm_free(vm);
+	return 0;
+}
-- 
cgit 


From 692930cc435099580a4b9e32fa781b0688c18439 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <razor@blackwall.org>
Date: Fri, 1 Apr 2022 18:54:27 +0300
Subject: selftests: net: fix nexthop warning cleanup double ip typo

I made a stupid typo when adding the nexthop route warning selftest and
added both $IP and ip after it (double ip) on the cleanup path. The
error doesn't show up when running the test, but obviously it doesn't
cleanup properly after it.

Fixes: 392baa339c6a ("selftests: net: add delete nexthop route warning test")
Signed-off-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/fib_nexthops.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index d8ede0c81ac1..b3bf5319bb0e 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -1220,8 +1220,8 @@ ipv4_fcnal()
 	[ $out1 -eq $out2 ]
 	rc=$?
 	log_test $rc 0 "Delete nexthop route warning"
-	run_cmd "$IP ip route delete 172.16.101.1/32 nhid 12"
-	run_cmd "$IP ip nexthop del id 12"
+	run_cmd "$IP route delete 172.16.101.1/32 nhid 12"
+	run_cmd "$IP nexthop del id 12"
 }
 
 ipv4_grp_fcnal()
-- 
cgit 


From e299bcd4d16ff86f46c48df1062c8aae0eca1ed8 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Thu, 31 Mar 2022 17:09:49 +0300
Subject: selftests/bpf: Fix vfs_link kprobe definition

Since commit 6521f8917082 ("namei: prepare for idmapped mounts")
vfs_link's prototype was changed, the kprobe definition in
profiler selftest in turn wasn't updated. The result is that all
argument after the first are now stored in different registers. This
means that self-test has been broken ever since. Fix it by updating the
kprobe definition accordingly.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220331140949.1410056-1-nborisov@suse.com
---
 tools/testing/selftests/bpf/progs/profiler.inc.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h
index 4896fdf816f7..92331053dba3 100644
--- a/tools/testing/selftests/bpf/progs/profiler.inc.h
+++ b/tools/testing/selftests/bpf/progs/profiler.inc.h
@@ -826,8 +826,9 @@ out:
 
 SEC("kprobe/vfs_link")
 int BPF_KPROBE(kprobe__vfs_link,
-	       struct dentry* old_dentry, struct inode* dir,
-	       struct dentry* new_dentry, struct inode** delegated_inode)
+	       struct dentry* old_dentry, struct user_namespace *mnt_userns,
+	       struct inode* dir, struct dentry* new_dentry,
+	       struct inode** delegated_inode)
 {
 	struct bpf_func_stats_ctx stats_ctx;
 	bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_link);
-- 
cgit 


From f6d60facd9b65614594f1feaa4eee18ac60a9a18 Mon Sep 17 00:00:00 2001
From: Haowen Bai <baihaowen@meizu.com>
Date: Fri, 1 Apr 2022 10:15:54 +0800
Subject: selftests/bpf: Return true/false (not 1/0) from bool functions

Return boolean values ("true" or "false") instead of 1 or 0 from bool
functions.  This fixes the following warnings from coccicheck:

./tools/testing/selftests/bpf/progs/test_xdp_noinline.c:567:9-10: WARNING:
return of 0/1 in function 'get_packet_dst' with return type bool
./tools/testing/selftests/bpf/progs/test_l4lb_noinline.c:221:9-10: WARNING:
return of 0/1 in function 'get_packet_dst' with return type bool

Signed-off-by: Haowen Bai <baihaowen@meizu.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/1648779354-14700-1-git-send-email-baihaowen@meizu.com
---
 tools/testing/selftests/bpf/progs/test_l4lb_noinline.c |  2 +-
 tools/testing/selftests/bpf/progs/test_xdp_noinline.c  | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
index 19e4d2071c60..c8bc0c6947aa 100644
--- a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
@@ -218,7 +218,7 @@ static __noinline bool get_packet_dst(struct real_definition **real,
 
 	if (hash != 0x358459b7 /* jhash of ipv4 packet */  &&
 	    hash != 0x2f4bc6bb /* jhash of ipv6 packet */)
-		return 0;
+		return false;
 
 	real_pos = bpf_map_lookup_elem(&ch_rings, &key);
 	if (!real_pos)
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
index 596c4e71bf3a..125d872d7981 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
@@ -564,22 +564,22 @@ static bool get_packet_dst(struct real_definition **real,
 	hash = get_packet_hash(pckt, hash_16bytes);
 	if (hash != 0x358459b7 /* jhash of ipv4 packet */  &&
 	    hash != 0x2f4bc6bb /* jhash of ipv6 packet */)
-		return 0;
+		return false;
 	key = 2 * vip_info->vip_num + hash % 2;
 	real_pos = bpf_map_lookup_elem(&ch_rings, &key);
 	if (!real_pos)
-		return 0;
+		return false;
 	key = *real_pos;
 	*real = bpf_map_lookup_elem(&reals, &key);
 	if (!(*real))
-		return 0;
+		return false;
 	if (!(vip_info->flags & (1 << 1))) {
 		__u32 conn_rate_key = 512 + 2;
 		struct lb_stats *conn_rate_stats =
 		    bpf_map_lookup_elem(&stats, &conn_rate_key);
 
 		if (!conn_rate_stats)
-			return 1;
+			return true;
 		cur_time = bpf_ktime_get_ns();
 		if ((cur_time - conn_rate_stats->v2) >> 32 > 0xffFFFF) {
 			conn_rate_stats->v1 = 1;
@@ -587,14 +587,14 @@ static bool get_packet_dst(struct real_definition **real,
 		} else {
 			conn_rate_stats->v1 += 1;
 			if (conn_rate_stats->v1 >= 1)
-				return 1;
+				return true;
 		}
 		if (pckt->flow.proto == IPPROTO_UDP)
 			new_dst_lru.atime = cur_time;
 		new_dst_lru.pos = key;
 		bpf_map_update_elem(lru_map, &pckt->flow, &new_dst_lru, 0);
 	}
-	return 1;
+	return true;
 }
 
 __attribute__ ((noinline))
-- 
cgit 


From 9bbad6dab8279905c4593be69b06704b77b31403 Mon Sep 17 00:00:00 2001
From: Yuntao Wang <ytcoode@gmail.com>
Date: Sun, 3 Apr 2022 21:52:45 +0800
Subject: selftests/bpf: Fix cd_flavor_subdir() of test_progs

Currently, when we run test_progs with just executable file name, for
example 'PATH=. test_progs-no_alu32', cd_flavor_subdir() will not check
if test_progs is running as a flavored test runner and switch into
corresponding sub-directory.

This will cause test_progs-no_alu32 executed by the
'PATH=. test_progs-no_alu32' command to run in the wrong directory and
load the wrong BPF objects.

Signed-off-by: Yuntao Wang <ytcoode@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220403135245.1713283-1-ytcoode@gmail.com
---
 tools/testing/selftests/bpf/test_progs.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 2ecb73a65206..0a4b45d7b515 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -761,8 +761,10 @@ int cd_flavor_subdir(const char *exec_name)
 	const char *flavor = strrchr(exec_name, '/');
 
 	if (!flavor)
-		return 0;
-	flavor++;
+		flavor = exec_name;
+	else
+		flavor++;
+
 	flavor = strrchr(flavor, '-');
 	if (!flavor)
 		return 0;
-- 
cgit 


From 39f8dc43b7a05ab0e352655a14a9d613c2308b92 Mon Sep 17 00:00:00 2001
From: Alan Maguire <alan.maguire@oracle.com>
Date: Wed, 30 Mar 2022 16:26:38 +0100
Subject: libbpf: Add auto-attach for uprobes based on section name

Now that u[ret]probes can use name-based specification, it makes
sense to add support for auto-attach based on SEC() definition.
The format proposed is

        SEC("u[ret]probe/binary:[raw_offset|[function_name[+offset]]")

For example, to trace malloc() in libc:

        SEC("uprobe/libc.so.6:malloc")

...or to trace function foo2 in /usr/bin/foo:

        SEC("uprobe//usr/bin/foo:foo2")

Auto-attach is done for all tasks (pid -1).  prog can be an absolute
path or simply a program/library name; in the latter case, we use
PATH/LD_LIBRARY_PATH to resolve the full path, falling back to
standard locations (/usr/bin:/usr/sbin or /usr/lib64:/usr/lib) if
the file is not found via environment-variable specified locations.

Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/1648654000-21758-4-git-send-email-alan.maguire@oracle.com
---
 tools/lib/bpf/libbpf.c                             | 74 +++++++++++++++++++++-
 .../testing/selftests/bpf/progs/test_bpf_cookie.c  |  4 +-
 .../selftests/bpf/progs/test_task_pt_regs.c        |  2 +-
 tools/testing/selftests/bpf/progs/trigger_bench.c  |  2 +-
 4 files changed, 76 insertions(+), 6 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index f6e5a0217841..6d2be53e4ba9 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -8630,6 +8630,7 @@ int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log
 }
 
 static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
+static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
 static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
 static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
 static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link);
@@ -8642,9 +8643,9 @@ static const struct bpf_sec_def section_defs[] = {
 	SEC_DEF("sk_reuseport/migrate",	SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
 	SEC_DEF("sk_reuseport",		SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
 	SEC_DEF("kprobe/",		KPROBE,	0, SEC_NONE, attach_kprobe),
-	SEC_DEF("uprobe/",		KPROBE,	0, SEC_NONE),
+	SEC_DEF("uprobe+",		KPROBE,	0, SEC_NONE, attach_uprobe),
 	SEC_DEF("kretprobe/",		KPROBE, 0, SEC_NONE, attach_kprobe),
-	SEC_DEF("uretprobe/",		KPROBE, 0, SEC_NONE),
+	SEC_DEF("uretprobe+",		KPROBE, 0, SEC_NONE, attach_uprobe),
 	SEC_DEF("kprobe.multi/",	KPROBE,	BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
 	SEC_DEF("kretprobe.multi/",	KPROBE,	BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
 	SEC_DEF("tc",			SCHED_CLS, 0, SEC_NONE),
@@ -10845,6 +10846,75 @@ err_out:
 
 }
 
+/* Format of u[ret]probe section definition supporting auto-attach:
+ * u[ret]probe/binary:function[+offset]
+ *
+ * binary can be an absolute/relative path or a filename; the latter is resolved to a
+ * full binary path via bpf_program__attach_uprobe_opts.
+ *
+ * Specifying uprobe+ ensures we carry out strict matching; either "uprobe" must be
+ * specified (and auto-attach is not possible) or the above format is specified for
+ * auto-attach.
+ */
+static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
+{
+	DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts);
+	char *func, *probe_name, *func_end;
+	char *func_name, binary_path[512];
+	unsigned long long raw_offset;
+	size_t offset = 0;
+	int n;
+
+	*link = NULL;
+
+	opts.retprobe = str_has_pfx(prog->sec_name, "uretprobe");
+	if (opts.retprobe)
+		probe_name = prog->sec_name + sizeof("uretprobe") - 1;
+	else
+		probe_name = prog->sec_name + sizeof("uprobe") - 1;
+	if (probe_name[0] == '/')
+		probe_name++;
+
+	/* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */
+	if (strlen(probe_name) == 0)
+		return 0;
+
+	snprintf(binary_path, sizeof(binary_path), "%s", probe_name);
+	/* ':' should be prior to function+offset */
+	func_name = strrchr(binary_path, ':');
+	if (!func_name) {
+		pr_warn("section '%s' missing ':function[+offset]' specification\n",
+			prog->sec_name);
+		return -EINVAL;
+	}
+	func_name[0] = '\0';
+	func_name++;
+	n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset);
+	if (n < 1) {
+		pr_warn("uprobe name '%s' is invalid\n", func_name);
+		return -EINVAL;
+	}
+	if (opts.retprobe && offset != 0) {
+		free(func);
+		pr_warn("uretprobes do not support offset specification\n");
+		return -EINVAL;
+	}
+
+	/* Is func a raw address? */
+	errno = 0;
+	raw_offset = strtoull(func, &func_end, 0);
+	if (!errno && !*func_end) {
+		free(func);
+		func = NULL;
+		offset = (size_t)raw_offset;
+	}
+	opts.func_name = func;
+
+	*link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts);
+	free(func);
+	return libbpf_get_error(*link);
+}
+
 struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog,
 					    bool retprobe, pid_t pid,
 					    const char *binary_path,
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c
index 2d3a7710e2ce..0e2222968918 100644
--- a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c
+++ b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c
@@ -37,14 +37,14 @@ int handle_kretprobe(struct pt_regs *ctx)
 	return 0;
 }
 
-SEC("uprobe/trigger_func")
+SEC("uprobe")
 int handle_uprobe(struct pt_regs *ctx)
 {
 	update(ctx, &uprobe_res);
 	return 0;
 }
 
-SEC("uretprobe/trigger_func")
+SEC("uretprobe")
 int handle_uretprobe(struct pt_regs *ctx)
 {
 	update(ctx, &uretprobe_res);
diff --git a/tools/testing/selftests/bpf/progs/test_task_pt_regs.c b/tools/testing/selftests/bpf/progs/test_task_pt_regs.c
index e6cb09259408..1926facba122 100644
--- a/tools/testing/selftests/bpf/progs/test_task_pt_regs.c
+++ b/tools/testing/selftests/bpf/progs/test_task_pt_regs.c
@@ -14,7 +14,7 @@ char current_regs[PT_REGS_SIZE] = {};
 char ctx_regs[PT_REGS_SIZE] = {};
 int uprobe_res = 0;
 
-SEC("uprobe/trigger_func")
+SEC("uprobe")
 int handle_uprobe(struct pt_regs *ctx)
 {
 	struct task_struct *current;
diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c
index 2ab049b54d6c..694e7cec1823 100644
--- a/tools/testing/selftests/bpf/progs/trigger_bench.c
+++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
@@ -54,7 +54,7 @@ int bench_trigger_fmodret(void *ctx)
 	return -22;
 }
 
-SEC("uprobe/self/uprobe_target")
+SEC("uprobe")
 int bench_trigger_uprobe(void *ctx)
 {
 	__sync_add_and_fetch(&hits, 1);
-- 
cgit 


From ba7499bc9d52f7ea93301a48c05caa370c68b213 Mon Sep 17 00:00:00 2001
From: Alan Maguire <alan.maguire@oracle.com>
Date: Wed, 30 Mar 2022 16:26:39 +0100
Subject: selftests/bpf: Add tests for u[ret]probe attach by name

add tests that verify attaching by name for

1. local functions in a program
2. library functions in a shared object

...succeed for uprobe and uretprobes using new "func_name"
option for bpf_program__attach_uprobe_opts().  Also verify
auto-attach works where uprobe, path to binary and function
name are specified, but fails with -EOPNOTSUPP with a SEC
name that does not specify binary path/function.

Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/1648654000-21758-5-git-send-email-alan.maguire@oracle.com
---
 .../selftests/bpf/prog_tests/attach_probe.c        | 85 ++++++++++++++++++----
 .../selftests/bpf/progs/test_attach_probe.c        | 41 ++++++++++-
 2 files changed, 109 insertions(+), 17 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index d48f6e533e1e..c0c6d410751d 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -11,15 +11,22 @@ static void trigger_func(void)
 	asm volatile ("");
 }
 
+/* attach point for byname uprobe */
+static void trigger_func2(void)
+{
+	asm volatile ("");
+}
+
 void test_attach_probe(void)
 {
 	DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
-	int duration = 0;
 	struct bpf_link *kprobe_link, *kretprobe_link;
 	struct bpf_link *uprobe_link, *uretprobe_link;
 	struct test_attach_probe* skel;
 	ssize_t uprobe_offset, ref_ctr_offset;
+	struct bpf_link *uprobe_err_link;
 	bool legacy;
+	char *mem;
 
 	/* Check if new-style kprobe/uprobe API is supported.
 	 * Kernels that support new FD-based kprobe and uprobe BPF attachment
@@ -43,9 +50,9 @@ void test_attach_probe(void)
 		return;
 
 	skel = test_attach_probe__open_and_load();
-	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
 		return;
-	if (CHECK(!skel->bss, "check_bss", ".bss wasn't mmap()-ed\n"))
+	if (!ASSERT_OK_PTR(skel->bss, "check_bss"))
 		goto cleanup;
 
 	kprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kprobe,
@@ -90,25 +97,73 @@ void test_attach_probe(void)
 		goto cleanup;
 	skel->links.handle_uretprobe = uretprobe_link;
 
-	/* trigger & validate kprobe && kretprobe */
-	usleep(1);
+	/* verify auto-attach fails for old-style uprobe definition */
+	uprobe_err_link = bpf_program__attach(skel->progs.handle_uprobe_byname);
+	if (!ASSERT_EQ(libbpf_get_error(uprobe_err_link), -EOPNOTSUPP,
+		       "auto-attach should fail for old-style name"))
+		goto cleanup;
+
+	uprobe_opts.func_name = "trigger_func2";
+	uprobe_opts.retprobe = false;
+	uprobe_opts.ref_ctr_offset = 0;
+	skel->links.handle_uprobe_byname =
+			bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe_byname,
+							0 /* this pid */,
+							"/proc/self/exe",
+							0, &uprobe_opts);
+	if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname, "attach_uprobe_byname"))
+		goto cleanup;
+
+	/* verify auto-attach works */
+	skel->links.handle_uretprobe_byname =
+			bpf_program__attach(skel->progs.handle_uretprobe_byname);
+	if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname, "attach_uretprobe_byname"))
+		goto cleanup;
 
-	if (CHECK(skel->bss->kprobe_res != 1, "check_kprobe_res",
-		  "wrong kprobe res: %d\n", skel->bss->kprobe_res))
+	/* test attach by name for a library function, using the library
+	 * as the binary argument. libc.so.6 will be resolved via dlopen()/dlinfo().
+	 */
+	uprobe_opts.func_name = "malloc";
+	uprobe_opts.retprobe = false;
+	skel->links.handle_uprobe_byname2 =
+			bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe_byname2,
+							0 /* this pid */,
+							"libc.so.6",
+							0, &uprobe_opts);
+	if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname2, "attach_uprobe_byname2"))
 		goto cleanup;
-	if (CHECK(skel->bss->kretprobe_res != 2, "check_kretprobe_res",
-		  "wrong kretprobe res: %d\n", skel->bss->kretprobe_res))
+
+	uprobe_opts.func_name = "free";
+	uprobe_opts.retprobe = true;
+	skel->links.handle_uretprobe_byname2 =
+			bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe_byname2,
+							-1 /* any pid */,
+							"libc.so.6",
+							0, &uprobe_opts);
+	if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname2, "attach_uretprobe_byname2"))
 		goto cleanup;
 
+	/* trigger & validate kprobe && kretprobe */
+	usleep(1);
+
+	/* trigger & validate shared library u[ret]probes attached by name */
+	mem = malloc(1);
+	free(mem);
+
 	/* trigger & validate uprobe & uretprobe */
 	trigger_func();
 
-	if (CHECK(skel->bss->uprobe_res != 3, "check_uprobe_res",
-		  "wrong uprobe res: %d\n", skel->bss->uprobe_res))
-		goto cleanup;
-	if (CHECK(skel->bss->uretprobe_res != 4, "check_uretprobe_res",
-		  "wrong uretprobe res: %d\n", skel->bss->uretprobe_res))
-		goto cleanup;
+	/* trigger & validate uprobe attached by name */
+	trigger_func2();
+
+	ASSERT_EQ(skel->bss->kprobe_res, 1, "check_kprobe_res");
+	ASSERT_EQ(skel->bss->kretprobe_res, 2, "check_kretprobe_res");
+	ASSERT_EQ(skel->bss->uprobe_res, 3, "check_uprobe_res");
+	ASSERT_EQ(skel->bss->uretprobe_res, 4, "check_uretprobe_res");
+	ASSERT_EQ(skel->bss->uprobe_byname_res, 5, "check_uprobe_byname_res");
+	ASSERT_EQ(skel->bss->uretprobe_byname_res, 6, "check_uretprobe_byname_res");
+	ASSERT_EQ(skel->bss->uprobe_byname2_res, 7, "check_uprobe_byname2_res");
+	ASSERT_EQ(skel->bss->uretprobe_byname2_res, 8, "check_uretprobe_byname2_res");
 
 cleanup:
 	test_attach_probe__destroy(skel);
diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c
index 8056a4c6d918..af994d16bb10 100644
--- a/tools/testing/selftests/bpf/progs/test_attach_probe.c
+++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c
@@ -10,6 +10,10 @@ int kprobe_res = 0;
 int kretprobe_res = 0;
 int uprobe_res = 0;
 int uretprobe_res = 0;
+int uprobe_byname_res = 0;
+int uretprobe_byname_res = 0;
+int uprobe_byname2_res = 0;
+int uretprobe_byname2_res = 0;
 
 SEC("kprobe/sys_nanosleep")
 int handle_kprobe(struct pt_regs *ctx)
@@ -25,18 +29,51 @@ int BPF_KRETPROBE(handle_kretprobe)
 	return 0;
 }
 
-SEC("uprobe/trigger_func")
+SEC("uprobe")
 int handle_uprobe(struct pt_regs *ctx)
 {
 	uprobe_res = 3;
 	return 0;
 }
 
-SEC("uretprobe/trigger_func")
+SEC("uretprobe")
 int handle_uretprobe(struct pt_regs *ctx)
 {
 	uretprobe_res = 4;
 	return 0;
 }
 
+SEC("uprobe")
+int handle_uprobe_byname(struct pt_regs *ctx)
+{
+	uprobe_byname_res = 5;
+	return 0;
+}
+
+/* use auto-attach format for section definition. */
+SEC("uretprobe//proc/self/exe:trigger_func2")
+int handle_uretprobe_byname(struct pt_regs *ctx)
+{
+	uretprobe_byname_res = 6;
+	return 0;
+}
+
+SEC("uprobe")
+int handle_uprobe_byname2(struct pt_regs *ctx)
+{
+	unsigned int size = PT_REGS_PARM1(ctx);
+
+	/* verify malloc size */
+	if (size == 1)
+		uprobe_byname2_res = 7;
+	return 0;
+}
+
+SEC("uretprobe")
+int handle_uretprobe_byname2(struct pt_regs *ctx)
+{
+	uretprobe_byname2_res = 8;
+	return 0;
+}
+
 char _license[] SEC("license") = "GPL";
-- 
cgit 


From 579c3196b21800538e0cfd512a05619ee80fb19a Mon Sep 17 00:00:00 2001
From: Alan Maguire <alan.maguire@oracle.com>
Date: Wed, 30 Mar 2022 16:26:40 +0100
Subject: selftests/bpf: Add tests for uprobe auto-attach via skeleton

tests that verify auto-attach works for function entry/return for
local functions in program and library functions in a library.

Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/1648654000-21758-6-git-send-email-alan.maguire@oracle.com
---
 .../selftests/bpf/prog_tests/uprobe_autoattach.c   | 38 ++++++++++++++++
 .../selftests/bpf/progs/test_uprobe_autoattach.c   | 52 ++++++++++++++++++++++
 2 files changed, 90 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c b/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c
new file mode 100644
index 000000000000..03b15d632be4
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022, Oracle and/or its affiliates. */
+
+#include <test_progs.h>
+#include "test_uprobe_autoattach.skel.h"
+
+/* uprobe attach point */
+static void autoattach_trigger_func(void)
+{
+	asm volatile ("");
+}
+
+void test_uprobe_autoattach(void)
+{
+	struct test_uprobe_autoattach *skel;
+	char *mem;
+
+	skel = test_uprobe_autoattach__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	if (!ASSERT_OK(test_uprobe_autoattach__attach(skel), "skel_attach"))
+		goto cleanup;
+
+	/* trigger & validate uprobe & uretprobe */
+	autoattach_trigger_func();
+
+	/* trigger & validate shared library u[ret]probes attached by name */
+	mem = malloc(1);
+	free(mem);
+
+	ASSERT_EQ(skel->bss->uprobe_byname_res, 1, "check_uprobe_byname_res");
+	ASSERT_EQ(skel->bss->uretprobe_byname_res, 2, "check_uretprobe_byname_res");
+	ASSERT_EQ(skel->bss->uprobe_byname2_res, 3, "check_uprobe_byname2_res");
+	ASSERT_EQ(skel->bss->uretprobe_byname2_res, 4, "check_uretprobe_byname2_res");
+cleanup:
+	test_uprobe_autoattach__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c b/tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c
new file mode 100644
index 000000000000..b442fb5ef54b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022, Oracle and/or its affiliates. */
+
+#include <linux/ptrace.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+int uprobe_byname_res = 0;
+int uretprobe_byname_res = 0;
+int uprobe_byname2_res = 0;
+int uretprobe_byname2_res = 0;
+
+/* This program cannot auto-attach, but that should not stop other
+ * programs from attaching.
+ */
+SEC("uprobe")
+int handle_uprobe_noautoattach(struct pt_regs *ctx)
+{
+	return 0;
+}
+
+SEC("uprobe//proc/self/exe:autoattach_trigger_func")
+int handle_uprobe_byname(struct pt_regs *ctx)
+{
+	uprobe_byname_res = 1;
+	return 0;
+}
+
+SEC("uretprobe//proc/self/exe:autoattach_trigger_func")
+int handle_uretprobe_byname(struct pt_regs *ctx)
+{
+	uretprobe_byname_res = 2;
+	return 0;
+}
+
+
+SEC("uprobe/libc.so.6:malloc")
+int handle_uprobe_byname2(struct pt_regs *ctx)
+{
+	uprobe_byname2_res = 3;
+	return 0;
+}
+
+SEC("uretprobe/libc.so.6:free")
+int handle_uretprobe_byname2(struct pt_regs *ctx)
+{
+	uretprobe_byname2_res = 4;
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit 


From 8ff88bec6f6186c406aa71312b2919e56f7b8084 Mon Sep 17 00:00:00 2001
From: Guo Zhengkui <guozhengkui@vivo.com>
Date: Mon, 21 Mar 2022 13:27:42 +0800
Subject: selftests/vDSO: fix array_size.cocci warning

Fix the following coccicheck warning:

tools/testing/selftests/vDSO/vdso_test_correctness.c:309:46-47:
WARNING: Use ARRAY_SIZE
tools/testing/selftests/vDSO/vdso_test_correctness.c:373:46-47:
WARNING: Use ARRAY_SIZE

It has been tested with gcc (Debian 8.3.0-6) 8.3.0 on x86_64.

Signed-off-by: Guo Zhengkui <guozhengkui@vivo.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/vDSO/vdso_test_correctness.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vDSO/vdso_test_correctness.c b/tools/testing/selftests/vDSO/vdso_test_correctness.c
index c4aea794725a..e691a3cf1491 100644
--- a/tools/testing/selftests/vDSO/vdso_test_correctness.c
+++ b/tools/testing/selftests/vDSO/vdso_test_correctness.c
@@ -20,6 +20,7 @@
 #include <limits.h>
 
 #include "vdso_config.h"
+#include "../kselftest.h"
 
 static const char **name;
 
@@ -306,10 +307,8 @@ static void test_clock_gettime(void)
 		return;
 	}
 
-	for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]);
-	     clock++) {
+	for (int clock = 0; clock < ARRAY_SIZE(clocknames); clock++)
 		test_one_clock_gettime(clock, clocknames[clock]);
-	}
 
 	/* Also test some invalid clock ids */
 	test_one_clock_gettime(-1, "invalid");
@@ -370,10 +369,8 @@ static void test_clock_gettime64(void)
 		return;
 	}
 
-	for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]);
-	     clock++) {
+	for (int clock = 0; clock < ARRAY_SIZE(clocknames); clock++)
 		test_one_clock_gettime64(clock, clocknames[clock]);
-	}
 
 	/* Also test some invalid clock ids */
 	test_one_clock_gettime64(-1, "invalid");
-- 
cgit 


From 1585b1b55a2b9086823a6b30031eb63f965f8d44 Mon Sep 17 00:00:00 2001
From: Guo Zhengkui <guozhengkui@vivo.com>
Date: Mon, 21 Mar 2022 18:25:17 +0800
Subject: selftests/proc: fix array_size.cocci warning

Fix the following coccicheck warning:

tools/testing/selftests/proc/proc-pid-vm.c:371:26-27:
WARNING: Use ARRAY_SIZE
tools/testing/selftests/proc/proc-pid-vm.c:420:26-27:
WARNING: Use ARRAY_SIZE

It has been tested with gcc (Debian 8.3.0-6) 8.3.0 on x86_64.

Signed-off-by: Guo Zhengkui <guozhengkui@vivo.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/proc/proc-pid-vm.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/proc/proc-pid-vm.c b/tools/testing/selftests/proc/proc-pid-vm.c
index 18a3bde8bc96..28604c9f805c 100644
--- a/tools/testing/selftests/proc/proc-pid-vm.c
+++ b/tools/testing/selftests/proc/proc-pid-vm.c
@@ -46,6 +46,8 @@
 #include <sys/time.h>
 #include <sys/resource.h>
 
+#include "../kselftest.h"
+
 static inline long sys_execveat(int dirfd, const char *pathname, char **argv, char **envp, int flags)
 {
 	return syscall(SYS_execveat, dirfd, pathname, argv, envp, flags);
@@ -368,7 +370,7 @@ int main(void)
 		};
 		int i;
 
-		for (i = 0; i < sizeof(S)/sizeof(S[0]); i++) {
+		for (i = 0; i < ARRAY_SIZE(S); i++) {
 			assert(memmem(buf, rv, S[i], strlen(S[i])));
 		}
 
@@ -417,7 +419,7 @@ int main(void)
 		};
 		int i;
 
-		for (i = 0; i < sizeof(S)/sizeof(S[0]); i++) {
+		for (i = 0; i < ARRAY_SIZE(S); i++) {
 			assert(memmem(buf, rv, S[i], strlen(S[i])));
 		}
 	}
-- 
cgit 


From aa8ce29931d6a37aa80f10ae7aa30045108f276d Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Thu, 24 Mar 2022 17:55:54 +0800
Subject: selftests: x86: add 32bit build warnings for SUSE

In order to successfully build all these 32bit tests, these 32bit gcc
and glibc packages, named gcc-32bit and glibc-devel-static-32bit on SUSE,
need to be installed.

This patch added this information in warn_32bit_failure.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/x86/Makefile | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 53df7d3893d3..0388c4d60af0 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -92,6 +92,10 @@ warn_32bit_failure:
 	echo "If you are using a Fedora-like distribution, try:";	\
 	echo "";							\
 	echo "  yum install glibc-devel.*i686";				\
+	echo "";							\
+	echo "If you are using a SUSE-like distribution, try:";		\
+	echo "";							\
+	echo "  zypper install gcc-32bit glibc-devel-static-32bit";	\
 	exit 0;
 endif
 
-- 
cgit 


From 52035628fae646269b1379417926fa3d60ef87d0 Mon Sep 17 00:00:00 2001
From: Axel Rasmussen <axelrasmussen@google.com>
Date: Thu, 24 Mar 2022 15:39:28 -0700
Subject: selftests: fix header dependency for pid_namespace selftests

The way the test target was defined before, when building with clang we
get a command line like this:

clang -Wall -Werror -g -I../../../../usr/include/ \
	regression_enomem.c ../pidfd/pidfd.h  -o regression_enomem

This yields an error, because clang thinks we want to produce both a *.o
file, as well as a precompiled header:

clang: error: cannot specify -o when generating multiple output files

gcc, for whatever reason, doesn't exhibit the same behavior which I
suspect is why the problem wasn't noticed before.

This can be fixed simply by using the LOCAL_HDRS infrastructure the
selftests lib.mk provides. It does the right think and marks the target
as depending on the header (so if the header changes, we rebuild), but
it filters the header out of the compiler command line, so we don't get
the error described above.

Signed-off-by: Axel Rasmussen <axelrasmussen@google.com>
Reviewed-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/pid_namespace/Makefile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/pid_namespace/Makefile b/tools/testing/selftests/pid_namespace/Makefile
index dcaefa224ca0..edafaca1aeb3 100644
--- a/tools/testing/selftests/pid_namespace/Makefile
+++ b/tools/testing/selftests/pid_namespace/Makefile
@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 CFLAGS += -g -I../../../../usr/include/
 
-TEST_GEN_PROGS := regression_enomem
+TEST_GEN_PROGS = regression_enomem
 
-include ../lib.mk
+LOCAL_HDRS += $(selfdir)/pidfd/pidfd.h
 
-$(OUTPUT)/regression_enomem: regression_enomem.c ../pidfd/pidfd.h
+include ../lib.mk
-- 
cgit 


From 187816d07729ff88e75d84efbc668642106cebf3 Mon Sep 17 00:00:00 2001
From: Axel Rasmussen <axelrasmussen@google.com>
Date: Thu, 24 Mar 2022 15:39:29 -0700
Subject: selftests: fix an unused variable warning in pidfd selftest

I fixed a few warnings like this in commit e2aa5e650b07
("selftests: fixup build warnings in pidfd / clone3 tests"), but I
missed this one by mistake. Since this variable is unused, remove it.

Signed-off-by: Axel Rasmussen <axelrasmussen@google.com>
Reviewed-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/pidfd/pidfd_wait.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/pidfd/pidfd_wait.c b/tools/testing/selftests/pidfd/pidfd_wait.c
index 17999e082aa7..070c1c876df1 100644
--- a/tools/testing/selftests/pidfd/pidfd_wait.c
+++ b/tools/testing/selftests/pidfd/pidfd_wait.c
@@ -95,7 +95,6 @@ TEST(wait_states)
 		.flags = CLONE_PIDFD | CLONE_PARENT_SETTID,
 		.exit_signal = SIGCHLD,
 	};
-	int ret;
 	pid_t pid;
 	siginfo_t info = {
 		.si_signo = 0,
-- 
cgit 


From 63e6b2a42342c3297cce286fb124c99be9e0f3fd Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 24 Mar 2022 16:19:06 -0700
Subject: selftests/harness: Run TEARDOWN for ASSERT failures

The kselftest test harness has traditionally not run the registered
TEARDOWN handler when a test encountered an ASSERT. This creates
unexpected situations and tests need to be very careful about using
ASSERT, which seems a needless hurdle for test writers.

Because of the harness's design for optional failure handlers, the
original implementation of ASSERT used an abort() to immediately
stop execution, but that meant the context for running teardown was
lost. Instead, use setjmp/longjmp so that teardown can be done.

Failed SETUP routines continue to not be followed by TEARDOWN, though.

Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Will Drewry <wad@chromium.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: linux-kselftest@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/kselftest_harness.h | 49 ++++++++++++++++++++---------
 1 file changed, 34 insertions(+), 15 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index 11779405dc80..696eab05f995 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -64,6 +64,7 @@
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <unistd.h>
+#include <setjmp.h>
 
 #include "kselftest.h"
 
@@ -183,7 +184,10 @@
 		struct __test_metadata *_metadata, \
 		struct __fixture_variant_metadata *variant) \
 	{ \
-		test_name(_metadata); \
+		_metadata->setup_completed = true; \
+		if (setjmp(_metadata->env) == 0) \
+			test_name(_metadata); \
+		__test_check_assert(_metadata); \
 	} \
 	static struct __test_metadata _##test_name##_object = \
 		{ .name = #test_name, \
@@ -356,10 +360,7 @@
  * Defines a test that depends on a fixture (e.g., is part of a test case).
  * Very similar to TEST() except that *self* is the setup instance of fixture's
  * datatype exposed for use by the implementation.
- *
- * Warning: use of ASSERT_* here will skip TEARDOWN.
  */
-/* TODO(wad) register fixtures on dedicated test lists. */
 #define TEST_F(fixture_name, test_name) \
 	__TEST_F_IMPL(fixture_name, test_name, -1, TEST_TIMEOUT_DEFAULT)
 
@@ -381,12 +382,17 @@
 		/* fixture data is alloced, setup, and torn down per call. */ \
 		FIXTURE_DATA(fixture_name) self; \
 		memset(&self, 0, sizeof(FIXTURE_DATA(fixture_name))); \
-		fixture_name##_setup(_metadata, &self, variant->data); \
-		/* Let setup failure terminate early. */ \
-		if (!_metadata->passed) \
-			return; \
-		fixture_name##_##test_name(_metadata, &self, variant->data); \
-		fixture_name##_teardown(_metadata, &self); \
+		if (setjmp(_metadata->env) == 0) { \
+			fixture_name##_setup(_metadata, &self, variant->data); \
+			/* Let setup failure terminate early. */ \
+			if (!_metadata->passed) \
+				return; \
+			_metadata->setup_completed = true; \
+			fixture_name##_##test_name(_metadata, &self, variant->data); \
+		} \
+		if (_metadata->setup_completed) \
+			fixture_name##_teardown(_metadata, &self); \
+		__test_check_assert(_metadata); \
 	} \
 	static struct __test_metadata \
 		      _##fixture_name##_##test_name##_object = { \
@@ -683,7 +689,7 @@
  */
 #define OPTIONAL_HANDLER(_assert) \
 	for (; _metadata->trigger; _metadata->trigger = \
-			__bail(_assert, _metadata->no_print, _metadata->step))
+			__bail(_assert, _metadata))
 
 #define __INC_STEP(_metadata) \
 	/* Keep "step" below 255 (which is used for "SKIP" reporting). */	\
@@ -830,6 +836,9 @@ struct __test_metadata {
 	bool timed_out;	/* did this test timeout instead of exiting? */
 	__u8 step;
 	bool no_print; /* manual trigger when TH_LOG_STREAM is not available */
+	bool aborted;	/* stopped test due to failed ASSERT */
+	bool setup_completed; /* did setup finish? */
+	jmp_buf env;	/* for exiting out of test early */
 	struct __test_results *results;
 	struct __test_metadata *prev, *next;
 };
@@ -848,16 +857,26 @@ static inline void __register_test(struct __test_metadata *t)
 	__LIST_APPEND(t->fixture->tests, t);
 }
 
-static inline int __bail(int for_realz, bool no_print, __u8 step)
+static inline int __bail(int for_realz, struct __test_metadata *t)
 {
+	/* if this is ASSERT, return immediately. */
 	if (for_realz) {
-		if (no_print)
-			_exit(step);
-		abort();
+		t->aborted = true;
+		longjmp(t->env, 1);
 	}
+	/* otherwise, end the for loop and continue. */
 	return 0;
 }
 
+static inline void __test_check_assert(struct __test_metadata *t)
+{
+	if (t->aborted) {
+		if (t->no_print)
+			_exit(t->step);
+		abort();
+	}
+}
+
 struct __test_metadata *__active_test;
 static void __timeout_handler(int sig, siginfo_t *info, void *ucontext)
 {
-- 
cgit 


From 79ee8aa31d518c1fd5f3b1b1ac39dd1fb4dc7039 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Thu, 24 Mar 2022 16:19:07 -0700
Subject: selftests/harness: Pass variant to teardown

FIXTURE_VARIANT data is passed to FIXTURE_SETUP and TEST_F as "variant".

In some cases, the variant will change the setup, such that expectations
also change on teardown. Also pass variant to FIXTURE_TEARDOWN.

The new FIXTURE_TEARDOWN logic is identical to that in FIXTURE_SETUP,
right above.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20201210231010.420298-1-willemdebruijn.kernel@gmail.com
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/kselftest_harness.h | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index 696eab05f995..25f4d54067c0 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -291,7 +291,9 @@
 #define FIXTURE_TEARDOWN(fixture_name) \
 	void fixture_name##_teardown( \
 		struct __test_metadata __attribute__((unused)) *_metadata, \
-		FIXTURE_DATA(fixture_name) __attribute__((unused)) *self)
+		FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \
+		const FIXTURE_VARIANT(fixture_name) \
+			__attribute__((unused)) *variant)
 
 /**
  * FIXTURE_VARIANT() - Optionally called once per fixture
@@ -306,9 +308,9 @@
  *       ...
  *     };
  *
- * Defines type of constant parameters provided to FIXTURE_SETUP() and TEST_F()
- * as *variant*. Variants allow the same tests to be run with different
- * arguments.
+ * Defines type of constant parameters provided to FIXTURE_SETUP(), TEST_F() and
+ * FIXTURE_TEARDOWN as *variant*. Variants allow the same tests to be run with
+ * different arguments.
  */
 #define FIXTURE_VARIANT(fixture_name) struct _fixture_variant_##fixture_name
 
@@ -391,7 +393,7 @@
 			fixture_name##_##test_name(_metadata, &self, variant->data); \
 		} \
 		if (_metadata->setup_completed) \
-			fixture_name##_teardown(_metadata, &self); \
+			fixture_name##_teardown(_metadata, &self, variant->data); \
 		__test_check_assert(_metadata); \
 	} \
 	static struct __test_metadata \
-- 
cgit 


From 00f75043e46d5bd2bba87b3fada6c1090b61bd40 Mon Sep 17 00:00:00 2001
From: Daniel Latypov <dlatypov@google.com>
Date: Tue, 18 Jan 2022 11:09:19 -0800
Subject: kunit: tool: make --json handling a bit clearer

Currently kunit_json.get_json_result() will output the JSON-ified test
output to json_path, but iff it's not "stdout".

Instead, move the responsibility entirely over to the one caller.

Signed-off-by: Daniel Latypov <dlatypov@google.com>
Reviewed-by: David Gow <davidgow@google.com>
Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/kunit/kunit.py           | 12 ++++++++----
 tools/testing/kunit/kunit_json.py      | 12 ++----------
 tools/testing/kunit/kunit_tool_test.py |  3 +--
 3 files changed, 11 insertions(+), 16 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py
index 9274c6355809..bd2f7f088c72 100755
--- a/tools/testing/kunit/kunit.py
+++ b/tools/testing/kunit/kunit.py
@@ -216,13 +216,17 @@ def parse_tests(request: KunitParseRequest, input_data: Iterable[str]) -> Tuple[
 	parse_end = time.time()
 
 	if request.json:
-		json_obj = kunit_json.get_json_result(
+		json_str = kunit_json.get_json_result(
 					test=test_result,
 					def_config='kunit_defconfig',
-					build_dir=request.build_dir,
-					json_path=request.json)
+					build_dir=request.build_dir)
 		if request.json == 'stdout':
-			print(json_obj)
+			print(json_str)
+		else:
+			with open(request.json, 'w') as f:
+				f.write(json_str)
+			kunit_parser.print_with_timestamp("Test results stored in %s" %
+				os.path.abspath(request.json))
 
 	if test_result.status != kunit_parser.TestStatus.SUCCESS:
 		return KunitResult(KunitStatus.TEST_FAILURE, parse_end - parse_start), test_result
diff --git a/tools/testing/kunit/kunit_json.py b/tools/testing/kunit/kunit_json.py
index 6862671709bc..61091878f51e 100644
--- a/tools/testing/kunit/kunit_json.py
+++ b/tools/testing/kunit/kunit_json.py
@@ -51,15 +51,7 @@ def _get_group_json(test: Test, def_config: str,
 	return test_group
 
 def get_json_result(test: Test, def_config: str,
-		build_dir: Optional[str], json_path: str) -> str:
+		build_dir: Optional[str]) -> str:
 	test_group = _get_group_json(test, def_config, build_dir)
 	test_group["name"] = "KUnit Test Group"
-	json_obj = json.dumps(test_group, indent=4)
-	if json_path != 'stdout':
-		with open(json_path, 'w') as result_path:
-			result_path.write(json_obj)
-		root = __file__.split('tools/testing/kunit/')[0]
-		kunit_parser.print_with_timestamp(
-			"Test results stored in %s" %
-			os.path.join(root, result_path.name))
-	return json_obj
+	return json.dumps(test_group, indent=4)
diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py
index 352369dffbd9..f7cbc248a405 100755
--- a/tools/testing/kunit/kunit_tool_test.py
+++ b/tools/testing/kunit/kunit_tool_test.py
@@ -469,8 +469,7 @@ class KUnitJsonTest(unittest.TestCase):
 			json_obj = kunit_json.get_json_result(
 				test=test_result,
 				def_config='kunit_defconfig',
-				build_dir=None,
-				json_path='stdout')
+				build_dir=None)
 		return json.loads(json_obj)
 
 	def test_failed_test_json(self):
-- 
cgit 


From 89aa72cd3052b70ade40fa02a018f8f509355790 Mon Sep 17 00:00:00 2001
From: Daniel Latypov <dlatypov@google.com>
Date: Tue, 18 Jan 2022 11:09:20 -0800
Subject: kunit: tool: drop unused KernelDirectoryPath var

Commit be886ba90cce ("kunit: run kunit_tool from any directory")
introduced this variable, but it was unused even in that commit.

Since it's still unused now and callers can instead use
get_kernel_root_path(), delete this var.

Signed-off-by: Daniel Latypov <dlatypov@google.com>
Reviewed-by: David Gow <davidgow@google.com>
Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/kunit/kunit.py | 2 --
 1 file changed, 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py
index bd2f7f088c72..4cb91d191f1d 100755
--- a/tools/testing/kunit/kunit.py
+++ b/tools/testing/kunit/kunit.py
@@ -63,8 +63,6 @@ class KunitRequest(KunitExecRequest, KunitBuildRequest):
 	pass
 
 
-KernelDirectoryPath = sys.argv[0].split('tools/testing/kunit/')[0]
-
 def get_kernel_root_path() -> str:
 	path = sys.argv[0] if not __file__ else __file__
 	parts = os.path.realpath(path).split('tools/testing/kunit')
-- 
cgit 


From e6f61920653925e6fa9aceb5cdb47ecf543986c8 Mon Sep 17 00:00:00 2001
From: Daniel Latypov <dlatypov@google.com>
Date: Tue, 18 Jan 2022 11:09:21 -0800
Subject: kunit: tool: drop last uses of collections.namedtuple

Since we formally require python3.7+ since commit df4b0807ca1a
("kunit: tool: Assert the version requirement"), we can just use
@dataclasses.dataclass instead.

In kunit_config.py, we used namedtuple to create a hashable type that
had `name` and `value` fields and had to subclass it to define a custom
`__str__()`.
@datalcass lets us just define one type instead.

In qemu_config.py, we use namedtuple to allow modules to define various
parameters. Using @dataclass, we can add type-annotations for all these
fields, making our code more typesafe and making it easier for users to
figure out how to define new configs.

Signed-off-by: Daniel Latypov <dlatypov@google.com>
Reviewed-by: David Gow <davidgow@google.com>
Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/kunit/kunit_config.py |  9 +++++----
 tools/testing/kunit/qemu_config.py  | 17 ++++++++++-------
 2 files changed, 15 insertions(+), 11 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/kunit/kunit_config.py b/tools/testing/kunit/kunit_config.py
index 677354546156..ca33e4b7bcc5 100644
--- a/tools/testing/kunit/kunit_config.py
+++ b/tools/testing/kunit/kunit_config.py
@@ -6,16 +6,17 @@
 # Author: Felix Guo <felixguoxiuping@gmail.com>
 # Author: Brendan Higgins <brendanhiggins@google.com>
 
-import collections
+from dataclasses import dataclass
 import re
 from typing import List, Set
 
 CONFIG_IS_NOT_SET_PATTERN = r'^# CONFIG_(\w+) is not set$'
 CONFIG_PATTERN = r'^CONFIG_(\w+)=(\S+|".*")$'
 
-KconfigEntryBase = collections.namedtuple('KconfigEntryBase', ['name', 'value'])
-
-class KconfigEntry(KconfigEntryBase):
+@dataclass(frozen=True)
+class KconfigEntry:
+	name: str
+	value: str
 
 	def __str__(self) -> str:
 		if self.value == 'n':
diff --git a/tools/testing/kunit/qemu_config.py b/tools/testing/kunit/qemu_config.py
index 1672f6184e95..0b6a80398ccc 100644
--- a/tools/testing/kunit/qemu_config.py
+++ b/tools/testing/kunit/qemu_config.py
@@ -5,12 +5,15 @@
 # Copyright (C) 2021, Google LLC.
 # Author: Brendan Higgins <brendanhiggins@google.com>
 
-from collections import namedtuple
+from dataclasses import dataclass
+from typing import List
 
 
-QemuArchParams = namedtuple('QemuArchParams', ['linux_arch',
-					       'kconfig',
-					       'qemu_arch',
-					       'kernel_path',
-					       'kernel_command_line',
-					       'extra_qemu_params'])
+@dataclass(frozen=True)
+class QemuArchParams:
+  linux_arch: str
+  kconfig: str
+  qemu_arch: str
+  kernel_path: str
+  kernel_command_line: str
+  extra_qemu_params: List[str]
-- 
cgit 


From aa1c05558e71422564a664fc4cafbf5999f1de0f Mon Sep 17 00:00:00 2001
From: Daniel Latypov <dlatypov@google.com>
Date: Tue, 18 Jan 2022 11:09:22 -0800
Subject: kunit: tool: simplify code since build_dir can't be None

--build_dir is set to a default of '.kunit' since commit ddbd60c779b4
("kunit: use --build_dir=.kunit as default"), but even before then it
was explicitly set to ''.

So outside of one unit test, there was no way for the build_dir to be
ever be None, and we can simplify code by fixing the unit test and
enforcing that via updated type annotations.

E.g. this lets us drop `get_file_path()` since it's now exactly
equivalent to os.path.join().

Note: there's some `if build_dir` checks that also fail if build_dir is
explicitly set to '' that just guard against passing "O=" to make.
But running `make O=` works just fine, so drop these checks.

Signed-off-by: Daniel Latypov <dlatypov@google.com>
Reviewed-by: David Gow <davidgow@google.com>
Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/kunit/kunit_json.py      |  8 ++----
 tools/testing/kunit/kunit_kernel.py    | 51 +++++++++++++---------------------
 tools/testing/kunit/kunit_tool_test.py |  2 +-
 3 files changed, 24 insertions(+), 37 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/kunit/kunit_json.py b/tools/testing/kunit/kunit_json.py
index 61091878f51e..24d103049bca 100644
--- a/tools/testing/kunit/kunit_json.py
+++ b/tools/testing/kunit/kunit_json.py
@@ -12,12 +12,11 @@ import os
 import kunit_parser
 
 from kunit_parser import Test, TestStatus
-from typing import Any, Dict, Optional
+from typing import Any, Dict
 
 JsonObj = Dict[str, Any]
 
-def _get_group_json(test: Test, def_config: str,
-		build_dir: Optional[str]) -> JsonObj:
+def _get_group_json(test: Test, def_config: str, build_dir: str) -> JsonObj:
 	sub_groups = []  # List[JsonObj]
 	test_cases = []  # List[JsonObj]
 
@@ -50,8 +49,7 @@ def _get_group_json(test: Test, def_config: str,
 	}
 	return test_group
 
-def get_json_result(test: Test, def_config: str,
-		build_dir: Optional[str]) -> str:
+def get_json_result(test: Test, def_config: str, build_dir: str) -> str:
 	test_group = _get_group_json(test, def_config, build_dir)
 	test_group["name"] = "KUnit Test Group"
 	return json.dumps(test_group, indent=4)
diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
index 3c4196cef3ed..385eb9f5fc0b 100644
--- a/tools/testing/kunit/kunit_kernel.py
+++ b/tools/testing/kunit/kunit_kernel.py
@@ -29,11 +29,6 @@ OUTFILE_PATH = 'test.log'
 ABS_TOOL_PATH = os.path.abspath(os.path.dirname(__file__))
 QEMU_CONFIGS_DIR = os.path.join(ABS_TOOL_PATH, 'qemu_configs')
 
-def get_file_path(build_dir, default):
-	if build_dir:
-		default = os.path.join(build_dir, default)
-	return default
-
 class ConfigError(Exception):
 	"""Represents an error trying to configure the Linux kernel."""
 
@@ -60,17 +55,15 @@ class LinuxSourceTreeOperations(object):
 	def make_arch_qemuconfig(self, kconfig: kunit_config.Kconfig) -> None:
 		pass
 
-	def make_allyesconfig(self, build_dir, make_options) -> None:
+	def make_allyesconfig(self, build_dir: str, make_options) -> None:
 		raise ConfigError('Only the "um" arch is supported for alltests')
 
-	def make_olddefconfig(self, build_dir, make_options) -> None:
-		command = ['make', 'ARCH=' + self._linux_arch, 'olddefconfig']
+	def make_olddefconfig(self, build_dir: str, make_options) -> None:
+		command = ['make', 'ARCH=' + self._linux_arch, 'O=' + build_dir, 'olddefconfig']
 		if self._cross_compile:
 			command += ['CROSS_COMPILE=' + self._cross_compile]
 		if make_options:
 			command.extend(make_options)
-		if build_dir:
-			command += ['O=' + build_dir]
 		print('Populating config with:\n$', ' '.join(command))
 		try:
 			subprocess.check_output(command, stderr=subprocess.STDOUT)
@@ -79,14 +72,12 @@ class LinuxSourceTreeOperations(object):
 		except subprocess.CalledProcessError as e:
 			raise ConfigError(e.output.decode())
 
-	def make(self, jobs, build_dir, make_options) -> None:
-		command = ['make', 'ARCH=' + self._linux_arch, '--jobs=' + str(jobs)]
+	def make(self, jobs, build_dir: str, make_options) -> None:
+		command = ['make', 'ARCH=' + self._linux_arch, 'O=' + build_dir, '--jobs=' + str(jobs)]
 		if make_options:
 			command.extend(make_options)
 		if self._cross_compile:
 			command += ['CROSS_COMPILE=' + self._cross_compile]
-		if build_dir:
-			command += ['O=' + build_dir]
 		print('Building with:\n$', ' '.join(command))
 		try:
 			proc = subprocess.Popen(command,
@@ -144,14 +135,12 @@ class LinuxSourceTreeOperationsUml(LinuxSourceTreeOperations):
 	def __init__(self, cross_compile=None):
 		super().__init__(linux_arch='um', cross_compile=cross_compile)
 
-	def make_allyesconfig(self, build_dir, make_options) -> None:
+	def make_allyesconfig(self, build_dir: str, make_options) -> None:
 		kunit_parser.print_with_timestamp(
 			'Enabling all CONFIGs for UML...')
-		command = ['make', 'ARCH=um', 'allyesconfig']
+		command = ['make', 'ARCH=um', 'O=' + build_dir, 'allyesconfig']
 		if make_options:
 			command.extend(make_options)
-		if build_dir:
-			command += ['O=' + build_dir]
 		process = subprocess.Popen(
 			command,
 			stdout=subprocess.DEVNULL,
@@ -168,24 +157,24 @@ class LinuxSourceTreeOperationsUml(LinuxSourceTreeOperations):
 
 	def start(self, params: List[str], build_dir: str) -> subprocess.Popen:
 		"""Runs the Linux UML binary. Must be named 'linux'."""
-		linux_bin = get_file_path(build_dir, 'linux')
+		linux_bin = os.path.join(build_dir, 'linux')
 		return subprocess.Popen([linux_bin] + params,
 					   stdin=subprocess.PIPE,
 					   stdout=subprocess.PIPE,
 					   stderr=subprocess.STDOUT,
 					   text=True, errors='backslashreplace')
 
-def get_kconfig_path(build_dir) -> str:
-	return get_file_path(build_dir, KCONFIG_PATH)
+def get_kconfig_path(build_dir: str) -> str:
+	return os.path.join(build_dir, KCONFIG_PATH)
 
-def get_kunitconfig_path(build_dir) -> str:
-	return get_file_path(build_dir, KUNITCONFIG_PATH)
+def get_kunitconfig_path(build_dir: str) -> str:
+	return os.path.join(build_dir, KUNITCONFIG_PATH)
 
-def get_old_kunitconfig_path(build_dir) -> str:
-	return get_file_path(build_dir, OLD_KUNITCONFIG_PATH)
+def get_old_kunitconfig_path(build_dir: str) -> str:
+	return os.path.join(build_dir, OLD_KUNITCONFIG_PATH)
 
-def get_outfile_path(build_dir) -> str:
-	return get_file_path(build_dir, OUTFILE_PATH)
+def get_outfile_path(build_dir: str) -> str:
+	return os.path.join(build_dir, OUTFILE_PATH)
 
 def get_source_tree_ops(arch: str, cross_compile: Optional[str]) -> LinuxSourceTreeOperations:
 	config_path = os.path.join(QEMU_CONFIGS_DIR, arch + '.py')
@@ -269,7 +258,7 @@ class LinuxSourceTree(object):
 			return False
 		return True
 
-	def validate_config(self, build_dir) -> bool:
+	def validate_config(self, build_dir: str) -> bool:
 		kconfig_path = get_kconfig_path(build_dir)
 		validated_kconfig = kunit_config.parse_file(kconfig_path)
 		if self._kconfig.is_subset_of(validated_kconfig):
@@ -284,7 +273,7 @@ class LinuxSourceTree(object):
 		logging.error(message)
 		return False
 
-	def build_config(self, build_dir, make_options) -> bool:
+	def build_config(self, build_dir: str, make_options) -> bool:
 		kconfig_path = get_kconfig_path(build_dir)
 		if build_dir and not os.path.exists(build_dir):
 			os.mkdir(build_dir)
@@ -312,7 +301,7 @@ class LinuxSourceTree(object):
 		old_kconfig = kunit_config.parse_file(old_path)
 		return old_kconfig.entries() != self._kconfig.entries()
 
-	def build_reconfig(self, build_dir, make_options) -> bool:
+	def build_reconfig(self, build_dir: str, make_options) -> bool:
 		"""Creates a new .config if it is not a subset of the .kunitconfig."""
 		kconfig_path = get_kconfig_path(build_dir)
 		if not os.path.exists(kconfig_path):
@@ -327,7 +316,7 @@ class LinuxSourceTree(object):
 		os.remove(kconfig_path)
 		return self.build_config(build_dir, make_options)
 
-	def build_kernel(self, alltests, jobs, build_dir, make_options) -> bool:
+	def build_kernel(self, alltests, jobs, build_dir: str, make_options) -> bool:
 		try:
 			if alltests:
 				self._ops.make_allyesconfig(build_dir, make_options)
diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py
index f7cbc248a405..a3c036a620b2 100755
--- a/tools/testing/kunit/kunit_tool_test.py
+++ b/tools/testing/kunit/kunit_tool_test.py
@@ -469,7 +469,7 @@ class KUnitJsonTest(unittest.TestCase):
 			json_obj = kunit_json.get_json_result(
 				test=test_result,
 				def_config='kunit_defconfig',
-				build_dir=None)
+				build_dir='.kunit')
 		return json.loads(json_obj)
 
 	def test_failed_test_json(self):
-- 
cgit 


From 6bd0f52ee8f400a558f1c0f33e1f3fd3ef4922a8 Mon Sep 17 00:00:00 2001
From: Daniel Latypov <dlatypov@google.com>
Date: Thu, 24 Feb 2022 11:20:34 -0800
Subject: kunit: tool: readability tweaks in KernelCI json generation logic

Use a more idiomatic check that a list is non-empty (`if mylist:`) and
simplify the function body by dedenting and using a dict to map between
the kunit TestStatus enum => KernelCI json status string.

The dict hopefully makes it less likely to have bugs like commit
9a6bb30a8830 ("kunit: tool: fix --json output for skipped tests").

Signed-off-by: Daniel Latypov <dlatypov@google.com>
Reviewed-by: David Gow <davidgow@google.com>
Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/kunit/kunit_json.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/kunit/kunit_json.py b/tools/testing/kunit/kunit_json.py
index 24d103049bca..14a480d3308a 100644
--- a/tools/testing/kunit/kunit_json.py
+++ b/tools/testing/kunit/kunit_json.py
@@ -16,24 +16,24 @@ from typing import Any, Dict
 
 JsonObj = Dict[str, Any]
 
+_status_map: Dict[TestStatus, str] = {
+	TestStatus.SUCCESS: "PASS",
+	TestStatus.SKIPPED: "SKIP",
+	TestStatus.TEST_CRASHED: "ERROR",
+}
+
 def _get_group_json(test: Test, def_config: str, build_dir: str) -> JsonObj:
 	sub_groups = []  # List[JsonObj]
 	test_cases = []  # List[JsonObj]
 
 	for subtest in test.subtests:
-		if len(subtest.subtests):
+		if subtest.subtests:
 			sub_group = _get_group_json(subtest, def_config,
 				build_dir)
 			sub_groups.append(sub_group)
-		else:
-			test_case = {"name": subtest.name, "status": "FAIL"}
-			if subtest.status == TestStatus.SUCCESS:
-				test_case["status"] = "PASS"
-			elif subtest.status == TestStatus.SKIPPED:
-				test_case["status"] = "SKIP"
-			elif subtest.status == TestStatus.TEST_CRASHED:
-				test_case["status"] = "ERROR"
-			test_cases.append(test_case)
+			continue
+		status = _status_map.get(subtest.status, "FAIL")
+		test_cases.append({"name": subtest.name, "status": status})
 
 	test_group = {
 		"name": test.name,
-- 
cgit 


From ee96d25f2fa657a29ab59345898dc4ff616cfe84 Mon Sep 17 00:00:00 2001
From: Daniel Latypov <dlatypov@google.com>
Date: Thu, 24 Feb 2022 11:20:35 -0800
Subject: kunit: tool: refactor how we plumb metadata into JSON

When using --json, kunit.py run/exec/parse will produce results in
KernelCI json format.
As part of that, we include the build_dir that was used, and we
(incorrectly) hardcode in the arch, etc.

We'll want a way to plumb more values (as well as the correct `arch`),
so this patch groups those fields into kunit_json.Metadata type.
This patch should have no user visible changes.

And since we only used build_dir in KunitParseRequest for json, we can
now move it out of that struct and add it into KunitExecRequest, which
needs it and used to get it via inheritance.

Signed-off-by: Daniel Latypov <dlatypov@google.com>
Reviewed-by: David Gow <davidgow@google.com>
Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/kunit/kunit.py           | 16 +++++++++-------
 tools/testing/kunit/kunit_json.py      | 29 ++++++++++++++++++++---------
 tools/testing/kunit/kunit_tool_test.py |  9 ++++-----
 3 files changed, 33 insertions(+), 21 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py
index 4cb91d191f1d..7dd6ed42141f 100755
--- a/tools/testing/kunit/kunit.py
+++ b/tools/testing/kunit/kunit.py
@@ -47,11 +47,11 @@ class KunitBuildRequest(KunitConfigRequest):
 @dataclass
 class KunitParseRequest:
 	raw_output: Optional[str]
-	build_dir: str
 	json: Optional[str]
 
 @dataclass
 class KunitExecRequest(KunitParseRequest):
+	build_dir: str
 	timeout: int
 	alltests: bool
 	filter_glob: str
@@ -153,6 +153,8 @@ def exec_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest) -
 				test_glob = request.filter_glob.split('.', maxsplit=2)[1]
 				filter_globs = [g + '.'+ test_glob for g in filter_globs]
 
+	metadata = kunit_json.Metadata(build_dir=request.build_dir)
+
 	test_counts = kunit_parser.TestCounts()
 	exec_time = 0.0
 	for i, filter_glob in enumerate(filter_globs):
@@ -165,7 +167,7 @@ def exec_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest) -
 			filter_glob=filter_glob,
 			build_dir=request.build_dir)
 
-		_, test_result = parse_tests(request, run_result)
+		_, test_result = parse_tests(request, metadata, run_result)
 		# run_kernel() doesn't block on the kernel exiting.
 		# That only happens after we get the last line of output from `run_result`.
 		# So exec_time here actually contains parsing + execution time, which is fine.
@@ -189,7 +191,7 @@ def _map_to_overall_status(test_status: kunit_parser.TestStatus) -> KunitStatus:
 	else:
 		return KunitStatus.TEST_FAILURE
 
-def parse_tests(request: KunitParseRequest, input_data: Iterable[str]) -> Tuple[KunitResult, kunit_parser.Test]:
+def parse_tests(request: KunitParseRequest, metadata: kunit_json.Metadata, input_data: Iterable[str]) -> Tuple[KunitResult, kunit_parser.Test]:
 	parse_start = time.time()
 
 	test_result = kunit_parser.Test()
@@ -216,8 +218,7 @@ def parse_tests(request: KunitParseRequest, input_data: Iterable[str]) -> Tuple[
 	if request.json:
 		json_str = kunit_json.get_json_result(
 					test=test_result,
-					def_config='kunit_defconfig',
-					build_dir=request.build_dir)
+					metadata=metadata)
 		if request.json == 'stdout':
 			print(json_str)
 		else:
@@ -504,10 +505,11 @@ def main(argv, linux=None):
 		else:
 			with open(cli_args.file, 'r', errors='backslashreplace') as f:
 				kunit_output = f.read().splitlines()
+		# We know nothing about how the result was created!
+		metadata = kunit_json.Metadata()
 		request = KunitParseRequest(raw_output=cli_args.raw_output,
-					    build_dir='',
 					    json=cli_args.json)
-		result, _ = parse_tests(request, kunit_output)
+		result, _ = parse_tests(request, metadata, kunit_output)
 		if result.status != KunitStatus.SUCCESS:
 			sys.exit(1)
 	else:
diff --git a/tools/testing/kunit/kunit_json.py b/tools/testing/kunit/kunit_json.py
index 14a480d3308a..0a7e29a315ed 100644
--- a/tools/testing/kunit/kunit_json.py
+++ b/tools/testing/kunit/kunit_json.py
@@ -6,6 +6,7 @@
 # Copyright (C) 2020, Google LLC.
 # Author: Heidi Fahim <heidifahim@google.com>
 
+from dataclasses import dataclass
 import json
 import os
 
@@ -14,6 +15,13 @@ import kunit_parser
 from kunit_parser import Test, TestStatus
 from typing import Any, Dict
 
+@dataclass
+class Metadata:
+	"""Stores metadata about this run to include in get_json_result()."""
+	arch: str = 'UM'
+	def_config: str = 'kunit_defconfig'
+	build_dir: str = ''
+
 JsonObj = Dict[str, Any]
 
 _status_map: Dict[TestStatus, str] = {
@@ -22,14 +30,13 @@ _status_map: Dict[TestStatus, str] = {
 	TestStatus.TEST_CRASHED: "ERROR",
 }
 
-def _get_group_json(test: Test, def_config: str, build_dir: str) -> JsonObj:
+def _get_group_json(test: Test, common_fields: JsonObj) -> JsonObj:
 	sub_groups = []  # List[JsonObj]
 	test_cases = []  # List[JsonObj]
 
 	for subtest in test.subtests:
 		if subtest.subtests:
-			sub_group = _get_group_json(subtest, def_config,
-				build_dir)
+			sub_group = _get_group_json(subtest, common_fields)
 			sub_groups.append(sub_group)
 			continue
 		status = _status_map.get(subtest.status, "FAIL")
@@ -37,19 +44,23 @@ def _get_group_json(test: Test, def_config: str, build_dir: str) -> JsonObj:
 
 	test_group = {
 		"name": test.name,
-		"arch": "UM",
-		"defconfig": def_config,
-		"build_environment": build_dir,
 		"sub_groups": sub_groups,
 		"test_cases": test_cases,
+	}
+	test_group.update(common_fields)
+	return test_group
+
+def get_json_result(test: Test, metadata: Metadata) -> str:
+	common_fields = {
+		"arch": metadata.arch,
+		"defconfig": metadata.def_config,
+		"build_environment": metadata.build_dir,
 		"lab_name": None,
 		"kernel": None,
 		"job": None,
 		"git_branch": "kselftest",
 	}
-	return test_group
 
-def get_json_result(test: Test, def_config: str, build_dir: str) -> str:
-	test_group = _get_group_json(test, def_config, build_dir)
+	test_group = _get_group_json(test, common_fields)
 	test_group["name"] = "KUnit Test Group"
 	return json.dumps(test_group, indent=4)
diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py
index a3c036a620b2..60806994683c 100755
--- a/tools/testing/kunit/kunit_tool_test.py
+++ b/tools/testing/kunit/kunit_tool_test.py
@@ -468,8 +468,7 @@ class KUnitJsonTest(unittest.TestCase):
 			test_result = kunit_parser.parse_run_tests(file)
 			json_obj = kunit_json.get_json_result(
 				test=test_result,
-				def_config='kunit_defconfig',
-				build_dir='.kunit')
+				metadata=kunit_json.Metadata())
 		return json.loads(json_obj)
 
 	def test_failed_test_json(self):
@@ -691,7 +690,7 @@ class KUnitMainTest(unittest.TestCase):
 		self.linux_source_mock.run_kernel.return_value = ['TAP version 14', 'init: random output'] + want
 
 		got = kunit._list_tests(self.linux_source_mock,
-				     kunit.KunitExecRequest(None, '.kunit', None, 300, False, 'suite*', None, 'suite'))
+				     kunit.KunitExecRequest(None, None, '.kunit', 300, False, 'suite*', None, 'suite'))
 
 		self.assertEqual(got, want)
 		# Should respect the user's filter glob when listing tests.
@@ -706,7 +705,7 @@ class KUnitMainTest(unittest.TestCase):
 
 		# Should respect the user's filter glob when listing tests.
 		mock_tests.assert_called_once_with(mock.ANY,
-				     kunit.KunitExecRequest(None, '.kunit', None, 300, False, 'suite*.test*', None, 'suite'))
+				     kunit.KunitExecRequest(None, None, '.kunit', 300, False, 'suite*.test*', None, 'suite'))
 		self.linux_source_mock.run_kernel.assert_has_calls([
 			mock.call(args=None, build_dir='.kunit', filter_glob='suite.test*', timeout=300),
 			mock.call(args=None, build_dir='.kunit', filter_glob='suite2.test*', timeout=300),
@@ -719,7 +718,7 @@ class KUnitMainTest(unittest.TestCase):
 
 		# Should respect the user's filter glob when listing tests.
 		mock_tests.assert_called_once_with(mock.ANY,
-				     kunit.KunitExecRequest(None, '.kunit', None, 300, False, 'suite*', None, 'test'))
+				     kunit.KunitExecRequest(None, None, '.kunit', 300, False, 'suite*', None, 'test'))
 		self.linux_source_mock.run_kernel.assert_has_calls([
 			mock.call(args=None, build_dir='.kunit', filter_glob='suite.test1', timeout=300),
 			mock.call(args=None, build_dir='.kunit', filter_glob='suite.test2', timeout=300),
-- 
cgit 


From 885210d348f71e14b91bdf626d5d9039bf1afb03 Mon Sep 17 00:00:00 2001
From: Daniel Latypov <dlatypov@google.com>
Date: Thu, 24 Feb 2022 11:20:36 -0800
Subject: kunit: tool: properly report the used arch for --json, or '' if not
 known

Before, kunit.py always printed "arch": "UM" in its json output, but...
1. With `kunit.py parse`, we could be parsing output from anywhere, so
    we can't say that.
2. Capitalizing it is probably wrong, as it's `ARCH=um`
3. Commit 87c9c1631788 ("kunit: tool: add support for QEMU") made it so
   kunit.py could knowingly run a different arch, yet we'd still always
   claim "UM".

This patch addresses all of those. E.g.

1.
$ ./tools/testing/kunit/kunit.py parse .kunit/test.log --json | grep -o '"arch.*' | sort -u
"arch": "",

2.
$ ./tools/testing/kunit/kunit.py run --json | ...
"arch": "um",

3.
$ ./tools/testing/kunit/kunit.py run --json --arch=x86_64 | ...
"arch": "x86_64",

Signed-off-by: Daniel Latypov <dlatypov@google.com>
Reviewed-by: David Gow <davidgow@google.com>
Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/kunit/kunit.py        | 2 +-
 tools/testing/kunit/kunit_json.py   | 4 ++--
 tools/testing/kunit/kunit_kernel.py | 2 ++
 3 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py
index 7dd6ed42141f..5c03f1546732 100755
--- a/tools/testing/kunit/kunit.py
+++ b/tools/testing/kunit/kunit.py
@@ -153,7 +153,7 @@ def exec_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest) -
 				test_glob = request.filter_glob.split('.', maxsplit=2)[1]
 				filter_globs = [g + '.'+ test_glob for g in filter_globs]
 
-	metadata = kunit_json.Metadata(build_dir=request.build_dir)
+	metadata = kunit_json.Metadata(arch=linux.arch(), build_dir=request.build_dir, def_config='kunit_defconfig')
 
 	test_counts = kunit_parser.TestCounts()
 	exec_time = 0.0
diff --git a/tools/testing/kunit/kunit_json.py b/tools/testing/kunit/kunit_json.py
index 0a7e29a315ed..1212423fe6bc 100644
--- a/tools/testing/kunit/kunit_json.py
+++ b/tools/testing/kunit/kunit_json.py
@@ -18,8 +18,8 @@ from typing import Any, Dict
 @dataclass
 class Metadata:
 	"""Stores metadata about this run to include in get_json_result()."""
-	arch: str = 'UM'
-	def_config: str = 'kunit_defconfig'
+	arch: str = ''
+	def_config: str = ''
 	build_dir: str = ''
 
 JsonObj = Dict[str, Any]
diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
index 385eb9f5fc0b..483f78e15ce9 100644
--- a/tools/testing/kunit/kunit_kernel.py
+++ b/tools/testing/kunit/kunit_kernel.py
@@ -249,6 +249,8 @@ class LinuxSourceTree(object):
 			kconfig = kunit_config.parse_from_string('\n'.join(kconfig_add))
 			self._kconfig.merge_in_entries(kconfig)
 
+	def arch(self) -> str:
+		return self._arch
 
 	def clean(self) -> bool:
 		try:
-- 
cgit 


From d34f82d67d2b1bdbed6bf343f0c9e6d828438976 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 23 Feb 2022 21:53:50 -0800
Subject: kunit: tool: Do not colorize output when redirected

Filling log files with color codes makes diffs and other comparisons
difficult. Only emit vt100 codes when the stdout is a TTY.

Cc: Brendan Higgins <brendanhiggins@google.com>
Cc: linux-kselftest@vger.kernel.org
Cc: kunit-dev@googlegroups.com
Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: David Gow <davidgow@google.com>
Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/kunit/kunit_parser.py | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py
index 05ff334761dd..807ed2bd6832 100644
--- a/tools/testing/kunit/kunit_parser.py
+++ b/tools/testing/kunit/kunit_parser.py
@@ -11,6 +11,7 @@
 
 from __future__ import annotations
 import re
+import sys
 
 import datetime
 from enum import Enum, auto
@@ -503,14 +504,20 @@ RESET = '\033[0;0m'
 
 def red(text: str) -> str:
 	"""Returns inputted string with red color code."""
+	if not sys.stdout.isatty():
+		return text
 	return '\033[1;31m' + text + RESET
 
 def yellow(text: str) -> str:
 	"""Returns inputted string with yellow color code."""
+	if not sys.stdout.isatty():
+		return text
 	return '\033[1;33m' + text + RESET
 
 def green(text: str) -> str:
 	"""Returns inputted string with green color code."""
+	if not sys.stdout.isatty():
+		return text
 	return '\033[1;32m' + text + RESET
 
 ANSI_LEN = len(red(''))
-- 
cgit 


From 4eeebce6ac4ad80ee8243bb847c98e0e55848d47 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin@isovalent.com>
Date: Mon, 4 Apr 2022 15:09:44 +0100
Subject: selftests/bpf: Fix parsing of prog types in UAPI hdr for bpftool sync

The script for checking that various lists of types in bpftool remain in
sync with the UAPI BPF header uses a regex to parse enum bpf_prog_type.
If this enum contains a set of values different from the list of program
types in bpftool, it complains.

This script should have reported the addition, some time ago, of the new
BPF_PROG_TYPE_SYSCALL, which was not reported to bpftool's program types
list. It failed to do so, because it failed to parse that new type from
the enum. This is because the new value, in the BPF header, has an
explicative comment on the same line, and the regex does not support
that.

Let's update the script to support parsing enum values when they have
comments on the same line.

Signed-off-by: Quentin Monnet <quentin@isovalent.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220404140944.64744-1-quentin@isovalent.com
---
 tools/testing/selftests/bpf/test_bpftool_synctypes.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
index 6bf21e47882a..c0e7acd698ed 100755
--- a/tools/testing/selftests/bpf/test_bpftool_synctypes.py
+++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
@@ -180,7 +180,7 @@ class FileExtractor(object):
         @enum_name: name of the enum to parse
         """
         start_marker = re.compile(f'enum {enum_name} {{\n')
-        pattern = re.compile('^\s*(BPF_\w+),?$')
+        pattern = re.compile('^\s*(BPF_\w+),?(\s+/\*.*\*/)?$')
         end_marker = re.compile('^};')
         parser = BlockParser(self.reader)
         parser.search_block(start_marker)
-- 
cgit 


From d298761746d59ca169dfe68b4d0a983c3053573b Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Mon, 4 Apr 2022 16:21:01 +0200
Subject: selftests/bpf: Define SYS_NANOSLEEP_KPROBE_NAME for aarch64

attach_probe selftest fails on aarch64 with `failed to create kprobe
'sys_nanosleep+0x0' perf event: No such file or directory`. This is
because, like on several other architectures, nanosleep has a prefix.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Tested-by: Alan Maguire <alan.maguire@oracle.com>
Link: https://lore.kernel.org/bpf/20220404142101.27900-1-iii@linux.ibm.com
---
 tools/testing/selftests/bpf/test_progs.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 93c1ff705533..eec4c7385b14 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -332,6 +332,8 @@ int trigger_module_test_write(int write_sz);
 #define SYS_NANOSLEEP_KPROBE_NAME "__x64_sys_nanosleep"
 #elif defined(__s390x__)
 #define SYS_NANOSLEEP_KPROBE_NAME "__s390x_sys_nanosleep"
+#elif defined(__aarch64__)
+#define SYS_NANOSLEEP_KPROBE_NAME "__arm64_sys_nanosleep"
 #else
 #define SYS_NANOSLEEP_KPROBE_NAME "sys_nanosleep"
 #endif
-- 
cgit 


From baa3331503271c84c252ab42475729c028b07acf Mon Sep 17 00:00:00 2001
From: Daniel Latypov <dlatypov@google.com>
Date: Sat, 26 Feb 2022 13:23:25 -0800
Subject: kunit: tool: more descriptive metavars/--help output

Before, our help output contained lines like
  --kconfig_add KCONFIG_ADD
  --qemu_config qemu_config
  --jobs jobs

They're not very helpful.

The former kind come from the automatic 'metavar' we get from argparse,
the uppercase version of the flag name.
The latter are where we manually specified metavar as the flag name.

After:
  --build_dir DIR
  --make_options X=Y
  --kunitconfig PATH
  --kconfig_add CONFIG_X=Y
  --arch ARCH
  --cross_compile PREFIX
  --qemu_config FILE
  --jobs N
  --timeout SECONDS
  --raw_output [{all,kunit}]
  --json [FILE]

This patch tries to make the code more clear by specifying the _type_ of
input we expect, e.g. --build_dir is a DIR, --qemu_config is a FILE.
I also switched it to uppercase since it looked more clearly like
placeholder text that way.

This patch also changes --raw_output to specify `choices` to make it
more clear what the options are, and this way argparse can validate it
for us, as shown by the added test case.

Signed-off-by: Daniel Latypov <dlatypov@google.com>
Reviewed-by: David Gow <davidgow@google.com>
Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/kunit/kunit.py           | 26 ++++++++++++--------------
 tools/testing/kunit/kunit_tool_test.py |  5 +++++
 2 files changed, 17 insertions(+), 14 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py
index 5c03f1546732..6dc710d3996b 100755
--- a/tools/testing/kunit/kunit.py
+++ b/tools/testing/kunit/kunit.py
@@ -206,8 +206,6 @@ def parse_tests(request: KunitParseRequest, metadata: kunit_json.Metadata, input
 			pass
 		elif request.raw_output == 'kunit':
 			output = kunit_parser.extract_tap_lines(output)
-		else:
-			print(f'Unknown --raw_output option "{request.raw_output}"', file=sys.stderr)
 		for line in output:
 			print(line.rstrip())
 
@@ -284,10 +282,10 @@ def add_common_opts(parser) -> None:
 	parser.add_argument('--build_dir',
 			    help='As in the make command, it specifies the build '
 			    'directory.',
-			    type=str, default='.kunit', metavar='build_dir')
+			    type=str, default='.kunit', metavar='DIR')
 	parser.add_argument('--make_options',
 			    help='X=Y make option, can be repeated.',
-			    action='append')
+			    action='append', metavar='X=Y')
 	parser.add_argument('--alltests',
 			    help='Run all KUnit tests through allyesconfig',
 			    action='store_true')
@@ -295,11 +293,11 @@ def add_common_opts(parser) -> None:
 			     help='Path to Kconfig fragment that enables KUnit tests.'
 			     ' If given a directory, (e.g. lib/kunit), "/.kunitconfig" '
 			     'will get  automatically appended.',
-			     metavar='kunitconfig')
+			     metavar='PATH')
 	parser.add_argument('--kconfig_add',
 			     help='Additional Kconfig options to append to the '
 			     '.kunitconfig, e.g. CONFIG_KASAN=y. Can be repeated.',
-			    action='append')
+			    action='append', metavar='CONFIG_X=Y')
 
 	parser.add_argument('--arch',
 			    help=('Specifies the architecture to run tests under. '
@@ -307,7 +305,7 @@ def add_common_opts(parser) -> None:
 				  'string passed to the ARCH make param, '
 				  'e.g. i386, x86_64, arm, um, etc. Non-UML '
 				  'architectures run on QEMU.'),
-			    type=str, default='um', metavar='arch')
+			    type=str, default='um', metavar='ARCH')
 
 	parser.add_argument('--cross_compile',
 			    help=('Sets make\'s CROSS_COMPILE variable; it should '
@@ -319,18 +317,18 @@ def add_common_opts(parser) -> None:
 				  'if you have downloaded the microblaze toolchain '
 				  'from the 0-day website to a directory in your '
 				  'home directory called `toolchains`).'),
-			    metavar='cross_compile')
+			    metavar='PREFIX')
 
 	parser.add_argument('--qemu_config',
 			    help=('Takes a path to a path to a file containing '
 				  'a QemuArchParams object.'),
-			    type=str, metavar='qemu_config')
+			    type=str, metavar='FILE')
 
 def add_build_opts(parser) -> None:
 	parser.add_argument('--jobs',
 			    help='As in the make command, "Specifies  the number of '
 			    'jobs (commands) to run simultaneously."',
-			    type=int, default=get_default_jobs(), metavar='jobs')
+			    type=int, default=get_default_jobs(), metavar='N')
 
 def add_exec_opts(parser) -> None:
 	parser.add_argument('--timeout',
@@ -339,7 +337,7 @@ def add_exec_opts(parser) -> None:
 			    'tests.',
 			    type=int,
 			    default=300,
-			    metavar='timeout')
+			    metavar='SECONDS')
 	parser.add_argument('filter_glob',
 			    help='Filter which KUnit test suites/tests run at '
 			    'boot-time, e.g. list* or list*.*del_test',
@@ -349,7 +347,7 @@ def add_exec_opts(parser) -> None:
 			    metavar='filter_glob')
 	parser.add_argument('--kernel_args',
 			    help='Kernel command-line parameters. Maybe be repeated',
-			     action='append')
+			     action='append', metavar='')
 	parser.add_argument('--run_isolated', help='If set, boot the kernel for each '
 			    'individual suite/test. This is can be useful for debugging '
 			    'a non-hermetic test, one that might pass/fail based on '
@@ -360,13 +358,13 @@ def add_exec_opts(parser) -> None:
 def add_parse_opts(parser) -> None:
 	parser.add_argument('--raw_output', help='If set don\'t format output from kernel. '
 			    'If set to --raw_output=kunit, filters to just KUnit output.',
-			    type=str, nargs='?', const='all', default=None)
+			     type=str, nargs='?', const='all', default=None, choices=['all', 'kunit'])
 	parser.add_argument('--json',
 			    nargs='?',
 			    help='Stores test results in a JSON, and either '
 			    'prints to stdout or saves to file if a '
 			    'filename is specified',
-			    type=str, const='stdout', default=None)
+			    type=str, const='stdout', default=None, metavar='FILE')
 
 def main(argv, linux=None):
 	parser = argparse.ArgumentParser(
diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py
index 60806994683c..210df0f443e6 100755
--- a/tools/testing/kunit/kunit_tool_test.py
+++ b/tools/testing/kunit/kunit_tool_test.py
@@ -593,6 +593,11 @@ class KUnitMainTest(unittest.TestCase):
 			self.assertNotEqual(call, mock.call(StrContains('Testing complete.')))
 			self.assertNotEqual(call, mock.call(StrContains(' 0 tests run')))
 
+	def test_run_raw_output_invalid(self):
+		self.linux_source_mock.run_kernel = mock.Mock(return_value=[])
+		with self.assertRaises(SystemExit) as e:
+			kunit.main(['run', '--raw_output=invalid'], self.linux_source_mock)
+
 	def test_run_raw_output_does_not_take_positional_args(self):
 		# --raw_output is a string flag, but we don't want it to consume
 		# any positional arguments, only ones after an '='
-- 
cgit 


From 630301b0d59dd85e43cca29382c459f9880af5f0 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 4 Apr 2022 16:42:01 -0700
Subject: selftests/bpf: Add basic USDT selftests

Add semaphore-based USDT to test_progs itself and write basic tests to
valicate both auto-attachment and manual attachment logic, as well as
BPF-side functionality.

Also add subtests to validate that libbpf properly deduplicates USDT
specs and handles spec overflow situations correctly, as well as proper
"rollback" of partially-attached multi-spec USDT.

BPF-side of selftest intentionally consists of two files to validate
that usdt.bpf.h header can be included from multiple source code files
that are subsequently linked into final BPF object file without causing
any symbol duplication or other issues. We are validating that __weak
maps and bpf_usdt_xxx() API functions defined in usdt.bpf.h do work as
intended.

USDT selftests utilize sys/sdt.h header that on Ubuntu systems comes
from systemtap-sdt-devel package. But to simplify everyone's life,
including CI but especially casual contributors to bpf/bpf-next that
are trying to build selftests, I've checked in sys/sdt.h header from [0]
directly. This way it will work on all architectures and distros without
having to figure it out for every relevant combination and adding any
extra implicit package dependencies.

  [0] https://sourceware.org/git?p=systemtap.git;a=blob_plain;f=includes/sys/sdt.h;h=ca0162b4dc57520b96638c8ae79ad547eb1dd3a1;hb=HEAD

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Alan Maguire <alan.maguire@oracle.com>
Acked-by: Dave Marchevsky <davemarchevsky@fb.com>
Link: https://lore.kernel.org/bpf/20220404234202.331384-7-andrii@kernel.org
---
 tools/testing/selftests/bpf/Makefile               |  14 +-
 tools/testing/selftests/bpf/prog_tests/usdt.c      | 313 +++++++++++++
 tools/testing/selftests/bpf/progs/test_usdt.c      |  96 ++++
 .../selftests/bpf/progs/test_usdt_multispec.c      |  34 ++
 tools/testing/selftests/bpf/sdt-config.h           |   6 +
 tools/testing/selftests/bpf/sdt.h                  | 513 +++++++++++++++++++++
 6 files changed, 970 insertions(+), 6 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/usdt.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_usdt.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_usdt_multispec.c
 create mode 100644 tools/testing/selftests/bpf/sdt-config.h
 create mode 100644 tools/testing/selftests/bpf/sdt.h

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 3820608faf57..0f8c55dfd844 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -328,12 +328,8 @@ SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c
 
 LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h		\
 		linked_vars.skel.h linked_maps.skel.h 			\
-		test_subskeleton.skel.h test_subskeleton_lib.skel.h
-
-# In the subskeleton case, we want the test_subskeleton_lib.subskel.h file
-# but that's created as a side-effect of the skel.h generation.
-test_subskeleton.skel.h-deps := test_subskeleton_lib2.o test_subskeleton_lib.o test_subskeleton.o
-test_subskeleton_lib.skel.h-deps := test_subskeleton_lib2.o test_subskeleton_lib.o
+		test_subskeleton.skel.h test_subskeleton_lib.skel.h	\
+		test_usdt.skel.h
 
 LSKELS := kfunc_call_test.c fentry_test.c fexit_test.c fexit_sleep.c \
 	test_ringbuf.c atomics.c trace_printk.c trace_vprintk.c \
@@ -346,6 +342,11 @@ test_static_linked.skel.h-deps := test_static_linked1.o test_static_linked2.o
 linked_funcs.skel.h-deps := linked_funcs1.o linked_funcs2.o
 linked_vars.skel.h-deps := linked_vars1.o linked_vars2.o
 linked_maps.skel.h-deps := linked_maps1.o linked_maps2.o
+# In the subskeleton case, we want the test_subskeleton_lib.subskel.h file
+# but that's created as a side-effect of the skel.h generation.
+test_subskeleton.skel.h-deps := test_subskeleton_lib2.o test_subskeleton_lib.o test_subskeleton.o
+test_subskeleton_lib.skel.h-deps := test_subskeleton_lib2.o test_subskeleton_lib.o
+test_usdt.skel.h-deps := test_usdt.o test_usdt_multispec.o
 
 LINKED_BPF_SRCS := $(patsubst %.o,%.c,$(foreach skel,$(LINKED_SKELS),$($(skel)-deps)))
 
@@ -400,6 +401,7 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o:				\
 		     $(TRUNNER_BPF_PROGS_DIR)/*.h			\
 		     $$(INCLUDE_DIR)/vmlinux.h				\
 		     $(wildcard $(BPFDIR)/bpf_*.h)			\
+		     $(wildcard $(BPFDIR)/*.bpf.h)			\
 		     | $(TRUNNER_OUTPUT) $$(BPFOBJ)
 	$$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@,			\
 					  $(TRUNNER_BPF_CFLAGS))
diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c
new file mode 100644
index 000000000000..0677c9bfd40b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/usdt.c
@@ -0,0 +1,313 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+
+#define _SDT_HAS_SEMAPHORES 1
+#include "../sdt.h"
+
+#include "test_usdt.skel.h"
+
+int lets_test_this(int);
+
+static volatile int idx = 2;
+static volatile __u64 bla = 0xFEDCBA9876543210ULL;
+static volatile short nums[] = {-1, -2, -3, };
+
+static volatile struct {
+	int x;
+	signed char y;
+} t1 = { 1, -127 };
+
+#define SEC(name) __attribute__((section(name), used))
+
+unsigned short test_usdt0_semaphore SEC(".probes");
+unsigned short test_usdt3_semaphore SEC(".probes");
+unsigned short test_usdt12_semaphore SEC(".probes");
+
+static void __always_inline trigger_func(int x) {
+	long y = 42;
+
+	if (test_usdt0_semaphore)
+		STAP_PROBE(test, usdt0);
+	if (test_usdt3_semaphore)
+		STAP_PROBE3(test, usdt3, x, y, &bla);
+	if (test_usdt12_semaphore) {
+		STAP_PROBE12(test, usdt12,
+			     x, x + 1, y, x + y, 5,
+			     y / 7, bla, &bla, -9, nums[x],
+			     nums[idx], t1.y);
+	}
+}
+
+static void subtest_basic_usdt(void)
+{
+	LIBBPF_OPTS(bpf_usdt_opts, opts);
+	struct test_usdt *skel;
+	struct test_usdt__bss *bss;
+	int err;
+
+	skel = test_usdt__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	bss = skel->bss;
+	bss->my_pid = getpid();
+
+	err = test_usdt__attach(skel);
+	if (!ASSERT_OK(err, "skel_attach"))
+		goto cleanup;
+
+	/* usdt0 won't be auto-attached */
+	opts.usdt_cookie = 0xcafedeadbeeffeed;
+	skel->links.usdt0 = bpf_program__attach_usdt(skel->progs.usdt0,
+						     0 /*self*/, "/proc/self/exe",
+						     "test", "usdt0", &opts);
+	if (!ASSERT_OK_PTR(skel->links.usdt0, "usdt0_link"))
+		goto cleanup;
+
+	trigger_func(1);
+
+	ASSERT_EQ(bss->usdt0_called, 1, "usdt0_called");
+	ASSERT_EQ(bss->usdt3_called, 1, "usdt3_called");
+	ASSERT_EQ(bss->usdt12_called, 1, "usdt12_called");
+
+	ASSERT_EQ(bss->usdt0_cookie, 0xcafedeadbeeffeed, "usdt0_cookie");
+	ASSERT_EQ(bss->usdt0_arg_cnt, 0, "usdt0_arg_cnt");
+	ASSERT_EQ(bss->usdt0_arg_ret, -ENOENT, "usdt0_arg_ret");
+
+	/* auto-attached usdt3 gets default zero cookie value */
+	ASSERT_EQ(bss->usdt3_cookie, 0, "usdt3_cookie");
+	ASSERT_EQ(bss->usdt3_arg_cnt, 3, "usdt3_arg_cnt");
+
+	ASSERT_EQ(bss->usdt3_arg_rets[0], 0, "usdt3_arg1_ret");
+	ASSERT_EQ(bss->usdt3_arg_rets[1], 0, "usdt3_arg2_ret");
+	ASSERT_EQ(bss->usdt3_arg_rets[2], 0, "usdt3_arg3_ret");
+	ASSERT_EQ(bss->usdt3_args[0], 1, "usdt3_arg1");
+	ASSERT_EQ(bss->usdt3_args[1], 42, "usdt3_arg2");
+	ASSERT_EQ(bss->usdt3_args[2], (uintptr_t)&bla, "usdt3_arg3");
+
+	/* auto-attached usdt12 gets default zero cookie value */
+	ASSERT_EQ(bss->usdt12_cookie, 0, "usdt12_cookie");
+	ASSERT_EQ(bss->usdt12_arg_cnt, 12, "usdt12_arg_cnt");
+
+	ASSERT_EQ(bss->usdt12_args[0], 1, "usdt12_arg1");
+	ASSERT_EQ(bss->usdt12_args[1], 1 + 1, "usdt12_arg2");
+	ASSERT_EQ(bss->usdt12_args[2], 42, "usdt12_arg3");
+	ASSERT_EQ(bss->usdt12_args[3], 42 + 1, "usdt12_arg4");
+	ASSERT_EQ(bss->usdt12_args[4], 5, "usdt12_arg5");
+	ASSERT_EQ(bss->usdt12_args[5], 42 / 7, "usdt12_arg6");
+	ASSERT_EQ(bss->usdt12_args[6], bla, "usdt12_arg7");
+	ASSERT_EQ(bss->usdt12_args[7], (uintptr_t)&bla, "usdt12_arg8");
+	ASSERT_EQ(bss->usdt12_args[8], -9, "usdt12_arg9");
+	ASSERT_EQ(bss->usdt12_args[9], nums[1], "usdt12_arg10");
+	ASSERT_EQ(bss->usdt12_args[10], nums[idx], "usdt12_arg11");
+	ASSERT_EQ(bss->usdt12_args[11], t1.y, "usdt12_arg12");
+
+	/* trigger_func() is marked __always_inline, so USDT invocations will be
+	 * inlined in two different places, meaning that each USDT will have
+	 * at least 2 different places to be attached to. This verifies that
+	 * bpf_program__attach_usdt() handles this properly and attaches to
+	 * all possible places of USDT invocation.
+	 */
+	trigger_func(2);
+
+	ASSERT_EQ(bss->usdt0_called, 2, "usdt0_called");
+	ASSERT_EQ(bss->usdt3_called, 2, "usdt3_called");
+	ASSERT_EQ(bss->usdt12_called, 2, "usdt12_called");
+
+	/* only check values that depend on trigger_func()'s input value */
+	ASSERT_EQ(bss->usdt3_args[0], 2, "usdt3_arg1");
+
+	ASSERT_EQ(bss->usdt12_args[0], 2, "usdt12_arg1");
+	ASSERT_EQ(bss->usdt12_args[1], 2 + 1, "usdt12_arg2");
+	ASSERT_EQ(bss->usdt12_args[3], 42 + 2, "usdt12_arg4");
+	ASSERT_EQ(bss->usdt12_args[9], nums[2], "usdt12_arg10");
+
+	/* detach and re-attach usdt3 */
+	bpf_link__destroy(skel->links.usdt3);
+
+	opts.usdt_cookie = 0xBADC00C51E;
+	skel->links.usdt3 = bpf_program__attach_usdt(skel->progs.usdt3, -1 /* any pid */,
+						     "/proc/self/exe", "test", "usdt3", &opts);
+	if (!ASSERT_OK_PTR(skel->links.usdt3, "usdt3_reattach"))
+		goto cleanup;
+
+	trigger_func(3);
+
+	ASSERT_EQ(bss->usdt3_called, 3, "usdt3_called");
+	/* this time usdt3 has custom cookie */
+	ASSERT_EQ(bss->usdt3_cookie, 0xBADC00C51E, "usdt3_cookie");
+	ASSERT_EQ(bss->usdt3_arg_cnt, 3, "usdt3_arg_cnt");
+
+	ASSERT_EQ(bss->usdt3_arg_rets[0], 0, "usdt3_arg1_ret");
+	ASSERT_EQ(bss->usdt3_arg_rets[1], 0, "usdt3_arg2_ret");
+	ASSERT_EQ(bss->usdt3_arg_rets[2], 0, "usdt3_arg3_ret");
+	ASSERT_EQ(bss->usdt3_args[0], 3, "usdt3_arg1");
+	ASSERT_EQ(bss->usdt3_args[1], 42, "usdt3_arg2");
+	ASSERT_EQ(bss->usdt3_args[2], (uintptr_t)&bla, "usdt3_arg3");
+
+cleanup:
+	test_usdt__destroy(skel);
+}
+
+unsigned short test_usdt_100_semaphore SEC(".probes");
+unsigned short test_usdt_300_semaphore SEC(".probes");
+unsigned short test_usdt_400_semaphore SEC(".probes");
+
+#define R10(F, X)  F(X+0); F(X+1);F(X+2); F(X+3); F(X+4); \
+		   F(X+5); F(X+6); F(X+7); F(X+8); F(X+9);
+#define R100(F, X) R10(F,X+ 0);R10(F,X+10);R10(F,X+20);R10(F,X+30);R10(F,X+40); \
+		   R10(F,X+50);R10(F,X+60);R10(F,X+70);R10(F,X+80);R10(F,X+90);
+
+/* carefully control that we get exactly 100 inlines by preventing inlining */
+static void __always_inline f100(int x)
+{
+	STAP_PROBE1(test, usdt_100, x);
+}
+
+__weak void trigger_100_usdts(void)
+{
+	R100(f100, 0);
+}
+
+/* we shouldn't be able to attach to test:usdt2_300 USDT as we don't have as
+ * many slots for specs. It's important that each STAP_PROBE2() invocation
+ * (after untolling) gets different arg spec due to compiler inlining i as
+ * a constant
+ */
+static void __always_inline f300(int x)
+{
+	STAP_PROBE1(test, usdt_300, x);
+}
+
+__weak void trigger_300_usdts(void)
+{
+	R100(f300, 0);
+	R100(f300, 100);
+	R100(f300, 200);
+}
+
+static void __always_inline f400(int x __attribute__((unused)))
+{
+	static int y;
+
+	STAP_PROBE1(test, usdt_400, y++);
+}
+
+/* this time we have 400 different USDT call sites, but they have uniform
+ * argument location, so libbpf's spec string deduplication logic should keep
+ * spec count use very small and so we should be able to attach to all 400
+ * call sites
+ */
+__weak void trigger_400_usdts(void)
+{
+	R100(f400, 0);
+	R100(f400, 100);
+	R100(f400, 200);
+	R100(f400, 300);
+}
+
+static void subtest_multispec_usdt(void)
+{
+	LIBBPF_OPTS(bpf_usdt_opts, opts);
+	struct test_usdt *skel;
+	struct test_usdt__bss *bss;
+	int err, i;
+
+	skel = test_usdt__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	bss = skel->bss;
+	bss->my_pid = getpid();
+
+	err = test_usdt__attach(skel);
+	if (!ASSERT_OK(err, "skel_attach"))
+		goto cleanup;
+
+	/* usdt_100 is auto-attached and there are 100 inlined call sites,
+	 * let's validate that all of them are properly attached to and
+	 * handled from BPF side
+	 */
+	trigger_100_usdts();
+
+	ASSERT_EQ(bss->usdt_100_called, 100, "usdt_100_called");
+	ASSERT_EQ(bss->usdt_100_sum, 99 * 100 / 2, "usdt_100_sum");
+
+	/* Stress test free spec ID tracking. By default libbpf allows up to
+	 * 256 specs to be used, so if we don't return free spec IDs back
+	 * after few detachments and re-attachments we should run out of
+	 * available spec IDs.
+	 */
+	for (i = 0; i < 2; i++) {
+		bpf_link__destroy(skel->links.usdt_100);
+
+		skel->links.usdt_100 = bpf_program__attach_usdt(skel->progs.usdt_100, -1,
+							        "/proc/self/exe",
+								"test", "usdt_100", NULL);
+		if (!ASSERT_OK_PTR(skel->links.usdt_100, "usdt_100_reattach"))
+			goto cleanup;
+
+		bss->usdt_100_sum = 0;
+		trigger_100_usdts();
+
+		ASSERT_EQ(bss->usdt_100_called, (i + 1) * 100 + 100, "usdt_100_called");
+		ASSERT_EQ(bss->usdt_100_sum, 99 * 100 / 2, "usdt_100_sum");
+	}
+
+	/* Now let's step it up and try to attach USDT that requires more than
+	 * 256 attach points with different specs for each.
+	 * Note that we need trigger_300_usdts() only to actually have 300
+	 * USDT call sites, we are not going to actually trace them.
+	 */
+	trigger_300_usdts();
+
+	/* we'll reuse usdt_100 BPF program for usdt_300 test */
+	bpf_link__destroy(skel->links.usdt_100);
+	skel->links.usdt_100 = bpf_program__attach_usdt(skel->progs.usdt_100, -1, "/proc/self/exe",
+							"test", "usdt_300", NULL);
+	err = -errno;
+	if (!ASSERT_ERR_PTR(skel->links.usdt_100, "usdt_300_bad_attach"))
+		goto cleanup;
+	ASSERT_EQ(err, -E2BIG, "usdt_300_attach_err");
+
+	/* let's check that there are no "dangling" BPF programs attached due
+	 * to partial success of the above test:usdt_300 attachment
+	 */
+	bss->usdt_100_called = 0;
+	bss->usdt_100_sum = 0;
+
+	f300(777); /* this is 301st instance of usdt_300 */
+
+	ASSERT_EQ(bss->usdt_100_called, 0, "usdt_301_called");
+	ASSERT_EQ(bss->usdt_100_sum, 0, "usdt_301_sum");
+
+	/* This time we have USDT with 400 inlined invocations, but arg specs
+	 * should be the same across all sites, so libbpf will only need to
+	 * use one spec and thus we'll be able to attach 400 uprobes
+	 * successfully.
+	 *
+	 * Again, we are reusing usdt_100 BPF program.
+	 */
+	skel->links.usdt_100 = bpf_program__attach_usdt(skel->progs.usdt_100, -1,
+							"/proc/self/exe",
+							"test", "usdt_400", NULL);
+	if (!ASSERT_OK_PTR(skel->links.usdt_100, "usdt_400_attach"))
+		goto cleanup;
+
+	trigger_400_usdts();
+
+	ASSERT_EQ(bss->usdt_100_called, 400, "usdt_400_called");
+	ASSERT_EQ(bss->usdt_100_sum, 399 * 400 / 2, "usdt_400_sum");
+
+cleanup:
+	test_usdt__destroy(skel);
+}
+
+void test_usdt(void)
+{
+	if (test__start_subtest("basic"))
+		subtest_basic_usdt();
+	if (test__start_subtest("multispec"))
+		subtest_multispec_usdt();
+}
diff --git a/tools/testing/selftests/bpf/progs/test_usdt.c b/tools/testing/selftests/bpf/progs/test_usdt.c
new file mode 100644
index 000000000000..505aab9a5234
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_usdt.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/usdt.bpf.h>
+
+int my_pid;
+
+int usdt0_called;
+u64 usdt0_cookie;
+int usdt0_arg_cnt;
+int usdt0_arg_ret;
+
+SEC("usdt")
+int usdt0(struct pt_regs *ctx)
+{
+	long tmp;
+
+	if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+		return 0;
+
+	__sync_fetch_and_add(&usdt0_called, 1);
+
+	usdt0_cookie = bpf_usdt_cookie(ctx);
+	usdt0_arg_cnt = bpf_usdt_arg_cnt(ctx);
+	/* should return -ENOENT for any arg_num */
+	usdt0_arg_ret = bpf_usdt_arg(ctx, bpf_get_prandom_u32(), &tmp);
+	return 0;
+}
+
+int usdt3_called;
+u64 usdt3_cookie;
+int usdt3_arg_cnt;
+int usdt3_arg_rets[3];
+u64 usdt3_args[3];
+
+SEC("usdt//proc/self/exe:test:usdt3")
+int usdt3(struct pt_regs *ctx)
+{
+	long tmp;
+
+	if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+		return 0;
+
+	__sync_fetch_and_add(&usdt3_called, 1);
+
+	usdt3_cookie = bpf_usdt_cookie(ctx);
+	usdt3_arg_cnt = bpf_usdt_arg_cnt(ctx);
+
+	usdt3_arg_rets[0] = bpf_usdt_arg(ctx, 0, &tmp);
+	usdt3_args[0] = (int)tmp;
+
+	usdt3_arg_rets[1] = bpf_usdt_arg(ctx, 1, &tmp);
+	usdt3_args[1] = (long)tmp;
+
+	usdt3_arg_rets[2] = bpf_usdt_arg(ctx, 2, &tmp);
+	usdt3_args[2] = (uintptr_t)tmp;
+
+	return 0;
+}
+
+int usdt12_called;
+u64 usdt12_cookie;
+int usdt12_arg_cnt;
+u64 usdt12_args[12];
+
+SEC("usdt//proc/self/exe:test:usdt12")
+int BPF_USDT(usdt12, int a1, int a2, long a3, long a4, unsigned a5,
+		     long a6, __u64 a7, uintptr_t a8, int a9, short a10,
+		     short a11, signed char a12)
+{
+	if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+		return 0;
+
+	__sync_fetch_and_add(&usdt12_called, 1);
+
+	usdt12_cookie = bpf_usdt_cookie(ctx);
+	usdt12_arg_cnt = bpf_usdt_arg_cnt(ctx);
+
+	usdt12_args[0] = a1;
+	usdt12_args[1] = a2;
+	usdt12_args[2] = a3;
+	usdt12_args[3] = a4;
+	usdt12_args[4] = a5;
+	usdt12_args[5] = a6;
+	usdt12_args[6] = a7;
+	usdt12_args[7] = a8;
+	usdt12_args[8] = a9;
+	usdt12_args[9] = a10;
+	usdt12_args[10] = a11;
+	usdt12_args[11] = a12;
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_usdt_multispec.c b/tools/testing/selftests/bpf/progs/test_usdt_multispec.c
new file mode 100644
index 000000000000..3a090681f981
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_usdt_multispec.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/usdt.bpf.h>
+
+/* this file is linked together with test_usdt.c to validate that usdt.bpf.h
+ * can be included in multiple .bpf.c files forming single final BPF object
+ * file
+ */
+
+extern int my_pid;
+
+int usdt_100_called;
+int usdt_100_sum;
+
+SEC("usdt//proc/self/exe:test:usdt_100")
+int BPF_USDT(usdt_100, int x)
+{
+	long tmp;
+
+	if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+		return 0;
+
+	__sync_fetch_and_add(&usdt_100_called, 1);
+	__sync_fetch_and_add(&usdt_100_sum, x);
+
+	bpf_printk("X is %d, sum is %d", x, usdt_100_sum);
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/sdt-config.h b/tools/testing/selftests/bpf/sdt-config.h
new file mode 100644
index 000000000000..733045a52771
--- /dev/null
+++ b/tools/testing/selftests/bpf/sdt-config.h
@@ -0,0 +1,6 @@
+/* includes/sys/sdt-config.h.  Generated from sdt-config.h.in by configure.
+
+   This file just defines _SDT_ASM_SECTION_AUTOGROUP_SUPPORT to 0 or 1 to
+   indicate whether the assembler supports "?" in .pushsection directives.  */
+
+#define _SDT_ASM_SECTION_AUTOGROUP_SUPPORT 1
diff --git a/tools/testing/selftests/bpf/sdt.h b/tools/testing/selftests/bpf/sdt.h
new file mode 100644
index 000000000000..ca0162b4dc57
--- /dev/null
+++ b/tools/testing/selftests/bpf/sdt.h
@@ -0,0 +1,513 @@
+/* <sys/sdt.h> - Systemtap static probe definition macros.
+
+   This file is dedicated to the public domain, pursuant to CC0
+   (https://creativecommons.org/publicdomain/zero/1.0/)
+*/
+
+#ifndef _SYS_SDT_H
+#define _SYS_SDT_H    1
+
+/*
+  This file defines a family of macros
+
+       STAP_PROBEn(op1, ..., opn)
+
+  that emit a nop into the instruction stream, and some data into an auxiliary
+  note section.  The data in the note section describes the operands, in terms
+  of size and location.  Each location is encoded as assembler operand string.
+  Consumer tools such as gdb or systemtap insert breakpoints on top of
+  the nop, and decode the location operand-strings, like an assembler,
+  to find the values being passed.
+
+  The operand strings are selected by the compiler for each operand.
+  They are constrained by gcc inline-assembler codes.  The default is:
+
+  #define STAP_SDT_ARG_CONSTRAINT nor
+
+  This is a good default if the operands tend to be integral and
+  moderate in number (smaller than number of registers).  In other
+  cases, the compiler may report "'asm' requires impossible reload" or
+  similar.  In this case, consider simplifying the macro call (fewer
+  and simpler operands), reduce optimization, or override the default
+  constraints string via:
+
+  #define STAP_SDT_ARG_CONSTRAINT g
+  #include <sys/sdt.h>
+
+  See also:
+  https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation
+  https://gcc.gnu.org/onlinedocs/gcc/Constraints.html
+ */
+
+
+
+#ifdef __ASSEMBLER__
+# define _SDT_PROBE(provider, name, n, arglist)	\
+  _SDT_ASM_BODY(provider, name, _SDT_ASM_SUBSTR_1, (_SDT_DEPAREN_##n arglist)) \
+  _SDT_ASM_BASE
+# define _SDT_ASM_1(x)			x;
+# define _SDT_ASM_2(a, b)		a,b;
+# define _SDT_ASM_3(a, b, c)		a,b,c;
+# define _SDT_ASM_5(a, b, c, d, e)	a,b,c,d,e;
+# define _SDT_ASM_STRING_1(x)		.asciz #x;
+# define _SDT_ASM_SUBSTR_1(x)		.ascii #x;
+# define _SDT_DEPAREN_0()				/* empty */
+# define _SDT_DEPAREN_1(a)				a
+# define _SDT_DEPAREN_2(a,b)				a b
+# define _SDT_DEPAREN_3(a,b,c)				a b c
+# define _SDT_DEPAREN_4(a,b,c,d)			a b c d
+# define _SDT_DEPAREN_5(a,b,c,d,e)			a b c d e
+# define _SDT_DEPAREN_6(a,b,c,d,e,f)			a b c d e f
+# define _SDT_DEPAREN_7(a,b,c,d,e,f,g)			a b c d e f g
+# define _SDT_DEPAREN_8(a,b,c,d,e,f,g,h)		a b c d e f g h
+# define _SDT_DEPAREN_9(a,b,c,d,e,f,g,h,i)		a b c d e f g h i
+# define _SDT_DEPAREN_10(a,b,c,d,e,f,g,h,i,j)		a b c d e f g h i j
+# define _SDT_DEPAREN_11(a,b,c,d,e,f,g,h,i,j,k)		a b c d e f g h i j k
+# define _SDT_DEPAREN_12(a,b,c,d,e,f,g,h,i,j,k,l)	a b c d e f g h i j k l
+#else
+#if defined _SDT_HAS_SEMAPHORES
+#define _SDT_NOTE_SEMAPHORE_USE(provider, name) \
+  __asm__ __volatile__ ("" :: "m" (provider##_##name##_semaphore));
+#else
+#define _SDT_NOTE_SEMAPHORE_USE(provider, name)
+#endif
+
+# define _SDT_PROBE(provider, name, n, arglist) \
+  do {									    \
+    _SDT_NOTE_SEMAPHORE_USE(provider, name); \
+    __asm__ __volatile__ (_SDT_ASM_BODY(provider, name, _SDT_ASM_ARGS, (n)) \
+			  :: _SDT_ASM_OPERANDS_##n arglist);		    \
+    __asm__ __volatile__ (_SDT_ASM_BASE);				    \
+  } while (0)
+# define _SDT_S(x)			#x
+# define _SDT_ASM_1(x)			_SDT_S(x) "\n"
+# define _SDT_ASM_2(a, b)		_SDT_S(a) "," _SDT_S(b) "\n"
+# define _SDT_ASM_3(a, b, c)		_SDT_S(a) "," _SDT_S(b) "," \
+					_SDT_S(c) "\n"
+# define _SDT_ASM_5(a, b, c, d, e)	_SDT_S(a) "," _SDT_S(b) "," \
+					_SDT_S(c) "," _SDT_S(d) "," \
+					_SDT_S(e) "\n"
+# define _SDT_ASM_ARGS(n)		_SDT_ASM_TEMPLATE_##n
+# define _SDT_ASM_STRING_1(x)		_SDT_ASM_1(.asciz #x)
+# define _SDT_ASM_SUBSTR_1(x)		_SDT_ASM_1(.ascii #x)
+
+# define _SDT_ARGFMT(no)                _SDT_ASM_1(_SDT_SIGN %n[_SDT_S##no]) \
+                                        _SDT_ASM_1(_SDT_SIZE %n[_SDT_S##no]) \
+                                        _SDT_ASM_1(_SDT_TYPE %n[_SDT_S##no]) \
+                                        _SDT_ASM_SUBSTR(_SDT_ARGTMPL(_SDT_A##no))
+
+
+# ifndef STAP_SDT_ARG_CONSTRAINT
+# if defined __powerpc__
+# define STAP_SDT_ARG_CONSTRAINT        nZr
+# elif defined __arm__
+# define STAP_SDT_ARG_CONSTRAINT        g
+# else
+# define STAP_SDT_ARG_CONSTRAINT        nor
+# endif
+# endif
+
+# define _SDT_STRINGIFY(x)              #x
+# define _SDT_ARG_CONSTRAINT_STRING(x)  _SDT_STRINGIFY(x)
+/* _SDT_S encodes the size and type as 0xSSTT which is decoded by the assembler
+   macros _SDT_SIZE and _SDT_TYPE */
+# define _SDT_ARG(n, x)				    \
+  [_SDT_S##n] "n" ((_SDT_ARGSIGNED (x) ? (int)-1 : 1) * (-(((int) _SDT_ARGSIZE (x)) << 8) + (-(0x7f & __builtin_classify_type (x))))), \
+  [_SDT_A##n] _SDT_ARG_CONSTRAINT_STRING (STAP_SDT_ARG_CONSTRAINT) (_SDT_ARGVAL (x))
+#endif
+#define _SDT_ASM_STRING(x)		_SDT_ASM_STRING_1(x)
+#define _SDT_ASM_SUBSTR(x)		_SDT_ASM_SUBSTR_1(x)
+
+#define _SDT_ARGARRAY(x)	(__builtin_classify_type (x) == 14	\
+				 || __builtin_classify_type (x) == 5)
+
+#ifdef __cplusplus
+# define _SDT_ARGSIGNED(x)	(!_SDT_ARGARRAY (x) \
+				 && __sdt_type<__typeof (x)>::__sdt_signed)
+# define _SDT_ARGSIZE(x)	(_SDT_ARGARRAY (x) \
+				 ? sizeof (void *) : sizeof (x))
+# define _SDT_ARGVAL(x)		(x)
+
+# include <cstddef>
+
+template<typename __sdt_T>
+struct __sdt_type
+{
+  static const bool __sdt_signed = false;
+};
+  
+#define __SDT_ALWAYS_SIGNED(T) \
+template<> struct __sdt_type<T> { static const bool __sdt_signed = true; };
+#define __SDT_COND_SIGNED(T,CT)						\
+template<> struct __sdt_type<T> { static const bool __sdt_signed = ((CT)(-1) < 1); };
+__SDT_ALWAYS_SIGNED(signed char)
+__SDT_ALWAYS_SIGNED(short)
+__SDT_ALWAYS_SIGNED(int)
+__SDT_ALWAYS_SIGNED(long)
+__SDT_ALWAYS_SIGNED(long long)
+__SDT_ALWAYS_SIGNED(volatile signed char)
+__SDT_ALWAYS_SIGNED(volatile short)
+__SDT_ALWAYS_SIGNED(volatile int)
+__SDT_ALWAYS_SIGNED(volatile long)
+__SDT_ALWAYS_SIGNED(volatile long long)
+__SDT_ALWAYS_SIGNED(const signed char)
+__SDT_ALWAYS_SIGNED(const short)
+__SDT_ALWAYS_SIGNED(const int)
+__SDT_ALWAYS_SIGNED(const long)
+__SDT_ALWAYS_SIGNED(const long long)
+__SDT_ALWAYS_SIGNED(const volatile signed char)
+__SDT_ALWAYS_SIGNED(const volatile short)
+__SDT_ALWAYS_SIGNED(const volatile int)
+__SDT_ALWAYS_SIGNED(const volatile long)
+__SDT_ALWAYS_SIGNED(const volatile long long)
+__SDT_COND_SIGNED(char, char)
+__SDT_COND_SIGNED(wchar_t, wchar_t)
+__SDT_COND_SIGNED(volatile char, char)
+__SDT_COND_SIGNED(volatile wchar_t, wchar_t)
+__SDT_COND_SIGNED(const char, char)
+__SDT_COND_SIGNED(const wchar_t, wchar_t)
+__SDT_COND_SIGNED(const volatile char, char)
+__SDT_COND_SIGNED(const volatile wchar_t, wchar_t)
+#if defined (__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4))
+/* __SDT_COND_SIGNED(char16_t) */
+/* __SDT_COND_SIGNED(char32_t) */
+#endif
+
+template<typename __sdt_E>
+struct __sdt_type<__sdt_E[]> : public __sdt_type<__sdt_E *> {};
+
+template<typename __sdt_E, size_t __sdt_N>
+struct __sdt_type<__sdt_E[__sdt_N]> : public __sdt_type<__sdt_E *> {};
+
+#elif !defined(__ASSEMBLER__)
+__extension__ extern unsigned long long __sdt_unsp;
+# define _SDT_ARGINTTYPE(x)						\
+  __typeof (__builtin_choose_expr (((__builtin_classify_type (x)	\
+				     + 3) & -4) == 4, (x), 0U))
+# define _SDT_ARGSIGNED(x)						\
+  (!__extension__							\
+   (__builtin_constant_p ((((unsigned long long)			\
+			    (_SDT_ARGINTTYPE (x)) __sdt_unsp)		\
+			   & ((unsigned long long)1 << (sizeof (unsigned long long)	\
+				       * __CHAR_BIT__ - 1))) == 0)	\
+    || (_SDT_ARGINTTYPE (x)) -1 > (_SDT_ARGINTTYPE (x)) 0))
+# define _SDT_ARGSIZE(x)	\
+  (_SDT_ARGARRAY (x) ? sizeof (void *) : sizeof (x))
+# define _SDT_ARGVAL(x)		(x)
+#endif
+
+#if defined __powerpc__ || defined __powerpc64__
+# define _SDT_ARGTMPL(id)	%I[id]%[id]
+#elif defined __i386__
+# define _SDT_ARGTMPL(id)	%k[id]  /* gcc.gnu.org/PR80115 sourceware.org/PR24541 */
+#else
+# define _SDT_ARGTMPL(id)	%[id]
+#endif
+
+/* NB: gdb PR24541 highlighted an unspecified corner of the sdt.h
+   operand note format.
+
+   The named register may be a longer or shorter (!) alias for the
+   storage where the value in question is found.  For example, on
+   i386, 64-bit value may be put in register pairs, and the register
+   name stored would identify just one of them.  Previously, gcc was
+   asked to emit the %w[id] (16-bit alias of some registers holding
+   operands), even when a wider 32-bit value was used.
+
+   Bottom line: the byte-width given before the @ sign governs.  If
+   there is a mismatch between that width and that of the named
+   register, then a sys/sdt.h note consumer may need to employ
+   architecture-specific heuristics to figure out where the compiler
+   has actually put the complete value.
+*/
+
+#ifdef __LP64__
+# define _SDT_ASM_ADDR	.8byte
+#else
+# define _SDT_ASM_ADDR	.4byte
+#endif
+
+/* The ia64 and s390 nop instructions take an argument. */
+#if defined(__ia64__) || defined(__s390__) || defined(__s390x__)
+#define _SDT_NOP	nop 0
+#else
+#define _SDT_NOP	nop
+#endif
+
+#define _SDT_NOTE_NAME	"stapsdt"
+#define _SDT_NOTE_TYPE	3
+
+/* If the assembler supports the necessary feature, then we can play
+   nice with code in COMDAT sections, which comes up in C++ code.
+   Without that assembler support, some combinations of probe placements
+   in certain kinds of C++ code may produce link-time errors.  */
+#include "sdt-config.h"
+#if _SDT_ASM_SECTION_AUTOGROUP_SUPPORT
+# define _SDT_ASM_AUTOGROUP "?"
+#else
+# define _SDT_ASM_AUTOGROUP ""
+#endif
+
+#define _SDT_DEF_MACROS							     \
+	_SDT_ASM_1(.altmacro)						     \
+	_SDT_ASM_1(.macro _SDT_SIGN x)				     	     \
+	_SDT_ASM_3(.pushsection .note.stapsdt,"","note")		     \
+	_SDT_ASM_1(.iflt \\x)						     \
+	_SDT_ASM_1(.ascii "-")						     \
+	_SDT_ASM_1(.endif)						     \
+	_SDT_ASM_1(.popsection)						     \
+	_SDT_ASM_1(.endm)						     \
+	_SDT_ASM_1(.macro _SDT_SIZE_ x)					     \
+	_SDT_ASM_3(.pushsection .note.stapsdt,"","note")		     \
+	_SDT_ASM_1(.ascii "\x")						     \
+	_SDT_ASM_1(.popsection)						     \
+	_SDT_ASM_1(.endm)						     \
+	_SDT_ASM_1(.macro _SDT_SIZE x)					     \
+	_SDT_ASM_1(_SDT_SIZE_ %%((-(-\\x*((-\\x>0)-(-\\x<0))))>>8))	     \
+	_SDT_ASM_1(.endm)						     \
+	_SDT_ASM_1(.macro _SDT_TYPE_ x)				             \
+	_SDT_ASM_3(.pushsection .note.stapsdt,"","note")		     \
+	_SDT_ASM_2(.ifc 8,\\x)					     	     \
+	_SDT_ASM_1(.ascii "f")						     \
+	_SDT_ASM_1(.endif)						     \
+	_SDT_ASM_1(.ascii "@")						     \
+	_SDT_ASM_1(.popsection)						     \
+	_SDT_ASM_1(.endm)						     \
+	_SDT_ASM_1(.macro _SDT_TYPE x)				     	     \
+	_SDT_ASM_1(_SDT_TYPE_ %%((\\x)&(0xff)))			     \
+	_SDT_ASM_1(.endm)
+
+#define _SDT_UNDEF_MACROS						      \
+  _SDT_ASM_1(.purgem _SDT_SIGN)						      \
+  _SDT_ASM_1(.purgem _SDT_SIZE_)					      \
+  _SDT_ASM_1(.purgem _SDT_SIZE)						      \
+  _SDT_ASM_1(.purgem _SDT_TYPE_)					      \
+  _SDT_ASM_1(.purgem _SDT_TYPE)
+
+#define _SDT_ASM_BODY(provider, name, pack_args, args, ...)		      \
+  _SDT_DEF_MACROS							      \
+  _SDT_ASM_1(990:	_SDT_NOP)					      \
+  _SDT_ASM_3(		.pushsection .note.stapsdt,_SDT_ASM_AUTOGROUP,"note") \
+  _SDT_ASM_1(		.balign 4)					      \
+  _SDT_ASM_3(		.4byte 992f-991f, 994f-993f, _SDT_NOTE_TYPE)	      \
+  _SDT_ASM_1(991:	.asciz _SDT_NOTE_NAME)				      \
+  _SDT_ASM_1(992:	.balign 4)					      \
+  _SDT_ASM_1(993:	_SDT_ASM_ADDR 990b)				      \
+  _SDT_ASM_1(		_SDT_ASM_ADDR _.stapsdt.base)			      \
+  _SDT_SEMAPHORE(provider,name)						      \
+  _SDT_ASM_STRING(provider)						      \
+  _SDT_ASM_STRING(name)							      \
+  pack_args args							      \
+  _SDT_ASM_SUBSTR(\x00)							      \
+  _SDT_UNDEF_MACROS							      \
+  _SDT_ASM_1(994:	.balign 4)					      \
+  _SDT_ASM_1(		.popsection)
+
+#define _SDT_ASM_BASE							      \
+  _SDT_ASM_1(.ifndef _.stapsdt.base)					      \
+  _SDT_ASM_5(		.pushsection .stapsdt.base,"aG","progbits",	      \
+							.stapsdt.base,comdat) \
+  _SDT_ASM_1(		.weak _.stapsdt.base)				      \
+  _SDT_ASM_1(		.hidden _.stapsdt.base)				      \
+  _SDT_ASM_1(	_.stapsdt.base: .space 1)				      \
+  _SDT_ASM_2(		.size _.stapsdt.base, 1)			      \
+  _SDT_ASM_1(		.popsection)					      \
+  _SDT_ASM_1(.endif)
+
+#if defined _SDT_HAS_SEMAPHORES
+#define _SDT_SEMAPHORE(p,n) \
+	_SDT_ASM_1(		_SDT_ASM_ADDR p##_##n##_semaphore)
+#else
+#define _SDT_SEMAPHORE(p,n) _SDT_ASM_1(		_SDT_ASM_ADDR 0)
+#endif
+
+#define _SDT_ASM_BLANK _SDT_ASM_SUBSTR(\x20)
+#define _SDT_ASM_TEMPLATE_0		/* no arguments */
+#define _SDT_ASM_TEMPLATE_1		_SDT_ARGFMT(1)
+#define _SDT_ASM_TEMPLATE_2		_SDT_ASM_TEMPLATE_1 _SDT_ASM_BLANK _SDT_ARGFMT(2)
+#define _SDT_ASM_TEMPLATE_3		_SDT_ASM_TEMPLATE_2 _SDT_ASM_BLANK _SDT_ARGFMT(3)
+#define _SDT_ASM_TEMPLATE_4		_SDT_ASM_TEMPLATE_3 _SDT_ASM_BLANK _SDT_ARGFMT(4)
+#define _SDT_ASM_TEMPLATE_5		_SDT_ASM_TEMPLATE_4 _SDT_ASM_BLANK _SDT_ARGFMT(5)
+#define _SDT_ASM_TEMPLATE_6		_SDT_ASM_TEMPLATE_5 _SDT_ASM_BLANK _SDT_ARGFMT(6)
+#define _SDT_ASM_TEMPLATE_7		_SDT_ASM_TEMPLATE_6 _SDT_ASM_BLANK _SDT_ARGFMT(7)
+#define _SDT_ASM_TEMPLATE_8		_SDT_ASM_TEMPLATE_7 _SDT_ASM_BLANK _SDT_ARGFMT(8)
+#define _SDT_ASM_TEMPLATE_9		_SDT_ASM_TEMPLATE_8 _SDT_ASM_BLANK _SDT_ARGFMT(9)
+#define _SDT_ASM_TEMPLATE_10		_SDT_ASM_TEMPLATE_9 _SDT_ASM_BLANK _SDT_ARGFMT(10)
+#define _SDT_ASM_TEMPLATE_11		_SDT_ASM_TEMPLATE_10 _SDT_ASM_BLANK _SDT_ARGFMT(11)
+#define _SDT_ASM_TEMPLATE_12		_SDT_ASM_TEMPLATE_11 _SDT_ASM_BLANK _SDT_ARGFMT(12)
+#define _SDT_ASM_OPERANDS_0()		[__sdt_dummy] "g" (0)
+#define _SDT_ASM_OPERANDS_1(arg1)	_SDT_ARG(1, arg1)
+#define _SDT_ASM_OPERANDS_2(arg1, arg2) \
+  _SDT_ASM_OPERANDS_1(arg1), _SDT_ARG(2, arg2)
+#define _SDT_ASM_OPERANDS_3(arg1, arg2, arg3) \
+  _SDT_ASM_OPERANDS_2(arg1, arg2), _SDT_ARG(3, arg3)
+#define _SDT_ASM_OPERANDS_4(arg1, arg2, arg3, arg4) \
+  _SDT_ASM_OPERANDS_3(arg1, arg2, arg3), _SDT_ARG(4, arg4)
+#define _SDT_ASM_OPERANDS_5(arg1, arg2, arg3, arg4, arg5) \
+  _SDT_ASM_OPERANDS_4(arg1, arg2, arg3, arg4), _SDT_ARG(5, arg5)
+#define _SDT_ASM_OPERANDS_6(arg1, arg2, arg3, arg4, arg5, arg6) \
+  _SDT_ASM_OPERANDS_5(arg1, arg2, arg3, arg4, arg5), _SDT_ARG(6, arg6)
+#define _SDT_ASM_OPERANDS_7(arg1, arg2, arg3, arg4, arg5, arg6, arg7) \
+  _SDT_ASM_OPERANDS_6(arg1, arg2, arg3, arg4, arg5, arg6), _SDT_ARG(7, arg7)
+#define _SDT_ASM_OPERANDS_8(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8) \
+  _SDT_ASM_OPERANDS_7(arg1, arg2, arg3, arg4, arg5, arg6, arg7), \
+    _SDT_ARG(8, arg8)
+#define _SDT_ASM_OPERANDS_9(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9) \
+  _SDT_ASM_OPERANDS_8(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8), \
+    _SDT_ARG(9, arg9)
+#define _SDT_ASM_OPERANDS_10(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10) \
+  _SDT_ASM_OPERANDS_9(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9), \
+    _SDT_ARG(10, arg10)
+#define _SDT_ASM_OPERANDS_11(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11) \
+  _SDT_ASM_OPERANDS_10(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10), \
+    _SDT_ARG(11, arg11)
+#define _SDT_ASM_OPERANDS_12(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11,arg12) \
+  _SDT_ASM_OPERANDS_11(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, arg11), \
+    _SDT_ARG(12, arg12)
+
+/* These macros can be used in C, C++, or assembly code.
+   In assembly code the arguments should use normal assembly operand syntax.  */
+
+#define STAP_PROBE(provider, name) \
+  _SDT_PROBE(provider, name, 0, ())
+#define STAP_PROBE1(provider, name, arg1) \
+  _SDT_PROBE(provider, name, 1, (arg1))
+#define STAP_PROBE2(provider, name, arg1, arg2) \
+  _SDT_PROBE(provider, name, 2, (arg1, arg2))
+#define STAP_PROBE3(provider, name, arg1, arg2, arg3) \
+  _SDT_PROBE(provider, name, 3, (arg1, arg2, arg3))
+#define STAP_PROBE4(provider, name, arg1, arg2, arg3, arg4) \
+  _SDT_PROBE(provider, name, 4, (arg1, arg2, arg3, arg4))
+#define STAP_PROBE5(provider, name, arg1, arg2, arg3, arg4, arg5) \
+  _SDT_PROBE(provider, name, 5, (arg1, arg2, arg3, arg4, arg5))
+#define STAP_PROBE6(provider, name, arg1, arg2, arg3, arg4, arg5, arg6)	\
+  _SDT_PROBE(provider, name, 6, (arg1, arg2, arg3, arg4, arg5, arg6))
+#define STAP_PROBE7(provider, name, arg1, arg2, arg3, arg4, arg5, arg6, arg7) \
+  _SDT_PROBE(provider, name, 7, (arg1, arg2, arg3, arg4, arg5, arg6, arg7))
+#define STAP_PROBE8(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8) \
+  _SDT_PROBE(provider, name, 8, (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8))
+#define STAP_PROBE9(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9)\
+  _SDT_PROBE(provider, name, 9, (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9))
+#define STAP_PROBE10(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10) \
+  _SDT_PROBE(provider, name, 10, \
+	     (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10))
+#define STAP_PROBE11(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11) \
+  _SDT_PROBE(provider, name, 11, \
+	     (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11))
+#define STAP_PROBE12(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11,arg12) \
+  _SDT_PROBE(provider, name, 12, \
+	     (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11,arg12))
+
+/* This STAP_PROBEV macro can be used in variadic scenarios, where the
+   number of probe arguments is not known until compile time.  Since
+   variadic macro support may vary with compiler options, you must
+   pre-#define SDT_USE_VARIADIC to enable this type of probe.
+
+   The trick to count __VA_ARGS__ was inspired by this post by
+   Laurent Deniau <laurent.deniau@cern.ch>:
+       http://groups.google.com/group/comp.std.c/msg/346fc464319b1ee5
+
+   Note that our _SDT_NARG is called with an extra 0 arg that's not
+   counted, so we don't have to worry about the behavior of macros
+   called without any arguments.  */
+
+#define _SDT_NARG(...) __SDT_NARG(__VA_ARGS__, 12,11,10,9,8,7,6,5,4,3,2,1,0)
+#define __SDT_NARG(_0,_1,_2,_3,_4,_5,_6,_7,_8,_9,_10,_11,_12, N, ...) N
+#ifdef SDT_USE_VARIADIC
+#define _SDT_PROBE_N(provider, name, N, ...) \
+  _SDT_PROBE(provider, name, N, (__VA_ARGS__))
+#define STAP_PROBEV(provider, name, ...) \
+  _SDT_PROBE_N(provider, name, _SDT_NARG(0, ##__VA_ARGS__), ##__VA_ARGS__)
+#endif
+
+/* These macros are for use in asm statements.  You must compile
+   with -std=gnu99 or -std=c99 to use the STAP_PROBE_ASM macro.
+
+   The STAP_PROBE_ASM macro generates a quoted string to be used in the
+   template portion of the asm statement, concatenated with strings that
+   contain the actual assembly code around the probe site.
+
+   For example:
+
+	asm ("before\n"
+	     STAP_PROBE_ASM(provider, fooprobe, %eax 4(%esi))
+	     "after");
+
+   emits the assembly code for "before\nafter", with a probe in between.
+   The probe arguments are the %eax register, and the value of the memory
+   word located 4 bytes past the address in the %esi register.  Note that
+   because this is a simple asm, not a GNU C extended asm statement, these
+   % characters do not need to be doubled to generate literal %reg names.
+
+   In a GNU C extended asm statement, the probe arguments can be specified
+   using the macro STAP_PROBE_ASM_TEMPLATE(n) for n arguments.  The paired
+   macro STAP_PROBE_ASM_OPERANDS gives the C values of these probe arguments,
+   and appears in the input operand list of the asm statement.  For example:
+
+	asm ("someinsn %0,%1\n" // %0 is output operand, %1 is input operand
+	     STAP_PROBE_ASM(provider, fooprobe, STAP_PROBE_ASM_TEMPLATE(3))
+	     "otherinsn %[namedarg]"
+	     : "r" (outvar)
+	     : "g" (some_value), [namedarg] "i" (1234),
+	       STAP_PROBE_ASM_OPERANDS(3, some_value, some_ptr->field, 1234));
+
+    This is just like writing:
+
+	STAP_PROBE3(provider, fooprobe, some_value, some_ptr->field, 1234));
+
+    but the probe site is right between "someinsn" and "otherinsn".
+
+    The probe arguments in STAP_PROBE_ASM can be given as assembly
+    operands instead, even inside a GNU C extended asm statement.
+    Note that these can use operand templates like %0 or %[name],
+    and likewise they must write %%reg for a literal operand of %reg.  */
+
+#define _SDT_ASM_BODY_1(p,n,...) _SDT_ASM_BODY(p,n,_SDT_ASM_SUBSTR,(__VA_ARGS__))
+#define _SDT_ASM_BODY_2(p,n,...) _SDT_ASM_BODY(p,n,/*_SDT_ASM_STRING */,__VA_ARGS__)
+#define _SDT_ASM_BODY_N2(p,n,no,...) _SDT_ASM_BODY_ ## no(p,n,__VA_ARGS__)
+#define _SDT_ASM_BODY_N1(p,n,no,...) _SDT_ASM_BODY_N2(p,n,no,__VA_ARGS__)
+#define _SDT_ASM_BODY_N(p,n,...) _SDT_ASM_BODY_N1(p,n,_SDT_NARG(0, __VA_ARGS__),__VA_ARGS__)
+
+#if __STDC_VERSION__ >= 199901L
+# define STAP_PROBE_ASM(provider, name, ...)		\
+  _SDT_ASM_BODY_N(provider, name, __VA_ARGS__)					\
+  _SDT_ASM_BASE
+# define STAP_PROBE_ASM_OPERANDS(n, ...) _SDT_ASM_OPERANDS_##n(__VA_ARGS__)
+#else
+# define STAP_PROBE_ASM(provider, name, args)	\
+  _SDT_ASM_BODY(provider, name, /* _SDT_ASM_STRING */, (args))	\
+  _SDT_ASM_BASE
+#endif
+#define STAP_PROBE_ASM_TEMPLATE(n) _SDT_ASM_TEMPLATE_##n,"use _SDT_ASM_TEMPLATE_"
+
+
+/* DTrace compatible macro names.  */
+#define DTRACE_PROBE(provider,probe)		\
+  STAP_PROBE(provider,probe)
+#define DTRACE_PROBE1(provider,probe,parm1)	\
+  STAP_PROBE1(provider,probe,parm1)
+#define DTRACE_PROBE2(provider,probe,parm1,parm2)	\
+  STAP_PROBE2(provider,probe,parm1,parm2)
+#define DTRACE_PROBE3(provider,probe,parm1,parm2,parm3) \
+  STAP_PROBE3(provider,probe,parm1,parm2,parm3)
+#define DTRACE_PROBE4(provider,probe,parm1,parm2,parm3,parm4)	\
+  STAP_PROBE4(provider,probe,parm1,parm2,parm3,parm4)
+#define DTRACE_PROBE5(provider,probe,parm1,parm2,parm3,parm4,parm5)	\
+  STAP_PROBE5(provider,probe,parm1,parm2,parm3,parm4,parm5)
+#define DTRACE_PROBE6(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6) \
+  STAP_PROBE6(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6)
+#define DTRACE_PROBE7(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7) \
+  STAP_PROBE7(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7)
+#define DTRACE_PROBE8(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8) \
+  STAP_PROBE8(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8)
+#define DTRACE_PROBE9(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9) \
+  STAP_PROBE9(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9)
+#define DTRACE_PROBE10(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10) \
+  STAP_PROBE10(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10)
+#define DTRACE_PROBE11(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10,parm11) \
+  STAP_PROBE11(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10,parm11)
+#define DTRACE_PROBE12(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10,parm11,parm12) \
+  STAP_PROBE12(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10,parm11,parm12)
+
+
+#endif /* sys/sdt.h */
-- 
cgit 


From 00a0fa2d7d496824648b125256c5566f36b48dad Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 4 Apr 2022 16:42:02 -0700
Subject: selftests/bpf: Add urandom_read shared lib and USDTs

Extend urandom_read helper binary to include USDTs of 4 combinations:
semaphore/semaphoreless (refcounted and non-refcounted) and based in
executable or shared library. We also extend urandom_read with ability
to report it's own PID to parent process and wait for parent process to
ready itself up for tracing urandom_read. We utilize popen() and
underlying pipe properties for proper signaling.

Once urandom_read is ready, we add few tests to validate that libbpf's
USDT attachment handles all the above combinations of semaphore (or lack
of it) and static or shared library USDTs. Also, we validate that libbpf
handles shared libraries both with PID filter and without one (i.e., -1
for PID argument).

Having the shared library case tested with and without PID is important
because internal logic differs on kernels that don't support BPF
cookies. On such older kernels, attaching to USDTs in shared libraries
without specifying concrete PID doesn't work in principle, because it's
impossible to determine shared library's load address to derive absolute
IPs for uprobe attachments. Without absolute IPs, it's impossible to
perform correct look up of USDT spec based on uprobe's absolute IP (the
only kind available from BPF at runtime). This is not the problem on
newer kernels with BPF cookie as we don't need IP-to-ID lookup because
BPF cookie value *is* spec ID.

So having those two situations as separate subtests is good because
libbpf CI is able to test latest selftests against old kernels (e.g.,
4.9 and 5.5), so we'll be able to disable PID-less shared lib attachment
for old kernels, but will still leave PID-specific one enabled to validate
this legacy logic is working correctly.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Dave Marchevsky <davemarchevsky@fb.com>
Link: https://lore.kernel.org/bpf/20220404234202.331384-8-andrii@kernel.org
---
 tools/testing/selftests/bpf/Makefile               |  11 ++-
 tools/testing/selftests/bpf/prog_tests/usdt.c      | 108 +++++++++++++++++++++
 .../selftests/bpf/progs/test_urandom_usdt.c        |  70 +++++++++++++
 .../selftests/bpf/progs/test_usdt_multispec.c      |   2 -
 tools/testing/selftests/bpf/urandom_read.c         |  63 +++++++++++-
 tools/testing/selftests/bpf/urandom_read_aux.c     |   9 ++
 tools/testing/selftests/bpf/urandom_read_lib1.c    |  13 +++
 tools/testing/selftests/bpf/urandom_read_lib2.c    |   8 ++
 8 files changed, 275 insertions(+), 9 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/test_urandom_usdt.c
 create mode 100644 tools/testing/selftests/bpf/urandom_read_aux.c
 create mode 100644 tools/testing/selftests/bpf/urandom_read_lib1.c
 create mode 100644 tools/testing/selftests/bpf/urandom_read_lib2.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 0f8c55dfd844..bafdc5373a13 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -168,9 +168,15 @@ $(OUTPUT)/%:%.c
 	$(call msg,BINARY,,$@)
 	$(Q)$(LINK.c) $^ $(LDLIBS) -o $@
 
-$(OUTPUT)/urandom_read: urandom_read.c
+$(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c
+	$(call msg,LIB,,$@)
+	$(Q)$(CC) $(CFLAGS) -fPIC $(LDFLAGS) $^ $(LDLIBS) --shared -o $@
+
+$(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_read.so
 	$(call msg,BINARY,,$@)
-	$(Q)$(CC) $(CFLAGS) $(LDFLAGS) $< $(LDLIBS) -Wl,--build-id=sha1 -o $@
+	$(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.c,$^)			       \
+		  liburandom_read.so $(LDLIBS)	       			       \
+		  -Wl,-rpath=. -Wl,--build-id=sha1 -o $@
 
 $(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch])
 	$(call msg,MOD,,$@)
@@ -493,6 +499,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c	\
 			 btf_helpers.c flow_dissector_load.h		\
 			 cap_helpers.c
 TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko	\
+		       $(OUTPUT)/liburandom_read.so			\
 		       ima_setup.sh					\
 		       $(wildcard progs/btf_dump_test_case_*.c)
 TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c
index 0677c9bfd40b..a71f51bdc08d 100644
--- a/tools/testing/selftests/bpf/prog_tests/usdt.c
+++ b/tools/testing/selftests/bpf/prog_tests/usdt.c
@@ -6,6 +6,7 @@
 #include "../sdt.h"
 
 #include "test_usdt.skel.h"
+#include "test_urandom_usdt.skel.h"
 
 int lets_test_this(int);
 
@@ -304,10 +305,117 @@ cleanup:
 	test_usdt__destroy(skel);
 }
 
+static FILE *urand_spawn(int *pid)
+{
+	FILE *f;
+
+	/* urandom_read's stdout is wired into f */
+	f = popen("./urandom_read 1 report-pid", "r");
+	if (!f)
+		return NULL;
+
+	if (fscanf(f, "%d", pid) != 1) {
+		pclose(f);
+		return NULL;
+	}
+
+	return f;
+}
+
+static int urand_trigger(FILE **urand_pipe)
+{
+	int exit_code;
+
+	/* pclose() waits for child process to exit and returns their exit code */
+	exit_code = pclose(*urand_pipe);
+	*urand_pipe = NULL;
+
+	return exit_code;
+}
+
+static void subtest_urandom_usdt(bool auto_attach)
+{
+	struct test_urandom_usdt *skel;
+	struct test_urandom_usdt__bss *bss;
+	struct bpf_link *l;
+	FILE *urand_pipe = NULL;
+	int err, urand_pid = 0;
+
+	skel = test_urandom_usdt__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	urand_pipe = urand_spawn(&urand_pid);
+	if (!ASSERT_OK_PTR(urand_pipe, "urand_spawn"))
+		goto cleanup;
+
+	bss = skel->bss;
+	bss->urand_pid = urand_pid;
+
+	if (auto_attach) {
+		err = test_urandom_usdt__attach(skel);
+		if (!ASSERT_OK(err, "skel_auto_attach"))
+			goto cleanup;
+	} else {
+		l = bpf_program__attach_usdt(skel->progs.urand_read_without_sema,
+					     urand_pid, "./urandom_read",
+					     "urand", "read_without_sema", NULL);
+		if (!ASSERT_OK_PTR(l, "urand_without_sema_attach"))
+			goto cleanup;
+		skel->links.urand_read_without_sema = l;
+
+		l = bpf_program__attach_usdt(skel->progs.urand_read_with_sema,
+					     urand_pid, "./urandom_read",
+					     "urand", "read_with_sema", NULL);
+		if (!ASSERT_OK_PTR(l, "urand_with_sema_attach"))
+			goto cleanup;
+		skel->links.urand_read_with_sema = l;
+
+		l = bpf_program__attach_usdt(skel->progs.urandlib_read_without_sema,
+					     urand_pid, "./liburandom_read.so",
+					     "urandlib", "read_without_sema", NULL);
+		if (!ASSERT_OK_PTR(l, "urandlib_without_sema_attach"))
+			goto cleanup;
+		skel->links.urandlib_read_without_sema = l;
+
+		l = bpf_program__attach_usdt(skel->progs.urandlib_read_with_sema,
+					     urand_pid, "./liburandom_read.so",
+					     "urandlib", "read_with_sema", NULL);
+		if (!ASSERT_OK_PTR(l, "urandlib_with_sema_attach"))
+			goto cleanup;
+		skel->links.urandlib_read_with_sema = l;
+
+	}
+
+	/* trigger urandom_read USDTs */
+	ASSERT_OK(urand_trigger(&urand_pipe), "urand_exit_code");
+
+	ASSERT_EQ(bss->urand_read_without_sema_call_cnt, 1, "urand_wo_sema_cnt");
+	ASSERT_EQ(bss->urand_read_without_sema_buf_sz_sum, 256, "urand_wo_sema_sum");
+
+	ASSERT_EQ(bss->urand_read_with_sema_call_cnt, 1, "urand_w_sema_cnt");
+	ASSERT_EQ(bss->urand_read_with_sema_buf_sz_sum, 256, "urand_w_sema_sum");
+
+	ASSERT_EQ(bss->urandlib_read_without_sema_call_cnt, 1, "urandlib_wo_sema_cnt");
+	ASSERT_EQ(bss->urandlib_read_without_sema_buf_sz_sum, 256, "urandlib_wo_sema_sum");
+
+	ASSERT_EQ(bss->urandlib_read_with_sema_call_cnt, 1, "urandlib_w_sema_cnt");
+	ASSERT_EQ(bss->urandlib_read_with_sema_buf_sz_sum, 256, "urandlib_w_sema_sum");
+
+cleanup:
+	if (urand_pipe)
+		pclose(urand_pipe);
+	test_urandom_usdt__destroy(skel);
+}
+
 void test_usdt(void)
 {
 	if (test__start_subtest("basic"))
 		subtest_basic_usdt();
 	if (test__start_subtest("multispec"))
 		subtest_multispec_usdt();
+	if (test__start_subtest("urand_auto_attach"))
+		subtest_urandom_usdt(true /* auto_attach */);
+	if (test__start_subtest("urand_pid_attach"))
+		subtest_urandom_usdt(false /* auto_attach */);
 }
diff --git a/tools/testing/selftests/bpf/progs/test_urandom_usdt.c b/tools/testing/selftests/bpf/progs/test_urandom_usdt.c
new file mode 100644
index 000000000000..3539b02bd5f7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_urandom_usdt.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/usdt.bpf.h>
+
+int urand_pid;
+
+int urand_read_without_sema_call_cnt;
+int urand_read_without_sema_buf_sz_sum;
+
+SEC("usdt/./urandom_read:urand:read_without_sema")
+int BPF_USDT(urand_read_without_sema, int iter_num, int iter_cnt, int buf_sz)
+{
+	if (urand_pid != (bpf_get_current_pid_tgid() >> 32))
+		return 0;
+
+	__sync_fetch_and_add(&urand_read_without_sema_call_cnt, 1);
+	__sync_fetch_and_add(&urand_read_without_sema_buf_sz_sum, buf_sz);
+
+	return 0;
+}
+
+int urand_read_with_sema_call_cnt;
+int urand_read_with_sema_buf_sz_sum;
+
+SEC("usdt/./urandom_read:urand:read_with_sema")
+int BPF_USDT(urand_read_with_sema, int iter_num, int iter_cnt, int buf_sz)
+{
+	if (urand_pid != (bpf_get_current_pid_tgid() >> 32))
+		return 0;
+
+	__sync_fetch_and_add(&urand_read_with_sema_call_cnt, 1);
+	__sync_fetch_and_add(&urand_read_with_sema_buf_sz_sum, buf_sz);
+
+	return 0;
+}
+
+int urandlib_read_without_sema_call_cnt;
+int urandlib_read_without_sema_buf_sz_sum;
+
+SEC("usdt/./liburandom_read.so:urandlib:read_without_sema")
+int BPF_USDT(urandlib_read_without_sema, int iter_num, int iter_cnt, int buf_sz)
+{
+	if (urand_pid != (bpf_get_current_pid_tgid() >> 32))
+		return 0;
+
+	__sync_fetch_and_add(&urandlib_read_without_sema_call_cnt, 1);
+	__sync_fetch_and_add(&urandlib_read_without_sema_buf_sz_sum, buf_sz);
+
+	return 0;
+}
+
+int urandlib_read_with_sema_call_cnt;
+int urandlib_read_with_sema_buf_sz_sum;
+
+SEC("usdt/./liburandom_read.so:urandlib:read_with_sema")
+int BPF_USDT(urandlib_read_with_sema, int iter_num, int iter_cnt, int buf_sz)
+{
+	if (urand_pid != (bpf_get_current_pid_tgid() >> 32))
+		return 0;
+
+	__sync_fetch_and_add(&urandlib_read_with_sema_call_cnt, 1);
+	__sync_fetch_and_add(&urandlib_read_with_sema_buf_sz_sum, buf_sz);
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_usdt_multispec.c b/tools/testing/selftests/bpf/progs/test_usdt_multispec.c
index 3a090681f981..aa6de32b50d1 100644
--- a/tools/testing/selftests/bpf/progs/test_usdt_multispec.c
+++ b/tools/testing/selftests/bpf/progs/test_usdt_multispec.c
@@ -26,8 +26,6 @@ int BPF_USDT(usdt_100, int x)
 	__sync_fetch_and_add(&usdt_100_called, 1);
 	__sync_fetch_and_add(&usdt_100_sum, x);
 
-	bpf_printk("X is %d, sum is %d", x, usdt_100_sum);
-
 	return 0;
 }
 
diff --git a/tools/testing/selftests/bpf/urandom_read.c b/tools/testing/selftests/bpf/urandom_read.c
index db781052758d..e92644d0fa75 100644
--- a/tools/testing/selftests/bpf/urandom_read.c
+++ b/tools/testing/selftests/bpf/urandom_read.c
@@ -1,32 +1,85 @@
+#include <stdbool.h>
 #include <stdio.h>
 #include <unistd.h>
+#include <errno.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <stdlib.h>
+#include <signal.h>
+
+#define _SDT_HAS_SEMAPHORES 1
+#include "sdt.h"
+
+#define SEC(name) __attribute__((section(name), used))
 
 #define BUF_SIZE 256
 
+/* defined in urandom_read_aux.c */
+void urand_read_without_sema(int iter_num, int iter_cnt, int read_sz);
+/* these are coming from urandom_read_lib{1,2}.c */
+void urandlib_read_with_sema(int iter_num, int iter_cnt, int read_sz);
+void urandlib_read_without_sema(int iter_num, int iter_cnt, int read_sz);
+
+unsigned short urand_read_with_sema_semaphore SEC(".probes");
+
 static __attribute__((noinline))
 void urandom_read(int fd, int count)
 {
-       char buf[BUF_SIZE];
-       int i;
+	char buf[BUF_SIZE];
+	int i;
+
+	for (i = 0; i < count; ++i) {
+		read(fd, buf, BUF_SIZE);
+
+		/* trigger USDTs defined in executable itself */
+		urand_read_without_sema(i, count, BUF_SIZE);
+		STAP_PROBE3(urand, read_with_sema, i, count, BUF_SIZE);
 
-       for (i = 0; i < count; ++i)
-               read(fd, buf, BUF_SIZE);
+		/* trigger USDTs defined in shared lib */
+		urandlib_read_without_sema(i, count, BUF_SIZE);
+		urandlib_read_with_sema(i, count, BUF_SIZE);
+	}
+}
+
+static volatile bool parent_ready;
+
+static void handle_sigpipe(int sig)
+{
+	parent_ready = true;
 }
 
 int main(int argc, char *argv[])
 {
 	int fd = open("/dev/urandom", O_RDONLY);
 	int count = 4;
+	bool report_pid = false;
 
 	if (fd < 0)
 		return 1;
 
-	if (argc == 2)
+	if (argc >= 2)
 		count = atoi(argv[1]);
+	if (argc >= 3) {
+		report_pid = true;
+		/* install SIGPIPE handler to catch when parent closes their
+		 * end of the pipe (on the other side of our stdout)
+		 */
+		signal(SIGPIPE, handle_sigpipe);
+	}
+
+	/* report PID and wait for parent process to send us "signal" by
+	 * closing stdout
+	 */
+	if (report_pid) {
+		while (!parent_ready) {
+			fprintf(stdout, "%d\n", getpid());
+			fflush(stdout);
+		}
+		/* at this point stdout is closed, parent process knows our
+		 * PID and is ready to trace us
+		 */
+	}
 
 	urandom_read(fd, count);
 
diff --git a/tools/testing/selftests/bpf/urandom_read_aux.c b/tools/testing/selftests/bpf/urandom_read_aux.c
new file mode 100644
index 000000000000..6132edcfea74
--- /dev/null
+++ b/tools/testing/selftests/bpf/urandom_read_aux.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include "sdt.h"
+
+void urand_read_without_sema(int iter_num, int iter_cnt, int read_sz)
+{
+	/* semaphore-less USDT */
+	STAP_PROBE3(urand, read_without_sema, iter_num, iter_cnt, read_sz);
+}
diff --git a/tools/testing/selftests/bpf/urandom_read_lib1.c b/tools/testing/selftests/bpf/urandom_read_lib1.c
new file mode 100644
index 000000000000..86186e24b740
--- /dev/null
+++ b/tools/testing/selftests/bpf/urandom_read_lib1.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#define _SDT_HAS_SEMAPHORES 1
+#include "sdt.h"
+
+#define SEC(name) __attribute__((section(name), used))
+
+unsigned short urandlib_read_with_sema_semaphore SEC(".probes");
+
+void urandlib_read_with_sema(int iter_num, int iter_cnt, int read_sz)
+{
+	STAP_PROBE3(urandlib, read_with_sema, iter_num, iter_cnt, read_sz);
+}
diff --git a/tools/testing/selftests/bpf/urandom_read_lib2.c b/tools/testing/selftests/bpf/urandom_read_lib2.c
new file mode 100644
index 000000000000..9d401ad9838f
--- /dev/null
+++ b/tools/testing/selftests/bpf/urandom_read_lib2.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include "sdt.h"
+
+void urandlib_read_without_sema(int iter_num, int iter_cnt, int read_sz)
+{
+	STAP_PROBE3(urandlib, read_without_sema, iter_num, iter_cnt, read_sz);
+}
-- 
cgit 


From 2d0df01974ce2b59b6f7d5bd3ea58d74f12ddf85 Mon Sep 17 00:00:00 2001
From: Yuntao Wang <ytcoode@gmail.com>
Date: Tue, 5 Apr 2022 22:57:11 +0800
Subject: selftests/bpf: Fix file descriptor leak in load_kallsyms()

Currently, if sym_cnt > 0, it just returns and does not close file, fix it.

Signed-off-by: Yuntao Wang <ytcoode@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220405145711.49543-1-ytcoode@gmail.com
---
 tools/testing/selftests/bpf/trace_helpers.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 3d6217e3aff7..9c4be2cdb21a 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -25,15 +25,12 @@ static int ksym_cmp(const void *p1, const void *p2)
 
 int load_kallsyms(void)
 {
-	FILE *f = fopen("/proc/kallsyms", "r");
+	FILE *f;
 	char func[256], buf[256];
 	char symbol;
 	void *addr;
 	int i = 0;
 
-	if (!f)
-		return -ENOENT;
-
 	/*
 	 * This is called/used from multiplace places,
 	 * load symbols just once.
@@ -41,6 +38,10 @@ int load_kallsyms(void)
 	if (sym_cnt)
 		return 0;
 
+	f = fopen("/proc/kallsyms", "r");
+	if (!f)
+		return -ENOENT;
+
 	while (fgets(buf, sizeof(buf), f)) {
 		if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3)
 			break;
-- 
cgit 


From 2f5d27e6cf14efe652748bad89ee529ed5a5d577 Mon Sep 17 00:00:00 2001
From: Reiji Watanabe <reijiw@google.com>
Date: Mon, 28 Mar 2022 20:19:24 -0700
Subject: KVM: arm64: selftests: Introduce vcpu_width_config

Introduce a test for aarch64 that ensures non-mixed-width vCPUs
(all 64bit vCPUs or all 32bit vcPUs) can be configured, and
mixed-width vCPUs cannot be configured.

Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Reiji Watanabe <reijiw@google.com>
Reviewed-by: Oliver Upton <oupton@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220329031924.619453-3-reijiw@google.com
---
 tools/testing/selftests/kvm/.gitignore             |   1 +
 tools/testing/selftests/kvm/Makefile               |   1 +
 .../selftests/kvm/aarch64/vcpu_width_config.c      | 122 +++++++++++++++++++++
 3 files changed, 124 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/aarch64/vcpu_width_config.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index d1e8f5237469..573d93a1d61f 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -3,6 +3,7 @@
 /aarch64/debug-exceptions
 /aarch64/get-reg-list
 /aarch64/psci_cpu_on_test
+/aarch64/vcpu_width_config
 /aarch64/vgic_init
 /aarch64/vgic_irq
 /s390x/memop
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 21c2dbd21a81..681b173aa87c 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -106,6 +106,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/arch_timer
 TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
 TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
 TEST_GEN_PROGS_aarch64 += aarch64/psci_cpu_on_test
+TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config
 TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
 TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq
 TEST_GEN_PROGS_aarch64 += demand_paging_test
diff --git a/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c b/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c
new file mode 100644
index 000000000000..6e9402679229
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vcpu_width_config - Test KVM_ARM_VCPU_INIT() with KVM_ARM_VCPU_EL1_32BIT.
+ *
+ * Copyright (c) 2022 Google LLC.
+ *
+ * This is a test that ensures that non-mixed-width vCPUs (all 64bit vCPUs
+ * or all 32bit vcPUs) can be configured and mixed-width vCPUs cannot be
+ * configured.
+ */
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+
+/*
+ * Add a vCPU, run KVM_ARM_VCPU_INIT with @init1, and then
+ * add another vCPU, and run KVM_ARM_VCPU_INIT with @init2.
+ */
+static int add_init_2vcpus(struct kvm_vcpu_init *init1,
+			   struct kvm_vcpu_init *init2)
+{
+	struct kvm_vm *vm;
+	int ret;
+
+	vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+
+	vm_vcpu_add(vm, 0);
+	ret = _vcpu_ioctl(vm, 0, KVM_ARM_VCPU_INIT, init1);
+	if (ret)
+		goto free_exit;
+
+	vm_vcpu_add(vm, 1);
+	ret = _vcpu_ioctl(vm, 1, KVM_ARM_VCPU_INIT, init2);
+
+free_exit:
+	kvm_vm_free(vm);
+	return ret;
+}
+
+/*
+ * Add two vCPUs, then run KVM_ARM_VCPU_INIT for one vCPU with @init1,
+ * and run KVM_ARM_VCPU_INIT for another vCPU with @init2.
+ */
+static int add_2vcpus_init_2vcpus(struct kvm_vcpu_init *init1,
+				  struct kvm_vcpu_init *init2)
+{
+	struct kvm_vm *vm;
+	int ret;
+
+	vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+
+	vm_vcpu_add(vm, 0);
+	vm_vcpu_add(vm, 1);
+
+	ret = _vcpu_ioctl(vm, 0, KVM_ARM_VCPU_INIT, init1);
+	if (ret)
+		goto free_exit;
+
+	ret = _vcpu_ioctl(vm, 1, KVM_ARM_VCPU_INIT, init2);
+
+free_exit:
+	kvm_vm_free(vm);
+	return ret;
+}
+
+/*
+ * Tests that two 64bit vCPUs can be configured, two 32bit vCPUs can be
+ * configured, and two mixed-width vCPUs cannot be configured.
+ * Each of those three cases, configure vCPUs in two different orders.
+ * The one is running KVM_CREATE_VCPU for 2 vCPUs, and then running
+ * KVM_ARM_VCPU_INIT for them.
+ * The other is running KVM_CREATE_VCPU and KVM_ARM_VCPU_INIT for a vCPU,
+ * and then run those commands for another vCPU.
+ */
+int main(void)
+{
+	struct kvm_vcpu_init init1, init2;
+	struct kvm_vm *vm;
+	int ret;
+
+	if (!kvm_check_cap(KVM_CAP_ARM_EL1_32BIT)) {
+		print_skip("KVM_CAP_ARM_EL1_32BIT is not supported");
+		exit(KSFT_SKIP);
+	}
+
+	/* Get the preferred target type and copy that to init2 for later use */
+	vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+	vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init1);
+	kvm_vm_free(vm);
+	init2 = init1;
+
+	/* Test with 64bit vCPUs */
+	ret = add_init_2vcpus(&init1, &init1);
+	TEST_ASSERT(ret == 0,
+		    "Configuring 64bit EL1 vCPUs failed unexpectedly");
+	ret = add_2vcpus_init_2vcpus(&init1, &init1);
+	TEST_ASSERT(ret == 0,
+		    "Configuring 64bit EL1 vCPUs failed unexpectedly");
+
+	/* Test with 32bit vCPUs */
+	init1.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT);
+	ret = add_init_2vcpus(&init1, &init1);
+	TEST_ASSERT(ret == 0,
+		    "Configuring 32bit EL1 vCPUs failed unexpectedly");
+	ret = add_2vcpus_init_2vcpus(&init1, &init1);
+	TEST_ASSERT(ret == 0,
+		    "Configuring 32bit EL1 vCPUs failed unexpectedly");
+
+	/* Test with mixed-width vCPUs  */
+	init1.features[0] = 0;
+	init2.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT);
+	ret = add_init_2vcpus(&init1, &init2);
+	TEST_ASSERT(ret != 0,
+		    "Configuring mixed-width vCPUs worked unexpectedly");
+	ret = add_2vcpus_init_2vcpus(&init1, &init2);
+	TEST_ASSERT(ret != 0,
+		    "Configuring mixed-width vCPUs worked unexpectedly");
+
+	return 0;
+}
-- 
cgit 


From 53968dafc4a6061c1e01d884f1f9a4b8c4b0d5bc Mon Sep 17 00:00:00 2001
From: Maxim Mikityanskiy <maximmi@nvidia.com>
Date: Wed, 6 Apr 2022 15:41:13 +0300
Subject: bpf: Adjust bpf_tcp_check_syncookie selftest to test dual-stack
 sockets

The previous commit fixed support for dual-stack sockets in
bpf_tcp_check_syncookie. This commit adjusts the selftest to verify the
fixed functionality.

Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Arthur Fabre <afabre@cloudflare.com>
Link: https://lore.kernel.org/bpf/20220406124113.2795730-2-maximmi@nvidia.com
---
 .../selftests/bpf/test_tcp_check_syncookie_user.c  | 78 ++++++++++++++++------
 1 file changed, 59 insertions(+), 19 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
index b9e991d43155..e7775d3bbe08 100644
--- a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
+++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
@@ -18,8 +18,9 @@
 #include "bpf_rlimit.h"
 #include "cgroup_helpers.h"
 
-static int start_server(const struct sockaddr *addr, socklen_t len)
+static int start_server(const struct sockaddr *addr, socklen_t len, bool dual)
 {
+	int mode = !dual;
 	int fd;
 
 	fd = socket(addr->sa_family, SOCK_STREAM, 0);
@@ -28,6 +29,14 @@ static int start_server(const struct sockaddr *addr, socklen_t len)
 		goto out;
 	}
 
+	if (addr->sa_family == AF_INET6) {
+		if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, (char *)&mode,
+			       sizeof(mode)) == -1) {
+			log_err("Failed to set the dual-stack mode");
+			goto close_out;
+		}
+	}
+
 	if (bind(fd, addr, len) == -1) {
 		log_err("Failed to bind server socket");
 		goto close_out;
@@ -47,24 +56,17 @@ out:
 	return fd;
 }
 
-static int connect_to_server(int server_fd)
+static int connect_to_server(const struct sockaddr *addr, socklen_t len)
 {
-	struct sockaddr_storage addr;
-	socklen_t len = sizeof(addr);
 	int fd = -1;
 
-	if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
-		log_err("Failed to get server addr");
-		goto out;
-	}
-
-	fd = socket(addr.ss_family, SOCK_STREAM, 0);
+	fd = socket(addr->sa_family, SOCK_STREAM, 0);
 	if (fd == -1) {
 		log_err("Failed to create client socket");
 		goto out;
 	}
 
-	if (connect(fd, (const struct sockaddr *)&addr, len) == -1) {
+	if (connect(fd, (const struct sockaddr *)addr, len) == -1) {
 		log_err("Fail to connect to server");
 		goto close_out;
 	}
@@ -116,7 +118,8 @@ err:
 	return map_fd;
 }
 
-static int run_test(int server_fd, int results_fd, bool xdp)
+static int run_test(int server_fd, int results_fd, bool xdp,
+		    const struct sockaddr *addr, socklen_t len)
 {
 	int client = -1, srv_client = -1;
 	int ret = 0;
@@ -142,7 +145,7 @@ static int run_test(int server_fd, int results_fd, bool xdp)
 		goto err;
 	}
 
-	client = connect_to_server(server_fd);
+	client = connect_to_server(addr, len);
 	if (client == -1)
 		goto err;
 
@@ -199,12 +202,30 @@ out:
 	return ret;
 }
 
+static bool get_port(int server_fd, in_port_t *port)
+{
+	struct sockaddr_in addr;
+	socklen_t len = sizeof(addr);
+
+	if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
+		log_err("Failed to get server addr");
+		return false;
+	}
+
+	/* sin_port and sin6_port are located at the same offset. */
+	*port = addr.sin_port;
+	return true;
+}
+
 int main(int argc, char **argv)
 {
 	struct sockaddr_in addr4;
 	struct sockaddr_in6 addr6;
+	struct sockaddr_in addr4dual;
+	struct sockaddr_in6 addr6dual;
 	int server = -1;
 	int server_v6 = -1;
+	int server_dual = -1;
 	int results = -1;
 	int err = 0;
 	bool xdp;
@@ -224,25 +245,43 @@ int main(int argc, char **argv)
 	addr4.sin_family = AF_INET;
 	addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
 	addr4.sin_port = 0;
+	memcpy(&addr4dual, &addr4, sizeof(addr4dual));
 
 	memset(&addr6, 0, sizeof(addr6));
 	addr6.sin6_family = AF_INET6;
 	addr6.sin6_addr = in6addr_loopback;
 	addr6.sin6_port = 0;
 
-	server = start_server((const struct sockaddr *)&addr4, sizeof(addr4));
-	if (server == -1)
+	memset(&addr6dual, 0, sizeof(addr6dual));
+	addr6dual.sin6_family = AF_INET6;
+	addr6dual.sin6_addr = in6addr_any;
+	addr6dual.sin6_port = 0;
+
+	server = start_server((const struct sockaddr *)&addr4, sizeof(addr4),
+			      false);
+	if (server == -1 || !get_port(server, &addr4.sin_port))
 		goto err;
 
 	server_v6 = start_server((const struct sockaddr *)&addr6,
-				 sizeof(addr6));
-	if (server_v6 == -1)
+				 sizeof(addr6), false);
+	if (server_v6 == -1 || !get_port(server_v6, &addr6.sin6_port))
+		goto err;
+
+	server_dual = start_server((const struct sockaddr *)&addr6dual,
+				   sizeof(addr6dual), true);
+	if (server_dual == -1 || !get_port(server_dual, &addr4dual.sin_port))
+		goto err;
+
+	if (run_test(server, results, xdp,
+		     (const struct sockaddr *)&addr4, sizeof(addr4)))
 		goto err;
 
-	if (run_test(server, results, xdp))
+	if (run_test(server_v6, results, xdp,
+		     (const struct sockaddr *)&addr6, sizeof(addr6)))
 		goto err;
 
-	if (run_test(server_v6, results, xdp))
+	if (run_test(server_dual, results, xdp,
+		     (const struct sockaddr *)&addr4dual, sizeof(addr4dual)))
 		goto err;
 
 	printf("ok\n");
@@ -252,6 +291,7 @@ err:
 out:
 	close(server);
 	close(server_v6);
+	close(server_dual);
 	close(results);
 	return err;
 }
-- 
cgit 


From 958ddfd75d83d83e713027677c8786781e1f4576 Mon Sep 17 00:00:00 2001
From: Yuntao Wang <ytcoode@gmail.com>
Date: Wed, 6 Apr 2022 08:36:22 +0800
Subject: selftests/bpf: Fix issues in parse_num_list()

The function does not check that parsing_end is false after parsing
argument. Thus, if the final part of the argument is something like '4-',
which is invalid, parse_num_list() will discard it instead of returning
-EINVAL.

Before:

 $ ./test_progs -n 2,4-
 #2 atomic_bounds:OK
 Summary: 1/0 PASSED, 0 SKIPPED, 0 FAILED

After:

 $ ./test_progs -n 2,4-
 Failed to parse test numbers.

Signed-off-by: Yuntao Wang <ytcoode@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220406003622.73539-1-ytcoode@gmail.com
---
 tools/testing/selftests/bpf/testing_helpers.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c
index 795b6798ccee..87867f7a78c3 100644
--- a/tools/testing/selftests/bpf/testing_helpers.c
+++ b/tools/testing/selftests/bpf/testing_helpers.c
@@ -60,7 +60,7 @@ int parse_num_list(const char *s, bool **num_set, int *num_set_len)
 			set[i] = true;
 	}
 
-	if (!set)
+	if (!set || parsing_end)
 		return -EINVAL;
 
 	*num_set = set;
-- 
cgit 


From ebaf24c589d7c714b763a80856d1a6df3ba25b84 Mon Sep 17 00:00:00 2001
From: Artem Savkov <asavkov@redhat.com>
Date: Wed, 6 Apr 2022 10:54:08 +0200
Subject: selftests/bpf: Use bpf_num_possible_cpus() in per-cpu map allocations

bpf_map_value_size() uses num_possible_cpus() to determine map size, but
some of the tests only allocate enough memory for online cpus. This
results in out-of-bound writes in userspace during bpf(BPF_MAP_LOOKUP_ELEM)
syscalls in cases when number of online cpus is lower than the number of
possible cpus. Fix by switching from get_nprocs_conf() to
bpf_num_possible_cpus() when determining the number of processors in
these tests (test_progs/netcnt and test_cgroup_storage).

Signed-off-by: Artem Savkov <asavkov@redhat.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220406085408.339336-1-asavkov@redhat.com
---
 tools/testing/selftests/bpf/prog_tests/netcnt.c   | 2 +-
 tools/testing/selftests/bpf/test_cgroup_storage.c | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/netcnt.c b/tools/testing/selftests/bpf/prog_tests/netcnt.c
index 954964f0ac3d..d3915c58d0e1 100644
--- a/tools/testing/selftests/bpf/prog_tests/netcnt.c
+++ b/tools/testing/selftests/bpf/prog_tests/netcnt.c
@@ -25,7 +25,7 @@ void serial_test_netcnt(void)
 	if (!ASSERT_OK_PTR(skel, "netcnt_prog__open_and_load"))
 		return;
 
-	nproc = get_nprocs_conf();
+	nproc = bpf_num_possible_cpus();
 	percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc);
 	if (!ASSERT_OK_PTR(percpu_netcnt, "malloc(percpu_netcnt)"))
 		goto err;
diff --git a/tools/testing/selftests/bpf/test_cgroup_storage.c b/tools/testing/selftests/bpf/test_cgroup_storage.c
index d6a1be4d8020..2ffa08198d1c 100644
--- a/tools/testing/selftests/bpf/test_cgroup_storage.c
+++ b/tools/testing/selftests/bpf/test_cgroup_storage.c
@@ -7,6 +7,7 @@
 #include <sys/sysinfo.h>
 
 #include "bpf_rlimit.h"
+#include "bpf_util.h"
 #include "cgroup_helpers.h"
 #include "testing_helpers.h"
 
@@ -44,7 +45,7 @@ int main(int argc, char **argv)
 	unsigned long long *percpu_value;
 	int cpu, nproc;
 
-	nproc = get_nprocs_conf();
+	nproc = bpf_num_possible_cpus();
 	percpu_value = malloc(sizeof(*percpu_value) * nproc);
 	if (!percpu_value) {
 		printf("Not enough memory for per-cpu area (%d cpus)\n", nproc);
-- 
cgit 


From 7cb29b1c99f4244c3f1de04e88eb9aed842a0cba Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Sat, 19 Mar 2022 13:38:26 +0530
Subject: selftests/bpf: Test passing rdonly mem to global func

Add two test cases, one pass read only map value pointer to global
func, which should be rejected. The same code checks it for kfunc, so
that is covered as well. Second one tries to use the missing check for
PTR_TO_MEM's MEM_RDONLY flag and tries to write to a read only memory
pointer. Without prior patches, both of these tests fail.

Reviewed-by: Hao Luo <haoluo@google.com>
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220319080827.73251-5-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/ksyms_btf.c     | 17 ++++++++++++-----
 .../selftests/bpf/prog_tests/test_global_funcs.c       |  1 +
 tools/testing/selftests/bpf/progs/test_global_func17.c | 16 ++++++++++++++++
 .../selftests/bpf/progs/test_ksyms_btf_write_check.c   | 18 +++++++++++++++++-
 4 files changed, 46 insertions(+), 6 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/test_global_func17.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
index f6933b06daf8..1d7a2f1e0731 100644
--- a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
@@ -138,12 +138,16 @@ cleanup:
 	test_ksyms_weak_lskel__destroy(skel);
 }
 
-static void test_write_check(void)
+static void test_write_check(bool test_handler1)
 {
 	struct test_ksyms_btf_write_check *skel;
 
-	skel = test_ksyms_btf_write_check__open_and_load();
-	ASSERT_ERR_PTR(skel, "unexpected load of a prog writing to ksym memory\n");
+	skel = test_ksyms_btf_write_check__open();
+	if (!ASSERT_OK_PTR(skel, "test_ksyms_btf_write_check__open"))
+		return;
+	bpf_program__set_autoload(test_handler1 ? skel->progs.handler2 : skel->progs.handler1, false);
+	ASSERT_ERR(test_ksyms_btf_write_check__load(skel),
+		   "unexpected load of a prog writing to ksym memory\n");
 
 	test_ksyms_btf_write_check__destroy(skel);
 }
@@ -179,6 +183,9 @@ void test_ksyms_btf(void)
 	if (test__start_subtest("weak_ksyms_lskel"))
 		test_weak_syms_lskel();
 
-	if (test__start_subtest("write_check"))
-		test_write_check();
+	if (test__start_subtest("write_check1"))
+		test_write_check(true);
+
+	if (test__start_subtest("write_check2"))
+		test_write_check(false);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
index 509e21d5cb9d..b90ee47d3111 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
@@ -81,6 +81,7 @@ void test_test_global_funcs(void)
 		{ "test_global_func14.o", "reference type('FWD S') size cannot be determined" },
 		{ "test_global_func15.o", "At program exit the register R0 has value" },
 		{ "test_global_func16.o", "invalid indirect read from stack" },
+		{ "test_global_func17.o", "Caller passes invalid args into func#1" },
 	};
 	libbpf_print_fn_t old_print_fn = NULL;
 	int err, i, duration = 0;
diff --git a/tools/testing/selftests/bpf/progs/test_global_func17.c b/tools/testing/selftests/bpf/progs/test_global_func17.c
new file mode 100644
index 000000000000..2b8b9b8ba018
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_global_func17.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+__noinline int foo(int *p)
+{
+	return p ? (*p = 42) : 0;
+}
+
+const volatile int i;
+
+SEC("tc")
+int test_cls(struct __sk_buff *skb)
+{
+	return foo((int *)&i);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c b/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c
index 2180c41cd890..a72a5bf3812a 100644
--- a/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c
+++ b/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c
@@ -8,7 +8,7 @@
 extern const int bpf_prog_active __ksym; /* int type global var. */
 
 SEC("raw_tp/sys_enter")
-int handler(const void *ctx)
+int handler1(const void *ctx)
 {
 	int *active;
 	__u32 cpu;
@@ -26,4 +26,20 @@ int handler(const void *ctx)
 	return 0;
 }
 
+__noinline int write_active(int *p)
+{
+	return p ? (*p = 42) : 0;
+}
+
+SEC("raw_tp/sys_enter")
+int handler2(const void *ctx)
+{
+	int *active;
+	__u32 cpu;
+
+	active = bpf_this_cpu_ptr(&bpf_prog_active);
+	write_active(active);
+	return 0;
+}
+
 char _license[] SEC("license") = "GPL";
-- 
cgit 


From 9fc4476a08b6dc61e406c2c0c4e0690512f60e82 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Sat, 19 Mar 2022 13:38:27 +0530
Subject: selftests/bpf: Test for writes to map key from BPF helpers

When invoking bpf_for_each_map_elem callback, we are passed a
PTR_TO_MAP_KEY, previously writes to this through helper may be allowed,
but the fix in previous patches is meant to prevent that case. The test
case tries to pass it as writable memory to helper, and fails test if it
succeeds to pass the verifier.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220319080827.73251-6-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/for_each.c  | 12 ++++++++++
 .../bpf/progs/for_each_map_elem_write_key.c        | 27 ++++++++++++++++++++++
 2 files changed, 39 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/for_each_map_elem_write_key.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/for_each.c b/tools/testing/selftests/bpf/prog_tests/for_each.c
index 044df13ee069..754e80937e5d 100644
--- a/tools/testing/selftests/bpf/prog_tests/for_each.c
+++ b/tools/testing/selftests/bpf/prog_tests/for_each.c
@@ -4,6 +4,7 @@
 #include <network_helpers.h>
 #include "for_each_hash_map_elem.skel.h"
 #include "for_each_array_map_elem.skel.h"
+#include "for_each_map_elem_write_key.skel.h"
 
 static unsigned int duration;
 
@@ -129,10 +130,21 @@ out:
 	for_each_array_map_elem__destroy(skel);
 }
 
+static void test_write_map_key(void)
+{
+	struct for_each_map_elem_write_key *skel;
+
+	skel = for_each_map_elem_write_key__open_and_load();
+	if (!ASSERT_ERR_PTR(skel, "for_each_map_elem_write_key__open_and_load"))
+		for_each_map_elem_write_key__destroy(skel);
+}
+
 void test_for_each(void)
 {
 	if (test__start_subtest("hash_map"))
 		test_hash_map();
 	if (test__start_subtest("array_map"))
 		test_array_map();
+	if (test__start_subtest("write_map_key"))
+		test_write_map_key();
 }
diff --git a/tools/testing/selftests/bpf/progs/for_each_map_elem_write_key.c b/tools/testing/selftests/bpf/progs/for_each_map_elem_write_key.c
new file mode 100644
index 000000000000..8e545865ea33
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/for_each_map_elem_write_key.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u64);
+} array_map SEC(".maps");
+
+static __u64
+check_array_elem(struct bpf_map *map, __u32 *key, __u64 *val,
+		 void *data)
+{
+	bpf_get_current_comm(key, sizeof(*key));
+	return 0;
+}
+
+SEC("raw_tp/sys_enter")
+int test_map_key_write(const void *ctx)
+{
+	bpf_for_each_map_elem(&array_map, check_array_elem, NULL, 0);
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit 


From 02de9331c4d0c6bddac9c5fa66d91f70adf8612b Mon Sep 17 00:00:00 2001
From: Andrew Jones <drjones@redhat.com>
Date: Wed, 16 Mar 2022 13:51:29 +0100
Subject: KVM: selftests: get-reg-list: Add KVM_REG_ARM_FW_REG(3)

When testing a kernel with commit a5905d6af492 ("KVM: arm64:
Allow SMCCC_ARCH_WORKAROUND_3 to be discovered and migrated")
get-reg-list output

vregs: Number blessed registers:   234
vregs: Number registers:           238

vregs: There are 1 new registers.
Consider adding them to the blessed reg list with the following lines:

	KVM_REG_ARM_FW_REG(3),

vregs: PASS
...

That output inspired two changes: 1) add the new register to the
blessed list and 2) explain why "Number registers" is actually four
larger than "Number blessed registers" (on the system used for
testing), even though only one register is being stated as new.
The reason is that some registers are host dependent and they get
filtered out when comparing with the blessed list. The system
used for the test apparently had three filtered registers.

Signed-off-by: Andrew Jones <drjones@redhat.com>
Acked-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220316125129.392128-1-drjones@redhat.com
---
 tools/testing/selftests/kvm/aarch64/get-reg-list.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
index f12147c43464..0b571f3fe64c 100644
--- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
@@ -503,8 +503,13 @@ static void run_test(struct vcpu_config *c)
 		++missing_regs;
 
 	if (new_regs || missing_regs) {
+		n = 0;
+		for_each_reg_filtered(i)
+			++n;
+
 		printf("%s: Number blessed registers: %5lld\n", config_name(c), blessed_n);
-		printf("%s: Number registers:         %5lld\n", config_name(c), reg_list->n);
+		printf("%s: Number registers:         %5lld (includes %lld filtered registers)\n",
+		       config_name(c), reg_list->n, reg_list->n - n);
 	}
 
 	if (new_regs) {
@@ -683,9 +688,10 @@ static __u64 base_regs[] = {
 	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[4]),
 	KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpsr),
 	KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpcr),
-	KVM_REG_ARM_FW_REG(0),
-	KVM_REG_ARM_FW_REG(1),
-	KVM_REG_ARM_FW_REG(2),
+	KVM_REG_ARM_FW_REG(0),		/* KVM_REG_ARM_PSCI_VERSION */
+	KVM_REG_ARM_FW_REG(1),		/* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1 */
+	KVM_REG_ARM_FW_REG(2),		/* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 */
+	KVM_REG_ARM_FW_REG(3),		/* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3 */
 	ARM64_SYS_REG(3, 3, 14, 3, 1),	/* CNTV_CTL_EL0 */
 	ARM64_SYS_REG(3, 3, 14, 3, 2),	/* CNTV_CVAL_EL0 */
 	ARM64_SYS_REG(3, 3, 14, 0, 2),
-- 
cgit 


From 386ba265a8197716076a88853244f4437b92b167 Mon Sep 17 00:00:00 2001
From: Oliver Upton <oupton@google.com>
Date: Wed, 6 Apr 2022 23:56:14 +0000
Subject: selftests: KVM: Don't leak GIC FD across dirty log test iterations

dirty_log_perf_test instantiates a VGICv3 for the guest (if supported by
hardware) to reduce the overhead of guest exits. However, the test does
not actually close the GIC fd when cleaning up the VM between test
iterations, meaning that the VM is never actually destroyed in the
kernel.

While this is generally a bad idea, the bug was detected from the kernel
spewing about duplicate debugfs entries as subsequent VMs happen to
reuse the same FD even though the debugfs directory is still present.

Abstract away the notion of setup/cleanup of the GIC FD from the test
by creating arch-specific helpers for test setup/cleanup. Close the GIC
FD on VM cleanup and do nothing for the other architectures.

Fixes: c340f7899af6 ("KVM: selftests: Add vgic initialization for dirty log perf test for ARM")
Reviewed-by: Jing Zhang <jingzhangos@google.com>
Signed-off-by: Oliver Upton <oupton@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220406235615.1447180-3-oupton@google.com
---
 tools/testing/selftests/kvm/dirty_log_perf_test.c | 34 +++++++++++++++++++++--
 1 file changed, 31 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index c9d9e513ca04..7b47ae4f952e 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -18,11 +18,40 @@
 #include "test_util.h"
 #include "perf_test_util.h"
 #include "guest_modes.h"
+
 #ifdef __aarch64__
 #include "aarch64/vgic.h"
 
 #define GICD_BASE_GPA			0x8000000ULL
 #define GICR_BASE_GPA			0x80A0000ULL
+
+static int gic_fd;
+
+static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus)
+{
+	/*
+	 * The test can still run even if hardware does not support GICv3, as it
+	 * is only an optimization to reduce guest exits.
+	 */
+	gic_fd = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA);
+}
+
+static void arch_cleanup_vm(struct kvm_vm *vm)
+{
+	if (gic_fd > 0)
+		close(gic_fd);
+}
+
+#else /* __aarch64__ */
+
+static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus)
+{
+}
+
+static void arch_cleanup_vm(struct kvm_vm *vm)
+{
+}
+
 #endif
 
 /* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/
@@ -206,9 +235,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 		vm_enable_cap(vm, &cap);
 	}
 
-#ifdef __aarch64__
-	vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA);
-#endif
+	arch_setup_vm(vm, nr_vcpus);
 
 	/* Start the iterations */
 	iteration = 0;
@@ -302,6 +329,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	}
 
 	free_bitmaps(bitmaps, p->slots);
+	arch_cleanup_vm(vm);
 	perf_test_destroy_vm(vm);
 }
 
-- 
cgit 


From 21db83846683d3987666505a3ec38f367708199a Mon Sep 17 00:00:00 2001
From: Oliver Upton <oupton@google.com>
Date: Wed, 6 Apr 2022 23:56:15 +0000
Subject: selftests: KVM: Free the GIC FD when cleaning up in arch_timer

In order to correctly destroy a VM, all references to the VM must be
freed. The arch_timer selftest creates a VGIC for the guest, which
itself holds a reference to the VM.

Close the GIC FD when cleaning up a VM.

Signed-off-by: Oliver Upton <oupton@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220406235615.1447180-4-oupton@google.com
---
 tools/testing/selftests/kvm/aarch64/arch_timer.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c
index b08d30bf71c5..3b940a101bc0 100644
--- a/tools/testing/selftests/kvm/aarch64/arch_timer.c
+++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c
@@ -362,11 +362,12 @@ static void test_init_timer_irq(struct kvm_vm *vm)
 	pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq);
 }
 
+static int gic_fd;
+
 static struct kvm_vm *test_vm_create(void)
 {
 	struct kvm_vm *vm;
 	unsigned int i;
-	int ret;
 	int nr_vcpus = test_args.nr_vcpus;
 
 	vm = vm_create_default_with_vcpus(nr_vcpus, 0, 0, guest_code, NULL);
@@ -383,8 +384,8 @@ static struct kvm_vm *test_vm_create(void)
 
 	ucall_init(vm, NULL);
 	test_init_timer_irq(vm);
-	ret = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA);
-	if (ret < 0) {
+	gic_fd = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA);
+	if (gic_fd < 0) {
 		print_skip("Failed to create vgic-v3");
 		exit(KSFT_SKIP);
 	}
@@ -395,6 +396,12 @@ static struct kvm_vm *test_vm_create(void)
 	return vm;
 }
 
+static void test_vm_cleanup(struct kvm_vm *vm)
+{
+	close(gic_fd);
+	kvm_vm_free(vm);
+}
+
 static void test_print_help(char *name)
 {
 	pr_info("Usage: %s [-h] [-n nr_vcpus] [-i iterations] [-p timer_period_ms]\n",
@@ -478,7 +485,7 @@ int main(int argc, char *argv[])
 
 	vm = test_vm_create();
 	test_run(vm);
-	kvm_vm_free(vm);
+	test_vm_cleanup(vm);
 
 	return 0;
 }
-- 
cgit 


From 1717e248014c33a81d7a1cdc048c6b3bed7bb3f8 Mon Sep 17 00:00:00 2001
From: Alan Maguire <alan.maguire@oracle.com>
Date: Wed, 6 Apr 2022 12:43:51 +0100
Subject: selftests/bpf: Uprobe tests should verify param/return values

uprobe/uretprobe tests don't do any validation of arguments/return values,
and without this we can't be sure we are attached to the right function,
or that we are indeed attached to a uprobe or uretprobe.  To fix this
record argument and return value for auto-attached functions and ensure
these match expectations.  Also need to filter by pid to ensure we do
not pick up stray malloc()s since auto-attach traces libc system-wide.

Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/1649245431-29956-4-git-send-email-alan.maguire@oracle.com
---
 .../selftests/bpf/prog_tests/uprobe_autoattach.c   | 25 +++++++++----
 .../selftests/bpf/progs/test_uprobe_autoattach.c   | 43 ++++++++++++++++------
 2 files changed, 50 insertions(+), 18 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c b/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c
index 03b15d632be4..d6003dc8cc99 100644
--- a/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c
@@ -5,14 +5,17 @@
 #include "test_uprobe_autoattach.skel.h"
 
 /* uprobe attach point */
-static void autoattach_trigger_func(void)
+static noinline int autoattach_trigger_func(int arg)
 {
 	asm volatile ("");
+	return arg + 1;
 }
 
 void test_uprobe_autoattach(void)
 {
 	struct test_uprobe_autoattach *skel;
+	int trigger_val = 100, trigger_ret;
+	size_t malloc_sz = 1;
 	char *mem;
 
 	skel = test_uprobe_autoattach__open_and_load();
@@ -22,17 +25,25 @@ void test_uprobe_autoattach(void)
 	if (!ASSERT_OK(test_uprobe_autoattach__attach(skel), "skel_attach"))
 		goto cleanup;
 
+	skel->bss->test_pid = getpid();
+
 	/* trigger & validate uprobe & uretprobe */
-	autoattach_trigger_func();
+	trigger_ret = autoattach_trigger_func(trigger_val);
+
+	skel->bss->test_pid = getpid();
 
 	/* trigger & validate shared library u[ret]probes attached by name */
-	mem = malloc(1);
+	mem = malloc(malloc_sz);
 	free(mem);
 
-	ASSERT_EQ(skel->bss->uprobe_byname_res, 1, "check_uprobe_byname_res");
-	ASSERT_EQ(skel->bss->uretprobe_byname_res, 2, "check_uretprobe_byname_res");
-	ASSERT_EQ(skel->bss->uprobe_byname2_res, 3, "check_uprobe_byname2_res");
-	ASSERT_EQ(skel->bss->uretprobe_byname2_res, 4, "check_uretprobe_byname2_res");
+	ASSERT_EQ(skel->bss->uprobe_byname_parm1, trigger_val, "check_uprobe_byname_parm1");
+	ASSERT_EQ(skel->bss->uprobe_byname_ran, 1, "check_uprobe_byname_ran");
+	ASSERT_EQ(skel->bss->uretprobe_byname_rc, trigger_ret, "check_uretprobe_byname_rc");
+	ASSERT_EQ(skel->bss->uretprobe_byname_ran, 2, "check_uretprobe_byname_ran");
+	ASSERT_EQ(skel->bss->uprobe_byname2_parm1, malloc_sz, "check_uprobe_byname2_parm1");
+	ASSERT_EQ(skel->bss->uprobe_byname2_ran, 3, "check_uprobe_byname2_ran");
+	ASSERT_EQ(skel->bss->uretprobe_byname2_rc, mem, "check_uretprobe_byname2_rc");
+	ASSERT_EQ(skel->bss->uretprobe_byname2_ran, 4, "check_uretprobe_byname2_ran");
 cleanup:
 	test_uprobe_autoattach__destroy(skel);
 }
diff --git a/tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c b/tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c
index b442fb5ef54b..ab75522e2eeb 100644
--- a/tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c
+++ b/tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c
@@ -1,15 +1,22 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2022, Oracle and/or its affiliates. */
 
-#include <linux/ptrace.h>
-#include <linux/bpf.h>
+#include "vmlinux.h"
+
+#include <bpf/bpf_core_read.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
-int uprobe_byname_res = 0;
-int uretprobe_byname_res = 0;
-int uprobe_byname2_res = 0;
-int uretprobe_byname2_res = 0;
+int uprobe_byname_parm1 = 0;
+int uprobe_byname_ran = 0;
+int uretprobe_byname_rc = 0;
+int uretprobe_byname_ran = 0;
+size_t uprobe_byname2_parm1 = 0;
+int uprobe_byname2_ran = 0;
+char *uretprobe_byname2_rc = NULL;
+int uretprobe_byname2_ran = 0;
+
+int test_pid;
 
 /* This program cannot auto-attach, but that should not stop other
  * programs from attaching.
@@ -23,14 +30,16 @@ int handle_uprobe_noautoattach(struct pt_regs *ctx)
 SEC("uprobe//proc/self/exe:autoattach_trigger_func")
 int handle_uprobe_byname(struct pt_regs *ctx)
 {
-	uprobe_byname_res = 1;
+	uprobe_byname_parm1 = PT_REGS_PARM1_CORE(ctx);
+	uprobe_byname_ran = 1;
 	return 0;
 }
 
 SEC("uretprobe//proc/self/exe:autoattach_trigger_func")
 int handle_uretprobe_byname(struct pt_regs *ctx)
 {
-	uretprobe_byname_res = 2;
+	uretprobe_byname_rc = PT_REGS_RC_CORE(ctx);
+	uretprobe_byname_ran = 2;
 	return 0;
 }
 
@@ -38,14 +47,26 @@ int handle_uretprobe_byname(struct pt_regs *ctx)
 SEC("uprobe/libc.so.6:malloc")
 int handle_uprobe_byname2(struct pt_regs *ctx)
 {
-	uprobe_byname2_res = 3;
+	int pid = bpf_get_current_pid_tgid() >> 32;
+
+	/* ignore irrelevant invocations */
+	if (test_pid != pid)
+		return 0;
+	uprobe_byname2_parm1 = PT_REGS_PARM1_CORE(ctx);
+	uprobe_byname2_ran = 3;
 	return 0;
 }
 
-SEC("uretprobe/libc.so.6:free")
+SEC("uretprobe/libc.so.6:malloc")
 int handle_uretprobe_byname2(struct pt_regs *ctx)
 {
-	uretprobe_byname2_res = 4;
+	int pid = bpf_get_current_pid_tgid() >> 32;
+
+	/* ignore irrelevant invocations */
+	if (test_pid != pid)
+		return 0;
+	uretprobe_byname2_rc = (char *)PT_REGS_RC_CORE(ctx);
+	uretprobe_byname2_ran = 4;
 	return 0;
 }
 
-- 
cgit 


From e8cf229ebe5eb31eecee86268223530a872872c2 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sun, 3 Apr 2022 20:19:46 -0700
Subject: tools/testing/nvdimm: Fix security_init() symbol collision
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Starting with the new perf-event support in the nvdimm core, the
nfit_test mock module stops compiling. Rename its security_init() to
nfit_security_init().

tools/testing/nvdimm/test/nfit.c:1845:13: error: conflicting types for ‘security_init’; have ‘void(struct nfit_test *)’
 1845 | static void security_init(struct nfit_test *t)
      |             ^~~~~~~~~~~~~
In file included from ./include/linux/perf_event.h:61,
                 from ./include/linux/nd.h:11,
                 from ./drivers/nvdimm/nd-core.h:11,
                 from tools/testing/nvdimm/test/nfit.c:19:

Fixes: 9a61d0838cd0 ("drivers/nvdimm: Add nvdimm pmu structure")
Cc: Kajol Jain <kjain@linux.ibm.com>
Reviewed-by: Kajol Jain <kjain@linux.ibm.com>
Reviewed-by: Vishal Verma <vishal.l.verma@intel.com>
Link: https://lore.kernel.org/r/164904238610.1330275.1889212115373993727.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 tools/testing/nvdimm/test/nfit.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 65dbdda3a054..1da76ccde448 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -1842,7 +1842,7 @@ static int nfit_test_dimm_init(struct nfit_test *t)
 	return 0;
 }
 
-static void security_init(struct nfit_test *t)
+static void nfit_security_init(struct nfit_test *t)
 {
 	int i;
 
@@ -1938,7 +1938,7 @@ static int nfit_test0_alloc(struct nfit_test *t)
 	if (nfit_test_dimm_init(t))
 		return -ENOMEM;
 	smart_init(t);
-	security_init(t);
+	nfit_security_init(t);
 	return ars_state_init(&t->pdev.dev, &t->ars_state);
 }
 
-- 
cgit 


From 8555defe48610a7efe9f2d72689e2d4f006898d7 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 8 Apr 2022 11:14:25 -0700
Subject: selftests/bpf: Add CO-RE relos into linked_funcs selftests

Add CO-RE relocations into __weak subprogs for multi-file linked_funcs
selftest to make sure libbpf handles such combination well.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20220408181425.2287230-4-andrii@kernel.org
---
 tools/testing/selftests/bpf/progs/linked_funcs1.c | 8 ++++++++
 tools/testing/selftests/bpf/progs/linked_funcs2.c | 8 ++++++++
 2 files changed, 16 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/linked_funcs1.c b/tools/testing/selftests/bpf/progs/linked_funcs1.c
index b964ec1390c2..963b393c37e8 100644
--- a/tools/testing/selftests/bpf/progs/linked_funcs1.c
+++ b/tools/testing/selftests/bpf/progs/linked_funcs1.c
@@ -4,6 +4,7 @@
 #include "vmlinux.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
 
 /* weak and shared between two files */
 const volatile int my_tid __weak;
@@ -44,6 +45,13 @@ void set_output_ctx1(__u64 *ctx)
 /* this weak instance should win because it's the first one */
 __weak int set_output_weak(int x)
 {
+	static volatile int whatever;
+
+	/* make sure we use CO-RE relocations in a weak function, this used to
+	 * cause problems for BPF static linker
+	 */
+	whatever = bpf_core_type_size(struct task_struct);
+
 	output_weak1 = x;
 	return x;
 }
diff --git a/tools/testing/selftests/bpf/progs/linked_funcs2.c b/tools/testing/selftests/bpf/progs/linked_funcs2.c
index 575e958e60b7..db195872f4eb 100644
--- a/tools/testing/selftests/bpf/progs/linked_funcs2.c
+++ b/tools/testing/selftests/bpf/progs/linked_funcs2.c
@@ -4,6 +4,7 @@
 #include "vmlinux.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
 
 /* weak and shared between both files */
 const volatile int my_tid __weak;
@@ -44,6 +45,13 @@ void set_output_ctx2(__u64 *ctx)
 /* this weak instance should lose, because it will be processed second */
 __weak int set_output_weak(int x)
 {
+	static volatile int whatever;
+
+	/* make sure we use CO-RE relocations in a weak function, this used to
+	 * cause problems for BPF static linker
+	 */
+	whatever = 2 * bpf_core_type_size(struct task_struct);
+
 	output_weak2 = x;
 	return 2 * x;
 }
-- 
cgit 


From 658d87687cd5a6c8762d1de8abee1e6792d8d71e Mon Sep 17 00:00:00 2001
From: Yuntao Wang <ytcoode@gmail.com>
Date: Fri, 8 Apr 2022 12:14:52 +0800
Subject: selftests/bpf: Fix return value checks in perf_event_stackmap test

The bpf_get_stackid() function may also return 0 on success as per UAPI BPF
helper documentation. Therefore, correct checks from 'val > 0' to 'val >= 0'
to ensure that they cover all possible success return values.

Signed-off-by: Yuntao Wang <ytcoode@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20220408041452.933944-1-ytcoode@gmail.com
---
 tools/testing/selftests/bpf/progs/perf_event_stackmap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/perf_event_stackmap.c b/tools/testing/selftests/bpf/progs/perf_event_stackmap.c
index b3fcb5274ee0..f793280a3238 100644
--- a/tools/testing/selftests/bpf/progs/perf_event_stackmap.c
+++ b/tools/testing/selftests/bpf/progs/perf_event_stackmap.c
@@ -35,10 +35,10 @@ int oncpu(void *ctx)
 	long val;
 
 	val = bpf_get_stackid(ctx, &stackmap, 0);
-	if (val > 0)
+	if (val >= 0)
 		stackid_kernel = 2;
 	val = bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK);
-	if (val > 0)
+	if (val >= 0)
 		stackid_user = 2;
 
 	trace = bpf_map_lookup_elem(&stackdata_map, &key);
-- 
cgit 


From fac3725364397f9a40a101f089b86ea655a58d06 Mon Sep 17 00:00:00 2001
From: Anup Patel <apatel@ventanamicro.com>
Date: Sat, 9 Apr 2022 09:15:44 +0530
Subject: KVM: selftests: riscv: Set PTE A and D bits in VS-stage page table

Supporting hardware updates of PTE A and D bits is optional for any
RISC-V implementation so current software strategy is to always set
these bits in both G-stage (hypervisor) and VS-stage (guest kernel).

If PTE A and D bits are not set by software (hypervisor or guest)
then RISC-V implementations not supporting hardware updates of these
bits will cause traps even for perfectly valid PTEs.

Based on above explanation, the VS-stage page table created by various
KVM selftest applications is not correct because PTE A and D bits are
not set. This patch fixes VS-stage page table programming of PTE A and
D bits for KVM selftests.

Fixes: 3e06cdf10520 ("KVM: selftests: Add initial support for RISC-V
64-bit")
Signed-off-by: Anup Patel <apatel@ventanamicro.com>
Tested-by: Mayuresh Chitale <mchitale@ventanamicro.com>
Signed-off-by: Anup Patel <anup@brainfault.org>
---
 tools/testing/selftests/kvm/include/riscv/processor.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h
index dc284c6bdbc3..eca5c622efd2 100644
--- a/tools/testing/selftests/kvm/include/riscv/processor.h
+++ b/tools/testing/selftests/kvm/include/riscv/processor.h
@@ -101,7 +101,9 @@ static inline void set_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id,
 #define PGTBL_PTE_WRITE_SHIFT			2
 #define PGTBL_PTE_READ_MASK			0x0000000000000002ULL
 #define PGTBL_PTE_READ_SHIFT			1
-#define PGTBL_PTE_PERM_MASK			(PGTBL_PTE_EXECUTE_MASK | \
+#define PGTBL_PTE_PERM_MASK			(PGTBL_PTE_ACCESSED_MASK | \
+						 PGTBL_PTE_DIRTY_MASK | \
+						 PGTBL_PTE_EXECUTE_MASK | \
 						 PGTBL_PTE_WRITE_MASK | \
 						 PGTBL_PTE_READ_MASK)
 #define PGTBL_PTE_VALID_MASK			0x0000000000000001ULL
-- 
cgit 


From ebdef0de2dbc40e697adaa6b3408130f7a7b8351 Mon Sep 17 00:00:00 2001
From: Anup Patel <apatel@ventanamicro.com>
Date: Sat, 9 Apr 2022 09:15:51 +0530
Subject: KVM: selftests: riscv: Fix alignment of the guest_hang() function

The guest_hang() function is used as the default exception handler
for various KVM selftests applications by setting it's address in
the vstvec CSR. The vstvec CSR requires exception handler base address
to be at least 4-byte aligned so this patch fixes alignment of the
guest_hang() function.

Fixes: 3e06cdf10520 ("KVM: selftests: Add initial support for RISC-V
64-bit")
Signed-off-by: Anup Patel <apatel@ventanamicro.com>
Tested-by: Mayuresh Chitale <mchitale@ventanamicro.com>
Signed-off-by: Anup Patel <anup@brainfault.org>
---
 tools/testing/selftests/kvm/lib/riscv/processor.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c
index d377f2603d98..3961487a4870 100644
--- a/tools/testing/selftests/kvm/lib/riscv/processor.c
+++ b/tools/testing/selftests/kvm/lib/riscv/processor.c
@@ -268,7 +268,7 @@ void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
 		core.regs.t3, core.regs.t4, core.regs.t5, core.regs.t6);
 }
 
-static void guest_hang(void)
+static void __aligned(16) guest_hang(void)
 {
 	while (1)
 		;
-- 
cgit 


From 61ddff373ffa843155eba6a507973b1b78bb5a14 Mon Sep 17 00:00:00 2001
From: Mykola Lysenko <mykolal@fb.com>
Date: Fri, 8 Apr 2022 17:17:49 -0700
Subject: selftests/bpf: Improve by-name subtest selection logic in prog_tests

Improve subtest selection logic when using -t/-a/-d parameters.
In particular, more than one subtest can be specified or a
combination of tests / subtests.

-a send_signal -d send_signal/send_signal_nmi* - runs send_signal
test without nmi tests

-a send_signal/send_signal_nmi*,find_vma - runs two send_signal
subtests and find_vma test

-a 'send_signal*' -a find_vma -d send_signal/send_signal_nmi* -
runs 2 send_signal test and find_vma test. Disables two send_signal
nmi subtests

-t send_signal -t find_vma - runs two *send_signal* tests and one
*find_vma* test

This will allow us to have granular control over which subtests
to disable in the CI system instead of disabling whole tests.

Also, add new selftest to avoid possible regression when
changing prog_test test name selection logic.

Signed-off-by: Mykola Lysenko <mykolal@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220409001750.529930-1-mykolal@fb.com
---
 .../testing/selftests/bpf/prog_tests/arg_parsing.c | 107 ++++++++++++++
 tools/testing/selftests/bpf/test_progs.c           | 157 ++++++++++-----------
 tools/testing/selftests/bpf/test_progs.h           |  15 +-
 tools/testing/selftests/bpf/testing_helpers.c      |  89 ++++++++++++
 tools/testing/selftests/bpf/testing_helpers.h      |   8 ++
 5 files changed, 288 insertions(+), 88 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/arg_parsing.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/arg_parsing.c b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c
new file mode 100644
index 000000000000..b17bfa0e0aac
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+#include "test_progs.h"
+#include "testing_helpers.h"
+
+static void init_test_filter_set(struct test_filter_set *set)
+{
+	set->cnt = 0;
+	set->tests = NULL;
+}
+
+static void free_test_filter_set(struct test_filter_set *set)
+{
+	int i, j;
+
+	for (i = 0; i < set->cnt; i++) {
+		for (j = 0; j < set->tests[i].subtest_cnt; j++)
+			free((void *)set->tests[i].subtests[j]);
+		free(set->tests[i].subtests);
+		free(set->tests[i].name);
+	}
+
+	free(set->tests);
+	init_test_filter_set(set);
+}
+
+static void test_parse_test_list(void)
+{
+	struct test_filter_set set;
+
+	init_test_filter_set(&set);
+
+	ASSERT_OK(parse_test_list("arg_parsing", &set, true), "parsing");
+	if (!ASSERT_EQ(set.cnt, 1, "test filters count"))
+		goto error;
+	if (!ASSERT_OK_PTR(set.tests, "test filters initialized"))
+		goto error;
+	ASSERT_EQ(set.tests[0].subtest_cnt, 0, "subtest filters count");
+	ASSERT_OK(strcmp("arg_parsing", set.tests[0].name), "subtest name");
+	free_test_filter_set(&set);
+
+	ASSERT_OK(parse_test_list("arg_parsing,bpf_cookie", &set, true),
+		  "parsing");
+	if (!ASSERT_EQ(set.cnt, 2, "count of test filters"))
+		goto error;
+	if (!ASSERT_OK_PTR(set.tests, "test filters initialized"))
+		goto error;
+	ASSERT_EQ(set.tests[0].subtest_cnt, 0, "subtest filters count");
+	ASSERT_EQ(set.tests[1].subtest_cnt, 0, "subtest filters count");
+	ASSERT_OK(strcmp("arg_parsing", set.tests[0].name), "test name");
+	ASSERT_OK(strcmp("bpf_cookie", set.tests[1].name), "test name");
+	free_test_filter_set(&set);
+
+	ASSERT_OK(parse_test_list("arg_parsing/arg_parsing,bpf_cookie",
+				  &set,
+				  true),
+		  "parsing");
+	if (!ASSERT_EQ(set.cnt, 2, "count of test filters"))
+		goto error;
+	if (!ASSERT_OK_PTR(set.tests, "test filters initialized"))
+		goto error;
+	if (!ASSERT_EQ(set.tests[0].subtest_cnt, 1, "subtest filters count"))
+		goto error;
+	ASSERT_EQ(set.tests[1].subtest_cnt, 0, "subtest filters count");
+	ASSERT_OK(strcmp("arg_parsing", set.tests[0].name), "test name");
+	ASSERT_OK(strcmp("arg_parsing", set.tests[0].subtests[0]),
+		  "subtest name");
+	ASSERT_OK(strcmp("bpf_cookie", set.tests[1].name), "test name");
+	free_test_filter_set(&set);
+
+	ASSERT_OK(parse_test_list("arg_parsing/arg_parsing", &set, true),
+		  "parsing");
+	ASSERT_OK(parse_test_list("bpf_cookie", &set, true), "parsing");
+	ASSERT_OK(parse_test_list("send_signal", &set, true), "parsing");
+	if (!ASSERT_EQ(set.cnt, 3, "count of test filters"))
+		goto error;
+	if (!ASSERT_OK_PTR(set.tests, "test filters initialized"))
+		goto error;
+	if (!ASSERT_EQ(set.tests[0].subtest_cnt, 1, "subtest filters count"))
+		goto error;
+	ASSERT_EQ(set.tests[1].subtest_cnt, 0, "subtest filters count");
+	ASSERT_EQ(set.tests[2].subtest_cnt, 0, "subtest filters count");
+	ASSERT_OK(strcmp("arg_parsing", set.tests[0].name), "test name");
+	ASSERT_OK(strcmp("arg_parsing", set.tests[0].subtests[0]),
+		  "subtest name");
+	ASSERT_OK(strcmp("bpf_cookie", set.tests[1].name), "test name");
+	ASSERT_OK(strcmp("send_signal", set.tests[2].name), "test name");
+	free_test_filter_set(&set);
+
+	ASSERT_OK(parse_test_list("bpf_cookie/trace", &set, false), "parsing");
+	if (!ASSERT_EQ(set.cnt, 1, "count of test filters"))
+		goto error;
+	if (!ASSERT_OK_PTR(set.tests, "test filters initialized"))
+		goto error;
+	if (!ASSERT_EQ(set.tests[0].subtest_cnt, 1, "subtest filters count"))
+		goto error;
+	ASSERT_OK(strcmp("*bpf_cookie*", set.tests[0].name), "test name");
+	ASSERT_OK(strcmp("*trace*", set.tests[0].subtests[0]), "subtest name");
+error:
+	free_test_filter_set(&set);
+}
+
+void test_arg_parsing(void)
+{
+	if (test__start_subtest("test_parse_test_list"))
+		test_parse_test_list();
+}
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 0a4b45d7b515..dcad9871f556 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -3,6 +3,7 @@
  */
 #define _GNU_SOURCE
 #include "test_progs.h"
+#include "testing_helpers.h"
 #include "cgroup_helpers.h"
 #include <argp.h>
 #include <pthread.h>
@@ -84,12 +85,13 @@ static bool should_run(struct test_selector *sel, int num, const char *name)
 	int i;
 
 	for (i = 0; i < sel->blacklist.cnt; i++) {
-		if (glob_match(name, sel->blacklist.strs[i]))
+		if (glob_match(name, sel->blacklist.tests[i].name) &&
+		    !sel->blacklist.tests[i].subtest_cnt)
 			return false;
 	}
 
 	for (i = 0; i < sel->whitelist.cnt; i++) {
-		if (glob_match(name, sel->whitelist.strs[i]))
+		if (glob_match(name, sel->whitelist.tests[i].name))
 			return true;
 	}
 
@@ -99,6 +101,46 @@ static bool should_run(struct test_selector *sel, int num, const char *name)
 	return num < sel->num_set_len && sel->num_set[num];
 }
 
+static bool should_run_subtest(struct test_selector *sel,
+			       struct test_selector *subtest_sel,
+			       int subtest_num,
+			       const char *test_name,
+			       const char *subtest_name)
+{
+	int i, j;
+
+	for (i = 0; i < sel->blacklist.cnt; i++) {
+		if (glob_match(test_name, sel->blacklist.tests[i].name)) {
+			if (!sel->blacklist.tests[i].subtest_cnt)
+				return false;
+
+			for (j = 0; j < sel->blacklist.tests[i].subtest_cnt; j++) {
+				if (glob_match(subtest_name,
+					       sel->blacklist.tests[i].subtests[j]))
+					return false;
+			}
+		}
+	}
+
+	for (i = 0; i < sel->whitelist.cnt; i++) {
+		if (glob_match(test_name, sel->whitelist.tests[i].name)) {
+			if (!sel->whitelist.tests[i].subtest_cnt)
+				return true;
+
+			for (j = 0; j < sel->whitelist.tests[i].subtest_cnt; j++) {
+				if (glob_match(subtest_name,
+					       sel->whitelist.tests[i].subtests[j]))
+					return true;
+			}
+		}
+	}
+
+	if (!sel->whitelist.cnt && !subtest_sel->num_set)
+		return true;
+
+	return subtest_num < subtest_sel->num_set_len && subtest_sel->num_set[subtest_num];
+}
+
 static void dump_test_log(const struct prog_test_def *test, bool failed)
 {
 	if (stdout == env.stdout)
@@ -135,7 +177,6 @@ static void stdio_restore(void);
  */
 static void reset_affinity(void)
 {
-
 	cpu_set_t cpuset;
 	int i, err;
 
@@ -196,7 +237,7 @@ void test__end_subtest(void)
 	test->subtest_name = NULL;
 }
 
-bool test__start_subtest(const char *name)
+bool test__start_subtest(const char *subtest_name)
 {
 	struct prog_test_def *test = env.test;
 
@@ -205,17 +246,21 @@ bool test__start_subtest(const char *name)
 
 	test->subtest_num++;
 
-	if (!name || !name[0]) {
+	if (!subtest_name || !subtest_name[0]) {
 		fprintf(env.stderr,
 			"Subtest #%d didn't provide sub-test name!\n",
 			test->subtest_num);
 		return false;
 	}
 
-	if (!should_run(&env.subtest_selector, test->subtest_num, name))
+	if (!should_run_subtest(&env.test_selector,
+				&env.subtest_selector,
+				test->subtest_num,
+				test->test_name,
+				subtest_name))
 		return false;
 
-	test->subtest_name = strdup(name);
+	test->subtest_name = strdup(subtest_name);
 	if (!test->subtest_name) {
 		fprintf(env.stderr,
 			"Subtest #%d: failed to copy subtest name!\n",
@@ -527,63 +572,29 @@ static int libbpf_print_fn(enum libbpf_print_level level,
 	return 0;
 }
 
-static void free_str_set(const struct str_set *set)
+static void free_test_filter_set(const struct test_filter_set *set)
 {
-	int i;
+	int i, j;
 
 	if (!set)
 		return;
 
-	for (i = 0; i < set->cnt; i++)
-		free((void *)set->strs[i]);
-	free(set->strs);
-}
-
-static int parse_str_list(const char *s, struct str_set *set, bool is_glob_pattern)
-{
-	char *input, *state = NULL, *next, **tmp, **strs = NULL;
-	int i, cnt = 0;
+	for (i = 0; i < set->cnt; i++) {
+		free((void *)set->tests[i].name);
+		for (j = 0; j < set->tests[i].subtest_cnt; j++)
+			free((void *)set->tests[i].subtests[j]);
 
-	input = strdup(s);
-	if (!input)
-		return -ENOMEM;
-
-	while ((next = strtok_r(state ? NULL : input, ",", &state))) {
-		tmp = realloc(strs, sizeof(*strs) * (cnt + 1));
-		if (!tmp)
-			goto err;
-		strs = tmp;
-
-		if (is_glob_pattern) {
-			strs[cnt] = strdup(next);
-			if (!strs[cnt])
-				goto err;
-		} else {
-			strs[cnt] = malloc(strlen(next) + 2 + 1);
-			if (!strs[cnt])
-				goto err;
-			sprintf(strs[cnt], "*%s*", next);
-		}
-
-		cnt++;
+		free((void *)set->tests[i].subtests);
 	}
 
-	tmp = realloc(set->strs, sizeof(*strs) * (cnt + set->cnt));
-	if (!tmp)
-		goto err;
-	memcpy(tmp + set->cnt, strs, sizeof(*strs) * cnt);
-	set->strs = (const char **)tmp;
-	set->cnt += cnt;
+	free((void *)set->tests);
+}
 
-	free(input);
-	free(strs);
-	return 0;
-err:
-	for (i = 0; i < cnt; i++)
-		free(strs[i]);
-	free(strs);
-	free(input);
-	return -ENOMEM;
+static void free_test_selector(struct test_selector *test_selector)
+{
+	free_test_filter_set(&test_selector->blacklist);
+	free_test_filter_set(&test_selector->whitelist);
+	free(test_selector->num_set);
 }
 
 extern int extra_prog_load_log_flags;
@@ -615,33 +626,17 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 	}
 	case ARG_TEST_NAME_GLOB_ALLOWLIST:
 	case ARG_TEST_NAME: {
-		char *subtest_str = strchr(arg, '/');
-
-		if (subtest_str) {
-			*subtest_str = '\0';
-			if (parse_str_list(subtest_str + 1,
-					   &env->subtest_selector.whitelist,
-					   key == ARG_TEST_NAME_GLOB_ALLOWLIST))
-				return -ENOMEM;
-		}
-		if (parse_str_list(arg, &env->test_selector.whitelist,
-				   key == ARG_TEST_NAME_GLOB_ALLOWLIST))
+		if (parse_test_list(arg,
+				    &env->test_selector.whitelist,
+				    key == ARG_TEST_NAME_GLOB_ALLOWLIST))
 			return -ENOMEM;
 		break;
 	}
 	case ARG_TEST_NAME_GLOB_DENYLIST:
 	case ARG_TEST_NAME_BLACKLIST: {
-		char *subtest_str = strchr(arg, '/');
-
-		if (subtest_str) {
-			*subtest_str = '\0';
-			if (parse_str_list(subtest_str + 1,
-					   &env->subtest_selector.blacklist,
-					   key == ARG_TEST_NAME_GLOB_DENYLIST))
-				return -ENOMEM;
-		}
-		if (parse_str_list(arg, &env->test_selector.blacklist,
-				   key == ARG_TEST_NAME_GLOB_DENYLIST))
+		if (parse_test_list(arg,
+				    &env->test_selector.blacklist,
+				    key == ARG_TEST_NAME_GLOB_DENYLIST))
 			return -ENOMEM;
 		break;
 	}
@@ -1493,12 +1488,8 @@ int main(int argc, char **argv)
 out:
 	if (!env.list_test_names && env.has_testmod)
 		unload_bpf_testmod();
-	free_str_set(&env.test_selector.blacklist);
-	free_str_set(&env.test_selector.whitelist);
-	free(env.test_selector.num_set);
-	free_str_set(&env.subtest_selector.blacklist);
-	free_str_set(&env.subtest_selector.whitelist);
-	free(env.subtest_selector.num_set);
+
+	free_test_selector(&env.test_selector);
 
 	if (env.succ_cnt + env.fail_cnt + env.skip_cnt == 0)
 		return EXIT_NO_TEST;
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index eec4c7385b14..ecb5fef29ee6 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -37,7 +37,6 @@ typedef __u16 __sum16;
 #include <bpf/bpf_endian.h>
 #include "trace_helpers.h"
 #include "testing_helpers.h"
-#include "flow_dissector_load.h"
 
 enum verbosity {
 	VERBOSE_NONE,
@@ -46,14 +45,20 @@ enum verbosity {
 	VERBOSE_SUPER,
 };
 
-struct str_set {
-	const char **strs;
+struct test_filter {
+	char *name;
+	char **subtests;
+	int subtest_cnt;
+};
+
+struct test_filter_set {
+	struct test_filter *tests;
 	int cnt;
 };
 
 struct test_selector {
-	struct str_set whitelist;
-	struct str_set blacklist;
+	struct test_filter_set whitelist;
+	struct test_filter_set blacklist;
 	bool *num_set;
 	int num_set_len;
 };
diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c
index 87867f7a78c3..9695318e8132 100644
--- a/tools/testing/selftests/bpf/testing_helpers.c
+++ b/tools/testing/selftests/bpf/testing_helpers.c
@@ -6,6 +6,7 @@
 #include <errno.h>
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
+#include "test_progs.h"
 #include "testing_helpers.h"
 
 int parse_num_list(const char *s, bool **num_set, int *num_set_len)
@@ -69,6 +70,94 @@ int parse_num_list(const char *s, bool **num_set, int *num_set_len)
 	return 0;
 }
 
+int parse_test_list(const char *s,
+		    struct test_filter_set *set,
+		    bool is_glob_pattern)
+{
+	char *input, *state = NULL, *next;
+	struct test_filter *tmp, *tests = NULL;
+	int i, j, cnt = 0;
+
+	input = strdup(s);
+	if (!input)
+		return -ENOMEM;
+
+	while ((next = strtok_r(state ? NULL : input, ",", &state))) {
+		char *subtest_str = strchr(next, '/');
+		char *pattern = NULL;
+		int glob_chars = 0;
+
+		tmp = realloc(tests, sizeof(*tests) * (cnt + 1));
+		if (!tmp)
+			goto err;
+		tests = tmp;
+
+		tests[cnt].subtest_cnt = 0;
+		tests[cnt].subtests = NULL;
+
+		if (is_glob_pattern) {
+			pattern = "%s";
+		} else {
+			pattern = "*%s*";
+			glob_chars = 2;
+		}
+
+		if (subtest_str) {
+			char **tmp_subtests = NULL;
+			int subtest_cnt = tests[cnt].subtest_cnt;
+
+			*subtest_str = '\0';
+			subtest_str += 1;
+			tmp_subtests = realloc(tests[cnt].subtests,
+					       sizeof(*tmp_subtests) *
+					       (subtest_cnt + 1));
+			if (!tmp_subtests)
+				goto err;
+			tests[cnt].subtests = tmp_subtests;
+
+			tests[cnt].subtests[subtest_cnt] =
+				malloc(strlen(subtest_str) + glob_chars + 1);
+			if (!tests[cnt].subtests[subtest_cnt])
+				goto err;
+			sprintf(tests[cnt].subtests[subtest_cnt],
+				pattern,
+				subtest_str);
+
+			tests[cnt].subtest_cnt++;
+		}
+
+		tests[cnt].name = malloc(strlen(next) + glob_chars + 1);
+		if (!tests[cnt].name)
+			goto err;
+		sprintf(tests[cnt].name, pattern, next);
+
+		cnt++;
+	}
+
+	tmp = realloc(set->tests, sizeof(*tests) * (cnt + set->cnt));
+	if (!tmp)
+		goto err;
+
+	memcpy(tmp +  set->cnt, tests, sizeof(*tests) * cnt);
+	set->tests = tmp;
+	set->cnt += cnt;
+
+	free(tests);
+	free(input);
+	return 0;
+
+err:
+	for (i = 0; i < cnt; i++) {
+		for (j = 0; j < tests[i].subtest_cnt; j++)
+			free(tests[i].subtests[j]);
+
+		free(tests[i].name);
+	}
+	free(tests);
+	free(input);
+	return -ENOMEM;
+}
+
 __u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info)
 {
 	__u32 info_len = sizeof(*info);
diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h
index f46ebc476ee8..6ec00bf79cb5 100644
--- a/tools/testing/selftests/bpf/testing_helpers.h
+++ b/tools/testing/selftests/bpf/testing_helpers.h
@@ -12,3 +12,11 @@ int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
 			  size_t insns_cnt, const char *license,
 			  __u32 kern_version, char *log_buf,
 			  size_t log_buf_sz);
+
+/*
+ * below function is exported for testing in prog_test test
+ */
+struct test_filter_set;
+int parse_test_list(const char *s,
+		    struct test_filter_set *test_set,
+		    bool is_glob_pattern);
-- 
cgit 


From b858ba8c52b64c038de156c455a39a89bfd214e8 Mon Sep 17 00:00:00 2001
From: Yafang Shao <laoar.shao@gmail.com>
Date: Sat, 9 Apr 2022 12:59:56 +0000
Subject: selftests/bpf: Use libbpf 1.0 API mode instead of RLIMIT_MEMLOCK

We have switched to memcg-based memory accouting and thus the rlimit is
not needed any more. LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK was introduced in
libbpf for backward compatibility, so we can use it instead now. After
this change, the header tools/testing/selftests/bpf/bpf_rlimit.h can be
removed.

This patch also removes the useless header sys/resource.h from many files
in tools/testing/selftests/bpf/.

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220409125958.92629-3-laoar.shao@gmail.com
---
 tools/testing/selftests/bpf/bench.c                |  1 -
 tools/testing/selftests/bpf/bpf_rlimit.h           | 28 ----------------------
 tools/testing/selftests/bpf/flow_dissector_load.c  |  6 ++---
 tools/testing/selftests/bpf/get_cgroup_id_user.c   |  4 +++-
 tools/testing/selftests/bpf/prog_tests/btf.c       |  1 -
 tools/testing/selftests/bpf/test_cgroup_storage.c  |  4 +++-
 tools/testing/selftests/bpf/test_dev_cgroup.c      |  4 +++-
 tools/testing/selftests/bpf/test_lpm_map.c         |  4 +++-
 tools/testing/selftests/bpf/test_lru_map.c         |  4 +++-
 .../selftests/bpf/test_skb_cgroup_id_user.c        |  4 +++-
 tools/testing/selftests/bpf/test_sock.c            |  4 +++-
 tools/testing/selftests/bpf/test_sock_addr.c       |  4 +++-
 tools/testing/selftests/bpf/test_sockmap.c         |  5 ++--
 tools/testing/selftests/bpf/test_sysctl.c          |  4 +++-
 tools/testing/selftests/bpf/test_tag.c             |  4 +++-
 .../selftests/bpf/test_tcp_check_syncookie_user.c  |  4 +++-
 tools/testing/selftests/bpf/test_tcpnotify_user.c  |  1 -
 tools/testing/selftests/bpf/test_verifier_log.c    |  5 ++--
 tools/testing/selftests/bpf/xdp_redirect_multi.c   |  1 -
 tools/testing/selftests/bpf/xdping.c               |  8 ++-----
 tools/testing/selftests/bpf/xdpxceiver.c           |  6 ++---
 21 files changed, 45 insertions(+), 61 deletions(-)
 delete mode 100644 tools/testing/selftests/bpf/bpf_rlimit.h

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index f973320e6dbf..f061cc20e776 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -8,7 +8,6 @@
 #include <fcntl.h>
 #include <pthread.h>
 #include <sys/sysinfo.h>
-#include <sys/resource.h>
 #include <signal.h>
 #include "bench.h"
 #include "testing_helpers.h"
diff --git a/tools/testing/selftests/bpf/bpf_rlimit.h b/tools/testing/selftests/bpf/bpf_rlimit.h
deleted file mode 100644
index 9dac9b30f8ef..000000000000
--- a/tools/testing/selftests/bpf/bpf_rlimit.h
+++ /dev/null
@@ -1,28 +0,0 @@
-#include <sys/resource.h>
-#include <stdio.h>
-
-static  __attribute__((constructor)) void bpf_rlimit_ctor(void)
-{
-	struct rlimit rlim_old, rlim_new = {
-		.rlim_cur	= RLIM_INFINITY,
-		.rlim_max	= RLIM_INFINITY,
-	};
-
-	getrlimit(RLIMIT_MEMLOCK, &rlim_old);
-	/* For the sake of running the test cases, we temporarily
-	 * set rlimit to infinity in order for kernel to focus on
-	 * errors from actual test cases and not getting noise
-	 * from hitting memlock limits. The limit is on per-process
-	 * basis and not a global one, hence destructor not really
-	 * needed here.
-	 */
-	if (setrlimit(RLIMIT_MEMLOCK, &rlim_new) < 0) {
-		perror("Unable to lift memlock rlimit");
-		/* Trying out lower limit, but expect potential test
-		 * case failures from this!
-		 */
-		rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20);
-		rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20);
-		setrlimit(RLIMIT_MEMLOCK, &rlim_new);
-	}
-}
diff --git a/tools/testing/selftests/bpf/flow_dissector_load.c b/tools/testing/selftests/bpf/flow_dissector_load.c
index 87fd1aa323a9..c8be6406777f 100644
--- a/tools/testing/selftests/bpf/flow_dissector_load.c
+++ b/tools/testing/selftests/bpf/flow_dissector_load.c
@@ -11,7 +11,6 @@
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
 
-#include "bpf_rlimit.h"
 #include "flow_dissector_load.h"
 
 const char *cfg_pin_path = "/sys/fs/bpf/flow_dissector";
@@ -25,9 +24,8 @@ static void load_and_attach_program(void)
 	int prog_fd, ret;
 	struct bpf_object *obj;
 
-	ret = libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
-	if (ret)
-		error(1, 0, "failed to enable libbpf strict mode: %d", ret);
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
 
 	ret = bpf_flow_load(&obj, cfg_path_name, cfg_prog_name,
 			    cfg_map_name, NULL, &prog_fd, NULL);
diff --git a/tools/testing/selftests/bpf/get_cgroup_id_user.c b/tools/testing/selftests/bpf/get_cgroup_id_user.c
index 3a7b82bd9e94..e021cc67dc02 100644
--- a/tools/testing/selftests/bpf/get_cgroup_id_user.c
+++ b/tools/testing/selftests/bpf/get_cgroup_id_user.c
@@ -20,7 +20,6 @@
 
 #include "cgroup_helpers.h"
 #include "testing_helpers.h"
-#include "bpf_rlimit.h"
 
 #define CHECK(condition, tag, format...) ({		\
 	int __ret = !!(condition);			\
@@ -67,6 +66,9 @@ int main(int argc, char **argv)
 	if (CHECK(cgroup_fd < 0, "cgroup_setup_and_join", "err %d errno %d\n", cgroup_fd, errno))
 		return 1;
 
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
 	err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
 	if (CHECK(err, "bpf_prog_test_load", "err %d errno %d\n", err, errno))
 		goto cleanup_cgroup_env;
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index ec823561b912..84aae639ddb5 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -8,7 +8,6 @@
 #include <linux/filter.h>
 #include <linux/unistd.h>
 #include <bpf/bpf.h>
-#include <sys/resource.h>
 #include <libelf.h>
 #include <gelf.h>
 #include <string.h>
diff --git a/tools/testing/selftests/bpf/test_cgroup_storage.c b/tools/testing/selftests/bpf/test_cgroup_storage.c
index 2ffa08198d1c..0861ea60dcdd 100644
--- a/tools/testing/selftests/bpf/test_cgroup_storage.c
+++ b/tools/testing/selftests/bpf/test_cgroup_storage.c
@@ -6,7 +6,6 @@
 #include <stdlib.h>
 #include <sys/sysinfo.h>
 
-#include "bpf_rlimit.h"
 #include "bpf_util.h"
 #include "cgroup_helpers.h"
 #include "testing_helpers.h"
@@ -52,6 +51,9 @@ int main(int argc, char **argv)
 		goto err;
 	}
 
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
 	map_fd = bpf_map_create(BPF_MAP_TYPE_CGROUP_STORAGE, NULL, sizeof(key),
 				sizeof(value), 0, NULL);
 	if (map_fd < 0) {
diff --git a/tools/testing/selftests/bpf/test_dev_cgroup.c b/tools/testing/selftests/bpf/test_dev_cgroup.c
index c299d3452695..7886265846a0 100644
--- a/tools/testing/selftests/bpf/test_dev_cgroup.c
+++ b/tools/testing/selftests/bpf/test_dev_cgroup.c
@@ -15,7 +15,6 @@
 
 #include "cgroup_helpers.h"
 #include "testing_helpers.h"
-#include "bpf_rlimit.h"
 
 #define DEV_CGROUP_PROG "./dev_cgroup.o"
 
@@ -28,6 +27,9 @@ int main(int argc, char **argv)
 	int prog_fd, cgroup_fd;
 	__u32 prog_cnt;
 
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
 	if (bpf_prog_test_load(DEV_CGROUP_PROG, BPF_PROG_TYPE_CGROUP_DEVICE,
 			  &obj, &prog_fd)) {
 		printf("Failed to load DEV_CGROUP program\n");
diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c
index aa294612e0a7..789c9748d241 100644
--- a/tools/testing/selftests/bpf/test_lpm_map.c
+++ b/tools/testing/selftests/bpf/test_lpm_map.c
@@ -26,7 +26,6 @@
 #include <bpf/bpf.h>
 
 #include "bpf_util.h"
-#include "bpf_rlimit.h"
 
 struct tlpm_node {
 	struct tlpm_node *next;
@@ -791,6 +790,9 @@ int main(void)
 	/* we want predictable, pseudo random tests */
 	srand(0xf00ba1);
 
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
 	test_lpm_basic();
 	test_lpm_order();
 
diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
index 563bbe18c172..a6aa2d121955 100644
--- a/tools/testing/selftests/bpf/test_lru_map.c
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -18,7 +18,6 @@
 #include <bpf/libbpf.h>
 
 #include "bpf_util.h"
-#include "bpf_rlimit.h"
 #include "../../../include/linux/filter.h"
 
 #define LOCAL_FREE_TARGET	(128)
@@ -878,6 +877,9 @@ int main(int argc, char **argv)
 	assert(nr_cpus != -1);
 	printf("nr_cpus:%d\n\n", nr_cpus);
 
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
 	for (f = 0; f < ARRAY_SIZE(map_flags); f++) {
 		unsigned int tgt_free = (map_flags[f] & BPF_F_NO_COMMON_LRU) ?
 			PERCPU_FREE_TARGET : LOCAL_FREE_TARGET;
diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
index 4a64306728ab..3256de30f563 100644
--- a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
+++ b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
@@ -15,7 +15,6 @@
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
 
-#include "bpf_rlimit.h"
 #include "cgroup_helpers.h"
 
 #define CGROUP_PATH		"/skb_cgroup_test"
@@ -160,6 +159,9 @@ int main(int argc, char **argv)
 		exit(EXIT_FAILURE);
 	}
 
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
 	cgfd = cgroup_setup_and_join(CGROUP_PATH);
 	if (cgfd < 0)
 		goto err;
diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c
index fe10f8134278..6c4494076bbf 100644
--- a/tools/testing/selftests/bpf/test_sock.c
+++ b/tools/testing/selftests/bpf/test_sock.c
@@ -14,7 +14,6 @@
 
 #include "cgroup_helpers.h"
 #include <bpf/bpf_endian.h>
-#include "bpf_rlimit.h"
 #include "bpf_util.h"
 
 #define CG_PATH		"/foo"
@@ -541,6 +540,9 @@ int main(int argc, char **argv)
 	if (cgfd < 0)
 		goto err;
 
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
 	if (run_tests(cgfd))
 		goto err;
 
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
index f3d5d7ac6505..458564fcfc82 100644
--- a/tools/testing/selftests/bpf/test_sock_addr.c
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -19,7 +19,6 @@
 #include <bpf/libbpf.h>
 
 #include "cgroup_helpers.h"
-#include "bpf_rlimit.h"
 #include "bpf_util.h"
 
 #ifndef ENOTSUPP
@@ -1418,6 +1417,9 @@ int main(int argc, char **argv)
 	if (cgfd < 0)
 		goto err;
 
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
 	if (run_tests(cgfd))
 		goto err;
 
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index dfb4f5c0fcb9..0fbaccdc8861 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -18,7 +18,6 @@
 #include <sched.h>
 
 #include <sys/time.h>
-#include <sys/resource.h>
 #include <sys/types.h>
 #include <sys/sendfile.h>
 
@@ -37,7 +36,6 @@
 #include <bpf/libbpf.h>
 
 #include "bpf_util.h"
-#include "bpf_rlimit.h"
 #include "cgroup_helpers.h"
 
 int running;
@@ -2017,6 +2015,9 @@ int main(int argc, char **argv)
 		cg_created = 1;
 	}
 
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
 	if (test == SELFTESTS) {
 		err = test_selftest(cg_fd, &options);
 		goto out;
diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c
index 4f6cf833b522..5bae25ca19fb 100644
--- a/tools/testing/selftests/bpf/test_sysctl.c
+++ b/tools/testing/selftests/bpf/test_sysctl.c
@@ -14,7 +14,6 @@
 #include <bpf/libbpf.h>
 
 #include <bpf/bpf_endian.h>
-#include "bpf_rlimit.h"
 #include "bpf_util.h"
 #include "cgroup_helpers.h"
 #include "testing_helpers.h"
@@ -1618,6 +1617,9 @@ int main(int argc, char **argv)
 	if (cgfd < 0)
 		goto err;
 
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
 	if (run_tests(cgfd))
 		goto err;
 
diff --git a/tools/testing/selftests/bpf/test_tag.c b/tools/testing/selftests/bpf/test_tag.c
index 0851c42ee31c..5546b05a0486 100644
--- a/tools/testing/selftests/bpf/test_tag.c
+++ b/tools/testing/selftests/bpf/test_tag.c
@@ -20,7 +20,6 @@
 #include <bpf/bpf.h>
 
 #include "../../../include/linux/filter.h"
-#include "bpf_rlimit.h"
 #include "testing_helpers.h"
 
 static struct bpf_insn prog[BPF_MAXINSNS];
@@ -189,6 +188,9 @@ int main(void)
 	uint32_t tests = 0;
 	int i, fd_map;
 
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
 	fd_map = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(int),
 				sizeof(int), 1, &opts);
 	assert(fd_map > 0);
diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
index e7775d3bbe08..5c8ef062f760 100644
--- a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
+++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
@@ -15,7 +15,6 @@
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
 
-#include "bpf_rlimit.h"
 #include "cgroup_helpers.h"
 
 static int start_server(const struct sockaddr *addr, socklen_t len, bool dual)
@@ -235,6 +234,9 @@ int main(int argc, char **argv)
 		exit(1);
 	}
 
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
 	results = get_map_fd_by_prog_id(atoi(argv[1]), &xdp);
 	if (results < 0) {
 		log_err("Can't get map");
diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c
index 4c5114765b23..8284db8b0f13 100644
--- a/tools/testing/selftests/bpf/test_tcpnotify_user.c
+++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c
@@ -19,7 +19,6 @@
 #include <linux/perf_event.h>
 #include <linux/err.h>
 
-#include "bpf_rlimit.h"
 #include "bpf_util.h"
 #include "cgroup_helpers.h"
 
diff --git a/tools/testing/selftests/bpf/test_verifier_log.c b/tools/testing/selftests/bpf/test_verifier_log.c
index 8d6918c3b4a2..70feda97cee5 100644
--- a/tools/testing/selftests/bpf/test_verifier_log.c
+++ b/tools/testing/selftests/bpf/test_verifier_log.c
@@ -11,8 +11,6 @@
 
 #include <bpf/bpf.h>
 
-#include "bpf_rlimit.h"
-
 #define LOG_SIZE (1 << 20)
 
 #define err(str...)	printf("ERROR: " str)
@@ -141,6 +139,9 @@ int main(int argc, char **argv)
 
 	memset(log, 1, LOG_SIZE);
 
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
 	/* Test incorrect attr */
 	printf("Test log_level 0...\n");
 	test_log_bad(log, LOG_SIZE, 0);
diff --git a/tools/testing/selftests/bpf/xdp_redirect_multi.c b/tools/testing/selftests/bpf/xdp_redirect_multi.c
index aaedbf4955c3..c03b3a75991f 100644
--- a/tools/testing/selftests/bpf/xdp_redirect_multi.c
+++ b/tools/testing/selftests/bpf/xdp_redirect_multi.c
@@ -10,7 +10,6 @@
 #include <net/if.h>
 #include <unistd.h>
 #include <libgen.h>
-#include <sys/resource.h>
 #include <sys/ioctl.h>
 #include <sys/types.h>
 #include <sys/socket.h>
diff --git a/tools/testing/selftests/bpf/xdping.c b/tools/testing/selftests/bpf/xdping.c
index c567856fd1bc..5b6f977870f8 100644
--- a/tools/testing/selftests/bpf/xdping.c
+++ b/tools/testing/selftests/bpf/xdping.c
@@ -12,7 +12,6 @@
 #include <string.h>
 #include <unistd.h>
 #include <libgen.h>
-#include <sys/resource.h>
 #include <net/if.h>
 #include <sys/types.h>
 #include <sys/socket.h>
@@ -89,7 +88,6 @@ int main(int argc, char **argv)
 {
 	__u32 mode_flags = XDP_FLAGS_DRV_MODE | XDP_FLAGS_SKB_MODE;
 	struct addrinfo *a, hints = { .ai_family = AF_INET };
-	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
 	__u16 count = XDPING_DEFAULT_COUNT;
 	struct pinginfo pinginfo = { 0 };
 	const char *optstr = "c:I:NsS";
@@ -167,10 +165,8 @@ int main(int argc, char **argv)
 		freeaddrinfo(a);
 	}
 
-	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
-		perror("setrlimit(RLIMIT_MEMLOCK)");
-		return 1;
-	}
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
 
 	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
 
diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 5f8296d29e77..cfcb031323c5 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -90,7 +90,6 @@
 #include <string.h>
 #include <stddef.h>
 #include <sys/mman.h>
-#include <sys/resource.h>
 #include <sys/types.h>
 #include <sys/queue.h>
 #include <time.h>
@@ -1448,14 +1447,13 @@ static void ifobject_delete(struct ifobject *ifobj)
 
 int main(int argc, char **argv)
 {
-	struct rlimit _rlim = { RLIM_INFINITY, RLIM_INFINITY };
 	struct pkt_stream *pkt_stream_default;
 	struct ifobject *ifobj_tx, *ifobj_rx;
 	struct test_spec test;
 	u32 i, j;
 
-	if (setrlimit(RLIMIT_MEMLOCK, &_rlim))
-		exit_with_error(errno);
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
 
 	ifobj_tx = ifobject_create();
 	if (!ifobj_tx)
-- 
cgit 


From 0c7b27616fbd64b3b86c59ad5441f82a1a0c4176 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 31 Mar 2022 15:46:52 +0200
Subject: selftests: netfilter: add fib expression forward test case

Its now possible to use fib expression in the forward chain (where both
the input and output interfaces are known).

Add a simple test case for this.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 tools/testing/selftests/netfilter/nft_fib.sh | 50 ++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/netfilter/nft_fib.sh b/tools/testing/selftests/netfilter/nft_fib.sh
index 695a1958723f..fd76b69635a4 100755
--- a/tools/testing/selftests/netfilter/nft_fib.sh
+++ b/tools/testing/selftests/netfilter/nft_fib.sh
@@ -66,6 +66,20 @@ table inet filter {
 EOF
 }
 
+load_pbr_ruleset() {
+	local netns=$1
+
+ip netns exec ${netns} nft -f /dev/stdin <<EOF
+table inet filter {
+	chain forward {
+		type filter hook forward priority raw;
+		fib saddr . iif oif gt 0 accept
+		log drop
+	}
+}
+EOF
+}
+
 load_ruleset_count() {
 	local netns=$1
 
@@ -219,4 +233,40 @@ sleep 2
 ip netns exec ${ns1} ping -c 3 -q 1c3::c01d > /dev/null
 check_fib_counter 3 ${nsrouter} 1c3::c01d || exit 1
 
+# delete all rules
+ip netns exec ${ns1} nft flush ruleset
+ip netns exec ${ns2} nft flush ruleset
+ip netns exec ${nsrouter} nft flush ruleset
+
+ip -net ${ns1} addr add 10.0.1.99/24 dev eth0
+ip -net ${ns1} addr add dead:1::99/64 dev eth0
+
+ip -net ${ns1} addr del 10.0.2.99/24 dev eth0
+ip -net ${ns1} addr del dead:2::99/64 dev eth0
+
+ip -net ${nsrouter} addr del dead:2::1/64 dev veth0
+
+# ... pbr ruleset for the router, check iif+oif.
+load_pbr_ruleset ${nsrouter}
+if [ $? -ne 0 ] ; then
+	echo "SKIP: Could not load fib forward ruleset"
+	exit $ksft_skip
+fi
+
+ip -net ${nsrouter} rule add from all table 128
+ip -net ${nsrouter} rule add from all iif veth0 table 129
+ip -net ${nsrouter} route add table 128 to 10.0.1.0/24 dev veth0
+ip -net ${nsrouter} route add table 129 to 10.0.2.0/24 dev veth1
+
+# drop main ipv4 table
+ip -net ${nsrouter} -4 rule delete table main
+
+test_ping 10.0.2.99 dead:2::99
+if [ $? -ne 0 ] ; then
+	ip -net ${nsrouter} nft list ruleset
+	echo "FAIL: fib mismatch in pbr setup"
+	exit 1
+fi
+
+echo "PASS: fib expression forward check with policy based routing"
 exit 0
-- 
cgit 


From f2ae0fa68e28f53df245c0d9e4c7a7db6d58638d Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 8 Apr 2022 12:46:01 -0700
Subject: selftests/mptcp: add diag listen tests

Check dumping of mptcp listener sockets:
1. filter by dport should not return any results
2. filter by sport should return listen sk
3. filter by saddr+sport should return listen sk
4. no filter should return listen sk

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/mptcp/diag.sh | 38 +++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
index ff821025d309..9dd43d7d957b 100755
--- a/tools/testing/selftests/net/mptcp/diag.sh
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -71,6 +71,43 @@ chk_msk_remote_key_nr()
 		__chk_nr "grep -c remote_key" $*
 }
 
+__chk_listen()
+{
+	local filter="$1"
+	local expected=$2
+
+	shift 2
+	msg=$*
+
+	nr=$(ss -N $ns -Ml "$filter" | grep -c LISTEN)
+	printf "%-50s" "$msg"
+
+	if [ $nr != $expected ]; then
+		echo "[ fail ] expected $expected found $nr"
+		ret=$test_cnt
+	else
+		echo "[  ok  ]"
+	fi
+}
+
+chk_msk_listen()
+{
+	lport=$1
+	local msg="check for listen socket"
+
+	# destination port search should always return empty list
+	__chk_listen "dport $lport" 0 "listen match for dport $lport"
+
+	# should return 'our' mptcp listen socket
+	__chk_listen "sport $lport" 1 "listen match for sport $lport"
+
+	__chk_listen "src inet:0.0.0.0:$lport" 1 "listen match for saddr and sport"
+
+	__chk_listen "" 1 "all listen sockets"
+
+	nr=$(ss -Ml $filter | wc -l)
+}
+
 # $1: ns, $2: port
 wait_local_port_listen()
 {
@@ -113,6 +150,7 @@ echo "a" | \
 				0.0.0.0 >/dev/null &
 wait_local_port_listen $ns 10000
 chk_msk_nr 0 "no msk on netns creation"
+chk_msk_listen 10000
 
 echo "b" | \
 	timeout ${timeout_test} \
-- 
cgit 


From f4fd706f738338e78f413db36d0a483a11c53cd1 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Sat, 9 Apr 2022 07:58:17 +0800
Subject: selftests/bpf: Drop duplicate max/min definitions

Drop duplicate macros min() and MAX() definitions in prog_tests and use
MIN() or MAX() in sys/param.h instead.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/1ae276da9925c2de59b5bdc93b693b4c243e692e.1649462033.git.geliang.tang@suse.com
---
 tools/testing/selftests/bpf/prog_tests/bpf_iter.c    | 4 +---
 tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c  | 6 ++----
 tools/testing/selftests/bpf/prog_tests/snprintf.c    | 4 +---
 tools/testing/selftests/bpf/prog_tests/tc_redirect.c | 1 -
 tools/testing/selftests/bpf/test_progs.h             | 1 +
 5 files changed, 5 insertions(+), 11 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 5142a7d130b2..2c403ddc8076 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -1192,8 +1192,6 @@ static void str_strip_first_line(char *str)
 	*dst = '\0';
 }
 
-#define min(a, b) ((a) < (b) ? (a) : (b))
-
 static void test_task_vma(void)
 {
 	int err, iter_fd = -1, proc_maps_fd = -1;
@@ -1229,7 +1227,7 @@ static void test_task_vma(void)
 	len = 0;
 	while (len < CMP_BUFFER_SIZE) {
 		err = read_fd_into_buffer(iter_fd, task_vma_output + len,
-					  min(read_size, CMP_BUFFER_SIZE - len));
+					  MIN(read_size, CMP_BUFFER_SIZE - len));
 		if (!err)
 			break;
 		if (CHECK(err < 0, "read_iter_fd", "read_iter_fd failed\n"))
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
index 8f7a1cef7d87..e9a9a31b2ffe 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
@@ -10,8 +10,6 @@
 #include "bpf_tcp_nogpl.skel.h"
 #include "bpf_dctcp_release.skel.h"
 
-#define min(a, b) ((a) < (b) ? (a) : (b))
-
 #ifndef ENOTSUPP
 #define ENOTSUPP 524
 #endif
@@ -53,7 +51,7 @@ static void *server(void *arg)
 
 	while (bytes < total_bytes && !READ_ONCE(stop)) {
 		nr_sent = send(fd, &batch,
-			       min(total_bytes - bytes, sizeof(batch)), 0);
+			       MIN(total_bytes - bytes, sizeof(batch)), 0);
 		if (nr_sent == -1 && errno == EINTR)
 			continue;
 		if (nr_sent == -1) {
@@ -146,7 +144,7 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map)
 	/* recv total_bytes */
 	while (bytes < total_bytes && !READ_ONCE(stop)) {
 		nr_recv = recv(fd, &batch,
-			       min(total_bytes - bytes, sizeof(batch)), 0);
+			       MIN(total_bytes - bytes, sizeof(batch)), 0);
 		if (nr_recv == -1 && errno == EINTR)
 			continue;
 		if (nr_recv == -1)
diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c
index 394ebfc3bbf3..4be6fdb78c6a 100644
--- a/tools/testing/selftests/bpf/prog_tests/snprintf.c
+++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c
@@ -83,8 +83,6 @@ cleanup:
 	test_snprintf__destroy(skel);
 }
 
-#define min(a, b) ((a) < (b) ? (a) : (b))
-
 /* Loads an eBPF object calling bpf_snprintf with up to 10 characters of fmt */
 static int load_single_snprintf(char *fmt)
 {
@@ -95,7 +93,7 @@ static int load_single_snprintf(char *fmt)
 	if (!skel)
 		return -EINVAL;
 
-	memcpy(skel->rodata->fmt, fmt, min(strlen(fmt) + 1, 10));
+	memcpy(skel->rodata->fmt, fmt, MIN(strlen(fmt) + 1, 10));
 
 	ret = test_snprintf_single__load(skel);
 	test_snprintf_single__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index 7ad66a247c02..958dae769c52 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -949,7 +949,6 @@ fail:
 	return -1;
 }
 
-#define MAX(a, b) ((a) > (b) ? (a) : (b))
 enum {
 	SRC_TO_TARGET = 0,
 	TARGET_TO_SRC = 1,
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index ecb5fef29ee6..6a8d68bb459e 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -25,6 +25,7 @@ typedef __u16 __sum16;
 #include <sys/wait.h>
 #include <sys/types.h>
 #include <sys/time.h>
+#include <sys/param.h>
 #include <fcntl.h>
 #include <pthread.h>
 #include <linux/bpf.h>
-- 
cgit 


From 0c8b6641c8410930e2a2f4a437ac3f987fbf9404 Mon Sep 17 00:00:00 2001
From: Like Xu <like.xu.linux@gmail.com>
Date: Wed, 6 Apr 2022 14:37:13 +0800
Subject: selftests: kvm: add tsc_scaling_sync to .gitignore

The tsc_scaling_sync's binary should be present in the .gitignore
file for the git to ignore it.

Signed-off-by: Like Xu <likexu@tencent.com>
Message-Id: <20220406063715.55625-3-likexu@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/.gitignore | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 573d93a1d61f..0b0e4402bba6 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -34,6 +34,7 @@
 /x86_64/state_test
 /x86_64/svm_vmcall_test
 /x86_64/svm_int_ctl_test
+/x86_64/tsc_scaling_sync
 /x86_64/sync_regs_test
 /x86_64/tsc_msrs_test
 /x86_64/userspace_io_test
-- 
cgit 


From b6f3c6a2b1fe2d754acb7bf64a20e64a8f2c8a1b Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 3 Feb 2022 17:53:22 -0800
Subject: torture: Add rcu_normal and rcu_expedited runs to torture.sh

Currently, the rcupdate.rcu_normal and rcupdate.rcu_expedited kernel
boot parameters are not regularly tested.  The potential addition of
polled expedited grace-period APIs increases the amount of code that is
affected by these kernel boot parameters.  This commit therefore adds a
"--do-rt" argument to torture.sh to exercise these kernel-boot options.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/torture.sh | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index bfe09e2829c8..e657a6e06417 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -54,6 +54,7 @@ do_kvfree=yes
 do_kasan=yes
 do_kcsan=no
 do_clocksourcewd=yes
+do_rt=yes
 
 # doyesno - Helper function for yes/no arguments
 function doyesno () {
@@ -82,6 +83,7 @@ usage () {
 	echo "       --do-rcuscale / --do-no-rcuscale"
 	echo "       --do-rcutorture / --do-no-rcutorture"
 	echo "       --do-refscale / --do-no-refscale"
+	echo "       --do-rt / --do-no-rt"
 	echo "       --do-scftorture / --do-no-scftorture"
 	echo "       --duration [ <minutes> | <hours>h | <days>d ]"
 	echo "       --kcsan-kmake-arg kernel-make-arguments"
@@ -118,6 +120,7 @@ do
 		do_scftorture=yes
 		do_rcuscale=yes
 		do_refscale=yes
+		do_rt=yes
 		do_kvfree=yes
 		do_kasan=yes
 		do_kcsan=yes
@@ -148,6 +151,7 @@ do
 		do_scftorture=no
 		do_rcuscale=no
 		do_refscale=no
+		do_rt=no
 		do_kvfree=no
 		do_kasan=no
 		do_kcsan=no
@@ -162,6 +166,9 @@ do
 	--do-refscale|--do-no-refscale)
 		do_refscale=`doyesno "$1" --do-refscale`
 		;;
+	--do-rt|--do-no-rt)
+		do_rt=`doyesno "$1" --do-rt`
+		;;
 	--do-scftorture|--do-no-scftorture)
 		do_scftorture=`doyesno "$1" --do-scftorture`
 		;;
@@ -354,6 +361,17 @@ then
 	torture_set "scftorture" tools/testing/selftests/rcutorture/bin/kvm.sh --torture scf --allcpus --duration "$duration_scftorture" --configs "$configs_scftorture" --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 1G --trust-make
 fi
 
+if test "$do_rt" = "yes"
+then
+	# With all post-boot grace periods forced to normal.
+	torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 rcupdate.rcu_normal=1"
+	torture_set "rcurttorture" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --trust-make
+
+	# With all post-boot grace periods forced to expedited.
+	torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 rcupdate.rcu_expedited=1"
+	torture_set "rcurttorture-exp" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --trust-make
+fi
+
 if test "$do_refscale" = yes
 then
 	primlist="`grep '\.name[ 	]*=' kernel/rcu/refscale.c | sed -e 's/^[^"]*"//' -e 's/".*$//'`"
-- 
cgit 


From ab3ecd0bce32705e722f356a504694f4fd51d4c0 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 15 Feb 2022 17:19:31 -0800
Subject: torture: Reposition so that $? collects ssh code in torture.sh

An "echo" slipped in between an "ssh" and the "ret=$?" that was intended
to collect its exit code, which prevents torture.sh from detecting
"ssh" failure.  This commit therefore reassociates the two.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/kvm-remote.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
index 8c4c1e4792d0..03d7dede5f9b 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
@@ -139,13 +139,13 @@ chmod +x $T/bin/kvm-remote-*.sh
 for i in $systems
 do
 	ncpus="`ssh $i getconf _NPROCESSORS_ONLN 2> /dev/null`"
-	echo $i: $ncpus CPUs " " `date` | tee -a "$oldrun/remote-log"
 	ret=$?
 	if test "$ret" -ne 0
 	then
 		echo System $i unreachable, giving up. | tee -a "$oldrun/remote-log"
 		exit 4
 	fi
+	echo $i: $ncpus CPUs " " `date` | tee -a "$oldrun/remote-log"
 done
 
 # Download and expand the tarball on all systems.
-- 
cgit 


From b20842baf89955a18ade95e6de6d94be757d6e5f Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 15 Feb 2022 17:22:32 -0800
Subject: torture: Use "-o Batchmode=yes" to disable ssh password requests

The torture.sh script normally runs unattended, so there is not much
point in the "ssh" command asking for a password.  This commit therefore
adds the "-o Batchmode=yes" argument to each "ssh" command to cause it
to fail rather than ask for a password.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/kvm-remote.sh | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
index 03d7dede5f9b..0ff59bd8b640 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
@@ -138,7 +138,7 @@ chmod +x $T/bin/kvm-remote-*.sh
 # Check first to avoid the need for cleanup for system-name typos
 for i in $systems
 do
-	ncpus="`ssh $i getconf _NPROCESSORS_ONLN 2> /dev/null`"
+	ncpus="`ssh -o BatchMode=yes $i getconf _NPROCESSORS_ONLN 2> /dev/null`"
 	ret=$?
 	if test "$ret" -ne 0
 	then
@@ -153,14 +153,14 @@ echo Build-products tarball: `du -h $T/binres.tgz` | tee -a "$oldrun/remote-log"
 for i in $systems
 do
 	echo Downloading tarball to $i `date` | tee -a "$oldrun/remote-log"
-	cat $T/binres.tgz | ssh $i "cd /tmp; tar -xzf -"
+	cat $T/binres.tgz | ssh -o BatchMode=yes $i "cd /tmp; tar -xzf -"
 	ret=$?
 	tries=0
 	while test "$ret" -ne 0
 	do
 		echo Unable to download $T/binres.tgz to system $i, waiting and then retrying.  $tries prior retries. | tee -a "$oldrun/remote-log"
 		sleep 60
-		cat $T/binres.tgz | ssh $i "cd /tmp; tar -xzf -"
+		cat $T/binres.tgz | ssh -o BatchMode=yes $i "cd /tmp; tar -xzf -"
 		ret=$?
 		if test "$ret" -ne 0
 		then
@@ -185,7 +185,7 @@ checkremotefile () {
 
 	while :
 	do
-		ssh $1 "test -f \"$2\""
+		ssh -o BatchMode=yes $1 "test -f \"$2\""
 		ret=$?
 		if test "$ret" -eq 255
 		then
@@ -228,7 +228,7 @@ startbatches () {
 		then
 			continue # System still running last test, skip.
 		fi
-		ssh "$i" "cd \"$resdir/$ds\"; touch remote.run; PATH=\"$T/bin:$PATH\" nohup kvm-remote-$curbatch.sh > kvm-remote-$curbatch.sh.out 2>&1 &" 1>&2
+		ssh -o BatchMode=yes "$i" "cd \"$resdir/$ds\"; touch remote.run; PATH=\"$T/bin:$PATH\" nohup kvm-remote-$curbatch.sh > kvm-remote-$curbatch.sh.out 2>&1 &" 1>&2
 		ret=$?
 		if test "$ret" -ne 0
 		then
@@ -267,7 +267,7 @@ do
 		sleep 30
 	done
 	echo " ---" Collecting results from $i `date` | tee -a "$oldrun/remote-log"
-	( cd "$oldrun"; ssh $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - )
+	( cd "$oldrun"; ssh -o BatchMode=yes $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - )
 done
 
 ( kvm-end-run-stats.sh "$oldrun" "$starttime"; echo $? > $T/exitcode ) | tee -a "$oldrun/remote-log"
-- 
cgit 


From 98bb264bdbbc0fe9d6b0340057fcc4e8e7043760 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Fri, 18 Feb 2022 17:52:13 -0800
Subject: torture: Permit running of experimental torture types

This commit weakens the checks of the kvm.sh script's --torture parameter
and the kvm-recheck.sh script's parsing so that experimental torture tests
may be created without updating these two scripts.  The changes required
are to the appropriate Makefile and Kconfig file, plus a directory
whose name begins with "X" must be added to the rcutorture/configs file.
This new directory's name can then be passed in via the kvm.sh script's
--torture parameter.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/kvm-recheck.sh | 7 ++++++-
 tools/testing/selftests/rcutorture/bin/kvm.sh         | 4 ++--
 2 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
index 0a5419982ab3..0789c5606d2a 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -33,7 +33,12 @@ do
 		TORTURE_SUITE="`cat $i/../torture_suite`"
 		configfile=`echo $i | sed -e 's,^.*/,,'`
 		rm -f $i/console.log.*.diags
-		kvm-recheck-${TORTURE_SUITE}.sh $i
+		case "${TORTURE_SUITE}" in
+		X*)
+			;;
+		*)
+			kvm-recheck-${TORTURE_SUITE}.sh $i
+		esac
 		if test -f "$i/qemu-retval" && test "`cat $i/qemu-retval`" -ne 0 && test "`cat $i/qemu-retval`" -ne 137
 		then
 			echo QEMU error, output:
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 55b2c1533282..af58b86a503a 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -86,7 +86,7 @@ usage () {
 	echo "       --remote"
 	echo "       --results absolute-pathname"
 	echo "       --shutdown-grace seconds"
-	echo "       --torture lock|rcu|rcuscale|refscale|scf"
+	echo "       --torture lock|rcu|rcuscale|refscale|scf|X*"
 	echo "       --trust-make"
 	exit 1
 }
@@ -231,7 +231,7 @@ do
 		shift
 		;;
 	--torture)
-		checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuscale\|refscale\|scf\)$' '^--'
+		checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuscale\|refscale\|scf\|X.*\)$' '^--'
 		TORTURE_SUITE=$2
 		TORTURE_MOD="`echo $TORTURE_SUITE | sed -e 's/^\(lock\|rcu\|scf\)$/\1torture/'`"
 		shift
-- 
cgit 


From 8e82c28ea2b4c6096c7673c59a285c658c9f389f Mon Sep 17 00:00:00 2001
From: Paul Menzel <pmenzel@molgen.mpg.de>
Date: Tue, 22 Feb 2022 13:07:16 +0100
Subject: torture: Make thread detection more robust by using lspcu

For consecutive numbers the lscpu command collapses the output and just
shows the range with start and end. The processors are numbered that
way on POWER8.

    $ sudo ppc64_cpu --smt=8
    $ lscpu | grep '^NUMA node'
    NUMA node(s):                    2
    NUMA node0 CPU(s):               0-79
    NUMA node8 CPU(s):               80-159

This causes the heuristic to detect the number threads per core, looking
for the number after the first comma, to fail, and QEMU aborts because of
invalid arguments.

    $ lscpu | grep '^NUMA node0' | sed -e 's/^[^,-]*(,|\-)\([0-9]*\),.*$/\1/'
    NUMA node0 CPU(s):               0-79

But the lscpu command shows the number of threads per core:

    $ sudo ppc64_cpu --smt=8
    $ lscpu | grep 'Thread(s) per core'
    Thread(s) per core:              8
    $ sudo ppc64_cpu --smt=off
    $ lscpu | grep 'Thread(s) per core'
    Thread(s) per core:              1

This commit therefore directly uses that value and replaces use of grep
with "sed -n" and its "p" command.

Signed-off-by: Paul Menzel <pmenzel@molgen.mpg.de>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/functions.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh
index c35ba24f994c..66d0414d8e4b 100644
--- a/tools/testing/selftests/rcutorture/bin/functions.sh
+++ b/tools/testing/selftests/rcutorture/bin/functions.sh
@@ -301,7 +301,7 @@ specify_qemu_cpus () {
 			echo $2 -smp $3
 			;;
 		qemu-system-ppc64)
-			nt="`lscpu | grep '^NUMA node0' | sed -e 's/^[^,]*,\([0-9]*\),.*$/\1/'`"
+			nt="`lscpu | sed -n 's/^Thread(s) per core:\s*//p'`"
 			echo $2 -smp cores=`expr \( $3 + $nt - 1 \) / $nt`,threads=$nt
 			;;
 		esac
-- 
cgit 


From 50fe062c806ed76fbee11e23251717e5be497d4c Mon Sep 17 00:00:00 2001
From: Joachim Wiberg <troglobit@gmail.com>
Date: Mon, 11 Apr 2022 10:40:54 +0200
Subject: selftests: forwarding: new test, verify host mdb entries

Boiler plate for testing static mdb entries.  This first test verifies
adding and removing host mdb entries for all supported types: IPv4,
IPv6, and MAC multicast.

Signed-off-by: Joachim Wiberg <troglobit@gmail.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 tools/testing/selftests/net/forwarding/Makefile    |   1 +
 .../testing/selftests/net/forwarding/bridge_mdb.sh | 103 +++++++++++++++++++++
 2 files changed, 104 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/bridge_mdb.sh

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index 8fa97ae9af9e..ae80c2aef577 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -2,6 +2,7 @@
 
 TEST_PROGS = bridge_igmp.sh \
 	bridge_locked_port.sh \
+	bridge_mdb.sh \
 	bridge_port_isolation.sh \
 	bridge_sticky_fdb.sh \
 	bridge_vlan_aware.sh \
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
new file mode 100755
index 000000000000..b1ba6876dd86
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
@@ -0,0 +1,103 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Verify that adding host mdb entries work as intended for all types of
+# multicast filters: ipv4, ipv6, and mac
+
+ALL_TESTS="mdb_add_del_test"
+NUM_NETIFS=2
+
+TEST_GROUP_IP4="225.1.2.3"
+TEST_GROUP_IP6="ff02::42"
+TEST_GROUP_MAC="01:00:01:c0:ff:ee"
+
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+switch_create()
+{
+	# Enable multicast filtering
+	ip link add dev br0 type bridge mcast_snooping 1
+
+	ip link set dev $swp1 master br0
+
+	ip link set dev br0 up
+	ip link set dev $swp1 up
+}
+
+switch_destroy()
+{
+	ip link set dev $swp1 down
+	ip link del dev br0
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	vrf_prepare
+
+	h1_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+do_mdb_add_del()
+{
+	local group=$1
+	local flag=$2
+
+	RET=0
+	bridge mdb add dev br0 port br0 grp $group $flag 2>/dev/null
+	check_err $? "Failed adding $group to br0, port br0"
+
+	if [ -z "$flag" ]; then
+	    flag="temp"
+	fi
+
+	bridge mdb show dev br0 | grep $group | grep -q $flag 2>/dev/null
+	check_err $? "$group not added with $flag flag"
+
+	bridge mdb del dev br0 port br0 grp $group 2>/dev/null
+	check_err $? "Failed deleting $group from br0, port br0"
+
+	bridge mdb show dev br0 | grep -q $group >/dev/null
+	check_err_fail 1 $? "$group still in mdb after delete"
+
+	log_test "MDB add/del group $group to bridge port br0"
+}
+
+mdb_add_del_test()
+{
+	do_mdb_add_del $TEST_GROUP_MAC permanent
+	do_mdb_add_del $TEST_GROUP_IP4
+	do_mdb_add_del $TEST_GROUP_IP6
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit 


From ce64763c63854b4079f2e036638aa881a1fb3fbc Mon Sep 17 00:00:00 2001
From: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Date: Fri, 8 Apr 2022 12:54:31 +0530
Subject: testing/selftests/mqueue: Fix mq_perf_tests to free the allocated cpu
 set

The selftest "mqueue/mq_perf_tests.c" use CPU_ALLOC to allocate
CPU set. This cpu set is used further in pthread_attr_setaffinity_np
and by pthread_create in the code. But in current code, allocated
cpu set is not freed.

Fix this issue by adding CPU_FREE in the "shutdown" function which
is called in most of the error/exit path for the cleanup. There are
few error paths which exit without using shutdown. Add a common goto
error path with CPU_FREE for these cases.

Fixes: 7820b0715b6f ("tools/selftests: add mq_perf_tests")
Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/mqueue/mq_perf_tests.c | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/mqueue/mq_perf_tests.c b/tools/testing/selftests/mqueue/mq_perf_tests.c
index b019e0b8221c..84fda3b49073 100644
--- a/tools/testing/selftests/mqueue/mq_perf_tests.c
+++ b/tools/testing/selftests/mqueue/mq_perf_tests.c
@@ -180,6 +180,9 @@ void shutdown(int exit_val, char *err_cause, int line_no)
 	if (in_shutdown++)
 		return;
 
+	/* Free the cpu_set allocated using CPU_ALLOC in main function */
+	CPU_FREE(cpu_set);
+
 	for (i = 0; i < num_cpus_to_pin; i++)
 		if (cpu_threads[i]) {
 			pthread_kill(cpu_threads[i], SIGUSR1);
@@ -551,6 +554,12 @@ int main(int argc, char *argv[])
 		perror("sysconf(_SC_NPROCESSORS_ONLN)");
 		exit(1);
 	}
+
+	if (getuid() != 0)
+		ksft_exit_skip("Not running as root, but almost all tests "
+			"require root in order to modify\nsystem settings.  "
+			"Exiting.\n");
+
 	cpus_online = min(MAX_CPUS, sysconf(_SC_NPROCESSORS_ONLN));
 	cpu_set = CPU_ALLOC(cpus_online);
 	if (cpu_set == NULL) {
@@ -589,7 +598,7 @@ int main(int argc, char *argv[])
 						cpu_set)) {
 					fprintf(stderr, "Any given CPU may "
 						"only be given once.\n");
-					exit(1);
+					goto err_code;
 				} else
 					CPU_SET_S(cpus_to_pin[cpu],
 						  cpu_set_size, cpu_set);
@@ -607,7 +616,7 @@ int main(int argc, char *argv[])
 				queue_path = malloc(strlen(option) + 2);
 				if (!queue_path) {
 					perror("malloc()");
-					exit(1);
+					goto err_code;
 				}
 				queue_path[0] = '/';
 				queue_path[1] = 0;
@@ -622,17 +631,12 @@ int main(int argc, char *argv[])
 		fprintf(stderr, "Must pass at least one CPU to continuous "
 			"mode.\n");
 		poptPrintUsage(popt_context, stderr, 0);
-		exit(1);
+		goto err_code;
 	} else if (!continuous_mode) {
 		num_cpus_to_pin = 1;
 		cpus_to_pin[0] = cpus_online - 1;
 	}
 
-	if (getuid() != 0)
-		ksft_exit_skip("Not running as root, but almost all tests "
-			"require root in order to modify\nsystem settings.  "
-			"Exiting.\n");
-
 	max_msgs = fopen(MAX_MSGS, "r+");
 	max_msgsize = fopen(MAX_MSGSIZE, "r+");
 	if (!max_msgs)
@@ -740,4 +744,9 @@ int main(int argc, char *argv[])
 			sleep(1);
 	}
 	shutdown(0, "", 0);
+
+err_code:
+	CPU_FREE(cpu_set);
+	exit(1);
+
 }
-- 
cgit 


From 42db2594e4cd4a3c29aad87f80b1c00bf7751afe Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 16 Feb 2022 10:20:47 -0800
Subject: lkdtm/heap: Note conditions for SLAB_LINEAR_OVERFLOW

It wasn't clear when SLAB_LINEAR_OVERFLOW would be expected to trip.
Explicitly describe it and include the CONFIGs in the kselftest.

Cc: Muhammad Usama Anjum <usama.anjum@collabora.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: linux-kselftest@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 drivers/misc/lkdtm/heap.c            | 6 ++++++
 tools/testing/selftests/lkdtm/config | 2 ++
 2 files changed, 8 insertions(+)

(limited to 'tools/testing')

diff --git a/drivers/misc/lkdtm/heap.c b/drivers/misc/lkdtm/heap.c
index 8a92f5a800fa..b4ac726a548f 100644
--- a/drivers/misc/lkdtm/heap.c
+++ b/drivers/misc/lkdtm/heap.c
@@ -22,6 +22,9 @@ static volatile int __offset = 1;
 /*
  * If there aren't guard pages, it's likely that a consecutive allocation will
  * let us overflow into the second allocation without overwriting something real.
+ *
+ * This should always be caught because there is an unconditional unmapped
+ * page after vmap allocations.
  */
 void lkdtm_VMALLOC_LINEAR_OVERFLOW(void)
 {
@@ -41,6 +44,9 @@ void lkdtm_VMALLOC_LINEAR_OVERFLOW(void)
  * This tries to stay within the next largest power-of-2 kmalloc cache
  * to avoid actually overwriting anything important if it's not detected
  * correctly.
+ *
+ * This should get caught by either memory tagging, KASan, or by using
+ * CONFIG_SLUB_DEBUG=y and slub_debug=ZF (or CONFIG_SLUB_DEBUG_ON=y).
  */
 void lkdtm_SLAB_LINEAR_OVERFLOW(void)
 {
diff --git a/tools/testing/selftests/lkdtm/config b/tools/testing/selftests/lkdtm/config
index 46f39ee76208..304123688739 100644
--- a/tools/testing/selftests/lkdtm/config
+++ b/tools/testing/selftests/lkdtm/config
@@ -9,3 +9,5 @@ CONFIG_UBSAN=y
 CONFIG_UBSAN_BOUNDS=y
 CONFIG_UBSAN_TRAP=y
 CONFIG_STACKPROTECTOR_STRONG=y
+CONFIG_SLUB_DEBUG=y
+CONFIG_SLUB_DEBUG_ON=y
-- 
cgit 


From 31e624a77e748e4ab7d5c9c3ddc46ba7735bd75e Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 14 Mar 2022 18:22:44 -0700
Subject: cxl/mem: Rename cxl_dvsec_decode_init() to cxl_hdm_decode_init()

cxl_dvsec_decode_init() is tasked with checking whether legacy DVSEC
range based decode is in effect, or whether HDM can be enabled / already
is enabled. As such it either succeeds or fails and that result is the
return value. The @do_hdm_init variable is misleading in the case where
HDM operation is already found to be active, so just call it @retval.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Davidlohr Bueso <dave@stgolabs.net>
Link: https://lore.kernel.org/r/164730736435.3806189.2537160791687837469.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/cxl/mem.c            | 12 ++++++------
 tools/testing/cxl/mock_mem.c |  2 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'tools/testing')

diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
index 50704deb2ff0..3baae1332760 100644
--- a/drivers/cxl/mem.c
+++ b/drivers/cxl/mem.c
@@ -68,7 +68,7 @@ static int create_endpoint(struct cxl_memdev *cxlmd,
 }
 
 /**
- * cxl_dvsec_decode_init() - Setup HDM decoding for the endpoint
+ * cxl_hdm_decode_init() - Setup HDM decoding for the endpoint
  * @cxlds: Device state
  *
  * Additionally, enables global HDM decoding. Warning: don't call this outside
@@ -79,12 +79,12 @@ static int create_endpoint(struct cxl_memdev *cxlmd,
  * decoders, or if it can not be determined if DVSEC Ranges are in use.
  * Otherwise, returns true.
  */
-__mock bool cxl_dvsec_decode_init(struct cxl_dev_state *cxlds)
+__mock bool cxl_hdm_decode_init(struct cxl_dev_state *cxlds)
 {
 	struct cxl_endpoint_dvsec_info *info = &cxlds->info;
 	struct cxl_register_map map;
 	struct cxl_component_reg_map *cmap = &map.component_map;
-	bool global_enable, do_hdm_init = false;
+	bool global_enable, retval = false;
 	void __iomem *crb;
 	u32 global_ctrl;
 
@@ -113,7 +113,7 @@ __mock bool cxl_dvsec_decode_init(struct cxl_dev_state *cxlds)
 		goto out;
 	}
 
-	do_hdm_init = true;
+	retval = true;
 
 	/*
 	 * Permanently (for this boot at least) opt the device into HDM
@@ -129,7 +129,7 @@ __mock bool cxl_dvsec_decode_init(struct cxl_dev_state *cxlds)
 
 out:
 	iounmap(crb);
-	return do_hdm_init;
+	return retval;
 }
 
 static int cxl_mem_probe(struct device *dev)
@@ -160,7 +160,7 @@ static int cxl_mem_probe(struct device *dev)
 	 * If DVSEC ranges are being used instead of HDM decoder registers there
 	 * is no use in trying to manage those.
 	 */
-	if (!cxl_dvsec_decode_init(cxlds)) {
+	if (!cxl_hdm_decode_init(cxlds)) {
 		dev_err(dev,
 			"Legacy range registers configuration prevents HDM operation.\n");
 		return -EBUSY;
diff --git a/tools/testing/cxl/mock_mem.c b/tools/testing/cxl/mock_mem.c
index d1dec5845139..69946f678cfa 100644
--- a/tools/testing/cxl/mock_mem.c
+++ b/tools/testing/cxl/mock_mem.c
@@ -4,7 +4,7 @@
 #include <linux/types.h>
 
 struct cxl_dev_state;
-bool cxl_dvsec_decode_init(struct cxl_dev_state *cxlds)
+bool cxl_hdm_decode_init(struct cxl_dev_state *cxlds)
 {
 	return true;
 }
-- 
cgit 


From 816cda9ae531b27c30356a673e8dc9f037cd90d1 Mon Sep 17 00:00:00 2001
From: Alaa Mohamed <eng.alaamohamedsoliman.am@gmail.com>
Date: Tue, 12 Apr 2022 04:04:31 +0200
Subject: selftests: net: fib_rule_tests: add support to select a test to run

Add boilerplate test loop in test to run all tests
in fib_rule_tests.sh

Signed-off-by: Alaa Mohamed <eng.alaamohamedsoliman.am@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/fib_rule_tests.sh | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index 4f70baad867d..bbe3b379927a 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -20,6 +20,7 @@ SRC_IP6=2001:db8:1::3
 DEV_ADDR=192.51.100.1
 DEV_ADDR6=2001:db8:1::1
 DEV=dummy0
+TESTS="fib_rule6 fib_rule4"
 
 log_test()
 {
@@ -316,7 +317,16 @@ fi
 # start clean
 cleanup &> /dev/null
 setup
-run_fibrule_tests
+for t in $TESTS
+do
+	case $t in
+	fib_rule6_test|fib_rule6)		fib_rule6_test;;
+	fib_rule4_test|fib_rule4)		fib_rule4_test;;
+
+	help) echo "Test names: $TESTS"; exit 0;;
+
+	esac
+done
 cleanup
 
 if [ "$TESTS" != "none" ]; then
-- 
cgit 


From 42c35fdc340ffbf6b3a8ffbdd5b53d856ecf924b Mon Sep 17 00:00:00 2001
From: Like Xu <like.xu.linux@gmail.com>
Date: Wed, 6 Apr 2022 14:37:14 +0800
Subject: selftests: kvm/x86/xen: Replace a comma in the xen_shinfo_test with
 semicolon

+WARNING: Possible comma where semicolon could be used
+#397: FILE: tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c:700:
++				tmr.type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
++				vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &tmr);

Fixes: 25eaeebe710c ("KVM: x86/xen: Add self tests for KVM_XEN_HVM_CONFIG_EVTCHN_SEND")
Signed-off-by: Like Xu <likexu@tencent.com>
Message-Id: <20220406063715.55625-4-likexu@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
index d9d9d1deec45..5e6f40633ea6 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -702,7 +702,7 @@ int main(int argc, char *argv[])
 
 			case 14:
 				memset(&tmr, 0, sizeof(tmr));
-				tmr.type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
+				tmr.type = KVM_XEN_VCPU_ATTR_TYPE_TIMER;
 				vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &tmr);
 				TEST_ASSERT(tmr.u.timer.port == EVTCHN_TIMER,
 					    "Timer port not returned");
-- 
cgit 


From 2e53b877dc1258d4ac3de98f496bb88ec3bf5e25 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 11 Mar 2022 12:00:42 -0800
Subject: lkdtm: Add CFI_BACKWARD to test ROP mitigations

In order to test various backward-edge control flow integrity methods,
add a test that manipulates the return address on the stack. Currently
only arm64 Pointer Authentication and Shadow Call Stack is supported.

 $ echo CFI_BACKWARD | cat >/sys/kernel/debug/provoke-crash/DIRECT

Under SCS, successful test of the mitigation is reported as:

 lkdtm: Performing direct entry CFI_BACKWARD
 lkdtm: Attempting unchecked stack return address redirection ...
 lkdtm: ok: redirected stack return address.
 lkdtm: Attempting checked stack return address redirection ...
 lkdtm: ok: control flow unchanged.

Under PAC, successful test of the mitigation is reported by the PAC
exception handler:

 lkdtm: Performing direct entry CFI_BACKWARD
 lkdtm: Attempting unchecked stack return address redirection ...
 lkdtm: ok: redirected stack return address.
 lkdtm: Attempting checked stack return address redirection ...
 Unable to handle kernel paging request at virtual address bfffffc0088d0514
 Mem abort info:
   ESR = 0x86000004
   EC = 0x21: IABT (current EL), IL = 32 bits
   SET = 0, FnV = 0
   EA = 0, S1PTW = 0
   FSC = 0x04: level 0 translation fault
 [bfffffc0088d0514] address between user and kernel address ranges
 ...

If the CONFIGs are missing (or the mitigation isn't working), failure
is reported as:

 lkdtm: Performing direct entry CFI_BACKWARD
 lkdtm: Attempting unchecked stack return address redirection ...
 lkdtm: ok: redirected stack return address.
 lkdtm: Attempting checked stack return address redirection ...
 lkdtm: FAIL: stack return address was redirected!
 lkdtm: This is probably expected, since this kernel was built *without* CONFIG_ARM64_PTR_AUTH_KERNEL=y nor CONFIG_SHADOW_CALL_STACK=y

Co-developed-by: Dan Li <ashimida@linux.alibaba.com>
Signed-off-by: Dan Li <ashimida@linux.alibaba.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/lkml/20220416001103.1524653-1-keescook@chromium.org
---
 drivers/misc/lkdtm/cfi.c                | 134 ++++++++++++++++++++++++++++++++
 tools/testing/selftests/lkdtm/tests.txt |   1 +
 2 files changed, 135 insertions(+)

(limited to 'tools/testing')

diff --git a/drivers/misc/lkdtm/cfi.c b/drivers/misc/lkdtm/cfi.c
index e88f778be0d5..804965a480b7 100644
--- a/drivers/misc/lkdtm/cfi.c
+++ b/drivers/misc/lkdtm/cfi.c
@@ -3,6 +3,7 @@
  * This is for all the tests relating directly to Control Flow Integrity.
  */
 #include "lkdtm.h"
+#include <asm/page.h>
 
 static int called_count;
 
@@ -42,8 +43,141 @@ static void lkdtm_CFI_FORWARD_PROTO(void)
 	pr_expected_config(CONFIG_CFI_CLANG);
 }
 
+/*
+ * This can stay local to LKDTM, as there should not be a production reason
+ * to disable PAC && SCS.
+ */
+#ifdef CONFIG_ARM64_PTR_AUTH_KERNEL
+# ifdef CONFIG_ARM64_BTI_KERNEL
+#  define __no_pac             "branch-protection=bti"
+# else
+#  define __no_pac             "branch-protection=none"
+# endif
+# define __no_ret_protection   __noscs __attribute__((__target__(__no_pac)))
+#else
+# define __no_ret_protection   __noscs
+#endif
+
+#define no_pac_addr(addr)      \
+	((__force __typeof__(addr))((__force u64)(addr) | PAGE_OFFSET))
+
+/* The ultimate ROP gadget. */
+static noinline __no_ret_protection
+void set_return_addr_unchecked(unsigned long *expected, unsigned long *addr)
+{
+	/* Use of volatile is to make sure final write isn't seen as a dead store. */
+	unsigned long * volatile *ret_addr = (unsigned long **)__builtin_frame_address(0) + 1;
+
+	/* Make sure we've found the right place on the stack before writing it. */
+	if (no_pac_addr(*ret_addr) == expected)
+		*ret_addr = (addr);
+	else
+		/* Check architecture, stack layout, or compiler behavior... */
+		pr_warn("Eek: return address mismatch! %px != %px\n",
+			*ret_addr, addr);
+}
+
+static noinline
+void set_return_addr(unsigned long *expected, unsigned long *addr)
+{
+	/* Use of volatile is to make sure final write isn't seen as a dead store. */
+	unsigned long * volatile *ret_addr = (unsigned long **)__builtin_frame_address(0) + 1;
+
+	/* Make sure we've found the right place on the stack before writing it. */
+	if (no_pac_addr(*ret_addr) == expected)
+		*ret_addr = (addr);
+	else
+		/* Check architecture, stack layout, or compiler behavior... */
+		pr_warn("Eek: return address mismatch! %px != %px\n",
+			*ret_addr, addr);
+}
+
+static volatile int force_check;
+
+static void lkdtm_CFI_BACKWARD(void)
+{
+	/* Use calculated gotos to keep labels addressable. */
+	void *labels[] = {0, &&normal, &&redirected, &&check_normal, &&check_redirected};
+
+	pr_info("Attempting unchecked stack return address redirection ...\n");
+
+	/* Always false */
+	if (force_check) {
+		/*
+		 * Prepare to call with NULLs to avoid parameters being treated as
+		 * constants in -02.
+		 */
+		set_return_addr_unchecked(NULL, NULL);
+		set_return_addr(NULL, NULL);
+		if (force_check)
+			goto *labels[1];
+		if (force_check)
+			goto *labels[2];
+		if (force_check)
+			goto *labels[3];
+		if (force_check)
+			goto *labels[4];
+		return;
+	}
+
+	/*
+	 * Use fallthrough switch case to keep basic block ordering between
+	 * set_return_addr*() and the label after it.
+	 */
+	switch (force_check) {
+	case 0:
+		set_return_addr_unchecked(&&normal, &&redirected);
+		fallthrough;
+	case 1:
+normal:
+		/* Always true */
+		if (!force_check) {
+			pr_err("FAIL: stack return address manipulation failed!\n");
+			/* If we can't redirect "normally", we can't test mitigations. */
+			return;
+		}
+		break;
+	default:
+redirected:
+		pr_info("ok: redirected stack return address.\n");
+		break;
+	}
+
+	pr_info("Attempting checked stack return address redirection ...\n");
+
+	switch (force_check) {
+	case 0:
+		set_return_addr(&&check_normal, &&check_redirected);
+		fallthrough;
+	case 1:
+check_normal:
+		/* Always true */
+		if (!force_check) {
+			pr_info("ok: control flow unchanged.\n");
+			return;
+		}
+
+check_redirected:
+		pr_err("FAIL: stack return address was redirected!\n");
+		break;
+	}
+
+	if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL)) {
+		pr_expected_config(CONFIG_ARM64_PTR_AUTH_KERNEL);
+		return;
+	}
+	if (IS_ENABLED(CONFIG_SHADOW_CALL_STACK)) {
+		pr_expected_config(CONFIG_SHADOW_CALL_STACK);
+		return;
+	}
+	pr_warn("This is probably expected, since this %s was built *without* %s=y nor %s=y\n",
+		lkdtm_kernel_info,
+		"CONFIG_ARM64_PTR_AUTH_KERNEL", "CONFIG_SHADOW_CALL_STACK");
+}
+
 static struct crashtype crashtypes[] = {
 	CRASHTYPE(CFI_FORWARD_PROTO),
+	CRASHTYPE(CFI_BACKWARD),
 };
 
 struct crashtype_category cfi_crashtypes = {
diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt
index 243c781f0780..9dace01dbf15 100644
--- a/tools/testing/selftests/lkdtm/tests.txt
+++ b/tools/testing/selftests/lkdtm/tests.txt
@@ -74,6 +74,7 @@ USERCOPY_STACK_BEYOND
 USERCOPY_KERNEL
 STACKLEAK_ERASING OK: the rest of the thread stack is properly erased
 CFI_FORWARD_PROTO
+CFI_BACKWARD call trace:|ok: control flow unchanged
 FORTIFIED_STRSCPY
 FORTIFIED_OBJECT
 FORTIFIED_SUBOBJECT
-- 
cgit 


From f9a2fb73318eb4dbf8cd84866b8b0dd012d8b116 Mon Sep 17 00:00:00 2001
From: Arun Ajith S <aajith@arista.com>
Date: Fri, 15 Apr 2022 08:34:02 +0000
Subject: net/ipv6: Introduce accept_unsolicited_na knob to implement
 router-side changes for RFC9131

Add a new neighbour cache entry in STALE state for routers on receiving
an unsolicited (gratuitous) neighbour advertisement with
target link-layer-address option specified.
This is similar to the arp_accept configuration for IPv4.
A new sysctl endpoint is created to turn on this behaviour:
/proc/sys/net/ipv6/conf/interface/accept_unsolicited_na.

Signed-off-by: Arun Ajith S <aajith@arista.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.rst             |  27 +++
 include/linux/ipv6.h                               |   1 +
 include/uapi/linux/ipv6.h                          |   1 +
 net/ipv6/addrconf.c                                |  10 +
 net/ipv6/ndisc.c                                   |  20 +-
 tools/testing/selftests/net/Makefile               |   1 +
 .../selftests/net/ndisc_unsolicited_na_test.sh     | 255 +++++++++++++++++++++
 7 files changed, 314 insertions(+), 1 deletion(-)
 create mode 100755 tools/testing/selftests/net/ndisc_unsolicited_na_test.sh

(limited to 'tools/testing')

diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index b0024aa7b051..433f2e4a5fed 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -2467,6 +2467,33 @@ drop_unsolicited_na - BOOLEAN
 
 	By default this is turned off.
 
+accept_unsolicited_na - BOOLEAN
+	Add a new neighbour cache entry in STALE state for routers on receiving an
+	unsolicited neighbour advertisement with target link-layer address option
+	specified. This is as per router-side behavior documented in RFC9131.
+	This has lower precedence than drop_unsolicited_na.
+
+	 ====   ======  ======  ==============================================
+	 drop   accept  fwding                   behaviour
+	 ----   ------  ------  ----------------------------------------------
+	    1        X       X  Drop NA packet and don't pass up the stack
+	    0        0       X  Pass NA packet up the stack, don't update NC
+	    0        1       0  Pass NA packet up the stack, don't update NC
+	    0        1       1  Pass NA packet up the stack, and add a STALE
+	                        NC entry
+	 ====   ======  ======  ==============================================
+
+	This will optimize the return path for the initial off-link communication
+	that is initiated by a directly connected host, by ensuring that
+	the first-hop router which turns on this setting doesn't have to
+	buffer the initial return packets to do neighbour-solicitation.
+	The prerequisite is that the host is configured to send
+	unsolicited neighbour advertisements on interface bringup.
+	This setting should be used in conjunction with the ndisc_notify setting
+	on the host to satisfy this prerequisite.
+
+	By default this is turned off.
+
 enhanced_dad - BOOLEAN
 	Include a nonce option in the IPv6 neighbor solicitation messages used for
 	duplicate address detection per RFC7527. A received DAD NS will only signal
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 16870f86c74d..918bfea4ef5f 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -61,6 +61,7 @@ struct ipv6_devconf {
 	__s32		suppress_frag_ndisc;
 	__s32		accept_ra_mtu;
 	__s32		drop_unsolicited_na;
+	__s32		accept_unsolicited_na;
 	struct ipv6_stable_secret {
 		bool initialized;
 		struct in6_addr secret;
diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h
index d4178dace0bf..549ddeaf788b 100644
--- a/include/uapi/linux/ipv6.h
+++ b/include/uapi/linux/ipv6.h
@@ -194,6 +194,7 @@ enum {
 	DEVCONF_IOAM6_ID,
 	DEVCONF_IOAM6_ID_WIDE,
 	DEVCONF_NDISC_EVICT_NOCARRIER,
+	DEVCONF_ACCEPT_UNSOLICITED_NA,
 	DEVCONF_MAX
 };
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 1afc4c024981..6473dc84b71d 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -5587,6 +5587,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
 	array[DEVCONF_IOAM6_ID] = cnf->ioam6_id;
 	array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide;
 	array[DEVCONF_NDISC_EVICT_NOCARRIER] = cnf->ndisc_evict_nocarrier;
+	array[DEVCONF_ACCEPT_UNSOLICITED_NA] = cnf->accept_unsolicited_na;
 }
 
 static inline size_t inet6_ifla6_size(void)
@@ -7037,6 +7038,15 @@ static const struct ctl_table addrconf_sysctl[] = {
 		.extra1		= (void *)SYSCTL_ZERO,
 		.extra2		= (void *)SYSCTL_ONE,
 	},
+	{
+		.procname	= "accept_unsolicited_na",
+		.data		= &ipv6_devconf.accept_unsolicited_na,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+		.extra1		= (void *)SYSCTL_ZERO,
+		.extra2		= (void *)SYSCTL_ONE,
+	},
 	{
 		/* sentinel */
 	}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index fcb288b0ae13..254addad0dd3 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -979,6 +979,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
 	struct inet6_dev *idev = __in6_dev_get(dev);
 	struct inet6_ifaddr *ifp;
 	struct neighbour *neigh;
+	bool create_neigh;
 
 	if (skb->len < sizeof(struct nd_msg)) {
 		ND_PRINTK(2, warn, "NA: packet too short\n");
@@ -999,6 +1000,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
 	/* For some 802.11 wireless deployments (and possibly other networks),
 	 * there will be a NA proxy and unsolicitd packets are attacks
 	 * and thus should not be accepted.
+	 * drop_unsolicited_na takes precedence over accept_unsolicited_na
 	 */
 	if (!msg->icmph.icmp6_solicited && idev &&
 	    idev->cnf.drop_unsolicited_na)
@@ -1039,7 +1041,23 @@ static void ndisc_recv_na(struct sk_buff *skb)
 		in6_ifa_put(ifp);
 		return;
 	}
-	neigh = neigh_lookup(&nd_tbl, &msg->target, dev);
+	/* RFC 9131 updates original Neighbour Discovery RFC 4861.
+	 * An unsolicited NA can now create a neighbour cache entry
+	 * on routers if it has Target LL Address option.
+	 *
+	 * drop   accept  fwding                   behaviour
+	 * ----   ------  ------  ----------------------------------------------
+	 *    1        X       X  Drop NA packet and don't pass up the stack
+	 *    0        0       X  Pass NA packet up the stack, don't update NC
+	 *    0        1       0  Pass NA packet up the stack, don't update NC
+	 *    0        1       1  Pass NA packet up the stack, and add a STALE
+	 *                          NC entry
+	 * Note that we don't do a (daddr == all-routers-mcast) check.
+	 */
+	create_neigh = !msg->icmph.icmp6_solicited && lladdr &&
+		       idev && idev->cnf.forwarding &&
+		       idev->cnf.accept_unsolicited_na;
+	neigh = __neigh_lookup(&nd_tbl, &msg->target, dev, create_neigh);
 
 	if (neigh) {
 		u8 old_flags = neigh->flags;
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 3fe2515aa616..af7f6e6ff182 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -36,6 +36,7 @@ TEST_PROGS += srv6_end_dt4_l3vpn_test.sh
 TEST_PROGS += srv6_end_dt6_l3vpn_test.sh
 TEST_PROGS += vrf_strict_mode_test.sh
 TEST_PROGS += arp_ndisc_evict_nocarrier.sh
+TEST_PROGS += ndisc_unsolicited_na_test.sh
 TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh
 TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh
 TEST_GEN_FILES =  socket nettest
diff --git a/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh b/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh
new file mode 100755
index 000000000000..f508657ee126
--- /dev/null
+++ b/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh
@@ -0,0 +1,255 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for the accept_unsolicited_na feature to
+# enable RFC9131 behaviour. The following is the test-matrix.
+# drop   accept  fwding                   behaviour
+# ----   ------  ------  ----------------------------------------------
+#    1        X       X  Drop NA packet and don't pass up the stack
+#    0        0       X  Pass NA packet up the stack, don't update NC
+#    0        1       0  Pass NA packet up the stack, don't update NC
+#    0        1       1  Pass NA packet up the stack, and add a STALE
+#                           NC entry
+
+ret=0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+PAUSE_ON_FAIL=no
+PAUSE=no
+
+HOST_NS="ns-host"
+ROUTER_NS="ns-router"
+
+HOST_INTF="veth-host"
+ROUTER_INTF="veth-router"
+
+ROUTER_ADDR="2000:20::1"
+HOST_ADDR="2000:20::2"
+SUBNET_WIDTH=64
+ROUTER_ADDR_WITH_MASK="${ROUTER_ADDR}/${SUBNET_WIDTH}"
+HOST_ADDR_WITH_MASK="${HOST_ADDR}/${SUBNET_WIDTH}"
+
+IP_HOST="ip -6 -netns ${HOST_NS}"
+IP_HOST_EXEC="ip netns exec ${HOST_NS}"
+IP_ROUTER="ip -6 -netns ${ROUTER_NS}"
+IP_ROUTER_EXEC="ip netns exec ${ROUTER_NS}"
+
+tcpdump_stdout=
+tcpdump_stderr=
+
+log_test()
+{
+	local rc=$1
+	local expected=$2
+	local msg="$3"
+
+	if [ ${rc} -eq ${expected} ]; then
+		printf "    TEST: %-60s  [ OK ]\n" "${msg}"
+		nsuccess=$((nsuccess+1))
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "    TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+		echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+
+	if [ "${PAUSE}" = "yes" ]; then
+		echo
+		echo "hit enter to continue, 'q' to quit"
+		read a
+		[ "$a" = "q" ] && exit 1
+	fi
+}
+
+setup()
+{
+	set -e
+
+	local drop_unsolicited_na=$1
+	local accept_unsolicited_na=$2
+	local forwarding=$3
+
+	# Setup two namespaces and a veth tunnel across them.
+	# On end of the tunnel is a router and the other end is a host.
+	ip netns add ${HOST_NS}
+	ip netns add ${ROUTER_NS}
+	${IP_ROUTER} link add ${ROUTER_INTF} type veth \
+                peer name ${HOST_INTF} netns ${HOST_NS}
+
+	# Enable IPv6 on both router and host, and configure static addresses.
+	# The router here is the DUT
+	# Setup router configuration as specified by the arguments.
+	# forwarding=0 case is to check that a non-router
+	# doesn't add neighbour entries.
+        ROUTER_CONF=net.ipv6.conf.${ROUTER_INTF}
+	${IP_ROUTER_EXEC} sysctl -qw \
+                ${ROUTER_CONF}.forwarding=${forwarding}
+	${IP_ROUTER_EXEC} sysctl -qw \
+                ${ROUTER_CONF}.drop_unsolicited_na=${drop_unsolicited_na}
+	${IP_ROUTER_EXEC} sysctl -qw \
+                ${ROUTER_CONF}.accept_unsolicited_na=${accept_unsolicited_na}
+	${IP_ROUTER_EXEC} sysctl -qw ${ROUTER_CONF}.disable_ipv6=0
+	${IP_ROUTER} addr add ${ROUTER_ADDR_WITH_MASK} dev ${ROUTER_INTF}
+
+	# Turn on ndisc_notify on host interface so that
+	# the host sends unsolicited NAs.
+	HOST_CONF=net.ipv6.conf.${HOST_INTF}
+	${IP_HOST_EXEC} sysctl -qw ${HOST_CONF}.ndisc_notify=1
+	${IP_HOST_EXEC} sysctl -qw ${HOST_CONF}.disable_ipv6=0
+	${IP_HOST} addr add ${HOST_ADDR_WITH_MASK} dev ${HOST_INTF}
+
+	set +e
+}
+
+start_tcpdump() {
+	set -e
+	tcpdump_stdout=`mktemp`
+	tcpdump_stderr=`mktemp`
+	${IP_ROUTER_EXEC} timeout 15s \
+                tcpdump --immediate-mode -tpni ${ROUTER_INTF} -c 1 \
+                "icmp6 && icmp6[0] == 136 && src ${HOST_ADDR}" \
+                > ${tcpdump_stdout} 2> /dev/null
+	set +e
+}
+
+cleanup_tcpdump()
+{
+	set -e
+	[[ ! -z  ${tcpdump_stdout} ]] && rm -f ${tcpdump_stdout}
+	[[ ! -z  ${tcpdump_stderr} ]] && rm -f ${tcpdump_stderr}
+	tcpdump_stdout=
+	tcpdump_stderr=
+	set +e
+}
+
+cleanup()
+{
+	cleanup_tcpdump
+	ip netns del ${HOST_NS}
+	ip netns del ${ROUTER_NS}
+}
+
+link_up() {
+	set -e
+	${IP_ROUTER} link set dev ${ROUTER_INTF} up
+	${IP_HOST} link set dev ${HOST_INTF} up
+	set +e
+}
+
+verify_ndisc() {
+	local drop_unsolicited_na=$1
+	local accept_unsolicited_na=$2
+	local forwarding=$3
+
+	neigh_show_output=$(${IP_ROUTER} neigh show \
+                to ${HOST_ADDR} dev ${ROUTER_INTF} nud stale)
+	if [ ${drop_unsolicited_na} -eq 0 ] && \
+			[ ${accept_unsolicited_na} -eq 1 ] && \
+			[ ${forwarding} -eq 1 ]; then
+		# Neighbour entry expected to be present for 011 case
+		[[ ${neigh_show_output} ]]
+	else
+		# Neighbour entry expected to be absent for all other cases
+		[[ -z ${neigh_show_output} ]]
+	fi
+}
+
+test_unsolicited_na_common()
+{
+	# Setup the test bed, but keep links down
+	setup $1 $2 $3
+
+	# Bring the link up, wait for the NA,
+	# and add a delay to ensure neighbour processing is done.
+	link_up
+	start_tcpdump
+
+	# Verify the neighbour table
+	verify_ndisc $1 $2 $3
+
+}
+
+test_unsolicited_na_combination() {
+	test_unsolicited_na_common $1 $2 $3
+	test_msg=("test_unsolicited_na: "
+		"drop_unsolicited_na=$1 "
+		"accept_unsolicited_na=$2 "
+		"forwarding=$3")
+	log_test $? 0 "${test_msg[*]}"
+	cleanup
+}
+
+test_unsolicited_na_combinations() {
+	# Args: drop_unsolicited_na accept_unsolicited_na forwarding
+
+	# Expect entry
+	test_unsolicited_na_combination 0 1 1
+
+	# Expect no entry
+	test_unsolicited_na_combination 0 0 0
+	test_unsolicited_na_combination 0 0 1
+	test_unsolicited_na_combination 0 1 0
+	test_unsolicited_na_combination 1 0 0
+	test_unsolicited_na_combination 1 0 1
+	test_unsolicited_na_combination 1 1 0
+	test_unsolicited_na_combination 1 1 1
+}
+
+###############################################################################
+# usage
+
+usage()
+{
+	cat <<EOF
+usage: ${0##*/} OPTS
+        -p          Pause on fail
+        -P          Pause after each test before cleanup
+EOF
+}
+
+###############################################################################
+# main
+
+while getopts :pPh o
+do
+	case $o in
+		p) PAUSE_ON_FAIL=yes;;
+		P) PAUSE=yes;;
+		h) usage; exit 0;;
+		*) usage; exit 1;;
+	esac
+done
+
+# make sure we don't pause twice
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v tcpdump)" ]; then
+	echo "SKIP: Could not run test without tcpdump tool"
+	exit $ksft_skip
+fi
+
+# start clean
+cleanup &> /dev/null
+
+test_unsolicited_na_combinations
+
+printf "\nTests passed: %3d\n" ${nsuccess}
+printf "Tests failed: %3d\n"   ${nfail}
+
+exit $ret
-- 
cgit 


From e1fad9517f0fddfbe7da5605f50c362242cc7170 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Mon, 18 Apr 2022 09:42:41 +0300
Subject: selftests: mlxsw: Introduce devlink line card
 provision/unprovision/activation tests

Introduce basic line card manipulation which consists of provisioning,
unprovisioning and activation of a line card.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../drivers/net/mlxsw/devlink_linecard.sh          | 280 +++++++++++++++++++++
 1 file changed, 280 insertions(+)
 create mode 100755 tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh
new file mode 100755
index 000000000000..08a922d8b86a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh
@@ -0,0 +1,280 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# In addition to the common variables, user might use:
+# LC_SLOT - If not set, all probed line cards are going to be tested,
+#	    with an exception of the "activation_16x100G_test".
+#	    It set, only the selected line card is going to be used
+#	    for tests, including "activation_16x100G_test".
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	unprovision_test
+	provision_test
+	activation_16x100G_test
+"
+
+NUM_NETIFS=0
+
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+until_lc_state_is()
+{
+	local state=$1; shift
+	local current=$("$@")
+
+	echo "$current"
+	[ "$current" == "$state" ]
+}
+
+until_lc_state_is_not()
+{
+	! until_lc_state_is "$@"
+}
+
+lc_state_get()
+{
+	local lc=$1
+
+	devlink lc show $DEVLINK_DEV lc $lc -j | jq -e -r ".[][][].state"
+}
+
+lc_wait_until_state_changes()
+{
+	local lc=$1
+	local state=$2
+	local timeout=$3 # ms
+
+	busywait "$timeout" until_lc_state_is_not "$state" lc_state_get "$lc"
+}
+
+lc_wait_until_state_becomes()
+{
+	local lc=$1
+	local state=$2
+	local timeout=$3 # ms
+
+	busywait "$timeout" until_lc_state_is "$state" lc_state_get "$lc"
+}
+
+until_lc_port_count_is()
+{
+	local port_count=$1; shift
+	local current=$("$@")
+
+	echo "$current"
+	[ $current == $port_count ]
+}
+
+lc_port_count_get()
+{
+	local lc=$1
+
+	devlink port -j | jq -e -r ".[][] | select(.lc==$lc) | .port" | wc -l
+}
+
+lc_wait_until_port_count_is()
+{
+	local lc=$1
+	local port_count=$2
+	local timeout=$3 # ms
+
+	busywait "$timeout" until_lc_port_count_is "$port_count" lc_port_count_get "$lc"
+}
+
+PROV_UNPROV_TIMEOUT=8000 # ms
+POST_PROV_ACT_TIMEOUT=2000 # ms
+PROV_PORTS_INSTANTIATION_TIMEOUT=15000 # ms
+
+unprovision_one()
+{
+	local lc=$1
+	local state
+
+	state=$(lc_state_get $lc)
+	check_err $? "Failed to get state of linecard $lc"
+	if [[ "$state" == "unprovisioned" ]]; then
+		return
+	fi
+
+	log_info "Unprovisioning linecard $lc"
+
+	devlink lc set $DEVLINK_DEV lc $lc notype
+	check_err $? "Failed to trigger linecard $lc unprovisioning"
+
+	state=$(lc_wait_until_state_changes $lc "unprovisioning" \
+		$PROV_UNPROV_TIMEOUT)
+	check_err $? "Failed to unprovision linecard $lc (timeout)"
+
+	[ "$state" == "unprovisioned" ]
+	check_err $? "Failed to unprovision linecard $lc (state=$state)"
+}
+
+provision_one()
+{
+	local lc=$1
+	local type=$2
+	local state
+
+	log_info "Provisioning linecard $lc"
+
+	devlink lc set $DEVLINK_DEV lc $lc type $type
+	check_err $? "Failed trigger linecard $lc provisioning"
+
+	state=$(lc_wait_until_state_changes $lc "provisioning" \
+		$PROV_UNPROV_TIMEOUT)
+	check_err $? "Failed to provision linecard $lc (timeout)"
+
+	[ "$state" == "provisioned" ] || [ "$state" == "active" ]
+	check_err $? "Failed to provision linecard $lc (state=$state)"
+
+	provisioned_type=$(devlink lc show $DEVLINK_DEV lc $lc -j | jq -e -r ".[][][].type")
+	[ "$provisioned_type" == "$type" ]
+	check_err $? "Wrong provision type returned for linecard $lc (got \"$provisioned_type\", expected \"$type\")"
+
+	# Wait for possible activation to make sure the state
+	# won't change after return from this function.
+	state=$(lc_wait_until_state_becomes $lc "active" \
+		$POST_PROV_ACT_TIMEOUT)
+}
+
+unprovision_test()
+{
+	RET=0
+	local lc
+
+	lc=$LC_SLOT
+	unprovision_one $lc
+	log_test "Unprovision"
+}
+
+LC_16X100G_TYPE="16x100G"
+LC_16X100G_PORT_COUNT=16
+
+supported_types_check()
+{
+	local lc=$1
+	local supported_types_count
+	local type_index
+	local lc_16x100_found=false
+
+	supported_types_count=$(devlink lc show $DEVLINK_DEV lc $lc -j | \
+				jq -e -r ".[][][].supported_types | length")
+	[ $supported_types_count != 0 ]
+	check_err $? "No supported types found for linecard $lc"
+	for (( type_index=0; type_index<$supported_types_count; type_index++ ))
+	do
+		type=$(devlink lc show $DEVLINK_DEV lc $lc -j | \
+		       jq -e -r ".[][][].supported_types[$type_index]")
+		if [[ "$type" == "$LC_16X100G_TYPE" ]]; then
+			lc_16x100_found=true
+			break
+		fi
+	done
+	[ $lc_16x100_found = true ]
+	check_err $? "16X100G not found between supported types of linecard $lc"
+}
+
+ports_check()
+{
+	local lc=$1
+	local expected_port_count=$2
+	local port_count
+
+	port_count=$(lc_wait_until_port_count_is $lc $expected_port_count \
+		$PROV_PORTS_INSTANTIATION_TIMEOUT)
+	[ $port_count != 0 ]
+	check_err $? "No port associated with linecard $lc"
+	[ $port_count == $expected_port_count ]
+	check_err $? "Unexpected port count linecard $lc (got $port_count, expected $expected_port_count)"
+}
+
+provision_test()
+{
+	RET=0
+	local lc
+	local type
+	local state
+
+	lc=$LC_SLOT
+	supported_types_check $lc
+	state=$(lc_state_get $lc)
+	check_err $? "Failed to get state of linecard $lc"
+	if [[ "$state" != "unprovisioned" ]]; then
+		unprovision_one $lc
+	fi
+	provision_one $lc $LC_16X100G_TYPE
+	ports_check $lc $LC_16X100G_PORT_COUNT
+	log_test "Provision"
+}
+
+ACTIVATION_TIMEOUT=20000 # ms
+
+interface_check()
+{
+	ip link set $h1 up
+	ip link set $h2 up
+	ifaces_upped=true
+	setup_wait
+}
+
+activation_16x100G_test()
+{
+	RET=0
+	local lc
+	local type
+	local state
+
+	lc=$LC_SLOT
+	type=$LC_16X100G_TYPE
+
+	unprovision_one $lc
+	provision_one $lc $type
+	state=$(lc_wait_until_state_becomes $lc "active" \
+		$ACTIVATION_TIMEOUT)
+	check_err $? "Failed to get linecard $lc activated (timeout)"
+
+	interface_check
+
+	log_test "Activation 16x100G"
+}
+
+setup_prepare()
+{
+	local lc_num=$(devlink lc show -j | jq -e -r ".[][\"$DEVLINK_DEV\"] |length")
+	if [[ $? -ne 0 ]] || [[ $lc_num -eq 0 ]]; then
+		echo "SKIP: No linecard support found"
+		exit $ksft_skip
+	fi
+
+	if [ -z "$LC_SLOT" ]; then
+		echo "SKIP: \"LC_SLOT\" variable not provided"
+		exit $ksft_skip
+	fi
+
+	# Interfaces are not present during the script start,
+	# that's why we define NUM_NETIFS here so dummy
+	# implicit veth pairs are not created.
+	NUM_NETIFS=2
+	h1=${NETIFS[p1]}
+	h2=${NETIFS[p2]}
+	ifaces_upped=false
+}
+
+cleanup()
+{
+	if [ "$ifaces_upped" = true ] ; then
+		ip link set $h1 down
+		ip link set $h2 down
+	fi
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit 


From 2324257dbd6889638c6cba1ade9eeac3224e2043 Mon Sep 17 00:00:00 2001
From: Mykola Lysenko <mykolal@fb.com>
Date: Mon, 18 Apr 2022 15:25:07 -0700
Subject: selftests/bpf: Refactor prog_tests logging and test execution

This is a pre-req to add separate logging for each subtest in
test_progs.

Move all the mutable test data to the test_result struct.
Move per-test init/de-init into the run_one_test function.
Consolidate data aggregation and final log output in
calculate_and_print_summary function.
As a side effect, this patch fixes double counting of errors
for subtests and possible duplicate output of subtest log
on failures.

Also, add prog_tests_framework.c test to verify some of the
counting logic.

As part of verification, confirmed that number of reported
tests is the same before and after the change for both parallel
and sequential test execution.

Signed-off-by: Mykola Lysenko <mykolal@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220418222507.1726259-1-mykolal@fb.com
---
 .../selftests/bpf/prog_tests/bpf_mod_race.c        |   4 +-
 .../bpf/prog_tests/prog_tests_framework.c          |  56 ++++
 tools/testing/selftests/bpf/test_progs.c           | 328 ++++++++-------------
 tools/testing/selftests/bpf/test_progs.h           |  35 ++-
 4 files changed, 202 insertions(+), 221 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
index d43f548c572c..a4d0cc9d3367 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
@@ -36,13 +36,13 @@ struct test_config {
 	void (*bpf_destroy)(void *);
 };
 
-enum test_state {
+enum bpf_test_state {
 	_TS_INVALID,
 	TS_MODULE_LOAD,
 	TS_MODULE_LOAD_FAIL,
 };
 
-static _Atomic enum test_state state = _TS_INVALID;
+static _Atomic enum bpf_test_state state = _TS_INVALID;
 
 static int sys_finit_module(int fd, const char *param_values, int flags)
 {
diff --git a/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c b/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c
new file mode 100644
index 000000000000..14f2796076e0
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+#include "test_progs.h"
+#include "testing_helpers.h"
+
+static void clear_test_state(struct test_state *state)
+{
+	state->error_cnt = 0;
+	state->sub_succ_cnt = 0;
+	state->skip_cnt = 0;
+}
+
+void test_prog_tests_framework(void)
+{
+	struct test_state *state = env.test_state;
+
+	/* in all the ASSERT calls below we need to return on the first
+	 * error due to the fact that we are cleaning the test state after
+	 * each dummy subtest
+	 */
+
+	/* test we properly count skipped tests with subtests */
+	if (test__start_subtest("test_good_subtest"))
+		test__end_subtest();
+	if (!ASSERT_EQ(state->skip_cnt, 0, "skip_cnt_check"))
+		return;
+	if (!ASSERT_EQ(state->error_cnt, 0, "error_cnt_check"))
+		return;
+	if (!ASSERT_EQ(state->subtest_num, 1, "subtest_num_check"))
+		return;
+	clear_test_state(state);
+
+	if (test__start_subtest("test_skip_subtest")) {
+		test__skip();
+		test__end_subtest();
+	}
+	if (test__start_subtest("test_skip_subtest")) {
+		test__skip();
+		test__end_subtest();
+	}
+	if (!ASSERT_EQ(state->skip_cnt, 2, "skip_cnt_check"))
+		return;
+	if (!ASSERT_EQ(state->subtest_num, 3, "subtest_num_check"))
+		return;
+	clear_test_state(state);
+
+	if (test__start_subtest("test_fail_subtest")) {
+		test__fail();
+		test__end_subtest();
+	}
+	if (!ASSERT_EQ(state->error_cnt, 1, "error_cnt_check"))
+		return;
+	if (!ASSERT_EQ(state->subtest_num, 4, "subtest_num_check"))
+		return;
+	clear_test_state(state);
+}
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index dcad9871f556..c536d1d29d57 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -51,19 +51,8 @@ struct prog_test_def {
 	int test_num;
 	void (*run_test)(void);
 	void (*run_serial_test)(void);
-	bool force_log;
-	int error_cnt;
-	int skip_cnt;
-	int sub_succ_cnt;
 	bool should_run;
-	bool tested;
 	bool need_cgroup_cleanup;
-
-	char *subtest_name;
-	int subtest_num;
-
-	/* store counts before subtest started */
-	int old_error_cnt;
 };
 
 /* Override C runtime library's usleep() implementation to ensure nanosleep()
@@ -141,35 +130,32 @@ static bool should_run_subtest(struct test_selector *sel,
 	return subtest_num < subtest_sel->num_set_len && subtest_sel->num_set[subtest_num];
 }
 
-static void dump_test_log(const struct prog_test_def *test, bool failed)
+static void dump_test_log(const struct prog_test_def *test,
+			  const struct test_state *test_state,
+			  bool force_failed)
 {
-	if (stdout == env.stdout)
-		return;
+	bool failed = test_state->error_cnt > 0 || force_failed;
 
 	/* worker always holds log */
 	if (env.worker_id != -1)
 		return;
 
-	fflush(stdout); /* exports env.log_buf & env.log_cnt */
+	fflush(stdout); /* exports test_state->log_buf & test_state->log_cnt */
+
+	fprintf(env.stdout, "#%-3d %s:%s\n",
+		test->test_num, test->test_name,
+		failed ? "FAIL" : (test_state->skip_cnt ? "SKIP" : "OK"));
 
-	if (env.verbosity > VERBOSE_NONE || test->force_log || failed) {
-		if (env.log_cnt) {
-			env.log_buf[env.log_cnt] = '\0';
-			fprintf(env.stdout, "%s", env.log_buf);
-			if (env.log_buf[env.log_cnt - 1] != '\n')
+	if (env.verbosity > VERBOSE_NONE || test_state->force_log || failed) {
+		if (test_state->log_cnt) {
+			test_state->log_buf[test_state->log_cnt] = '\0';
+			fprintf(env.stdout, "%s", test_state->log_buf);
+			if (test_state->log_buf[test_state->log_cnt - 1] != '\n')
 				fprintf(env.stdout, "\n");
 		}
 	}
 }
 
-static void skip_account(void)
-{
-	if (env.test->skip_cnt) {
-		env.skip_cnt++;
-		env.test->skip_cnt = 0;
-	}
-}
-
 static void stdio_restore(void);
 
 /* A bunch of tests set custom affinity per-thread and/or per-process. Reset
@@ -219,72 +205,78 @@ static void restore_netns(void)
 void test__end_subtest(void)
 {
 	struct prog_test_def *test = env.test;
-	int sub_error_cnt = test->error_cnt - test->old_error_cnt;
-
-	dump_test_log(test, sub_error_cnt);
+	struct test_state *state = env.test_state;
+	int sub_error_cnt = state->error_cnt - state->old_error_cnt;
 
 	fprintf(stdout, "#%d/%d %s/%s:%s\n",
-	       test->test_num, test->subtest_num, test->test_name, test->subtest_name,
-	       sub_error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK"));
+	       test->test_num, state->subtest_num, test->test_name, state->subtest_name,
+	       sub_error_cnt ? "FAIL" : (state->subtest_skip_cnt ? "SKIP" : "OK"));
 
-	if (sub_error_cnt)
-		test->error_cnt++;
-	else if (test->skip_cnt == 0)
-		test->sub_succ_cnt++;
-	skip_account();
+	if (sub_error_cnt == 0) {
+		if (state->subtest_skip_cnt == 0) {
+			state->sub_succ_cnt++;
+		} else {
+			state->subtest_skip_cnt = 0;
+			state->skip_cnt++;
+		}
+	}
 
-	free(test->subtest_name);
-	test->subtest_name = NULL;
+	free(state->subtest_name);
+	state->subtest_name = NULL;
 }
 
 bool test__start_subtest(const char *subtest_name)
 {
 	struct prog_test_def *test = env.test;
+	struct test_state *state = env.test_state;
 
-	if (test->subtest_name)
+	if (state->subtest_name)
 		test__end_subtest();
 
-	test->subtest_num++;
+	state->subtest_num++;
 
 	if (!subtest_name || !subtest_name[0]) {
 		fprintf(env.stderr,
 			"Subtest #%d didn't provide sub-test name!\n",
-			test->subtest_num);
+			state->subtest_num);
 		return false;
 	}
 
 	if (!should_run_subtest(&env.test_selector,
 				&env.subtest_selector,
-				test->subtest_num,
+				state->subtest_num,
 				test->test_name,
 				subtest_name))
 		return false;
 
-	test->subtest_name = strdup(subtest_name);
-	if (!test->subtest_name) {
+	state->subtest_name = strdup(subtest_name);
+	if (!state->subtest_name) {
 		fprintf(env.stderr,
 			"Subtest #%d: failed to copy subtest name!\n",
-			test->subtest_num);
+			state->subtest_num);
 		return false;
 	}
-	env.test->old_error_cnt = env.test->error_cnt;
+	state->old_error_cnt = state->error_cnt;
 
 	return true;
 }
 
 void test__force_log(void)
 {
-	env.test->force_log = true;
+	env.test_state->force_log = true;
 }
 
 void test__skip(void)
 {
-	env.test->skip_cnt++;
+	if (env.test_state->subtest_name)
+		env.test_state->subtest_skip_cnt++;
+	else
+		env.test_state->skip_cnt++;
 }
 
 void test__fail(void)
 {
-	env.test->error_cnt++;
+	env.test_state->error_cnt++;
 }
 
 int test__join_cgroup(const char *path)
@@ -517,8 +509,11 @@ static struct prog_test_def prog_test_defs[] = {
 #include <prog_tests/tests.h>
 #undef DEFINE_TEST
 };
+
 static const int prog_test_cnt = ARRAY_SIZE(prog_test_defs);
 
+static struct test_state test_states[ARRAY_SIZE(prog_test_defs)];
+
 const char *argp_program_version = "test_progs 0.1";
 const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
 static const char argp_program_doc[] = "BPF selftests test runner";
@@ -701,7 +696,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 	return 0;
 }
 
-static void stdio_hijack(void)
+static void stdio_hijack(char **log_buf, size_t *log_cnt)
 {
 #ifdef __GLIBC__
 	env.stdout = stdout;
@@ -715,7 +710,7 @@ static void stdio_hijack(void)
 	/* stdout and stderr -> buffer */
 	fflush(stdout);
 
-	stdout = open_memstream(&env.log_buf, &env.log_cnt);
+	stdout = open_memstream(log_buf, log_cnt);
 	if (!stdout) {
 		stdout = env.stdout;
 		perror("open_memstream");
@@ -818,7 +813,7 @@ void crash_handler(int signum)
 	sz = backtrace(bt, ARRAY_SIZE(bt));
 
 	if (env.test)
-		dump_test_log(env.test, true);
+		dump_test_log(env.test, env.test_state, true);
 	if (env.stdout)
 		stdio_restore();
 	if (env.worker_id != -1)
@@ -840,17 +835,6 @@ static int current_test_idx;
 static pthread_mutex_t current_test_lock;
 static pthread_mutex_t stdout_output_lock;
 
-struct test_result {
-	int error_cnt;
-	int skip_cnt;
-	int sub_succ_cnt;
-
-	size_t log_cnt;
-	char *log_buf;
-};
-
-static struct test_result test_results[ARRAY_SIZE(prog_test_defs)];
-
 static inline const char *str_msg(const struct msg *msg, char *buf)
 {
 	switch (msg->type) {
@@ -904,8 +888,12 @@ static int recv_message(int sock, struct msg *msg)
 static void run_one_test(int test_num)
 {
 	struct prog_test_def *test = &prog_test_defs[test_num];
+	struct test_state *state = &test_states[test_num];
 
 	env.test = test;
+	env.test_state = state;
+
+	stdio_hijack(&state->log_buf, &state->log_cnt);
 
 	if (test->run_test)
 		test->run_test();
@@ -913,17 +901,19 @@ static void run_one_test(int test_num)
 		test->run_serial_test();
 
 	/* ensure last sub-test is finalized properly */
-	if (test->subtest_name)
+	if (state->subtest_name)
 		test__end_subtest();
 
-	test->tested = true;
+	state->tested = true;
 
-	dump_test_log(test, test->error_cnt);
+	dump_test_log(test, state, false);
 
 	reset_affinity();
 	restore_netns();
 	if (test->need_cgroup_cleanup)
 		cleanup_cgroup_environment();
+
+	stdio_restore();
 }
 
 struct dispatch_data {
@@ -942,7 +932,7 @@ static void *dispatch_thread(void *ctx)
 	while (true) {
 		int test_to_run = -1;
 		struct prog_test_def *test;
-		struct test_result *result;
+		struct test_state *state;
 
 		/* grab a test */
 		{
@@ -989,16 +979,15 @@ static void *dispatch_thread(void *ctx)
 			if (test_to_run != msg_test_done.test_done.test_num)
 				goto error;
 
-			test->tested = true;
-			result = &test_results[test_to_run];
-
-			result->error_cnt = msg_test_done.test_done.error_cnt;
-			result->skip_cnt = msg_test_done.test_done.skip_cnt;
-			result->sub_succ_cnt = msg_test_done.test_done.sub_succ_cnt;
+			state = &test_states[test_to_run];
+			state->tested = true;
+			state->error_cnt = msg_test_done.test_done.error_cnt;
+			state->skip_cnt = msg_test_done.test_done.skip_cnt;
+			state->sub_succ_cnt = msg_test_done.test_done.sub_succ_cnt;
 
 			/* collect all logs */
 			if (msg_test_done.test_done.have_log) {
-				log_fp = open_memstream(&result->log_buf, &result->log_cnt);
+				log_fp = open_memstream(&state->log_buf, &state->log_cnt);
 				if (!log_fp)
 					goto error;
 
@@ -1017,25 +1006,11 @@ static void *dispatch_thread(void *ctx)
 				fclose(log_fp);
 				log_fp = NULL;
 			}
-			/* output log */
-			{
-				pthread_mutex_lock(&stdout_output_lock);
-
-				if (result->log_cnt) {
-					result->log_buf[result->log_cnt] = '\0';
-					fprintf(stdout, "%s", result->log_buf);
-					if (result->log_buf[result->log_cnt - 1] != '\n')
-						fprintf(stdout, "\n");
-				}
-
-				fprintf(stdout, "#%d %s:%s\n",
-					test->test_num, test->test_name,
-					result->error_cnt ? "FAIL" : (result->skip_cnt ? "SKIP" : "OK"));
-
-				pthread_mutex_unlock(&stdout_output_lock);
-			}
-
 		} /* wait for test done */
+
+		pthread_mutex_lock(&stdout_output_lock);
+		dump_test_log(test, state, false);
+		pthread_mutex_unlock(&stdout_output_lock);
 	} /* while (true) */
 error:
 	if (env.debug)
@@ -1057,38 +1032,50 @@ done:
 	return NULL;
 }
 
-static void print_all_error_logs(void)
+static void calculate_summary_and_print_errors(struct test_env *env)
 {
 	int i;
+	int succ_cnt = 0, fail_cnt = 0, sub_succ_cnt = 0, skip_cnt = 0;
 
-	if (env.fail_cnt)
-		fprintf(stdout, "\nAll error logs:\n");
+	for (i = 0; i < prog_test_cnt; i++) {
+		struct test_state *state = &test_states[i];
+
+		if (!state->tested)
+			continue;
+
+		sub_succ_cnt += state->sub_succ_cnt;
+		skip_cnt += state->skip_cnt;
+
+		if (state->error_cnt)
+			fail_cnt++;
+		else
+			succ_cnt++;
+	}
+
+	if (fail_cnt)
+		printf("\nAll error logs:\n");
 
 	/* print error logs again */
 	for (i = 0; i < prog_test_cnt; i++) {
-		struct prog_test_def *test;
-		struct test_result *result;
-
-		test = &prog_test_defs[i];
-		result = &test_results[i];
+		struct prog_test_def *test = &prog_test_defs[i];
+		struct test_state *state = &test_states[i];
 
-		if (!test->tested || !result->error_cnt)
+		if (!state->tested || !state->error_cnt)
 			continue;
 
-		fprintf(stdout, "\n#%d %s:%s\n",
-			test->test_num, test->test_name,
-			result->error_cnt ? "FAIL" : (result->skip_cnt ? "SKIP" : "OK"));
-
-		if (result->log_cnt) {
-			result->log_buf[result->log_cnt] = '\0';
-			fprintf(stdout, "%s", result->log_buf);
-			if (result->log_buf[result->log_cnt - 1] != '\n')
-				fprintf(stdout, "\n");
-		}
+		dump_test_log(test, state, true);
 	}
+
+	printf("Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
+	       succ_cnt, sub_succ_cnt, skip_cnt, fail_cnt);
+
+	env->succ_cnt = succ_cnt;
+	env->sub_succ_cnt = sub_succ_cnt;
+	env->fail_cnt = fail_cnt;
+	env->skip_cnt = skip_cnt;
 }
 
-static int server_main(void)
+static void server_main(void)
 {
 	pthread_t *dispatcher_threads;
 	struct dispatch_data *data;
@@ -1144,60 +1131,18 @@ static int server_main(void)
 
 	for (int i = 0; i < prog_test_cnt; i++) {
 		struct prog_test_def *test = &prog_test_defs[i];
-		struct test_result *result = &test_results[i];
 
 		if (!test->should_run || !test->run_serial_test)
 			continue;
 
-		stdio_hijack();
-
 		run_one_test(i);
-
-		stdio_restore();
-		if (env.log_buf) {
-			result->log_cnt = env.log_cnt;
-			result->log_buf = strdup(env.log_buf);
-
-			free(env.log_buf);
-			env.log_buf = NULL;
-			env.log_cnt = 0;
-		}
-		restore_netns();
-
-		fprintf(stdout, "#%d %s:%s\n",
-			test->test_num, test->test_name,
-			test->error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK"));
-
-		result->error_cnt = test->error_cnt;
-		result->skip_cnt = test->skip_cnt;
-		result->sub_succ_cnt = test->sub_succ_cnt;
 	}
 
 	/* generate summary */
 	fflush(stderr);
 	fflush(stdout);
 
-	for (i = 0; i < prog_test_cnt; i++) {
-		struct prog_test_def *current_test;
-		struct test_result *result;
-
-		current_test = &prog_test_defs[i];
-		result = &test_results[i];
-
-		if (!current_test->tested)
-			continue;
-
-		env.succ_cnt += result->error_cnt ? 0 : 1;
-		env.skip_cnt += result->skip_cnt;
-		if (result->error_cnt)
-			env.fail_cnt++;
-		env.sub_succ_cnt += result->sub_succ_cnt;
-	}
-
-	print_all_error_logs();
-
-	fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
-		env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
+	calculate_summary_and_print_errors(&env);
 
 	/* reap all workers */
 	for (i = 0; i < env.workers; i++) {
@@ -1207,8 +1152,6 @@ static int server_main(void)
 		if (pid != env.worker_pids[i])
 			perror("Unable to reap worker");
 	}
-
-	return 0;
 }
 
 static int worker_main(int sock)
@@ -1229,35 +1172,29 @@ static int worker_main(int sock)
 					env.worker_id);
 			goto out;
 		case MSG_DO_TEST: {
-			int test_to_run;
-			struct prog_test_def *test;
+			int test_to_run = msg.do_test.test_num;
+			struct prog_test_def *test = &prog_test_defs[test_to_run];
+			struct test_state *state = &test_states[test_to_run];
 			struct msg msg_done;
 
-			test_to_run = msg.do_test.test_num;
-			test = &prog_test_defs[test_to_run];
-
 			if (env.debug)
 				fprintf(stderr, "[%d]: #%d:%s running.\n",
 					env.worker_id,
 					test_to_run + 1,
 					test->test_name);
 
-			stdio_hijack();
-
 			run_one_test(test_to_run);
 
-			stdio_restore();
-
 			memset(&msg_done, 0, sizeof(msg_done));
 			msg_done.type = MSG_TEST_DONE;
 			msg_done.test_done.test_num = test_to_run;
-			msg_done.test_done.error_cnt = test->error_cnt;
-			msg_done.test_done.skip_cnt = test->skip_cnt;
-			msg_done.test_done.sub_succ_cnt = test->sub_succ_cnt;
+			msg_done.test_done.error_cnt = state->error_cnt;
+			msg_done.test_done.skip_cnt = state->skip_cnt;
+			msg_done.test_done.sub_succ_cnt = state->sub_succ_cnt;
 			msg_done.test_done.have_log = false;
 
-			if (env.verbosity > VERBOSE_NONE || test->force_log || test->error_cnt) {
-				if (env.log_cnt)
+			if (env.verbosity > VERBOSE_NONE || state->force_log || state->error_cnt) {
+				if (state->log_cnt)
 					msg_done.test_done.have_log = true;
 			}
 			if (send_message(sock, &msg_done) < 0) {
@@ -1270,8 +1207,8 @@ static int worker_main(int sock)
 				char *src;
 				size_t slen;
 
-				src = env.log_buf;
-				slen = env.log_cnt;
+				src = state->log_buf;
+				slen = state->log_cnt;
 				while (slen) {
 					struct msg msg_log;
 					char *dest;
@@ -1291,10 +1228,10 @@ static int worker_main(int sock)
 					assert(send_message(sock, &msg_log) >= 0);
 				}
 			}
-			if (env.log_buf) {
-				free(env.log_buf);
-				env.log_buf = NULL;
-				env.log_cnt = 0;
+			if (state->log_buf) {
+				free(state->log_buf);
+				state->log_buf = NULL;
+				state->log_cnt = 0;
 			}
 			if (env.debug)
 				fprintf(stderr, "[%d]: #%d:%s done.\n",
@@ -1425,7 +1362,6 @@ int main(int argc, char **argv)
 
 	for (i = 0; i < prog_test_cnt; i++) {
 		struct prog_test_def *test = &prog_test_defs[i];
-		struct test_result *result;
 
 		if (!test->should_run)
 			continue;
@@ -1441,34 +1377,7 @@ int main(int argc, char **argv)
 			continue;
 		}
 
-		stdio_hijack();
-
 		run_one_test(i);
-
-		stdio_restore();
-
-		fprintf(env.stdout, "#%d %s:%s\n",
-			test->test_num, test->test_name,
-			test->error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK"));
-
-		result = &test_results[i];
-		result->error_cnt = test->error_cnt;
-		if (env.log_buf) {
-			result->log_buf = strdup(env.log_buf);
-			result->log_cnt = env.log_cnt;
-
-			free(env.log_buf);
-			env.log_buf = NULL;
-			env.log_cnt = 0;
-		}
-
-		if (test->error_cnt)
-			env.fail_cnt++;
-		else
-			env.succ_cnt++;
-
-		skip_account();
-		env.sub_succ_cnt += test->sub_succ_cnt;
 	}
 
 	if (env.get_test_cnt) {
@@ -1479,10 +1388,7 @@ int main(int argc, char **argv)
 	if (env.list_test_names)
 		goto out;
 
-	print_all_error_logs();
-
-	fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
-		env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
+	calculate_summary_and_print_errors(&env);
 
 	close(env.saved_netns_fd);
 out:
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 6a8d68bb459e..0a102ce460d6 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -64,6 +64,25 @@ struct test_selector {
 	int num_set_len;
 };
 
+struct test_state {
+	bool tested;
+	bool force_log;
+
+	int error_cnt;
+	int skip_cnt;
+	int subtest_skip_cnt;
+	int sub_succ_cnt;
+
+	char *subtest_name;
+	int subtest_num;
+
+	/* store counts before subtest started */
+	int old_error_cnt;
+
+	size_t log_cnt;
+	char *log_buf;
+};
+
 struct test_env {
 	struct test_selector test_selector;
 	struct test_selector subtest_selector;
@@ -76,12 +95,11 @@ struct test_env {
 	bool get_test_cnt;
 	bool list_test_names;
 
-	struct prog_test_def *test; /* current running tests */
+	struct prog_test_def *test; /* current running test */
+	struct test_state *test_state; /* current running test result */
 
 	FILE *stdout;
 	FILE *stderr;
-	char *log_buf;
-	size_t log_cnt;
 	int nr_cpus;
 
 	int succ_cnt; /* successful tests */
@@ -126,11 +144,12 @@ struct msg {
 
 extern struct test_env env;
 
-extern void test__force_log();
-extern bool test__start_subtest(const char *name);
-extern void test__skip(void);
-extern void test__fail(void);
-extern int test__join_cgroup(const char *path);
+void test__force_log(void);
+bool test__start_subtest(const char *name);
+void test__end_subtest(void);
+void test__skip(void);
+void test__fail(void);
+int test__join_cgroup(const char *path);
 
 #define PRINT_FAIL(format...)                                                  \
 	({                                                                     \
-- 
cgit 


From 8c89b5db7a2894e33417dd69680729e8f65f5709 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Mon, 18 Apr 2022 21:32:30 -0700
Subject: selftests/bpf: Limit unroll_count for pyperf600 test

LLVM commit [1] changed loop pragma behavior such that
full loop unroll is always honored with user pragma.
Previously, unroll count also depends on the unrolled
code size. For pyperf600, without [1], the loop unroll
count is 150. With [1], the loop unroll count is 600.

The unroll count of 600 caused the program size close to
298k and this caused the following code is generated:
         0:       7b 1a 00 ff 00 00 00 00 *(u64 *)(r10 - 256) = r1
  ;       uint64_t pid_tgid = bpf_get_current_pid_tgid();
         1:       85 00 00 00 0e 00 00 00 call 14
         2:       bf 06 00 00 00 00 00 00 r6 = r0
  ;       pid_t pid = (pid_t)(pid_tgid >> 32);
         3:       bf 61 00 00 00 00 00 00 r1 = r6
         4:       77 01 00 00 20 00 00 00 r1 >>= 32
         5:       63 1a fc ff 00 00 00 00 *(u32 *)(r10 - 4) = r1
         6:       bf a2 00 00 00 00 00 00 r2 = r10
         7:       07 02 00 00 fc ff ff ff r2 += -4
  ;       PidData* pidData = bpf_map_lookup_elem(&pidmap, &pid);
         8:       18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0 ll
        10:       85 00 00 00 01 00 00 00 call 1
        11:       bf 08 00 00 00 00 00 00 r8 = r0
  ;       if (!pidData)
        12:       15 08 15 e8 00 00 00 00 if r8 == 0 goto -6123 <LBB0_27588+0xffffffffffdae100>

Note that insn 12 has a branch offset -6123 which is clearly illegal
and will be rejected by the verifier. The negative offset is due to
the branch range is greater than INT16_MAX.

This patch changed the unroll count to be 150 to avoid above
branch target insn out-of-range issue. Also the llvm is enhanced ([2])
to assert if the branch target insn is out of INT16 range.

  [1] https://reviews.llvm.org/D119148
  [2] https://reviews.llvm.org/D123877

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220419043230.2928530-1-yhs@fb.com
---
 tools/testing/selftests/bpf/progs/pyperf.h    |  4 ++++
 tools/testing/selftests/bpf/progs/pyperf600.c | 11 +++++++----
 2 files changed, 11 insertions(+), 4 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h
index 1ed28882daf3..5d3dc4d66d47 100644
--- a/tools/testing/selftests/bpf/progs/pyperf.h
+++ b/tools/testing/selftests/bpf/progs/pyperf.h
@@ -299,7 +299,11 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx)
 #ifdef NO_UNROLL
 #pragma clang loop unroll(disable)
 #else
+#ifdef UNROLL_COUNT
+#pragma clang loop unroll_count(UNROLL_COUNT)
+#else
 #pragma clang loop unroll(full)
+#endif
 #endif /* NO_UNROLL */
 		/* Unwind python stack */
 		for (int i = 0; i < STACK_MAX_LEN; ++i) {
diff --git a/tools/testing/selftests/bpf/progs/pyperf600.c b/tools/testing/selftests/bpf/progs/pyperf600.c
index cb49b89e37cd..ce1aa5189cc4 100644
--- a/tools/testing/selftests/bpf/progs/pyperf600.c
+++ b/tools/testing/selftests/bpf/progs/pyperf600.c
@@ -1,9 +1,12 @@
 // SPDX-License-Identifier: GPL-2.0
 // Copyright (c) 2019 Facebook
 #define STACK_MAX_LEN 600
-/* clang will not unroll the loop 600 times.
- * Instead it will unroll it to the amount it deemed
- * appropriate, but the loop will still execute 600 times.
- * Total program size is around 90k insns
+/* Full unroll of 600 iterations will have total
+ * program size close to 298k insns and this may
+ * cause BPF_JMP insn out of 16-bit integer range.
+ * So limit the unroll size to 150 so the
+ * total program size is around 80k insns but
+ * the loop will still execute 600 times.
  */
+#define UNROLL_COUNT 150
 #include "pyperf.h"
-- 
cgit 


From 44df171a10f8969d1456e0a5af7fbac142d7fa18 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Mon, 18 Apr 2022 22:09:00 -0700
Subject: selftests/bpf: Workaround a verifier issue for test exhandler

The llvm patch [1] enabled opaque pointer which caused selftest
'exhandler' failure.
  ...
  ; work = task->task_works;
  7: (79) r1 = *(u64 *)(r6 +2120)       ; R1_w=ptr_callback_head(off=0,imm=0) R6_w=ptr_task_struct(off=0,imm=0)
  ; func = work->func;
  8: (79) r2 = *(u64 *)(r1 +8)          ; R1_w=ptr_callback_head(off=0,imm=0) R2_w=scalar()
  ; if (!work && !func)
  9: (4f) r1 |= r2
  math between ptr_ pointer and register with unbounded min value is not allowed

  below is insn 10 and 11
  10: (55) if r1 != 0 goto +5
  11: (18) r1 = 0 ll
  ...

In llvm, the code generation of 'r1 |= r2' happened in codegen
selectiondag phase due to difference of opaque pointer vs. non-opaque pointer.
Without [1], the related code looks like:
  r2 = *(u64 *)(r6 + 2120)
  r1 = *(u64 *)(r2 + 8)
  if r2 != 0 goto +6 <LBB0_4>
  if r1 != 0 goto +5 <LBB0_4>
  r1 = 0 ll
  ...

I haven't found a good way in llvm to fix this issue. So let us workaround the
problem first so bpf CI won't be blocked.

  [1] https://reviews.llvm.org/D123300

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220419050900.3136024-1-yhs@fb.com
---
 tools/testing/selftests/bpf/progs/exhandler_kern.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/exhandler_kern.c b/tools/testing/selftests/bpf/progs/exhandler_kern.c
index f5ca142abf8f..dd9b30a0f0fc 100644
--- a/tools/testing/selftests/bpf/progs/exhandler_kern.c
+++ b/tools/testing/selftests/bpf/progs/exhandler_kern.c
@@ -7,6 +7,8 @@
 #include <bpf/bpf_tracing.h>
 #include <bpf/bpf_core_read.h>
 
+#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var))
+
 char _license[] SEC("license") = "GPL";
 
 unsigned int exception_triggered;
@@ -37,7 +39,16 @@ int BPF_PROG(trace_task_newtask, struct task_struct *task, u64 clone_flags)
 	 */
 	work = task->task_works;
 	func = work->func;
-	if (!work && !func)
-		exception_triggered++;
+	/* Currently verifier will fail for `btf_ptr |= btf_ptr` * instruction.
+	 * To workaround the issue, use barrier_var() and rewrite as below to
+	 * prevent compiler from generating verifier-unfriendly code.
+	 */
+	barrier_var(work);
+	if (work)
+		return 0;
+	barrier_var(func);
+	if (func)
+		return 0;
+	exception_triggered++;
 	return 0;
 }
-- 
cgit 


From 0d7fefebea552771b17682a12330ea47e369a5df Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 18 Apr 2022 17:24:51 -0700
Subject: selftests/bpf: Use non-autoloaded programs in few tests

Take advantage of new libbpf feature for declarative non-autoloaded BPF
program SEC() definitions in few test that test single program at a time
out of many available programs within the single BPF object.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220419002452.632125-2-andrii@kernel.org
---
 .../selftests/bpf/prog_tests/helper_restricted.c   | 10 ++++-----
 .../selftests/bpf/prog_tests/reference_tracking.c  | 23 ++++++--------------
 .../selftests/bpf/prog_tests/test_strncmp.c        | 25 +++-------------------
 tools/testing/selftests/bpf/progs/strncmp_test.c   |  8 +++----
 .../selftests/bpf/progs/test_helper_restricted.c   | 16 +++++++-------
 .../selftests/bpf/progs/test_sk_lookup_kern.c      | 18 ++++++++--------
 6 files changed, 35 insertions(+), 65 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/helper_restricted.c b/tools/testing/selftests/bpf/prog_tests/helper_restricted.c
index e1de5f80c3b2..0354f9b82c65 100644
--- a/tools/testing/selftests/bpf/prog_tests/helper_restricted.c
+++ b/tools/testing/selftests/bpf/prog_tests/helper_restricted.c
@@ -6,11 +6,10 @@
 void test_helper_restricted(void)
 {
 	int prog_i = 0, prog_cnt;
-	int duration = 0;
 
 	do {
 		struct test_helper_restricted *test;
-		int maybeOK;
+		int err;
 
 		test = test_helper_restricted__open();
 		if (!ASSERT_OK_PTR(test, "open"))
@@ -21,12 +20,11 @@ void test_helper_restricted(void)
 		for (int j = 0; j < prog_cnt; ++j) {
 			struct bpf_program *prog = *test->skeleton->progs[j].prog;
 
-			maybeOK = bpf_program__set_autoload(prog, prog_i == j);
-			ASSERT_OK(maybeOK, "set autoload");
+			bpf_program__set_autoload(prog, true);
 		}
 
-		maybeOK = test_helper_restricted__load(test);
-		CHECK(!maybeOK, test->skeleton->progs[prog_i].name, "helper isn't restricted");
+		err = test_helper_restricted__load(test);
+		ASSERT_ERR(err, "load_should_fail");
 
 		test_helper_restricted__destroy(test);
 	} while (++prog_i < prog_cnt);
diff --git a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
index 873323fb18ba..739d2ea6ca55 100644
--- a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
+++ b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
@@ -1,21 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
 
-static void toggle_object_autoload_progs(const struct bpf_object *obj,
-					 const char *name_load)
-{
-	struct bpf_program *prog;
-
-	bpf_object__for_each_program(prog, obj) {
-		const char *name = bpf_program__name(prog);
-
-		if (!strcmp(name_load, name))
-			bpf_program__set_autoload(prog, true);
-		else
-			bpf_program__set_autoload(prog, false);
-	}
-}
-
 void test_reference_tracking(void)
 {
 	const char *file = "test_sk_lookup_kern.o";
@@ -39,6 +24,7 @@ void test_reference_tracking(void)
 		goto cleanup;
 
 	bpf_object__for_each_program(prog, obj_iter) {
+		struct bpf_program *p;
 		const char *name;
 
 		name = bpf_program__name(prog);
@@ -49,7 +35,12 @@ void test_reference_tracking(void)
 		if (!ASSERT_OK_PTR(obj, "obj_open_file"))
 			goto cleanup;
 
-		toggle_object_autoload_progs(obj, name);
+		/* all programs are not loaded by default, so just set
+		 * autoload to true for the single prog under test
+		 */
+		p = bpf_object__find_program_by_name(obj, name);
+		bpf_program__set_autoload(p, true);
+
 		/* Expect verifier failure if test name has 'err' */
 		if (strncmp(name, "err_", sizeof("err_") - 1) == 0) {
 			libbpf_print_fn_t old_print_fn;
diff --git a/tools/testing/selftests/bpf/prog_tests/test_strncmp.c b/tools/testing/selftests/bpf/prog_tests/test_strncmp.c
index b57a3009465f..7ddd6615b7e7 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_strncmp.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_strncmp.c
@@ -44,16 +44,12 @@ static void strncmp_full_str_cmp(struct strncmp_test *skel, const char *name,
 static void test_strncmp_ret(void)
 {
 	struct strncmp_test *skel;
-	struct bpf_program *prog;
 	int err, got;
 
 	skel = strncmp_test__open();
 	if (!ASSERT_OK_PTR(skel, "strncmp_test open"))
 		return;
 
-	bpf_object__for_each_program(prog, skel->obj)
-		bpf_program__set_autoload(prog, false);
-
 	bpf_program__set_autoload(skel->progs.do_strncmp, true);
 
 	err = strncmp_test__load(skel);
@@ -91,18 +87,13 @@ out:
 static void test_strncmp_bad_not_const_str_size(void)
 {
 	struct strncmp_test *skel;
-	struct bpf_program *prog;
 	int err;
 
 	skel = strncmp_test__open();
 	if (!ASSERT_OK_PTR(skel, "strncmp_test open"))
 		return;
 
-	bpf_object__for_each_program(prog, skel->obj)
-		bpf_program__set_autoload(prog, false);
-
-	bpf_program__set_autoload(skel->progs.strncmp_bad_not_const_str_size,
-				  true);
+	bpf_program__set_autoload(skel->progs.strncmp_bad_not_const_str_size, true);
 
 	err = strncmp_test__load(skel);
 	ASSERT_ERR(err, "strncmp_test load bad_not_const_str_size");
@@ -113,18 +104,13 @@ static void test_strncmp_bad_not_const_str_size(void)
 static void test_strncmp_bad_writable_target(void)
 {
 	struct strncmp_test *skel;
-	struct bpf_program *prog;
 	int err;
 
 	skel = strncmp_test__open();
 	if (!ASSERT_OK_PTR(skel, "strncmp_test open"))
 		return;
 
-	bpf_object__for_each_program(prog, skel->obj)
-		bpf_program__set_autoload(prog, false);
-
-	bpf_program__set_autoload(skel->progs.strncmp_bad_writable_target,
-				  true);
+	bpf_program__set_autoload(skel->progs.strncmp_bad_writable_target, true);
 
 	err = strncmp_test__load(skel);
 	ASSERT_ERR(err, "strncmp_test load bad_writable_target");
@@ -135,18 +121,13 @@ static void test_strncmp_bad_writable_target(void)
 static void test_strncmp_bad_not_null_term_target(void)
 {
 	struct strncmp_test *skel;
-	struct bpf_program *prog;
 	int err;
 
 	skel = strncmp_test__open();
 	if (!ASSERT_OK_PTR(skel, "strncmp_test open"))
 		return;
 
-	bpf_object__for_each_program(prog, skel->obj)
-		bpf_program__set_autoload(prog, false);
-
-	bpf_program__set_autoload(skel->progs.strncmp_bad_not_null_term_target,
-				  true);
+	bpf_program__set_autoload(skel->progs.strncmp_bad_not_null_term_target, true);
 
 	err = strncmp_test__load(skel);
 	ASSERT_ERR(err, "strncmp_test load bad_not_null_term_target");
diff --git a/tools/testing/selftests/bpf/progs/strncmp_test.c b/tools/testing/selftests/bpf/progs/strncmp_test.c
index 900d930d48a8..769668feed48 100644
--- a/tools/testing/selftests/bpf/progs/strncmp_test.c
+++ b/tools/testing/selftests/bpf/progs/strncmp_test.c
@@ -19,7 +19,7 @@ unsigned int no_const_str_size = STRNCMP_STR_SZ;
 
 char _license[] SEC("license") = "GPL";
 
-SEC("tp/syscalls/sys_enter_nanosleep")
+SEC("?tp/syscalls/sys_enter_nanosleep")
 int do_strncmp(void *ctx)
 {
 	if ((bpf_get_current_pid_tgid() >> 32) != target_pid)
@@ -29,7 +29,7 @@ int do_strncmp(void *ctx)
 	return 0;
 }
 
-SEC("tp/syscalls/sys_enter_nanosleep")
+SEC("?tp/syscalls/sys_enter_nanosleep")
 int strncmp_bad_not_const_str_size(void *ctx)
 {
 	/* The value of string size is not const, so will fail */
@@ -37,7 +37,7 @@ int strncmp_bad_not_const_str_size(void *ctx)
 	return 0;
 }
 
-SEC("tp/syscalls/sys_enter_nanosleep")
+SEC("?tp/syscalls/sys_enter_nanosleep")
 int strncmp_bad_writable_target(void *ctx)
 {
 	/* Compared target is not read-only, so will fail */
@@ -45,7 +45,7 @@ int strncmp_bad_writable_target(void *ctx)
 	return 0;
 }
 
-SEC("tp/syscalls/sys_enter_nanosleep")
+SEC("?tp/syscalls/sys_enter_nanosleep")
 int strncmp_bad_not_null_term_target(void *ctx)
 {
 	/* Compared target is not null-terminated, so will fail */
diff --git a/tools/testing/selftests/bpf/progs/test_helper_restricted.c b/tools/testing/selftests/bpf/progs/test_helper_restricted.c
index 68d64c365f90..20ef9d433b97 100644
--- a/tools/testing/selftests/bpf/progs/test_helper_restricted.c
+++ b/tools/testing/selftests/bpf/progs/test_helper_restricted.c
@@ -56,7 +56,7 @@ static void spin_lock_work(void)
 	}
 }
 
-SEC("raw_tp/sys_enter")
+SEC("?raw_tp/sys_enter")
 int raw_tp_timer(void *ctx)
 {
 	timer_work();
@@ -64,7 +64,7 @@ int raw_tp_timer(void *ctx)
 	return 0;
 }
 
-SEC("tp/syscalls/sys_enter_nanosleep")
+SEC("?tp/syscalls/sys_enter_nanosleep")
 int tp_timer(void *ctx)
 {
 	timer_work();
@@ -72,7 +72,7 @@ int tp_timer(void *ctx)
 	return 0;
 }
 
-SEC("kprobe/sys_nanosleep")
+SEC("?kprobe/sys_nanosleep")
 int kprobe_timer(void *ctx)
 {
 	timer_work();
@@ -80,7 +80,7 @@ int kprobe_timer(void *ctx)
 	return 0;
 }
 
-SEC("perf_event")
+SEC("?perf_event")
 int perf_event_timer(void *ctx)
 {
 	timer_work();
@@ -88,7 +88,7 @@ int perf_event_timer(void *ctx)
 	return 0;
 }
 
-SEC("raw_tp/sys_enter")
+SEC("?raw_tp/sys_enter")
 int raw_tp_spin_lock(void *ctx)
 {
 	spin_lock_work();
@@ -96,7 +96,7 @@ int raw_tp_spin_lock(void *ctx)
 	return 0;
 }
 
-SEC("tp/syscalls/sys_enter_nanosleep")
+SEC("?tp/syscalls/sys_enter_nanosleep")
 int tp_spin_lock(void *ctx)
 {
 	spin_lock_work();
@@ -104,7 +104,7 @@ int tp_spin_lock(void *ctx)
 	return 0;
 }
 
-SEC("kprobe/sys_nanosleep")
+SEC("?kprobe/sys_nanosleep")
 int kprobe_spin_lock(void *ctx)
 {
 	spin_lock_work();
@@ -112,7 +112,7 @@ int kprobe_spin_lock(void *ctx)
 	return 0;
 }
 
-SEC("perf_event")
+SEC("?perf_event")
 int perf_event_spin_lock(void *ctx)
 {
 	spin_lock_work();
diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
index 40f161480a2f..b502e5c92e33 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
@@ -52,7 +52,7 @@ static struct bpf_sock_tuple *get_tuple(void *data, __u64 nh_off,
 	return result;
 }
 
-SEC("tc")
+SEC("?tc")
 int sk_lookup_success(struct __sk_buff *skb)
 {
 	void *data_end = (void *)(long)skb->data_end;
@@ -78,7 +78,7 @@ int sk_lookup_success(struct __sk_buff *skb)
 	return sk ? TC_ACT_OK : TC_ACT_UNSPEC;
 }
 
-SEC("tc")
+SEC("?tc")
 int sk_lookup_success_simple(struct __sk_buff *skb)
 {
 	struct bpf_sock_tuple tuple = {};
@@ -90,7 +90,7 @@ int sk_lookup_success_simple(struct __sk_buff *skb)
 	return 0;
 }
 
-SEC("tc")
+SEC("?tc")
 int err_use_after_free(struct __sk_buff *skb)
 {
 	struct bpf_sock_tuple tuple = {};
@@ -105,7 +105,7 @@ int err_use_after_free(struct __sk_buff *skb)
 	return family;
 }
 
-SEC("tc")
+SEC("?tc")
 int err_modify_sk_pointer(struct __sk_buff *skb)
 {
 	struct bpf_sock_tuple tuple = {};
@@ -120,7 +120,7 @@ int err_modify_sk_pointer(struct __sk_buff *skb)
 	return 0;
 }
 
-SEC("tc")
+SEC("?tc")
 int err_modify_sk_or_null_pointer(struct __sk_buff *skb)
 {
 	struct bpf_sock_tuple tuple = {};
@@ -134,7 +134,7 @@ int err_modify_sk_or_null_pointer(struct __sk_buff *skb)
 	return 0;
 }
 
-SEC("tc")
+SEC("?tc")
 int err_no_release(struct __sk_buff *skb)
 {
 	struct bpf_sock_tuple tuple = {};
@@ -143,7 +143,7 @@ int err_no_release(struct __sk_buff *skb)
 	return 0;
 }
 
-SEC("tc")
+SEC("?tc")
 int err_release_twice(struct __sk_buff *skb)
 {
 	struct bpf_sock_tuple tuple = {};
@@ -155,7 +155,7 @@ int err_release_twice(struct __sk_buff *skb)
 	return 0;
 }
 
-SEC("tc")
+SEC("?tc")
 int err_release_unchecked(struct __sk_buff *skb)
 {
 	struct bpf_sock_tuple tuple = {};
@@ -172,7 +172,7 @@ void lookup_no_release(struct __sk_buff *skb)
 	bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
 }
 
-SEC("tc")
+SEC("?tc")
 int err_no_release_subcall(struct __sk_buff *skb)
 {
 	lookup_no_release(skb);
-- 
cgit 


From 24fe983abe01c53e1a9354fe21fab92579fcda6d Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Tue, 19 Apr 2022 22:16:08 +0530
Subject: selftests/bpf: Add tests for type tag order validation

Add a few test cases that ensure we catch cases of badly ordered type
tags in modifier chains.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20220419164608.1990559-3-memxor@gmail.com
---
 tools/testing/selftests/bpf/prog_tests/btf.c | 99 ++++++++++++++++++++++++++++
 1 file changed, 99 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 84aae639ddb5..ba5bde53d418 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -3973,6 +3973,105 @@ static struct btf_raw_test raw_tests[] = {
 	.value_type_id = 1,
 	.max_entries = 1,
 },
+{
+	.descr = "type_tag test #2, type tag order",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_CONST_ENC(3),				/* [2] */
+		BTF_TYPE_TAG_ENC(NAME_TBD, 1),			/* [3] */
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0tag"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "tag_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Type tags don't precede modifiers",
+},
+{
+	.descr = "type_tag test #3, type tag order",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_TYPE_TAG_ENC(NAME_TBD, 3),			/* [2] */
+		BTF_CONST_ENC(4),				/* [3] */
+		BTF_TYPE_TAG_ENC(NAME_TBD, 1),			/* [4] */
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0tag\0tag"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "tag_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Type tags don't precede modifiers",
+},
+{
+	.descr = "type_tag test #4, type tag order",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_TYPEDEF_ENC(NAME_TBD, 3),			/* [2] */
+		BTF_CONST_ENC(4),				/* [3] */
+		BTF_TYPE_TAG_ENC(NAME_TBD, 1),			/* [4] */
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0tag\0tag"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "tag_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Type tags don't precede modifiers",
+},
+{
+	.descr = "type_tag test #5, type tag order",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_TYPE_TAG_ENC(NAME_TBD, 3),			/* [2] */
+		BTF_CONST_ENC(1),				/* [3] */
+		BTF_TYPE_TAG_ENC(NAME_TBD, 2),			/* [4] */
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0tag\0tag"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "tag_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 1,
+},
+{
+	.descr = "type_tag test #6, type tag order",
+	.raw_types = {
+		BTF_PTR_ENC(2),					/* [1] */
+		BTF_TYPE_TAG_ENC(NAME_TBD, 3),			/* [2] */
+		BTF_CONST_ENC(4),				/* [3] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [4] */
+		BTF_PTR_ENC(6),					/* [5] */
+		BTF_CONST_ENC(2),				/* [6] */
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0tag"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "tag_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Type tags don't precede modifiers",
+},
 
 }; /* struct btf_raw_test raw_tests[] */
 
-- 
cgit 


From 2238a1f49006dbef3f25e201bffbdab392d0baef Mon Sep 17 00:00:00 2001
From: Ze Zhang <zhangze@loongson.cn>
Date: Sat, 16 Apr 2022 19:48:47 +0800
Subject: selftests/ftrace: add mips support for kprobe args string tests

This is the mips variant of commit <3990b5baf225> ("selftests/ftrace:
Add s390 support for kprobe args tests").

Signed-off-by: Ze Zhang <zhangze@loongson.cn>
Acked-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
index dc7ade196798..459741565222 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
@@ -25,6 +25,9 @@ ppc*)
 s390*)
   ARG1=%r2
 ;;
+mips*)
+  ARG1=%r4
+;;
 *)
   echo "Please implement other architecture here"
   exit_untested
-- 
cgit 


From d490527d30d7cec49257ceb1092ebf974d3303f5 Mon Sep 17 00:00:00 2001
From: Ze Zhang <zhangze@loongson.cn>
Date: Sat, 16 Apr 2022 19:48:48 +0800
Subject: selftests/ftrace: add mips support for kprobe args syntax tests

This is the mips variant of commit <3990b5baf225> ("selftests/ftrace:
Add s390 support for kprobe args tests").

Signed-off-by: Ze Zhang <zhangze@loongson.cn>
Acked-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
index 47d84b5cb6ca..d4662c8cf407 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
@@ -36,6 +36,10 @@ s390*)
   GOODREG=%r2
   BADREG=%s2
 ;;
+mips*)
+  GOODREG=%r4
+  BADREG=%r12
+;;
 *)
   echo "Please implement other architecture here"
   exit_untested
-- 
cgit 


From abd26d348b2a366f8947e8c3c2ab9bc881ac9415 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Fri, 8 Apr 2022 21:36:24 +0800
Subject: selftests: mqueue: drop duplicate min definition

Drop duplicate macro min() definition in mq_perf_tests.c, use MIN() in
sys/param.h instead.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/mqueue/mq_perf_tests.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/mqueue/mq_perf_tests.c b/tools/testing/selftests/mqueue/mq_perf_tests.c
index 84fda3b49073..5c16159d0bcd 100644
--- a/tools/testing/selftests/mqueue/mq_perf_tests.c
+++ b/tools/testing/selftests/mqueue/mq_perf_tests.c
@@ -35,6 +35,7 @@
 #include <sys/time.h>
 #include <sys/resource.h>
 #include <sys/stat.h>
+#include <sys/param.h>
 #include <mqueue.h>
 #include <popt.h>
 #include <error.h>
@@ -73,7 +74,6 @@ static char *usage =
 char *MAX_MSGS = "/proc/sys/fs/mqueue/msg_max";
 char *MAX_MSGSIZE = "/proc/sys/fs/mqueue/msgsize_max";
 
-#define min(a, b) ((a) < (b) ? (a) : (b))
 #define MAX_CPUS 64
 char *cpu_option_string;
 int cpus_to_pin[MAX_CPUS];
@@ -560,7 +560,7 @@ int main(int argc, char *argv[])
 			"require root in order to modify\nsystem settings.  "
 			"Exiting.\n");
 
-	cpus_online = min(MAX_CPUS, sysconf(_SC_NPROCESSORS_ONLN));
+	cpus_online = MIN(MAX_CPUS, sysconf(_SC_NPROCESSORS_ONLN));
 	cpu_set = CPU_ALLOC(cpus_online);
 	if (cpu_set == NULL) {
 		perror("CPU_ALLOC()");
-- 
cgit 


From 044011fdf162c5dd61c02841930c8f438a9adadb Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Tue, 19 Apr 2022 16:51:54 +0300
Subject: selftests: mlxsw: vxlan_flooding: Prevent flooding of unwanted
 packets

The test verifies that packets are correctly flooded by the bridge and
the VXLAN device by matching on the encapsulated packets at the other
end. However, if packets other than those generated by the test also
ingress the bridge (e.g., MLD packets), they will be flooded as well and
interfere with the expected count.

Make the test more robust by making sure that only the packets generated
by the test can ingress the bridge. Drop all the rest using tc filters
on the egress of 'br0' and 'h1'.

In the software data path, the problem can be solved by matching on the
inner destination MAC or dropping unwanted packets at the egress of the
VXLAN device, but this is not currently supported by mlxsw.

Fixes: 94d302deae25 ("selftests: mlxsw: Add a test for VxLAN flooding")
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Amit Cohen <amcohen@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/drivers/net/mlxsw/vxlan_flooding.sh       | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh
index fedcb7b35af9..af5ea50ed5c0 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh
@@ -172,6 +172,17 @@ flooding_filters_add()
 	local lsb
 	local i
 
+	# Prevent unwanted packets from entering the bridge and interfering
+	# with the test.
+	tc qdisc add dev br0 clsact
+	tc filter add dev br0 egress protocol all pref 1 handle 1 \
+		matchall skip_hw action drop
+	tc qdisc add dev $h1 clsact
+	tc filter add dev $h1 egress protocol all pref 1 handle 1 \
+		flower skip_hw dst_mac de:ad:be:ef:13:37 action pass
+	tc filter add dev $h1 egress protocol all pref 2 handle 2 \
+		matchall skip_hw action drop
+
 	tc qdisc add dev $rp2 clsact
 
 	for i in $(eval echo {1..$num_remotes}); do
@@ -194,6 +205,12 @@ flooding_filters_del()
 	done
 
 	tc qdisc del dev $rp2 clsact
+
+	tc filter del dev $h1 egress protocol all pref 2 handle 2 matchall
+	tc filter del dev $h1 egress protocol all pref 1 handle 1 flower
+	tc qdisc del dev $h1 clsact
+	tc filter del dev br0 egress protocol all pref 1 handle 1 matchall
+	tc qdisc del dev br0 clsact
 }
 
 flooding_check_packets()
-- 
cgit 


From 5e6242151d7f17b056a82ca7b860c4ec8eaa7589 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Tue, 19 Apr 2022 16:51:55 +0300
Subject: selftests: mlxsw: vxlan_flooding_ipv6: Prevent flooding of unwanted
 packets

The test verifies that packets are correctly flooded by the bridge and
the VXLAN device by matching on the encapsulated packets at the other
end. However, if packets other than those generated by the test also
ingress the bridge (e.g., MLD packets), they will be flooded as well and
interfere with the expected count.

Make the test more robust by making sure that only the packets generated
by the test can ingress the bridge. Drop all the rest using tc filters
on the egress of 'br0' and 'h1'.

In the software data path, the problem can be solved by matching on the
inner destination MAC or dropping unwanted packets at the egress of the
VXLAN device, but this is not currently supported by mlxsw.

Fixes: d01724dd2a66 ("selftests: mlxsw: spectrum-2: Add a test for VxLAN flooding with IPv6")
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Amit Cohen <amcohen@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh
index 429f7ee735cf..fd23c80eba31 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh
@@ -159,6 +159,17 @@ flooding_remotes_add()
 	local lsb
 	local i
 
+	# Prevent unwanted packets from entering the bridge and interfering
+	# with the test.
+	tc qdisc add dev br0 clsact
+	tc filter add dev br0 egress protocol all pref 1 handle 1 \
+		matchall skip_hw action drop
+	tc qdisc add dev $h1 clsact
+	tc filter add dev $h1 egress protocol all pref 1 handle 1 \
+		flower skip_hw dst_mac de:ad:be:ef:13:37 action pass
+	tc filter add dev $h1 egress protocol all pref 2 handle 2 \
+		matchall skip_hw action drop
+
 	for i in $(eval echo {1..$num_remotes}); do
 		lsb=$((i + 1))
 
@@ -195,6 +206,12 @@ flooding_filters_del()
 	done
 
 	tc qdisc del dev $rp2 clsact
+
+	tc filter del dev $h1 egress protocol all pref 2 handle 2 matchall
+	tc filter del dev $h1 egress protocol all pref 1 handle 1 flower
+	tc qdisc del dev $h1 clsact
+	tc filter del dev br0 egress protocol all pref 1 handle 1 matchall
+	tc qdisc del dev br0 clsact
 }
 
 flooding_check_packets()
-- 
cgit 


From 127e7dca427bc3e5d9a1c2071357e0f34be5c1d9 Mon Sep 17 00:00:00 2001
From: Liu Jian <liujian56@huawei.com>
Date: Sat, 16 Apr 2022 18:58:01 +0800
Subject: selftests/bpf: Add test for skb_load_bytes

Use bpf_prog_test_run_opts to test the skb_load_bytes function. Tests
the behavior when offset is greater than INT_MAX or a normal value.

Signed-off-by: Liu Jian <liujian56@huawei.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20220416105801.88708-4-liujian56@huawei.com
---
 .../selftests/bpf/prog_tests/skb_load_bytes.c      | 45 ++++++++++++++++++++++
 tools/testing/selftests/bpf/progs/skb_load_bytes.c | 19 +++++++++
 2 files changed, 64 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/skb_load_bytes.c
 create mode 100644 tools/testing/selftests/bpf/progs/skb_load_bytes.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/skb_load_bytes.c b/tools/testing/selftests/bpf/prog_tests/skb_load_bytes.c
new file mode 100644
index 000000000000..d7f83c0a40a5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/skb_load_bytes.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "skb_load_bytes.skel.h"
+
+void test_skb_load_bytes(void)
+{
+	struct skb_load_bytes *skel;
+	int err, prog_fd, test_result;
+	struct __sk_buff skb = { 0 };
+
+	LIBBPF_OPTS(bpf_test_run_opts, tattr,
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+		.ctx_in = &skb,
+		.ctx_size_in = sizeof(skb),
+	);
+
+	skel = skb_load_bytes__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+		return;
+
+	prog_fd = bpf_program__fd(skel->progs.skb_process);
+	if (!ASSERT_GE(prog_fd, 0, "prog_fd"))
+		goto out;
+
+	skel->bss->load_offset = (uint32_t)(-1);
+	err = bpf_prog_test_run_opts(prog_fd, &tattr);
+	if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
+		goto out;
+	test_result = skel->bss->test_result;
+	if (!ASSERT_EQ(test_result, -EFAULT, "offset -1"))
+		goto out;
+
+	skel->bss->load_offset = (uint32_t)10;
+	err = bpf_prog_test_run_opts(prog_fd, &tattr);
+	if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
+		goto out;
+	test_result = skel->bss->test_result;
+	if (!ASSERT_EQ(test_result, 0, "offset 10"))
+		goto out;
+
+out:
+	skb_load_bytes__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/skb_load_bytes.c b/tools/testing/selftests/bpf/progs/skb_load_bytes.c
new file mode 100644
index 000000000000..e4252fd973be
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/skb_load_bytes.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u32 load_offset = 0;
+int test_result = 0;
+
+SEC("tc")
+int skb_process(struct __sk_buff *skb)
+{
+	char buf[16];
+
+	test_result = bpf_skb_load_bytes(skb, load_offset, buf, 10);
+
+	return 0;
+}
-- 
cgit 


From 835f14ed53076384f0e1dad2fddb4881315f124f Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 17 Mar 2022 11:05:09 -0700
Subject: rcu: Make the TASKS_RCU Kconfig option be selected

Currently, any kernel built with CONFIG_PREEMPTION=y also gets
CONFIG_TASKS_RCU=y, which is not helpful to people trying to build
preemptible kernels of minimal size.

Because CONFIG_TASKS_RCU=y is needed only in kernels doing tracing of
one form or another, this commit moves from TASKS_RCU deciding when it
should be enabled to the tracing Kconfig options explicitly selecting it.
This allows building preemptible kernels without TASKS_RCU, if desired.

This commit also updates the SRCU-N and TREE09 rcutorture scenarios
in order to avoid Kconfig errors that would otherwise result from
CONFIG_TASKS_RCU being selected without its CONFIG_RCU_EXPERT dependency
being met.

[ paulmck: Apply BPF_SYSCALL feedback from Andrii Nakryiko. ]

Reported-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Tested-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Tested-by: Zhouyi Zhou <zhouzhouyi@gmail.com>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 arch/Kconfig                                          | 1 +
 kernel/bpf/Kconfig                                    | 1 +
 kernel/rcu/Kconfig                                    | 3 ++-
 kernel/trace/Kconfig                                  | 1 +
 tools/testing/selftests/rcutorture/configs/rcu/SRCU-N | 2 ++
 tools/testing/selftests/rcutorture/configs/rcu/TREE09 | 2 ++
 6 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/arch/Kconfig b/arch/Kconfig
index 29b0167c088b..1bf29ce754af 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -35,6 +35,7 @@ config KPROBES
 	depends on MODULES
 	depends on HAVE_KPROBES
 	select KALLSYMS
+	select TASKS_RCU if PREEMPTION
 	help
 	  Kprobes allows you to trap at almost any kernel address and
 	  execute a callback function.  register_kprobe() establishes
diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig
index d56ee177d5f8..2dfe1079f772 100644
--- a/kernel/bpf/Kconfig
+++ b/kernel/bpf/Kconfig
@@ -27,6 +27,7 @@ config BPF_SYSCALL
 	bool "Enable bpf() system call"
 	select BPF
 	select IRQ_WORK
+	select TASKS_RCU if PREEMPTION
 	select TASKS_TRACE_RCU
 	select BINARY_PRINTF
 	select NET_SOCK_MSG if NET
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index f559870fbf8b..4f665ae0cf55 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -78,7 +78,8 @@ config TASKS_RCU_GENERIC
 	  task-based RCU implementations.  Not for manual selection.
 
 config TASKS_RCU
-	def_bool PREEMPTION
+	def_bool 0
+	select IRQ_WORK
 	help
 	  This option enables a task-based RCU implementation that uses
 	  only voluntary context switch (not preemption!), idle, and
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 2c43e327a619..bf5da6c4e999 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -144,6 +144,7 @@ config TRACING
 	select BINARY_PRINTF
 	select EVENT_TRACING
 	select TRACE_CLOCK
+	select TASKS_RCU if PREEMPTION
 
 config GENERIC_TRACER
 	bool
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N
index 2da8b49589a0..07f5e0a70ae7 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N
@@ -6,3 +6,5 @@ CONFIG_PREEMPT_NONE=y
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=n
 #CHECK#CONFIG_RCU_EXPERT=n
+CONFIG_KPROBES=n
+CONFIG_FTRACE=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE09 b/tools/testing/selftests/rcutorture/configs/rcu/TREE09
index 8523a7515cbf..fc45645bb5f4 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE09
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE09
@@ -13,3 +13,5 @@ CONFIG_DEBUG_LOCK_ALLOC=n
 CONFIG_RCU_BOOST=n
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
 #CHECK#CONFIG_RCU_EXPERT=n
+CONFIG_KPROBES=n
+CONFIG_FTRACE=n
-- 
cgit 


From 40c1278aa7cd51d4f8627f7adc66aa73e01aff81 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 17 Mar 2022 13:29:59 -0700
Subject: rcutorture: Allow rcutorture without RCU Tasks Trace

Unless a kernel builds rcutorture, whether built-in or as a module, that
kernel is also built with CONFIG_TASKS_TRACE_RCU, whether anything else
needs Tasks Trace RCU or not.  This unnecessarily increases kernel size.
This commit therefore decouples the presence of rcutorture from the
presence of RCU Tasks Trace.

However, there is a need to select CONFIG_TASKS_TRACE_RCU for
testing purposes.  Except that casual users must not be bothered with
questions -- for them, this needs to be fully automated.  There is thus
a CONFIG_FORCE_TASKS_TRACE_RCU that selects CONFIG_TASKS_TRACE_RCU,
is user-selectable, but which depends on CONFIG_RCU_EXPERT.

[ paulmck: Apply kernel test robot feedback. ]

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/Kconfig                                 |  22 +++--
 kernel/rcu/Kconfig.debug                           |   1 -
 kernel/rcu/rcutorture.c                            | 101 ++++++++++++---------
 .../selftests/rcutorture/configs/rcu/TRACE01       |   2 +
 .../selftests/rcutorture/configs/rcu/TRACE02       |   2 +
 5 files changed, 75 insertions(+), 53 deletions(-)

(limited to 'tools/testing')

diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index 4f665ae0cf55..2befd328e6a0 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -95,15 +95,23 @@ config TASKS_RUDE_RCU
 	  switches on all online CPUs, including idle ones, so use
 	  with caution.
 
-config TASKS_TRACE_RCU
-	def_bool 0
-	select IRQ_WORK
+config FORCE_TASKS_TRACE_RCU
+	bool "Force selection of Tasks Trace RCU"
+	depends on RCU_EXPERT
+	select TASKS_TRACE_RCU
+	default n
 	help
 	  This option enables a task-based RCU implementation that uses
 	  explicit rcu_read_lock_trace() read-side markers, and allows
-	  these readers to appear in the idle loop as well as on the CPU
-	  hotplug code paths.  It can force IPIs on online CPUs, including
-	  idle ones, so use with caution.
+	  these readers to appear in the idle loop as well as on the
+	  CPU hotplug code paths.  It can force IPIs on online CPUs,
+	  including idle ones, so use with caution.  Not for manual
+	  selection in most cases.
+
+config TASKS_TRACE_RCU
+	bool
+	default n
+	select IRQ_WORK
 
 config RCU_STALL_COMMON
 	def_bool TREE_RCU
@@ -227,7 +235,7 @@ config RCU_NOCB_CPU
 
 config TASKS_TRACE_RCU_READ_MB
 	bool "Tasks Trace RCU readers use memory barriers in user and idle"
-	depends on RCU_EXPERT
+	depends on RCU_EXPERT && TASKS_TRACE_RCU
 	default PREEMPT_RT || NR_CPUS < 8
 	help
 	  Use this option to further reduce the number of IPIs sent
diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug
index 4fd64999300f..d7f4bb1c4979 100644
--- a/kernel/rcu/Kconfig.debug
+++ b/kernel/rcu/Kconfig.debug
@@ -49,7 +49,6 @@ config RCU_TORTURE_TEST
 	select SRCU
 	select TASKS_RCU
 	select TASKS_RUDE_RCU
-	select TASKS_TRACE_RCU
 	default n
 	help
 	  This option provides a kernel module that runs torture tests
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 55d049c39608..7dd3e14ec907 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -737,6 +737,48 @@ static struct rcu_torture_ops busted_srcud_ops = {
 	.name		= "busted_srcud"
 };
 
+/*
+ * Definitions for trivial CONFIG_PREEMPT=n-only torture testing.
+ * This implementation does not necessarily work well with CPU hotplug.
+ */
+
+static void synchronize_rcu_trivial(void)
+{
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		rcutorture_sched_setaffinity(current->pid, cpumask_of(cpu));
+		WARN_ON_ONCE(raw_smp_processor_id() != cpu);
+	}
+}
+
+static int rcu_torture_read_lock_trivial(void) __acquires(RCU)
+{
+	preempt_disable();
+	return 0;
+}
+
+static void rcu_torture_read_unlock_trivial(int idx) __releases(RCU)
+{
+	preempt_enable();
+}
+
+static struct rcu_torture_ops trivial_ops = {
+	.ttype		= RCU_TRIVIAL_FLAVOR,
+	.init		= rcu_sync_torture_init,
+	.readlock	= rcu_torture_read_lock_trivial,
+	.read_delay	= rcu_read_delay,  /* just reuse rcu's version. */
+	.readunlock	= rcu_torture_read_unlock_trivial,
+	.readlock_held	= torture_readlock_not_held,
+	.get_gp_seq	= rcu_no_completed,
+	.sync		= synchronize_rcu_trivial,
+	.exp_sync	= synchronize_rcu_trivial,
+	.fqs		= NULL,
+	.stats		= NULL,
+	.irq_capable	= 1,
+	.name		= "trivial"
+};
+
 /*
  * Definitions for RCU-tasks torture testing.
  */
@@ -780,48 +822,6 @@ static struct rcu_torture_ops tasks_ops = {
 	.name		= "tasks"
 };
 
-/*
- * Definitions for trivial CONFIG_PREEMPT=n-only torture testing.
- * This implementation does not necessarily work well with CPU hotplug.
- */
-
-static void synchronize_rcu_trivial(void)
-{
-	int cpu;
-
-	for_each_online_cpu(cpu) {
-		rcutorture_sched_setaffinity(current->pid, cpumask_of(cpu));
-		WARN_ON_ONCE(raw_smp_processor_id() != cpu);
-	}
-}
-
-static int rcu_torture_read_lock_trivial(void) __acquires(RCU)
-{
-	preempt_disable();
-	return 0;
-}
-
-static void rcu_torture_read_unlock_trivial(int idx) __releases(RCU)
-{
-	preempt_enable();
-}
-
-static struct rcu_torture_ops trivial_ops = {
-	.ttype		= RCU_TRIVIAL_FLAVOR,
-	.init		= rcu_sync_torture_init,
-	.readlock	= rcu_torture_read_lock_trivial,
-	.read_delay	= rcu_read_delay,  /* just reuse rcu's version. */
-	.readunlock	= rcu_torture_read_unlock_trivial,
-	.readlock_held	= torture_readlock_not_held,
-	.get_gp_seq	= rcu_no_completed,
-	.sync		= synchronize_rcu_trivial,
-	.exp_sync	= synchronize_rcu_trivial,
-	.fqs		= NULL,
-	.stats		= NULL,
-	.irq_capable	= 1,
-	.name		= "trivial"
-};
-
 /*
  * Definitions for rude RCU-tasks torture testing.
  */
@@ -851,6 +851,8 @@ static struct rcu_torture_ops tasks_rude_ops = {
 	.name		= "tasks-rude"
 };
 
+#ifdef CONFIG_TASKS_TRACE_RCU
+
 /*
  * Definitions for tracing RCU-tasks torture testing.
  */
@@ -893,6 +895,15 @@ static struct rcu_torture_ops tasks_tracing_ops = {
 	.name		= "tasks-tracing"
 };
 
+#define TASKS_TRACING_OPS &tasks_tracing_ops,
+
+#else // #ifdef CONFIG_TASKS_TRACE_RCU
+
+#define TASKS_TRACING_OPS
+
+#endif // #else #ifdef CONFIG_TASKS_TRACE_RCU
+
+
 static unsigned long rcutorture_seq_diff(unsigned long new, unsigned long old)
 {
 	if (!cur_ops->gp_diff)
@@ -3096,9 +3107,9 @@ rcu_torture_init(void)
 	int flags = 0;
 	unsigned long gp_seq = 0;
 	static struct rcu_torture_ops *torture_ops[] = {
-		&rcu_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops,
-		&busted_srcud_ops, &tasks_ops, &tasks_rude_ops,
-		&tasks_tracing_ops, &trivial_ops,
+		&rcu_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops, &busted_srcud_ops,
+		&tasks_ops, &tasks_rude_ops, TASKS_TRACING_OPS
+		&trivial_ops,
 	};
 
 	if (!torture_init_begin(torture_type, verbose))
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
index e4d74e5fc1d0..0f5605ed1e48 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
@@ -7,5 +7,7 @@ CONFIG_PREEMPT=n
 CONFIG_DEBUG_LOCK_ALLOC=n
 CONFIG_PROVE_LOCKING=n
 #CHECK#CONFIG_PROVE_RCU=n
+CONFIG_FORCE_TASKS_TRACE_RCU=y
+#CHECK#CONFIG_TASKS_TRACE_RCU=y
 CONFIG_TASKS_TRACE_RCU_READ_MB=y
 CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02
index 77541eeb4e9f..093ea6e8e65c 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02
@@ -7,5 +7,7 @@ CONFIG_PREEMPT=y
 CONFIG_DEBUG_LOCK_ALLOC=y
 CONFIG_PROVE_LOCKING=y
 #CHECK#CONFIG_PROVE_RCU=y
+CONFIG_FORCE_TASKS_TRACE_RCU=y
+#CHECK#CONFIG_TASKS_TRACE_RCU=y
 CONFIG_TASKS_TRACE_RCU_READ_MB=n
 CONFIG_RCU_EXPERT=y
-- 
cgit 


From 3b6e1dd42317ec366dab3205f99280e2ab1ad85a Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 17 Mar 2022 15:18:27 -0700
Subject: rcutorture: Allow rcutorture without RCU Tasks

Currently, a CONFIG_PREEMPT_NONE=y kernel substitutes normal RCU for
RCU Tasks.  Unless that kernel builds rcutorture, whether built-in or as
a module, in which case RCU Tasks is (unnecessarily) used.  This both
increases kernel size and increases the complexity of certain tracing
operations.  This commit therefore decouples the presence of rcutorture
from the presence of RCU Tasks.

However, there is a need to select CONFIG_TASKS_RCU for testing purposes.
Except that casual users must not be bothered with questions -- for them,
this needs to be fully automated.  There is thus a CONFIG_FORCE_TASKS_RCU
that selects CONFIG_TASKS_RCU, is user-selectable, but which depends
on CONFIG_RCU_EXPERT.

[ paulmck: Apply kernel test robot feedback. ]

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/Kconfig                                     | 18 +++++++++++++-----
 kernel/rcu/Kconfig.debug                               |  1 -
 kernel/rcu/rcutorture.c                                | 13 ++++++++++++-
 tools/testing/selftests/rcutorture/configs/rcu/TASKS01 |  1 +
 tools/testing/selftests/rcutorture/configs/rcu/TASKS02 |  3 +++
 tools/testing/selftests/rcutorture/configs/rcu/TASKS03 |  2 ++
 6 files changed, 31 insertions(+), 7 deletions(-)

(limited to 'tools/testing')

diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index 2befd328e6a0..8eac165db09f 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -77,13 +77,21 @@ config TASKS_RCU_GENERIC
 	  This option enables generic infrastructure code supporting
 	  task-based RCU implementations.  Not for manual selection.
 
+config FORCE_TASKS_RCU
+	bool "Force selection of TASKS_RCU"
+	depends on RCU_EXPERT
+	select TASKS_RCU
+	default n
+	help
+	  This option force-enables a task-based RCU implementation
+	  that uses only voluntary context switch (not preemption!),
+	  idle, and user-mode execution as quiescent states.  Not for
+	  manual selection in most cases.
+
 config TASKS_RCU
-	def_bool 0
+	bool
+	default n
 	select IRQ_WORK
-	help
-	  This option enables a task-based RCU implementation that uses
-	  only voluntary context switch (not preemption!), idle, and
-	  user-mode execution as quiescent states.  Not for manual selection.
 
 config TASKS_RUDE_RCU
 	def_bool 0
diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug
index d7f4bb1c4979..c217a5e655a4 100644
--- a/kernel/rcu/Kconfig.debug
+++ b/kernel/rcu/Kconfig.debug
@@ -47,7 +47,6 @@ config RCU_TORTURE_TEST
 	depends on DEBUG_KERNEL
 	select TORTURE_TEST
 	select SRCU
-	select TASKS_RCU
 	select TASKS_RUDE_RCU
 	default n
 	help
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 7dd3e14ec907..65d045ff9766 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -779,6 +779,8 @@ static struct rcu_torture_ops trivial_ops = {
 	.name		= "trivial"
 };
 
+#ifdef CONFIG_TASKS_RCU
+
 /*
  * Definitions for RCU-tasks torture testing.
  */
@@ -822,6 +824,15 @@ static struct rcu_torture_ops tasks_ops = {
 	.name		= "tasks"
 };
 
+#define TASKS_OPS &tasks_ops,
+
+#else // #ifdef CONFIG_TASKS_RCU
+
+#define TASKS_OPS
+
+#endif // #else #ifdef CONFIG_TASKS_RCU
+
+
 /*
  * Definitions for rude RCU-tasks torture testing.
  */
@@ -3108,7 +3119,7 @@ rcu_torture_init(void)
 	unsigned long gp_seq = 0;
 	static struct rcu_torture_ops *torture_ops[] = {
 		&rcu_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops, &busted_srcud_ops,
-		&tasks_ops, &tasks_rude_ops, TASKS_TRACING_OPS
+		TASKS_OPS &tasks_rude_ops, TASKS_TRACING_OPS
 		&trivial_ops,
 	};
 
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
index 3ca112444ce7..d84801b9a7ae 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
@@ -7,4 +7,5 @@ CONFIG_PREEMPT=y
 CONFIG_DEBUG_LOCK_ALLOC=y
 CONFIG_PROVE_LOCKING=y
 #CHECK#CONFIG_PROVE_RCU=y
+CONFIG_TASKS_RCU=y
 CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02
index ad2be91e5ee7..d333b69bc831 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02
@@ -2,3 +2,6 @@ CONFIG_SMP=n
 CONFIG_PREEMPT_NONE=y
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=n
+#CHECK#CONFIG_TASKS_RCU=y
+CONFIG_FORCE_TASKS_RCU=y
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
index dc02083803ce..dea26c568678 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
@@ -7,3 +7,5 @@ CONFIG_HZ_PERIODIC=n
 CONFIG_NO_HZ_IDLE=n
 CONFIG_NO_HZ_FULL=y
 #CHECK#CONFIG_RCU_EXPERT=n
+CONFIG_TASKS_RCU=y
+CONFIG_RCU_EXPERT=y
-- 
cgit 


From 4c3f7b0e1e880e892d4bc4f50bf627b251b6e2cc Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 17 Mar 2022 16:16:45 -0700
Subject: rcutorture: Allow rcutorture without RCU Tasks Rude

Unless a kernel builds rcutorture, whether built-in or as a module, that
kernel is also built with CONFIG_TASKS_RUDE_RCU, whether anything else
needs Tasks Rude RCU or not.  This unnecessarily increases kernel size.
This commit therefore decouples the presence of rcutorture from the
presence of RCU Tasks Rude.

However, there is a need to select CONFIG_TASKS_RUDE_RCU for testing
purposes.  Except that casual users must not be bothered with
questions -- for them, this needs to be fully automated.  There is
thus a CONFIG_FORCE_TASKS_RUDE_RCU that selects CONFIG_TASKS_RUDE_RCU,
is user-selectable, but which depends on CONFIG_RCU_EXPERT.

[ paulmck: Apply kernel test robot feedback. ]

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/Kconfig                                  | 21 ++++++++++++++-------
 kernel/rcu/Kconfig.debug                            |  1 -
 kernel/rcu/rcutorture.c                             | 13 ++++++++++++-
 .../testing/selftests/rcutorture/configs/rcu/RUDE01 |  2 ++
 4 files changed, 28 insertions(+), 9 deletions(-)

(limited to 'tools/testing')

diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index 8eac165db09f..65d45c00fd1b 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -93,15 +93,22 @@ config TASKS_RCU
 	default n
 	select IRQ_WORK
 
+config FORCE_TASKS_RUDE_RCU
+	bool "Force selection of Tasks Rude RCU"
+	depends on RCU_EXPERT
+	select TASKS_RUDE_RCU
+	default n
+	help
+	  This option force-enables a task-based RCU implementation
+	  that uses only context switch (including preemption) and
+	  user-mode execution as quiescent states.  It forces IPIs and
+	  context switches on all online CPUs, including idle ones,
+	  so use with caution.	Not for manual selection in most cases.
+
 config TASKS_RUDE_RCU
-	def_bool 0
+	bool
+	default n
 	select IRQ_WORK
-	help
-	  This option enables a task-based RCU implementation that uses
-	  only context switch (including preemption) and user-mode
-	  execution as quiescent states.  It forces IPIs and context
-	  switches on all online CPUs, including idle ones, so use
-	  with caution.
 
 config FORCE_TASKS_TRACE_RCU
 	bool "Force selection of Tasks Trace RCU"
diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug
index c217a5e655a4..f4a4468cbf03 100644
--- a/kernel/rcu/Kconfig.debug
+++ b/kernel/rcu/Kconfig.debug
@@ -47,7 +47,6 @@ config RCU_TORTURE_TEST
 	depends on DEBUG_KERNEL
 	select TORTURE_TEST
 	select SRCU
-	select TASKS_RUDE_RCU
 	default n
 	help
 	  This option provides a kernel module that runs torture tests
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 65d045ff9766..d528245108c2 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -833,6 +833,8 @@ static struct rcu_torture_ops tasks_ops = {
 #endif // #else #ifdef CONFIG_TASKS_RCU
 
 
+#ifdef CONFIG_TASKS_RUDE_RCU
+
 /*
  * Definitions for rude RCU-tasks torture testing.
  */
@@ -862,6 +864,15 @@ static struct rcu_torture_ops tasks_rude_ops = {
 	.name		= "tasks-rude"
 };
 
+#define TASKS_RUDE_OPS &tasks_rude_ops,
+
+#else // #ifdef CONFIG_TASKS_RUDE_RCU
+
+#define TASKS_RUDE_OPS
+
+#endif // #else #ifdef CONFIG_TASKS_RUDE_RCU
+
+
 #ifdef CONFIG_TASKS_TRACE_RCU
 
 /*
@@ -3119,7 +3130,7 @@ rcu_torture_init(void)
 	unsigned long gp_seq = 0;
 	static struct rcu_torture_ops *torture_ops[] = {
 		&rcu_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops, &busted_srcud_ops,
-		TASKS_OPS &tasks_rude_ops, TASKS_TRACING_OPS
+		TASKS_OPS TASKS_RUDE_OPS TASKS_TRACING_OPS
 		&trivial_ops,
 	};
 
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01 b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
index 7093422050f6..6fd6acb94518 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
@@ -8,3 +8,5 @@ CONFIG_DEBUG_LOCK_ALLOC=y
 CONFIG_PROVE_LOCKING=y
 #CHECK#CONFIG_PROVE_RCU=y
 CONFIG_RCU_EXPERT=y
+CONFIG_FORCE_TASKS_RUDE_RCU=y
+#CHECK#CONFIG_TASKS_RUDE_RCU=y
-- 
cgit 


From 3831fc02f496cd8a8e6c75217b290fe5158a3f36 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Fri, 18 Mar 2022 08:02:11 -0700
Subject: rcutorture: Add CONFIG_PREEMPT_DYNAMIC=n to TASKS02 scenario

Now that CONFIG_PREEMPT_DYNAMIC=y is the default, TASKS02 no longer
builds a pure non-preemptible kernel that uses Tiny RCU.  This commit
therefore fixes this new hole in rcutorture testing by adding
CONFIG_PREEMPT_DYNAMIC=n to the TASKS02 rcutorture scenario.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/configs/rcu/TASKS02 | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02
index d333b69bc831..2f9fcffff5ae 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02
@@ -2,6 +2,7 @@ CONFIG_SMP=n
 CONFIG_PREEMPT_NONE=y
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
 #CHECK#CONFIG_TASKS_RCU=y
 CONFIG_FORCE_TASKS_RCU=y
 CONFIG_RCU_EXPERT=y
-- 
cgit 


From 58524e0fed6a4509651005c06dc1a4ecb3ed0a61 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Fri, 18 Mar 2022 08:10:18 -0700
Subject: rcutorture: Allow specifying per-scenario stat_interval

The rcutorture test suite makes double use of the rcutorture.stat_interval
module parameter.  As its name suggests, it controls the frequency
of statistics printing, but it also controls the rcu_torture_writer()
stall timeout.  The current setting of 15 seconds works surprisingly well.
However, given that the RCU tasks stall-warning timeout is ten -minutes-,
15 seconds is too short for TASKS02, which runs a non-preemptible kernel
on a single CPU.

This commit therefore adds checks for per-scenario specification of the
rcutorture.stat_interval module parameter.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 .../selftests/rcutorture/configs/rcu/TASKS02.boot        |  1 +
 .../selftests/rcutorture/configs/rcu/ver_functions.sh    | 16 ++++++++++++++--
 2 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot
index cd2a188eeb6d..b9b6d67cbc5f 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot
@@ -1 +1,2 @@
 rcutorture.torture_type=tasks
+rcutorture.stat_interval=60
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh
index effa415f9b92..e2bc99c785e7 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh
+++ b/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh
@@ -9,7 +9,7 @@
 
 # rcutorture_param_n_barrier_cbs bootparam-string
 #
-# Adds n_barrier_cbs rcutorture module parameter to kernels having it.
+# Adds n_barrier_cbs rcutorture module parameter if not already specified.
 rcutorture_param_n_barrier_cbs () {
 	if echo $1 | grep -q "rcutorture\.n_barrier_cbs"
 	then
@@ -30,13 +30,25 @@ rcutorture_param_onoff () {
 	fi
 }
 
+# rcutorture_param_stat_interval bootparam-string
+#
+# Adds stat_interval rcutorture module parameter if not already specified.
+rcutorture_param_stat_interval () {
+	if echo $1 | grep -q "rcutorture\.stat_interval"
+	then
+		:
+	else
+		echo rcutorture.stat_interval=15
+	fi
+}
+
 # per_version_boot_params bootparam-string config-file seconds
 #
 # Adds per-version torture-module parameters to kernels supporting them.
 per_version_boot_params () {
 	echo $1 `rcutorture_param_onoff "$1" "$2"` \
 		`rcutorture_param_n_barrier_cbs "$1"` \
-		rcutorture.stat_interval=15 \
+		`rcutorture_param_stat_interval "$1"` \
 		rcutorture.shutdown_secs=$3 \
 		rcutorture.test_no_idle_hz=1 \
 		rcutorture.verbose=1
-- 
cgit 


From 5f654af150fd5aeb9fff138c7cbd72cea016b863 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Fri, 25 Mar 2022 14:39:54 -0700
Subject: refscale: Allow refscale without RCU Tasks

Currently, a CONFIG_PREEMPT_NONE=y kernel substitutes normal RCU for
RCU Tasks.  Unless that kernel builds refscale, whether built-in or as a
module, in which case RCU Tasks is (unnecessarily) built in.  This both
increases kernel size and increases the complexity of certain tracing
operations.  This commit therefore decouples the presence of refscale
from the presence of RCU Tasks.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/Kconfig.debug                                     |  1 -
 kernel/rcu/refscale.c                                        | 12 +++++++++++-
 tools/testing/selftests/rcutorture/configs/refscale/CFcommon |  2 ++
 .../testing/selftests/rcutorture/configs/refscale/NOPREEMPT  |  2 ++
 4 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug
index f4a4468cbf03..454924e03ef3 100644
--- a/kernel/rcu/Kconfig.debug
+++ b/kernel/rcu/Kconfig.debug
@@ -63,7 +63,6 @@ config RCU_REF_SCALE_TEST
 	depends on DEBUG_KERNEL
 	select TORTURE_TEST
 	select SRCU
-	select TASKS_RCU
 	select TASKS_RUDE_RCU
 	select TASKS_TRACE_RCU
 	default n
diff --git a/kernel/rcu/refscale.c b/kernel/rcu/refscale.c
index 5489ff7f478e..5079e47b3d18 100644
--- a/kernel/rcu/refscale.c
+++ b/kernel/rcu/refscale.c
@@ -207,6 +207,8 @@ static struct ref_scale_ops srcu_ops = {
 	.name		= "srcu"
 };
 
+#ifdef CONFIG_TASKS_RCU
+
 // Definitions for RCU Tasks ref scale testing: Empty read markers.
 // These definitions also work for RCU Rude readers.
 static void rcu_tasks_ref_scale_read_section(const int nloops)
@@ -232,6 +234,14 @@ static struct ref_scale_ops rcu_tasks_ops = {
 	.name		= "rcu-tasks"
 };
 
+#define RCU_TASKS_OPS &rcu_tasks_ops,
+
+#else // #ifdef CONFIG_TASKS_RCU
+
+#define RCU_TASKS_OPS
+
+#endif // #else // #ifdef CONFIG_TASKS_RCU
+
 // Definitions for RCU Tasks Trace ref scale testing.
 static void rcu_trace_ref_scale_read_section(const int nloops)
 {
@@ -790,7 +800,7 @@ ref_scale_init(void)
 	long i;
 	int firsterr = 0;
 	static struct ref_scale_ops *scale_ops[] = {
-		&rcu_ops, &srcu_ops, &rcu_trace_ops, &rcu_tasks_ops, &refcnt_ops, &rwlock_ops,
+		&rcu_ops, &srcu_ops, &rcu_trace_ops, RCU_TASKS_OPS &refcnt_ops, &rwlock_ops,
 		&rwsem_ops, &lock_ops, &lock_irq_ops, &acqrel_ops, &clock_ops,
 	};
 
diff --git a/tools/testing/selftests/rcutorture/configs/refscale/CFcommon b/tools/testing/selftests/rcutorture/configs/refscale/CFcommon
index a98b58b54bb1..14fdafc576ce 100644
--- a/tools/testing/selftests/rcutorture/configs/refscale/CFcommon
+++ b/tools/testing/selftests/rcutorture/configs/refscale/CFcommon
@@ -1,2 +1,4 @@
 CONFIG_RCU_REF_SCALE_TEST=y
 CONFIG_PRINTK_TIME=y
+CONFIG_FORCE_TASKS_RCU=y
+#CHECK#CONFIG_TASKS_RCU=y
diff --git a/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT b/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT
index 7f06838a91e6..ef2b501a6971 100644
--- a/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT
+++ b/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT
@@ -15,3 +15,5 @@ CONFIG_PROVE_LOCKING=n
 CONFIG_RCU_BOOST=n
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
 CONFIG_RCU_EXPERT=y
+CONFIG_KPROBES=n
+CONFIG_FTRACE=n
-- 
cgit 


From dec86781a54f4a527386a0b86b22e99e2ac67a09 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Fri, 25 Mar 2022 15:21:07 -0700
Subject: refscale: Allow refscale without RCU Tasks Rude/Trace

Currently, a CONFIG_PREEMPT_NONE=y kernel substitutes normal RCU for
RCU Tasks Rude and RCU Tasks Trace.  Unless that kernel builds refscale,
whether built-in or as a module, in which case these RCU Tasks flavors are
(unnecessarily) built in.  This both increases kernel size and increases
the complexity of certain tracing operations.  This commit therefore
decouples the presence of refscale from the presence of RCU Tasks Rude
and RCU Tasks Trace.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/Kconfig.debug                                     |  2 --
 kernel/rcu/refscale.c                                        | 12 +++++++++++-
 tools/testing/selftests/rcutorture/configs/refscale/CFcommon |  2 ++
 3 files changed, 13 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug
index 454924e03ef3..dceaa3e754e5 100644
--- a/kernel/rcu/Kconfig.debug
+++ b/kernel/rcu/Kconfig.debug
@@ -63,8 +63,6 @@ config RCU_REF_SCALE_TEST
 	depends on DEBUG_KERNEL
 	select TORTURE_TEST
 	select SRCU
-	select TASKS_RUDE_RCU
-	select TASKS_TRACE_RCU
 	default n
 	help
 	  This option provides a kernel module that runs performance tests
diff --git a/kernel/rcu/refscale.c b/kernel/rcu/refscale.c
index 5079e47b3d18..909644abee67 100644
--- a/kernel/rcu/refscale.c
+++ b/kernel/rcu/refscale.c
@@ -242,6 +242,8 @@ static struct ref_scale_ops rcu_tasks_ops = {
 
 #endif // #else // #ifdef CONFIG_TASKS_RCU
 
+#ifdef CONFIG_TASKS_TRACE_RCU
+
 // Definitions for RCU Tasks Trace ref scale testing.
 static void rcu_trace_ref_scale_read_section(const int nloops)
 {
@@ -271,6 +273,14 @@ static struct ref_scale_ops rcu_trace_ops = {
 	.name		= "rcu-trace"
 };
 
+#define RCU_TRACE_OPS &rcu_trace_ops,
+
+#else // #ifdef CONFIG_TASKS_TRACE_RCU
+
+#define RCU_TRACE_OPS
+
+#endif // #else // #ifdef CONFIG_TASKS_TRACE_RCU
+
 // Definitions for reference count
 static atomic_t refcnt;
 
@@ -800,7 +810,7 @@ ref_scale_init(void)
 	long i;
 	int firsterr = 0;
 	static struct ref_scale_ops *scale_ops[] = {
-		&rcu_ops, &srcu_ops, &rcu_trace_ops, RCU_TASKS_OPS &refcnt_ops, &rwlock_ops,
+		&rcu_ops, &srcu_ops, RCU_TRACE_OPS RCU_TASKS_OPS &refcnt_ops, &rwlock_ops,
 		&rwsem_ops, &lock_ops, &lock_irq_ops, &acqrel_ops, &clock_ops,
 	};
 
diff --git a/tools/testing/selftests/rcutorture/configs/refscale/CFcommon b/tools/testing/selftests/rcutorture/configs/refscale/CFcommon
index 14fdafc576ce..fbea3b13baba 100644
--- a/tools/testing/selftests/rcutorture/configs/refscale/CFcommon
+++ b/tools/testing/selftests/rcutorture/configs/refscale/CFcommon
@@ -2,3 +2,5 @@ CONFIG_RCU_REF_SCALE_TEST=y
 CONFIG_PRINTK_TIME=y
 CONFIG_FORCE_TASKS_RCU=y
 #CHECK#CONFIG_TASKS_RCU=y
+CONFIG_FORCE_TASKS_TRACE_RCU=y
+#CHECK#CONFIG_TASKS_TRACE_RCU=y
-- 
cgit 


From 4df002d908796c1ff87b985af1d31a0e36e6c66f Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Fri, 25 Mar 2022 16:39:01 -0700
Subject: rcuscale: Allow rcuscale without RCU Tasks

Currently, a CONFIG_PREEMPT_NONE=y kernel substitutes normal RCU for
RCU Tasks.  Unless that kernel builds rcuscale, whether built-in or as
a module, in which case RCU Tasks is (unnecessarily) built.  This both
increases kernel size and increases the complexity of certain tracing
operations.  This commit therefore decouples the presence of rcuscale
from the presence of RCU Tasks.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/Kconfig.debug                                     |  1 -
 kernel/rcu/rcuscale.c                                        | 12 +++++++++++-
 tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon |  4 ++--
 tools/testing/selftests/rcutorture/configs/rcuscale/TREE     |  2 ++
 4 files changed, 15 insertions(+), 4 deletions(-)

(limited to 'tools/testing')

diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug
index dceaa3e754e5..71e73fceff87 100644
--- a/kernel/rcu/Kconfig.debug
+++ b/kernel/rcu/Kconfig.debug
@@ -28,7 +28,6 @@ config RCU_SCALE_TEST
 	depends on DEBUG_KERNEL
 	select TORTURE_TEST
 	select SRCU
-	select TASKS_RCU
 	select TASKS_RUDE_RCU
 	select TASKS_TRACE_RCU
 	default n
diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index 5e4f1f83d38e..311dbcb064ed 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -268,6 +268,8 @@ static struct rcu_scale_ops srcud_ops = {
 	.name		= "srcud"
 };
 
+#ifdef CONFIG_TASKS_RCU
+
 /*
  * Definitions for RCU-tasks scalability testing.
  */
@@ -295,6 +297,14 @@ static struct rcu_scale_ops tasks_ops = {
 	.name		= "tasks"
 };
 
+#define TASKS_OPS &tasks_ops,
+
+#else // #ifdef CONFIG_TASKS_RCU
+
+#define TASKS_OPS
+
+#endif // #else // #ifdef CONFIG_TASKS_RCU
+
 /*
  * Definitions for RCU-tasks-trace scalability testing.
  */
@@ -797,7 +807,7 @@ rcu_scale_init(void)
 	long i;
 	int firsterr = 0;
 	static struct rcu_scale_ops *scale_ops[] = {
-		&rcu_ops, &srcu_ops, &srcud_ops, &tasks_ops, &tasks_tracing_ops
+		&rcu_ops, &srcu_ops, &srcud_ops, TASKS_OPS &tasks_tracing_ops
 	};
 
 	if (!torture_init_begin(scale_type, verbose))
diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon b/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon
index 90942bb5bebc..2ed3b46a9c37 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon
@@ -1,5 +1,5 @@
 CONFIG_RCU_SCALE_TEST=y
 CONFIG_PRINTK_TIME=y
 CONFIG_TASKS_RCU_GENERIC=y
-CONFIG_TASKS_RCU=y
-CONFIG_TASKS_TRACE_RCU=y
+CONFIG_FORCE_TASKS_RCU=y
+#CHECK#CONFIG_TASKS_RCU=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/TREE b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE
index f110d9ffbe4c..b10706fd03a4 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuscale/TREE
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE
@@ -16,3 +16,5 @@ CONFIG_RCU_BOOST=n
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
 CONFIG_RCU_EXPERT=y
 CONFIG_RCU_TRACE=y
+CONFIG_KPROBES=n
+CONFIG_FTRACE=n
-- 
cgit 


From 5ce027f4cd0e2f28ea5574ede9eef290e2ede5c5 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Fri, 25 Mar 2022 17:05:40 -0700
Subject: rcuscale: Allow rcuscale without RCU Tasks Rude/Trace

Currently, a CONFIG_PREEMPT_NONE=y kernel substitutes normal RCU for
RCU Tasks Rude and RCU Tasks Trace.  Unless that kernel builds rcuscale,
whether built-in or as a module, in which case these RCU Tasks flavors are
(unnecessarily) built in.  This both increases kernel size and increases
the complexity of certain tracing operations.  This commit therefore
decouples the presence of rcuscale from the presence of RCU Tasks Rude
and RCU Tasks Trace.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/Kconfig.debug                                     |  2 --
 kernel/rcu/rcuscale.c                                        | 12 +++++++++++-
 tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon |  3 ++-
 3 files changed, 13 insertions(+), 4 deletions(-)

(limited to 'tools/testing')

diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug
index 71e73fceff87..68092e1db64b 100644
--- a/kernel/rcu/Kconfig.debug
+++ b/kernel/rcu/Kconfig.debug
@@ -28,8 +28,6 @@ config RCU_SCALE_TEST
 	depends on DEBUG_KERNEL
 	select TORTURE_TEST
 	select SRCU
-	select TASKS_RUDE_RCU
-	select TASKS_TRACE_RCU
 	default n
 	help
 	  This option provides a kernel module that runs performance
diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index 311dbcb064ed..277a5bfb37d4 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -305,6 +305,8 @@ static struct rcu_scale_ops tasks_ops = {
 
 #endif // #else // #ifdef CONFIG_TASKS_RCU
 
+#ifdef CONFIG_TASKS_TRACE_RCU
+
 /*
  * Definitions for RCU-tasks-trace scalability testing.
  */
@@ -334,6 +336,14 @@ static struct rcu_scale_ops tasks_tracing_ops = {
 	.name		= "tasks-tracing"
 };
 
+#define TASKS_TRACING_OPS &tasks_tracing_ops,
+
+#else // #ifdef CONFIG_TASKS_TRACE_RCU
+
+#define TASKS_TRACING_OPS
+
+#endif // #else // #ifdef CONFIG_TASKS_TRACE_RCU
+
 static unsigned long rcuscale_seq_diff(unsigned long new, unsigned long old)
 {
 	if (!cur_ops->gp_diff)
@@ -807,7 +817,7 @@ rcu_scale_init(void)
 	long i;
 	int firsterr = 0;
 	static struct rcu_scale_ops *scale_ops[] = {
-		&rcu_ops, &srcu_ops, &srcud_ops, TASKS_OPS &tasks_tracing_ops
+		&rcu_ops, &srcu_ops, &srcud_ops, TASKS_OPS TASKS_TRACING_OPS
 	};
 
 	if (!torture_init_begin(scale_type, verbose))
diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon b/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon
index 2ed3b46a9c37..6a00157bee5b 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon
@@ -1,5 +1,6 @@
 CONFIG_RCU_SCALE_TEST=y
 CONFIG_PRINTK_TIME=y
-CONFIG_TASKS_RCU_GENERIC=y
 CONFIG_FORCE_TASKS_RCU=y
 #CHECK#CONFIG_TASKS_RCU=y
+CONFIG_FORCE_TASKS_TRACE_RCU=y
+#CHECK#CONFIG_TASKS_TRACE_RCU=y
-- 
cgit 


From bf5e7a2f4609db6cd65c0cad22ab2fbb52f1927e Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Sun, 27 Mar 2022 12:13:30 -0700
Subject: scftorture: Adjust for TASKS_RCU Kconfig option being selected

This commit adjusts the scftorture PREEMPT and NOPREEMPT scenarios to
account for the TASKS_RCU Kconfig option being explicitly selected rather
than computed in isolation.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT | 2 ++
 tools/testing/selftests/rcutorture/configs/scf/PREEMPT   | 1 +
 2 files changed, 3 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT
index b8429d6c6ebc..3a59346b3de7 100644
--- a/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT
+++ b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT
@@ -7,3 +7,5 @@ CONFIG_NO_HZ_IDLE=n
 CONFIG_NO_HZ_FULL=y
 CONFIG_DEBUG_LOCK_ALLOC=n
 CONFIG_PROVE_LOCKING=n
+CONFIG_KPROBES=n
+CONFIG_FTRACE=n
diff --git a/tools/testing/selftests/rcutorture/configs/scf/PREEMPT b/tools/testing/selftests/rcutorture/configs/scf/PREEMPT
index ae4992b141b0..cb37e08037d6 100644
--- a/tools/testing/selftests/rcutorture/configs/scf/PREEMPT
+++ b/tools/testing/selftests/rcutorture/configs/scf/PREEMPT
@@ -7,3 +7,4 @@ CONFIG_NO_HZ_IDLE=y
 CONFIG_NO_HZ_FULL=n
 CONFIG_DEBUG_LOCK_ALLOC=y
 CONFIG_PROVE_LOCKING=y
+CONFIG_RCU_EXPERT=y
-- 
cgit 


From 00f3133b7f9598304c1fe25ad2d5c12b91199761 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 24 Feb 2022 16:08:48 -0800
Subject: torture: Skip vmlinux check for kvm-again.sh runs

The kvm-again.sh script reruns an previously built set of kernels, so
the vmlinux files are associated with that previous run, not this on.
This results in kvm-find_errors.sh reporting spurious failed-build errors.
This commit therefore omits the vmlinux check for kvm-again.sh runs.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
index 5f682fc892dd..88983cba7956 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
@@ -36,7 +36,7 @@ do
 	then
 		egrep "error:|warning:|^ld: .*undefined reference to" < $i > $i.diags
 		files="$files $i.diags $i"
-	elif ! test -f ${scenariobasedir}/vmlinux
+	elif ! test -f ${scenariobasedir}/vmlinux && ! test -f "${rundir}/re-run"
 	then
 		echo No ${scenariobasedir}/vmlinux file > $i.diags
 		files="$files $i.diags $i"
-- 
cgit 


From 3e112a39f7ad6d4cb1110585b11c5e74294e9578 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 1 Mar 2022 06:36:52 -0800
Subject: torture: Enable CSD-lock stall reports for scftorture

This commit passes the csdlock_debug=1 kernel parameter in order to
enable CSD-lock stall reports for torture.sh scftorure runs.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/torture.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index bfe09e2829c8..e84db823a50d 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -350,7 +350,7 @@ fi
 
 if test "$do_scftorture" = "yes"
 then
-	torture_bootargs="scftorture.nthreads=$HALF_ALLOTED_CPUS torture.disable_onoff_at_boot"
+	torture_bootargs="scftorture.nthreads=$HALF_ALLOTED_CPUS torture.disable_onoff_at_boot csdlock_debug=1"
 	torture_set "scftorture" tools/testing/selftests/rcutorture/bin/kvm.sh --torture scf --allcpus --duration "$duration_scftorture" --configs "$configs_scftorture" --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 1G --trust-make
 fi
 
-- 
cgit 


From eec52c7fb51e5a1b89508bdc0e91d955256ec5f1 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 8 Mar 2022 10:23:46 -0800
Subject: rcutorture: Adjust scenarios' Kconfig options for
 CONFIG_PREEMPT_DYNAMIC

Now that CONFIG_PREEMPT_DYNAMIC=y is the default, kernels that are
ostensibly built with CONFIG_PREEMPT_NONE=y or CONFIG_PREEMPT_VOLUNTARY=y
are now actually built with CONFIG_PREEMPT=y, but are by default booted
so as to disable preemption.  Although this allows much more flexibility
from a single kernel binary, it means that the current rcutorture
scenarios won't find build errors that happen only when preemption is
fully disabled at build time.

This commit therefore adds CONFIG_PREEMPT_DYNAMIC=n to several scenarios,
and while in the area switches one from CONFIG_PREEMPT_NONE=y to
CONFIG_PREEMPT_VOLUNTARY=y to add coverage of this Kconfig option.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/configs/rcu/TRACE01 | 1 +
 tools/testing/selftests/rcutorture/configs/rcu/TREE04  | 5 +++--
 tools/testing/selftests/rcutorture/configs/rcu/TREE07  | 1 +
 tools/testing/selftests/rcutorture/configs/rcu/TREE10  | 1 +
 4 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
index e4d74e5fc1d0..b54cefde6e87 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
@@ -4,6 +4,7 @@ CONFIG_HOTPLUG_CPU=y
 CONFIG_PREEMPT_NONE=y
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
 CONFIG_DEBUG_LOCK_ALLOC=n
 CONFIG_PROVE_LOCKING=n
 #CHECK#CONFIG_PROVE_RCU=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
index 22ad0261728d..ae395981b5e5 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
@@ -1,8 +1,9 @@
 CONFIG_SMP=y
 CONFIG_NR_CPUS=8
-CONFIG_PREEMPT_NONE=y
-CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=y
 CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
 #CHECK#CONFIG_TREE_RCU=y
 CONFIG_HZ_PERIODIC=n
 CONFIG_NO_HZ_IDLE=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 b/tools/testing/selftests/rcutorture/configs/rcu/TREE07
index 2789b47e4ecd..d30922d8c883 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE07
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07
@@ -3,6 +3,7 @@ CONFIG_NR_CPUS=16
 CONFIG_PREEMPT_NONE=y
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
 #CHECK#CONFIG_TREE_RCU=y
 CONFIG_HZ_PERIODIC=n
 CONFIG_NO_HZ_IDLE=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE10 b/tools/testing/selftests/rcutorture/configs/rcu/TREE10
index 4a00539bfdd7..a323d8948b7c 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE10
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE10
@@ -3,6 +3,7 @@ CONFIG_NR_CPUS=56
 CONFIG_PREEMPT_NONE=y
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
 #CHECK#CONFIG_TREE_RCU=y
 CONFIG_HZ_PERIODIC=n
 CONFIG_NO_HZ_IDLE=y
-- 
cgit 


From f877e3993b53e2dd1bdfadfc2bca68619d8a3f23 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Sat, 12 Mar 2022 21:12:41 -0800
Subject: scftorture: Remove extraneous "scf" from per_version_boot_params

There is an extraneous "scf" in the per_version_boot_params shell function
used by scftorture.  No harm done in that it is just passed as an argument
to the /init program in initrd, but this commit nevertheless removes it.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh
index d3d9e35d3d55..2d949e58f5a5 100644
--- a/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh
+++ b/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh
@@ -25,6 +25,5 @@ per_version_boot_params () {
 	echo $1 `scftorture_param_onoff "$1" "$2"` \
 		scftorture.stat_interval=15 \
 		scftorture.shutdown_secs=$3 \
-		scftorture.verbose=1 \
-		scf
+		scftorture.verbose=1
 }
-- 
cgit 


From c7756fff4fa11611f81f0f3b1cb13f63b5d0f87e Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Sat, 12 Mar 2022 21:32:55 -0800
Subject: torture: Save "make allmodconfig" .config file

Currently, torture.sh saves only the build output and exit code from the
"make allmodconfig" test.  This commit also saves the .config file.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/torture.sh | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index e84db823a50d..c5b3dedc6dc4 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -322,6 +322,7 @@ then
 	echo " --- make clean" > "$amcdir/Make.out" 2>&1
 	make -j$MAKE_ALLOTED_CPUS clean >> "$amcdir/Make.out" 2>&1
 	echo " --- make allmodconfig" >> "$amcdir/Make.out" 2>&1
+	cp .config $amcdir
 	make -j$MAKE_ALLOTED_CPUS allmodconfig >> "$amcdir/Make.out" 2>&1
 	echo " --- make " >> "$amcdir/Make.out" 2>&1
 	make -j$MAKE_ALLOTED_CPUS >> "$amcdir/Make.out" 2>&1
-- 
cgit 


From 31015625768e6d8bc808a892b221b69afaaa8d07 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Sun, 27 Mar 2022 10:06:53 -0700
Subject: rcutorture: Make kvm.sh allow more memory for --kasan runs

KASAN allots significant memory to track allocation state, and the amount
of memory has increased recently, which results in frequent OOMs on a
few of the rcutorture scenarios.  This commit therefore provides 2G of
memory for --kasan runs, up from the 512M default.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/kvm.sh | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index af58b86a503a..263e16aeca0e 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -44,6 +44,7 @@ TORTURE_KCONFIG_KASAN_ARG=""
 TORTURE_KCONFIG_KCSAN_ARG=""
 TORTURE_KMAKE_ARG=""
 TORTURE_QEMU_MEM=512
+torture_qemu_mem_default=1
 TORTURE_REMOTE=
 TORTURE_SHUTDOWN_GRACE=180
 TORTURE_SUITE=rcu
@@ -180,6 +181,10 @@ do
 		;;
 	--kasan)
 		TORTURE_KCONFIG_KASAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KASAN=y"; export TORTURE_KCONFIG_KASAN_ARG
+		if test -n "$torture_qemu_mem_default"
+		then
+			TORTURE_QEMU_MEM=2G
+		fi
 		;;
 	--kconfig|--kconfigs)
 		checkarg --kconfig "(Kconfig options)" $# "$2" '^CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\( CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\)*$' '^error$'
@@ -202,6 +207,7 @@ do
 	--memory)
 		checkarg --memory "(memory size)" $# "$2" '^[0-9]\+[MG]\?$' error
 		TORTURE_QEMU_MEM=$2
+		torture_qemu_mem_default=
 		shift
 		;;
 	--no-initrd)
-- 
cgit 


From d69e048b27cceec20b637ae8ec72102c79ae673c Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 28 Mar 2022 13:16:18 -0700
Subject: rcutorture: Make torture.sh refscale and rcuscale specify Tasks Trace
 RCU

Now that the Tasks RCU flavors are selected by their users rather than
by the rcutorture scenarios, torture.sh fails when attempting to run
NOPREEMPT scenarios for refscale and rcuscale.  This commit therefore
makes torture.sh specify CONFIG_TASKS_TRACE_RCU=y to avoid such failure.

Why not also CONFIG_TASKS_RCU?  Because tracing selects this one.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/torture.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index c5b3dedc6dc4..f9c1437b9a19 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -364,7 +364,7 @@ fi
 for prim in $primlist
 do
 	torture_bootargs="refscale.scale_type="$prim" refscale.nreaders=$HALF_ALLOTED_CPUS refscale.loops=10000 refscale.holdoff=20 torture.disable_onoff_at_boot"
-	torture_set "refscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture refscale --allcpus --duration 5 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --bootargs "verbose_batched=$VERBOSE_BATCH_CPUS torture.verbose_sleep_frequency=8 torture.verbose_sleep_duration=$VERBOSE_BATCH_CPUS" --trust-make
+	torture_set "refscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture refscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --bootargs "verbose_batched=$VERBOSE_BATCH_CPUS torture.verbose_sleep_frequency=8 torture.verbose_sleep_duration=$VERBOSE_BATCH_CPUS" --trust-make
 done
 
 if test "$do_rcuscale" = yes
@@ -376,7 +376,7 @@ fi
 for prim in $primlist
 do
 	torture_bootargs="rcuscale.scale_type="$prim" rcuscale.nwriters=$HALF_ALLOTED_CPUS rcuscale.holdoff=20 torture.disable_onoff_at_boot"
-	torture_set "rcuscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 5 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --trust-make
+	torture_set "rcuscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --trust-make
 done
 
 if test "$do_kvfree" = "yes"
-- 
cgit 


From fb036ad7db108649189d6577051a87e3d3741cf4 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 28 Mar 2022 13:30:15 -0700
Subject: rcutorture: Make torture.sh allow for --kasan

The torture.sh script provides extra memory for scftorture and rcuscale.
However, the total memory provided is only 1G, which is less than the
2G that is required for KASAN testing.  This commit therefore ups the
torture.sh script's 1G to 2G.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/torture.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index f9c1437b9a19..3be9cfab93b5 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -352,7 +352,7 @@ fi
 if test "$do_scftorture" = "yes"
 then
 	torture_bootargs="scftorture.nthreads=$HALF_ALLOTED_CPUS torture.disable_onoff_at_boot csdlock_debug=1"
-	torture_set "scftorture" tools/testing/selftests/rcutorture/bin/kvm.sh --torture scf --allcpus --duration "$duration_scftorture" --configs "$configs_scftorture" --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 1G --trust-make
+	torture_set "scftorture" tools/testing/selftests/rcutorture/bin/kvm.sh --torture scf --allcpus --duration "$duration_scftorture" --configs "$configs_scftorture" --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 2G --trust-make
 fi
 
 if test "$do_refscale" = yes
@@ -382,7 +382,7 @@ done
 if test "$do_kvfree" = "yes"
 then
 	torture_bootargs="rcuscale.kfree_rcu_test=1 rcuscale.kfree_nthreads=16 rcuscale.holdoff=20 rcuscale.kfree_loops=10000 torture.disable_onoff_at_boot"
-	torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 10 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 1G --trust-make
+	torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 10 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 2G --trust-make
 fi
 
 if test "$do_clocksourcewd" = "yes"
-- 
cgit 


From 920fd5e1771db8b338d4145c2d30d60bf0bcce99 Mon Sep 17 00:00:00 2001
From: Artem Savkov <asavkov@redhat.com>
Date: Thu, 21 Apr 2022 15:01:04 +0200
Subject: selftests/bpf: Fix attach tests retcode checks

Switching to libbpf 1.0 API broke test_sock and test_sysctl as they
check for return of bpf_prog_attach to be exactly -1. Switch the check
to '< 0' instead.

Fixes: b858ba8c52b6 ("selftests/bpf: Use libbpf 1.0 API mode instead of RLIMIT_MEMLOCK")
Signed-off-by: Artem Savkov <asavkov@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Yafang Shao <laoar.shao@gmail.com>
Link: https://lore.kernel.org/bpf/20220421130104.1582053-1-asavkov@redhat.com
---
 tools/testing/selftests/bpf/test_sock.c   | 2 +-
 tools/testing/selftests/bpf/test_sysctl.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c
index 6c4494076bbf..810c3740b2cc 100644
--- a/tools/testing/selftests/bpf/test_sock.c
+++ b/tools/testing/selftests/bpf/test_sock.c
@@ -492,7 +492,7 @@ static int run_test_case(int cgfd, const struct sock_test *test)
 			goto err;
 	}
 
-	if (attach_sock_prog(cgfd, progfd, test->attach_type) == -1) {
+	if (attach_sock_prog(cgfd, progfd, test->attach_type) < 0) {
 		if (test->result == ATTACH_REJECT)
 			goto out;
 		else
diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c
index 5bae25ca19fb..57620e7c9048 100644
--- a/tools/testing/selftests/bpf/test_sysctl.c
+++ b/tools/testing/selftests/bpf/test_sysctl.c
@@ -1560,7 +1560,7 @@ static int run_test_case(int cgfd, struct sysctl_test *test)
 			goto err;
 	}
 
-	if (bpf_prog_attach(progfd, cgfd, atype, BPF_F_ALLOW_OVERRIDE) == -1) {
+	if (bpf_prog_attach(progfd, cgfd, atype, BPF_F_ALLOW_OVERRIDE) < 0) {
 		if (test->result == ATTACH_REJECT)
 			goto out;
 		else
-- 
cgit 


From 6a12b8e20d7e72386594a9dbe7bf2d7fae3b3aa6 Mon Sep 17 00:00:00 2001
From: Artem Savkov <asavkov@redhat.com>
Date: Thu, 21 Apr 2022 15:23:17 +0200
Subject: selftests/bpf: Fix prog_tests uprobe_autoattach compilation error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I am getting the following compilation error for prog_tests/uprobe_autoattach.c:

  tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c: In function ‘test_uprobe_autoattach’:
  ./test_progs.h:209:26: error: pointer ‘mem’ may be used after ‘free’ [-Werror=use-after-free]

The value of mem is now used in one of the asserts, which is why it may be
confusing compilers. However, it is not dereferenced. Silence this by moving
free(mem) after the assert block.

Fixes: 1717e248014c ("selftests/bpf: Uprobe tests should verify param/return values")
Signed-off-by: Artem Savkov <asavkov@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20220421132317.1583867-1-asavkov@redhat.com
---
 tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c b/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c
index d6003dc8cc99..35b87c7ba5be 100644
--- a/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c
@@ -34,7 +34,6 @@ void test_uprobe_autoattach(void)
 
 	/* trigger & validate shared library u[ret]probes attached by name */
 	mem = malloc(malloc_sz);
-	free(mem);
 
 	ASSERT_EQ(skel->bss->uprobe_byname_parm1, trigger_val, "check_uprobe_byname_parm1");
 	ASSERT_EQ(skel->bss->uprobe_byname_ran, 1, "check_uprobe_byname_ran");
@@ -44,6 +43,8 @@ void test_uprobe_autoattach(void)
 	ASSERT_EQ(skel->bss->uprobe_byname2_ran, 3, "check_uprobe_byname2_ran");
 	ASSERT_EQ(skel->bss->uretprobe_byname2_rc, mem, "check_uretprobe_byname2_rc");
 	ASSERT_EQ(skel->bss->uretprobe_byname2_ran, 4, "check_uretprobe_byname2_ran");
+
+	free(mem);
 cleanup:
 	test_uprobe_autoattach__destroy(skel);
 }
-- 
cgit 


From c14766a8a8f3d6c75bf09ec3b6d9ce4d8217015a Mon Sep 17 00:00:00 2001
From: Artem Savkov <asavkov@redhat.com>
Date: Thu, 21 Apr 2022 11:43:20 +0200
Subject: selftests/bpf: Fix map tests errno checks

Switching to libbpf 1.0 API broke test_lpm_map and test_lru_map as error
reporting changed. Instead of setting errno and returning -1 bpf calls
now return -Exxx directly.
Drop errno checks and look at return code directly.

Fixes: b858ba8c52b6 ("selftests/bpf: Use libbpf 1.0 API mode instead of RLIMIT_MEMLOCK")
Signed-off-by: Artem Savkov <asavkov@redhat.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Reviewed-by: Yafang Shao <laoar.shao@gmail.com>
Link: https://lore.kernel.org/bpf/20220421094320.1563570-1-asavkov@redhat.com
---
 tools/testing/selftests/bpf/test_lpm_map.c | 39 ++++++------------
 tools/testing/selftests/bpf/test_lru_map.c | 66 +++++++++++-------------------
 2 files changed, 37 insertions(+), 68 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c
index 789c9748d241..c028d621c744 100644
--- a/tools/testing/selftests/bpf/test_lpm_map.c
+++ b/tools/testing/selftests/bpf/test_lpm_map.c
@@ -408,16 +408,13 @@ static void test_lpm_ipaddr(void)
 
 	/* Test some lookups that should not match any entry */
 	inet_pton(AF_INET, "10.0.0.1", key_ipv4->data);
-	assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -ENOENT);
 
 	inet_pton(AF_INET, "11.11.11.11", key_ipv4->data);
-	assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -ENOENT);
 
 	inet_pton(AF_INET6, "2a00:ffff::", key_ipv6->data);
-	assert(bpf_map_lookup_elem(map_fd_ipv6, key_ipv6, &value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(map_fd_ipv6, key_ipv6, &value) == -ENOENT);
 
 	close(map_fd_ipv4);
 	close(map_fd_ipv6);
@@ -474,18 +471,15 @@ static void test_lpm_delete(void)
 	/* remove non-existent node */
 	key->prefixlen = 32;
 	inet_pton(AF_INET, "10.0.0.1", key->data);
-	assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 &&
-		errno == ENOENT);
+	assert(bpf_map_lookup_elem(map_fd, key, &value) == -ENOENT);
 
 	key->prefixlen = 30; // unused prefix so far
 	inet_pton(AF_INET, "192.255.0.0", key->data);
-	assert(bpf_map_delete_elem(map_fd, key) == -1 &&
-		errno == ENOENT);
+	assert(bpf_map_delete_elem(map_fd, key) == -ENOENT);
 
 	key->prefixlen = 16; // same prefix as the root node
 	inet_pton(AF_INET, "192.255.0.0", key->data);
-	assert(bpf_map_delete_elem(map_fd, key) == -1 &&
-		errno == ENOENT);
+	assert(bpf_map_delete_elem(map_fd, key) == -ENOENT);
 
 	/* assert initial lookup */
 	key->prefixlen = 32;
@@ -530,8 +524,7 @@ static void test_lpm_delete(void)
 
 	key->prefixlen = 32;
 	inet_pton(AF_INET, "192.168.128.1", key->data);
-	assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 &&
-		errno == ENOENT);
+	assert(bpf_map_lookup_elem(map_fd, key, &value) == -ENOENT);
 
 	close(map_fd);
 }
@@ -552,8 +545,7 @@ static void test_lpm_get_next_key(void)
 	assert(map_fd >= 0);
 
 	/* empty tree. get_next_key should return ENOENT */
-	assert(bpf_map_get_next_key(map_fd, NULL, key_p) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_get_next_key(map_fd, NULL, key_p) == -ENOENT);
 
 	/* get and verify the first key, get the second one should fail. */
 	key_p->prefixlen = 16;
@@ -565,8 +557,7 @@ static void test_lpm_get_next_key(void)
 	assert(key_p->prefixlen == 16 && key_p->data[0] == 192 &&
 	       key_p->data[1] == 168);
 
-	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT);
 
 	/* no exact matching key should get the first one in post order. */
 	key_p->prefixlen = 8;
@@ -590,8 +581,7 @@ static void test_lpm_get_next_key(void)
 	       next_key_p->data[1] == 168);
 
 	memcpy(key_p, next_key_p, key_size);
-	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT);
 
 	/* Add one more element (total three) */
 	key_p->prefixlen = 24;
@@ -614,8 +604,7 @@ static void test_lpm_get_next_key(void)
 	       next_key_p->data[1] == 168);
 
 	memcpy(key_p, next_key_p, key_size);
-	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT);
 
 	/* Add one more element (total four) */
 	key_p->prefixlen = 24;
@@ -643,8 +632,7 @@ static void test_lpm_get_next_key(void)
 	       next_key_p->data[1] == 168);
 
 	memcpy(key_p, next_key_p, key_size);
-	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT);
 
 	/* Add one more element (total five) */
 	key_p->prefixlen = 28;
@@ -678,8 +666,7 @@ static void test_lpm_get_next_key(void)
 	       next_key_p->data[1] == 168);
 
 	memcpy(key_p, next_key_p, key_size);
-	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT);
 
 	/* no exact matching key should return the first one in post order */
 	key_p->prefixlen = 22;
diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
index a6aa2d121955..4d0650cfb5cd 100644
--- a/tools/testing/selftests/bpf/test_lru_map.c
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -175,24 +175,20 @@ static void test_lru_sanity0(int map_type, int map_flags)
 				    BPF_NOEXIST));
 
 	/* BPF_NOEXIST means: add new element if it doesn't exist */
-	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -1
-	       /* key=1 already exists */
-	       && errno == EEXIST);
+	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -EEXIST);
+	/* key=1 already exists */
 
-	assert(bpf_map_update_elem(lru_map_fd, &key, value, -1) == -1 &&
-	       errno == EINVAL);
+	assert(bpf_map_update_elem(lru_map_fd, &key, value, -1) == -EINVAL);
 
 	/* insert key=2 element */
 
 	/* check that key=2 is not found */
 	key = 2;
-	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
 
 	/* BPF_EXIST means: update existing element */
-	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -1 &&
-	       /* key=2 is not there */
-	       errno == ENOENT);
+	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -ENOENT);
+	/* key=2 is not there */
 
 	assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));
 
@@ -200,8 +196,7 @@ static void test_lru_sanity0(int map_type, int map_flags)
 
 	/* check that key=3 is not found */
 	key = 3;
-	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
 
 	/* check that key=1 can be found and mark the ref bit to
 	 * stop LRU from removing key=1
@@ -217,8 +212,7 @@ static void test_lru_sanity0(int map_type, int map_flags)
 
 	/* key=2 has been removed from the LRU */
 	key = 2;
-	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
 
 	/* lookup elem key=1 and delete it, then check it doesn't exist */
 	key = 1;
@@ -381,8 +375,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
 	end_key = 1 + batch_size;
 	value[0] = 4321;
 	for (key = 1; key < end_key; key++) {
-		assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
-		       errno == ENOENT);
+		assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
 		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
 					    BPF_NOEXIST));
 		assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value));
@@ -562,8 +555,7 @@ static void do_test_lru_sanity5(unsigned long long last_key, int map_fd)
 	assert(!bpf_map_lookup_elem_with_ref_bit(map_fd, key, value));
 
 	/* Cannot find the last key because it was removed by LRU */
-	assert(bpf_map_lookup_elem(map_fd, &last_key, value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(map_fd, &last_key, value) == -ENOENT);
 }
 
 /* Test map with only one element */
@@ -711,21 +703,18 @@ static void test_lru_sanity7(int map_type, int map_flags)
 				    BPF_NOEXIST));
 
 	/* BPF_NOEXIST means: add new element if it doesn't exist */
-	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -1
-	       /* key=1 already exists */
-	       && errno == EEXIST);
+	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -EEXIST);
+	/* key=1 already exists */
 
 	/* insert key=2 element */
 
 	/* check that key=2 is not found */
 	key = 2;
-	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
 
 	/* BPF_EXIST means: update existing element */
-	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -1 &&
-	       /* key=2 is not there */
-	       errno == ENOENT);
+	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -ENOENT);
+	/* key=2 is not there */
 
 	assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));
 
@@ -733,8 +722,7 @@ static void test_lru_sanity7(int map_type, int map_flags)
 
 	/* check that key=3 is not found */
 	key = 3;
-	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
 
 	/* check that key=1 can be found and mark the ref bit to
 	 * stop LRU from removing key=1
@@ -757,8 +745,7 @@ static void test_lru_sanity7(int map_type, int map_flags)
 
 	/* key=2 has been removed from the LRU */
 	key = 2;
-	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
 
 	assert(map_equal(lru_map_fd, expected_map_fd));
 
@@ -805,21 +792,18 @@ static void test_lru_sanity8(int map_type, int map_flags)
 	assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));
 
 	/* BPF_NOEXIST means: add new element if it doesn't exist */
-	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -1
-	       /* key=1 already exists */
-	       && errno == EEXIST);
+	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -EEXIST);
+	/* key=1 already exists */
 
 	/* insert key=2 element */
 
 	/* check that key=2 is not found */
 	key = 2;
-	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
 
 	/* BPF_EXIST means: update existing element */
-	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -1 &&
-	       /* key=2 is not there */
-	       errno == ENOENT);
+	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -ENOENT);
+	/* key=2 is not there */
 
 	assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));
 	assert(!bpf_map_update_elem(expected_map_fd, &key, value,
@@ -829,8 +813,7 @@ static void test_lru_sanity8(int map_type, int map_flags)
 
 	/* check that key=3 is not found */
 	key = 3;
-	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
 
 	/* check that key=1 can be found and do _not_ mark ref bit.
 	 * this will be evicted on next update.
@@ -853,8 +836,7 @@ static void test_lru_sanity8(int map_type, int map_flags)
 
 	/* key=1 has been removed from the LRU */
 	key = 1;
-	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
 
 	assert(map_equal(lru_map_fd, expected_map_fd));
 
-- 
cgit 


From 266a19a0bc4fbfab4d981a47640ca98972a01865 Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Thu, 14 Apr 2022 12:30:31 +0200
Subject: KVM: selftests: Silence compiler warning in the kvm_page_table_test

When compiling kvm_page_table_test.c, I get this compiler warning
with gcc 11.2:

kvm_page_table_test.c: In function 'pre_init_before_test':
../../../../tools/include/linux/kernel.h:44:24: warning: comparison of
 distinct pointer types lacks a cast
   44 |         (void) (&_max1 == &_max2);              \
      |                        ^~
kvm_page_table_test.c:281:21: note: in expansion of macro 'max'
  281 |         alignment = max(0x100000, alignment);
      |                     ^~~

Fix it by adjusting the type of the absolute value.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Message-Id: <20220414103031.565037-1-thuth@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/kvm_page_table_test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c
index ba1fdc3dcf4a..2c4a7563a4f8 100644
--- a/tools/testing/selftests/kvm/kvm_page_table_test.c
+++ b/tools/testing/selftests/kvm/kvm_page_table_test.c
@@ -278,7 +278,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
 	else
 		guest_test_phys_mem = p->phys_offset;
 #ifdef __s390x__
-	alignment = max(0x100000, alignment);
+	alignment = max(0x100000UL, alignment);
 #endif
 	guest_test_phys_mem = align_down(guest_test_phys_mem, alignment);
 
-- 
cgit 


From f18b4aebe107d092e384b1ae680b1e1de7a0196d Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 20 Apr 2022 06:27:27 -0400
Subject: kvm: selftests: do not use bitfields larger than 32-bits for PTEs

Red Hat's QE team reported test failure on access_tracking_perf_test:

Testing guest mode: PA-bits:ANY, VA-bits:48,  4K pages
guest physical test memory offset: 0x3fffbffff000

Populating memory             : 0.684014577s
Writing to populated memory   : 0.006230175s
Reading from populated memory : 0.004557805s
==== Test Assertion Failure ====
  lib/kvm_util.c:1411: false
  pid=125806 tid=125809 errno=4 - Interrupted system call
     1  0x0000000000402f7c: addr_gpa2hva at kvm_util.c:1411
     2   (inlined by) addr_gpa2hva at kvm_util.c:1405
     3  0x0000000000401f52: lookup_pfn at access_tracking_perf_test.c:98
     4   (inlined by) mark_vcpu_memory_idle at access_tracking_perf_test.c:152
     5   (inlined by) vcpu_thread_main at access_tracking_perf_test.c:232
     6  0x00007fefe9ff81ce: ?? ??:0
     7  0x00007fefe9c64d82: ?? ??:0
  No vm physical memory at 0xffbffff000

I can easily reproduce it with a Intel(R) Xeon(R) CPU E5-2630 with 46 bits
PA.

It turns out that the address translation for clearing idle page tracking
returned a wrong result; addr_gva2gpa()'s last step, which is based on
"pte[index[0]].pfn", did the calculation with 40 bits length and the
high 12 bits got truncated.  In above case the GPA address to be returned
should be 0x3fffbffff000 for GVA 0xc0000000, but it got truncated into
0xffbffff000 and the subsequent gpa2hva lookup failed.

The width of operations on bit fields greater than 32-bit is
implementation defined, and differs between GCC (which uses the bitfield
precision) and clang (which uses 64-bit arithmetic), so this is a
potential minefield.  Remove the bit fields and using manual masking
instead.

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2075036
Reported-by: Nana Liu <nanliu@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Tested-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../selftests/kvm/include/x86_64/processor.h       |  15 ++
 tools/testing/selftests/kvm/lib/x86_64/processor.c | 192 +++++++++------------
 2 files changed, 92 insertions(+), 115 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 37db341d4cc5..86e79af64dea 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -60,6 +60,21 @@
 /* CPUID.0x8000_0001.EDX */
 #define CPUID_GBPAGES		(1ul << 26)
 
+/* Page table bitfield declarations */
+#define PTE_PRESENT_MASK        BIT_ULL(0)
+#define PTE_WRITABLE_MASK       BIT_ULL(1)
+#define PTE_USER_MASK           BIT_ULL(2)
+#define PTE_ACCESSED_MASK       BIT_ULL(5)
+#define PTE_DIRTY_MASK          BIT_ULL(6)
+#define PTE_LARGE_MASK          BIT_ULL(7)
+#define PTE_GLOBAL_MASK         BIT_ULL(8)
+#define PTE_NX_MASK             BIT_ULL(63)
+
+#define PAGE_SHIFT		12
+
+#define PHYSICAL_PAGE_MASK      GENMASK_ULL(51, 12)
+#define PTE_GET_PFN(pte)        (((pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT)
+
 /* General Registers in 64-Bit Mode */
 struct gpr64_regs {
 	u64 rax;
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 9f000dfb5594..0dd442c26015 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -19,38 +19,6 @@
 
 vm_vaddr_t exception_handlers;
 
-/* Virtual translation table structure declarations */
-struct pageUpperEntry {
-	uint64_t present:1;
-	uint64_t writable:1;
-	uint64_t user:1;
-	uint64_t write_through:1;
-	uint64_t cache_disable:1;
-	uint64_t accessed:1;
-	uint64_t ignored_06:1;
-	uint64_t page_size:1;
-	uint64_t ignored_11_08:4;
-	uint64_t pfn:40;
-	uint64_t ignored_62_52:11;
-	uint64_t execute_disable:1;
-};
-
-struct pageTableEntry {
-	uint64_t present:1;
-	uint64_t writable:1;
-	uint64_t user:1;
-	uint64_t write_through:1;
-	uint64_t cache_disable:1;
-	uint64_t accessed:1;
-	uint64_t dirty:1;
-	uint64_t reserved_07:1;
-	uint64_t global:1;
-	uint64_t ignored_11_09:3;
-	uint64_t pfn:40;
-	uint64_t ignored_62_52:11;
-	uint64_t execute_disable:1;
-};
-
 void regs_dump(FILE *stream, struct kvm_regs *regs,
 	       uint8_t indent)
 {
@@ -195,23 +163,21 @@ static void *virt_get_pte(struct kvm_vm *vm, uint64_t pt_pfn, uint64_t vaddr,
 	return &page_table[index];
 }
 
-static struct pageUpperEntry *virt_create_upper_pte(struct kvm_vm *vm,
-						    uint64_t pt_pfn,
-						    uint64_t vaddr,
-						    uint64_t paddr,
-						    int level,
-						    enum x86_page_size page_size)
+static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
+				       uint64_t pt_pfn,
+				       uint64_t vaddr,
+				       uint64_t paddr,
+				       int level,
+				       enum x86_page_size page_size)
 {
-	struct pageUpperEntry *pte = virt_get_pte(vm, pt_pfn, vaddr, level);
-
-	if (!pte->present) {
-		pte->writable = true;
-		pte->present = true;
-		pte->page_size = (level == page_size);
-		if (pte->page_size)
-			pte->pfn = paddr >> vm->page_shift;
+	uint64_t *pte = virt_get_pte(vm, pt_pfn, vaddr, level);
+
+	if (!(*pte & PTE_PRESENT_MASK)) {
+		*pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK;
+		if (level == page_size)
+			*pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK);
 		else
-			pte->pfn = vm_alloc_page_table(vm) >> vm->page_shift;
+			*pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK;
 	} else {
 		/*
 		 * Entry already present.  Assert that the caller doesn't want
@@ -221,7 +187,7 @@ static struct pageUpperEntry *virt_create_upper_pte(struct kvm_vm *vm,
 		TEST_ASSERT(level != page_size,
 			    "Cannot create hugepage at level: %u, vaddr: 0x%lx\n",
 			    page_size, vaddr);
-		TEST_ASSERT(!pte->page_size,
+		TEST_ASSERT(!(*pte & PTE_LARGE_MASK),
 			    "Cannot create page table at level: %u, vaddr: 0x%lx\n",
 			    level, vaddr);
 	}
@@ -232,8 +198,8 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 		   enum x86_page_size page_size)
 {
 	const uint64_t pg_size = 1ull << ((page_size * 9) + 12);
-	struct pageUpperEntry *pml4e, *pdpe, *pde;
-	struct pageTableEntry *pte;
+	uint64_t *pml4e, *pdpe, *pde;
+	uint64_t *pte;
 
 	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K,
 		    "Unknown or unsupported guest mode, mode: 0x%x", vm->mode);
@@ -257,24 +223,22 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 	 */
 	pml4e = virt_create_upper_pte(vm, vm->pgd >> vm->page_shift,
 				      vaddr, paddr, 3, page_size);
-	if (pml4e->page_size)
+	if (*pml4e & PTE_LARGE_MASK)
 		return;
 
-	pdpe = virt_create_upper_pte(vm, pml4e->pfn, vaddr, paddr, 2, page_size);
-	if (pdpe->page_size)
+	pdpe = virt_create_upper_pte(vm, PTE_GET_PFN(*pml4e), vaddr, paddr, 2, page_size);
+	if (*pdpe & PTE_LARGE_MASK)
 		return;
 
-	pde = virt_create_upper_pte(vm, pdpe->pfn, vaddr, paddr, 1, page_size);
-	if (pde->page_size)
+	pde = virt_create_upper_pte(vm, PTE_GET_PFN(*pdpe), vaddr, paddr, 1, page_size);
+	if (*pde & PTE_LARGE_MASK)
 		return;
 
 	/* Fill in page table entry. */
-	pte = virt_get_pte(vm, pde->pfn, vaddr, 0);
-	TEST_ASSERT(!pte->present,
+	pte = virt_get_pte(vm, PTE_GET_PFN(*pde), vaddr, 0);
+	TEST_ASSERT(!(*pte & PTE_PRESENT_MASK),
 		    "PTE already present for 4k page at vaddr: 0x%lx\n", vaddr);
-	pte->pfn = paddr >> vm->page_shift;
-	pte->writable = true;
-	pte->present = 1;
+	*pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK);
 }
 
 void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
@@ -282,12 +246,12 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
 	__virt_pg_map(vm, vaddr, paddr, X86_PAGE_SIZE_4K);
 }
 
-static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid,
+static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid,
 						       uint64_t vaddr)
 {
 	uint16_t index[4];
-	struct pageUpperEntry *pml4e, *pdpe, *pde;
-	struct pageTableEntry *pte;
+	uint64_t *pml4e, *pdpe, *pde;
+	uint64_t *pte;
 	struct kvm_cpuid_entry2 *entry;
 	struct kvm_sregs sregs;
 	int max_phy_addr;
@@ -329,30 +293,29 @@ static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vc
 	index[3] = (vaddr >> 39) & 0x1ffu;
 
 	pml4e = addr_gpa2hva(vm, vm->pgd);
-	TEST_ASSERT(pml4e[index[3]].present,
+	TEST_ASSERT(pml4e[index[3]] & PTE_PRESENT_MASK,
 		"Expected pml4e to be present for gva: 0x%08lx", vaddr);
-	TEST_ASSERT((*(uint64_t*)(&pml4e[index[3]]) &
-		(rsvd_mask | (1ull << 7))) == 0,
+	TEST_ASSERT((pml4e[index[3]] & (rsvd_mask | PTE_LARGE_MASK)) == 0,
 		"Unexpected reserved bits set.");
 
-	pdpe = addr_gpa2hva(vm, pml4e[index[3]].pfn * vm->page_size);
-	TEST_ASSERT(pdpe[index[2]].present,
+	pdpe = addr_gpa2hva(vm, PTE_GET_PFN(pml4e[index[3]]) * vm->page_size);
+	TEST_ASSERT(pdpe[index[2]] & PTE_PRESENT_MASK,
 		"Expected pdpe to be present for gva: 0x%08lx", vaddr);
-	TEST_ASSERT(pdpe[index[2]].page_size == 0,
+	TEST_ASSERT(!(pdpe[index[2]] & PTE_LARGE_MASK),
 		"Expected pdpe to map a pde not a 1-GByte page.");
-	TEST_ASSERT((*(uint64_t*)(&pdpe[index[2]]) & rsvd_mask) == 0,
+	TEST_ASSERT((pdpe[index[2]] & rsvd_mask) == 0,
 		"Unexpected reserved bits set.");
 
-	pde = addr_gpa2hva(vm, pdpe[index[2]].pfn * vm->page_size);
-	TEST_ASSERT(pde[index[1]].present,
+	pde = addr_gpa2hva(vm, PTE_GET_PFN(pdpe[index[2]]) * vm->page_size);
+	TEST_ASSERT(pde[index[1]] & PTE_PRESENT_MASK,
 		"Expected pde to be present for gva: 0x%08lx", vaddr);
-	TEST_ASSERT(pde[index[1]].page_size == 0,
+	TEST_ASSERT(!(pde[index[1]] & PTE_LARGE_MASK),
 		"Expected pde to map a pte not a 2-MByte page.");
-	TEST_ASSERT((*(uint64_t*)(&pde[index[1]]) & rsvd_mask) == 0,
+	TEST_ASSERT((pde[index[1]] & rsvd_mask) == 0,
 		"Unexpected reserved bits set.");
 
-	pte = addr_gpa2hva(vm, pde[index[1]].pfn * vm->page_size);
-	TEST_ASSERT(pte[index[0]].present,
+	pte = addr_gpa2hva(vm, PTE_GET_PFN(pde[index[1]]) * vm->page_size);
+	TEST_ASSERT(pte[index[0]] & PTE_PRESENT_MASK,
 		"Expected pte to be present for gva: 0x%08lx", vaddr);
 
 	return &pte[index[0]];
@@ -360,7 +323,7 @@ static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vc
 
 uint64_t vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr)
 {
-	struct pageTableEntry *pte = _vm_get_page_table_entry(vm, vcpuid, vaddr);
+	uint64_t *pte = _vm_get_page_table_entry(vm, vcpuid, vaddr);
 
 	return *(uint64_t *)pte;
 }
@@ -368,18 +331,17 @@ uint64_t vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr)
 void vm_set_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr,
 			     uint64_t pte)
 {
-	struct pageTableEntry *new_pte = _vm_get_page_table_entry(vm, vcpuid,
-								  vaddr);
+	uint64_t *new_pte = _vm_get_page_table_entry(vm, vcpuid, vaddr);
 
 	*(uint64_t *)new_pte = pte;
 }
 
 void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
 {
-	struct pageUpperEntry *pml4e, *pml4e_start;
-	struct pageUpperEntry *pdpe, *pdpe_start;
-	struct pageUpperEntry *pde, *pde_start;
-	struct pageTableEntry *pte, *pte_start;
+	uint64_t *pml4e, *pml4e_start;
+	uint64_t *pdpe, *pdpe_start;
+	uint64_t *pde, *pde_start;
+	uint64_t *pte, *pte_start;
 
 	if (!vm->pgd_created)
 		return;
@@ -389,58 +351,58 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
 	fprintf(stream, "%*s      index hvaddr         gpaddr         "
 		"addr         w exec dirty\n",
 		indent, "");
-	pml4e_start = (struct pageUpperEntry *) addr_gpa2hva(vm, vm->pgd);
+	pml4e_start = (uint64_t *) addr_gpa2hva(vm, vm->pgd);
 	for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
 		pml4e = &pml4e_start[n1];
-		if (!pml4e->present)
+		if (!(*pml4e & PTE_PRESENT_MASK))
 			continue;
-		fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u "
+		fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10llx %u "
 			" %u\n",
 			indent, "",
 			pml4e - pml4e_start, pml4e,
-			addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->pfn,
-			pml4e->writable, pml4e->execute_disable);
+			addr_hva2gpa(vm, pml4e), PTE_GET_PFN(*pml4e),
+			!!(*pml4e & PTE_WRITABLE_MASK), !!(*pml4e & PTE_NX_MASK));
 
-		pdpe_start = addr_gpa2hva(vm, pml4e->pfn * vm->page_size);
+		pdpe_start = addr_gpa2hva(vm, *pml4e & PHYSICAL_PAGE_MASK);
 		for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
 			pdpe = &pdpe_start[n2];
-			if (!pdpe->present)
+			if (!(*pdpe & PTE_PRESENT_MASK))
 				continue;
-			fprintf(stream, "%*spdpe  0x%-3zx %p 0x%-12lx 0x%-10lx "
+			fprintf(stream, "%*spdpe  0x%-3zx %p 0x%-12lx 0x%-10llx "
 				"%u  %u\n",
 				indent, "",
 				pdpe - pdpe_start, pdpe,
 				addr_hva2gpa(vm, pdpe),
-				(uint64_t) pdpe->pfn, pdpe->writable,
-				pdpe->execute_disable);
+				PTE_GET_PFN(*pdpe), !!(*pdpe & PTE_WRITABLE_MASK),
+				!!(*pdpe & PTE_NX_MASK));
 
-			pde_start = addr_gpa2hva(vm, pdpe->pfn * vm->page_size);
+			pde_start = addr_gpa2hva(vm, *pdpe & PHYSICAL_PAGE_MASK);
 			for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
 				pde = &pde_start[n3];
-				if (!pde->present)
+				if (!(*pde & PTE_PRESENT_MASK))
 					continue;
 				fprintf(stream, "%*spde   0x%-3zx %p "
-					"0x%-12lx 0x%-10lx %u  %u\n",
+					"0x%-12lx 0x%-10llx %u  %u\n",
 					indent, "", pde - pde_start, pde,
 					addr_hva2gpa(vm, pde),
-					(uint64_t) pde->pfn, pde->writable,
-					pde->execute_disable);
+					PTE_GET_PFN(*pde), !!(*pde & PTE_WRITABLE_MASK),
+					!!(*pde & PTE_NX_MASK));
 
-				pte_start = addr_gpa2hva(vm, pde->pfn * vm->page_size);
+				pte_start = addr_gpa2hva(vm, *pde & PHYSICAL_PAGE_MASK);
 				for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
 					pte = &pte_start[n4];
-					if (!pte->present)
+					if (!(*pte & PTE_PRESENT_MASK))
 						continue;
 					fprintf(stream, "%*spte   0x%-3zx %p "
-						"0x%-12lx 0x%-10lx %u  %u "
+						"0x%-12lx 0x%-10llx %u  %u "
 						"    %u    0x%-10lx\n",
 						indent, "",
 						pte - pte_start, pte,
 						addr_hva2gpa(vm, pte),
-						(uint64_t) pte->pfn,
-						pte->writable,
-						pte->execute_disable,
-						pte->dirty,
+						PTE_GET_PFN(*pte),
+						!!(*pte & PTE_WRITABLE_MASK),
+						!!(*pte & PTE_NX_MASK),
+						!!(*pte & PTE_DIRTY_MASK),
 						((uint64_t) n1 << 27)
 							| ((uint64_t) n2 << 18)
 							| ((uint64_t) n3 << 9)
@@ -558,8 +520,8 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
 vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
 {
 	uint16_t index[4];
-	struct pageUpperEntry *pml4e, *pdpe, *pde;
-	struct pageTableEntry *pte;
+	uint64_t *pml4e, *pdpe, *pde;
+	uint64_t *pte;
 
 	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
 		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
@@ -572,22 +534,22 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
 	if (!vm->pgd_created)
 		goto unmapped_gva;
 	pml4e = addr_gpa2hva(vm, vm->pgd);
-	if (!pml4e[index[3]].present)
+	if (!(pml4e[index[3]] & PTE_PRESENT_MASK))
 		goto unmapped_gva;
 
-	pdpe = addr_gpa2hva(vm, pml4e[index[3]].pfn * vm->page_size);
-	if (!pdpe[index[2]].present)
+	pdpe = addr_gpa2hva(vm, PTE_GET_PFN(pml4e[index[3]]) * vm->page_size);
+	if (!(pdpe[index[2]] & PTE_PRESENT_MASK))
 		goto unmapped_gva;
 
-	pde = addr_gpa2hva(vm, pdpe[index[2]].pfn * vm->page_size);
-	if (!pde[index[1]].present)
+	pde = addr_gpa2hva(vm, PTE_GET_PFN(pdpe[index[2]]) * vm->page_size);
+	if (!(pde[index[1]] & PTE_PRESENT_MASK))
 		goto unmapped_gva;
 
-	pte = addr_gpa2hva(vm, pde[index[1]].pfn * vm->page_size);
-	if (!pte[index[0]].present)
+	pte = addr_gpa2hva(vm, PTE_GET_PFN(pde[index[1]]) * vm->page_size);
+	if (!(pte[index[0]] & PTE_PRESENT_MASK))
 		goto unmapped_gva;
 
-	return (pte[index[0]].pfn * vm->page_size) + (gva & 0xfffu);
+	return (PTE_GET_PFN(pte[index[0]]) * vm->page_size) + (gva & 0xfffu);
 
 unmapped_gva:
 	TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
-- 
cgit 


From e852be8b148e117e25be1c98cf72ee489b05919e Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 20 Apr 2022 06:27:27 -0400
Subject: kvm: selftests: introduce and use more page size-related constants

Clean up code that was hardcoding masks for various fields,
now that the masks are included in processor.h.

For more cleanup, define PAGE_SIZE and PAGE_MASK just like in Linux.
PAGE_SIZE in particular was defined by several tests.

Suggested-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/include/x86_64/processor.h   |  2 ++
 tools/testing/selftests/kvm/lib/x86_64/processor.c       | 12 ++++++------
 tools/testing/selftests/kvm/x86_64/amx_test.c            |  1 -
 tools/testing/selftests/kvm/x86_64/emulator_error_test.c |  1 -
 tools/testing/selftests/kvm/x86_64/smm_test.c            |  2 --
 tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c |  1 -
 tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c     |  1 -
 tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c     |  1 -
 8 files changed, 8 insertions(+), 13 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 86e79af64dea..d0d51adec76e 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -71,6 +71,8 @@
 #define PTE_NX_MASK             BIT_ULL(63)
 
 #define PAGE_SHIFT		12
+#define PAGE_SIZE		(1ULL << PAGE_SHIFT)
+#define PAGE_MASK		(~(PAGE_SIZE-1))
 
 #define PHYSICAL_PAGE_MASK      GENMASK_ULL(51, 12)
 #define PTE_GET_PFN(pte)        (((pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT)
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 0dd442c26015..33ea5e9955d9 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -255,13 +255,13 @@ static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid,
 	struct kvm_cpuid_entry2 *entry;
 	struct kvm_sregs sregs;
 	int max_phy_addr;
-	/* Set the bottom 52 bits. */
-	uint64_t rsvd_mask = 0x000fffffffffffff;
+	uint64_t rsvd_mask = 0;
 
 	entry = kvm_get_supported_cpuid_index(0x80000008, 0);
 	max_phy_addr = entry->eax & 0x000000ff;
-	/* Clear the bottom bits of the reserved mask. */
-	rsvd_mask = (rsvd_mask >> max_phy_addr) << max_phy_addr;
+	/* Set the high bits in the reserved mask. */
+	if (max_phy_addr < 52)
+		rsvd_mask = GENMASK_ULL(51, max_phy_addr);
 
 	/*
 	 * SDM vol 3, fig 4-11 "Formats of CR3 and Paging-Structure Entries
@@ -271,7 +271,7 @@ static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid,
 	 */
 	vcpu_sregs_get(vm, vcpuid, &sregs);
 	if ((sregs.efer & EFER_NX) == 0) {
-		rsvd_mask |= (1ull << 63);
+		rsvd_mask |= PTE_NX_MASK;
 	}
 
 	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
@@ -549,7 +549,7 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
 	if (!(pte[index[0]] & PTE_PRESENT_MASK))
 		goto unmapped_gva;
 
-	return (PTE_GET_PFN(pte[index[0]]) * vm->page_size) + (gva & 0xfffu);
+	return (PTE_GET_PFN(pte[index[0]]) * vm->page_size) + (gva & ~PAGE_MASK);
 
 unmapped_gva:
 	TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c
index 52a3ef6629e8..76f65c22796f 100644
--- a/tools/testing/selftests/kvm/x86_64/amx_test.c
+++ b/tools/testing/selftests/kvm/x86_64/amx_test.c
@@ -29,7 +29,6 @@
 #define X86_FEATURE_XSAVE		(1 << 26)
 #define X86_FEATURE_OSXSAVE		(1 << 27)
 
-#define PAGE_SIZE			(1 << 12)
 #define NUM_TILES			8
 #define TILE_SIZE			1024
 #define XSAVE_SIZE			((NUM_TILES * TILE_SIZE) + PAGE_SIZE)
diff --git a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c
index f070ff0224fa..aeb3850f81bd 100644
--- a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c
+++ b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c
@@ -12,7 +12,6 @@
 #include "vmx.h"
 
 #define VCPU_ID	   1
-#define PAGE_SIZE  4096
 #define MAXPHYADDR 36
 
 #define MEM_REGION_GVA	0x0000123456789000
diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c
index a626d40fdb48..b4e0c860769e 100644
--- a/tools/testing/selftests/kvm/x86_64/smm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/smm_test.c
@@ -21,8 +21,6 @@
 
 #define VCPU_ID	      1
 
-#define PAGE_SIZE  4096
-
 #define SMRAM_SIZE 65536
 #define SMRAM_MEMSLOT ((1 << 16) | 1)
 #define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE)
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
index e683d0ac3e45..19b35c607dc6 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
@@ -32,7 +32,6 @@
 #define MSR_IA32_TSC_ADJUST 0x3b
 #endif
 
-#define PAGE_SIZE	4096
 #define VCPU_ID		5
 
 #define TSC_ADJUST_VALUE (1ll << 32)
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
index 865e17146815..bcd370827859 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -23,7 +23,6 @@
 #define SHINFO_REGION_GVA	0xc0000000ULL
 #define SHINFO_REGION_GPA	0xc0000000ULL
 #define SHINFO_REGION_SLOT	10
-#define PAGE_SIZE		4096
 
 #define DUMMY_REGION_GPA	(SHINFO_REGION_GPA + (2 * PAGE_SIZE))
 #define DUMMY_REGION_SLOT	11
diff --git a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
index adc94452b57c..b30fe9de1d4f 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
@@ -15,7 +15,6 @@
 
 #define HCALL_REGION_GPA	0xc0000000ULL
 #define HCALL_REGION_SLOT	10
-#define PAGE_SIZE		4096
 
 static struct kvm_vm *vm;
 
-- 
cgit 


From 9c85a9bae267f6b5e5e374d0d023bbbe9db096d3 Mon Sep 17 00:00:00 2001
From: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Date: Thu, 21 Apr 2022 16:35:49 -0700
Subject: selftest/vm: verify mmap addr in mremap_test

Avoid calling mmap with requested addresses that are less than the
system's mmap_min_addr.  When run as root, mmap returns EACCES when
trying to map addresses < mmap_min_addr.  This is not one of the error
codes for the condition to retry the mmap in the test.

Rather than arbitrarily retrying on EACCES, don't attempt an mmap until
addr > vm.mmap_min_addr.

Add a munmap call after an alignment check as the mappings are retained
after the retry and can reach the vm.max_map_count sysctl.

Link: https://lkml.kernel.org/r/20220420215721.4868-1-sidhartha.kumar@oracle.com
Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/mremap_test.c | 41 +++++++++++++++++++++++++++++++-
 1 file changed, 40 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/mremap_test.c b/tools/testing/selftests/vm/mremap_test.c
index 7c0b0617b9f8..9a518fee6306 100644
--- a/tools/testing/selftests/vm/mremap_test.c
+++ b/tools/testing/selftests/vm/mremap_test.c
@@ -6,6 +6,7 @@
 
 #include <errno.h>
 #include <stdlib.h>
+#include <stdio.h>
 #include <string.h>
 #include <sys/mman.h>
 #include <time.h>
@@ -63,6 +64,35 @@ enum {
 	.expect_failure = should_fail				\
 }
 
+/* Returns mmap_min_addr sysctl tunable from procfs */
+static unsigned long long get_mmap_min_addr(void)
+{
+	FILE *fp;
+	int n_matched;
+	static unsigned long long addr;
+
+	if (addr)
+		return addr;
+
+	fp = fopen("/proc/sys/vm/mmap_min_addr", "r");
+	if (fp == NULL) {
+		ksft_print_msg("Failed to open /proc/sys/vm/mmap_min_addr: %s\n",
+			strerror(errno));
+		exit(KSFT_SKIP);
+	}
+
+	n_matched = fscanf(fp, "%llu", &addr);
+	if (n_matched != 1) {
+		ksft_print_msg("Failed to read /proc/sys/vm/mmap_min_addr: %s\n",
+			strerror(errno));
+		fclose(fp);
+		exit(KSFT_SKIP);
+	}
+
+	fclose(fp);
+	return addr;
+}
+
 /*
  * Returns the start address of the mapping on success, else returns
  * NULL on failure.
@@ -71,8 +101,15 @@ static void *get_source_mapping(struct config c)
 {
 	unsigned long long addr = 0ULL;
 	void *src_addr = NULL;
+	unsigned long long mmap_min_addr;
+
+	mmap_min_addr = get_mmap_min_addr();
+
 retry:
 	addr += c.src_alignment;
+	if (addr < mmap_min_addr)
+		goto retry;
+
 	src_addr = mmap((void *) addr, c.region_size, PROT_READ | PROT_WRITE,
 			MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
 			-1, 0);
@@ -90,8 +127,10 @@ retry:
 	 * alignment in the tests.
 	 */
 	if (((unsigned long long) src_addr & (c.src_alignment - 1)) ||
-			!((unsigned long long) src_addr & c.src_alignment))
+			!((unsigned long long) src_addr & c.src_alignment)) {
+		munmap(src_addr, c.region_size);
 		goto retry;
+	}
 
 	if (!src_addr)
 		goto error;
-- 
cgit 


From 18d609daa546c919fd36b62a7b510c18de4b4af8 Mon Sep 17 00:00:00 2001
From: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Date: Thu, 21 Apr 2022 16:35:52 -0700
Subject: selftest/vm: verify remap destination address in mremap_test

Because mremap does not have a MAP_FIXED_NOREPLACE flag, it can destroy
existing mappings.  This causes a segfault when regions such as text are
remapped and the permissions are changed.

Verify the requested mremap destination address does not overlap any
existing mappings by using mmap's MAP_FIXED_NOREPLACE flag.  Keep
incrementing the destination address until a valid mapping is found or
fail the current test once the max address is reached.

Link: https://lkml.kernel.org/r/20220420215721.4868-2-sidhartha.kumar@oracle.com
Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/mremap_test.c | 42 +++++++++++++++++++++++++++++---
 1 file changed, 39 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/mremap_test.c b/tools/testing/selftests/vm/mremap_test.c
index 9a518fee6306..58775dab3cc6 100644
--- a/tools/testing/selftests/vm/mremap_test.c
+++ b/tools/testing/selftests/vm/mremap_test.c
@@ -10,6 +10,7 @@
 #include <string.h>
 #include <sys/mman.h>
 #include <time.h>
+#include <stdbool.h>
 
 #include "../kselftest.h"
 
@@ -64,6 +65,30 @@ enum {
 	.expect_failure = should_fail				\
 }
 
+/*
+ * Returns false if the requested remap region overlaps with an
+ * existing mapping (e.g text, stack) else returns true.
+ */
+static bool is_remap_region_valid(void *addr, unsigned long long size)
+{
+	void *remap_addr = NULL;
+	bool ret = true;
+
+	/* Use MAP_FIXED_NOREPLACE flag to ensure region is not mapped */
+	remap_addr = mmap(addr, size, PROT_READ | PROT_WRITE,
+					 MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
+					 -1, 0);
+
+	if (remap_addr == MAP_FAILED) {
+		if (errno == EEXIST)
+			ret = false;
+	} else {
+		munmap(remap_addr, size);
+	}
+
+	return ret;
+}
+
 /* Returns mmap_min_addr sysctl tunable from procfs */
 static unsigned long long get_mmap_min_addr(void)
 {
@@ -111,8 +136,8 @@ retry:
 		goto retry;
 
 	src_addr = mmap((void *) addr, c.region_size, PROT_READ | PROT_WRITE,
-			MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
-			-1, 0);
+					MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
+					-1, 0);
 	if (src_addr == MAP_FAILED) {
 		if (errno == EPERM || errno == EEXIST)
 			goto retry;
@@ -179,9 +204,20 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
 	if (!((unsigned long long) addr & c.dest_alignment))
 		addr = (void *) ((unsigned long long) addr | c.dest_alignment);
 
+	/* Don't destroy existing mappings unless expected to overlap */
+	while (!is_remap_region_valid(addr, c.region_size) && !c.overlapping) {
+		/* Check for unsigned overflow */
+		if (addr + c.dest_alignment < addr) {
+			ksft_print_msg("Couldn't find a valid region to remap to\n");
+			ret = -1;
+			goto out;
+		}
+		addr += c.dest_alignment;
+	}
+
 	clock_gettime(CLOCK_MONOTONIC, &t_start);
 	dest_addr = mremap(src_addr, c.region_size, c.region_size,
-			MREMAP_MAYMOVE|MREMAP_FIXED, (char *) addr);
+					  MREMAP_MAYMOVE|MREMAP_FIXED, (char *) addr);
 	clock_gettime(CLOCK_MONOTONIC, &t_end);
 
 	if (dest_addr == MAP_FAILED) {
-- 
cgit 


From e5508fc52c76fe42d8bb091fbd7796eeb64b52c4 Mon Sep 17 00:00:00 2001
From: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Date: Thu, 21 Apr 2022 16:35:55 -0700
Subject: selftest/vm: support xfail in mremap_test

Use ksft_test_result_xfail for the tests which are expected to fail.

Link: https://lkml.kernel.org/r/20220420215721.4868-3-sidhartha.kumar@oracle.com
Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/mremap_test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/mremap_test.c b/tools/testing/selftests/vm/mremap_test.c
index 58775dab3cc6..db0270127aeb 100644
--- a/tools/testing/selftests/vm/mremap_test.c
+++ b/tools/testing/selftests/vm/mremap_test.c
@@ -268,7 +268,7 @@ static void run_mremap_test_case(struct test test_case, int *failures,
 
 	if (remap_time < 0) {
 		if (test_case.expect_failure)
-			ksft_test_result_pass("%s\n\tExpected mremap failure\n",
+			ksft_test_result_xfail("%s\n\tExpected mremap failure\n",
 					      test_case.name);
 		else {
 			ksft_test_result_fail("%s\n", test_case.name);
-- 
cgit 


From 80df2fb95df26c849c6cc137344013cc048a083f Mon Sep 17 00:00:00 2001
From: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Date: Thu, 21 Apr 2022 16:35:58 -0700
Subject: selftest/vm: add skip support to mremap_test

Allow the mremap test to be skipped due to errors such as failing to
parse the mmap_min_addr sysctl.

Link: https://lkml.kernel.org/r/20220420215721.4868-4-sidhartha.kumar@oracle.com
Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/run_vmtests.sh | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh
index 3b265f140c25..352ba00cf26b 100755
--- a/tools/testing/selftests/vm/run_vmtests.sh
+++ b/tools/testing/selftests/vm/run_vmtests.sh
@@ -291,11 +291,16 @@ echo "-------------------"
 echo "running mremap_test"
 echo "-------------------"
 ./mremap_test
-if [ $? -ne 0 ]; then
+ret_val=$?
+
+if [ $ret_val -eq 0 ]; then
+	echo "[PASS]"
+elif [ $ret_val -eq $ksft_skip ]; then
+	 echo "[SKIP]"
+	 exitcode=$ksft_skip
+else
 	echo "[FAIL]"
 	exitcode=1
-else
-	echo "[PASS]"
 fi
 
 echo "-----------------"
-- 
cgit 


From b3625b1324a56ff1194734c9b84a51b05e14a419 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 21 Apr 2022 17:29:05 +0200
Subject: selftests: firmware: Use smaller dictionary for XZ compression

The xz -9 option leads to an unnecessarily too large dictionary that
isn't really suitable for the kernel firmware loader.  Pass the
dictionary size explicitly, instead.

While we're at it, make the xz command call defined in $RUN_XZ for
simplicity.

Fixes: 108ae07c5036 ("selftests: firmware: Add compressed firmware tests")
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Link: https://lore.kernel.org/r/20220421152908.4718-3-tiwai@suse.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/testing/selftests/firmware/fw_filesystem.sh | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh
index c2a2a100114b..731f011def78 100755
--- a/tools/testing/selftests/firmware/fw_filesystem.sh
+++ b/tools/testing/selftests/firmware/fw_filesystem.sh
@@ -11,6 +11,8 @@ TEST_REQS_FW_SET_CUSTOM_PATH="yes"
 TEST_DIR=$(dirname $0)
 source $TEST_DIR/fw_lib.sh
 
+RUN_XZ="xz -C crc32 --lzma2=dict=2MiB"
+
 check_mods
 check_setup
 verify_reqs
@@ -410,9 +412,9 @@ test_request_firmware_nowait_custom()
 	RANDOM_FILE_PATH=$(setup_random_file)
 	RANDOM_FILE="$(basename $RANDOM_FILE_PATH)"
 	if [ "$2" = "both" ]; then
-		xz -9 -C crc32 -k $RANDOM_FILE_PATH
+		$RUN_XZ -k $RANDOM_FILE_PATH
 	elif [ "$2" = "xzonly" ]; then
-		xz -9 -C crc32 $RANDOM_FILE_PATH
+		$RUN_XZ $RANDOM_FILE_PATH
 	fi
 	config_set_name $RANDOM_FILE
 	config_trigger_async
@@ -501,7 +503,7 @@ test_request_partial_firmware_into_buf_nofile 2 10
 test "$HAS_FW_LOADER_COMPRESS" != "yes" && exit 0
 
 # test with both files present
-xz -9 -C crc32 -k $FW
+$RUN_XZ -k $FW
 config_set_name $NAME
 echo
 echo "Testing with both plain and xz files present..."
-- 
cgit 


From 04c826d0726720bc69617a291fad6e482ecf83d6 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 21 Apr 2022 17:29:06 +0200
Subject: selftests: firmware: Fix the request_firmware_into_buf() test for XZ
 format

The test uses a different firmware name, and we forgot to adapt for
the XZ compressed file tests.

https://lore.kernel.org/all/20210127154939.13288-1-tiwai@suse.de/

Fixes: 1798045900b7 ("selftests: firmware: Add request_firmware_into_buf tests")
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Link: https://lore.kernel.org/r/20220421152908.4718-4-tiwai@suse.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/testing/selftests/firmware/fw_filesystem.sh | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh
index 731f011def78..3ac09b401a83 100755
--- a/tools/testing/selftests/firmware/fw_filesystem.sh
+++ b/tools/testing/selftests/firmware/fw_filesystem.sh
@@ -504,6 +504,7 @@ test "$HAS_FW_LOADER_COMPRESS" != "yes" && exit 0
 
 # test with both files present
 $RUN_XZ -k $FW
+$RUN_XZ -k $FW_INTO_BUF
 config_set_name $NAME
 echo
 echo "Testing with both plain and xz files present..."
@@ -529,6 +530,7 @@ done
 
 # test with only xz file present
 mv "$FW" "${FW}-orig"
+mv "$FW_INTO_BUF" "${FW_INTO_BUF}-orig"
 echo
 echo "Testing with only xz file present..."
 for i in $(seq 1 5); do
-- 
cgit 


From f18b45ff9ac7ff05beeebd62d7c25d58f38b1410 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 21 Apr 2022 17:29:07 +0200
Subject: selftests: firmware: Simplify test patterns

The test patterns are almost same in three sequential tests.
Make the unified helper function for improving the readability.

Link: https://lore.kernel.org/all/20210127154939.13288-1-tiwai@suse.de/
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Link: https://lore.kernel.org/r/20220421152908.4718-5-tiwai@suse.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/testing/selftests/firmware/fw_filesystem.sh | 106 ++++++----------------
 1 file changed, 30 insertions(+), 76 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh
index 3ac09b401a83..4a574be8b862 100755
--- a/tools/testing/selftests/firmware/fw_filesystem.sh
+++ b/tools/testing/selftests/firmware/fw_filesystem.sh
@@ -437,6 +437,32 @@ test_request_partial_firmware_into_buf()
 	echo "OK"
 }
 
+do_tests ()
+{
+	mode="$1"
+	suffix="$2"
+
+	for i in $(seq 1 5); do
+		test_batched_request_firmware$suffix $i $mode
+	done
+
+	for i in $(seq 1 5); do
+		test_batched_request_firmware_into_buf$suffix $i $mode
+	done
+
+	for i in $(seq 1 5); do
+		test_batched_request_firmware_direct$suffix $i $mode
+	done
+
+	for i in $(seq 1 5); do
+		test_request_firmware_nowait_uevent$suffix $i $mode
+	done
+
+	for i in $(seq 1 5); do
+		test_request_firmware_nowait_custom$suffix $i $mode
+	done
+}
+
 # Only continue if batched request triggers are present on the
 # test-firmware driver
 test_config_present
@@ -444,25 +470,7 @@ test_config_present
 # test with the file present
 echo
 echo "Testing with the file present..."
-for i in $(seq 1 5); do
-	test_batched_request_firmware $i normal
-done
-
-for i in $(seq 1 5); do
-	test_batched_request_firmware_into_buf $i normal
-done
-
-for i in $(seq 1 5); do
-	test_batched_request_firmware_direct $i normal
-done
-
-for i in $(seq 1 5); do
-	test_request_firmware_nowait_uevent $i normal
-done
-
-for i in $(seq 1 5); do
-	test_request_firmware_nowait_custom $i normal
-done
+do_tests normal
 
 # Partial loads cannot use fallback, so do not repeat tests.
 test_request_partial_firmware_into_buf 0 10
@@ -474,25 +482,7 @@ test_request_partial_firmware_into_buf 2 10
 # a hung task, which would require a hard reset.
 echo
 echo "Testing with the file missing..."
-for i in $(seq 1 5); do
-	test_batched_request_firmware_nofile $i
-done
-
-for i in $(seq 1 5); do
-	test_batched_request_firmware_into_buf_nofile $i
-done
-
-for i in $(seq 1 5); do
-	test_batched_request_firmware_direct_nofile $i
-done
-
-for i in $(seq 1 5); do
-	test_request_firmware_nowait_uevent_nofile $i
-done
-
-for i in $(seq 1 5); do
-	test_request_firmware_nowait_custom_nofile $i
-done
+do_tests nofile _nofile
 
 # Partial loads cannot use fallback, so do not repeat tests.
 test_request_partial_firmware_into_buf_nofile 0 10
@@ -508,49 +498,13 @@ $RUN_XZ -k $FW_INTO_BUF
 config_set_name $NAME
 echo
 echo "Testing with both plain and xz files present..."
-for i in $(seq 1 5); do
-	test_batched_request_firmware $i both
-done
-
-for i in $(seq 1 5); do
-	test_batched_request_firmware_into_buf $i both
-done
-
-for i in $(seq 1 5); do
-	test_batched_request_firmware_direct $i both
-done
-
-for i in $(seq 1 5); do
-	test_request_firmware_nowait_uevent $i both
-done
-
-for i in $(seq 1 5); do
-	test_request_firmware_nowait_custom $i both
-done
+do_tests both
 
 # test with only xz file present
 mv "$FW" "${FW}-orig"
 mv "$FW_INTO_BUF" "${FW_INTO_BUF}-orig"
 echo
 echo "Testing with only xz file present..."
-for i in $(seq 1 5); do
-	test_batched_request_firmware $i xzonly
-done
-
-for i in $(seq 1 5); do
-	test_batched_request_firmware_into_buf $i xzonly
-done
-
-for i in $(seq 1 5); do
-	test_batched_request_firmware_direct $i xzonly
-done
-
-for i in $(seq 1 5); do
-	test_request_firmware_nowait_uevent $i xzonly
-done
-
-for i in $(seq 1 5); do
-	test_request_firmware_nowait_custom $i xzonly
-done
+do_tests xzonly
 
 exit 0
-- 
cgit 


From bc67cac1032679ba5bcd8f2767f661248c2a0c9a Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 21 Apr 2022 17:29:08 +0200
Subject: selftests: firmware: Add ZSTD compressed file tests

It's similar like XZ compressed files.  For the simplicity, both XZ
and ZSTD tests are done in a single function.  The format is specified
via $COMPRESS_FORMAT and the compression function is pre-defined.

Link: https://lore.kernel.org/r/20210127154939.13288-5-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Link: https://lore.kernel.org/r/20220421152908.4718-6-tiwai@suse.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/testing/selftests/firmware/fw_filesystem.sh | 76 +++++++++++++++++------
 tools/testing/selftests/firmware/fw_lib.sh        | 12 +++-
 2 files changed, 65 insertions(+), 23 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh
index 4a574be8b862..1a99aea0549e 100755
--- a/tools/testing/selftests/firmware/fw_filesystem.sh
+++ b/tools/testing/selftests/firmware/fw_filesystem.sh
@@ -12,6 +12,7 @@ TEST_DIR=$(dirname $0)
 source $TEST_DIR/fw_lib.sh
 
 RUN_XZ="xz -C crc32 --lzma2=dict=2MiB"
+RUN_ZSTD="zstd -q"
 
 check_mods
 check_setup
@@ -213,7 +214,7 @@ read_firmwares()
 	else
 		fwfile="$FW"
 	fi
-	if [ "$1" = "xzonly" ]; then
+	if [ "$1" = "componly" ]; then
 		fwfile="${fwfile}-orig"
 	fi
 	for i in $(seq 0 3); do
@@ -237,7 +238,7 @@ read_partial_firmwares()
 		fwfile="${FW}"
 	fi
 
-	if [ "$1" = "xzonly" ]; then
+	if [ "$1" = "componly" ]; then
 		fwfile="${fwfile}-orig"
 	fi
 
@@ -411,10 +412,8 @@ test_request_firmware_nowait_custom()
 	config_unset_uevent
 	RANDOM_FILE_PATH=$(setup_random_file)
 	RANDOM_FILE="$(basename $RANDOM_FILE_PATH)"
-	if [ "$2" = "both" ]; then
-		$RUN_XZ -k $RANDOM_FILE_PATH
-	elif [ "$2" = "xzonly" ]; then
-		$RUN_XZ $RANDOM_FILE_PATH
+	if [ -n "$2" -a "$2" != "normal" ]; then
+		compress_"$2"_"$COMPRESS_FORMAT" $RANDOM_FILE_PATH
 	fi
 	config_set_name $RANDOM_FILE
 	config_trigger_async
@@ -490,21 +489,58 @@ test_request_partial_firmware_into_buf_nofile 0 5
 test_request_partial_firmware_into_buf_nofile 1 6
 test_request_partial_firmware_into_buf_nofile 2 10
 
-test "$HAS_FW_LOADER_COMPRESS" != "yes" && exit 0
+test_request_firmware_compressed ()
+{
+	export COMPRESS_FORMAT="$1"
 
-# test with both files present
-$RUN_XZ -k $FW
-$RUN_XZ -k $FW_INTO_BUF
-config_set_name $NAME
-echo
-echo "Testing with both plain and xz files present..."
-do_tests both
+	# test with both files present
+	compress_both_"$COMPRESS_FORMAT" $FW
+	compress_both_"$COMPRESS_FORMAT" $FW_INTO_BUF
 
-# test with only xz file present
-mv "$FW" "${FW}-orig"
-mv "$FW_INTO_BUF" "${FW_INTO_BUF}-orig"
-echo
-echo "Testing with only xz file present..."
-do_tests xzonly
+	config_set_name $NAME
+	echo
+	echo "Testing with both plain and $COMPRESS_FORMAT files present..."
+	do_tests both
+
+	# test with only compressed file present
+	mv "$FW" "${FW}-orig"
+	mv "$FW_INTO_BUF" "${FW_INTO_BUF}-orig"
+
+	config_set_name $NAME
+	echo
+	echo "Testing with only $COMPRESS_FORMAT file present..."
+	do_tests componly
+
+	mv "${FW}-orig" "$FW"
+	mv "${FW_INTO_BUF}-orig" "$FW_INTO_BUF"
+}
+
+compress_both_XZ ()
+{
+	$RUN_XZ -k "$@"
+}
+
+compress_componly_XZ ()
+{
+	$RUN_XZ "$@"
+}
+
+compress_both_ZSTD ()
+{
+	$RUN_ZSTD -k "$@"
+}
+
+compress_componly_ZSTD ()
+{
+	$RUN_ZSTD --rm "$@"
+}
+
+if test "$HAS_FW_LOADER_COMPRESS_XZ" = "yes"; then
+	test_request_firmware_compressed XZ
+fi
+
+if test "$HAS_FW_LOADER_COMPRESS_ZSTD" = "yes"; then
+	test_request_firmware_compressed ZSTD
+fi
 
 exit 0
diff --git a/tools/testing/selftests/firmware/fw_lib.sh b/tools/testing/selftests/firmware/fw_lib.sh
index 5b8c0fedee76..3fa8282b053b 100755
--- a/tools/testing/selftests/firmware/fw_lib.sh
+++ b/tools/testing/selftests/firmware/fw_lib.sh
@@ -62,7 +62,8 @@ check_setup()
 {
 	HAS_FW_LOADER_USER_HELPER="$(kconfig_has CONFIG_FW_LOADER_USER_HELPER=y)"
 	HAS_FW_LOADER_USER_HELPER_FALLBACK="$(kconfig_has CONFIG_FW_LOADER_USER_HELPER_FALLBACK=y)"
-	HAS_FW_LOADER_COMPRESS="$(kconfig_has CONFIG_FW_LOADER_COMPRESS=y)"
+	HAS_FW_LOADER_COMPRESS_XZ="$(kconfig_has CONFIG_FW_LOADER_COMPRESS_XZ=y)"
+	HAS_FW_LOADER_COMPRESS_ZSTD="$(kconfig_has CONFIG_FW_LOADER_COMPRESS_ZSTD=y)"
 	PROC_FW_IGNORE_SYSFS_FALLBACK="0"
 	PROC_FW_FORCE_SYSFS_FALLBACK="0"
 
@@ -98,9 +99,14 @@ check_setup()
 
 	OLD_FWPATH="$(cat /sys/module/firmware_class/parameters/path)"
 
-	if [ "$HAS_FW_LOADER_COMPRESS" = "yes" ]; then
+	if [ "$HAS_FW_LOADER_COMPRESS_XZ" = "yes" ]; then
 		if ! which xz 2> /dev/null > /dev/null; then
-			HAS_FW_LOADER_COMPRESS=""
+			HAS_FW_LOADER_COMPRESS_XZ=""
+		fi
+	fi
+	if [ "$HAS_FW_LOADER_COMPRESS_ZSTD" = "yes" ]; then
+		if ! which zstd 2> /dev/null > /dev/null; then
+			HAS_FW_LOADER_COMPRESS_ZSTD=""
 		fi
 	fi
 }
-- 
cgit 


From b9663a6ff8289a095d56d9a3a3f9c185a7b7b0d7 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 22 Apr 2022 13:20:21 -0400
Subject: tools: Add kmem_cache_alloc_lru()

Turn kmem_cache_alloc() into a wrapper around kmem_cache_alloc_lru().

Fixes: 9bbdc0f32409 ("xarray: use kmem_cache_alloc_lru to allocate xa_node")
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reported-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Reported-by: Li Wang <liwang@redhat.com>
---
 tools/include/linux/slab.h       | 8 +++++++-
 tools/testing/radix-tree/linux.c | 3 ++-
 2 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/include/linux/slab.h b/tools/include/linux/slab.h
index f41d8a0eb1a4..0616409513eb 100644
--- a/tools/include/linux/slab.h
+++ b/tools/include/linux/slab.h
@@ -28,7 +28,13 @@ static inline void *kzalloc(size_t size, gfp_t gfp)
 	return kmalloc(size, gfp | __GFP_ZERO);
 }
 
-void *kmem_cache_alloc(struct kmem_cache *cachep, int flags);
+struct list_lru;
+
+void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *, int flags);
+static inline void *kmem_cache_alloc(struct kmem_cache *cachep, int flags)
+{
+	return kmem_cache_alloc_lru(cachep, NULL, flags);
+}
 void kmem_cache_free(struct kmem_cache *cachep, void *objp);
 
 struct kmem_cache *kmem_cache_create(const char *name, unsigned int size,
diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c
index 81539f543954..d5c1bcba86fe 100644
--- a/tools/testing/radix-tree/linux.c
+++ b/tools/testing/radix-tree/linux.c
@@ -25,7 +25,8 @@ struct kmem_cache {
 	void (*ctor)(void *);
 };
 
-void *kmem_cache_alloc(struct kmem_cache *cachep, int gfp)
+void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
+		int gfp)
 {
 	void *p;
 
-- 
cgit 


From 820a4f88ee4636433a46fcf14054036d0f71e798 Mon Sep 17 00:00:00 2001
From: David Vernet <void@manifault.com>
Date: Fri, 22 Apr 2022 10:33:49 -0700
Subject: cgroup: Add new test_cpu.c test suite in cgroup selftests

The cgroup selftests suite currently contains tests that validate various
aspects of cgroup, such as validating the expected behavior for memory
controllers, the expected behavior of cgroup.procs, etc. There are no tests
that validate the expected behavior of the cgroup cpu controller.

This patch therefore adds a new test_cpu.c file that will contain cpu
controller testcases. The file currently only contains a single testcase
that validates creating nested cgroups with cgroup.subtree_control
including cpu. Future patches will add more sophisticated testcases that
validate functional aspects of the cpu controller.

Signed-off-by: David Vernet <void@manifault.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 tools/testing/selftests/cgroup/.gitignore |   1 +
 tools/testing/selftests/cgroup/Makefile   |   2 +
 tools/testing/selftests/cgroup/test_cpu.c | 110 ++++++++++++++++++++++++++++++
 3 files changed, 113 insertions(+)
 create mode 100644 tools/testing/selftests/cgroup/test_cpu.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore
index be9643ef6285..306ee1b01e72 100644
--- a/tools/testing/selftests/cgroup/.gitignore
+++ b/tools/testing/selftests/cgroup/.gitignore
@@ -4,3 +4,4 @@ test_core
 test_freezer
 test_kmem
 test_kill
+test_cpu
diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile
index 745fe25fa0b9..478217cc1371 100644
--- a/tools/testing/selftests/cgroup/Makefile
+++ b/tools/testing/selftests/cgroup/Makefile
@@ -10,6 +10,7 @@ TEST_GEN_PROGS += test_kmem
 TEST_GEN_PROGS += test_core
 TEST_GEN_PROGS += test_freezer
 TEST_GEN_PROGS += test_kill
+TEST_GEN_PROGS += test_cpu
 
 LOCAL_HDRS += $(selfdir)/clone3/clone3_selftests.h $(selfdir)/pidfd/pidfd.h
 
@@ -20,3 +21,4 @@ $(OUTPUT)/test_kmem: cgroup_util.c
 $(OUTPUT)/test_core: cgroup_util.c
 $(OUTPUT)/test_freezer: cgroup_util.c
 $(OUTPUT)/test_kill: cgroup_util.c
+$(OUTPUT)/test_cpu: cgroup_util.c
diff --git a/tools/testing/selftests/cgroup/test_cpu.c b/tools/testing/selftests/cgroup/test_cpu.c
new file mode 100644
index 000000000000..a724bff00d07
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_cpu.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <linux/limits.h>
+#include <stdio.h>
+
+#include "../kselftest.h"
+#include "cgroup_util.h"
+
+/*
+ * This test creates two nested cgroups with and without enabling
+ * the cpu controller.
+ */
+static int test_cpucg_subtree_control(const char *root)
+{
+	char *parent = NULL, *child = NULL, *parent2 = NULL, *child2 = NULL;
+	int ret = KSFT_FAIL;
+
+	// Create two nested cgroups with the cpu controller enabled.
+	parent = cg_name(root, "cpucg_test_0");
+	if (!parent)
+		goto cleanup;
+
+	if (cg_create(parent))
+		goto cleanup;
+
+	if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
+		goto cleanup;
+
+	child = cg_name(parent, "cpucg_test_child");
+	if (!child)
+		goto cleanup;
+
+	if (cg_create(child))
+		goto cleanup;
+
+	if (cg_read_strstr(child, "cgroup.controllers", "cpu"))
+		goto cleanup;
+
+	// Create two nested cgroups without enabling the cpu controller.
+	parent2 = cg_name(root, "cpucg_test_1");
+	if (!parent2)
+		goto cleanup;
+
+	if (cg_create(parent2))
+		goto cleanup;
+
+	child2 = cg_name(parent2, "cpucg_test_child");
+	if (!child2)
+		goto cleanup;
+
+	if (cg_create(child2))
+		goto cleanup;
+
+	if (!cg_read_strstr(child2, "cgroup.controllers", "cpu"))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	cg_destroy(child);
+	free(child);
+	cg_destroy(child2);
+	free(child2);
+	cg_destroy(parent);
+	free(parent);
+	cg_destroy(parent2);
+	free(parent2);
+
+	return ret;
+}
+
+#define T(x) { x, #x }
+struct cpucg_test {
+	int (*fn)(const char *root);
+	const char *name;
+} tests[] = {
+	T(test_cpucg_subtree_control),
+};
+#undef T
+
+int main(int argc, char *argv[])
+{
+	char root[PATH_MAX];
+	int i, ret = EXIT_SUCCESS;
+
+	if (cg_find_unified_root(root, sizeof(root)))
+		ksft_exit_skip("cgroup v2 isn't mounted\n");
+
+	if (cg_read_strstr(root, "cgroup.subtree_control", "cpu"))
+		if (cg_write(root, "cgroup.subtree_control", "+cpu"))
+			ksft_exit_skip("Failed to set cpu controller\n");
+
+	for (i = 0; i < ARRAY_SIZE(tests); i++) {
+		switch (tests[i].fn(root)) {
+		case KSFT_PASS:
+			ksft_test_result_pass("%s\n", tests[i].name);
+			break;
+		case KSFT_SKIP:
+			ksft_test_result_skip("%s\n", tests[i].name);
+			break;
+		default:
+			ret = EXIT_FAILURE;
+			ksft_test_result_fail("%s\n", tests[i].name);
+			break;
+		}
+	}
+
+	return ret;
+}
-- 
cgit 


From 3c879a1bb88792c05c2dd6a957423838b2830d73 Mon Sep 17 00:00:00 2001
From: David Vernet <void@manifault.com>
Date: Fri, 22 Apr 2022 10:33:51 -0700
Subject: cgroup: Add test_cpucg_stats() testcase to cgroup cpu selftests

test_cpu.c includes testcases that validate the cgroup cpu controller.
This patch adds a new testcase called test_cpucg_stats() that verifies the
expected behavior of the cpu.stat interface. In doing so, we define a
new hog_cpus_timed() function which takes a cpu_hog_func_param struct
that configures how many CPUs it uses, and how long it runs. Future
patches will also spawn threads that hog CPUs, so this function will
eventually serve those use-cases as well.

Signed-off-by: David Vernet <void@manifault.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 tools/testing/selftests/cgroup/cgroup_util.h |   3 +
 tools/testing/selftests/cgroup/test_cpu.c    | 128 +++++++++++++++++++++++++++
 2 files changed, 131 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h
index 628738532ac9..973774fa7bee 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.h
+++ b/tools/testing/selftests/cgroup/cgroup_util.h
@@ -8,6 +8,9 @@
 
 #define MB(x) (x << 20)
 
+#define USEC_PER_SEC	1000000L
+#define NSEC_PER_SEC	1000000000L
+
 /*
  * Checks if two given values differ by less than err% of their sum.
  */
diff --git a/tools/testing/selftests/cgroup/test_cpu.c b/tools/testing/selftests/cgroup/test_cpu.c
index a724bff00d07..3bd61964a262 100644
--- a/tools/testing/selftests/cgroup/test_cpu.c
+++ b/tools/testing/selftests/cgroup/test_cpu.c
@@ -2,11 +2,19 @@
 
 #define _GNU_SOURCE
 #include <linux/limits.h>
+#include <errno.h>
+#include <pthread.h>
 #include <stdio.h>
+#include <time.h>
 
 #include "../kselftest.h"
 #include "cgroup_util.h"
 
+struct cpu_hog_func_param {
+	int nprocs;
+	struct timespec ts;
+};
+
 /*
  * This test creates two nested cgroups with and without enabling
  * the cpu controller.
@@ -70,12 +78,132 @@ cleanup:
 	return ret;
 }
 
+static void *hog_cpu_thread_func(void *arg)
+{
+	while (1)
+		;
+
+	return NULL;
+}
+
+static struct timespec
+timespec_sub(const struct timespec *lhs, const struct timespec *rhs)
+{
+	struct timespec zero = {
+		.tv_sec = 0,
+		.tv_nsec = 0,
+	};
+	struct timespec ret;
+
+	if (lhs->tv_sec < rhs->tv_sec)
+		return zero;
+
+	ret.tv_sec = lhs->tv_sec - rhs->tv_sec;
+
+	if (lhs->tv_nsec < rhs->tv_nsec) {
+		if (ret.tv_sec == 0)
+			return zero;
+
+		ret.tv_sec--;
+		ret.tv_nsec = NSEC_PER_SEC - rhs->tv_nsec + lhs->tv_nsec;
+	} else
+		ret.tv_nsec = lhs->tv_nsec - rhs->tv_nsec;
+
+	return ret;
+}
+
+static int hog_cpus_timed(const char *cgroup, void *arg)
+{
+	const struct cpu_hog_func_param *param =
+		(struct cpu_hog_func_param *)arg;
+	struct timespec ts_run = param->ts;
+	struct timespec ts_remaining = ts_run;
+	int i, ret;
+
+	for (i = 0; i < param->nprocs; i++) {
+		pthread_t tid;
+
+		ret = pthread_create(&tid, NULL, &hog_cpu_thread_func, NULL);
+		if (ret != 0)
+			return ret;
+	}
+
+	while (ts_remaining.tv_sec > 0 || ts_remaining.tv_nsec > 0) {
+		struct timespec ts_total;
+
+		ret = nanosleep(&ts_remaining, NULL);
+		if (ret && errno != EINTR)
+			return ret;
+
+		ret = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts_total);
+		if (ret != 0)
+			return ret;
+
+		ts_remaining = timespec_sub(&ts_run, &ts_total);
+	}
+
+	return 0;
+}
+
+/*
+ * Creates a cpu cgroup, burns a CPU for a few quanta, and verifies that
+ * cpu.stat shows the expected output.
+ */
+static int test_cpucg_stats(const char *root)
+{
+	int ret = KSFT_FAIL;
+	long usage_usec, user_usec, system_usec;
+	long usage_seconds = 2;
+	long expected_usage_usec = usage_seconds * USEC_PER_SEC;
+	char *cpucg;
+
+	cpucg = cg_name(root, "cpucg_test");
+	if (!cpucg)
+		goto cleanup;
+
+	if (cg_create(cpucg))
+		goto cleanup;
+
+	usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec");
+	user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
+	system_usec = cg_read_key_long(cpucg, "cpu.stat", "system_usec");
+	if (usage_usec != 0 || user_usec != 0 || system_usec != 0)
+		goto cleanup;
+
+	struct cpu_hog_func_param param = {
+		.nprocs = 1,
+		.ts = {
+			.tv_sec = usage_seconds,
+			.tv_nsec = 0,
+		},
+	};
+	if (cg_run(cpucg, hog_cpus_timed, (void *)&param))
+		goto cleanup;
+
+	usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec");
+	user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
+	if (user_usec <= 0)
+		goto cleanup;
+
+	if (!values_close(usage_usec, expected_usage_usec, 1))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	cg_destroy(cpucg);
+	free(cpucg);
+
+	return ret;
+}
+
 #define T(x) { x, #x }
 struct cpucg_test {
 	int (*fn)(const char *root);
 	const char *name;
 } tests[] = {
 	T(test_cpucg_subtree_control),
+	T(test_cpucg_stats),
 };
 #undef T
 
-- 
cgit 


From 6376b22cd0a3455a534b6921b816ffab68ddc48f Mon Sep 17 00:00:00 2001
From: David Vernet <void@manifault.com>
Date: Fri, 22 Apr 2022 10:33:52 -0700
Subject: cgroup: Add test_cpucg_weight_overprovisioned() testcase

test_cpu.c includes testcases that validate the cgroup cpu controller.
This patch adds a new testcase called test_cpucg_weight_overprovisioned()
that verifies the expected behavior of creating multiple processes with
different cpu.weight, on a system that is overprovisioned.

So as to avoid code duplication, this patch also updates cpu_hog_func_param
to take a new hog_clock_type enum which informs how time is counted in
hog_cpus_timed() (either process time or wall clock time).

Signed-off-by: David Vernet <void@manifault.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 tools/testing/selftests/cgroup/cgroup_util.c |  12 +++
 tools/testing/selftests/cgroup/cgroup_util.h |   1 +
 tools/testing/selftests/cgroup/test_cpu.c    | 135 ++++++++++++++++++++++++++-
 3 files changed, 145 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c
index dbaa7aabbb4a..4297d580e3f8 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.c
+++ b/tools/testing/selftests/cgroup/cgroup_util.c
@@ -190,6 +190,18 @@ int cg_write(const char *cgroup, const char *control, char *buf)
 	return -1;
 }
 
+int cg_write_numeric(const char *cgroup, const char *control, long value)
+{
+	char buf[64];
+	int ret;
+
+	ret = sprintf(buf, "%lu", value);
+	if (ret < 0)
+		return ret;
+
+	return cg_write(cgroup, control, buf);
+}
+
 int cg_find_unified_root(char *root, size_t len)
 {
 	char buf[10 * PAGE_SIZE];
diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h
index 973774fa7bee..2ee2119281d7 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.h
+++ b/tools/testing/selftests/cgroup/cgroup_util.h
@@ -35,6 +35,7 @@ extern long cg_read_long(const char *cgroup, const char *control);
 long cg_read_key_long(const char *cgroup, const char *control, const char *key);
 extern long cg_read_lc(const char *cgroup, const char *control);
 extern int cg_write(const char *cgroup, const char *control, char *buf);
+int cg_write_numeric(const char *cgroup, const char *control, long value);
 extern int cg_run(const char *cgroup,
 		  int (*fn)(const char *cgroup, void *arg),
 		  void *arg);
diff --git a/tools/testing/selftests/cgroup/test_cpu.c b/tools/testing/selftests/cgroup/test_cpu.c
index 3bd61964a262..8d901c06c79d 100644
--- a/tools/testing/selftests/cgroup/test_cpu.c
+++ b/tools/testing/selftests/cgroup/test_cpu.c
@@ -2,6 +2,8 @@
 
 #define _GNU_SOURCE
 #include <linux/limits.h>
+#include <sys/sysinfo.h>
+#include <sys/wait.h>
 #include <errno.h>
 #include <pthread.h>
 #include <stdio.h>
@@ -10,9 +12,17 @@
 #include "../kselftest.h"
 #include "cgroup_util.h"
 
+enum hog_clock_type {
+	// Count elapsed time using the CLOCK_PROCESS_CPUTIME_ID clock.
+	CPU_HOG_CLOCK_PROCESS,
+	// Count elapsed time using system wallclock time.
+	CPU_HOG_CLOCK_WALL,
+};
+
 struct cpu_hog_func_param {
 	int nprocs;
 	struct timespec ts;
+	enum hog_clock_type clock_type;
 };
 
 /*
@@ -118,8 +128,13 @@ static int hog_cpus_timed(const char *cgroup, void *arg)
 		(struct cpu_hog_func_param *)arg;
 	struct timespec ts_run = param->ts;
 	struct timespec ts_remaining = ts_run;
+	struct timespec ts_start;
 	int i, ret;
 
+	ret = clock_gettime(CLOCK_MONOTONIC, &ts_start);
+	if (ret != 0)
+		return ret;
+
 	for (i = 0; i < param->nprocs; i++) {
 		pthread_t tid;
 
@@ -135,9 +150,19 @@ static int hog_cpus_timed(const char *cgroup, void *arg)
 		if (ret && errno != EINTR)
 			return ret;
 
-		ret = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts_total);
-		if (ret != 0)
-			return ret;
+		if (param->clock_type == CPU_HOG_CLOCK_PROCESS) {
+			ret = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts_total);
+			if (ret != 0)
+				return ret;
+		} else {
+			struct timespec ts_current;
+
+			ret = clock_gettime(CLOCK_MONOTONIC, &ts_current);
+			if (ret != 0)
+				return ret;
+
+			ts_total = timespec_sub(&ts_current, &ts_start);
+		}
 
 		ts_remaining = timespec_sub(&ts_run, &ts_total);
 	}
@@ -176,6 +201,7 @@ static int test_cpucg_stats(const char *root)
 			.tv_sec = usage_seconds,
 			.tv_nsec = 0,
 		},
+		.clock_type = CPU_HOG_CLOCK_PROCESS,
 	};
 	if (cg_run(cpucg, hog_cpus_timed, (void *)&param))
 		goto cleanup;
@@ -197,6 +223,108 @@ cleanup:
 	return ret;
 }
 
+/*
+ * First, this test creates the following hierarchy:
+ * A
+ * A/B     cpu.weight = 50
+ * A/C     cpu.weight = 100
+ * A/D     cpu.weight = 150
+ *
+ * A separate process is then created for each child cgroup which spawns as
+ * many threads as there are cores, and hogs each CPU as much as possible
+ * for some time interval.
+ *
+ * Once all of the children have exited, we verify that each child cgroup
+ * was given proportional runtime as informed by their cpu.weight.
+ */
+static int test_cpucg_weight_overprovisioned(const char *root)
+{
+	struct child {
+		char *cgroup;
+		pid_t pid;
+		long usage;
+	};
+	int ret = KSFT_FAIL, i;
+	char *parent = NULL;
+	struct child children[3] = {NULL};
+	long usage_seconds = 10;
+
+	parent = cg_name(root, "cpucg_test_0");
+	if (!parent)
+		goto cleanup;
+
+	if (cg_create(parent))
+		goto cleanup;
+
+	if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
+		goto cleanup;
+
+	for (i = 0; i < ARRAY_SIZE(children); i++) {
+		children[i].cgroup = cg_name_indexed(parent, "cpucg_child", i);
+		if (!children[i].cgroup)
+			goto cleanup;
+
+		if (cg_create(children[i].cgroup))
+			goto cleanup;
+
+		if (cg_write_numeric(children[i].cgroup, "cpu.weight",
+					50 * (i + 1)))
+			goto cleanup;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(children); i++) {
+		struct cpu_hog_func_param param = {
+			.nprocs = get_nprocs(),
+			.ts = {
+				.tv_sec = usage_seconds,
+				.tv_nsec = 0,
+			},
+			.clock_type = CPU_HOG_CLOCK_WALL,
+		};
+		pid_t pid = cg_run_nowait(children[i].cgroup, hog_cpus_timed,
+				(void *)&param);
+		if (pid <= 0)
+			goto cleanup;
+		children[i].pid = pid;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(children); i++) {
+		int retcode;
+
+		waitpid(children[i].pid, &retcode, 0);
+		if (!WIFEXITED(retcode))
+			goto cleanup;
+		if (WEXITSTATUS(retcode))
+			goto cleanup;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(children); i++)
+		children[i].usage = cg_read_key_long(children[i].cgroup,
+				"cpu.stat", "usage_usec");
+
+	for (i = 0; i < ARRAY_SIZE(children) - 1; i++) {
+		long delta;
+
+		if (children[i + 1].usage <= children[i].usage)
+			goto cleanup;
+
+		delta = children[i + 1].usage - children[i].usage;
+		if (!values_close(delta, children[0].usage, 35))
+			goto cleanup;
+	}
+
+	ret = KSFT_PASS;
+cleanup:
+	for (i = 0; i < ARRAY_SIZE(children); i++) {
+		cg_destroy(children[i].cgroup);
+		free(children[i].cgroup);
+	}
+	cg_destroy(parent);
+	free(parent);
+
+	return ret;
+}
+
 #define T(x) { x, #x }
 struct cpucg_test {
 	int (*fn)(const char *root);
@@ -204,6 +332,7 @@ struct cpucg_test {
 } tests[] = {
 	T(test_cpucg_subtree_control),
 	T(test_cpucg_stats),
+	T(test_cpucg_weight_overprovisioned),
 };
 #undef T
 
-- 
cgit 


From 4ab93063c83a2478863158799b027e9489ad4a40 Mon Sep 17 00:00:00 2001
From: David Vernet <void@manifault.com>
Date: Fri, 22 Apr 2022 10:33:53 -0700
Subject: cgroup: Add test_cpucg_weight_underprovisioned() testcase

test_cpu.c includes testcases that validate the cgroup cpu controller.
This patch adds a new testcase called test_cpucg_weight_underprovisioned()
that verifies that processes with different cpu.weight that are all running
on an underprovisioned system, still get roughly the same amount of cpu
time.

Because test_cpucg_weight_underprovisioned() is very similar to
test_cpucg_weight_overprovisioned(), this patch also pulls the common logic
into a separate helper function that is invoked from both testcases, and
which uses function pointers to invoke the unique portions of the
testcases.

Signed-off-by: David Vernet <void@manifault.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 tools/testing/selftests/cgroup/test_cpu.c | 155 ++++++++++++++++++++++--------
 1 file changed, 117 insertions(+), 38 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/cgroup/test_cpu.c b/tools/testing/selftests/cgroup/test_cpu.c
index 8d901c06c79d..64f9ce91c992 100644
--- a/tools/testing/selftests/cgroup/test_cpu.c
+++ b/tools/testing/selftests/cgroup/test_cpu.c
@@ -19,6 +19,12 @@ enum hog_clock_type {
 	CPU_HOG_CLOCK_WALL,
 };
 
+struct cpu_hogger {
+	char *cgroup;
+	pid_t pid;
+	long usage;
+};
+
 struct cpu_hog_func_param {
 	int nprocs;
 	struct timespec ts;
@@ -223,31 +229,15 @@ cleanup:
 	return ret;
 }
 
-/*
- * First, this test creates the following hierarchy:
- * A
- * A/B     cpu.weight = 50
- * A/C     cpu.weight = 100
- * A/D     cpu.weight = 150
- *
- * A separate process is then created for each child cgroup which spawns as
- * many threads as there are cores, and hogs each CPU as much as possible
- * for some time interval.
- *
- * Once all of the children have exited, we verify that each child cgroup
- * was given proportional runtime as informed by their cpu.weight.
- */
-static int test_cpucg_weight_overprovisioned(const char *root)
+static int
+run_cpucg_weight_test(
+		const char *root,
+		pid_t (*spawn_child)(const struct cpu_hogger *child),
+		int (*validate)(const struct cpu_hogger *children, int num_children))
 {
-	struct child {
-		char *cgroup;
-		pid_t pid;
-		long usage;
-	};
 	int ret = KSFT_FAIL, i;
 	char *parent = NULL;
-	struct child children[3] = {NULL};
-	long usage_seconds = 10;
+	struct cpu_hogger children[3] = {NULL};
 
 	parent = cg_name(root, "cpucg_test_0");
 	if (!parent)
@@ -273,16 +263,7 @@ static int test_cpucg_weight_overprovisioned(const char *root)
 	}
 
 	for (i = 0; i < ARRAY_SIZE(children); i++) {
-		struct cpu_hog_func_param param = {
-			.nprocs = get_nprocs(),
-			.ts = {
-				.tv_sec = usage_seconds,
-				.tv_nsec = 0,
-			},
-			.clock_type = CPU_HOG_CLOCK_WALL,
-		};
-		pid_t pid = cg_run_nowait(children[i].cgroup, hog_cpus_timed,
-				(void *)&param);
+		pid_t pid = spawn_child(&children[i]);
 		if (pid <= 0)
 			goto cleanup;
 		children[i].pid = pid;
@@ -302,7 +283,46 @@ static int test_cpucg_weight_overprovisioned(const char *root)
 		children[i].usage = cg_read_key_long(children[i].cgroup,
 				"cpu.stat", "usage_usec");
 
-	for (i = 0; i < ARRAY_SIZE(children) - 1; i++) {
+	if (validate(children, ARRAY_SIZE(children)))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+cleanup:
+	for (i = 0; i < ARRAY_SIZE(children); i++) {
+		cg_destroy(children[i].cgroup);
+		free(children[i].cgroup);
+	}
+	cg_destroy(parent);
+	free(parent);
+
+	return ret;
+}
+
+static pid_t weight_hog_ncpus(const struct cpu_hogger *child, int ncpus)
+{
+	long usage_seconds = 10;
+	struct cpu_hog_func_param param = {
+		.nprocs = ncpus,
+		.ts = {
+			.tv_sec = usage_seconds,
+			.tv_nsec = 0,
+		},
+		.clock_type = CPU_HOG_CLOCK_WALL,
+	};
+	return cg_run_nowait(child->cgroup, hog_cpus_timed, (void *)&param);
+}
+
+static pid_t weight_hog_all_cpus(const struct cpu_hogger *child)
+{
+	return weight_hog_ncpus(child, get_nprocs());
+}
+
+static int
+overprovision_validate(const struct cpu_hogger *children, int num_children)
+{
+	int ret = KSFT_FAIL, i;
+
+	for (i = 0; i < num_children - 1; i++) {
 		long delta;
 
 		if (children[i + 1].usage <= children[i].usage)
@@ -315,16 +335,74 @@ static int test_cpucg_weight_overprovisioned(const char *root)
 
 	ret = KSFT_PASS;
 cleanup:
-	for (i = 0; i < ARRAY_SIZE(children); i++) {
-		cg_destroy(children[i].cgroup);
-		free(children[i].cgroup);
+	return ret;
+}
+
+/*
+ * First, this test creates the following hierarchy:
+ * A
+ * A/B     cpu.weight = 50
+ * A/C     cpu.weight = 100
+ * A/D     cpu.weight = 150
+ *
+ * A separate process is then created for each child cgroup which spawns as
+ * many threads as there are cores, and hogs each CPU as much as possible
+ * for some time interval.
+ *
+ * Once all of the children have exited, we verify that each child cgroup
+ * was given proportional runtime as informed by their cpu.weight.
+ */
+static int test_cpucg_weight_overprovisioned(const char *root)
+{
+	return run_cpucg_weight_test(root, weight_hog_all_cpus,
+			overprovision_validate);
+}
+
+static pid_t weight_hog_one_cpu(const struct cpu_hogger *child)
+{
+	return weight_hog_ncpus(child, 1);
+}
+
+static int
+underprovision_validate(const struct cpu_hogger *children, int num_children)
+{
+	int ret = KSFT_FAIL, i;
+
+	for (i = 0; i < num_children - 1; i++) {
+		if (!values_close(children[i + 1].usage, children[0].usage, 15))
+			goto cleanup;
 	}
-	cg_destroy(parent);
-	free(parent);
 
+	ret = KSFT_PASS;
+cleanup:
 	return ret;
 }
 
+/*
+ * First, this test creates the following hierarchy:
+ * A
+ * A/B     cpu.weight = 50
+ * A/C     cpu.weight = 100
+ * A/D     cpu.weight = 150
+ *
+ * A separate process is then created for each child cgroup which spawns a
+ * single thread that hogs a CPU. The testcase is only run on systems that
+ * have at least one core per-thread in the child processes.
+ *
+ * Once all of the children have exited, we verify that each child cgroup
+ * had roughly the same runtime despite having different cpu.weight.
+ */
+static int test_cpucg_weight_underprovisioned(const char *root)
+{
+	// Only run the test if there are enough cores to avoid overprovisioning
+	// the system.
+	if (get_nprocs() < 4)
+		return KSFT_SKIP;
+
+	return run_cpucg_weight_test(root, weight_hog_one_cpu,
+			underprovision_validate);
+}
+
 #define T(x) { x, #x }
 struct cpucg_test {
 	int (*fn)(const char *root);
@@ -333,6 +411,7 @@ struct cpucg_test {
 	T(test_cpucg_subtree_control),
 	T(test_cpucg_stats),
 	T(test_cpucg_weight_overprovisioned),
+	T(test_cpucg_weight_underprovisioned),
 };
 #undef T
 
-- 
cgit 


From fd0493a1e49eb643ac86f3932b1bde60bcc9f402 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 20 Apr 2022 20:39:45 -0700
Subject: selftests/bpf: Switch fexit_stress to bpf_link_create() API

Use bpf_link_create() API in fexit_stress test to attach FEXIT programs.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Kui-Feng Lee <kuifeng@fb.com>
Link: https://lore.kernel.org/bpf/20220421033945.3602803-4-andrii@kernel.org
---
 tools/testing/selftests/bpf/prog_tests/fexit_stress.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
index 3ee2107bbf7a..fe1f0f26ea14 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
@@ -53,7 +53,7 @@ void test_fexit_stress(void)
 					    &trace_opts);
 		if (!ASSERT_GE(fexit_fd[i], 0, "fexit load"))
 			goto out;
-		link_fd[i] = bpf_raw_tracepoint_open(NULL, fexit_fd[i]);
+		link_fd[i] = bpf_link_create(fexit_fd[i], 0, BPF_TRACE_FEXIT, NULL);
 		if (!ASSERT_GE(link_fd[i], 0, "fexit attach"))
 			goto out;
 	}
-- 
cgit 


From 00f3d2ed9dac8fc8674a021765a0772f74c6127b Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Thu, 21 Apr 2022 15:48:04 +0200
Subject: wireguard: selftests: enable ACPI for SMP

It turns out that by having CONFIG_ACPI=n, we've been failing to boot
additional CPUs, and so these systems were functionally UP. The code
bloat is unfortunate for build times, but I don't see an alternative. So
this commit sets CONFIG_ACPI=y for x86_64 and i686 configs.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/wireguard/qemu/arch/i686.config   | 1 +
 tools/testing/selftests/wireguard/qemu/arch/x86_64.config | 1 +
 2 files changed, 2 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/wireguard/qemu/arch/i686.config b/tools/testing/selftests/wireguard/qemu/arch/i686.config
index a85025d7206e..a9b4fe795048 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/i686.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/i686.config
@@ -1,3 +1,4 @@
+CONFIG_ACPI=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_CMDLINE_BOOL=y
diff --git a/tools/testing/selftests/wireguard/qemu/arch/x86_64.config b/tools/testing/selftests/wireguard/qemu/arch/x86_64.config
index 00a1ef4869d5..45dd53a0d760 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/x86_64.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/x86_64.config
@@ -1,3 +1,4 @@
+CONFIG_ACPI=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_CMDLINE_BOOL=y
-- 
cgit 


From 8bd03be3418c7b723996b621414382427cd44dc0 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Fri, 22 Apr 2022 14:55:43 -0700
Subject: selftests: mptcp: add infinite map mibs check

This patch adds a function chk_infi_nr() to check the mibs for the
infinite mapping. Invoke it in chk_join_nr() when validate_checksum
is set.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 36 ++++++++++++++++++++++++-
 1 file changed, 35 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 7314257d248a..9eb4d889a24a 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -1106,6 +1106,38 @@ chk_rst_nr()
 	echo "$extra_msg"
 }
 
+chk_infi_nr()
+{
+	local infi_tx=$1
+	local infi_rx=$2
+	local count
+	local dump_stats
+
+	printf "%-${nr_blank}s %s" " " "itx"
+	count=$(ip netns exec $ns2 nstat -as | grep InfiniteMapTx | awk '{print $2}')
+	[ -z "$count" ] && count=0
+	if [ "$count" != "$infi_tx" ]; then
+		echo "[fail] got $count infinite map[s] TX expected $infi_tx"
+		fail_test
+		dump_stats=1
+	else
+		echo -n "[ ok ]"
+	fi
+
+	echo -n " - infirx"
+	count=$(ip netns exec $ns1 nstat -as | grep InfiniteMapRx | awk '{print $2}')
+	[ -z "$count" ] && count=0
+	if [ "$count" != "$infi_rx" ]; then
+		echo "[fail] got $count infinite map[s] RX expected $infi_rx"
+		fail_test
+		dump_stats=1
+	else
+		echo "[ ok ]"
+	fi
+
+	[ "${dump_stats}" = 1 ] && dump_stats
+}
+
 chk_join_nr()
 {
 	local syn_nr=$1
@@ -1115,7 +1147,8 @@ chk_join_nr()
 	local csum_ns2=${5:-0}
 	local fail_nr=${6:-0}
 	local rst_nr=${7:-0}
-	local corrupted_pkts=${8:-0}
+	local infi_nr=${8:-0}
+	local corrupted_pkts=${9:-0}
 	local count
 	local dump_stats
 	local with_cookie
@@ -1170,6 +1203,7 @@ chk_join_nr()
 		chk_csum_nr $csum_ns1 $csum_ns2
 		chk_fail_nr $fail_nr $fail_nr
 		chk_rst_nr $rst_nr $rst_nr
+		chk_infi_nr $infi_nr $infi_nr
 	fi
 }
 
-- 
cgit 


From b343734ee26537bc0b81a32c79e789e6387643cd Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 22 Apr 2022 13:14:57 +0300
Subject: selftests: forwarding: add option to run tests with stable MAC
 addresses

By default, DSA switch ports inherit their MAC address from the DSA
master.

This works well for practical situations, but some selftests like
bridge_vlan_unaware.sh loop back 2 standalone DSA ports with 2 bridged
DSA ports, and require the bridge to forward packets between the
standalone ports.

Due to the bridge seeing that the MAC DA it needs to forward is present
as a local FDB entry (it coincides with the MAC address of the bridge
ports), the test packets are not forwarded, but terminated locally on
br0. In turn, this makes the ping and ping6 tests fail.

Address this by introducing an option to have stable MAC addresses.
When mac_addr_prepare is called, the current addresses of the netifs are
saved and replaced with 00:01:02:03:04:${netif number}. Then when
mac_addr_restore is called at the end of the test, the original MAC
addresses are restored. This ensures that the MAC addresses are unique,
which makes the test pass even for DSA ports.

The usage model is for the behavior to be opt-in via STABLE_MAC_ADDRS,
which DSA should set to true, all others behave as before. By hooking
the calls to mac_addr_prepare and mac_addr_restore within the forwarding
lib itself, we do not need to patch each individual selftest, the only
requirement is that pre_cleanup is called.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/lib.sh | 36 +++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 664b9ecaf228..e3b3cdef3170 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -27,6 +27,7 @@ INTERFACE_TIMEOUT=${INTERFACE_TIMEOUT:=600}
 LOW_AGEING_TIME=${LOW_AGEING_TIME:=1000}
 REQUIRE_JQ=${REQUIRE_JQ:=yes}
 REQUIRE_MZ=${REQUIRE_MZ:=yes}
+STABLE_MAC_ADDRS=${STABLE_MAC_ADDRS:=no}
 
 relative_path="${BASH_SOURCE%/*}"
 if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then
@@ -214,10 +215,41 @@ create_netif()
 	esac
 }
 
+declare -A MAC_ADDR_ORIG
+mac_addr_prepare()
+{
+	local new_addr=
+	local dev=
+
+	for ((i = 1; i <= NUM_NETIFS; ++i)); do
+		dev=${NETIFS[p$i]}
+		new_addr=$(printf "00:01:02:03:04:%02x" $i)
+
+		MAC_ADDR_ORIG["$dev"]=$(ip -j link show dev $dev | jq -e '.[].address')
+		# Strip quotes
+		MAC_ADDR_ORIG["$dev"]=${MAC_ADDR_ORIG["$dev"]//\"/}
+		ip link set dev $dev address $new_addr
+	done
+}
+
+mac_addr_restore()
+{
+	local dev=
+
+	for ((i = 1; i <= NUM_NETIFS; ++i)); do
+		dev=${NETIFS[p$i]}
+		ip link set dev $dev address ${MAC_ADDR_ORIG["$dev"]}
+	done
+}
+
 if [[ "$NETIF_CREATE" = "yes" ]]; then
 	create_netif
 fi
 
+if [[ "$STABLE_MAC_ADDRS" = "yes" ]]; then
+	mac_addr_prepare
+fi
+
 for ((i = 1; i <= NUM_NETIFS; ++i)); do
 	ip link show dev ${NETIFS[p$i]} &> /dev/null
 	if [[ $? -ne 0 ]]; then
@@ -503,6 +535,10 @@ pre_cleanup()
 		echo "Pausing before cleanup, hit any key to continue"
 		read
 	fi
+
+	if [[ "$STABLE_MAC_ADDRS" = "yes" ]]; then
+		mac_addr_restore
+	fi
 }
 
 vrf_prepare()
-- 
cgit 


From fe32dffdcd33d34bc9bab267a55a8726074b0010 Mon Sep 17 00:00:00 2001
From: Joachim Wiberg <troglobit@gmail.com>
Date: Fri, 22 Apr 2022 13:14:58 +0300
Subject: selftests: forwarding: add TCPDUMP_EXTRA_FLAGS to lib.sh

For some use-cases we may want to change the tcpdump flags used in
tcpdump_start().  For instance, observing interfaces without the PROMISC
flag, e.g. to see what's really being forwarded to the bridge interface.

Signed-off-by: Joachim Wiberg <troglobit@gmail.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/lib.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
 mode change 100644 => 100755 tools/testing/selftests/net/forwarding/lib.sh

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
old mode 100644
new mode 100755
index e3b3cdef3170..de10451d7671
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -28,6 +28,7 @@ LOW_AGEING_TIME=${LOW_AGEING_TIME:=1000}
 REQUIRE_JQ=${REQUIRE_JQ:=yes}
 REQUIRE_MZ=${REQUIRE_MZ:=yes}
 STABLE_MAC_ADDRS=${STABLE_MAC_ADDRS:=no}
+TCPDUMP_EXTRA_FLAGS=${TCPDUMP_EXTRA_FLAGS:=}
 
 relative_path="${BASH_SOURCE%/*}"
 if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then
@@ -1405,7 +1406,7 @@ tcpdump_start()
 		capuser="-Z $SUDO_USER"
 	fi
 
-	$ns_cmd tcpdump -e -n -Q in -i $if_name \
+	$ns_cmd tcpdump $TCPDUMP_EXTRA_FLAGS -e -n -Q in -i $if_name \
 		-s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 &
 	cappid=$!
 
-- 
cgit 


From 6182c5c5098f350fd394df818b99acd075e37189 Mon Sep 17 00:00:00 2001
From: Joachim Wiberg <troglobit@gmail.com>
Date: Fri, 22 Apr 2022 13:14:59 +0300
Subject: selftests: forwarding: multiple instances in tcpdump helper

Extend tcpdump_start() & C:o to handle multiple instances.  Useful when
observing bridge operation, e.g., unicast learning/flooding, and any
case of multicast distribution (to these ports but not that one ...).

This means the interface argument is now a mandatory argument to all
tcpdump_*() functions, hence the changes to the ocelot flower test.

Signed-off-by: Joachim Wiberg <troglobit@gmail.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../drivers/net/ocelot/tc_flower_chains.sh         | 24 ++++++++++----------
 tools/testing/selftests/net/forwarding/lib.sh      | 26 ++++++++++++++++------
 2 files changed, 31 insertions(+), 19 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
index eaf8a04a7ca5..7e684e27a682 100755
--- a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
+++ b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
@@ -215,15 +215,15 @@ test_vlan_pop()
 
 	sleep 1
 
-	tcpdump_stop
+	tcpdump_stop $eth2
 
-	if tcpdump_show | grep -q "$eth3_mac > $eth2_mac, ethertype IPv4"; then
+	if tcpdump_show $eth2 | grep -q "$eth3_mac > $eth2_mac, ethertype IPv4"; then
 		echo "OK"
 	else
 		echo "FAIL"
 	fi
 
-	tcpdump_cleanup
+	tcpdump_cleanup $eth2
 }
 
 test_vlan_push()
@@ -236,15 +236,15 @@ test_vlan_push()
 
 	sleep 1
 
-	tcpdump_stop
+	tcpdump_stop $eth3.100
 
-	if tcpdump_show | grep -q "$eth2_mac > $eth3_mac"; then
+	if tcpdump_show $eth3.100 | grep -q "$eth2_mac > $eth3_mac"; then
 		echo "OK"
 	else
 		echo "FAIL"
 	fi
 
-	tcpdump_cleanup
+	tcpdump_cleanup $eth3.100
 }
 
 test_vlan_ingress_modify()
@@ -267,15 +267,15 @@ test_vlan_ingress_modify()
 
 	sleep 1
 
-	tcpdump_stop
+	tcpdump_stop $eth2
 
-	if tcpdump_show | grep -q "$eth3_mac > $eth2_mac, .* vlan 300"; then
+	if tcpdump_show $eth2 | grep -q "$eth3_mac > $eth2_mac, .* vlan 300"; then
 		echo "OK"
 	else
 		echo "FAIL"
 	fi
 
-	tcpdump_cleanup
+	tcpdump_cleanup $eth2
 
 	tc filter del dev $eth0 ingress chain $(IS1 2) pref 3
 
@@ -305,15 +305,15 @@ test_vlan_egress_modify()
 
 	sleep 1
 
-	tcpdump_stop
+	tcpdump_stop $eth2
 
-	if tcpdump_show | grep -q "$eth3_mac > $eth2_mac, .* vlan 300"; then
+	if tcpdump_show $eth2 | grep -q "$eth3_mac > $eth2_mac, .* vlan 300"; then
 		echo "OK"
 	else
 		echo "FAIL"
 	fi
 
-	tcpdump_cleanup
+	tcpdump_cleanup $eth2
 
 	tc filter del dev $eth1 egress chain $(ES0) pref 3
 	tc qdisc del dev $eth1 clsact
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index de10451d7671..7eff5ecf7565 100755
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -1386,13 +1386,17 @@ stop_traffic()
 	{ kill %% && wait %%; } 2>/dev/null
 }
 
+declare -A cappid
+declare -A capfile
+declare -A capout
+
 tcpdump_start()
 {
 	local if_name=$1; shift
 	local ns=$1; shift
 
-	capfile=$(mktemp)
-	capout=$(mktemp)
+	capfile[$if_name]=$(mktemp)
+	capout[$if_name]=$(mktemp)
 
 	if [ -z $ns ]; then
 		ns_cmd=""
@@ -1407,26 +1411,34 @@ tcpdump_start()
 	fi
 
 	$ns_cmd tcpdump $TCPDUMP_EXTRA_FLAGS -e -n -Q in -i $if_name \
-		-s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 &
-	cappid=$!
+		-s 65535 -B 32768 $capuser -w ${capfile[$if_name]} \
+		> "${capout[$if_name]}" 2>&1 &
+	cappid[$if_name]=$!
 
 	sleep 1
 }
 
 tcpdump_stop()
 {
-	$ns_cmd kill $cappid
+	local if_name=$1
+	local pid=${cappid[$if_name]}
+
+	$ns_cmd kill "$pid" && wait "$pid"
 	sleep 1
 }
 
 tcpdump_cleanup()
 {
-	rm $capfile $capout
+	local if_name=$1
+
+	rm ${capfile[$if_name]} ${capout[$if_name]}
 }
 
 tcpdump_show()
 {
-	tcpdump -e -n -r $capfile 2>&1
+	local if_name=$1
+
+	tcpdump -e -n -r ${capfile[$if_name]} 2>&1
 }
 
 # return 0 if the packet wasn't seen on host2_if or 1 if it was
-- 
cgit 


From f23cddc72294a345b4a8c3662b0ab6077c7583c7 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 22 Apr 2022 13:15:00 +0300
Subject: selftests: forwarding: add helpers for IP multicast group
 joins/leaves

Extend the forwarding library with calls to some small C programs which
join an IP multicast group and send some packets to it. Both IPv4 and
IPv6 groups are supported. Use cases range from testing IGMP/MLD
snooping, to RX filtering, to multicast routing.

Testing multicast traffic using msend/mreceive is intended to be done
using tcpdump.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/lib.sh | 38 +++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 7eff5ecf7565..15fb46b39fe8 100755
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -27,6 +27,7 @@ INTERFACE_TIMEOUT=${INTERFACE_TIMEOUT:=600}
 LOW_AGEING_TIME=${LOW_AGEING_TIME:=1000}
 REQUIRE_JQ=${REQUIRE_JQ:=yes}
 REQUIRE_MZ=${REQUIRE_MZ:=yes}
+REQUIRE_MTOOLS=${REQUIRE_MTOOLS:=no}
 STABLE_MAC_ADDRS=${STABLE_MAC_ADDRS:=no}
 TCPDUMP_EXTRA_FLAGS=${TCPDUMP_EXTRA_FLAGS:=}
 
@@ -161,6 +162,12 @@ fi
 if [[ "$REQUIRE_MZ" = "yes" ]]; then
 	require_command $MZ
 fi
+if [[ "$REQUIRE_MTOOLS" = "yes" ]]; then
+	# https://github.com/vladimiroltean/mtools/
+	# patched for IPv6 support
+	require_command msend
+	require_command mreceive
+fi
 
 if [[ ! -v NUM_NETIFS ]]; then
 	echo "SKIP: importer does not define \"NUM_NETIFS\""
@@ -1548,6 +1555,37 @@ brmcast_check_sg_state()
 	done
 }
 
+mc_join()
+{
+	local if_name=$1
+	local group=$2
+	local vrf_name=$(master_name_get $if_name)
+
+	# We don't care about actual reception, just about joining the
+	# IP multicast group and adding the L2 address to the device's
+	# MAC filtering table
+	ip vrf exec $vrf_name \
+		mreceive -g $group -I $if_name > /dev/null 2>&1 &
+	mreceive_pid=$!
+
+	sleep 1
+}
+
+mc_leave()
+{
+	kill "$mreceive_pid" && wait "$mreceive_pid"
+}
+
+mc_send()
+{
+	local if_name=$1
+	local groups=$2
+	local vrf_name=$(master_name_get $if_name)
+
+	ip vrf exec $vrf_name \
+		msend -g $groups -I $if_name -c 1 > /dev/null 2>&1
+}
+
 start_ip_monitor()
 {
 	local mtype=$1; shift
-- 
cgit 


From a5114df6c61336269558d3316079d25867716e64 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 22 Apr 2022 13:15:01 +0300
Subject: selftests: forwarding: add helper for retrieving IPv6 link-local
 address of interface

Pinging an IPv6 link-local multicast address selects the link-local
unicast address of the interface as source, and we'd like to monitor for
that in tcpdump.

Add a helper to the forwarding library which retrieves the link-local
IPv6 address of an interface, to make that task easier.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/lib.sh | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 15fb46b39fe8..5386c826e46a 100755
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -868,6 +868,15 @@ mac_get()
 	ip -j link show dev $if_name | jq -r '.[]["address"]'
 }
 
+ipv6_lladdr_get()
+{
+	local if_name=$1
+
+	ip -j addr show dev $if_name | \
+		jq -r '.[]["addr_info"][] | select(.scope == "link").local' | \
+		head -1
+}
+
 bridge_ageing_time_get()
 {
 	local bridge=$1
-- 
cgit 


From 476a4f05d9b83f78c2511c01376b40609ad83834 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 22 Apr 2022 13:15:02 +0300
Subject: selftests: forwarding: add a no_forwarding.sh test

Bombard a standalone switch port with various kinds of traffic to ensure
it is really standalone and doesn't leak packets to other switch ports.
Also check for switch ports in different bridges, and switch ports in a
VLAN-aware bridge but having different pvids. No forwarding should take
place in either case.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/net/forwarding/no_forwarding.sh      | 261 +++++++++++++++++++++
 1 file changed, 261 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/no_forwarding.sh

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/forwarding/no_forwarding.sh b/tools/testing/selftests/net/forwarding/no_forwarding.sh
new file mode 100755
index 000000000000..af3b398d13f0
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/no_forwarding.sh
@@ -0,0 +1,261 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="standalone two_bridges one_bridge_two_pvids"
+NUM_NETIFS=4
+
+source lib.sh
+
+h1=${NETIFS[p1]}
+h2=${NETIFS[p3]}
+swp1=${NETIFS[p2]}
+swp2=${NETIFS[p4]}
+
+H1_IPV4="192.0.2.1"
+H2_IPV4="192.0.2.2"
+H1_IPV6="2001:db8:1::1"
+H2_IPV6="2001:db8:1::2"
+
+IPV4_ALLNODES="224.0.0.1"
+IPV6_ALLNODES="ff02::1"
+MACV4_ALLNODES="01:00:5e:00:00:01"
+MACV6_ALLNODES="33:33:00:00:00:01"
+NON_IP_MC="01:02:03:04:05:06"
+NON_IP_PKT="00:04 48:45:4c:4f"
+BC="ff:ff:ff:ff:ff:ff"
+
+# The full 4K VLAN space is too much to check, so strategically pick some
+# values which should provide reasonable coverage
+vids=(0 1 2 5 10 20 50 100 200 500 1000 1000 2000 4000 4094)
+
+send_non_ip()
+{
+	local if_name=$1
+	local smac=$2
+	local dmac=$3
+
+	$MZ -q $if_name "$dmac $smac $NON_IP_PKT"
+}
+
+send_uc_ipv4()
+{
+	local if_name=$1
+	local dmac=$2
+
+	ip neigh add $H2_IPV4 lladdr $dmac dev $if_name
+	ping_do $if_name $H2_IPV4
+	ip neigh del $H2_IPV4 dev $if_name
+}
+
+send_mc_ipv4()
+{
+	local if_name=$1
+
+	ping_do $if_name $IPV4_ALLNODES "-I $if_name"
+}
+
+send_uc_ipv6()
+{
+	local if_name=$1
+	local dmac=$2
+
+	ip -6 neigh add $H2_IPV6 lladdr $dmac dev $if_name
+	ping6_do $if_name $H2_IPV6
+	ip -6 neigh del $H2_IPV6 dev $if_name
+}
+
+send_mc_ipv6()
+{
+	local if_name=$1
+
+	ping6_do $if_name $IPV6_ALLNODES%$if_name
+}
+
+check_rcv()
+{
+	local if_name=$1
+	local type=$2
+	local pattern=$3
+	local should_fail=1
+
+	RET=0
+
+	tcpdump_show $if_name | grep -q "$pattern"
+
+	check_err_fail "$should_fail" "$?" "reception"
+
+	log_test "$type"
+}
+
+run_test()
+{
+	local test_name="$1"
+	local smac=$(mac_get $h1)
+	local dmac=$(mac_get $h2)
+	local h1_ipv6_lladdr=$(ipv6_lladdr_get $h1)
+	local vid=
+
+	echo "$test_name: Sending packets"
+
+	tcpdump_start $h2
+
+	send_non_ip $h1 $smac $dmac
+	send_non_ip $h1 $smac $NON_IP_MC
+	send_non_ip $h1 $smac $BC
+	send_uc_ipv4 $h1 $dmac
+	send_mc_ipv4 $h1
+	send_uc_ipv6 $h1 $dmac
+	send_mc_ipv6 $h1
+
+	for vid in "${vids[@]}"; do
+		vlan_create $h1 $vid
+		simple_if_init $h1.$vid $H1_IPV4/24 $H1_IPV6/64
+
+		send_non_ip $h1.$vid $smac $dmac
+		send_non_ip $h1.$vid $smac $NON_IP_MC
+		send_non_ip $h1.$vid $smac $BC
+		send_uc_ipv4 $h1.$vid $dmac
+		send_mc_ipv4 $h1.$vid
+		send_uc_ipv6 $h1.$vid $dmac
+		send_mc_ipv6 $h1.$vid
+
+		simple_if_fini $h1.$vid $H1_IPV4/24 $H1_IPV6/64
+		vlan_destroy $h1 $vid
+	done
+
+	sleep 1
+
+	echo "$test_name: Checking which packets were received"
+
+	tcpdump_stop $h2
+
+	check_rcv $h2 "$test_name: Unicast non-IP untagged" \
+		"$smac > $dmac, 802.3, length 4:"
+
+	check_rcv $h2 "$test_name: Multicast non-IP untagged" \
+		"$smac > $NON_IP_MC, 802.3, length 4:"
+
+	check_rcv $h2 "$test_name: Broadcast non-IP untagged" \
+		"$smac > $BC, 802.3, length 4:"
+
+	check_rcv $h2 "$test_name: Unicast IPv4 untagged" \
+		"$smac > $dmac, ethertype IPv4 (0x0800)"
+
+	check_rcv $h2 "$test_name: Multicast IPv4 untagged" \
+		"$smac > $MACV4_ALLNODES, ethertype IPv4 (0x0800).*: $H1_IPV4 > $IPV4_ALLNODES"
+
+	check_rcv $h2 "$test_name: Unicast IPv6 untagged" \
+		"$smac > $dmac, ethertype IPv6 (0x86dd).*8: $H1_IPV6 > $H2_IPV6"
+
+	check_rcv $h2 "$test_name: Multicast IPv6 untagged" \
+		"$smac > $MACV6_ALLNODES, ethertype IPv6 (0x86dd).*: $h1_ipv6_lladdr > $IPV6_ALLNODES"
+
+	for vid in "${vids[@]}"; do
+		check_rcv $h2 "$test_name: Unicast non-IP VID $vid" \
+			"$smac > $dmac, ethertype 802.1Q (0x8100).*vlan $vid,.*length 4"
+
+		check_rcv $h2 "$test_name: Multicast non-IP VID $vid" \
+			"$smac > $NON_IP_MC, ethertype 802.1Q (0x8100).*vlan $vid,.*length 4"
+
+		check_rcv $h2 "$test_name: Broadcast non-IP VID $vid" \
+			"$smac > $BC, ethertype 802.1Q (0x8100).*vlan $vid,.*length 4"
+
+		check_rcv $h2 "$test_name: Unicast IPv4 VID $vid" \
+			"$smac > $dmac, ethertype 802.1Q (0x8100).*vlan $vid,.*ethertype IPv4 (0x0800), $H1_IPV4 > $H2_IPV4"
+
+		check_rcv $h2 "$test_name: Multicast IPv4 VID $vid" \
+			"$smac > $MACV4_ALLNODES, ethertype 802.1Q (0x8100).*vlan $vid,.*ethertype IPv4 (0x0800), $H1_IPV4 > $IPV4_ALLNODES"
+
+		check_rcv $h2 "$test_name: Unicast IPv6 VID $vid" \
+			"$smac > $dmac, ethertype 802.1Q (0x8100).*vlan $vid,.*ethertype IPv6 (0x86dd), $H1_IPV6 > $H2_IPV6"
+
+		check_rcv $h2 "$test_name: Multicast IPv6 VID $vid" \
+			"$smac > $MACV6_ALLNODES, ethertype 802.1Q (0x8100).*vlan $vid,.*ethertype IPv6 (0x86dd), $h1_ipv6_lladdr > $IPV6_ALLNODES"
+	done
+
+	tcpdump_cleanup $h2
+}
+
+standalone()
+{
+	run_test "Standalone switch ports"
+}
+
+two_bridges()
+{
+	ip link add br0 type bridge && ip link set br0 up
+	ip link add br1 type bridge && ip link set br1 up
+	ip link set $swp1 master br0
+	ip link set $swp2 master br1
+
+	run_test "Switch ports in different bridges"
+
+	ip link del br1
+	ip link del br0
+}
+
+one_bridge_two_pvids()
+{
+	ip link add br0 type bridge vlan_filtering 1 vlan_default_pvid 0
+	ip link set br0 up
+	ip link set $swp1 master br0
+	ip link set $swp2 master br0
+
+	bridge vlan add dev $swp1 vid 1 pvid untagged
+	bridge vlan add dev $swp1 vid 2 pvid untagged
+
+	run_test "Switch ports in VLAN-aware bridge with different PVIDs"
+
+	ip link del br0
+}
+
+h1_create()
+{
+	simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+setup_prepare()
+{
+	vrf_prepare
+
+	h1_create
+	h2_create
+	# we call simple_if_init from the test itself, but setup_wait expects
+	# that we call it from here, and waits until the interfaces are up
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit 


From 90b9566aa5cd3f99e5923d364ac976d5d3589fa6 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 22 Apr 2022 13:15:03 +0300
Subject: selftests: forwarding: add a test for local_termination.sh

This tests the capability of switch ports to filter out undesired
traffic. Different drivers are expected to have different capabilities
here (so some may fail and some may pass), yet the test still has some
value, for example to check for regressions.

There are 2 kinds of failures, one is when a packet which should have
been accepted isn't (and that should be fixed), and the other "failure"
(as reported by the test) is when a packet could have been filtered out
(for being unnecessary) yet it was received.

The bridge driver fares particularly badly at this test:

TEST: br0: Unicast IPv4 to primary MAC address                      [ OK ]
TEST: br0: Unicast IPv4 to macvlan MAC address                      [ OK ]
TEST: br0: Unicast IPv4 to unknown MAC address                      [FAIL]
        reception succeeded, but should have failed
TEST: br0: Unicast IPv4 to unknown MAC address, promisc             [ OK ]
TEST: br0: Unicast IPv4 to unknown MAC address, allmulti            [FAIL]
        reception succeeded, but should have failed
TEST: br0: Multicast IPv4 to joined group                           [ OK ]
TEST: br0: Multicast IPv4 to unknown group                          [FAIL]
        reception succeeded, but should have failed
TEST: br0: Multicast IPv4 to unknown group, promisc                 [ OK ]
TEST: br0: Multicast IPv4 to unknown group, allmulti                [ OK ]
TEST: br0: Multicast IPv6 to joined group                           [ OK ]
TEST: br0: Multicast IPv6 to unknown group                          [FAIL]
        reception succeeded, but should have failed
TEST: br0: Multicast IPv6 to unknown group, promisc                 [ OK ]
TEST: br0: Multicast IPv6 to unknown group, allmulti                [ OK ]

mainly because it does not implement IFF_UNICAST_FLT. Yet I still think
having the test (with the failures) is useful in case somebody wants to
tackle that problem in the future, to make an easy before-and-after
comparison.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/net/forwarding/local_termination.sh  | 299 +++++++++++++++++++++
 1 file changed, 299 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/local_termination.sh

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh
new file mode 100755
index 000000000000..c5b0cbc85b3e
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/local_termination.sh
@@ -0,0 +1,299 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="standalone bridge"
+NUM_NETIFS=2
+PING_COUNT=1
+REQUIRE_MTOOLS=yes
+REQUIRE_MZ=no
+
+source lib.sh
+
+H1_IPV4="192.0.2.1"
+H2_IPV4="192.0.2.2"
+H1_IPV6="2001:db8:1::1"
+H2_IPV6="2001:db8:1::2"
+
+BRIDGE_ADDR="00:00:de:ad:be:ee"
+MACVLAN_ADDR="00:00:de:ad:be:ef"
+UNKNOWN_UC_ADDR1="de:ad:be:ef:ee:03"
+UNKNOWN_UC_ADDR2="de:ad:be:ef:ee:04"
+UNKNOWN_UC_ADDR3="de:ad:be:ef:ee:05"
+JOINED_IPV4_MC_ADDR="225.1.2.3"
+UNKNOWN_IPV4_MC_ADDR1="225.1.2.4"
+UNKNOWN_IPV4_MC_ADDR2="225.1.2.5"
+UNKNOWN_IPV4_MC_ADDR3="225.1.2.6"
+JOINED_IPV6_MC_ADDR="ff2e::0102:0304"
+UNKNOWN_IPV6_MC_ADDR1="ff2e::0102:0305"
+UNKNOWN_IPV6_MC_ADDR2="ff2e::0102:0306"
+UNKNOWN_IPV6_MC_ADDR3="ff2e::0102:0307"
+
+JOINED_MACV4_MC_ADDR="01:00:5e:01:02:03"
+UNKNOWN_MACV4_MC_ADDR1="01:00:5e:01:02:04"
+UNKNOWN_MACV4_MC_ADDR2="01:00:5e:01:02:05"
+UNKNOWN_MACV4_MC_ADDR3="01:00:5e:01:02:06"
+JOINED_MACV6_MC_ADDR="33:33:01:02:03:04"
+UNKNOWN_MACV6_MC_ADDR1="33:33:01:02:03:05"
+UNKNOWN_MACV6_MC_ADDR2="33:33:01:02:03:06"
+UNKNOWN_MACV6_MC_ADDR3="33:33:01:02:03:07"
+
+NON_IP_MC="01:02:03:04:05:06"
+NON_IP_PKT="00:04 48:45:4c:4f"
+BC="ff:ff:ff:ff:ff:ff"
+
+# Disable promisc to ensure we don't receive unknown MAC DA packets
+export TCPDUMP_EXTRA_FLAGS="-pl"
+
+h1=${NETIFS[p1]}
+h2=${NETIFS[p2]}
+
+send_non_ip()
+{
+	local if_name=$1
+	local smac=$2
+	local dmac=$3
+
+	$MZ -q $if_name "$dmac $smac $NON_IP_PKT"
+}
+
+send_uc_ipv4()
+{
+	local if_name=$1
+	local dmac=$2
+
+	ip neigh add $H2_IPV4 lladdr $dmac dev $if_name
+	ping_do $if_name $H2_IPV4
+	ip neigh del $H2_IPV4 dev $if_name
+}
+
+check_rcv()
+{
+	local if_name=$1
+	local type=$2
+	local pattern=$3
+	local should_receive=$4
+	local should_fail=
+
+	[ $should_receive = true ] && should_fail=0 || should_fail=1
+	RET=0
+
+	tcpdump_show $if_name | grep -q "$pattern"
+
+	check_err_fail "$should_fail" "$?" "reception"
+
+	log_test "$if_name: $type"
+}
+
+mc_route_prepare()
+{
+	local if_name=$1
+	local vrf_name=$(master_name_get $if_name)
+
+	ip route add 225.100.1.0/24 dev $if_name vrf $vrf_name
+	ip -6 route add ff2e::/64 dev $if_name vrf $vrf_name
+}
+
+mc_route_destroy()
+{
+	local if_name=$1
+	local vrf_name=$(master_name_get $if_name)
+
+	ip route del 225.100.1.0/24 dev $if_name vrf $vrf_name
+	ip -6 route del ff2e::/64 dev $if_name vrf $vrf_name
+}
+
+run_test()
+{
+	local rcv_if_name=$1
+	local smac=$(mac_get $h1)
+	local rcv_dmac=$(mac_get $rcv_if_name)
+
+	tcpdump_start $rcv_if_name
+
+	mc_route_prepare $h1
+	mc_route_prepare $rcv_if_name
+
+	send_uc_ipv4 $h1 $rcv_dmac
+	send_uc_ipv4 $h1 $MACVLAN_ADDR
+	send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR1
+
+	ip link set dev $rcv_if_name promisc on
+	send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR2
+	mc_send $h1 $UNKNOWN_IPV4_MC_ADDR2
+	mc_send $h1 $UNKNOWN_IPV6_MC_ADDR2
+	ip link set dev $rcv_if_name promisc off
+
+	mc_join $rcv_if_name $JOINED_IPV4_MC_ADDR
+	mc_send $h1 $JOINED_IPV4_MC_ADDR
+	mc_leave
+
+	mc_join $rcv_if_name $JOINED_IPV6_MC_ADDR
+	mc_send $h1 $JOINED_IPV6_MC_ADDR
+	mc_leave
+
+	mc_send $h1 $UNKNOWN_IPV4_MC_ADDR1
+	mc_send $h1 $UNKNOWN_IPV6_MC_ADDR1
+
+	ip link set dev $rcv_if_name allmulticast on
+	send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR3
+	mc_send $h1 $UNKNOWN_IPV4_MC_ADDR3
+	mc_send $h1 $UNKNOWN_IPV6_MC_ADDR3
+	ip link set dev $rcv_if_name allmulticast off
+
+	mc_route_destroy $rcv_if_name
+	mc_route_destroy $h1
+
+	sleep 1
+
+	tcpdump_stop $rcv_if_name
+
+	check_rcv $rcv_if_name "Unicast IPv4 to primary MAC address" \
+		"$smac > $rcv_dmac, ethertype IPv4 (0x0800)" \
+		true
+
+	check_rcv $rcv_if_name "Unicast IPv4 to macvlan MAC address" \
+		"$smac > $MACVLAN_ADDR, ethertype IPv4 (0x0800)" \
+		true
+
+	check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \
+		"$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \
+		false
+
+	check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address, promisc" \
+		"$smac > $UNKNOWN_UC_ADDR2, ethertype IPv4 (0x0800)" \
+		true
+
+	check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address, allmulti" \
+		"$smac > $UNKNOWN_UC_ADDR3, ethertype IPv4 (0x0800)" \
+		false
+
+	check_rcv $rcv_if_name "Multicast IPv4 to joined group" \
+		"$smac > $JOINED_MACV4_MC_ADDR, ethertype IPv4 (0x0800)" \
+		true
+
+	check_rcv $rcv_if_name "Multicast IPv4 to unknown group" \
+		"$smac > $UNKNOWN_MACV4_MC_ADDR1, ethertype IPv4 (0x0800)" \
+		false
+
+	check_rcv $rcv_if_name "Multicast IPv4 to unknown group, promisc" \
+		"$smac > $UNKNOWN_MACV4_MC_ADDR2, ethertype IPv4 (0x0800)" \
+		true
+
+	check_rcv $rcv_if_name "Multicast IPv4 to unknown group, allmulti" \
+		"$smac > $UNKNOWN_MACV4_MC_ADDR3, ethertype IPv4 (0x0800)" \
+		true
+
+	check_rcv $rcv_if_name "Multicast IPv6 to joined group" \
+		"$smac > $JOINED_MACV6_MC_ADDR, ethertype IPv6 (0x86dd)" \
+		true
+
+	check_rcv $rcv_if_name "Multicast IPv6 to unknown group" \
+		"$smac > $UNKNOWN_MACV6_MC_ADDR1, ethertype IPv6 (0x86dd)" \
+		false
+
+	check_rcv $rcv_if_name "Multicast IPv6 to unknown group, promisc" \
+		"$smac > $UNKNOWN_MACV6_MC_ADDR2, ethertype IPv6 (0x86dd)" \
+		true
+
+	check_rcv $rcv_if_name "Multicast IPv6 to unknown group, allmulti" \
+		"$smac > $UNKNOWN_MACV6_MC_ADDR3, ethertype IPv6 (0x86dd)" \
+		true
+
+	tcpdump_cleanup $rcv_if_name
+}
+
+h1_create()
+{
+	simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+bridge_create()
+{
+	ip link add br0 type bridge
+	ip link set br0 address $BRIDGE_ADDR
+	ip link set br0 up
+
+	ip link set $h2 master br0
+	ip link set $h2 up
+
+	simple_if_init br0 $H2_IPV4/24 $H2_IPV6/64
+}
+
+bridge_destroy()
+{
+	simple_if_fini br0 $H2_IPV4/24 $H2_IPV6/64
+
+	ip link del br0
+}
+
+standalone()
+{
+	h1_create
+	h2_create
+
+	ip link add link $h2 name macvlan0 type macvlan mode private
+	ip link set macvlan0 address $MACVLAN_ADDR
+	ip link set macvlan0 up
+
+	run_test $h2
+
+	ip link del macvlan0
+
+	h2_destroy
+	h1_destroy
+}
+
+bridge()
+{
+	h1_create
+	bridge_create
+
+	ip link add link br0 name macvlan0 type macvlan mode private
+	ip link set macvlan0 address $MACVLAN_ADDR
+	ip link set macvlan0 up
+
+	run_test br0
+
+	ip link del macvlan0
+
+	bridge_destroy
+	h1_destroy
+}
+
+cleanup()
+{
+	pre_cleanup
+	vrf_cleanup
+}
+
+setup_prepare()
+{
+	vrf_prepare
+	# setup_wait() needs this
+	ip link set $h1 up
+	ip link set $h2 up
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit 


From 07c8a2dd69f6102adc12a621b4ef5e17d2a5b40d Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 22 Apr 2022 13:15:04 +0300
Subject: selftests: drivers: dsa: add a subset of forwarding selftests

This adds an initial subset of forwarding selftests which I considered
to be relevant for DSA drivers, along with a forwarding.config that
makes it easier to run them (disables veth pair creation, makes sure MAC
addresses are unique and stable).

The intention is to request driver writers to run these selftests during
review and make sure that the tests pass, or at least that the problems
are known.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh  | 1 +
 tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh          | 1 +
 tools/testing/selftests/drivers/net/dsa/bridge_mld.sh          | 1 +
 tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh   | 1 +
 tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh   | 1 +
 tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh | 1 +
 tools/testing/selftests/drivers/net/dsa/forwarding.config      | 2 ++
 tools/testing/selftests/drivers/net/dsa/lib.sh                 | 1 +
 tools/testing/selftests/drivers/net/dsa/local_termination.sh   | 1 +
 tools/testing/selftests/drivers/net/dsa/no_forwarding.sh       | 1 +
 10 files changed, 11 insertions(+)
 create mode 120000 tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh
 create mode 120000 tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh
 create mode 120000 tools/testing/selftests/drivers/net/dsa/bridge_mld.sh
 create mode 120000 tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh
 create mode 120000 tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh
 create mode 120000 tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh
 create mode 100644 tools/testing/selftests/drivers/net/dsa/forwarding.config
 create mode 120000 tools/testing/selftests/drivers/net/dsa/lib.sh
 create mode 120000 tools/testing/selftests/drivers/net/dsa/local_termination.sh
 create mode 120000 tools/testing/selftests/drivers/net/dsa/no_forwarding.sh

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh b/tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh
new file mode 120000
index 000000000000..f5eb940c4c7c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh
@@ -0,0 +1 @@
+../../../net/forwarding/bridge_locked_port.sh
\ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh b/tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh
new file mode 120000
index 000000000000..76492da525f7
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh
@@ -0,0 +1 @@
+../../../net/forwarding/bridge_mdb.sh
\ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_mld.sh b/tools/testing/selftests/drivers/net/dsa/bridge_mld.sh
new file mode 120000
index 000000000000..81a7e0df0474
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_mld.sh
@@ -0,0 +1 @@
+../../../net/forwarding/bridge_mld.sh
\ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh
new file mode 120000
index 000000000000..9831ed74376a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh
@@ -0,0 +1 @@
+../../../net/forwarding/bridge_vlan_aware.sh
\ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh
new file mode 120000
index 000000000000..7f3c3f0bf719
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh
@@ -0,0 +1 @@
+../../../net/forwarding/bridge_vlan_mcast.sh
\ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh
new file mode 120000
index 000000000000..bf1a57e6bde1
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh
@@ -0,0 +1 @@
+../../../net/forwarding/bridge_vlan_unaware.sh
\ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/forwarding.config b/tools/testing/selftests/drivers/net/dsa/forwarding.config
new file mode 100644
index 000000000000..7adc1396fae0
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/forwarding.config
@@ -0,0 +1,2 @@
+NETIF_CREATE=no
+STABLE_MAC_ADDRS=yes
diff --git a/tools/testing/selftests/drivers/net/dsa/lib.sh b/tools/testing/selftests/drivers/net/dsa/lib.sh
new file mode 120000
index 000000000000..39c96828c5ef
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/lib.sh
@@ -0,0 +1 @@
+../../../net/forwarding/lib.sh
\ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/local_termination.sh b/tools/testing/selftests/drivers/net/dsa/local_termination.sh
new file mode 120000
index 000000000000..c08166f84501
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/local_termination.sh
@@ -0,0 +1 @@
+../../../net/forwarding/local_termination.sh
\ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/no_forwarding.sh b/tools/testing/selftests/drivers/net/dsa/no_forwarding.sh
new file mode 120000
index 000000000000..b9757466bc97
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/no_forwarding.sh
@@ -0,0 +1 @@
+../../../net/forwarding/no_forwarding.sh
\ No newline at end of file
-- 
cgit 


From c92b576a13ad917e6f5bcee916e6004e711edfa7 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Thu, 21 Apr 2022 12:50:20 +0100
Subject: selftests: alsa: Start validating control names

Not much of a test but we keep on getting problems with boolean controls
not being called Switches so let's add a few basic checks to help people
spot problems.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20220421115020.14118-1-broonie@kernel.org
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 tools/testing/selftests/alsa/mixer-test.c | 41 ++++++++++++++++++++++++++++++-
 1 file changed, 40 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/alsa/mixer-test.c b/tools/testing/selftests/alsa/mixer-test.c
index eb2213540fe3..a38b89c28030 100644
--- a/tools/testing/selftests/alsa/mixer-test.c
+++ b/tools/testing/selftests/alsa/mixer-test.c
@@ -27,7 +27,7 @@
 
 #include "../kselftest.h"
 
-#define TESTS_PER_CONTROL 6
+#define TESTS_PER_CONTROL 7
 
 struct card_data {
 	snd_ctl_t *handle;
@@ -456,6 +456,44 @@ out:
 			 ctl->card->card, ctl->elem);
 }
 
+static bool strend(const char *haystack, const char *needle)
+{
+	size_t haystack_len = strlen(haystack);
+	size_t needle_len = strlen(needle);
+
+	if (needle_len > haystack_len)
+		return false;
+	return strcmp(haystack + haystack_len - needle_len, needle) == 0;
+}
+
+static void test_ctl_name(struct ctl_data *ctl)
+{
+	bool name_ok = true;
+	bool check;
+
+	/* Only boolean controls should end in Switch */
+	if (strend(ctl->name, " Switch")) {
+		if (snd_ctl_elem_info_get_type(ctl->info) != SND_CTL_ELEM_TYPE_BOOLEAN) {
+			ksft_print_msg("%d.%d %s ends in Switch but is not boolean\n",
+				       ctl->card->card, ctl->elem, ctl->name);
+			name_ok = false;
+		}
+	}
+
+	/* Writeable boolean controls should end in Switch */
+	if (snd_ctl_elem_info_get_type(ctl->info) == SND_CTL_ELEM_TYPE_BOOLEAN &&
+	    snd_ctl_elem_info_is_writable(ctl->info)) {
+		if (!strend(ctl->name, " Switch")) {
+			ksft_print_msg("%d.%d %s is a writeable boolean but not a Switch\n",
+				       ctl->card->card, ctl->elem, ctl->name);
+			name_ok = false;
+		}
+	}
+
+	ksft_test_result(name_ok, "name.%d.%d\n",
+			 ctl->card->card, ctl->elem);
+}
+
 static bool show_mismatch(struct ctl_data *ctl, int index,
 			  snd_ctl_elem_value_t *read_val,
 			  snd_ctl_elem_value_t *expected_val)
@@ -1062,6 +1100,7 @@ int main(void)
 		 * test stores the default value for later cleanup.
 		 */
 		test_ctl_get_value(ctl);
+		test_ctl_name(ctl);
 		test_ctl_write_default(ctl);
 		test_ctl_write_valid(ctl);
 		test_ctl_write_invalid(ctl);
-- 
cgit 


From 5e22298918258d8927b55604598c3ca37ef48e2f Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Mon, 25 Apr 2022 06:44:26 +0300
Subject: selftests: mlxsw: Check devices on provisioned line card

Once line card is provisioned, check the count of devices on it and
print them out.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../drivers/net/mlxsw/devlink_linecard.sh          | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh
index 08a922d8b86a..67b0e56cb413 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh
@@ -152,6 +152,7 @@ unprovision_test()
 
 LC_16X100G_TYPE="16x100G"
 LC_16X100G_PORT_COUNT=16
+LC_16X100G_DEVICE_COUNT=4
 
 supported_types_check()
 {
@@ -177,6 +178,26 @@ supported_types_check()
 	check_err $? "16X100G not found between supported types of linecard $lc"
 }
 
+lc_devices_check()
+{
+	local lc=$1
+	local expected_device_count=$2
+	local device_count
+	local device
+
+	device_count=$(devlink lc show $DEVLINK_DEV lc $lc -j | \
+		       jq -e -r ".[][][].devices |length")
+	check_err $? "Failed to get linecard $lc device count"
+	[ $device_count != 0 ]
+	check_err $? "No device found on linecard $lc"
+	[ $device_count == $expected_device_count ]
+	check_err $? "Unexpected device count on linecard $lc (got $expected_device_count, expected $device_count)"
+	for (( device=0; device<device_count; device++ ))
+	do
+		log_info "Linecard $lc device $device"
+	done
+}
+
 ports_check()
 {
 	local lc=$1
@@ -206,6 +227,7 @@ provision_test()
 		unprovision_one $lc
 	fi
 	provision_one $lc $LC_16X100G_TYPE
+	lc_devices_check $lc $LC_16X100G_DEVICE_COUNT
 	ports_check $lc $LC_16X100G_PORT_COUNT
 	log_test "Provision"
 }
-- 
cgit 


From 08682c9e58cde26736566445ccec23b4bdbefac6 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Mon, 25 Apr 2022 06:44:28 +0300
Subject: selftests: mlxsw: Check line card info on provisioned line card

Once line card is provisioned, check if HW revision and INI version
are exposed.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/drivers/net/mlxsw/devlink_linecard.sh     | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh
index 67b0e56cb413..04bedd98eb8b 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh
@@ -178,6 +178,22 @@ supported_types_check()
 	check_err $? "16X100G not found between supported types of linecard $lc"
 }
 
+lc_info_check()
+{
+	local lc=$1
+	local fixed_hw_revision
+	local running_ini_version
+
+	fixed_hw_revision=$(devlink lc -v info $DEVLINK_DEV lc $lc -j | \
+			    jq -e -r '.[][][].versions.fixed."hw.revision"')
+	check_err $? "Failed to get linecard $lc fixed.hw.revision"
+	log_info "Linecard $lc fixed.hw.revision: \"$fixed_hw_revision\""
+	running_ini_version=$(devlink lc -v info $DEVLINK_DEV lc $lc -j | \
+			      jq -e -r '.[][][].versions.running."ini.version"')
+	check_err $? "Failed to get linecard $lc running.ini.version"
+	log_info "Linecard $lc running.ini.version: \"$running_ini_version\""
+}
+
 lc_devices_check()
 {
 	local lc=$1
@@ -228,6 +244,7 @@ provision_test()
 	fi
 	provision_one $lc $LC_16X100G_TYPE
 	lc_devices_check $lc $LC_16X100G_DEVICE_COUNT
+	lc_info_check $lc
 	ports_check $lc $LC_16X100G_PORT_COUNT
 	log_test "Provision"
 }
-- 
cgit 


From 002defd576a3eb5d0f8bf11d27f0581ed0f34dd4 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Mon, 25 Apr 2022 06:44:31 +0300
Subject: selftests: mlxsw: Check device info on activated line card

Once line card is activated, check the device FW version is exposed.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../drivers/net/mlxsw/devlink_linecard.sh          | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh
index 04bedd98eb8b..53a65f416770 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh
@@ -259,6 +259,26 @@ interface_check()
 	setup_wait
 }
 
+lc_devices_info_check()
+{
+	local lc=$1
+	local expected_device_count=$2
+	local device_count
+	local device
+	local running_device_fw
+
+	device_count=$(devlink lc info $DEVLINK_DEV lc $lc -j | \
+		       jq -e -r ".[][][].devices |length")
+	check_err $? "Failed to get linecard $lc device count"
+	for (( device=0; device<device_count; device++ ))
+	do
+		running_device_fw=$(devlink lc -v info $DEVLINK_DEV lc $lc -j | \
+				    jq -e -r ".[][][].devices[$device].versions.running.fw")
+		check_err $? "Failed to get linecard $lc device $device running fw version"
+		log_info "Linecard $lc device $device running.fw: \"$running_device_fw\""
+	done
+}
+
 activation_16x100G_test()
 {
 	RET=0
@@ -275,6 +295,8 @@ activation_16x100G_test()
 		$ACTIVATION_TIMEOUT)
 	check_err $? "Failed to get linecard $lc activated (timeout)"
 
+	lc_devices_info_check $lc $LC_16X100G_DEVICE_COUNT
+
 	interface_check
 
 	log_test "Activation 16x100G"
-- 
cgit 


From ea1d15a067d601cf156c042bd0834acad67db650 Mon Sep 17 00:00:00 2001
From: Karthik Alapati <mail@karthek.com>
Date: Sat, 23 Apr 2022 11:14:34 +0530
Subject: selftests/binderfs: Improve message to provide more info

Currently the binderfs test says what failure it encountered
without saying why it may occurred when it fails to mount
binderfs. So, Warn about enabling CONFIG_ANDROID_BINDERFS in the
running kernel.

Signed-off-by: Karthik Alapati <mail@karthek.com>
Reviewed-by: Christian Brauner (Microsoft) <brauner@kernel.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/filesystems/binderfs/binderfs_test.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
index 0315955ff0f4..bc1c407651fc 100644
--- a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
+++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
@@ -412,7 +412,8 @@ TEST(binderfs_stress)
 
 		ret = mount(NULL, binderfs_mntpt, "binder", 0, 0);
 		ASSERT_EQ(ret, 0) {
-			TH_LOG("%s - Failed to mount binderfs", strerror(errno));
+			TH_LOG("%s - Failed to mount binderfs, check if CONFIG_ANDROID_BINDERFS is enabled in the running kernel",
+				strerror(errno));
 		}
 
 		for (int i = 0; i < ARRAY_SIZE(fds); i++) {
-- 
cgit 


From b76ee4f576ebfbab3891e51a531ec7ad7ef10a7a Mon Sep 17 00:00:00 2001
From: David Vernet <void@manifault.com>
Date: Sat, 23 Apr 2022 05:30:50 -0700
Subject: cgroup: Adding test_cpucg_nested_weight_overprovisioned() testcase

The cgroup cpu controller tests in
tools/testing/selftests/cgroup/test_cpu.c have some testcases that validate
the expected behavior of setting cpu.weight on cgroups, and then hogging
CPUs. What is still missing from the suite is a testcase that validates
nested cgroups. This patch adds test_cpucg_nested_weight_overprovisioned(),
which validates that a parent's cpu.weight will override its children if
they overcommit a host, and properly protect any sibling groups of that
parent.

Signed-off-by: David Vernet <void@manifault.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 tools/testing/selftests/cgroup/test_cpu.c | 122 ++++++++++++++++++++++++++++++
 1 file changed, 122 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/cgroup/test_cpu.c b/tools/testing/selftests/cgroup/test_cpu.c
index 64f9ce91c992..fc90b4d0feb9 100644
--- a/tools/testing/selftests/cgroup/test_cpu.c
+++ b/tools/testing/selftests/cgroup/test_cpu.c
@@ -403,6 +403,127 @@ static int test_cpucg_weight_underprovisioned(const char *root)
 			underprovision_validate);
 }
 
+/*
+ * First, this test creates the following hierarchy:
+ * A
+ * A/B     cpu.weight = 1000
+ * A/C     cpu.weight = 1000
+ * A/C/D   cpu.weight = 5000
+ * A/C/E   cpu.weight = 5000
+ *
+ * A separate process is then created for each leaf, which spawn nproc threads
+ * that burn a CPU for a few seconds.
+ *
+ * Once all of those processes have exited, we verify that each of the leaf
+ * cgroups have roughly the same usage from cpu.stat.
+ */
+static int
+test_cpucg_nested_weight_overprovisioned(const char *root)
+{
+	int ret = KSFT_FAIL, i;
+	char *parent = NULL, *child = NULL;
+	struct cpu_hogger leaf[3] = {NULL};
+	long nested_leaf_usage, child_usage;
+	int nprocs = get_nprocs();
+
+	parent = cg_name(root, "cpucg_test");
+	child = cg_name(parent, "cpucg_child");
+	if (!parent || !child)
+		goto cleanup;
+
+	if (cg_create(parent))
+		goto cleanup;
+	if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
+		goto cleanup;
+
+	if (cg_create(child))
+		goto cleanup;
+	if (cg_write(child, "cgroup.subtree_control", "+cpu"))
+		goto cleanup;
+	if (cg_write(child, "cpu.weight", "1000"))
+		goto cleanup;
+
+	for (i = 0; i < ARRAY_SIZE(leaf); i++) {
+		const char *ancestor;
+		long weight;
+
+		if (i == 0) {
+			ancestor = parent;
+			weight = 1000;
+		} else {
+			ancestor = child;
+			weight = 5000;
+		}
+		leaf[i].cgroup = cg_name_indexed(ancestor, "cpucg_leaf", i);
+		if (!leaf[i].cgroup)
+			goto cleanup;
+
+		if (cg_create(leaf[i].cgroup))
+			goto cleanup;
+
+		if (cg_write_numeric(leaf[i].cgroup, "cpu.weight", weight))
+			goto cleanup;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(leaf); i++) {
+		pid_t pid;
+		struct cpu_hog_func_param param = {
+			.nprocs = nprocs,
+			.ts = {
+				.tv_sec = 10,
+				.tv_nsec = 0,
+			},
+			.clock_type = CPU_HOG_CLOCK_WALL,
+		};
+
+		pid = cg_run_nowait(leaf[i].cgroup, hog_cpus_timed,
+				(void *)&param);
+		if (pid <= 0)
+			goto cleanup;
+		leaf[i].pid = pid;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(leaf); i++) {
+		int retcode;
+
+		waitpid(leaf[i].pid, &retcode, 0);
+		if (!WIFEXITED(retcode))
+			goto cleanup;
+		if (WEXITSTATUS(retcode))
+			goto cleanup;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(leaf); i++) {
+		leaf[i].usage = cg_read_key_long(leaf[i].cgroup,
+				"cpu.stat", "usage_usec");
+		if (leaf[i].usage <= 0)
+			goto cleanup;
+	}
+
+	nested_leaf_usage = leaf[1].usage + leaf[2].usage;
+	if (!values_close(leaf[0].usage, nested_leaf_usage, 15))
+		goto cleanup;
+
+	child_usage = cg_read_key_long(child, "cpu.stat", "usage_usec");
+	if (child_usage <= 0)
+		goto cleanup;
+	if (!values_close(child_usage, nested_leaf_usage, 1))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+cleanup:
+	for (i = 0; i < ARRAY_SIZE(leaf); i++) {
+		cg_destroy(leaf[i].cgroup);
+		free(leaf[i].cgroup);
+	}
+	cg_destroy(child);
+	free(child);
+	cg_destroy(parent);
+	free(parent);
+
+	return ret;
+}
+
 #define T(x) { x, #x }
 struct cpucg_test {
 	int (*fn)(const char *root);
@@ -412,6 +533,7 @@ struct cpucg_test {
 	T(test_cpucg_stats),
 	T(test_cpucg_weight_overprovisioned),
 	T(test_cpucg_weight_underprovisioned),
+	T(test_cpucg_nested_weight_overprovisioned),
 };
 #undef T
 
-- 
cgit 


From 89ca0efa8468f230df965257d0c03fc3664b4331 Mon Sep 17 00:00:00 2001
From: David Vernet <void@manifault.com>
Date: Sat, 23 Apr 2022 05:30:51 -0700
Subject: cgroup: Add test_cpucg_nested_weight_underprovisioned() testcase

The cgroup cpu controller test suite currently contains a testcase called
test_cpucg_nested_weight_underprovisioned() which verifies the expected
behavior of cpu.weight when applied to nested cgroups. That first testcase
validated the expected behavior when the processes in the leaf cgroups
overcommitted the system. This patch adds a complementary
test_cpucg_nested_weight_underprovisioned() testcase which validates
behavior when those leaf cgroups undercommit the system.

Signed-off-by: David Vernet <void@manifault.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 tools/testing/selftests/cgroup/test_cpu.c | 73 ++++++++++++++++++++++++-------
 1 file changed, 57 insertions(+), 16 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/cgroup/test_cpu.c b/tools/testing/selftests/cgroup/test_cpu.c
index fc90b4d0feb9..de6289814c23 100644
--- a/tools/testing/selftests/cgroup/test_cpu.c
+++ b/tools/testing/selftests/cgroup/test_cpu.c
@@ -403,22 +403,8 @@ static int test_cpucg_weight_underprovisioned(const char *root)
 			underprovision_validate);
 }
 
-/*
- * First, this test creates the following hierarchy:
- * A
- * A/B     cpu.weight = 1000
- * A/C     cpu.weight = 1000
- * A/C/D   cpu.weight = 5000
- * A/C/E   cpu.weight = 5000
- *
- * A separate process is then created for each leaf, which spawn nproc threads
- * that burn a CPU for a few seconds.
- *
- * Once all of those processes have exited, we verify that each of the leaf
- * cgroups have roughly the same usage from cpu.stat.
- */
 static int
-test_cpucg_nested_weight_overprovisioned(const char *root)
+run_cpucg_nested_weight_test(const char *root, bool overprovisioned)
 {
 	int ret = KSFT_FAIL, i;
 	char *parent = NULL, *child = NULL;
@@ -426,6 +412,16 @@ test_cpucg_nested_weight_overprovisioned(const char *root)
 	long nested_leaf_usage, child_usage;
 	int nprocs = get_nprocs();
 
+	if (!overprovisioned) {
+		if (nprocs < 4)
+			/*
+			 * Only run the test if there are enough cores to avoid overprovisioning
+			 * the system.
+			 */
+			return KSFT_SKIP;
+		nprocs /= 4;
+	}
+
 	parent = cg_name(root, "cpucg_test");
 	child = cg_name(parent, "cpucg_child");
 	if (!parent || !child)
@@ -501,9 +497,13 @@ test_cpucg_nested_weight_overprovisioned(const char *root)
 	}
 
 	nested_leaf_usage = leaf[1].usage + leaf[2].usage;
-	if (!values_close(leaf[0].usage, nested_leaf_usage, 15))
+	if (overprovisioned) {
+		if (!values_close(leaf[0].usage, nested_leaf_usage, 15))
+			goto cleanup;
+	} else if (!values_close(leaf[0].usage * 2, nested_leaf_usage, 15))
 		goto cleanup;
 
+
 	child_usage = cg_read_key_long(child, "cpu.stat", "usage_usec");
 	if (child_usage <= 0)
 		goto cleanup;
@@ -524,6 +524,46 @@ cleanup:
 	return ret;
 }
 
+/*
+ * First, this test creates the following hierarchy:
+ * A
+ * A/B     cpu.weight = 1000
+ * A/C     cpu.weight = 1000
+ * A/C/D   cpu.weight = 5000
+ * A/C/E   cpu.weight = 5000
+ *
+ * A separate process is then created for each leaf, which spawn nproc threads
+ * that burn a CPU for a few seconds.
+ *
+ * Once all of those processes have exited, we verify that each of the leaf
+ * cgroups have roughly the same usage from cpu.stat.
+ */
+static int
+test_cpucg_nested_weight_overprovisioned(const char *root)
+{
+	return run_cpucg_nested_weight_test(root, true);
+}
+
+/*
+ * First, this test creates the following hierarchy:
+ * A
+ * A/B     cpu.weight = 1000
+ * A/C     cpu.weight = 1000
+ * A/C/D   cpu.weight = 5000
+ * A/C/E   cpu.weight = 5000
+ *
+ * A separate process is then created for each leaf, which nproc / 4 threads
+ * that burns a CPU for a few seconds.
+ *
+ * Once all of those processes have exited, we verify that each of the leaf
+ * cgroups have roughly the same usage from cpu.stat.
+ */
+static int
+test_cpucg_nested_weight_underprovisioned(const char *root)
+{
+	return run_cpucg_nested_weight_test(root, false);
+}
+
 #define T(x) { x, #x }
 struct cpucg_test {
 	int (*fn)(const char *root);
@@ -534,6 +574,7 @@ struct cpucg_test {
 	T(test_cpucg_weight_overprovisioned),
 	T(test_cpucg_weight_underprovisioned),
 	T(test_cpucg_nested_weight_overprovisioned),
+	T(test_cpucg_nested_weight_underprovisioned),
 };
 #undef T
 
-- 
cgit 


From 889ab8113ef1386c57d64da106b850e752949f07 Mon Sep 17 00:00:00 2001
From: David Vernet <void@manifault.com>
Date: Sat, 23 Apr 2022 05:30:52 -0700
Subject: cgroup: Add test_cpucg_max() testcase

The cgroup cpu controller test suite has a number of testcases that
validate the expected behavior of the cpu.weight knob, but none for
cpu.max. This testcase fixes that by adding a testcase for cpu.max as well.

Signed-off-by: David Vernet <void@manifault.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 tools/testing/selftests/cgroup/test_cpu.c | 54 +++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/cgroup/test_cpu.c b/tools/testing/selftests/cgroup/test_cpu.c
index de6289814c23..715922c15c78 100644
--- a/tools/testing/selftests/cgroup/test_cpu.c
+++ b/tools/testing/selftests/cgroup/test_cpu.c
@@ -564,6 +564,59 @@ test_cpucg_nested_weight_underprovisioned(const char *root)
 	return run_cpucg_nested_weight_test(root, false);
 }
 
+/*
+ * This test creates a cgroup with some maximum value within a period, and
+ * verifies that a process in the cgroup is not overscheduled.
+ */
+static int test_cpucg_max(const char *root)
+{
+	int ret = KSFT_FAIL;
+	long usage_usec, user_usec;
+	long usage_seconds = 1;
+	long expected_usage_usec = usage_seconds * USEC_PER_SEC;
+	char *cpucg;
+
+	cpucg = cg_name(root, "cpucg_test");
+	if (!cpucg)
+		goto cleanup;
+
+	if (cg_create(cpucg))
+		goto cleanup;
+
+	if (cg_write(cpucg, "cpu.max", "1000"))
+		goto cleanup;
+
+	struct cpu_hog_func_param param = {
+		.nprocs = 1,
+		.ts = {
+			.tv_sec = usage_seconds,
+			.tv_nsec = 0,
+		},
+		.clock_type = CPU_HOG_CLOCK_WALL,
+	};
+	if (cg_run(cpucg, hog_cpus_timed, (void *)&param))
+		goto cleanup;
+
+	usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec");
+	user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
+	if (user_usec <= 0)
+		goto cleanup;
+
+	if (user_usec >= expected_usage_usec)
+		goto cleanup;
+
+	if (values_close(usage_usec, expected_usage_usec, 95))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	cg_destroy(cpucg);
+	free(cpucg);
+
+	return ret;
+}
+
 #define T(x) { x, #x }
 struct cpucg_test {
 	int (*fn)(const char *root);
@@ -575,6 +628,7 @@ struct cpucg_test {
 	T(test_cpucg_weight_underprovisioned),
 	T(test_cpucg_nested_weight_overprovisioned),
 	T(test_cpucg_nested_weight_underprovisioned),
+	T(test_cpucg_max),
 };
 #undef T
 
-- 
cgit 


From a79906570f9646ae174dd0899ea54cc2eeffd788 Mon Sep 17 00:00:00 2001
From: David Vernet <void@manifault.com>
Date: Sat, 23 Apr 2022 05:30:53 -0700
Subject: cgroup: Add test_cpucg_max_nested() testcase

The cgroup cpu controller selftests have a test_cpucg_max() testcase
that validates the behavior of the cpu.max knob. Let's also add a
testcase that verifies that the behavior works correctly when set on a
nested cgroup.

Signed-off-by: David Vernet <void@manifault.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 tools/testing/selftests/cgroup/test_cpu.c | 63 +++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/cgroup/test_cpu.c b/tools/testing/selftests/cgroup/test_cpu.c
index 715922c15c78..24020a2c68dc 100644
--- a/tools/testing/selftests/cgroup/test_cpu.c
+++ b/tools/testing/selftests/cgroup/test_cpu.c
@@ -617,6 +617,68 @@ cleanup:
 	return ret;
 }
 
+/*
+ * This test verifies that a process inside of a nested cgroup whose parent
+ * group has a cpu.max value set, is properly throttled.
+ */
+static int test_cpucg_max_nested(const char *root)
+{
+	int ret = KSFT_FAIL;
+	long usage_usec, user_usec;
+	long usage_seconds = 1;
+	long expected_usage_usec = usage_seconds * USEC_PER_SEC;
+	char *parent, *child;
+
+	parent = cg_name(root, "cpucg_parent");
+	child = cg_name(parent, "cpucg_child");
+	if (!parent || !child)
+		goto cleanup;
+
+	if (cg_create(parent))
+		goto cleanup;
+
+	if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
+		goto cleanup;
+
+	if (cg_create(child))
+		goto cleanup;
+
+	if (cg_write(parent, "cpu.max", "1000"))
+		goto cleanup;
+
+	struct cpu_hog_func_param param = {
+		.nprocs = 1,
+		.ts = {
+			.tv_sec = usage_seconds,
+			.tv_nsec = 0,
+		},
+		.clock_type = CPU_HOG_CLOCK_WALL,
+	};
+	if (cg_run(child, hog_cpus_timed, (void *)&param))
+		goto cleanup;
+
+	usage_usec = cg_read_key_long(child, "cpu.stat", "usage_usec");
+	user_usec = cg_read_key_long(child, "cpu.stat", "user_usec");
+	if (user_usec <= 0)
+		goto cleanup;
+
+	if (user_usec >= expected_usage_usec)
+		goto cleanup;
+
+	if (values_close(usage_usec, expected_usage_usec, 95))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	cg_destroy(child);
+	free(child);
+	cg_destroy(parent);
+	free(parent);
+
+	return ret;
+}
+
 #define T(x) { x, #x }
 struct cpucg_test {
 	int (*fn)(const char *root);
@@ -629,6 +691,7 @@ struct cpucg_test {
 	T(test_cpucg_nested_weight_overprovisioned),
 	T(test_cpucg_nested_weight_underprovisioned),
 	T(test_cpucg_max),
+	T(test_cpucg_max_nested),
 };
 #undef T
 
-- 
cgit 


From 5c26993c31f0f726aa1f90158f17ec95069b7cb2 Mon Sep 17 00:00:00 2001
From: David Vernet <void@manifault.com>
Date: Sat, 23 Apr 2022 05:30:54 -0700
Subject: cgroup: Add config file to cgroup selftest suite

Most of the test suites in tools/testing/selftests contain a config file
that specifies which kernel config options need to be present in order for
the test suite to be able to run and perform meaningful validation. There
is no config file for the tools/testing/selftests/cgroup test suite, so
this patch adds one.

Signed-off-by: David Vernet <void@manifault.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 tools/testing/selftests/cgroup/config | 8 ++++++++
 1 file changed, 8 insertions(+)
 create mode 100644 tools/testing/selftests/cgroup/config

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/cgroup/config b/tools/testing/selftests/cgroup/config
new file mode 100644
index 000000000000..84fe884fad86
--- /dev/null
+++ b/tools/testing/selftests/cgroup/config
@@ -0,0 +1,8 @@
+CONFIG_CGROUPS=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_MEMCG=y
+CONFIG_MEMCG_KMEM=y
+CONFIG_MEMCG_SWAP=y
+CONFIG_PAGE_COUNTER=y
-- 
cgit 


From 678f0cdc572c5fda940cb038d70eebb8d818adc8 Mon Sep 17 00:00:00 2001
From: Yuanchu Xie <yuanchu@google.com>
Date: Mon, 18 Apr 2022 20:20:17 +0000
Subject: selftests/damon: add damon to selftests root Makefile

Currently the damon selftests are not built with the rest of the
selftests. We add damon to the list of targets.

Fixes: b348eb7abd09 ("mm/damon: add user space selftests")
Reviewed-by: SeongJae Park <sj@kernel.org>
Signed-off-by: Yuanchu Xie <yuanchu@google.com>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/Makefile | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 2319ec87f53d..bd2ac8b3bf1f 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -9,6 +9,7 @@ TARGETS += clone3
 TARGETS += core
 TARGETS += cpufreq
 TARGETS += cpu-hotplug
+TARGETS += damon
 TARGETS += drivers/dma-buf
 TARGETS += efivarfs
 TARGETS += exec
-- 
cgit 


From a23039c7306f53416ba35d230201398ea34f4640 Mon Sep 17 00:00:00 2001
From: Reinette Chatre <reinette.chatre@intel.com>
Date: Mon, 25 Apr 2022 14:01:11 -0700
Subject: selftests: Provide local define of __cpuid_count()

Some selftests depend on information provided by the CPUID instruction.
To support this dependency the selftests implement private wrappers for
CPUID.

Duplication of the CPUID wrappers should be avoided.

Both gcc and clang/LLVM provide __cpuid_count() macros but neither
the macro nor its header file are available in all the compiler
versions that need to be supported by the selftests. __cpuid_count()
as provided by gcc is available starting with gcc v4.4, so it is
not available if the latest tests need to be run in all the
environments required to support kernels v4.9 and v4.14 that
have the minimal required gcc v3.2.

Duplicate gcc's __cpuid_count() macro to provide a centrally defined
macro for __cpuid_count() to help eliminate the duplicate CPUID wrappers
while continuing to compile in older environments.

Suggested-by: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/kselftest.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
index b8f248018174..33a0dbd26bd3 100644
--- a/tools/testing/selftests/kselftest.h
+++ b/tools/testing/selftests/kselftest.h
@@ -53,6 +53,21 @@
 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
 #endif
 
+/*
+ * gcc cpuid.h provides __cpuid_count() since v4.4.
+ * Clang/LLVM cpuid.h provides  __cpuid_count() since v3.4.0.
+ *
+ * Provide local define for tests needing __cpuid_count() because
+ * selftests need to work in older environments that do not yet
+ * have __cpuid_count().
+ */
+#ifndef __cpuid_count
+#define __cpuid_count(level, count, a, b, c, d)				\
+	__asm__ __volatile__ ("cpuid\n\t"				\
+			      : "=a" (a), "=b" (b), "=c" (c), "=d" (d)	\
+			      : "0" (level), "2" (count))
+#endif
+
 /* define kselftest exit codes */
 #define KSFT_PASS  0
 #define KSFT_FAIL  1
-- 
cgit 


From 0dba8dae6b0489c7ea5722273f3b2d70fd9756c5 Mon Sep 17 00:00:00 2001
From: Reinette Chatre <reinette.chatre@intel.com>
Date: Mon, 25 Apr 2022 14:01:12 -0700
Subject: selftests/vm/pkeys: Use provided __cpuid_count() macro

kselftest.h makes the __cpuid_count() macro available
to conveniently call the CPUID instruction.

Remove the local CPUID wrapper and use __cpuid_count()
from already included kselftest.h instead.

__cpuid_count() from kselftest.h is used instead of the
macro provided by the compiler since gcc v4.4 (via cpuid.h)
because the selftest needs to be compiled with gcc v3.2,
the minimal required version for stable kernels.

Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Sandipan Das <sandipan@linux.ibm.com>
Cc: Florian Weimer <fweimer@redhat.com>
Cc: "Desnes A. Nunes do Rosario" <desnesn@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thiago Jung Bauermann <bauerman@linux.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Suchanek <msuchanek@suse.de>
Cc: linux-mm@kvack.org
Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/vm/pkey-x86.h | 21 ++-------------------
 1 file changed, 2 insertions(+), 19 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h
index e4a4ce2b826d..b078ce9c6d2a 100644
--- a/tools/testing/selftests/vm/pkey-x86.h
+++ b/tools/testing/selftests/vm/pkey-x86.h
@@ -80,19 +80,6 @@ static inline void __write_pkey_reg(u64 pkey_reg)
 	assert(pkey_reg == __read_pkey_reg());
 }
 
-static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
-		unsigned int *ecx, unsigned int *edx)
-{
-	/* ecx is often an input as well as an output. */
-	asm volatile(
-		"cpuid;"
-		: "=a" (*eax),
-		  "=b" (*ebx),
-		  "=c" (*ecx),
-		  "=d" (*edx)
-		: "0" (*eax), "2" (*ecx));
-}
-
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx) */
 #define X86_FEATURE_PKU        (1<<3) /* Protection Keys for Userspace */
 #define X86_FEATURE_OSPKE      (1<<4) /* OS Protection Keys Enable */
@@ -104,9 +91,7 @@ static inline int cpu_has_pkeys(void)
 	unsigned int ecx;
 	unsigned int edx;
 
-	eax = 0x7;
-	ecx = 0x0;
-	__cpuid(&eax, &ebx, &ecx, &edx);
+	__cpuid_count(0x7, 0x0, eax, ebx, ecx, edx);
 
 	if (!(ecx & X86_FEATURE_PKU)) {
 		dprintf2("cpu does not have PKU\n");
@@ -142,9 +127,7 @@ int pkey_reg_xstate_offset(void)
 	/* assume that XSTATE_PKEY is set in XCR0 */
 	leaf = XSTATE_PKEY_BIT;
 	{
-		eax = XSTATE_CPUID;
-		ecx = leaf;
-		__cpuid(&eax, &ebx, &ecx, &edx);
+		__cpuid_count(XSTATE_CPUID, leaf, eax, ebx, ecx, edx);
 
 		if (leaf == XSTATE_PKEY_BIT) {
 			xstate_offset = ebx;
-- 
cgit 


From 2ba8a7abb5ef402d94830bcf599f645852eb0153 Mon Sep 17 00:00:00 2001
From: Reinette Chatre <reinette.chatre@intel.com>
Date: Mon, 25 Apr 2022 14:01:13 -0700
Subject: selftests/x86/amx: Use provided __cpuid_count() macro

kselftest.h makes the __cpuid_count() macro available
to conveniently call the CPUID instruction.

Remove the local CPUID wrapper and use __cpuid_count()
from kselftest.h instead.

__cpuid_count() from kselftest.h is used instead of the
macro provided by the compiler since gcc v4.4 (via cpuid.h)
because the selftest needs to be supported with gcc v3.2,
the minimal required version for stable kernels.

Cc: Chang S. Bae <chang.seok.bae@intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: x86@kernel.org
Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/x86/amx.c | 24 +++++++-----------------
 1 file changed, 7 insertions(+), 17 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/x86/amx.c b/tools/testing/selftests/x86/amx.c
index 2189f0322d8b..625e42901237 100644
--- a/tools/testing/selftests/x86/amx.c
+++ b/tools/testing/selftests/x86/amx.c
@@ -17,6 +17,8 @@
 #include <sys/syscall.h>
 #include <sys/wait.h>
 
+#include "../kselftest.h" /* For __cpuid_count() */
+
 #ifndef __x86_64__
 # error This test is 64-bit only
 #endif
@@ -45,13 +47,6 @@ static inline uint64_t xgetbv(uint32_t index)
 	return eax + ((uint64_t)edx << 32);
 }
 
-static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
-{
-	asm volatile("cpuid;"
-		     : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
-		     : "0" (*eax), "2" (*ecx));
-}
-
 static inline void xsave(struct xsave_buffer *xbuf, uint64_t rfbm)
 {
 	uint32_t rfbm_lo = rfbm;
@@ -115,9 +110,7 @@ static inline void check_cpuid_xsave(void)
 	 * support for the XSAVE feature set, including
 	 * XGETBV.
 	 */
-	eax = 1;
-	ecx = 0;
-	cpuid(&eax, &ebx, &ecx, &edx);
+	__cpuid_count(1, 0, eax, ebx, ecx, edx);
 	if (!(ecx & CPUID_LEAF1_ECX_XSAVE_MASK))
 		fatal_error("cpuid: no CPU xsave support");
 	if (!(ecx & CPUID_LEAF1_ECX_OSXSAVE_MASK))
@@ -140,9 +133,8 @@ static void check_cpuid_xtiledata(void)
 {
 	uint32_t eax, ebx, ecx, edx;
 
-	eax = CPUID_LEAF_XSTATE;
-	ecx = CPUID_SUBLEAF_XSTATE_USER;
-	cpuid(&eax, &ebx, &ecx, &edx);
+	__cpuid_count(CPUID_LEAF_XSTATE, CPUID_SUBLEAF_XSTATE_USER,
+		      eax, ebx, ecx, edx);
 
 	/*
 	 * EBX enumerates the size (in bytes) required by the XSAVE
@@ -153,10 +145,8 @@ static void check_cpuid_xtiledata(void)
 	 */
 	xbuf_size = ebx;
 
-	eax = CPUID_LEAF_XSTATE;
-	ecx = XFEATURE_XTILEDATA;
-
-	cpuid(&eax, &ebx, &ecx, &edx);
+	__cpuid_count(CPUID_LEAF_XSTATE, XFEATURE_XTILEDATA,
+		      eax, ebx, ecx, edx);
 	/*
 	 * eax: XTILEDATA state component size
 	 * ebx: XTILEDATA state component offset in user buffer
-- 
cgit 


From 170d1c23f2a356932259034f73d579d0bab857d6 Mon Sep 17 00:00:00 2001
From: Reinette Chatre <reinette.chatre@intel.com>
Date: Mon, 25 Apr 2022 14:01:14 -0700
Subject: selftests/x86/corrupt_xstate_header: Use provided __cpuid_count()
 macro

kselftest.h makes the __cpuid_count() macro available
to conveniently call the CPUID instruction.

Remove the local CPUID wrapper and use __cpuid_count()
from kselftest.h instead.

__cpuid_count() from kselftest.h is used instead of the
macro provided by the compiler since gcc v4.4 (via cpuid.h)
because the selftest needs to be supported with gcc v3.2,
the minimal required version for stable kernels.

Cc: Andy Lutomirski <luto@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: x86@kernel.org
Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/x86/corrupt_xstate_header.c | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/x86/corrupt_xstate_header.c b/tools/testing/selftests/x86/corrupt_xstate_header.c
index ab8599c10ce5..cf9ce8fbb656 100644
--- a/tools/testing/selftests/x86/corrupt_xstate_header.c
+++ b/tools/testing/selftests/x86/corrupt_xstate_header.c
@@ -17,25 +17,13 @@
 #include <stdint.h>
 #include <sys/wait.h>
 
-static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
-			   unsigned int *ecx, unsigned int *edx)
-{
-	asm volatile(
-		"cpuid;"
-		: "=a" (*eax),
-		  "=b" (*ebx),
-		  "=c" (*ecx),
-		  "=d" (*edx)
-		: "0" (*eax), "2" (*ecx));
-}
+#include "../kselftest.h" /* For __cpuid_count() */
 
 static inline int xsave_enabled(void)
 {
 	unsigned int eax, ebx, ecx, edx;
 
-	eax = 0x1;
-	ecx = 0x0;
-	__cpuid(&eax, &ebx, &ecx, &edx);
+	__cpuid_count(0x1, 0x0, eax, ebx, ecx, edx);
 
 	/* Is CR4.OSXSAVE enabled ? */
 	return ecx & (1U << 27);
-- 
cgit 


From 6220f69e72a534838cffd84dce6afd777777be03 Mon Sep 17 00:00:00 2001
From: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Date: Wed, 23 Mar 2022 17:09:27 +0900
Subject: selftests/resctrl: Extend CPU vendor detection

Currently, the resctrl_tests only has a function to detect AMD vendor.
Since when the Intel Sub-NUMA Clustering feature is enabled,
Intel CMT and MBM counters may not be accurate,
the resctrl_tests also need a function to detect Intel vendor.
And in the future, resctrl_tests will need a function to detect different
vendors, such as Arm.

Extend the function to detect Intel vendor as well. Also,
this function can be easily extended to detect other vendors.

Signed-off-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/resctrl/cat_test.c      |  2 +-
 tools/testing/selftests/resctrl/resctrl.h       |  5 ++-
 tools/testing/selftests/resctrl/resctrl_tests.c | 41 ++++++++++++++++---------
 tools/testing/selftests/resctrl/resctrlfs.c     |  2 +-
 4 files changed, 33 insertions(+), 17 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c
index cd4f68388e0f..1c5e90c63254 100644
--- a/tools/testing/selftests/resctrl/cat_test.c
+++ b/tools/testing/selftests/resctrl/cat_test.c
@@ -89,7 +89,7 @@ static int check_results(struct resctrl_val_param *param)
 
 	return show_cache_info(sum_llc_perf_miss, no_of_bits, param->span / 64,
 			       MAX_DIFF, MAX_DIFF_PERCENT, NUM_OF_RUNS,
-			       !is_amd, false);
+			       get_vendor() == ARCH_INTEL, false);
 }
 
 void cat_test_cleanup(void)
diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h
index 1ad10c47e31d..f0ded31fb3c7 100644
--- a/tools/testing/selftests/resctrl/resctrl.h
+++ b/tools/testing/selftests/resctrl/resctrl.h
@@ -34,6 +34,9 @@
 #define L3_MON_PATH		"/sys/fs/resctrl/info/L3_MON"
 #define L3_MON_FEATURES_PATH	"/sys/fs/resctrl/info/L3_MON/mon_features"
 
+#define ARCH_INTEL     1
+#define ARCH_AMD       2
+
 #define PARENT_EXIT(err_msg)			\
 	do {					\
 		perror(err_msg);		\
@@ -75,8 +78,8 @@ struct resctrl_val_param {
 extern pid_t bm_pid, ppid;
 
 extern char llc_occup_path[1024];
-extern bool is_amd;
 
+int get_vendor(void);
 bool check_resctrlfs_support(void);
 int filter_dmesg(void);
 int remount_resctrlfs(bool mum_resctrlfs);
diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c
index 973f09a66e1e..3e7cdf1125df 100644
--- a/tools/testing/selftests/resctrl/resctrl_tests.c
+++ b/tools/testing/selftests/resctrl/resctrl_tests.c
@@ -13,25 +13,41 @@
 #define BENCHMARK_ARGS		64
 #define BENCHMARK_ARG_SIZE	64
 
-bool is_amd;
-
-void detect_amd(void)
+static int detect_vendor(void)
 {
 	FILE *inf = fopen("/proc/cpuinfo", "r");
+	int vendor_id = 0;
+	char *s = NULL;
 	char *res;
 
 	if (!inf)
-		return;
+		return vendor_id;
 
 	res = fgrep(inf, "vendor_id");
 
-	if (res) {
-		char *s = strchr(res, ':');
+	if (res)
+		s = strchr(res, ':');
+
+	if (s && !strcmp(s, ": GenuineIntel\n"))
+		vendor_id = ARCH_INTEL;
+	else if (s && !strcmp(s, ": AuthenticAMD\n"))
+		vendor_id = ARCH_AMD;
 
-		is_amd = s && !strcmp(s, ": AuthenticAMD\n");
-		free(res);
-	}
 	fclose(inf);
+	free(res);
+	return vendor_id;
+}
+
+int get_vendor(void)
+{
+	static int vendor = -1;
+
+	if (vendor == -1)
+		vendor = detect_vendor();
+	if (vendor == 0)
+		ksft_print_msg("Can not get vendor info...\n");
+
+	return vendor;
 }
 
 static void cmd_help(void)
@@ -207,9 +223,6 @@ int main(int argc, char **argv)
 	if (geteuid() != 0)
 		return ksft_exit_fail_msg("Not running as root, abort testing.\n");
 
-	/* Detect AMD vendor */
-	detect_amd();
-
 	if (has_ben) {
 		/* Extract benchmark command from command line. */
 		for (i = ben_ind; i < argc; i++) {
@@ -241,10 +254,10 @@ int main(int argc, char **argv)
 
 	ksft_set_plan(tests ? : 4);
 
-	if (!is_amd && mbm_test)
+	if ((get_vendor() == ARCH_INTEL) && mbm_test)
 		run_mbm_test(has_ben, benchmark_cmd, span, cpu_no, bw_report);
 
-	if (!is_amd && mba_test)
+	if ((get_vendor() == ARCH_INTEL) && mba_test)
 		run_mba_test(has_ben, benchmark_cmd, span, cpu_no, bw_report);
 
 	if (cmt_test)
diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c
index 5f5a166ade60..6f543e470ad4 100644
--- a/tools/testing/selftests/resctrl/resctrlfs.c
+++ b/tools/testing/selftests/resctrl/resctrlfs.c
@@ -106,7 +106,7 @@ int get_resource_id(int cpu_no, int *resource_id)
 	char phys_pkg_path[1024];
 	FILE *fp;
 
-	if (is_amd)
+	if (get_vendor() == ARCH_AMD)
 		sprintf(phys_pkg_path, "%s%d/cache/index3/id",
 			PHYS_ID_PATH, cpu_no);
 	else
-- 
cgit 


From d577380da04e410a31e7f944b04d838ab1c8a1c3 Mon Sep 17 00:00:00 2001
From: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Date: Wed, 23 Mar 2022 17:09:28 +0900
Subject: selftests/resctrl: Print a message if the result of MBM&CMT tests is
 failed on Intel CPU

According to "Intel Resource Director Technology (Intel RDT) on
2nd Generation Intel Xeon Scalable Processors Reference Manual",
When the Intel Sub-NUMA Clustering(SNC) feature is enabled,
Intel CMT and MBM counters may not be accurate.

However, there does not seem to be an architectural way to detect
if SNC is enabled.

If the result of MBM&CMT test fails on Intel CPU,
print a message to let users know a possible cause of failure.

Acked-by: Reinette Chatre <reinette.chatre@intel.com>
Signed-off-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/resctrl/resctrl_tests.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c
index 3e7cdf1125df..f8da3ecf67ef 100644
--- a/tools/testing/selftests/resctrl/resctrl_tests.c
+++ b/tools/testing/selftests/resctrl/resctrl_tests.c
@@ -86,6 +86,8 @@ static void run_mbm_test(bool has_ben, char **benchmark_cmd, int span,
 		sprintf(benchmark_cmd[5], "%s", MBA_STR);
 	res = mbm_bw_change(span, cpu_no, bw_report, benchmark_cmd);
 	ksft_test_result(!res, "MBM: bw change\n");
+	if ((get_vendor() == ARCH_INTEL) && res)
+		ksft_print_msg("Intel MBM may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n");
 	mbm_test_cleanup();
 }
 
@@ -122,6 +124,8 @@ static void run_cmt_test(bool has_ben, char **benchmark_cmd, int cpu_no)
 		sprintf(benchmark_cmd[5], "%s", CMT_STR);
 	res = cmt_resctrl_val(cpu_no, 5, benchmark_cmd);
 	ksft_test_result(!res, "CMT: test\n");
+	if ((get_vendor() == ARCH_INTEL) && res)
+		ksft_print_msg("Intel CMT may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n");
 	cmt_test_cleanup();
 }
 
-- 
cgit 


From f54b3278164405fdd4f5f1b53f0d04e34ade27a6 Mon Sep 17 00:00:00 2001
From: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Date: Wed, 23 Mar 2022 17:12:22 +0900
Subject: selftests/resctrl: Kill child process before parent process
 terminates if SIGTERM is received

In kselftest framework, a sub test is run using the timeout utility
and it will send SIGTERM to the test upon timeout.

In resctrl_tests, a child process is created by fork() to
run benchmark but SIGTERM is not set in sigaction().
If SIGTERM signal is received, the parent process will be killed,
but the child process still exists.

Kill child process before the parent process terminates
if SIGTERM signal is received.

Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Reviewed-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/resctrl/resctrl_val.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c
index 95224345c78e..b32b96356ec7 100644
--- a/tools/testing/selftests/resctrl/resctrl_val.c
+++ b/tools/testing/selftests/resctrl/resctrl_val.c
@@ -678,6 +678,7 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param)
 	sigemptyset(&sigact.sa_mask);
 	sigact.sa_flags = SA_SIGINFO;
 	if (sigaction(SIGINT, &sigact, NULL) ||
+	    sigaction(SIGTERM, &sigact, NULL) ||
 	    sigaction(SIGHUP, &sigact, NULL)) {
 		perror("# sigaction");
 		ret = errno;
-- 
cgit 


From e2e3fb6ef0d6548defbe0be6e092397aaa92f3a1 Mon Sep 17 00:00:00 2001
From: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Date: Wed, 23 Mar 2022 17:12:23 +0900
Subject: selftests/resctrl: Change the default limited time to 120 seconds

When testing on a Intel(R) Xeon(R) Gold 6254 CPU @ 3.10GHz the resctrl
selftests fail due to timeout after exceeding the default time limit of
45 seconds. On this system the test takes about 68 seconds.
Since the failing test by default accesses a fixed size of memory, the
execution time should not vary significantly between different environment.
A new default of 120 seconds should be sufficient yet easy to customize
with the introduction of the "settings" file for reference.

Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Reviewed-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/resctrl/settings | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 tools/testing/selftests/resctrl/settings

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/resctrl/settings b/tools/testing/selftests/resctrl/settings
new file mode 100644
index 000000000000..a383f3d4565b
--- /dev/null
+++ b/tools/testing/selftests/resctrl/settings
@@ -0,0 +1,3 @@
+# If running time is longer than 120 seconds when new tests are added in
+# the future, increase timeout here.
+timeout=120
-- 
cgit 


From 3531d930c36fb991668ba0644fb14e8e24d923af Mon Sep 17 00:00:00 2001
From: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Date: Wed, 23 Mar 2022 17:12:24 +0900
Subject: selftests/resctrl: Fix resctrl_tests' return code to work with
 selftest framework

In kselftest framework, if a sub test can not run by some reasons,
the test result should be marked as SKIP rather than FAIL.
Return KSFT_SKIP(4) instead of KSFT_FAIL(1) if resctrl_tests is not run
as root or it is run on a test environment which does not support resctrl.

 - ksft_exit_fail_msg(): returns KSFT_FAIL(1)
 - ksft_exit_skip(): returns KSFT_SKIP(4)

Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Reviewed-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/resctrl/resctrl_tests.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c
index f8da3ecf67ef..df0d8d8526fc 100644
--- a/tools/testing/selftests/resctrl/resctrl_tests.c
+++ b/tools/testing/selftests/resctrl/resctrl_tests.c
@@ -225,7 +225,7 @@ int main(int argc, char **argv)
 	 * 2. We execute perf commands
 	 */
 	if (geteuid() != 0)
-		return ksft_exit_fail_msg("Not running as root, abort testing.\n");
+		return ksft_exit_skip("Not running as root. Skipping...\n");
 
 	if (has_ben) {
 		/* Extract benchmark command from command line. */
@@ -252,7 +252,7 @@ int main(int argc, char **argv)
 	sprintf(bm_type, "fill_buf");
 
 	if (!check_resctrlfs_support())
-		return ksft_exit_fail_msg("resctrl FS does not exist\n");
+		return ksft_exit_skip("resctrl FS does not exist. Enable X86_CPU_RESCTRL config option.\n");
 
 	filter_dmesg();
 
-- 
cgit 


From b733143cc455bf83fa5fbd2e0eac63fb2d302461 Mon Sep 17 00:00:00 2001
From: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Date: Wed, 23 Mar 2022 17:12:25 +0900
Subject: selftests/resctrl: Make resctrl_tests run using kselftest framework

In kselftest framework, all tests can be build/run at a time,
and a sub test also can be build/run individually. As follows:
$ make kselftest-all TARGETS=resctrl
$ make -C tools/testing/selftests run_tests
$ make -C tools/testing/selftests TARGETS=resctrl run_tests

However, resctrl_tests cannot be run using kselftest framework,
users have to change directory to tools/testing/selftests/resctrl/,
run "make" to build executable file "resctrl_tests",
and run "sudo ./resctrl_tests" to execute the test.

To build/run resctrl_tests using kselftest framework.
Modify tools/testing/selftests/Makefile
and tools/testing/selftests/resctrl/Makefile.

Even after this change, users can still build/run resctrl_tests
without using framework as before.

Reviewed-by: Reinette Chatre <reinette.chatre@intel.com> # resctrl changes
Reviewed-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/Makefile         |  1 +
 tools/testing/selftests/resctrl/Makefile | 17 ++++-------------
 2 files changed, 5 insertions(+), 13 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index bd2ac8b3bf1f..0aedcd76cf0f 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -53,6 +53,7 @@ TARGETS += proc
 TARGETS += pstore
 TARGETS += ptrace
 TARGETS += openat2
+TARGETS += resctrl
 TARGETS += rlimits
 TARGETS += rseq
 TARGETS += rtc
diff --git a/tools/testing/selftests/resctrl/Makefile b/tools/testing/selftests/resctrl/Makefile
index 6bcee2ec91a9..bee5fa8f1ac9 100644
--- a/tools/testing/selftests/resctrl/Makefile
+++ b/tools/testing/selftests/resctrl/Makefile
@@ -1,17 +1,8 @@
-CC = $(CROSS_COMPILE)gcc
 CFLAGS = -g -Wall -O2 -D_FORTIFY_SOURCE=2
-SRCS=$(wildcard *.c)
-OBJS=$(SRCS:.c=.o)
+CFLAGS += $(KHDR_INCLUDES)
 
-all: resctrl_tests
+TEST_GEN_PROGS := resctrl_tests
 
-$(OBJS): $(SRCS)
-	$(CC) $(CFLAGS) -c $(SRCS)
+include ../lib.mk
 
-resctrl_tests: $(OBJS)
-	$(CC) $(CFLAGS) -o $@ $^
-
-.PHONY: clean
-
-clean:
-	$(RM) $(OBJS) resctrl_tests
+$(OUTPUT)/resctrl_tests: $(wildcard *.c)
-- 
cgit 


From 42e2f21451f73a00ce3e63c46da8fb71d30b4cb0 Mon Sep 17 00:00:00 2001
From: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Date: Wed, 23 Mar 2022 17:12:26 +0900
Subject: selftests/resctrl: Update README about using kselftest framework to
 build/run resctrl_tests

resctrl_tests can be built or run using kselftests framework.
Add description on how to do so in README.

Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Reviewed-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/resctrl/README | 39 ++++++++++++++++++++++++++++------
 1 file changed, 32 insertions(+), 7 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/resctrl/README b/tools/testing/selftests/resctrl/README
index 3d2bbd4fa3aa..8d11ce7c2ee5 100644
--- a/tools/testing/selftests/resctrl/README
+++ b/tools/testing/selftests/resctrl/README
@@ -12,24 +12,49 @@ Allocation test on Intel RDT hardware. More tests will be added in the future.
 And the test suit can be extended to cover AMD QoS and ARM MPAM hardware
 as well.
 
+resctrl_tests can be run with or without kselftest framework.
+
+WITH KSELFTEST FRAMEWORK
+=======================
+
 BUILD
 -----
 
-Run "make" to build executable file "resctrl_tests".
+Build executable file "resctrl_tests" from top level directory of the kernel source:
+ $ make -C tools/testing/selftests TARGETS=resctrl
 
 RUN
 ---
 
-To use resctrl_tests, root or sudoer privileges are required. This is because
-the test needs to mount resctrl file system and change contents in the file
-system.
+Run resctrl_tests as sudo or root since the test needs to mount resctrl file
+system and change contents in the file system.
+Using kselftest framework will run all supported tests within resctrl_tests:
+
+ $ sudo make -C tools/testing/selftests TARGETS=resctrl run_tests
+
+More details about kselftest framework can be found in
+Documentation/dev-tools/kselftest.rst.
+
+WITHOUT KSELFTEST FRAMEWORK
+===========================
+
+BUILD
+-----
+
+Build executable file "resctrl_tests" from this directory(tools/testing/selftests/resctrl/):
+  $ make
+
+RUN
+---
 
+Run resctrl_tests as sudo or root since the test needs to mount resctrl file
+system and change contents in the file system.
 Executing the test without any parameter will run all supported tests:
 
-	sudo ./resctrl_tests
+ $ sudo ./resctrl_tests
 
 OVERVIEW OF EXECUTION
----------------------
+=====================
 
 A test case has four stages:
 
@@ -41,7 +66,7 @@ A test case has four stages:
   - teardown: umount resctrl and clear temporary files.
 
 ARGUMENTS
----------
+=========
 
 Parameter '-h' shows usage information.
 
-- 
cgit 


From 68c4844985d1f8c1b1a71dfcdbfacb5a30babc95 Mon Sep 17 00:00:00 2001
From: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Date: Wed, 23 Mar 2022 17:12:27 +0900
Subject: selftests/resctrl: Add missing SPDX license to Makefile

Add the missing SPDX(SPDX-License-Identifier) license header to
tools/testing/selftests/resctrl/Makefile.

Acked-by: Reinette Chatre <reinette.chatre@intel.com>
Reviewed-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Shaopeng Tan <tan.shaopeng@jp.fujitsu.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/resctrl/Makefile | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/resctrl/Makefile b/tools/testing/selftests/resctrl/Makefile
index bee5fa8f1ac9..73d53257df42 100644
--- a/tools/testing/selftests/resctrl/Makefile
+++ b/tools/testing/selftests/resctrl/Makefile
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+
 CFLAGS = -g -Wall -O2 -D_FORTIFY_SOURCE=2
 CFLAGS += $(KHDR_INCLUDES)
 
-- 
cgit 


From 8f14852e89113d738c99c375b4c8b8b7e1073df1 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Mon, 25 Apr 2022 03:18:50 +0530
Subject: bpf: Tag argument to be released in bpf_func_proto

Add a new type flag for bpf_arg_type that when set tells verifier that
for a release function, that argument's register will be the one for
which meta.ref_obj_id will be set, and which will then be released
using release_reference. To capture the regno, introduce a new field
release_regno in bpf_call_arg_meta.

This would be required in the next patch, where we may either pass NULL
or a refcounted pointer as an argument to the release function
bpf_kptr_xchg. Just releasing only when meta.ref_obj_id is set is not
enough, as there is a case where the type of argument needed matches,
but the ref_obj_id is set to 0. Hence, we must enforce that whenever
meta.ref_obj_id is zero, the register that is to be released can only
be NULL for a release function.

Since we now indicate whether an argument is to be released in
bpf_func_proto itself, is_release_function helper has lost its utitlity,
hence refactor code to work without it, and just rely on
meta.release_regno to know when to release state for a ref_obj_id.
Still, the restriction of one release argument and only one ref_obj_id
passed to BPF helper or kfunc remains. This may be lifted in the future.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-3-memxor@gmail.com
---
 include/linux/bpf.h                                |  5 +-
 include/linux/bpf_verifier.h                       |  3 +-
 kernel/bpf/btf.c                                   | 11 ++--
 kernel/bpf/ringbuf.c                               |  4 +-
 kernel/bpf/verifier.c                              | 76 ++++++++++++----------
 net/core/filter.c                                  |  2 +-
 .../testing/selftests/bpf/verifier/ref_tracking.c  |  2 +-
 tools/testing/selftests/bpf/verifier/sock.c        |  6 +-
 8 files changed, 60 insertions(+), 49 deletions(-)

(limited to 'tools/testing')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index ce12124048c0..492edd2c5713 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -366,7 +366,10 @@ enum bpf_type_flag {
 	 */
 	MEM_PERCPU		= BIT(4 + BPF_BASE_TYPE_BITS),
 
-	__BPF_TYPE_LAST_FLAG	= MEM_PERCPU,
+	/* Indicates that the argument will be released. */
+	OBJ_RELEASE		= BIT(5 + BPF_BASE_TYPE_BITS),
+
+	__BPF_TYPE_LAST_FLAG	= OBJ_RELEASE,
 };
 
 /* Max number of base types. */
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 3a9d2d7cc6b7..1f1e7f2ea967 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -523,8 +523,7 @@ int check_ptr_off_reg(struct bpf_verifier_env *env,
 		      const struct bpf_reg_state *reg, int regno);
 int check_func_arg_reg_off(struct bpf_verifier_env *env,
 			   const struct bpf_reg_state *reg, int regno,
-			   enum bpf_arg_type arg_type,
-			   bool is_release_func);
+			   enum bpf_arg_type arg_type);
 int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
 			     u32 regno);
 int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 563ac61e6d6b..f0287342204f 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -6047,6 +6047,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
 	 * verifier sees.
 	 */
 	for (i = 0; i < nargs; i++) {
+		enum bpf_arg_type arg_type = ARG_DONTCARE;
 		u32 regno = i + 1;
 		struct bpf_reg_state *reg = &regs[regno];
 
@@ -6067,7 +6068,9 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
 		ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id);
 		ref_tname = btf_name_by_offset(btf, ref_t->name_off);
 
-		ret = check_func_arg_reg_off(env, reg, regno, ARG_DONTCARE, rel);
+		if (rel && reg->ref_obj_id)
+			arg_type |= OBJ_RELEASE;
+		ret = check_func_arg_reg_off(env, reg, regno, arg_type);
 		if (ret < 0)
 			return ret;
 
@@ -6099,11 +6102,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
 			if (reg->type == PTR_TO_BTF_ID) {
 				reg_btf = reg->btf;
 				reg_ref_id = reg->btf_id;
-				/* Ensure only one argument is referenced
-				 * PTR_TO_BTF_ID, check_func_arg_reg_off relies
-				 * on only one referenced register being allowed
-				 * for kfuncs.
-				 */
+				/* Ensure only one argument is referenced PTR_TO_BTF_ID */
 				if (reg->ref_obj_id) {
 					if (ref_obj_id) {
 						bpf_log(log, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c
index 710ba9de12ce..5173fd37590f 100644
--- a/kernel/bpf/ringbuf.c
+++ b/kernel/bpf/ringbuf.c
@@ -404,7 +404,7 @@ BPF_CALL_2(bpf_ringbuf_submit, void *, sample, u64, flags)
 const struct bpf_func_proto bpf_ringbuf_submit_proto = {
 	.func		= bpf_ringbuf_submit,
 	.ret_type	= RET_VOID,
-	.arg1_type	= ARG_PTR_TO_ALLOC_MEM,
+	.arg1_type	= ARG_PTR_TO_ALLOC_MEM | OBJ_RELEASE,
 	.arg2_type	= ARG_ANYTHING,
 };
 
@@ -417,7 +417,7 @@ BPF_CALL_2(bpf_ringbuf_discard, void *, sample, u64, flags)
 const struct bpf_func_proto bpf_ringbuf_discard_proto = {
 	.func		= bpf_ringbuf_discard,
 	.ret_type	= RET_VOID,
-	.arg1_type	= ARG_PTR_TO_ALLOC_MEM,
+	.arg1_type	= ARG_PTR_TO_ALLOC_MEM | OBJ_RELEASE,
 	.arg2_type	= ARG_ANYTHING,
 };
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 17ca586e0585..5426bab7f02c 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -245,6 +245,7 @@ struct bpf_call_arg_meta {
 	struct bpf_map *map_ptr;
 	bool raw_mode;
 	bool pkt_access;
+	u8 release_regno;
 	int regno;
 	int access_size;
 	int mem_size;
@@ -471,17 +472,6 @@ static bool type_may_be_null(u32 type)
 	return type & PTR_MAYBE_NULL;
 }
 
-/* Determine whether the function releases some resources allocated by another
- * function call. The first reference type argument will be assumed to be
- * released by release_reference().
- */
-static bool is_release_function(enum bpf_func_id func_id)
-{
-	return func_id == BPF_FUNC_sk_release ||
-	       func_id == BPF_FUNC_ringbuf_submit ||
-	       func_id == BPF_FUNC_ringbuf_discard;
-}
-
 static bool may_be_acquire_function(enum bpf_func_id func_id)
 {
 	return func_id == BPF_FUNC_sk_lookup_tcp ||
@@ -5326,6 +5316,11 @@ static bool arg_type_is_int_ptr(enum bpf_arg_type type)
 	       type == ARG_PTR_TO_LONG;
 }
 
+static bool arg_type_is_release(enum bpf_arg_type type)
+{
+	return type & OBJ_RELEASE;
+}
+
 static int int_ptr_type_to_size(enum bpf_arg_type type)
 {
 	if (type == ARG_PTR_TO_INT)
@@ -5536,11 +5531,10 @@ found:
 
 int check_func_arg_reg_off(struct bpf_verifier_env *env,
 			   const struct bpf_reg_state *reg, int regno,
-			   enum bpf_arg_type arg_type,
-			   bool is_release_func)
+			   enum bpf_arg_type arg_type)
 {
-	bool fixed_off_ok = false, release_reg;
 	enum bpf_reg_type type = reg->type;
+	bool fixed_off_ok = false;
 
 	switch ((u32)type) {
 	case SCALAR_VALUE:
@@ -5558,7 +5552,7 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
 		/* Some of the argument types nevertheless require a
 		 * zero register offset.
 		 */
-		if (arg_type != ARG_PTR_TO_ALLOC_MEM)
+		if (base_type(arg_type) != ARG_PTR_TO_ALLOC_MEM)
 			return 0;
 		break;
 	/* All the rest must be rejected, except PTR_TO_BTF_ID which allows
@@ -5566,19 +5560,17 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
 	 */
 	case PTR_TO_BTF_ID:
 		/* When referenced PTR_TO_BTF_ID is passed to release function,
-		 * it's fixed offset must be 0. We rely on the property that
-		 * only one referenced register can be passed to BPF helpers and
-		 * kfuncs. In the other cases, fixed offset can be non-zero.
+		 * it's fixed offset must be 0.	In the other cases, fixed offset
+		 * can be non-zero.
 		 */
-		release_reg = is_release_func && reg->ref_obj_id;
-		if (release_reg && reg->off) {
+		if (arg_type_is_release(arg_type) && reg->off) {
 			verbose(env, "R%d must have zero offset when passed to release func\n",
 				regno);
 			return -EINVAL;
 		}
-		/* For release_reg == true, fixed_off_ok must be false, but we
-		 * already checked and rejected reg->off != 0 above, so set to
-		 * true to allow fixed offset for all other cases.
+		/* For arg is release pointer, fixed_off_ok must be false, but
+		 * we already checked and rejected reg->off != 0 above, so set
+		 * to true to allow fixed offset for all other cases.
 		 */
 		fixed_off_ok = true;
 		break;
@@ -5637,14 +5629,24 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
 	if (err)
 		return err;
 
-	err = check_func_arg_reg_off(env, reg, regno, arg_type, is_release_function(meta->func_id));
+	err = check_func_arg_reg_off(env, reg, regno, arg_type);
 	if (err)
 		return err;
 
 skip_type_check:
-	/* check_func_arg_reg_off relies on only one referenced register being
-	 * allowed for BPF helpers.
-	 */
+	if (arg_type_is_release(arg_type)) {
+		if (!reg->ref_obj_id && !register_is_null(reg)) {
+			verbose(env, "R%d must be referenced when passed to release function\n",
+				regno);
+			return -EINVAL;
+		}
+		if (meta->release_regno) {
+			verbose(env, "verifier internal error: more than one release argument\n");
+			return -EFAULT;
+		}
+		meta->release_regno = regno;
+	}
+
 	if (reg->ref_obj_id) {
 		if (meta->ref_obj_id) {
 			verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
@@ -6151,7 +6153,8 @@ static bool check_btf_id_ok(const struct bpf_func_proto *fn)
 	return true;
 }
 
-static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
+static int check_func_proto(const struct bpf_func_proto *fn, int func_id,
+			    struct bpf_call_arg_meta *meta)
 {
 	return check_raw_mode_ok(fn) &&
 	       check_arg_pair_ok(fn) &&
@@ -6835,7 +6838,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 	memset(&meta, 0, sizeof(meta));
 	meta.pkt_access = fn->pkt_access;
 
-	err = check_func_proto(fn, func_id);
+	err = check_func_proto(fn, func_id, &meta);
 	if (err) {
 		verbose(env, "kernel subsystem misconfigured func %s#%d\n",
 			func_id_name(func_id), func_id);
@@ -6868,8 +6871,17 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 			return err;
 	}
 
-	if (is_release_function(func_id)) {
-		err = release_reference(env, meta.ref_obj_id);
+	regs = cur_regs(env);
+
+	if (meta.release_regno) {
+		err = -EINVAL;
+		if (meta.ref_obj_id)
+			err = release_reference(env, meta.ref_obj_id);
+		/* meta.ref_obj_id can only be 0 if register that is meant to be
+		 * released is NULL, which must be > R0.
+		 */
+		else if (register_is_null(&regs[meta.release_regno]))
+			err = 0;
 		if (err) {
 			verbose(env, "func %s#%d reference has not been acquired before\n",
 				func_id_name(func_id), func_id);
@@ -6877,8 +6889,6 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 		}
 	}
 
-	regs = cur_regs(env);
-
 	switch (func_id) {
 	case BPF_FUNC_tail_call:
 		err = check_reference_leak(env);
diff --git a/net/core/filter.c b/net/core/filter.c
index 8847316ee20e..da04ad179fda 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -6621,7 +6621,7 @@ static const struct bpf_func_proto bpf_sk_release_proto = {
 	.func		= bpf_sk_release,
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
-	.arg1_type	= ARG_PTR_TO_BTF_ID_SOCK_COMMON,
+	.arg1_type	= ARG_PTR_TO_BTF_ID_SOCK_COMMON | OBJ_RELEASE,
 };
 
 BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c
index fbd682520e47..57a83d763ec1 100644
--- a/tools/testing/selftests/bpf/verifier/ref_tracking.c
+++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c
@@ -796,7 +796,7 @@
 	},
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.result = REJECT,
-	.errstr = "reference has not been acquired before",
+	.errstr = "R1 must be referenced when passed to release function",
 },
 {
 	/* !bpf_sk_fullsock(sk) is checked but !bpf_tcp_sock(sk) is not checked */
diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c
index 86b24cad27a7..d11d0b28be41 100644
--- a/tools/testing/selftests/bpf/verifier/sock.c
+++ b/tools/testing/selftests/bpf/verifier/sock.c
@@ -417,7 +417,7 @@
 	},
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.result = REJECT,
-	.errstr = "reference has not been acquired before",
+	.errstr = "R1 must be referenced when passed to release function",
 },
 {
 	"bpf_sk_release(bpf_sk_fullsock(skb->sk))",
@@ -436,7 +436,7 @@
 	},
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.result = REJECT,
-	.errstr = "reference has not been acquired before",
+	.errstr = "R1 must be referenced when passed to release function",
 },
 {
 	"bpf_sk_release(bpf_tcp_sock(skb->sk))",
@@ -455,7 +455,7 @@
 	},
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.result = REJECT,
-	.errstr = "reference has not been acquired before",
+	.errstr = "R1 must be referenced when passed to release function",
 },
 {
 	"sk_storage_get(map, skb->sk, NULL, 0): value == NULL",
-- 
cgit 


From a84ca704d8306a949578d36c028c8e1bab3dcf0b Mon Sep 17 00:00:00 2001
From: Haowen Bai <baihaowen@meizu.com>
Date: Sun, 24 Apr 2022 16:26:41 +0800
Subject: selftests/powerpc/pmu: Fix unsigned function returning negative
 constant

The function __perf_reg_mask has an unsigned return type, but returns a
negative constant to indicate an error condition. So we change unsigned
to int.

Signed-off-by: Haowen Bai <baihaowen@meizu.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/1650788802-14402-1-git-send-email-baihaowen@meizu.com
---
 tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c
index fca054bbc094..c01a31d5f4ee 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c
@@ -274,7 +274,7 @@ u64 *get_intr_regs(struct event *event, void *sample_buff)
 	return intr_regs;
 }
 
-static const unsigned int __perf_reg_mask(const char *register_name)
+static const int __perf_reg_mask(const char *register_name)
 {
 	if (!strcmp(register_name, "R0"))
 		return 0;
-- 
cgit 


From 2cbc469a6fc345651569f9570b899af3b005cf80 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Mon, 25 Apr 2022 03:18:59 +0530
Subject: selftests/bpf: Add C tests for kptr

This uses the __kptr and __kptr_ref macros as well, and tries to test
the stuff that is supposed to work, since we have negative tests in
test_verifier suite. Also include some code to test map-in-map support,
such that the inner_map_meta matches the kptr_off_tab of map added as
element.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-12-memxor@gmail.com
---
 tools/testing/selftests/bpf/prog_tests/map_kptr.c |  37 +++++
 tools/testing/selftests/bpf/progs/map_kptr.c      | 190 ++++++++++++++++++++++
 2 files changed, 227 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/map_kptr.c
 create mode 100644 tools/testing/selftests/bpf/progs/map_kptr.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/map_kptr.c b/tools/testing/selftests/bpf/prog_tests/map_kptr.c
new file mode 100644
index 000000000000..9e2fbda64a65
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/map_kptr.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+#include "map_kptr.skel.h"
+
+void test_map_kptr(void)
+{
+	struct map_kptr *skel;
+	int key = 0, ret;
+	char buf[24];
+
+	skel = map_kptr__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "map_kptr__open_and_load"))
+		return;
+
+	ret = bpf_map_update_elem(bpf_map__fd(skel->maps.array_map), &key, buf, 0);
+	ASSERT_OK(ret, "array_map update");
+	ret = bpf_map_update_elem(bpf_map__fd(skel->maps.array_map), &key, buf, 0);
+	ASSERT_OK(ret, "array_map update2");
+
+	ret = bpf_map_update_elem(bpf_map__fd(skel->maps.hash_map), &key, buf, 0);
+	ASSERT_OK(ret, "hash_map update");
+	ret = bpf_map_delete_elem(bpf_map__fd(skel->maps.hash_map), &key);
+	ASSERT_OK(ret, "hash_map delete");
+
+	ret = bpf_map_update_elem(bpf_map__fd(skel->maps.hash_malloc_map), &key, buf, 0);
+	ASSERT_OK(ret, "hash_malloc_map update");
+	ret = bpf_map_delete_elem(bpf_map__fd(skel->maps.hash_malloc_map), &key);
+	ASSERT_OK(ret, "hash_malloc_map delete");
+
+	ret = bpf_map_update_elem(bpf_map__fd(skel->maps.lru_hash_map), &key, buf, 0);
+	ASSERT_OK(ret, "lru_hash_map update");
+	ret = bpf_map_delete_elem(bpf_map__fd(skel->maps.lru_hash_map), &key);
+	ASSERT_OK(ret, "lru_hash_map delete");
+
+	map_kptr__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/map_kptr.c b/tools/testing/selftests/bpf/progs/map_kptr.c
new file mode 100644
index 000000000000..1b0e0409eaa5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/map_kptr.c
@@ -0,0 +1,190 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+struct map_value {
+	struct prog_test_ref_kfunc __kptr *unref_ptr;
+	struct prog_test_ref_kfunc __kptr_ref *ref_ptr;
+};
+
+struct array_map {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__type(key, int);
+	__type(value, struct map_value);
+	__uint(max_entries, 1);
+} array_map SEC(".maps");
+
+struct hash_map {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, int);
+	__type(value, struct map_value);
+	__uint(max_entries, 1);
+} hash_map SEC(".maps");
+
+struct hash_malloc_map {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, int);
+	__type(value, struct map_value);
+	__uint(max_entries, 1);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+} hash_malloc_map SEC(".maps");
+
+struct lru_hash_map {
+	__uint(type, BPF_MAP_TYPE_LRU_HASH);
+	__type(key, int);
+	__type(value, struct map_value);
+	__uint(max_entries, 1);
+} lru_hash_map SEC(".maps");
+
+#define DEFINE_MAP_OF_MAP(map_type, inner_map_type, name)       \
+	struct {                                                \
+		__uint(type, map_type);                         \
+		__uint(max_entries, 1);                         \
+		__uint(key_size, sizeof(int));                  \
+		__uint(value_size, sizeof(int));                \
+		__array(values, struct inner_map_type);         \
+	} name SEC(".maps") = {                                 \
+		.values = { [0] = &inner_map_type },            \
+	}
+
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_map, array_of_array_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, hash_map, array_of_hash_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, hash_malloc_map, array_of_hash_malloc_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, lru_hash_map, array_of_lru_hash_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, array_map, hash_of_array_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, hash_map, hash_of_hash_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, hash_malloc_map, hash_of_hash_malloc_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, lru_hash_map, hash_of_lru_hash_maps);
+
+extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym;
+extern struct prog_test_ref_kfunc *
+bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **p, int a, int b) __ksym;
+extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;
+
+static void test_kptr_unref(struct map_value *v)
+{
+	struct prog_test_ref_kfunc *p;
+
+	p = v->unref_ptr;
+	/* store untrusted_ptr_or_null_ */
+	v->unref_ptr = p;
+	if (!p)
+		return;
+	if (p->a + p->b > 100)
+		return;
+	/* store untrusted_ptr_ */
+	v->unref_ptr = p;
+	/* store NULL */
+	v->unref_ptr = NULL;
+}
+
+static void test_kptr_ref(struct map_value *v)
+{
+	struct prog_test_ref_kfunc *p;
+
+	p = v->ref_ptr;
+	/* store ptr_or_null_ */
+	v->unref_ptr = p;
+	if (!p)
+		return;
+	if (p->a + p->b > 100)
+		return;
+	/* store NULL */
+	p = bpf_kptr_xchg(&v->ref_ptr, NULL);
+	if (!p)
+		return;
+	if (p->a + p->b > 100) {
+		bpf_kfunc_call_test_release(p);
+		return;
+	}
+	/* store ptr_ */
+	v->unref_ptr = p;
+	bpf_kfunc_call_test_release(p);
+
+	p = bpf_kfunc_call_test_acquire(&(unsigned long){0});
+	if (!p)
+		return;
+	/* store ptr_ */
+	p = bpf_kptr_xchg(&v->ref_ptr, p);
+	if (!p)
+		return;
+	if (p->a + p->b > 100) {
+		bpf_kfunc_call_test_release(p);
+		return;
+	}
+	bpf_kfunc_call_test_release(p);
+}
+
+static void test_kptr_get(struct map_value *v)
+{
+	struct prog_test_ref_kfunc *p;
+
+	p = bpf_kfunc_call_test_kptr_get(&v->ref_ptr, 0, 0);
+	if (!p)
+		return;
+	if (p->a + p->b > 100) {
+		bpf_kfunc_call_test_release(p);
+		return;
+	}
+	bpf_kfunc_call_test_release(p);
+}
+
+static void test_kptr(struct map_value *v)
+{
+	test_kptr_unref(v);
+	test_kptr_ref(v);
+	test_kptr_get(v);
+}
+
+SEC("tc")
+int test_map_kptr(struct __sk_buff *ctx)
+{
+	struct map_value *v;
+	int i, key = 0;
+
+#define TEST(map)					\
+	v = bpf_map_lookup_elem(&map, &key);		\
+	if (!v)						\
+		return 0;				\
+	test_kptr(v)
+
+	TEST(array_map);
+	TEST(hash_map);
+	TEST(hash_malloc_map);
+	TEST(lru_hash_map);
+
+#undef TEST
+	return 0;
+}
+
+SEC("tc")
+int test_map_in_map_kptr(struct __sk_buff *ctx)
+{
+	struct map_value *v;
+	int i, key = 0;
+	void *map;
+
+#define TEST(map_in_map)                                \
+	map = bpf_map_lookup_elem(&map_in_map, &key);   \
+	if (!map)                                       \
+		return 0;                               \
+	v = bpf_map_lookup_elem(map, &key);		\
+	if (!v)						\
+		return 0;				\
+	test_kptr(v)
+
+	TEST(array_of_array_maps);
+	TEST(array_of_hash_maps);
+	TEST(array_of_hash_malloc_maps);
+	TEST(array_of_lru_hash_maps);
+	TEST(hash_of_array_maps);
+	TEST(hash_of_hash_maps);
+	TEST(hash_of_hash_malloc_maps);
+	TEST(hash_of_lru_hash_maps);
+
+#undef TEST
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit 


From 05a945deefaa9fe6d34f06f0ab0cbfc72e2dbfa0 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Mon, 25 Apr 2022 03:19:00 +0530
Subject: selftests/bpf: Add verifier tests for kptr

Reuse bpf_prog_test functions to test the support for PTR_TO_BTF_ID in
BPF map case, including some tests that verify implementation sanity and
corner cases.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-13-memxor@gmail.com
---
 net/bpf/test_run.c                              |  45 ++-
 tools/testing/selftests/bpf/test_verifier.c     |  55 ++-
 tools/testing/selftests/bpf/verifier/map_kptr.c | 469 ++++++++++++++++++++++++
 3 files changed, 562 insertions(+), 7 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/verifier/map_kptr.c

(limited to 'tools/testing')

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index e7b9c2636d10..29fe32821e7e 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -584,6 +584,12 @@ noinline void bpf_kfunc_call_memb_release(struct prog_test_member *p)
 {
 }
 
+noinline struct prog_test_ref_kfunc *
+bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **p, int a, int b)
+{
+	return &prog_test_struct;
+}
+
 struct prog_test_pass1 {
 	int x0;
 	struct {
@@ -669,6 +675,7 @@ BTF_ID(func, bpf_kfunc_call_test3)
 BTF_ID(func, bpf_kfunc_call_test_acquire)
 BTF_ID(func, bpf_kfunc_call_test_release)
 BTF_ID(func, bpf_kfunc_call_memb_release)
+BTF_ID(func, bpf_kfunc_call_test_kptr_get)
 BTF_ID(func, bpf_kfunc_call_test_pass_ctx)
 BTF_ID(func, bpf_kfunc_call_test_pass1)
 BTF_ID(func, bpf_kfunc_call_test_pass2)
@@ -682,6 +689,7 @@ BTF_SET_END(test_sk_check_kfunc_ids)
 
 BTF_SET_START(test_sk_acquire_kfunc_ids)
 BTF_ID(func, bpf_kfunc_call_test_acquire)
+BTF_ID(func, bpf_kfunc_call_test_kptr_get)
 BTF_SET_END(test_sk_acquire_kfunc_ids)
 
 BTF_SET_START(test_sk_release_kfunc_ids)
@@ -691,8 +699,13 @@ BTF_SET_END(test_sk_release_kfunc_ids)
 
 BTF_SET_START(test_sk_ret_null_kfunc_ids)
 BTF_ID(func, bpf_kfunc_call_test_acquire)
+BTF_ID(func, bpf_kfunc_call_test_kptr_get)
 BTF_SET_END(test_sk_ret_null_kfunc_ids)
 
+BTF_SET_START(test_sk_kptr_acquire_kfunc_ids)
+BTF_ID(func, bpf_kfunc_call_test_kptr_get)
+BTF_SET_END(test_sk_kptr_acquire_kfunc_ids)
+
 static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size,
 			   u32 size, u32 headroom, u32 tailroom)
 {
@@ -1579,14 +1592,36 @@ out:
 
 static const struct btf_kfunc_id_set bpf_prog_test_kfunc_set = {
 	.owner        = THIS_MODULE,
-	.check_set    = &test_sk_check_kfunc_ids,
-	.acquire_set  = &test_sk_acquire_kfunc_ids,
-	.release_set  = &test_sk_release_kfunc_ids,
-	.ret_null_set = &test_sk_ret_null_kfunc_ids,
+	.check_set        = &test_sk_check_kfunc_ids,
+	.acquire_set      = &test_sk_acquire_kfunc_ids,
+	.release_set      = &test_sk_release_kfunc_ids,
+	.ret_null_set     = &test_sk_ret_null_kfunc_ids,
+	.kptr_acquire_set = &test_sk_kptr_acquire_kfunc_ids
 };
 
+BTF_ID_LIST(bpf_prog_test_dtor_kfunc_ids)
+BTF_ID(struct, prog_test_ref_kfunc)
+BTF_ID(func, bpf_kfunc_call_test_release)
+BTF_ID(struct, prog_test_member)
+BTF_ID(func, bpf_kfunc_call_memb_release)
+
 static int __init bpf_prog_test_run_init(void)
 {
-	return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_prog_test_kfunc_set);
+	const struct btf_id_dtor_kfunc bpf_prog_test_dtor_kfunc[] = {
+		{
+		  .btf_id       = bpf_prog_test_dtor_kfunc_ids[0],
+		  .kfunc_btf_id = bpf_prog_test_dtor_kfunc_ids[1]
+		},
+		{
+		  .btf_id	= bpf_prog_test_dtor_kfunc_ids[2],
+		  .kfunc_btf_id = bpf_prog_test_dtor_kfunc_ids[3],
+		},
+	};
+	int ret;
+
+	ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_prog_test_kfunc_set);
+	return ret ?: register_btf_id_dtor_kfuncs(bpf_prog_test_dtor_kfunc,
+						  ARRAY_SIZE(bpf_prog_test_dtor_kfunc),
+						  THIS_MODULE);
 }
 late_initcall(bpf_prog_test_run_init);
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index a2cd236c32eb..372579c9f45e 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -53,7 +53,7 @@
 #define MAX_INSNS	BPF_MAXINSNS
 #define MAX_TEST_INSNS	1000000
 #define MAX_FIXUPS	8
-#define MAX_NR_MAPS	22
+#define MAX_NR_MAPS	23
 #define MAX_TEST_RUNS	8
 #define POINTER_VALUE	0xcafe4all
 #define TEST_DATA_LEN	64
@@ -101,6 +101,7 @@ struct bpf_test {
 	int fixup_map_reuseport_array[MAX_FIXUPS];
 	int fixup_map_ringbuf[MAX_FIXUPS];
 	int fixup_map_timer[MAX_FIXUPS];
+	int fixup_map_kptr[MAX_FIXUPS];
 	struct kfunc_btf_id_pair fixup_kfunc_btf_id[MAX_FIXUPS];
 	/* Expected verifier log output for result REJECT or VERBOSE_ACCEPT.
 	 * Can be a tab-separated sequence of expected strings. An empty string
@@ -621,8 +622,15 @@ static int create_cgroup_storage(bool percpu)
  * struct timer {
  *   struct bpf_timer t;
  * };
+ * struct btf_ptr {
+ *   struct prog_test_ref_kfunc __kptr *ptr;
+ *   struct prog_test_ref_kfunc __kptr_ref *ptr;
+ *   struct prog_test_member __kptr_ref *ptr;
+ * }
  */
-static const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l\0bpf_timer\0timer\0t";
+static const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l\0bpf_timer\0timer\0t"
+				  "\0btf_ptr\0prog_test_ref_kfunc\0ptr\0kptr\0kptr_ref"
+				  "\0prog_test_member";
 static __u32 btf_raw_types[] = {
 	/* int */
 	BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
@@ -638,6 +646,22 @@ static __u32 btf_raw_types[] = {
 	/* struct timer */                              /* [5] */
 	BTF_TYPE_ENC(35, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 16),
 	BTF_MEMBER_ENC(41, 4, 0), /* struct bpf_timer t; */
+	/* struct prog_test_ref_kfunc */		/* [6] */
+	BTF_STRUCT_ENC(51, 0, 0),
+	BTF_STRUCT_ENC(89, 0, 0),			/* [7] */
+	/* type tag "kptr" */
+	BTF_TYPE_TAG_ENC(75, 6),			/* [8] */
+	/* type tag "kptr_ref" */
+	BTF_TYPE_TAG_ENC(80, 6),			/* [9] */
+	BTF_TYPE_TAG_ENC(80, 7),			/* [10] */
+	BTF_PTR_ENC(8),					/* [11] */
+	BTF_PTR_ENC(9),					/* [12] */
+	BTF_PTR_ENC(10),				/* [13] */
+	/* struct btf_ptr */				/* [14] */
+	BTF_STRUCT_ENC(43, 3, 24),
+	BTF_MEMBER_ENC(71, 11, 0), /* struct prog_test_ref_kfunc __kptr *ptr; */
+	BTF_MEMBER_ENC(71, 12, 64), /* struct prog_test_ref_kfunc __kptr_ref *ptr; */
+	BTF_MEMBER_ENC(71, 13, 128), /* struct prog_test_member __kptr_ref *ptr; */
 };
 
 static int load_btf(void)
@@ -727,6 +751,25 @@ static int create_map_timer(void)
 	return fd;
 }
 
+static int create_map_kptr(void)
+{
+	LIBBPF_OPTS(bpf_map_create_opts, opts,
+		.btf_key_type_id = 1,
+		.btf_value_type_id = 14,
+	);
+	int fd, btf_fd;
+
+	btf_fd = load_btf();
+	if (btf_fd < 0)
+		return -1;
+
+	opts.btf_fd = btf_fd;
+	fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "test_map", 4, 24, 1, &opts);
+	if (fd < 0)
+		printf("Failed to create map with btf_id pointer\n");
+	return fd;
+}
+
 static char bpf_vlog[UINT_MAX >> 8];
 
 static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
@@ -754,6 +797,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
 	int *fixup_map_reuseport_array = test->fixup_map_reuseport_array;
 	int *fixup_map_ringbuf = test->fixup_map_ringbuf;
 	int *fixup_map_timer = test->fixup_map_timer;
+	int *fixup_map_kptr = test->fixup_map_kptr;
 	struct kfunc_btf_id_pair *fixup_kfunc_btf_id = test->fixup_kfunc_btf_id;
 
 	if (test->fill_helper) {
@@ -947,6 +991,13 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
 			fixup_map_timer++;
 		} while (*fixup_map_timer);
 	}
+	if (*fixup_map_kptr) {
+		map_fds[22] = create_map_kptr();
+		do {
+			prog[*fixup_map_kptr].imm = map_fds[22];
+			fixup_map_kptr++;
+		} while (*fixup_map_kptr);
+	}
 
 	/* Patch in kfunc BTF IDs */
 	if (fixup_kfunc_btf_id->kfunc) {
diff --git a/tools/testing/selftests/bpf/verifier/map_kptr.c b/tools/testing/selftests/bpf/verifier/map_kptr.c
new file mode 100644
index 000000000000..9113834640e6
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/map_kptr.c
@@ -0,0 +1,469 @@
+/* Common tests */
+{
+	"map_kptr: BPF_ST imm != 0",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "BPF_ST imm must be 0 when storing to kptr at off=0",
+},
+{
+	"map_kptr: size != bpf_size_to_bytes(BPF_DW)",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_ST_MEM(BPF_W, BPF_REG_0, 0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "kptr access size must be BPF_DW",
+},
+{
+	"map_kptr: map_value non-const var_off",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2, 0),
+	BPF_JMP_IMM(BPF_JLE, BPF_REG_2, 4, 1),
+	BPF_EXIT_INSN(),
+	BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "kptr access cannot have variable offset",
+},
+{
+	"map_kptr: bpf_kptr_xchg non-const var_off",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2, 0),
+	BPF_JMP_IMM(BPF_JLE, BPF_REG_2, 4, 1),
+	BPF_EXIT_INSN(),
+	BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_3),
+	BPF_MOV64_IMM(BPF_REG_2, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_kptr_xchg),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "R1 doesn't have constant offset. kptr has to be at the constant offset",
+},
+{
+	"map_kptr: unaligned boundary load/store",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 7),
+	BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "kptr access misaligned expected=0 off=7",
+},
+{
+	"map_kptr: reject var_off != 0",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+	BPF_JMP_IMM(BPF_JLE, BPF_REG_2, 4, 1),
+	BPF_EXIT_INSN(),
+	BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
+	BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "variable untrusted_ptr_ access var_off=(0x0; 0x7) disallowed",
+},
+/* Tests for unreferened PTR_TO_BTF_ID */
+{
+	"map_kptr: unref: reject btf_struct_ids_match == false",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+	BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "invalid kptr access, R1 type=untrusted_ptr_prog_test_ref_kfunc expected=ptr_prog_test",
+},
+{
+	"map_kptr: unref: loaded pointer marked as untrusted",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "R0 invalid mem access 'untrusted_ptr_or_null_'",
+},
+{
+	"map_kptr: unref: correct in kernel type size",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 24),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "access beyond struct prog_test_ref_kfunc at off 24 size 8",
+},
+{
+	"map_kptr: unref: inherit PTR_UNTRUSTED on struct walk",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 16),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_this_cpu_ptr),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "R1 type=untrusted_ptr_ expected=percpu_ptr_",
+},
+{
+	"map_kptr: unref: no reference state created",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = ACCEPT,
+},
+{
+	"map_kptr: unref: bpf_kptr_xchg rejected",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_MOV64_IMM(BPF_REG_2, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_kptr_xchg),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "off=0 kptr isn't referenced kptr",
+},
+{
+	"map_kptr: unref: bpf_kfunc_call_test_kptr_get rejected",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_MOV64_IMM(BPF_REG_2, 0),
+	BPF_MOV64_IMM(BPF_REG_3, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "arg#0 no referenced kptr at map value offset=0",
+	.fixup_kfunc_btf_id = {
+		{ "bpf_kfunc_call_test_kptr_get", 13 },
+	}
+},
+/* Tests for referenced PTR_TO_BTF_ID */
+{
+	"map_kptr: ref: loaded pointer marked as untrusted",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_IMM(BPF_REG_1, 0),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 8),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_this_cpu_ptr),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_",
+},
+{
+	"map_kptr: ref: reject off != 0",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+	BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_MOV64_IMM(BPF_REG_2, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_kptr_xchg),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_kptr_xchg),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "invalid kptr access, R2 type=ptr_prog_test_ref_kfunc expected=ptr_prog_test_member",
+},
+{
+	"map_kptr: ref: reference state created and released on xchg",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+	BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+	BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_kptr_xchg),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "Unreleased reference id=5 alloc_insn=20",
+	.fixup_kfunc_btf_id = {
+		{ "bpf_kfunc_call_test_acquire", 15 },
+	}
+},
+{
+	"map_kptr: ref: reject STX",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_1, 0),
+	BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "store to referenced kptr disallowed",
+},
+{
+	"map_kptr: ref: reject ST",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_ST_MEM(BPF_DW, BPF_REG_0, 8, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "store to referenced kptr disallowed",
+},
+{
+	"map_kptr: reject helper access to kptr",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_delete_elem),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "kptr cannot be accessed indirectly by helper",
+},
-- 
cgit 


From 792c0a345f0eb2a6bb12afcca1cf6e518bf57b43 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Mon, 25 Apr 2022 03:19:01 +0530
Subject: selftests/bpf: Add test for strict BTF type check

Ensure that the edge case where first member type was matched
successfully even if it didn't match BTF type of register is caught and
rejected by the verifier.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-14-memxor@gmail.com
---
 net/bpf/test_run.c                           | 22 +++++++++++++++++++++-
 tools/testing/selftests/bpf/verifier/calls.c | 20 ++++++++++++++++++++
 2 files changed, 41 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 29fe32821e7e..7a1579c91432 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -550,8 +550,13 @@ struct sock * noinline bpf_kfunc_call_test3(struct sock *sk)
 	return sk;
 }
 
+struct prog_test_member1 {
+	int a;
+};
+
 struct prog_test_member {
-	u64 c;
+	struct prog_test_member1 m;
+	int c;
 };
 
 struct prog_test_ref_kfunc {
@@ -576,6 +581,12 @@ bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr)
 	return &prog_test_struct;
 }
 
+noinline struct prog_test_member *
+bpf_kfunc_call_memb_acquire(void)
+{
+	return &prog_test_struct.memb;
+}
+
 noinline void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p)
 {
 }
@@ -584,6 +595,10 @@ noinline void bpf_kfunc_call_memb_release(struct prog_test_member *p)
 {
 }
 
+noinline void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p)
+{
+}
+
 noinline struct prog_test_ref_kfunc *
 bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **p, int a, int b)
 {
@@ -673,8 +688,10 @@ BTF_ID(func, bpf_kfunc_call_test1)
 BTF_ID(func, bpf_kfunc_call_test2)
 BTF_ID(func, bpf_kfunc_call_test3)
 BTF_ID(func, bpf_kfunc_call_test_acquire)
+BTF_ID(func, bpf_kfunc_call_memb_acquire)
 BTF_ID(func, bpf_kfunc_call_test_release)
 BTF_ID(func, bpf_kfunc_call_memb_release)
+BTF_ID(func, bpf_kfunc_call_memb1_release)
 BTF_ID(func, bpf_kfunc_call_test_kptr_get)
 BTF_ID(func, bpf_kfunc_call_test_pass_ctx)
 BTF_ID(func, bpf_kfunc_call_test_pass1)
@@ -689,16 +706,19 @@ BTF_SET_END(test_sk_check_kfunc_ids)
 
 BTF_SET_START(test_sk_acquire_kfunc_ids)
 BTF_ID(func, bpf_kfunc_call_test_acquire)
+BTF_ID(func, bpf_kfunc_call_memb_acquire)
 BTF_ID(func, bpf_kfunc_call_test_kptr_get)
 BTF_SET_END(test_sk_acquire_kfunc_ids)
 
 BTF_SET_START(test_sk_release_kfunc_ids)
 BTF_ID(func, bpf_kfunc_call_test_release)
 BTF_ID(func, bpf_kfunc_call_memb_release)
+BTF_ID(func, bpf_kfunc_call_memb1_release)
 BTF_SET_END(test_sk_release_kfunc_ids)
 
 BTF_SET_START(test_sk_ret_null_kfunc_ids)
 BTF_ID(func, bpf_kfunc_call_test_acquire)
+BTF_ID(func, bpf_kfunc_call_memb_acquire)
 BTF_ID(func, bpf_kfunc_call_test_kptr_get)
 BTF_SET_END(test_sk_ret_null_kfunc_ids)
 
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 2e03decb11b6..743ed34c1238 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -138,6 +138,26 @@
 		{ "bpf_kfunc_call_memb_release", 8 },
 	},
 },
+{
+	"calls: invalid kfunc call: don't match first member type when passed to release kfunc",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.result = REJECT,
+	.errstr = "kernel function bpf_kfunc_call_memb1_release args#0 expected pointer",
+	.fixup_kfunc_btf_id = {
+		{ "bpf_kfunc_call_memb_acquire", 1 },
+		{ "bpf_kfunc_call_memb1_release", 5 },
+	},
+},
 {
 	"calls: invalid kfunc call: PTR_TO_BTF_ID with negative offset",
 	.insns = {
-- 
cgit 


From dbbf16895a89953ccbcf0dab2806821b1ec565ff Mon Sep 17 00:00:00 2001
From: ran jianping <ran.jianping@zte.com.cn>
Date: Sun, 24 Apr 2022 06:26:55 +0000
Subject: tools/testing/nvdimm: remove unneeded flush_workqueue

All work currently pending will be done first by calling destroy_workqueue,
so there is no need to flush it explicitly.

Reported-by: Zeal Robot <zealci@zte.com.cn>
Signed-off-by: ran jianping <ran.jianping@zte.com.cn>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Link: https://lore.kernel.org/r/20220424062655.3221152-1-ran.jianping@zte.com.cn
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 tools/testing/nvdimm/test/nfit.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 1da76ccde448..e7e1a640e482 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -3375,7 +3375,6 @@ static __exit void nfit_test_exit(void)
 {
 	int i;
 
-	flush_workqueue(nfit_wq);
 	destroy_workqueue(nfit_wq);
 	for (i = 0; i < NUM_NFITS; i++)
 		platform_device_unregister(&instances[i]->pdev);
-- 
cgit 


From c7b607fa9325ccc94982774c505176677117689c Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Tue, 26 Apr 2022 13:25:31 +0100
Subject: selftests/resctrl: Fix null pointer dereference on open failed

Currently if opening /dev/null fails to open then file pointer fp
is null and further access to fp via fprintf will cause a null
pointer dereference. Fix this by returning a negative error value
when a null fp is detected.

Detected using cppcheck static analysis:
tools/testing/selftests/resctrl/fill_buf.c:124:6: note: Assuming
that condition '!fp' is not redundant
 if (!fp)
     ^
tools/testing/selftests/resctrl/fill_buf.c:126:10: note: Null
pointer dereference
 fprintf(fp, "Sum: %d ", ret);

Fixes: a2561b12fe39 ("selftests/resctrl: Add built in benchmark")
Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/resctrl/fill_buf.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/resctrl/fill_buf.c b/tools/testing/selftests/resctrl/fill_buf.c
index 51e5cf22632f..56ccbeae0638 100644
--- a/tools/testing/selftests/resctrl/fill_buf.c
+++ b/tools/testing/selftests/resctrl/fill_buf.c
@@ -121,8 +121,10 @@ static int fill_cache_read(unsigned char *start_ptr, unsigned char *end_ptr,
 
 	/* Consume read result so that reading memory is not optimized out. */
 	fp = fopen("/dev/null", "w");
-	if (!fp)
+	if (!fp) {
 		perror("Unable to write to /dev/null");
+		return -1;
+	}
 	fprintf(fp, "Sum: %d ", ret);
 	fclose(fp);
 
-- 
cgit 


From b82bb1ffbb9a20032853ef4e0d5b8f37c6ae7c25 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 25 Apr 2022 17:45:06 -0700
Subject: selftests/bpf: Add CO-RE relos and SEC("?...") to linked_funcs
 selftests

Enhance linked_funcs selftest with two tricky features that might not
obviously work correctly together. We add CO-RE relocations to entry BPF
programs and mark those programs as non-autoloadable with SEC("?...")
annotation. This makes sure that libbpf itself handles .BTF.ext CO-RE
relocation data matching correctly for SEC("?...") programs, as well as
ensures that BPF static linker handles this correctly (this was the case
before, no changes are necessary, but it wasn't explicitly tested).

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220426004511.2691730-6-andrii@kernel.org
---
 tools/testing/selftests/bpf/prog_tests/linked_funcs.c | 6 ++++++
 tools/testing/selftests/bpf/progs/linked_funcs1.c     | 7 ++++++-
 tools/testing/selftests/bpf/progs/linked_funcs2.c     | 7 ++++++-
 3 files changed, 18 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/linked_funcs.c b/tools/testing/selftests/bpf/prog_tests/linked_funcs.c
index e9916f2817ec..cad664546912 100644
--- a/tools/testing/selftests/bpf/prog_tests/linked_funcs.c
+++ b/tools/testing/selftests/bpf/prog_tests/linked_funcs.c
@@ -14,6 +14,12 @@ void test_linked_funcs(void)
 	if (!ASSERT_OK_PTR(skel, "skel_open"))
 		return;
 
+	/* handler1 and handler2 are marked as SEC("?raw_tp/sys_enter") and
+	 * are set to not autoload by default
+	 */
+	bpf_program__set_autoload(skel->progs.handler1, true);
+	bpf_program__set_autoload(skel->progs.handler2, true);
+
 	skel->rodata->my_tid = syscall(SYS_gettid);
 	skel->bss->syscall_id = SYS_getpgid;
 
diff --git a/tools/testing/selftests/bpf/progs/linked_funcs1.c b/tools/testing/selftests/bpf/progs/linked_funcs1.c
index 963b393c37e8..b05571bc67d5 100644
--- a/tools/testing/selftests/bpf/progs/linked_funcs1.c
+++ b/tools/testing/selftests/bpf/progs/linked_funcs1.c
@@ -61,12 +61,17 @@ extern int set_output_val2(int x);
 /* here we'll force set_output_ctx2() to be __hidden in the final obj file */
 __hidden extern void set_output_ctx2(__u64 *ctx);
 
-SEC("raw_tp/sys_enter")
+SEC("?raw_tp/sys_enter")
 int BPF_PROG(handler1, struct pt_regs *regs, long id)
 {
+	static volatile int whatever;
+
 	if (my_tid != (u32)bpf_get_current_pid_tgid() || id != syscall_id)
 		return 0;
 
+	/* make sure we have CO-RE relocations in main program */
+	whatever = bpf_core_type_size(struct task_struct);
+
 	set_output_val2(1000);
 	set_output_ctx2(ctx); /* ctx definition is hidden in BPF_PROG macro */
 
diff --git a/tools/testing/selftests/bpf/progs/linked_funcs2.c b/tools/testing/selftests/bpf/progs/linked_funcs2.c
index db195872f4eb..ee7e3848ee4f 100644
--- a/tools/testing/selftests/bpf/progs/linked_funcs2.c
+++ b/tools/testing/selftests/bpf/progs/linked_funcs2.c
@@ -61,12 +61,17 @@ extern int set_output_val1(int x);
 /* here we'll force set_output_ctx1() to be __hidden in the final obj file */
 __hidden extern void set_output_ctx1(__u64 *ctx);
 
-SEC("raw_tp/sys_enter")
+SEC("?raw_tp/sys_enter")
 int BPF_PROG(handler2, struct pt_regs *regs, long id)
 {
+	static volatile int whatever;
+
 	if (my_tid != (u32)bpf_get_current_pid_tgid() || id != syscall_id)
 		return 0;
 
+	/* make sure we have CO-RE relocations in main program */
+	whatever = bpf_core_type_size(struct task_struct);
+
 	set_output_val1(2000);
 	set_output_ctx1(ctx); /* ctx definition is hidden in BPF_PROG macro */
 
-- 
cgit 


From ea4128eb43eb3fe856831eaa9f747fab350ed5f3 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 25 Apr 2022 17:45:11 -0700
Subject: selftests/bpf: Add libbpf's log fixup logic selftests

Add tests validating that libbpf is indeed patching up BPF verifier log
with CO-RE relocation details. Also test partial and full truncation
scenarios.

This test might be a bit fragile due to changing BPF verifier log
format. If that proves to be frequently breaking, we can simplify tests
or remove the truncation subtests. But for now it seems useful to test
it in those conditions that are otherwise rarely occuring in practice.

Also test CO-RE relo failure in a subprog as that excercises subprogram CO-RE
relocation mapping logic which doesn't work out of the box without extra
relo storage previously done only for gen_loader case.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220426004511.2691730-11-andrii@kernel.org
---
 tools/testing/selftests/bpf/prog_tests/log_fixup.c | 114 +++++++++++++++++++++
 tools/testing/selftests/bpf/progs/test_log_fixup.c |  38 +++++++
 tools/testing/selftests/bpf/test_progs.h           |  11 ++
 3 files changed, 163 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/log_fixup.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_log_fixup.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/log_fixup.c b/tools/testing/selftests/bpf/prog_tests/log_fixup.c
new file mode 100644
index 000000000000..be3a956cb3a5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/log_fixup.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+#include "test_log_fixup.skel.h"
+
+enum trunc_type {
+	TRUNC_NONE,
+	TRUNC_PARTIAL,
+	TRUNC_FULL,
+};
+
+static void bad_core_relo(size_t log_buf_size, enum trunc_type trunc_type)
+{
+	char log_buf[8 * 1024];
+	struct test_log_fixup* skel;
+	int err;
+
+	skel = test_log_fixup__open();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	bpf_program__set_autoload(skel->progs.bad_relo, true);
+	memset(log_buf, 0, sizeof(log_buf));
+	bpf_program__set_log_buf(skel->progs.bad_relo, log_buf, log_buf_size ?: sizeof(log_buf));
+
+	err = test_log_fixup__load(skel);
+	if (!ASSERT_ERR(err, "load_fail"))
+		goto cleanup;
+
+	ASSERT_HAS_SUBSTR(log_buf,
+			  "0: <invalid CO-RE relocation>\n"
+			  "failed to resolve CO-RE relocation <byte_sz> ",
+			  "log_buf_part1");
+
+	switch (trunc_type) {
+	case TRUNC_NONE:
+		ASSERT_HAS_SUBSTR(log_buf,
+				  "struct task_struct___bad.fake_field (0:1 @ offset 4)\n",
+				  "log_buf_part2");
+		ASSERT_HAS_SUBSTR(log_buf,
+				  "max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0\n",
+				  "log_buf_end");
+		break;
+	case TRUNC_PARTIAL:
+		/* we should get full libbpf message patch */
+		ASSERT_HAS_SUBSTR(log_buf,
+				  "struct task_struct___bad.fake_field (0:1 @ offset 4)\n",
+				  "log_buf_part2");
+		/* we shouldn't get full end of BPF verifier log */
+		ASSERT_NULL(strstr(log_buf, "max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0\n"),
+			    "log_buf_end");
+		break;
+	case TRUNC_FULL:
+		/* we shouldn't get second part of libbpf message patch */
+		ASSERT_NULL(strstr(log_buf, "struct task_struct___bad.fake_field (0:1 @ offset 4)\n"),
+			    "log_buf_part2");
+		/* we shouldn't get full end of BPF verifier log */
+		ASSERT_NULL(strstr(log_buf, "max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0\n"),
+			    "log_buf_end");
+		break;
+	}
+
+	if (env.verbosity > VERBOSE_NONE)
+		printf("LOG:   \n=================\n%s=================\n", log_buf);
+cleanup:
+	test_log_fixup__destroy(skel);
+}
+
+static void bad_core_relo_subprog(void)
+{
+	char log_buf[8 * 1024];
+	struct test_log_fixup* skel;
+	int err;
+
+	skel = test_log_fixup__open();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	bpf_program__set_autoload(skel->progs.bad_relo_subprog, true);
+	bpf_program__set_log_buf(skel->progs.bad_relo_subprog, log_buf, sizeof(log_buf));
+
+	err = test_log_fixup__load(skel);
+	if (!ASSERT_ERR(err, "load_fail"))
+		goto cleanup;
+
+	/* there should be no prog loading log because we specified per-prog log buf */
+	ASSERT_HAS_SUBSTR(log_buf,
+			  ": <invalid CO-RE relocation>\n"
+			  "failed to resolve CO-RE relocation <byte_off> ",
+			  "log_buf");
+	ASSERT_HAS_SUBSTR(log_buf,
+			  "struct task_struct___bad.fake_field_subprog (0:2 @ offset 8)\n",
+			  "log_buf");
+
+	if (env.verbosity > VERBOSE_NONE)
+		printf("LOG:   \n=================\n%s=================\n", log_buf);
+
+cleanup:
+	test_log_fixup__destroy(skel);
+}
+
+void test_log_fixup(void)
+{
+	if (test__start_subtest("bad_core_relo_trunc_none"))
+		bad_core_relo(0, TRUNC_NONE /* full buf */);
+	if (test__start_subtest("bad_core_relo_trunc_partial"))
+		bad_core_relo(300, TRUNC_PARTIAL /* truncate original log a bit */);
+	if (test__start_subtest("bad_core_relo_trunc_full"))
+		bad_core_relo(250, TRUNC_FULL  /* truncate also libbpf's message patch */);
+	if (test__start_subtest("bad_core_relo_subprog"))
+		bad_core_relo_subprog();
+}
diff --git a/tools/testing/selftests/bpf/progs/test_log_fixup.c b/tools/testing/selftests/bpf/progs/test_log_fixup.c
new file mode 100644
index 000000000000..a78980d897b3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_log_fixup.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+struct task_struct___bad {
+	int pid;
+	int fake_field;
+	void *fake_field_subprog;
+} __attribute__((preserve_access_index));
+
+SEC("?raw_tp/sys_enter")
+int bad_relo(const void *ctx)
+{
+	static struct task_struct___bad *t;
+
+	return bpf_core_field_size(t->fake_field);
+}
+
+static __noinline int bad_subprog(void)
+{
+	static struct task_struct___bad *t;
+
+	/* ugliness below is a field offset relocation */
+	return (void *)&t->fake_field_subprog - (void *)t;
+}
+
+SEC("?raw_tp/sys_enter")
+int bad_relo_subprog(const void *ctx)
+{
+	static struct task_struct___bad *t;
+
+	return bad_subprog() + bpf_core_field_size(t->pid);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 0a102ce460d6..d3fee3b98888 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -292,6 +292,17 @@ int test__join_cgroup(const char *path);
 	___ok;								\
 })
 
+#define ASSERT_HAS_SUBSTR(str, substr, name) ({				\
+	static int duration = 0;					\
+	const char *___str = str;					\
+	const char *___substr = substr;					\
+	bool ___ok = strstr(___str, ___substr) != NULL;			\
+	CHECK(!___ok, (name),						\
+	      "unexpected %s: '%s' is not a substring of '%s'\n",	\
+	      (name), ___substr, ___str);				\
+	___ok;								\
+})
+
 #define ASSERT_OK(res, name) ({						\
 	static int duration = 0;					\
 	long long ___res = (res);					\
-- 
cgit 


From 3527e1ab9a7990530dfb0137ddcfa64bed2915be Mon Sep 17 00:00:00 2001
From: Alistair Popple <alistair@popple.id.au>
Date: Mon, 22 Jun 2020 12:18:32 +1000
Subject: selftests/powerpc: Add matrix multiply assist (MMA) test

Adds a simple test of some basic matrix multiply assist (MMA)
instructions.

Signed-off-by: Alistair Popple <alistair@popple.id.au>
Tested-by: Joel Stanley <joel@jms.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20200622021832.15870-1-alistair@popple.id.au
---
 tools/testing/selftests/powerpc/include/utils.h |  5 +++
 tools/testing/selftests/powerpc/math/Makefile   |  4 ++-
 tools/testing/selftests/powerpc/math/mma.S      | 33 +++++++++++++++++
 tools/testing/selftests/powerpc/math/mma.c      | 48 +++++++++++++++++++++++++
 4 files changed, 89 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/math/mma.S
 create mode 100644 tools/testing/selftests/powerpc/math/mma.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h
index b7d188fc87c7..b9fa9cd709df 100644
--- a/tools/testing/selftests/powerpc/include/utils.h
+++ b/tools/testing/selftests/powerpc/include/utils.h
@@ -135,6 +135,11 @@ do {								\
 #define PPC_FEATURE2_ARCH_3_1 0x00040000
 #endif
 
+/* POWER10 features */
+#ifndef PPC_FEATURE2_MMA
+#define PPC_FEATURE2_MMA 0x00020000
+#endif
+
 #if defined(__powerpc64__)
 #define UCONTEXT_NIA(UC)	(UC)->uc_mcontext.gp_regs[PT_NIP]
 #define UCONTEXT_MSR(UC)	(UC)->uc_mcontext.gp_regs[PT_MSR]
diff --git a/tools/testing/selftests/powerpc/math/Makefile b/tools/testing/selftests/powerpc/math/Makefile
index fcc91c205984..3948f7c510aa 100644
--- a/tools/testing/selftests/powerpc/math/Makefile
+++ b/tools/testing/selftests/powerpc/math/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal fpu_denormal vmx_syscall vmx_preempt vmx_signal vsx_preempt
+TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal fpu_denormal vmx_syscall vmx_preempt vmx_signal vsx_preempt mma
 
 top_srcdir = ../../../../..
 include ../../lib.mk
@@ -17,3 +17,5 @@ $(OUTPUT)/vmx_signal: vmx_asm.S ../utils.c
 
 $(OUTPUT)/vsx_preempt: CFLAGS += -mvsx
 $(OUTPUT)/vsx_preempt: vsx_asm.S ../utils.c
+
+$(OUTPUT)/mma: mma.c mma.S ../utils.c
diff --git a/tools/testing/selftests/powerpc/math/mma.S b/tools/testing/selftests/powerpc/math/mma.S
new file mode 100644
index 000000000000..8528c9849565
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/mma.S
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Test basic matrix multiply assist (MMA) functionality if available.
+ *
+ * Copyright 2020, Alistair Popple, IBM Corp.
+ */
+	.global test_mma
+test_mma:
+	/* Load accumulator via VSX registers from image passed in r3 */
+	lxvh8x	4,0,3
+	lxvh8x	5,0,4
+
+	/* Clear and prime the accumulator (xxsetaccz) */
+	.long	0x7c030162
+
+	/* Prime the accumulator with MMA VSX move to accumulator
+	* X-form (xxmtacc) (not needed due to above zeroing) */
+	//.long 0x7c010162
+
+	/* xvi16ger2s */
+	.long	0xec042958
+
+	/* Store result in image passed in r5 */
+	stxvw4x	0,0,5
+	addi	5,5,16
+	stxvw4x	1,0,5
+	addi	5,5,16
+	stxvw4x	2,0,5
+	addi	5,5,16
+	stxvw4x	3,0,5
+	addi	5,5,16
+
+	blr
diff --git a/tools/testing/selftests/powerpc/math/mma.c b/tools/testing/selftests/powerpc/math/mma.c
new file mode 100644
index 000000000000..3a71808c993f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/mma.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test basic matrix multiply assist (MMA) functionality if available.
+ *
+ * Copyright 2020, Alistair Popple, IBM Corp.
+ */
+#include <stdio.h>
+#include <stdint.h>
+
+#include "utils.h"
+
+extern void test_mma(uint16_t (*)[8], uint16_t (*)[8], uint32_t (*)[4*4]);
+
+static int mma(void)
+{
+	int i;
+	int rc = 0;
+	uint16_t x[] = {1, 0, 2, 0, 3, 0, 4, 0};
+	uint16_t y[] = {1, 0, 2, 0, 3, 0, 4, 0};
+	uint32_t z[4*4];
+	uint32_t exp[4*4] = {1, 2, 3, 4,
+			     2, 4, 6, 8,
+			     3, 6, 9, 12,
+			     4, 8, 12, 16};
+
+	SKIP_IF_MSG(!have_hwcap2(PPC_FEATURE2_ARCH_3_1), "Need ISAv3.1");
+	SKIP_IF_MSG(!have_hwcap2(PPC_FEATURE2_MMA), "Need MMA");
+
+	test_mma(&x, &y, &z);
+
+	for (i = 0; i < 16; i++) {
+		printf("MMA[%d] = %d ", i, z[i]);
+
+		if (z[i] == exp[i]) {
+			printf(" (Correct)\n");
+		} else {
+			printf(" (Incorrect)\n");
+			rc = 1;
+		}
+	}
+
+	return rc;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(mma, "mma");
+}
-- 
cgit 


From b6e074e171bc5cc83eb91b03c2558c2d9fccf26e Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Tue, 26 Apr 2022 14:57:11 -0700
Subject: selftests: mptcp: add infinite map testcase

Add the single subflow test case for MP_FAIL, to test the infinite
mapping case. Use the test_linkfail value to make 128KB test files.

Add a new function reset_with_fail(), in it use 'iptables' and 'tc
action pedit' rules to produce the bit flips to trigger the checksum
failures. Set validate_checksum to enable checksums for the MP_FAIL
tests without passing the '-C' argument. Set check_invert flag to
enable the invert bytes check for the output data in check_transfer().
Instead of the file mismatch error, this test prints out the inverted
bytes.

Add a new function pedit_action_pkts() to get the numbers of the packets
edited by the tc pedit actions. Print this numbers to the output.

Also add the needed kernel configures in the selftests config file.

Suggested-by: Davide Caratti <dcaratti@redhat.com>
Co-developed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/mptcp/config        |  8 +++
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 70 ++++++++++++++++++++++++-
 2 files changed, 77 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config
index d36b7da5082a..38021a0dd527 100644
--- a/tools/testing/selftests/net/mptcp/config
+++ b/tools/testing/selftests/net/mptcp/config
@@ -12,6 +12,9 @@ CONFIG_NF_TABLES=m
 CONFIG_NFT_COMPAT=m
 CONFIG_NETFILTER_XTABLES=m
 CONFIG_NETFILTER_XT_MATCH_BPF=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
 CONFIG_NF_TABLES_INET=y
 CONFIG_NFT_TPROXY=m
 CONFIG_NFT_SOCKET=m
@@ -19,3 +22,8 @@ CONFIG_IP_ADVANCED_ROUTER=y
 CONFIG_IP_MULTIPLE_TABLES=y
 CONFIG_IP_NF_TARGET_REJECT=m
 CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_NET_ACT_CSUM=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_SCH_INGRESS=m
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 9eb4d889a24a..34152a50a3e2 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -266,6 +266,58 @@ reset_with_allow_join_id0()
 	ip netns exec $ns2 sysctl -q net.mptcp.allow_join_initial_addr_port=$ns2_enable
 }
 
+# Modify TCP payload without corrupting the TCP packet
+#
+# This rule inverts a 8-bit word at byte offset 148 for the 2nd TCP ACK packets
+# carrying enough data.
+# Once it is done, the TCP Checksum field is updated so the packet is still
+# considered as valid at the TCP level.
+# Because the MPTCP checksum, covering the TCP options and data, has not been
+# updated, the modification will be detected and an MP_FAIL will be emitted:
+# what we want to validate here without corrupting "random" MPTCP options.
+#
+# To avoid having tc producing this pr_info() message for each TCP ACK packets
+# not carrying enough data:
+#
+#     tc action pedit offset 162 out of bounds
+#
+# Netfilter is used to mark packets with enough data.
+reset_with_fail()
+{
+	reset "${1}" || return 1
+
+	ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=1
+	ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=1
+
+	check_invert=1
+	validate_checksum=1
+	local i="$2"
+	local ip="${3:-4}"
+	local tables
+
+	tables="iptables"
+	if [ $ip -eq 6 ]; then
+		tables="ip6tables"
+	fi
+
+	ip netns exec $ns2 $tables \
+		-t mangle \
+		-A OUTPUT \
+		-o ns2eth$i \
+		-p tcp \
+		-m length --length 150:9999 \
+		-m statistic --mode nth --packet 1 --every 99999 \
+		-j MARK --set-mark 42 || exit 1
+
+	tc -n $ns2 qdisc add dev ns2eth$i clsact || exit 1
+	tc -n $ns2 filter add dev ns2eth$i egress \
+		protocol ip prio 1000 \
+		handle 42 fw \
+		action pedit munge offset 148 u8 invert \
+		pipe csum tcp \
+		index 100 || exit 1
+}
+
 fail_test()
 {
 	ret=1
@@ -1199,7 +1251,7 @@ chk_join_nr()
 		echo "[ ok ]"
 	fi
 	[ "${dump_stats}" = 1 ] && dump_stats
-	if [ $checksum -eq 1 ]; then
+	if [ $validate_checksum -eq 1 ]; then
 		chk_csum_nr $csum_ns1 $csum_ns2
 		chk_fail_nr $fail_nr $fail_nr
 		chk_rst_nr $rst_nr $rst_nr
@@ -2590,6 +2642,21 @@ fastclose_tests()
 	fi
 }
 
+pedit_action_pkts()
+{
+	tc -n $ns2 -j -s action show action pedit index 100 | \
+		sed 's/.*"packets":\([0-9]\+\),.*/\1/'
+}
+
+fail_tests()
+{
+	# single subflow
+	if reset_with_fail "Infinite map" 1; then
+		run_tests $ns1 $ns2 10.0.1.1 128
+		chk_join_nr 0 0 0 +1 +0 1 0 1 "$(pedit_action_pkts)"
+	fi
+}
+
 implicit_tests()
 {
 	# userspace pm type prevents add_addr
@@ -2658,6 +2725,7 @@ all_tests_sorted=(
 	d@deny_join_id0_tests
 	m@fullmesh_tests
 	z@fastclose_tests
+	F@fail_tests
 	I@implicit_tests
 )
 
-- 
cgit 


From 1f7d325f7d4944db022358a6f0dfe4fc206a6ced Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Tue, 26 Apr 2022 14:57:16 -0700
Subject: selftests: mptcp: check MP_FAIL response mibs

This patch extends chk_fail_nr to check the MP_FAIL response mibs.

Add a new argument invert for chk_fail_nr to allow it can check the
MP_FAIL TX and RX mibs from the opposite direction.

When the infinite map is received before the MP_FAIL response, the
response will be lost. A '-' can be added into fail_tx or fail_rx to
represent that MP_FAIL response TX or RX can be lost when doing the
checks.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 38 +++++++++++++++++++++++--
 1 file changed, 35 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 34152a50a3e2..8023c0773d95 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -1054,13 +1054,38 @@ chk_fail_nr()
 {
 	local fail_tx=$1
 	local fail_rx=$2
+	local ns_invert=${3:-""}
 	local count
 	local dump_stats
+	local ns_tx=$ns1
+	local ns_rx=$ns2
+	local extra_msg=""
+	local allow_tx_lost=0
+	local allow_rx_lost=0
+
+	if [[ $ns_invert = "invert" ]]; then
+		ns_tx=$ns2
+		ns_rx=$ns1
+		extra_msg=" invert"
+	fi
+
+	if [[ "${fail_tx}" = "-"* ]]; then
+		allow_tx_lost=1
+		fail_tx=${fail_tx:1}
+	fi
+	if [[ "${fail_rx}" = "-"* ]]; then
+		allow_rx_lost=1
+		fail_rx=${fail_rx:1}
+	fi
 
 	printf "%-${nr_blank}s %s" " " "ftx"
-	count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMPFailTx | awk '{print $2}')
+	count=$(ip netns exec $ns_tx nstat -as | grep MPTcpExtMPFailTx | awk '{print $2}')
 	[ -z "$count" ] && count=0
 	if [ "$count" != "$fail_tx" ]; then
+		extra_msg="$extra_msg,tx=$count"
+	fi
+	if { [ "$count" != "$fail_tx" ] && [ $allow_tx_lost -eq 0 ]; } ||
+	   { [ "$count" -gt "$fail_tx" ] && [ $allow_tx_lost -eq 1 ]; }; then
 		echo "[fail] got $count MP_FAIL[s] TX expected $fail_tx"
 		fail_test
 		dump_stats=1
@@ -1069,17 +1094,23 @@ chk_fail_nr()
 	fi
 
 	echo -n " - failrx"
-	count=$(ip netns exec $ns2 nstat -as | grep MPTcpExtMPFailRx | awk '{print $2}')
+	count=$(ip netns exec $ns_rx nstat -as | grep MPTcpExtMPFailRx | awk '{print $2}')
 	[ -z "$count" ] && count=0
 	if [ "$count" != "$fail_rx" ]; then
+		extra_msg="$extra_msg,rx=$count"
+	fi
+	if { [ "$count" != "$fail_rx" ] && [ $allow_rx_lost -eq 0 ]; } ||
+	   { [ "$count" -gt "$fail_rx" ] && [ $allow_rx_lost -eq 1 ]; }; then
 		echo "[fail] got $count MP_FAIL[s] RX expected $fail_rx"
 		fail_test
 		dump_stats=1
 	else
-		echo "[ ok ]"
+		echo -n "[ ok ]"
 	fi
 
 	[ "${dump_stats}" = 1 ] && dump_stats
+
+	echo "$extra_msg"
 }
 
 chk_fclose_nr()
@@ -2654,6 +2685,7 @@ fail_tests()
 	if reset_with_fail "Infinite map" 1; then
 		run_tests $ns1 $ns2 10.0.1.1 128
 		chk_join_nr 0 0 0 +1 +0 1 0 1 "$(pedit_action_pkts)"
+		chk_fail_nr 1 -1 invert
 	fi
 }
 
-- 
cgit 


From 53f368bfff31be43ad78fecfb04d0e4c0ebceef5 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Tue, 26 Apr 2022 14:57:17 -0700
Subject: selftests: mptcp: print extra msg in chk_csum_nr

When the multiple checksum errors occur in chk_csum_nr(), print the
numbers of the errors as an extra message.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 8023c0773d95..e5c8fc2816fb 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -1013,6 +1013,7 @@ chk_csum_nr()
 	local csum_ns2=${2:-0}
 	local count
 	local dump_stats
+	local extra_msg=""
 	local allow_multi_errors_ns1=0
 	local allow_multi_errors_ns2=0
 
@@ -1028,6 +1029,9 @@ chk_csum_nr()
 	printf "%-${nr_blank}s %s" " " "sum"
 	count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}')
 	[ -z "$count" ] && count=0
+	if [ "$count" != "$csum_ns1" ]; then
+		extra_msg="$extra_msg ns1=$count"
+	fi
 	if { [ "$count" != $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 0 ]; } ||
 	   { [ "$count" -lt $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 1 ]; }; then
 		echo "[fail] got $count data checksum error[s] expected $csum_ns1"
@@ -1039,15 +1043,20 @@ chk_csum_nr()
 	echo -n " - csum  "
 	count=$(ip netns exec $ns2 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}')
 	[ -z "$count" ] && count=0
+	if [ "$count" != "$csum_ns2" ]; then
+		extra_msg="$extra_msg ns2=$count"
+	fi
 	if { [ "$count" != $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 0 ]; } ||
 	   { [ "$count" -lt $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 1 ]; }; then
 		echo "[fail] got $count data checksum error[s] expected $csum_ns2"
 		fail_test
 		dump_stats=1
 	else
-		echo "[ ok ]"
+		echo -n "[ ok ]"
 	fi
 	[ "${dump_stats}" = 1 ] && dump_stats
+
+	echo "$extra_msg"
 }
 
 chk_fail_nr()
-- 
cgit 


From 0925225956bbef863d51ee882d4d20c9a9c90db2 Mon Sep 17 00:00:00 2001
From: Mykola Lysenko <mykolal@fb.com>
Date: Tue, 26 Apr 2022 21:13:53 -0700
Subject: bpf/selftests: Add granular subtest output for prog_test

Implement per subtest log collection for both parallel
and sequential test execution. This allows granular
per-subtest error output in the 'All error logs' section.
Add subtest log transfer into the protocol during the
parallel test execution.

Move all test log printing logic into dump_test_log
function. One exception is the output of test names when
verbose printing is enabled. Move test name/result
printing into separate functions to avoid repetition.

Print all successful subtest results in the log. Print
only failed test logs when test does not have subtests.
Or only failed subtests' logs when test has subtests.

Disable 'All error logs' output when verbose mode is
enabled. This functionality was already broken and is
causing confusion.

Signed-off-by: Mykola Lysenko <mykolal@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220427041353.246007-1-mykolal@fb.com
---
 tools/testing/selftests/bpf/test_progs.c | 626 ++++++++++++++++++++++---------
 tools/testing/selftests/bpf/test_progs.h |  35 +-
 2 files changed, 485 insertions(+), 176 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index c536d1d29d57..22fe283710fa 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -18,6 +18,91 @@
 #include <sys/socket.h>
 #include <sys/un.h>
 
+static bool verbose(void)
+{
+	return env.verbosity > VERBOSE_NONE;
+}
+
+static void stdio_hijack_init(char **log_buf, size_t *log_cnt)
+{
+#ifdef __GLIBC__
+	if (verbose() && env.worker_id == -1) {
+		/* nothing to do, output to stdout by default */
+		return;
+	}
+
+	fflush(stdout);
+	fflush(stderr);
+
+	stdout = open_memstream(log_buf, log_cnt);
+	if (!stdout) {
+		stdout = env.stdout;
+		perror("open_memstream");
+		return;
+	}
+
+	if (env.subtest_state)
+		env.subtest_state->stdout = stdout;
+	else
+		env.test_state->stdout = stdout;
+
+	stderr = stdout;
+#endif
+}
+
+static void stdio_hijack(char **log_buf, size_t *log_cnt)
+{
+#ifdef __GLIBC__
+	if (verbose() && env.worker_id == -1) {
+		/* nothing to do, output to stdout by default */
+		return;
+	}
+
+	env.stdout = stdout;
+	env.stderr = stderr;
+
+	stdio_hijack_init(log_buf, log_cnt);
+#endif
+}
+
+static void stdio_restore_cleanup(void)
+{
+#ifdef __GLIBC__
+	if (verbose() && env.worker_id == -1) {
+		/* nothing to do, output to stdout by default */
+		return;
+	}
+
+	fflush(stdout);
+
+	if (env.subtest_state) {
+		fclose(env.subtest_state->stdout);
+		stdout = env.test_state->stdout;
+		stderr = env.test_state->stdout;
+	} else {
+		fclose(env.test_state->stdout);
+	}
+#endif
+}
+
+static void stdio_restore(void)
+{
+#ifdef __GLIBC__
+	if (verbose() && env.worker_id == -1) {
+		/* nothing to do, output to stdout by default */
+		return;
+	}
+
+	if (stdout == env.stdout)
+		return;
+
+	stdio_restore_cleanup();
+
+	stdout = env.stdout;
+	stderr = env.stderr;
+#endif
+}
+
 /* Adapted from perf/util/string.c */
 static bool glob_match(const char *str, const char *pat)
 {
@@ -130,30 +215,90 @@ static bool should_run_subtest(struct test_selector *sel,
 	return subtest_num < subtest_sel->num_set_len && subtest_sel->num_set[subtest_num];
 }
 
+static char *test_result(bool failed, bool skipped)
+{
+	return failed ? "FAIL" : (skipped ? "SKIP" : "OK");
+}
+
+static void print_test_log(char *log_buf, size_t log_cnt)
+{
+	log_buf[log_cnt] = '\0';
+	fprintf(env.stdout, "%s", log_buf);
+	if (log_buf[log_cnt - 1] != '\n')
+		fprintf(env.stdout, "\n");
+}
+
+static void print_test_name(int test_num, const char *test_name, char *result)
+{
+	fprintf(env.stdout, "#%-9d %s", test_num, test_name);
+
+	if (result)
+		fprintf(env.stdout, ":%s", result);
+
+	fprintf(env.stdout, "\n");
+}
+
+static void print_subtest_name(int test_num, int subtest_num,
+			       const char *test_name, char *subtest_name,
+			       char *result)
+{
+	fprintf(env.stdout, "#%-3d/%-5d %s/%s",
+		test_num, subtest_num,
+		test_name, subtest_name);
+
+	if (result)
+		fprintf(env.stdout, ":%s", result);
+
+	fprintf(env.stdout, "\n");
+}
+
 static void dump_test_log(const struct prog_test_def *test,
 			  const struct test_state *test_state,
-			  bool force_failed)
+			  bool skip_ok_subtests,
+			  bool par_exec_result)
 {
-	bool failed = test_state->error_cnt > 0 || force_failed;
+	bool test_failed = test_state->error_cnt > 0;
+	bool force_log = test_state->force_log;
+	bool print_test = verbose() || force_log || test_failed;
+	int i;
+	struct subtest_state *subtest_state;
+	bool subtest_failed;
+	bool print_subtest;
 
-	/* worker always holds log */
+	/* we do not print anything in the worker thread */
 	if (env.worker_id != -1)
 		return;
 
-	fflush(stdout); /* exports test_state->log_buf & test_state->log_cnt */
+	/* there is nothing to print when verbose log is used and execution
+	 * is not in parallel mode
+	 */
+	if (verbose() && !par_exec_result)
+		return;
+
+	if (test_state->log_cnt && print_test)
+		print_test_log(test_state->log_buf, test_state->log_cnt);
 
-	fprintf(env.stdout, "#%-3d %s:%s\n",
-		test->test_num, test->test_name,
-		failed ? "FAIL" : (test_state->skip_cnt ? "SKIP" : "OK"));
+	for (i = 0; i < test_state->subtest_num; i++) {
+		subtest_state = &test_state->subtest_states[i];
+		subtest_failed = subtest_state->error_cnt;
+		print_subtest = verbose() || force_log || subtest_failed;
 
-	if (env.verbosity > VERBOSE_NONE || test_state->force_log || failed) {
-		if (test_state->log_cnt) {
-			test_state->log_buf[test_state->log_cnt] = '\0';
-			fprintf(env.stdout, "%s", test_state->log_buf);
-			if (test_state->log_buf[test_state->log_cnt - 1] != '\n')
-				fprintf(env.stdout, "\n");
+		if (skip_ok_subtests && !subtest_failed)
+			continue;
+
+		if (subtest_state->log_cnt && print_subtest) {
+			print_test_log(subtest_state->log_buf,
+				       subtest_state->log_cnt);
 		}
+
+		print_subtest_name(test->test_num, i + 1,
+				   test->test_name, subtest_state->name,
+				   test_result(subtest_state->error_cnt,
+					       subtest_state->skipped));
 	}
+
+	print_test_name(test->test_num, test->test_name,
+			test_result(test_failed, test_state->skip_cnt));
 }
 
 static void stdio_restore(void);
@@ -205,35 +350,50 @@ static void restore_netns(void)
 void test__end_subtest(void)
 {
 	struct prog_test_def *test = env.test;
-	struct test_state *state = env.test_state;
-	int sub_error_cnt = state->error_cnt - state->old_error_cnt;
-
-	fprintf(stdout, "#%d/%d %s/%s:%s\n",
-	       test->test_num, state->subtest_num, test->test_name, state->subtest_name,
-	       sub_error_cnt ? "FAIL" : (state->subtest_skip_cnt ? "SKIP" : "OK"));
-
-	if (sub_error_cnt == 0) {
-		if (state->subtest_skip_cnt == 0) {
-			state->sub_succ_cnt++;
-		} else {
-			state->subtest_skip_cnt = 0;
-			state->skip_cnt++;
-		}
+	struct test_state *test_state = env.test_state;
+	struct subtest_state *subtest_state = env.subtest_state;
+
+	if (subtest_state->error_cnt) {
+		test_state->error_cnt++;
+	} else {
+		if (!subtest_state->skipped)
+			test_state->sub_succ_cnt++;
+		else
+			test_state->skip_cnt++;
 	}
 
-	free(state->subtest_name);
-	state->subtest_name = NULL;
+	if (verbose() && !env.workers)
+		print_subtest_name(test->test_num, test_state->subtest_num,
+				   test->test_name, subtest_state->name,
+				   test_result(subtest_state->error_cnt,
+					       subtest_state->skipped));
+
+	stdio_restore_cleanup();
+	env.subtest_state = NULL;
 }
 
 bool test__start_subtest(const char *subtest_name)
 {
 	struct prog_test_def *test = env.test;
 	struct test_state *state = env.test_state;
+	struct subtest_state *subtest_state;
+	size_t sub_state_size = sizeof(*subtest_state);
 
-	if (state->subtest_name)
+	if (env.subtest_state)
 		test__end_subtest();
 
 	state->subtest_num++;
+	state->subtest_states =
+		realloc(state->subtest_states,
+			state->subtest_num * sub_state_size);
+	if (!state->subtest_states) {
+		fprintf(stderr, "Not enough memory to allocate subtest result\n");
+		return false;
+	}
+
+	subtest_state = &state->subtest_states[state->subtest_num - 1];
+
+	memset(subtest_state, 0, sub_state_size);
 
 	if (!subtest_name || !subtest_name[0]) {
 		fprintf(env.stderr,
@@ -242,21 +402,25 @@ bool test__start_subtest(const char *subtest_name)
 		return false;
 	}
 
+	subtest_state->name = strdup(subtest_name);
+	if (!subtest_state->name) {
+		fprintf(env.stderr,
+			"Subtest #%d: failed to copy subtest name!\n",
+			state->subtest_num);
+		return false;
+	}
+
 	if (!should_run_subtest(&env.test_selector,
 				&env.subtest_selector,
 				state->subtest_num,
 				test->test_name,
-				subtest_name))
-		return false;
-
-	state->subtest_name = strdup(subtest_name);
-	if (!state->subtest_name) {
-		fprintf(env.stderr,
-			"Subtest #%d: failed to copy subtest name!\n",
-			state->subtest_num);
+				subtest_name)) {
+		subtest_state->skipped = true;
 		return false;
 	}
-	state->old_error_cnt = state->error_cnt;
+
+	env.subtest_state = subtest_state;
+	stdio_hijack_init(&subtest_state->log_buf, &subtest_state->log_cnt);
 
 	return true;
 }
@@ -268,15 +432,18 @@ void test__force_log(void)
 
 void test__skip(void)
 {
-	if (env.test_state->subtest_name)
-		env.test_state->subtest_skip_cnt++;
+	if (env.subtest_state)
+		env.subtest_state->skipped = true;
 	else
 		env.test_state->skip_cnt++;
 }
 
 void test__fail(void)
 {
-	env.test_state->error_cnt++;
+	if (env.subtest_state)
+		env.subtest_state->error_cnt++;
+	else
+		env.test_state->error_cnt++;
 }
 
 int test__join_cgroup(const char *path)
@@ -455,14 +622,14 @@ static void unload_bpf_testmod(void)
 		fprintf(env.stderr, "Failed to trigger kernel-side RCU sync!\n");
 	if (delete_module("bpf_testmod", 0)) {
 		if (errno == ENOENT) {
-			if (env.verbosity > VERBOSE_NONE)
+			if (verbose())
 				fprintf(stdout, "bpf_testmod.ko is already unloaded.\n");
 			return;
 		}
 		fprintf(env.stderr, "Failed to unload bpf_testmod.ko from kernel: %d\n", -errno);
 		return;
 	}
-	if (env.verbosity > VERBOSE_NONE)
+	if (verbose())
 		fprintf(stdout, "Successfully unloaded bpf_testmod.ko.\n");
 }
 
@@ -473,7 +640,7 @@ static int load_bpf_testmod(void)
 	/* ensure previous instance of the module is unloaded */
 	unload_bpf_testmod();
 
-	if (env.verbosity > VERBOSE_NONE)
+	if (verbose())
 		fprintf(stdout, "Loading bpf_testmod.ko...\n");
 
 	fd = open("bpf_testmod.ko", O_RDONLY);
@@ -488,7 +655,7 @@ static int load_bpf_testmod(void)
 	}
 	close(fd);
 
-	if (env.verbosity > VERBOSE_NONE)
+	if (verbose())
 		fprintf(stdout, "Successfully loaded bpf_testmod.ko.\n");
 	return 0;
 }
@@ -655,7 +822,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 			}
 		}
 
-		if (env->verbosity > VERBOSE_NONE) {
+		if (verbose()) {
 			if (setenv("SELFTESTS_VERBOSE", "1", 1) == -1) {
 				fprintf(stderr,
 					"Unable to setenv SELFTESTS_VERBOSE=1 (errno=%d)",
@@ -696,44 +863,6 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 	return 0;
 }
 
-static void stdio_hijack(char **log_buf, size_t *log_cnt)
-{
-#ifdef __GLIBC__
-	env.stdout = stdout;
-	env.stderr = stderr;
-
-	if (env.verbosity > VERBOSE_NONE && env.worker_id == -1) {
-		/* nothing to do, output to stdout by default */
-		return;
-	}
-
-	/* stdout and stderr -> buffer */
-	fflush(stdout);
-
-	stdout = open_memstream(log_buf, log_cnt);
-	if (!stdout) {
-		stdout = env.stdout;
-		perror("open_memstream");
-		return;
-	}
-
-	stderr = stdout;
-#endif
-}
-
-static void stdio_restore(void)
-{
-#ifdef __GLIBC__
-	if (stdout == env.stdout)
-		return;
-
-	fclose(stdout);
-
-	stdout = env.stdout;
-	stderr = env.stderr;
-#endif
-}
-
 /*
  * Determine if test_progs is running as a "flavored" test runner and switch
  * into corresponding sub-directory to load correct BPF objects.
@@ -759,7 +888,7 @@ int cd_flavor_subdir(const char *exec_name)
 	if (!flavor)
 		return 0;
 	flavor++;
-	if (env.verbosity > VERBOSE_NONE)
+	if (verbose())
 		fprintf(stdout,	"Switching to flavor '%s' subdirectory...\n", flavor);
 
 	return chdir(flavor);
@@ -812,8 +941,10 @@ void crash_handler(int signum)
 
 	sz = backtrace(bt, ARRAY_SIZE(bt));
 
-	if (env.test)
-		dump_test_log(env.test, env.test_state, true);
+	if (env.test) {
+		env.test_state->error_cnt++;
+		dump_test_log(env.test, env.test_state, true, false);
+	}
 	if (env.stdout)
 		stdio_restore();
 	if (env.worker_id != -1)
@@ -839,13 +970,18 @@ static inline const char *str_msg(const struct msg *msg, char *buf)
 {
 	switch (msg->type) {
 	case MSG_DO_TEST:
-		sprintf(buf, "MSG_DO_TEST %d", msg->do_test.test_num);
+		sprintf(buf, "MSG_DO_TEST %d", msg->do_test.num);
 		break;
 	case MSG_TEST_DONE:
 		sprintf(buf, "MSG_TEST_DONE %d (log: %d)",
-			msg->test_done.test_num,
+			msg->test_done.num,
 			msg->test_done.have_log);
 		break;
+	case MSG_SUBTEST_DONE:
+		sprintf(buf, "MSG_SUBTEST_DONE %d (log: %d)",
+			msg->subtest_done.num,
+			msg->subtest_done.have_log);
+		break;
 	case MSG_TEST_LOG:
 		sprintf(buf, "MSG_TEST_LOG (cnt: %ld, last: %d)",
 			strlen(msg->test_log.log_buf),
@@ -901,12 +1037,14 @@ static void run_one_test(int test_num)
 		test->run_serial_test();
 
 	/* ensure last sub-test is finalized properly */
-	if (state->subtest_name)
+	if (env.subtest_state)
 		test__end_subtest();
 
 	state->tested = true;
 
-	dump_test_log(test, state, false);
+	if (verbose() && env.worker_id == -1)
+		print_test_name(test_num + 1, test->test_name,
+				test_result(state->error_cnt, state->skip_cnt));
 
 	reset_affinity();
 	restore_netns();
@@ -914,6 +1052,8 @@ static void run_one_test(int test_num)
 		cleanup_cgroup_environment();
 
 	stdio_restore();
+
+	dump_test_log(test, state, false, false);
 }
 
 struct dispatch_data {
@@ -921,6 +1061,73 @@ struct dispatch_data {
 	int sock_fd;
 };
 
+static int read_prog_test_msg(int sock_fd, struct msg *msg, enum msg_type type)
+{
+	if (recv_message(sock_fd, msg) < 0)
+		return 1;
+
+	if (msg->type != type) {
+		printf("%s: unexpected message type %d. expected %d\n", __func__, msg->type, type);
+		return 1;
+	}
+
+	return 0;
+}
+
+static int dispatch_thread_read_log(int sock_fd, char **log_buf, size_t *log_cnt)
+{
+	FILE *log_fp = NULL;
+
+	log_fp = open_memstream(log_buf, log_cnt);
+	if (!log_fp)
+		return 1;
+
+	while (true) {
+		struct msg msg;
+
+		if (read_prog_test_msg(sock_fd, &msg, MSG_TEST_LOG))
+			return 1;
+
+		fprintf(log_fp, "%s", msg.test_log.log_buf);
+		if (msg.test_log.is_last)
+			break;
+	}
+	fclose(log_fp);
+	log_fp = NULL;
+	return 0;
+}
+
+static int dispatch_thread_send_subtests(int sock_fd, struct test_state *state)
+{
+	struct msg msg;
+	struct subtest_state *subtest_state;
+	int subtest_num = state->subtest_num;
+
+	state->subtest_states = malloc(subtest_num * sizeof(*subtest_state));
+
+	for (int i = 0; i < subtest_num; i++) {
+		subtest_state = &state->subtest_states[i];
+
+		memset(subtest_state, 0, sizeof(*subtest_state));
+
+		if (read_prog_test_msg(sock_fd, &msg, MSG_SUBTEST_DONE))
+			return 1;
+
+		subtest_state->name = strdup(msg.subtest_done.name);
+		subtest_state->error_cnt = msg.subtest_done.error_cnt;
+		subtest_state->skipped = msg.subtest_done.skipped;
+
+		/* collect all logs */
+		if (msg.subtest_done.have_log)
+			if (dispatch_thread_read_log(sock_fd,
+						     &subtest_state->log_buf,
+						     &subtest_state->log_cnt))
+				return 1;
+	}
+
+	return 0;
+}
+
 static void *dispatch_thread(void *ctx)
 {
 	struct dispatch_data *data = ctx;
@@ -957,8 +1164,9 @@ static void *dispatch_thread(void *ctx)
 		{
 			struct msg msg_do_test;
 
+			memset(&msg_do_test, 0, sizeof(msg_do_test));
 			msg_do_test.type = MSG_DO_TEST;
-			msg_do_test.do_test.test_num = test_to_run;
+			msg_do_test.do_test.num = test_to_run;
 			if (send_message(sock_fd, &msg_do_test) < 0) {
 				perror("Fail to send command");
 				goto done;
@@ -967,49 +1175,39 @@ static void *dispatch_thread(void *ctx)
 		}
 
 		/* wait for test done */
-		{
-			int err;
-			struct msg msg_test_done;
+		do {
+			struct msg msg;
 
-			err = recv_message(sock_fd, &msg_test_done);
-			if (err < 0)
+			if (read_prog_test_msg(sock_fd, &msg, MSG_TEST_DONE))
 				goto error;
-			if (msg_test_done.type != MSG_TEST_DONE)
-				goto error;
-			if (test_to_run != msg_test_done.test_done.test_num)
+			if (test_to_run != msg.test_done.num)
 				goto error;
 
 			state = &test_states[test_to_run];
 			state->tested = true;
-			state->error_cnt = msg_test_done.test_done.error_cnt;
-			state->skip_cnt = msg_test_done.test_done.skip_cnt;
-			state->sub_succ_cnt = msg_test_done.test_done.sub_succ_cnt;
+			state->error_cnt = msg.test_done.error_cnt;
+			state->skip_cnt = msg.test_done.skip_cnt;
+			state->sub_succ_cnt = msg.test_done.sub_succ_cnt;
+			state->subtest_num = msg.test_done.subtest_num;
 
 			/* collect all logs */
-			if (msg_test_done.test_done.have_log) {
-				log_fp = open_memstream(&state->log_buf, &state->log_cnt);
-				if (!log_fp)
+			if (msg.test_done.have_log) {
+				if (dispatch_thread_read_log(sock_fd,
+							     &state->log_buf,
+							     &state->log_cnt))
 					goto error;
+			}
 
-				while (true) {
-					struct msg msg_log;
-
-					if (recv_message(sock_fd, &msg_log) < 0)
-						goto error;
-					if (msg_log.type != MSG_TEST_LOG)
-						goto error;
+			/* collect all subtests and subtest logs */
+			if (!state->subtest_num)
+				break;
 
-					fprintf(log_fp, "%s", msg_log.test_log.log_buf);
-					if (msg_log.test_log.is_last)
-						break;
-				}
-				fclose(log_fp);
-				log_fp = NULL;
-			}
-		} /* wait for test done */
+			if (dispatch_thread_send_subtests(sock_fd, state))
+				goto error;
+		} while (false);
 
 		pthread_mutex_lock(&stdout_output_lock);
-		dump_test_log(test, state, false);
+		dump_test_log(test, state, false, true);
 		pthread_mutex_unlock(&stdout_output_lock);
 	} /* while (true) */
 error:
@@ -1052,18 +1250,24 @@ static void calculate_summary_and_print_errors(struct test_env *env)
 			succ_cnt++;
 	}
 
-	if (fail_cnt)
+	/*
+	 * We only print error logs summary when there are failed tests and
+	 * verbose mode is not enabled. Otherwise, results may be incosistent.
+	 *
+	 */
+	if (!verbose() && fail_cnt) {
 		printf("\nAll error logs:\n");
 
-	/* print error logs again */
-	for (i = 0; i < prog_test_cnt; i++) {
-		struct prog_test_def *test = &prog_test_defs[i];
-		struct test_state *state = &test_states[i];
+		/* print error logs again */
+		for (i = 0; i < prog_test_cnt; i++) {
+			struct prog_test_def *test = &prog_test_defs[i];
+			struct test_state *state = &test_states[i];
 
-		if (!state->tested || !state->error_cnt)
-			continue;
+			if (!state->tested || !state->error_cnt)
+				continue;
 
-		dump_test_log(test, state, true);
+			dump_test_log(test, state, true, true);
+		}
 	}
 
 	printf("Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
@@ -1154,6 +1358,90 @@ static void server_main(void)
 	}
 }
 
+static void worker_main_send_log(int sock, char *log_buf, size_t log_cnt)
+{
+	char *src;
+	size_t slen;
+
+	src = log_buf;
+	slen = log_cnt;
+	while (slen) {
+		struct msg msg_log;
+		char *dest;
+		size_t len;
+
+		memset(&msg_log, 0, sizeof(msg_log));
+		msg_log.type = MSG_TEST_LOG;
+		dest = msg_log.test_log.log_buf;
+		len = slen >= MAX_LOG_TRUNK_SIZE ? MAX_LOG_TRUNK_SIZE : slen;
+		memcpy(dest, src, len);
+
+		src += len;
+		slen -= len;
+		if (!slen)
+			msg_log.test_log.is_last = true;
+
+		assert(send_message(sock, &msg_log) >= 0);
+	}
+}
+
+static void free_subtest_state(struct subtest_state *state)
+{
+	if (state->log_buf) {
+		free(state->log_buf);
+		state->log_buf = NULL;
+		state->log_cnt = 0;
+	}
+	free(state->name);
+	state->name = NULL;
+}
+
+static int worker_main_send_subtests(int sock, struct test_state *state)
+{
+	int i, result = 0;
+	struct msg msg;
+	struct subtest_state *subtest_state;
+
+	memset(&msg, 0, sizeof(msg));
+	msg.type = MSG_SUBTEST_DONE;
+
+	for (i = 0; i < state->subtest_num; i++) {
+		subtest_state = &state->subtest_states[i];
+
+		msg.subtest_done.num = i;
+
+		strncpy(msg.subtest_done.name, subtest_state->name, MAX_SUBTEST_NAME);
+
+		msg.subtest_done.error_cnt = subtest_state->error_cnt;
+		msg.subtest_done.skipped = subtest_state->skipped;
+		msg.subtest_done.have_log = false;
+
+		if (verbose() || state->force_log || subtest_state->error_cnt) {
+			if (subtest_state->log_cnt)
+				msg.subtest_done.have_log = true;
+		}
+
+		if (send_message(sock, &msg) < 0) {
+			perror("Fail to send message done");
+			result = 1;
+			goto out;
+		}
+
+		/* send logs */
+		if (msg.subtest_done.have_log)
+			worker_main_send_log(sock, subtest_state->log_buf, subtest_state->log_cnt);
+
+		free_subtest_state(subtest_state);
+		free(subtest_state->name);
+	}
+
+out:
+	for (; i < state->subtest_num; i++)
+		free_subtest_state(&state->subtest_states[i]);
+	free(state->subtest_states);
+	return result;
+}
+
 static int worker_main(int sock)
 {
 	save_netns();
@@ -1172,10 +1460,10 @@ static int worker_main(int sock)
 					env.worker_id);
 			goto out;
 		case MSG_DO_TEST: {
-			int test_to_run = msg.do_test.test_num;
+			int test_to_run = msg.do_test.num;
 			struct prog_test_def *test = &prog_test_defs[test_to_run];
 			struct test_state *state = &test_states[test_to_run];
-			struct msg msg_done;
+			struct msg msg;
 
 			if (env.debug)
 				fprintf(stderr, "[%d]: #%d:%s running.\n",
@@ -1185,54 +1473,38 @@ static int worker_main(int sock)
 
 			run_one_test(test_to_run);
 
-			memset(&msg_done, 0, sizeof(msg_done));
-			msg_done.type = MSG_TEST_DONE;
-			msg_done.test_done.test_num = test_to_run;
-			msg_done.test_done.error_cnt = state->error_cnt;
-			msg_done.test_done.skip_cnt = state->skip_cnt;
-			msg_done.test_done.sub_succ_cnt = state->sub_succ_cnt;
-			msg_done.test_done.have_log = false;
+			memset(&msg, 0, sizeof(msg));
+			msg.type = MSG_TEST_DONE;
+			msg.test_done.num = test_to_run;
+			msg.test_done.error_cnt = state->error_cnt;
+			msg.test_done.skip_cnt = state->skip_cnt;
+			msg.test_done.sub_succ_cnt = state->sub_succ_cnt;
+			msg.test_done.subtest_num = state->subtest_num;
+			msg.test_done.have_log = false;
 
-			if (env.verbosity > VERBOSE_NONE || state->force_log || state->error_cnt) {
+			if (verbose() || state->force_log || state->error_cnt) {
 				if (state->log_cnt)
-					msg_done.test_done.have_log = true;
+					msg.test_done.have_log = true;
 			}
-			if (send_message(sock, &msg_done) < 0) {
+			if (send_message(sock, &msg) < 0) {
 				perror("Fail to send message done");
 				goto out;
 			}
 
 			/* send logs */
-			if (msg_done.test_done.have_log) {
-				char *src;
-				size_t slen;
-
-				src = state->log_buf;
-				slen = state->log_cnt;
-				while (slen) {
-					struct msg msg_log;
-					char *dest;
-					size_t len;
-
-					memset(&msg_log, 0, sizeof(msg_log));
-					msg_log.type = MSG_TEST_LOG;
-					dest = msg_log.test_log.log_buf;
-					len = slen >= MAX_LOG_TRUNK_SIZE ? MAX_LOG_TRUNK_SIZE : slen;
-					memcpy(dest, src, len);
-
-					src += len;
-					slen -= len;
-					if (!slen)
-						msg_log.test_log.is_last = true;
-
-					assert(send_message(sock, &msg_log) >= 0);
-				}
-			}
+			if (msg.test_done.have_log)
+				worker_main_send_log(sock, state->log_buf, state->log_cnt);
+
 			if (state->log_buf) {
 				free(state->log_buf);
 				state->log_buf = NULL;
 				state->log_cnt = 0;
 			}
+
+			if (state->subtest_num)
+				if (worker_main_send_subtests(sock, state))
+					goto out;
+
 			if (env.debug)
 				fprintf(stderr, "[%d]: #%d:%s done.\n",
 					env.worker_id,
@@ -1250,6 +1522,23 @@ out:
 	return 0;
 }
 
+static void free_test_states(void)
+{
+	int i, j;
+
+	for (i = 0; i < ARRAY_SIZE(prog_test_defs); i++) {
+		struct test_state *test_state = &test_states[i];
+
+		for (j = 0; j < test_state->subtest_num; j++)
+			free_subtest_state(&test_state->subtest_states[j]);
+
+		free(test_state->subtest_states);
+		free(test_state->log_buf);
+		test_state->subtest_states = NULL;
+		test_state->log_buf = NULL;
+	}
+}
+
 int main(int argc, char **argv)
 {
 	static const struct argp argp = {
@@ -1396,6 +1685,7 @@ out:
 		unload_bpf_testmod();
 
 	free_test_selector(&env.test_selector);
+	free_test_states();
 
 	if (env.succ_cnt + env.fail_cnt + env.skip_cnt == 0)
 		return EXIT_NO_TEST;
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index d3fee3b98888..dd1b91d7985a 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -64,23 +64,31 @@ struct test_selector {
 	int num_set_len;
 };
 
+struct subtest_state {
+	char *name;
+	size_t log_cnt;
+	char *log_buf;
+	int error_cnt;
+	bool skipped;
+
+	FILE *stdout;
+};
+
 struct test_state {
 	bool tested;
 	bool force_log;
 
 	int error_cnt;
 	int skip_cnt;
-	int subtest_skip_cnt;
 	int sub_succ_cnt;
 
-	char *subtest_name;
+	struct subtest_state *subtest_states;
 	int subtest_num;
 
-	/* store counts before subtest started */
-	int old_error_cnt;
-
 	size_t log_cnt;
 	char *log_buf;
+
+	FILE *stdout;
 };
 
 struct test_env {
@@ -96,7 +104,8 @@ struct test_env {
 	bool list_test_names;
 
 	struct prog_test_def *test; /* current running test */
-	struct test_state *test_state; /* current running test result */
+	struct test_state *test_state; /* current running test state */
+	struct subtest_state *subtest_state; /* current running subtest state */
 
 	FILE *stdout;
 	FILE *stderr;
@@ -116,29 +125,39 @@ struct test_env {
 };
 
 #define MAX_LOG_TRUNK_SIZE 8192
+#define MAX_SUBTEST_NAME 1024
 enum msg_type {
 	MSG_DO_TEST = 0,
 	MSG_TEST_DONE = 1,
 	MSG_TEST_LOG = 2,
+	MSG_SUBTEST_DONE = 3,
 	MSG_EXIT = 255,
 };
 struct msg {
 	enum msg_type type;
 	union {
 		struct {
-			int test_num;
+			int num;
 		} do_test;
 		struct {
-			int test_num;
+			int num;
 			int sub_succ_cnt;
 			int error_cnt;
 			int skip_cnt;
 			bool have_log;
+			int subtest_num;
 		} test_done;
 		struct {
 			char log_buf[MAX_LOG_TRUNK_SIZE + 1];
 			bool is_last;
 		} test_log;
+		struct {
+			int num;
+			char name[MAX_SUBTEST_NAME + 1];
+			int error_cnt;
+			bool skipped;
+			bool have_log;
+		} subtest_done;
 	};
 };
 
-- 
cgit 


From 854f856f7ee35d26cdfd26e4eb3f293cc8cd8d12 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Mon, 4 Apr 2022 10:06:11 +0100
Subject: kselftest/arm64: Fix comment for ptrace_sve_get_fpsimd_data()

The comment for ptrace_sve_get_fpsimd_data() doesn't describe what the test
does at all, fix that.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lore.kernel.org/r/20220404090613.181272-2-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 tools/testing/selftests/arm64/fp/sve-ptrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c
index 4c418b2021e0..7682798adbba 100644
--- a/tools/testing/selftests/arm64/fp/sve-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c
@@ -395,7 +395,7 @@ out:
 	free(write_buf);
 }
 
-/* Validate attempting to set SVE data and read SVE data */
+/* Validate attempting to set SVE data and read it via the FPSIMD regset */
 static void ptrace_set_sve_get_fpsimd_data(pid_t child,
 					   const struct vec_type *type,
 					   unsigned int vl)
-- 
cgit 


From 1fb1e285b4a8a3664897c34414787ea825124cb2 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Mon, 4 Apr 2022 10:06:12 +0100
Subject: kselftest/arm64: Remove assumption that tasks start FPSIMD only

Currently the sve-ptrace test for setting and reading FPSIMD data assumes
that the child will start off in FPSIMD only mode and that it can use this
to read some FPSIMD mode SVE ptrace data, skipping the test if it can't.
This isn't an assumption guaranteed by the ABI and also limits how we can
use this testcase within the program. Instead skip the initial read and
just generate a FPSIMD format buffer for the write part of the test, making
the coverage more robust in the face of future kernel and test program
changes.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lore.kernel.org/r/20220404090613.181272-3-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 tools/testing/selftests/arm64/fp/sve-ptrace.c | 39 +++++++++++----------------
 1 file changed, 16 insertions(+), 23 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c
index 7682798adbba..8f6146d89ca4 100644
--- a/tools/testing/selftests/arm64/fp/sve-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c
@@ -46,7 +46,7 @@ static const struct vec_type vec_types[] = {
 
 #define VL_TESTS (((SVE_VQ_MAX - SVE_VQ_MIN) + 1) * 3)
 #define FLAG_TESTS 2
-#define FPSIMD_TESTS 3
+#define FPSIMD_TESTS 2
 
 #define EXPECTED_TESTS ((VL_TESTS + FLAG_TESTS + FPSIMD_TESTS) * ARRAY_SIZE(vec_types))
 
@@ -240,28 +240,24 @@ static void check_u32(unsigned int vl, const char *reg,
 /* Access the FPSIMD registers via the SVE regset */
 static void ptrace_sve_fpsimd(pid_t child, const struct vec_type *type)
 {
-	void *svebuf = NULL;
-	size_t svebufsz = 0;
+	void *svebuf;
 	struct user_sve_header *sve;
 	struct user_fpsimd_state *fpsimd, new_fpsimd;
 	unsigned int i, j;
 	unsigned char *p;
+	int ret;
 
-	/* New process should start with FPSIMD registers only */
-	sve = get_sve(child, type, &svebuf, &svebufsz);
-	if (!sve) {
-		ksft_test_result_fail("get_sve(%s): %s\n",
-				      type->name, strerror(errno));
-
+	svebuf = malloc(SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD));
+	if (!svebuf) {
+		ksft_test_result_fail("Failed to allocate FPSIMD buffer\n");
 		return;
-	} else {
-		ksft_test_result_pass("get_sve(%s FPSIMD)\n", type->name);
 	}
 
-	ksft_test_result((sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD,
-			 "Got FPSIMD registers via %s\n", type->name);
-	if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_FPSIMD)
-		goto out;
+	memset(svebuf, 0, SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD));
+	sve = svebuf;
+	sve->flags = SVE_PT_REGS_FPSIMD;
+	sve->size = SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD);
+	sve->vl = 16;  /* We don't care what the VL is */
 
 	/* Try to set a known FPSIMD state via PT_REGS_SVE */
 	fpsimd = (struct user_fpsimd_state *)((char *)sve +
@@ -273,12 +269,11 @@ static void ptrace_sve_fpsimd(pid_t child, const struct vec_type *type)
 			p[j] = j;
 	}
 
-	if (set_sve(child, type, sve)) {
-		ksft_test_result_fail("set_sve(%s FPSIMD): %s\n",
-				      type->name, strerror(errno));
-
+	ret = set_sve(child, type, sve);
+	ksft_test_result(ret == 0, "%s FPSIMD set via SVE: %d\n",
+			 type->name, ret);
+	if (ret)
 		goto out;
-	}
 
 	/* Verify via the FPSIMD regset */
 	if (get_fpsimd(child, &new_fpsimd)) {
@@ -548,11 +543,9 @@ static int do_parent(pid_t child)
 		if (getauxval(vec_types[i].hwcap_type) & vec_types[i].hwcap) {
 			ptrace_sve_fpsimd(child, &vec_types[i]);
 		} else {
-			ksft_test_result_skip("%s FPSIMD get via SVE\n",
-					      vec_types[i].name);
 			ksft_test_result_skip("%s FPSIMD set via SVE\n",
 					      vec_types[i].name);
-			ksft_test_result_skip("%s set read via FPSIMD\n",
+			ksft_test_result_skip("%s FPSIMD read\n",
 					      vec_types[i].name);
 		}
 
-- 
cgit 


From 82f97bcd876a6b5f764726a5210bde638d9f4d0a Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Mon, 4 Apr 2022 10:06:13 +0100
Subject: kselftest/arm64: Validate setting via FPSIMD and read via SVE regsets

Currently we validate that we can set the floating point state via the SVE
regset and read the data via the FPSIMD regset but we do not valiate that
the opposite case works as expected. Add a test that covers this case,
noting that when reading via SVE regset the kernel has the option of
returning either SVE or FPSIMD data so we need to accept both formats.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lore.kernel.org/r/20220404090613.181272-4-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 tools/testing/selftests/arm64/fp/sve-ptrace.c | 123 +++++++++++++++++++++++++-
 1 file changed, 122 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c
index 8f6146d89ca4..36b6f0749f23 100644
--- a/tools/testing/selftests/arm64/fp/sve-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c
@@ -44,7 +44,7 @@ static const struct vec_type vec_types[] = {
 	},
 };
 
-#define VL_TESTS (((SVE_VQ_MAX - SVE_VQ_MIN) + 1) * 3)
+#define VL_TESTS (((SVE_VQ_MAX - SVE_VQ_MIN) + 1) * 4)
 #define FLAG_TESTS 2
 #define FPSIMD_TESTS 2
 
@@ -78,6 +78,15 @@ static int get_fpsimd(pid_t pid, struct user_fpsimd_state *fpsimd)
 	return ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov);
 }
 
+static int set_fpsimd(pid_t pid, struct user_fpsimd_state *fpsimd)
+{
+	struct iovec iov;
+
+	iov.iov_base = fpsimd;
+	iov.iov_len = sizeof(*fpsimd);
+	return ptrace(PTRACE_SETREGSET, pid, NT_PRFPREG, &iov);
+}
+
 static struct user_sve_header *get_sve(pid_t pid, const struct vec_type *type,
 				       void **buf, size_t *size)
 {
@@ -473,6 +482,115 @@ out:
 	free(write_buf);
 }
 
+/* Validate attempting to set FPSIMD data and read it via the SVE regset */
+static void ptrace_set_fpsimd_get_sve_data(pid_t child,
+					   const struct vec_type *type,
+					   unsigned int vl)
+{
+	void *read_buf = NULL;
+	unsigned char *p;
+	struct user_sve_header *read_sve;
+	unsigned int vq = sve_vq_from_vl(vl);
+	struct user_fpsimd_state write_fpsimd;
+	int ret, i, j;
+	size_t read_sve_size = 0;
+	size_t expected_size;
+	int errors = 0;
+
+	if (__BYTE_ORDER == __BIG_ENDIAN) {
+		ksft_test_result_skip("Big endian not supported\n");
+		return;
+	}
+
+	for (i = 0; i < 32; ++i) {
+		p = (unsigned char *)&write_fpsimd.vregs[i];
+
+		for (j = 0; j < sizeof(write_fpsimd.vregs[i]); ++j)
+			p[j] = j;
+	}
+
+	ret = set_fpsimd(child, &write_fpsimd);
+	if (ret != 0) {
+		ksft_test_result_fail("Failed to set FPSIMD state: %d\n)",
+				      ret);
+		return;
+	}
+
+	if (!get_sve(child, type, (void **)&read_buf, &read_sve_size)) {
+		ksft_test_result_fail("Failed to read %s VL %u data\n",
+				      type->name, vl);
+		return;
+	}
+	read_sve = read_buf;
+
+	if (read_sve->vl != vl) {
+		ksft_test_result_fail("Child VL != expected VL %d\n",
+				      read_sve->vl, vl);
+		goto out;
+	}
+
+	/* The kernel may return either SVE or FPSIMD format */
+	switch (read_sve->flags & SVE_PT_REGS_MASK) {
+	case SVE_PT_REGS_FPSIMD:
+		expected_size = SVE_PT_FPSIMD_SIZE(vq, SVE_PT_REGS_FPSIMD);
+		if (read_sve_size < expected_size) {
+			ksft_test_result_fail("Read %d bytes, expected %d\n",
+					      read_sve_size, expected_size);
+			goto out;
+		}
+
+		ret = memcmp(&write_fpsimd, read_buf + SVE_PT_FPSIMD_OFFSET,
+			     sizeof(write_fpsimd));
+		if (ret != 0) {
+			ksft_print_msg("Read FPSIMD data mismatch\n");
+			errors++;
+		}
+		break;
+
+	case SVE_PT_REGS_SVE:
+		expected_size = SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE);
+		if (read_sve_size < expected_size) {
+			ksft_test_result_fail("Read %d bytes, expected %d\n",
+					      read_sve_size, expected_size);
+			goto out;
+		}
+
+		for (i = 0; i < __SVE_NUM_ZREGS; i++) {
+			__uint128_t tmp = 0;
+
+			/*
+			 * Z regs are stored endianness invariant, this won't
+			 * work for big endian
+			 */
+			memcpy(&tmp, read_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i),
+			       sizeof(tmp));
+
+			if (tmp != write_fpsimd.vregs[i]) {
+				ksft_print_msg("Mismatch in FPSIMD for %s VL %u Z%d/V%d\n",
+					       type->name, vl, i, i);
+				errors++;
+			}
+		}
+
+		check_u32(vl, "FPSR", &write_fpsimd.fpsr,
+			  read_buf + SVE_PT_SVE_FPSR_OFFSET(vq), &errors);
+		check_u32(vl, "FPCR", &write_fpsimd.fpcr,
+			  read_buf + SVE_PT_SVE_FPCR_OFFSET(vq), &errors);
+		break;
+	default:
+		ksft_print_msg("Unexpected regs type %d\n",
+			       read_sve->flags & SVE_PT_REGS_MASK);
+		errors++;
+		break;
+	}
+
+	ksft_test_result(errors == 0, "Set FPSIMD, read via SVE for %s VL %u\n",
+			 type->name, vl);
+
+out:
+	free(read_buf);
+}
+
 static int do_parent(pid_t child)
 {
 	int ret = EXIT_FAILURE;
@@ -578,11 +696,14 @@ static int do_parent(pid_t child)
 			if (vl_supported) {
 				ptrace_set_sve_get_sve_data(child, &vec_types[i], vl);
 				ptrace_set_sve_get_fpsimd_data(child, &vec_types[i], vl);
+				ptrace_set_fpsimd_get_sve_data(child, &vec_types[i], vl);
 			} else {
 				ksft_test_result_skip("%s set SVE get SVE for VL %d\n",
 						      vec_types[i].name, vl);
 				ksft_test_result_skip("%s set SVE get FPSIMD for VL %d\n",
 						      vec_types[i].name, vl);
+				ksft_test_result_skip("%s set FPSIMD get SVE for VL %d\n",
+						      vec_types[i].name, vl);
 			}
 		}
 	}
-- 
cgit 


From 3f374d7972c48bc0824bdabb8f94fe82e54fd07d Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 19 Apr 2022 11:32:40 +0100
Subject: kselftest/arm64: Handle more kselftest result codes in MTE helpers

The MTE selftests have a helper evaluate_test() which translates a return
code into a call to ksft_test_result_*(). Currently this only handles pass
and fail, silently ignoring any other code. Update the helper to support
skipped tests and log any unknown return codes as an error so we get at
least some diagnostic if anything goes wrong.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lore.kernel.org/r/20220419103243.24774-2-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 tools/testing/selftests/arm64/mte/mte_common_util.h | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.h b/tools/testing/selftests/arm64/mte/mte_common_util.h
index 195a7d1879e6..2d3e71724e55 100644
--- a/tools/testing/selftests/arm64/mte/mte_common_util.h
+++ b/tools/testing/selftests/arm64/mte/mte_common_util.h
@@ -75,10 +75,21 @@ unsigned int mte_get_pstate_tco(void);
 /* Test framework static inline functions/macros */
 static inline void evaluate_test(int err, const char *msg)
 {
-	if (err == KSFT_PASS)
+	switch (err) {
+	case KSFT_PASS:
 		ksft_test_result_pass(msg);
-	else if (err == KSFT_FAIL)
+		break;
+	case KSFT_FAIL:
 		ksft_test_result_fail(msg);
+		break;
+	case KSFT_SKIP:
+		ksft_test_result_skip(msg);
+		break;
+	default:
+		ksft_test_result_error("Unknown return code %d from %s",
+				       err, msg);
+		break;
+	}
 }
 
 static inline int check_allocated_memory(void *ptr, size_t size,
-- 
cgit 


From 191e678bdc9be2447dae227f5b6ea1e995c5ee9c Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 19 Apr 2022 11:32:41 +0100
Subject: kselftest/arm64: Log unexpected asynchronous MTE faults

Help people figure out problems by printing a diagnostic when we get an
unexpected asynchronous fault.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lore.kernel.org/r/20220419103243.24774-3-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 tools/testing/selftests/arm64/mte/mte_common_util.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c
index 0328a1e08f65..5327aa958171 100644
--- a/tools/testing/selftests/arm64/mte/mte_common_util.c
+++ b/tools/testing/selftests/arm64/mte/mte_common_util.c
@@ -37,6 +37,10 @@ void mte_default_handler(int signum, siginfo_t *si, void *uc)
 		if (si->si_code == SEGV_MTEAERR) {
 			if (cur_mte_cxt.trig_si_code == si->si_code)
 				cur_mte_cxt.fault_valid = true;
+			else
+				ksft_print_msg("Got unexpected SEGV_MTEAERR at pc=$lx, fault addr=%lx\n",
+					       ((ucontext_t *)uc)->uc_mcontext.pc,
+					       addr);
 			return;
 		}
 		/* Compare the context for precise error */
-- 
cgit 


From f326c9a6f49b06c0a936d68ae23cb90899835c3b Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 19 Apr 2022 11:32:42 +0100
Subject: kselftest/arm64: Refactor parameter checking in mte_switch_mode()

Currently we just have a big if statement with a non-specific diagnostic
checking both the mode and the tag. Since we'll need to dynamically check
for asymmetric mode support in the system and to improve debugability split
these checks out.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lore.kernel.org/r/20220419103243.24774-4-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 tools/testing/selftests/arm64/mte/mte_common_util.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c
index 5327aa958171..260206f4dce0 100644
--- a/tools/testing/selftests/arm64/mte/mte_common_util.c
+++ b/tools/testing/selftests/arm64/mte/mte_common_util.c
@@ -273,9 +273,18 @@ int mte_switch_mode(int mte_option, unsigned long incl_mask)
 {
 	unsigned long en = 0;
 
-	if (!(mte_option == MTE_SYNC_ERR || mte_option == MTE_ASYNC_ERR ||
-	      mte_option == MTE_NONE_ERR || incl_mask <= MTE_ALLOW_NON_ZERO_TAG)) {
-		ksft_print_msg("FAIL: Invalid mte config option\n");
+	switch (mte_option) {
+	case MTE_NONE_ERR:
+	case MTE_SYNC_ERR:
+	case MTE_ASYNC_ERR:
+		break;
+	default:
+		ksft_print_msg("FAIL: Invalid MTE option %x\n", mte_option);
+		return -EINVAL;
+	}
+
+	if (!(incl_mask <= MTE_ALLOW_NON_ZERO_TAG)) {
+		ksft_print_msg("FAIL: Invalid incl_mask %lx\n", incl_mask);
 		return -EINVAL;
 	}
 	en = PR_TAGGED_ADDR_ENABLE;
-- 
cgit 


From e2d9642a5a5101a559e7d368a1df8e01e960096b Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 19 Apr 2022 11:32:43 +0100
Subject: kselftest/arm64: Add simple test for MTE prctl

The current tests use the prctls for various things but there's no
coverage of the edges of the interface so add some basics. This isn't
hugely useful as it is (it originally had some coverage for the
combinations with asymmetric mode but we removed the prctl() for that)
but it might be a helpful starting point for future work, for example
covering error handling.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lore.kernel.org/r/20220419103243.24774-5-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 tools/testing/selftests/arm64/mte/.gitignore    |   1 +
 tools/testing/selftests/arm64/mte/check_prctl.c | 119 ++++++++++++++++++++++++
 2 files changed, 120 insertions(+)
 create mode 100644 tools/testing/selftests/arm64/mte/check_prctl.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/mte/.gitignore b/tools/testing/selftests/arm64/mte/.gitignore
index d1fe4ddf1669..052d0f9f92b3 100644
--- a/tools/testing/selftests/arm64/mte/.gitignore
+++ b/tools/testing/selftests/arm64/mte/.gitignore
@@ -3,5 +3,6 @@ check_gcr_el1_cswitch
 check_tags_inclusion
 check_child_memory
 check_mmap_options
+check_prctl
 check_ksm_options
 check_user_mem
diff --git a/tools/testing/selftests/arm64/mte/check_prctl.c b/tools/testing/selftests/arm64/mte/check_prctl.c
new file mode 100644
index 000000000000..f139a33a43ef
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_prctl.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2022 ARM Limited
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+
+#include <asm/hwcap.h>
+
+#include "kselftest.h"
+
+static int set_tagged_addr_ctrl(int val)
+{
+	int ret;
+
+	ret = prctl(PR_SET_TAGGED_ADDR_CTRL, val, 0, 0, 0);
+	if (ret < 0)
+		ksft_print_msg("PR_SET_TAGGED_ADDR_CTRL: failed %d %d (%s)\n",
+			       ret, errno, strerror(errno));
+	return ret;
+}
+
+static int get_tagged_addr_ctrl(void)
+{
+	int ret;
+
+	ret = prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0);
+	if (ret < 0)
+		ksft_print_msg("PR_GET_TAGGED_ADDR_CTRL failed: %d %d (%s)\n",
+			       ret, errno, strerror(errno));
+	return ret;
+}
+
+/*
+ * Read the current mode without having done any configuration, should
+ * run first.
+ */
+void check_basic_read(void)
+{
+	int ret;
+
+	ret = get_tagged_addr_ctrl();
+	if (ret < 0) {
+		ksft_test_result_fail("check_basic_read\n");
+		return;
+	}
+
+	if (ret & PR_MTE_TCF_SYNC)
+		ksft_print_msg("SYNC enabled\n");
+	if (ret & PR_MTE_TCF_ASYNC)
+		ksft_print_msg("ASYNC enabled\n");
+
+	/* Any configuration is valid */
+	ksft_test_result_pass("check_basic_read\n");
+}
+
+/*
+ * Attempt to set a specified combination of modes.
+ */
+void set_mode_test(const char *name, int hwcap2, int mask)
+{
+	int ret;
+
+	if ((getauxval(AT_HWCAP2) & hwcap2) != hwcap2) {
+		ksft_test_result_skip("%s\n", name);
+		return;
+	}
+
+	ret = set_tagged_addr_ctrl(mask);
+	if (ret < 0) {
+		ksft_test_result_fail("%s\n", name);
+		return;
+	}
+
+	ret = get_tagged_addr_ctrl();
+	if (ret < 0) {
+		ksft_test_result_fail("%s\n", name);
+		return;
+	}
+
+	if ((ret & PR_MTE_TCF_MASK) == mask) {
+		ksft_test_result_pass("%s\n", name);
+	} else {
+		ksft_print_msg("Got %x, expected %x\n",
+			       (ret & PR_MTE_TCF_MASK), mask);
+		ksft_test_result_fail("%s\n", name);
+	}
+}
+
+struct mte_mode {
+	int mask;
+	int hwcap2;
+	const char *name;
+} mte_modes[] = {
+	{ PR_MTE_TCF_NONE,  0,          "NONE"  },
+	{ PR_MTE_TCF_SYNC,  HWCAP2_MTE, "SYNC"  },
+	{ PR_MTE_TCF_ASYNC, HWCAP2_MTE, "ASYNC" },
+	{ PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC,  HWCAP2_MTE, "SYNC+ASYNC"  },
+};
+
+int main(void)
+{
+	int i;
+
+	ksft_print_header();
+	ksft_set_plan(5);
+
+	check_basic_read();
+	for (i = 0; i < ARRAY_SIZE(mte_modes); i++)
+		set_mode_test(mte_modes[i].name, mte_modes[i].hwcap2,
+			      mte_modes[i].mask);
+
+	ksft_print_cnts();
+
+	return 0;
+}
-- 
cgit 


From 6d51b18865c65390973e6ed0aec20239cf475489 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 19 Apr 2022 12:22:36 +0100
Subject: kselftest/arm64: Add manual encodings for SME instructions

As for the kernel so that we don't have ambitious toolchain requirements
to build the tests manually encode some of the SVE instructions.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220419112247.711548-29-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 tools/testing/selftests/arm64/fp/sme-inst.h | 51 +++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100644 tools/testing/selftests/arm64/fp/sme-inst.h

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/fp/sme-inst.h b/tools/testing/selftests/arm64/fp/sme-inst.h
new file mode 100644
index 000000000000..7191e53ca1c0
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/sme-inst.h
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2021-2 ARM Limited.
+// Original author: Mark Brown <broonie@kernel.org>
+
+#ifndef SME_INST_H
+#define SME_INST_H
+
+/*
+ * RDSVL X\nx, #\imm
+ */
+.macro rdsvl nx, imm
+	.inst	0x4bf5800			\
+		| (\imm << 5)			\
+		| (\nx)
+.endm
+
+.macro smstop
+	msr	S0_3_C4_C6_3, xzr
+.endm
+
+.macro smstart_za
+	msr	S0_3_C4_C5_3, xzr
+.endm
+
+.macro smstart_sm
+	msr	S0_3_C4_C3_3, xzr
+.endm
+
+/*
+ * LDR (vector to ZA array):
+ *	LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
+ */
+.macro _ldr_za nw, nxbase, offset=0
+	.inst	0xe1000000			\
+		| (((\nw) & 3) << 13)		\
+		| ((\nxbase) << 5)		\
+		| ((\offset) & 7)
+.endm
+
+/*
+ * STR (vector from ZA array):
+ *	STR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
+ */
+.macro _str_za nw, nxbase, offset=0
+	.inst	0xe1200000			\
+		| (((\nw) & 3) << 13)		\
+		| ((\nxbase) << 5)		\
+		| ((\offset) & 7)
+.endm
+
+#endif
-- 
cgit 


From e8c4451480d0cb37cbc69160113b1f4ff211cd16 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 19 Apr 2022 12:22:37 +0100
Subject: kselftest/arm64: sme: Add SME support to vlset

The Scalable Matrix Extenions (SME) introduces additional register state
with configurable vector lengths, similar to SVE but configured separately.
Extend vlset to support configuring this state with a --sme or -s command
line option.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220419112247.711548-30-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 tools/testing/selftests/arm64/fp/vlset.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/fp/vlset.c b/tools/testing/selftests/arm64/fp/vlset.c
index 308d27a68226..76912a581a95 100644
--- a/tools/testing/selftests/arm64/fp/vlset.c
+++ b/tools/testing/selftests/arm64/fp/vlset.c
@@ -22,12 +22,15 @@ static int inherit = 0;
 static int no_inherit = 0;
 static int force = 0;
 static unsigned long vl;
+static int set_ctl = PR_SVE_SET_VL;
+static int get_ctl = PR_SVE_GET_VL;
 
 static const struct option options[] = {
 	{ "force",	no_argument, NULL, 'f' },
 	{ "inherit",	no_argument, NULL, 'i' },
 	{ "max",	no_argument, NULL, 'M' },
 	{ "no-inherit",	no_argument, &no_inherit, 1 },
+	{ "sme",	no_argument, NULL, 's' },
 	{ "help",	no_argument, NULL, '?' },
 	{}
 };
@@ -50,6 +53,9 @@ static int parse_options(int argc, char **argv)
 		case 'M':	vl = SVE_VL_MAX; break;
 		case 'f':	force = 1; break;
 		case 'i':	inherit = 1; break;
+		case 's':	set_ctl = PR_SME_SET_VL;
+				get_ctl = PR_SME_GET_VL;
+				break;
 		case 0:		break;
 		default:	goto error;
 		}
@@ -125,14 +131,14 @@ int main(int argc, char **argv)
 	if (inherit)
 		flags |= PR_SVE_VL_INHERIT;
 
-	t = prctl(PR_SVE_SET_VL, vl | flags);
+	t = prctl(set_ctl, vl | flags);
 	if (t < 0) {
 		fprintf(stderr, "%s: PR_SVE_SET_VL: %s\n",
 			program_name, strerror(errno));
 		goto error;
 	}
 
-	t = prctl(PR_SVE_GET_VL);
+	t = prctl(get_ctl);
 	if (t == -1) {
 		fprintf(stderr, "%s: PR_SVE_GET_VL: %s\n",
 			program_name, strerror(errno));
-- 
cgit 


From 30e3a42b5d47d6dadba73a8509a6687a9d8f8e40 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 19 Apr 2022 12:22:38 +0100
Subject: kselftest/arm64: Add tests for TPIDR2

The Scalable Matrix Extension adds a new system register TPIDR2 intended to
be used by libc for its own thread specific use, add some kselftests which
exercise the ABI for it.

Since this test should with some adjustment work for TPIDR and any other
similar registers added in future add tests for it in a separate
directory rather than placing it with the other floating point tests,
nothing existing looked suitable so I created a new test directory
called "abi".

Since this feature is intended to be used by libc the test is built as
freestanding code using nolibc so we don't end up with the test program
and libc both trying to manage the register simultaneously and
distrupting each other. As a result of being written using nolibc rather
than using hwcaps to identify if SME is available in the system we check
for the default SME vector length configuration in proc, adding hwcap
support to nolibc seems like disproportionate effort and didn't feel
entirely idiomatic for what nolibc is trying to do.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220419112247.711548-31-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 tools/testing/selftests/arm64/abi/.gitignore |   1 +
 tools/testing/selftests/arm64/abi/Makefile   |   9 +-
 tools/testing/selftests/arm64/abi/tpidr2.c   | 298 +++++++++++++++++++++++++++
 3 files changed, 307 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/arm64/abi/tpidr2.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/abi/.gitignore b/tools/testing/selftests/arm64/abi/.gitignore
index b79cf5814c23..b9e54417250d 100644
--- a/tools/testing/selftests/arm64/abi/.gitignore
+++ b/tools/testing/selftests/arm64/abi/.gitignore
@@ -1 +1,2 @@
 syscall-abi
+tpidr2
diff --git a/tools/testing/selftests/arm64/abi/Makefile b/tools/testing/selftests/arm64/abi/Makefile
index 96eba974ac8d..c8d7f2495eb2 100644
--- a/tools/testing/selftests/arm64/abi/Makefile
+++ b/tools/testing/selftests/arm64/abi/Makefile
@@ -1,8 +1,15 @@
 # SPDX-License-Identifier: GPL-2.0
 # Copyright (C) 2021 ARM Limited
 
-TEST_GEN_PROGS := syscall-abi
+TEST_GEN_PROGS := syscall-abi tpidr2
 
 include ../../lib.mk
 
 $(OUTPUT)/syscall-abi: syscall-abi.c syscall-abi-asm.S
+
+# Build with nolibc since TPIDR2 is intended to be actively managed by
+# libc and we're trying to test the functionality that it depends on here.
+$(OUTPUT)/tpidr2: tpidr2.c
+	$(CC) -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \
+		-static -include ../../../../include/nolibc/nolibc.h \
+		-ffreestanding -Wall $^ -o $@ -lgcc
diff --git a/tools/testing/selftests/arm64/abi/tpidr2.c b/tools/testing/selftests/arm64/abi/tpidr2.c
new file mode 100644
index 000000000000..351a098b503a
--- /dev/null
+++ b/tools/testing/selftests/arm64/abi/tpidr2.c
@@ -0,0 +1,298 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/sched.h>
+#include <linux/wait.h>
+
+#define SYS_TPIDR2 "S3_3_C13_C0_5"
+
+#define EXPECTED_TESTS 5
+
+static void putstr(const char *str)
+{
+	write(1, str, strlen(str));
+}
+
+static void putnum(unsigned int num)
+{
+	char c;
+
+	if (num / 10)
+		putnum(num / 10);
+
+	c = '0' + (num % 10);
+	write(1, &c, 1);
+}
+
+static int tests_run;
+static int tests_passed;
+static int tests_failed;
+static int tests_skipped;
+
+static void set_tpidr2(uint64_t val)
+{
+	asm volatile (
+		"msr	" SYS_TPIDR2 ", %0\n"
+		:
+		: "r"(val)
+		: "cc");
+}
+
+static uint64_t get_tpidr2(void)
+{
+	uint64_t val;
+
+	asm volatile (
+		"mrs	%0, " SYS_TPIDR2 "\n"
+		: "=r"(val)
+		:
+		: "cc");
+
+	return val;
+}
+
+static void print_summary(void)
+{
+	if (tests_passed + tests_failed + tests_skipped != EXPECTED_TESTS)
+		putstr("# UNEXPECTED TEST COUNT: ");
+
+	putstr("# Totals: pass:");
+	putnum(tests_passed);
+	putstr(" fail:");
+	putnum(tests_failed);
+	putstr(" xfail:0 xpass:0 skip:");
+	putnum(tests_skipped);
+	putstr(" error:0\n");
+}
+
+/* Processes should start with TPIDR2 == 0 */
+static int default_value(void)
+{
+	return get_tpidr2() == 0;
+}
+
+/* If we set TPIDR2 we should read that value */
+static int write_read(void)
+{
+	set_tpidr2(getpid());
+
+	return getpid() == get_tpidr2();
+}
+
+/* If we set a value we should read the same value after scheduling out */
+static int write_sleep_read(void)
+{
+	set_tpidr2(getpid());
+
+	msleep(100);
+
+	return getpid() == get_tpidr2();
+}
+
+/*
+ * If we fork the value in the parent should be unchanged and the
+ * child should start with the same value and be able to set its own
+ * value.
+ */
+static int write_fork_read(void)
+{
+	pid_t newpid, waiting, oldpid;
+	int status;
+
+	set_tpidr2(getpid());
+
+	oldpid = getpid();
+	newpid = fork();
+	if (newpid == 0) {
+		/* In child */
+		if (get_tpidr2() != oldpid) {
+			putstr("# TPIDR2 changed in child: ");
+			putnum(get_tpidr2());
+			putstr("\n");
+			exit(0);
+		}
+
+		set_tpidr2(getpid());
+		if (get_tpidr2() == getpid()) {
+			exit(1);
+		} else {
+			putstr("# Failed to set TPIDR2 in child\n");
+			exit(0);
+		}
+	}
+	if (newpid < 0) {
+		putstr("# fork() failed: -");
+		putnum(-newpid);
+		putstr("\n");
+		return 0;
+	}
+
+	for (;;) {
+		waiting = waitpid(newpid, &status, 0);
+
+		if (waiting < 0) {
+			if (errno == EINTR)
+				continue;
+			putstr("# waitpid() failed: ");
+			putnum(errno);
+			putstr("\n");
+			return 0;
+		}
+		if (waiting != newpid) {
+			putstr("# waitpid() returned wrong PID\n");
+			return 0;
+		}
+
+		if (!WIFEXITED(status)) {
+			putstr("# child did not exit\n");
+			return 0;
+		}
+
+		if (getpid() != get_tpidr2()) {
+			putstr("# TPIDR2 corrupted in parent\n");
+			return 0;
+		}
+
+		return WEXITSTATUS(status);
+	}
+}
+
+/*
+ * sys_clone() has a lot of per architecture variation so just define
+ * it here rather than adding it to nolibc, plus the raw API is a
+ * little more convenient for this test.
+ */
+static int sys_clone(unsigned long clone_flags, unsigned long newsp,
+		     int *parent_tidptr, unsigned long tls,
+		     int *child_tidptr)
+{
+	return my_syscall5(__NR_clone, clone_flags, newsp, parent_tidptr, tls,
+			   child_tidptr);
+}
+
+/*
+ * If we clone with CLONE_SETTLS then the value in the parent should
+ * be unchanged and the child should start with zero and be able to
+ * set its own value.
+ */
+static int write_clone_read(void)
+{
+	int parent_tid, child_tid;
+	pid_t parent, waiting;
+	int ret, status;
+
+	parent = getpid();
+	set_tpidr2(parent);
+
+	ret = sys_clone(CLONE_SETTLS, 0, &parent_tid, 0, &child_tid);
+	if (ret == -1) {
+		putstr("# clone() failed\n");
+		putnum(errno);
+		putstr("\n");
+		return 0;
+	}
+
+	if (ret == 0) {
+		/* In child */
+		if (get_tpidr2() != 0) {
+			putstr("# TPIDR2 non-zero in child: ");
+			putnum(get_tpidr2());
+			putstr("\n");
+			exit(0);
+		}
+
+		if (gettid() == 0)
+			putstr("# Child TID==0\n");
+		set_tpidr2(gettid());
+		if (get_tpidr2() == gettid()) {
+			exit(1);
+		} else {
+			putstr("# Failed to set TPIDR2 in child\n");
+			exit(0);
+		}
+	}
+
+	for (;;) {
+		waiting = wait4(ret, &status, __WCLONE, NULL);
+
+		if (waiting < 0) {
+			if (errno == EINTR)
+				continue;
+			putstr("# wait4() failed: ");
+			putnum(errno);
+			putstr("\n");
+			return 0;
+		}
+		if (waiting != ret) {
+			putstr("# wait4() returned wrong PID ");
+			putnum(waiting);
+			putstr("\n");
+			return 0;
+		}
+
+		if (!WIFEXITED(status)) {
+			putstr("# child did not exit\n");
+			return 0;
+		}
+
+		if (parent != get_tpidr2()) {
+			putstr("# TPIDR2 corrupted in parent\n");
+			return 0;
+		}
+
+		return WEXITSTATUS(status);
+	}
+}
+
+#define run_test(name)			     \
+	if (name()) {			     \
+		tests_passed++;		     \
+	} else {			     \
+		tests_failed++;		     \
+		putstr("not ");		     \
+	}				     \
+	putstr("ok ");			     \
+	putnum(++tests_run);		     \
+	putstr(" " #name "\n");
+
+int main(int argc, char **argv)
+{
+	int ret, i;
+
+	putstr("TAP version 13\n");
+	putstr("1..");
+	putnum(EXPECTED_TESTS);
+	putstr("\n");
+
+	putstr("# PID: ");
+	putnum(getpid());
+	putstr("\n");
+
+	/*
+	 * This test is run with nolibc which doesn't support hwcap and
+	 * it's probably disproportionate to implement so instead check
+	 * for the default vector length configuration in /proc.
+	 */
+	ret = open("/proc/sys/abi/sme_default_vector_length", O_RDONLY, 0);
+	if (ret >= 0) {
+		run_test(default_value);
+		run_test(write_read);
+		run_test(write_sleep_read);
+		run_test(write_fork_read);
+		run_test(write_clone_read);
+
+	} else {
+		putstr("# SME support not present\n");
+
+		for (i = 0; i < EXPECTED_TESTS; i++) {
+			putstr("ok ");
+			putnum(i);
+			putstr(" skipped, TPIDR2 not supported\n");
+		}
+
+		tests_skipped += EXPECTED_TESTS;
+	}
+
+	print_summary();
+
+	return 0;
+}
-- 
cgit 


From a0f2eb641b7c4ff753374f8b2043b8bbb1666a96 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 19 Apr 2022 12:22:39 +0100
Subject: kselftest/arm64: Extend vector configuration API tests to cover SME

Provide RDVL helpers for SME and extend the main vector configuration tests
to cover SME.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220419112247.711548-32-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 tools/testing/selftests/arm64/fp/.gitignore   |  1 +
 tools/testing/selftests/arm64/fp/Makefile     |  3 ++-
 tools/testing/selftests/arm64/fp/rdvl-sme.c   | 14 ++++++++++++++
 tools/testing/selftests/arm64/fp/rdvl.S       | 10 ++++++++++
 tools/testing/selftests/arm64/fp/rdvl.h       |  1 +
 tools/testing/selftests/arm64/fp/vec-syscfg.c | 10 ++++++++++
 6 files changed, 38 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/arm64/fp/rdvl-sme.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/fp/.gitignore b/tools/testing/selftests/arm64/fp/.gitignore
index c50d86331ed2..6e9a610c5e5d 100644
--- a/tools/testing/selftests/arm64/fp/.gitignore
+++ b/tools/testing/selftests/arm64/fp/.gitignore
@@ -1,5 +1,6 @@
 fp-pidbench
 fpsimd-test
+rdvl-sme
 rdvl-sve
 sve-probe-vls
 sve-ptrace
diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile
index 95f0b877a060..a224fff8082b 100644
--- a/tools/testing/selftests/arm64/fp/Makefile
+++ b/tools/testing/selftests/arm64/fp/Makefile
@@ -3,7 +3,7 @@
 CFLAGS += -I../../../../../usr/include/
 TEST_GEN_PROGS := sve-ptrace sve-probe-vls vec-syscfg
 TEST_PROGS_EXTENDED := fp-pidbench fpsimd-test fpsimd-stress \
-	rdvl-sve \
+	rdvl-sme rdvl-sve \
 	sve-test sve-stress \
 	vlset
 
@@ -13,6 +13,7 @@ fp-pidbench: fp-pidbench.S asm-utils.o
 	$(CC) -nostdlib $^ -o $@
 fpsimd-test: fpsimd-test.o asm-utils.o
 	$(CC) -nostdlib $^ -o $@
+rdvl-sme: rdvl-sme.o rdvl.o
 rdvl-sve: rdvl-sve.o rdvl.o
 sve-ptrace: sve-ptrace.o
 sve-probe-vls: sve-probe-vls.o rdvl.o
diff --git a/tools/testing/selftests/arm64/fp/rdvl-sme.c b/tools/testing/selftests/arm64/fp/rdvl-sme.c
new file mode 100644
index 000000000000..49b0b2e08bac
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/rdvl-sme.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <stdio.h>
+
+#include "rdvl.h"
+
+int main(void)
+{
+	int vl = rdvl_sme();
+
+	printf("%d\n", vl);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/arm64/fp/rdvl.S b/tools/testing/selftests/arm64/fp/rdvl.S
index c916c1c9defd..20dc29996dc6 100644
--- a/tools/testing/selftests/arm64/fp/rdvl.S
+++ b/tools/testing/selftests/arm64/fp/rdvl.S
@@ -1,6 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0-only
 // Copyright (C) 2021 ARM Limited.
 
+#include "sme-inst.h"
+
 .arch_extension sve
 
 .globl rdvl_sve
@@ -8,3 +10,11 @@ rdvl_sve:
 	hint	34			// BTI C
 	rdvl	x0, #1
 	ret
+
+.globl rdvl_sme
+rdvl_sme:
+	hint	34			// BTI C
+
+	rdsvl	0, 1
+
+	ret
diff --git a/tools/testing/selftests/arm64/fp/rdvl.h b/tools/testing/selftests/arm64/fp/rdvl.h
index 7c9d953fc9e7..5d323679fbc9 100644
--- a/tools/testing/selftests/arm64/fp/rdvl.h
+++ b/tools/testing/selftests/arm64/fp/rdvl.h
@@ -3,6 +3,7 @@
 #ifndef RDVL_H
 #define RDVL_H
 
+int rdvl_sme(void);
 int rdvl_sve(void);
 
 #endif
diff --git a/tools/testing/selftests/arm64/fp/vec-syscfg.c b/tools/testing/selftests/arm64/fp/vec-syscfg.c
index c90658811a83..9bcfcdc34ee9 100644
--- a/tools/testing/selftests/arm64/fp/vec-syscfg.c
+++ b/tools/testing/selftests/arm64/fp/vec-syscfg.c
@@ -51,6 +51,16 @@ static struct vec_data vec_data[] = {
 		.prctl_set = PR_SVE_SET_VL,
 		.default_vl_file = "/proc/sys/abi/sve_default_vector_length",
 	},
+	{
+		.name = "SME",
+		.hwcap_type = AT_HWCAP2,
+		.hwcap = HWCAP2_SME,
+		.rdvl = rdvl_sme,
+		.rdvl_binary = "./rdvl-sme",
+		.prctl_get = PR_SME_GET_VL,
+		.prctl_set = PR_SME_SET_VL,
+		.default_vl_file = "/proc/sys/abi/sme_default_vector_length",
+	},
 };
 
 static int stdio_read_integer(FILE *f, const char *what, int *val)
-- 
cgit 


From 4126bde025c8f973dfd278879fa32e293f563df5 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 19 Apr 2022 12:22:40 +0100
Subject: kselftest/arm64: sme: Provide streaming mode SVE stress test

One of the features of SME is the addition of streaming mode, in which we
have access to a set of streaming mode SVE registers at the SME vector
length. Since these are accessed using the SVE instructions let's reuse
the existing SVE stress test for testing with a compile time option for
controlling the few small differences needed:

 - Enter streaming mode immediately on starting the program.
 - In streaming mode FFR is removed so skip reading and writing FFR.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220419112247.711548-33-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 tools/testing/selftests/arm64/fp/.gitignore  |  1 +
 tools/testing/selftests/arm64/fp/Makefile    |  3 ++
 tools/testing/selftests/arm64/fp/ssve-stress | 59 ++++++++++++++++++++++++++++
 tools/testing/selftests/arm64/fp/sve-test.S  | 20 ++++++++++
 4 files changed, 83 insertions(+)
 create mode 100644 tools/testing/selftests/arm64/fp/ssve-stress

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/fp/.gitignore b/tools/testing/selftests/arm64/fp/.gitignore
index 6e9a610c5e5d..5729a5b1adfc 100644
--- a/tools/testing/selftests/arm64/fp/.gitignore
+++ b/tools/testing/selftests/arm64/fp/.gitignore
@@ -5,5 +5,6 @@ rdvl-sve
 sve-probe-vls
 sve-ptrace
 sve-test
+ssve-test
 vec-syscfg
 vlset
diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile
index a224fff8082b..e6643c9b0474 100644
--- a/tools/testing/selftests/arm64/fp/Makefile
+++ b/tools/testing/selftests/arm64/fp/Makefile
@@ -5,6 +5,7 @@ TEST_GEN_PROGS := sve-ptrace sve-probe-vls vec-syscfg
 TEST_PROGS_EXTENDED := fp-pidbench fpsimd-test fpsimd-stress \
 	rdvl-sme rdvl-sve \
 	sve-test sve-stress \
+	ssve-test ssve-stress \
 	vlset
 
 all: $(TEST_GEN_PROGS) $(TEST_PROGS_EXTENDED)
@@ -19,6 +20,8 @@ sve-ptrace: sve-ptrace.o
 sve-probe-vls: sve-probe-vls.o rdvl.o
 sve-test: sve-test.o asm-utils.o
 	$(CC) -nostdlib $^ -o $@
+ssve-test: sve-test.S asm-utils.o
+	$(CC) -DSSVE -nostdlib $^ -o $@
 vec-syscfg: vec-syscfg.o rdvl.o
 vlset: vlset.o
 
diff --git a/tools/testing/selftests/arm64/fp/ssve-stress b/tools/testing/selftests/arm64/fp/ssve-stress
new file mode 100644
index 000000000000..e2bd2cc184ad
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/ssve-stress
@@ -0,0 +1,59 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright (C) 2015-2019 ARM Limited.
+# Original author: Dave Martin <Dave.Martin@arm.com>
+
+set -ue
+
+NR_CPUS=`nproc`
+
+pids=
+logs=
+
+cleanup () {
+	trap - INT TERM CHLD
+	set +e
+
+	if [ -n "$pids" ]; then
+		kill $pids
+		wait $pids
+		pids=
+	fi
+
+	if [ -n "$logs" ]; then
+		cat $logs
+		rm $logs
+		logs=
+	fi
+}
+
+interrupt () {
+	cleanup
+	exit 0
+}
+
+child_died () {
+	cleanup
+	exit 1
+}
+
+trap interrupt INT TERM EXIT
+
+for x in `seq 0 $((NR_CPUS * 4))`; do
+	log=`mktemp`
+	logs=$logs\ $log
+	./ssve-test >$log &
+	pids=$pids\ $!
+done
+
+# Wait for all child processes to be created:
+sleep 10
+
+while :; do
+	kill -USR1 $pids
+done &
+pids=$pids\ $!
+
+wait
+
+exit 1
diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S
index f5b1b48ffff2..589264231a2d 100644
--- a/tools/testing/selftests/arm64/fp/sve-test.S
+++ b/tools/testing/selftests/arm64/fp/sve-test.S
@@ -13,6 +13,7 @@
 #include <asm/unistd.h>
 #include "assembler.h"
 #include "asm-offsets.h"
+#include "sme-inst.h"
 
 #define NZR	32
 #define NPR	16
@@ -156,6 +157,7 @@ endfunction
 // We fill the upper lanes of FFR with zeros.
 // Beware: corrupts P0.
 function setup_ffr
+#ifndef SSVE
 	mov	x4, x30
 
 	and	w0, w0, #0x3
@@ -178,6 +180,9 @@ function setup_ffr
 	wrffr	p0.b
 
 	ret	x4
+#else
+	ret
+#endif
 endfunction
 
 // Trivial memory compare: compare x2 bytes starting at address x0 with
@@ -260,6 +265,7 @@ endfunction
 // Beware -- corrupts P0.
 // Clobbers x0-x5.
 function check_ffr
+#ifndef SSVE
 	mov	x3, x30
 
 	ldr	x4, =scratch
@@ -280,6 +286,9 @@ function check_ffr
 	mov	x2, x5
 	mov	x30, x3
 	b	memcmp
+#else
+	ret
+#endif
 endfunction
 
 // Any SVE register modified here can cause corruption in the main
@@ -295,10 +304,12 @@ function irritator_handler
 	movi	v0.8b, #1
 	movi	v9.16b, #2
 	movi	v31.8b, #3
+#ifndef SSVE
 	// And P0
 	rdffr	p0.b
 	// And FFR
 	wrffr	p15.b
+#endif
 
 	ret
 endfunction
@@ -359,6 +370,11 @@ endfunction
 .globl _start
 function _start
 _start:
+#ifdef SSVE
+	puts	"Streaming mode "
+	smstart_sm
+#endif
+
 	// Sanity-check and report the vector length
 
 	rdvl	x19, #8
@@ -407,6 +423,10 @@ _start:
 	orr	w2, w2, #SA_NODEFER
 	bl	setsignal
 
+#ifdef SSVE
+	smstart_sm		// syscalls will have exited streaming mode
+#endif
+
 	mov	x22, #0		// generation number, increments per iteration
 .Ltest_loop:
 	rdvl	x0, #8
-- 
cgit 


From 1a792b545519b6e49f9b1653095ed785eea19afe Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 19 Apr 2022 12:22:41 +0100
Subject: kselftest/arm64: signal: Handle ZA signal context in core code

As part of the generic code for signal handling test cases we parse all
signal frames to make sure they have at least the basic form we expect
and that there are no unexpected frames present in the signal context.
Add coverage of the ZA signal frame to this code.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220419112247.711548-34-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 .../selftests/arm64/signal/testcases/testcases.c   | 36 ++++++++++++++++++++++
 .../selftests/arm64/signal/testcases/testcases.h   |  3 +-
 2 files changed, 38 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.c b/tools/testing/selftests/arm64/signal/testcases/testcases.c
index 8c2a57fc2f9c..84c36bee4d82 100644
--- a/tools/testing/selftests/arm64/signal/testcases/testcases.c
+++ b/tools/testing/selftests/arm64/signal/testcases/testcases.c
@@ -75,6 +75,31 @@ bool validate_sve_context(struct sve_context *sve, char **err)
 	return true;
 }
 
+bool validate_za_context(struct za_context *za, char **err)
+{
+	/* Size will be rounded up to a multiple of 16 bytes */
+	size_t regs_size
+		= ((ZA_SIG_CONTEXT_SIZE(sve_vq_from_vl(za->vl)) + 15) / 16) * 16;
+
+	if (!za || !err)
+		return false;
+
+	/* Either a bare za_context or a za_context followed by regs data */
+	if ((za->head.size != sizeof(struct za_context)) &&
+	    (za->head.size != regs_size)) {
+		*err = "bad size for ZA context";
+		return false;
+	}
+
+	if (!sve_vl_valid(za->vl)) {
+		*err = "SME VL in ZA context invalid";
+
+		return false;
+	}
+
+	return true;
+}
+
 bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
 {
 	bool terminated = false;
@@ -82,6 +107,7 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
 	int flags = 0;
 	struct extra_context *extra = NULL;
 	struct sve_context *sve = NULL;
+	struct za_context *za = NULL;
 	struct _aarch64_ctx *head =
 		(struct _aarch64_ctx *)uc->uc_mcontext.__reserved;
 
@@ -120,6 +146,13 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
 			sve = (struct sve_context *)head;
 			flags |= SVE_CTX;
 			break;
+		case ZA_MAGIC:
+			if (flags & ZA_CTX)
+				*err = "Multiple ZA_MAGIC";
+			/* Size is validated in validate_za_context() */
+			za = (struct za_context *)head;
+			flags |= ZA_CTX;
+			break;
 		case EXTRA_MAGIC:
 			if (flags & EXTRA_CTX)
 				*err = "Multiple EXTRA_MAGIC";
@@ -165,6 +198,9 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
 		if (flags & SVE_CTX)
 			if (!validate_sve_context(sve, err))
 				return false;
+		if (flags & ZA_CTX)
+			if (!validate_za_context(za, err))
+				return false;
 
 		head = GET_RESV_NEXT_HEAD(head);
 	}
diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.h b/tools/testing/selftests/arm64/signal/testcases/testcases.h
index ad884c135314..49f1d5de7b5b 100644
--- a/tools/testing/selftests/arm64/signal/testcases/testcases.h
+++ b/tools/testing/selftests/arm64/signal/testcases/testcases.h
@@ -16,7 +16,8 @@
 
 #define FPSIMD_CTX	(1 << 0)
 #define SVE_CTX		(1 << 1)
-#define EXTRA_CTX	(1 << 2)
+#define ZA_CTX		(1 << 2)
+#define EXTRA_CTX	(1 << 3)
 
 #define KSFT_BAD_MAGIC	0xdeadbeef
 
-- 
cgit 


From 5aa45cc5355db3f5302e232a0fe29759ace4bc92 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 19 Apr 2022 12:22:42 +0100
Subject: kselftest/arm64: Add stress test for SME ZA context switching

Add a stress test for context switching of the ZA register state based on
the similar tests Dave Martin wrote for FPSIMD and SVE registers. The test
loops indefinitely writing a data pattern to ZA then reading it back and
verifying that it's what was expected.

Unlike the other tests we manually assemble the SME instructions since at
present no released toolchain has SME support integrated.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220419112247.711548-35-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 tools/testing/selftests/arm64/fp/.gitignore |   1 +
 tools/testing/selftests/arm64/fp/Makefile   |   3 +
 tools/testing/selftests/arm64/fp/za-stress  |  59 +++++
 tools/testing/selftests/arm64/fp/za-test.S  | 388 ++++++++++++++++++++++++++++
 4 files changed, 451 insertions(+)
 create mode 100644 tools/testing/selftests/arm64/fp/za-stress
 create mode 100644 tools/testing/selftests/arm64/fp/za-test.S

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/fp/.gitignore b/tools/testing/selftests/arm64/fp/.gitignore
index 5729a5b1adfc..ead3197e720b 100644
--- a/tools/testing/selftests/arm64/fp/.gitignore
+++ b/tools/testing/selftests/arm64/fp/.gitignore
@@ -8,3 +8,4 @@ sve-test
 ssve-test
 vec-syscfg
 vlset
+za-test
diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile
index e6643c9b0474..38d2d0d5a0eb 100644
--- a/tools/testing/selftests/arm64/fp/Makefile
+++ b/tools/testing/selftests/arm64/fp/Makefile
@@ -6,6 +6,7 @@ TEST_PROGS_EXTENDED := fp-pidbench fpsimd-test fpsimd-stress \
 	rdvl-sme rdvl-sve \
 	sve-test sve-stress \
 	ssve-test ssve-stress \
+	za-test za-stress \
 	vlset
 
 all: $(TEST_GEN_PROGS) $(TEST_PROGS_EXTENDED)
@@ -24,5 +25,7 @@ ssve-test: sve-test.S asm-utils.o
 	$(CC) -DSSVE -nostdlib $^ -o $@
 vec-syscfg: vec-syscfg.o rdvl.o
 vlset: vlset.o
+za-test: za-test.o asm-utils.o
+	$(CC) -nostdlib $^ -o $@
 
 include ../../lib.mk
diff --git a/tools/testing/selftests/arm64/fp/za-stress b/tools/testing/selftests/arm64/fp/za-stress
new file mode 100644
index 000000000000..5ac386b55b95
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/za-stress
@@ -0,0 +1,59 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright (C) 2015-2019 ARM Limited.
+# Original author: Dave Martin <Dave.Martin@arm.com>
+
+set -ue
+
+NR_CPUS=`nproc`
+
+pids=
+logs=
+
+cleanup () {
+	trap - INT TERM CHLD
+	set +e
+
+	if [ -n "$pids" ]; then
+		kill $pids
+		wait $pids
+		pids=
+	fi
+
+	if [ -n "$logs" ]; then
+		cat $logs
+		rm $logs
+		logs=
+	fi
+}
+
+interrupt () {
+	cleanup
+	exit 0
+}
+
+child_died () {
+	cleanup
+	exit 1
+}
+
+trap interrupt INT TERM EXIT
+
+for x in `seq 0 $((NR_CPUS * 4))`; do
+	log=`mktemp`
+	logs=$logs\ $log
+	./za-test >$log &
+	pids=$pids\ $!
+done
+
+# Wait for all child processes to be created:
+sleep 10
+
+while :; do
+	kill -USR1 $pids
+done &
+pids=$pids\ $!
+
+wait
+
+exit 1
diff --git a/tools/testing/selftests/arm64/fp/za-test.S b/tools/testing/selftests/arm64/fp/za-test.S
new file mode 100644
index 000000000000..9ab6f9cd9623
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/za-test.S
@@ -0,0 +1,388 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2021 ARM Limited.
+// Original author: Mark Brown <broonie@kernel.org>
+//
+// Scalable Matrix Extension ZA context switch test
+// Repeatedly writes unique test patterns into each ZA tile
+// and reads them back to verify integrity.
+//
+// for x in `seq 1 NR_CPUS`; do sve-test & pids=$pids\ $! ; done
+// (leave it running for as long as you want...)
+// kill $pids
+
+#include <asm/unistd.h>
+#include "assembler.h"
+#include "asm-offsets.h"
+#include "sme-inst.h"
+
+.arch_extension sve
+
+#define MAXVL     2048
+#define MAXVL_B   (MAXVL / 8)
+
+// Declare some storage space to shadow ZA register contents and a
+// scratch buffer for a vector.
+.pushsection .text
+.data
+.align 4
+zaref:
+	.space	MAXVL_B * MAXVL_B
+scratch:
+	.space	MAXVL_B
+.popsection
+
+// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0.
+// Clobbers x0-x3
+function memcpy
+	cmp	x2, #0
+	b.eq	1f
+0:	ldrb	w3, [x1], #1
+	strb	w3, [x0], #1
+	subs	x2, x2, #1
+	b.ne	0b
+1:	ret
+endfunction
+
+// Generate a test pattern for storage in ZA
+// x0: pid
+// x1: row in ZA
+// x2: generation
+
+// These values are used to constuct a 32-bit pattern that is repeated in the
+// scratch buffer as many times as will fit:
+// bits 31:28	generation number (increments once per test_loop)
+// bits 27:16	pid
+// bits 15: 8	row number
+// bits  7: 0	32-bit lane index
+
+function pattern
+	mov	w3, wzr
+	bfi	w3, w0, #16, #12	// PID
+	bfi	w3, w1, #8, #8		// Row
+	bfi	w3, w2, #28, #4		// Generation
+
+	ldr	x0, =scratch
+	mov	w1, #MAXVL_B / 4
+
+0:	str	w3, [x0], #4
+	add	w3, w3, #1		// Lane
+	subs	w1, w1, #1
+	b.ne	0b
+
+	ret
+endfunction
+
+// Get the address of shadow data for ZA horizontal vector xn
+.macro _adrza xd, xn, nrtmp
+	ldr	\xd, =zaref
+	rdsvl	\nrtmp, 1
+	madd	\xd, x\nrtmp, \xn, \xd
+.endm
+
+// Set up test pattern in a ZA horizontal vector
+// x0: pid
+// x1: row number
+// x2: generation
+function setup_za
+	mov	x4, x30
+	mov	x12, x1			// Use x12 for vector select
+
+	bl	pattern			// Get pattern in scratch buffer
+	_adrza	x0, x12, 2		// Shadow buffer pointer to x0 and x5
+	mov	x5, x0
+	ldr	x1, =scratch
+	bl	memcpy			// length set up in x2 by _adrza
+
+	_ldr_za 12, 5			// load vector w12 from pointer x5
+
+	ret	x4
+endfunction
+
+// Trivial memory compare: compare x2 bytes starting at address x0 with
+// bytes starting at address x1.
+// Returns only if all bytes match; otherwise, the program is aborted.
+// Clobbers x0-x5.
+function memcmp
+	cbz	x2, 2f
+
+	stp	x0, x1, [sp, #-0x20]!
+	str	x2, [sp, #0x10]
+
+	mov	x5, #0
+0:	ldrb	w3, [x0, x5]
+	ldrb	w4, [x1, x5]
+	add	x5, x5, #1
+	cmp	w3, w4
+	b.ne	1f
+	subs	x2, x2, #1
+	b.ne	0b
+
+1:	ldr	x2, [sp, #0x10]
+	ldp	x0, x1, [sp], #0x20
+	b.ne	barf
+
+2:	ret
+endfunction
+
+// Verify that a ZA vector matches its shadow in memory, else abort
+// x0: row number
+// Clobbers x0-x7 and x12.
+function check_za
+	mov	x3, x30
+
+	mov	x12, x0
+	_adrza	x5, x0, 6		// pointer to expected value in x5
+	mov	x4, x0
+	ldr	x7, =scratch		// x7 is scratch
+
+	mov	x0, x7			// Poison scratch
+	mov	x1, x6
+	bl	memfill_ae
+
+	_str_za 12, 7			// save vector w12 to pointer x7
+
+	mov	x0, x5
+	mov	x1, x7
+	mov	x2, x6
+	mov	x30, x3
+	b	memcmp
+endfunction
+
+// Any SME register modified here can cause corruption in the main
+// thread -- but *only* the locations modified here.
+function irritator_handler
+	// Increment the irritation signal count (x23):
+	ldr	x0, [x2, #ucontext_regs + 8 * 23]
+	add	x0, x0, #1
+	str	x0, [x2, #ucontext_regs + 8 * 23]
+
+	// Corrupt some random ZA data
+#if 0
+	adr	x0, .text + (irritator_handler - .text) / 16 * 16
+	movi	v0.8b, #1
+	movi	v9.16b, #2
+	movi	v31.8b, #3
+#endif
+
+	ret
+endfunction
+
+function terminate_handler
+	mov	w21, w0
+	mov	x20, x2
+
+	puts	"Terminated by signal "
+	mov	w0, w21
+	bl	putdec
+	puts	", no error, iterations="
+	ldr	x0, [x20, #ucontext_regs + 8 * 22]
+	bl	putdec
+	puts	", signals="
+	ldr	x0, [x20, #ucontext_regs + 8 * 23]
+	bl	putdecn
+
+	mov	x0, #0
+	mov	x8, #__NR_exit
+	svc	#0
+endfunction
+
+// w0: signal number
+// x1: sa_action
+// w2: sa_flags
+// Clobbers x0-x6,x8
+function setsignal
+	str	x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]!
+
+	mov	w4, w0
+	mov	x5, x1
+	mov	w6, w2
+
+	add	x0, sp, #16
+	mov	x1, #sa_sz
+	bl	memclr
+
+	mov	w0, w4
+	add	x1, sp, #16
+	str	w6, [x1, #sa_flags]
+	str	x5, [x1, #sa_handler]
+	mov	x2, #0
+	mov	x3, #sa_mask_sz
+	mov	x8, #__NR_rt_sigaction
+	svc	#0
+
+	cbz	w0, 1f
+
+	puts	"sigaction failure\n"
+	b	.Labort
+
+1:	ldr	x30, [sp], #((sa_sz + 15) / 16 * 16 + 16)
+	ret
+endfunction
+
+// Main program entry point
+.globl _start
+function _start
+_start:
+	puts	"Streaming mode "
+	smstart_za
+
+	// Sanity-check and report the vector length
+
+	rdsvl	19, 8
+	cmp	x19, #128
+	b.lo	1f
+	cmp	x19, #2048
+	b.hi	1f
+	tst	x19, #(8 - 1)
+	b.eq	2f
+
+1:	puts	"bad vector length: "
+	mov	x0, x19
+	bl	putdecn
+	b	.Labort
+
+2:	puts	"vector length:\t"
+	mov	x0, x19
+	bl	putdec
+	puts	" bits\n"
+
+	// Obtain our PID, to ensure test pattern uniqueness between processes
+	mov	x8, #__NR_getpid
+	svc	#0
+	mov	x20, x0
+
+	puts	"PID:\t"
+	mov	x0, x20
+	bl	putdecn
+
+	mov	x23, #0		// Irritation signal count
+
+	mov	w0, #SIGINT
+	adr	x1, terminate_handler
+	mov	w2, #SA_SIGINFO
+	bl	setsignal
+
+	mov	w0, #SIGTERM
+	adr	x1, terminate_handler
+	mov	w2, #SA_SIGINFO
+	bl	setsignal
+
+	mov	w0, #SIGUSR1
+	adr	x1, irritator_handler
+	mov	w2, #SA_SIGINFO
+	orr	w2, w2, #SA_NODEFER
+	bl	setsignal
+
+	mov	x22, #0		// generation number, increments per iteration
+.Ltest_loop:
+	rdsvl	0, 8
+	cmp	x0, x19
+	b.ne	vl_barf
+
+	rdsvl	21, 1		// Set up ZA & shadow with test pattern
+0:	mov	x0, x20
+	sub	x1, x21, #1
+	mov	x2, x22
+	bl	setup_za
+	subs	x21, x21, #1
+	b.ne	0b
+
+	and	x8, x22, #127		// Every 128 interations...
+	cbz	x8, 0f
+	mov	x8, #__NR_getpid	// (otherwise minimal syscall)
+	b	1f
+0:
+	mov	x8, #__NR_sched_yield	// ...encourage preemption
+1:
+	svc	#0
+
+	mrs	x0, S3_3_C4_C2_2	// SVCR should have ZA=1,SM=0
+	and	x1, x0, #3
+	cmp	x1, #2
+	b.ne	svcr_barf
+
+	rdsvl	21, 1			// Verify that the data made it through
+	rdsvl	24, 1			// Verify that the data made it through
+0:	sub	x0, x24, x21
+	bl	check_za
+	subs	x21, x21, #1
+	bne	0b
+
+	add	x22, x22, #1	// Everything still working
+	b	.Ltest_loop
+
+.Labort:
+	mov	x0, #0
+	mov	x1, #SIGABRT
+	mov	x8, #__NR_kill
+	svc	#0
+endfunction
+
+function barf
+// fpsimd.c acitivty log dump hack
+//	ldr	w0, =0xdeadc0de
+//	mov	w8, #__NR_exit
+//	svc	#0
+// end hack
+	smstop
+	mov	x10, x0	// expected data
+	mov	x11, x1	// actual data
+	mov	x12, x2	// data size
+
+	puts	"Mismatch: PID="
+	mov	x0, x20
+	bl	putdec
+	puts	", iteration="
+	mov	x0, x22
+	bl	putdec
+	puts	", row="
+	mov	x0, x21
+	bl	putdecn
+	puts	"\tExpected ["
+	mov	x0, x10
+	mov	x1, x12
+	bl	dumphex
+	puts	"]\n\tGot      ["
+	mov	x0, x11
+	mov	x1, x12
+	bl	dumphex
+	puts	"]\n"
+
+	mov	x8, #__NR_getpid
+	svc	#0
+// fpsimd.c acitivty log dump hack
+//	ldr	w0, =0xdeadc0de
+//	mov	w8, #__NR_exit
+//	svc	#0
+// ^ end of hack
+	mov	x1, #SIGABRT
+	mov	x8, #__NR_kill
+	svc	#0
+//	mov	x8, #__NR_exit
+//	mov	x1, #1
+//	svc	#0
+endfunction
+
+function vl_barf
+	mov	x10, x0
+
+	puts	"Bad active VL: "
+	mov	x0, x10
+	bl	putdecn
+
+	mov	x8, #__NR_exit
+	mov	x1, #1
+	svc	#0
+endfunction
+
+function svcr_barf
+	mov	x10, x0
+
+	puts	"Bad SVCR: "
+	mov	x0, x10
+	bl	putdecn
+
+	mov	x8, #__NR_exit
+	mov	x1, #1
+	svc	#0
+endfunction
-- 
cgit 


From 4963aeb35a9edca90f062885b0d78c47a00c1752 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 19 Apr 2022 12:22:43 +0100
Subject: kselftest/arm64: signal: Add SME signal handling tests

Add test cases for the SME signal handing ABI patterned off the SVE tests.
Due to the small size of the tests and the differences in ABI (especially
around needing to account for both streaming SVE and ZA) there is some code
duplication here.

We currently cover:
 - Reporting of the vector length.
 - Lack of support for changing vector length.
 - Presence and size of register state for streaming SVE and ZA.

As with the SVE tests we do not yet have any validation of register
contents.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220419112247.711548-36-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 tools/testing/selftests/arm64/signal/.gitignore    |   3 +
 .../testing/selftests/arm64/signal/test_signals.h  |   4 +
 .../selftests/arm64/signal/test_signals_utils.c    |   6 +
 .../testcases/fake_sigreturn_sme_change_vl.c       |  92 ++++++++++++++
 .../arm64/signal/testcases/sme_trap_no_sm.c        |  38 ++++++
 .../signal/testcases/sme_trap_non_streaming.c      |  45 +++++++
 .../selftests/arm64/signal/testcases/sme_trap_za.c |  36 ++++++
 .../selftests/arm64/signal/testcases/sme_vl.c      |  68 +++++++++++
 .../selftests/arm64/signal/testcases/ssve_regs.c   | 135 +++++++++++++++++++++
 .../selftests/arm64/signal/testcases/za_regs.c     | 128 +++++++++++++++++++
 10 files changed, 555 insertions(+)
 create mode 100644 tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c
 create mode 100644 tools/testing/selftests/arm64/signal/testcases/sme_trap_no_sm.c
 create mode 100644 tools/testing/selftests/arm64/signal/testcases/sme_trap_non_streaming.c
 create mode 100644 tools/testing/selftests/arm64/signal/testcases/sme_trap_za.c
 create mode 100644 tools/testing/selftests/arm64/signal/testcases/sme_vl.c
 create mode 100644 tools/testing/selftests/arm64/signal/testcases/ssve_regs.c
 create mode 100644 tools/testing/selftests/arm64/signal/testcases/za_regs.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/signal/.gitignore b/tools/testing/selftests/arm64/signal/.gitignore
index c1742755abb9..e8d2b57f73ec 100644
--- a/tools/testing/selftests/arm64/signal/.gitignore
+++ b/tools/testing/selftests/arm64/signal/.gitignore
@@ -1,5 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0-only
 mangle_*
 fake_sigreturn_*
+sme_*
+ssve_*
 sve_*
+za_*
 !*.[ch]
diff --git a/tools/testing/selftests/arm64/signal/test_signals.h b/tools/testing/selftests/arm64/signal/test_signals.h
index f909b70d9e98..c70fdec7d7c4 100644
--- a/tools/testing/selftests/arm64/signal/test_signals.h
+++ b/tools/testing/selftests/arm64/signal/test_signals.h
@@ -34,11 +34,15 @@
 enum {
 	FSSBS_BIT,
 	FSVE_BIT,
+	FSME_BIT,
+	FSME_FA64_BIT,
 	FMAX_END
 };
 
 #define FEAT_SSBS		(1UL << FSSBS_BIT)
 #define FEAT_SVE		(1UL << FSVE_BIT)
+#define FEAT_SME		(1UL << FSME_BIT)
+#define FEAT_SME_FA64		(1UL << FSME_FA64_BIT)
 
 /*
  * A descriptor used to describe and configure a test case.
diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c
index 5743897984b0..b588d10afd5b 100644
--- a/tools/testing/selftests/arm64/signal/test_signals_utils.c
+++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c
@@ -27,6 +27,8 @@ static int sig_copyctx = SIGTRAP;
 static char const *const feats_names[FMAX_END] = {
 	" SSBS ",
 	" SVE ",
+	" SME ",
+	" FA64 ",
 };
 
 #define MAX_FEATS_SZ	128
@@ -268,6 +270,10 @@ int test_init(struct tdescr *td)
 			td->feats_supported |= FEAT_SSBS;
 		if (getauxval(AT_HWCAP) & HWCAP_SVE)
 			td->feats_supported |= FEAT_SVE;
+		if (getauxval(AT_HWCAP2) & HWCAP2_SME)
+			td->feats_supported |= FEAT_SME;
+		if (getauxval(AT_HWCAP2) & HWCAP2_SME_FA64)
+			td->feats_supported |= FEAT_SME_FA64;
 		if (feats_ok(td)) {
 			if (td->feats_required & td->feats_supported)
 				fprintf(stderr,
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c
new file mode 100644
index 000000000000..7ed762b7202f
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Attempt to change the streaming SVE vector length in a signal
+ * handler, this is not supported and is expected to segfault.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+static unsigned int vls[SVE_VQ_MAX];
+unsigned int nvls = 0;
+
+static bool sme_get_vls(struct tdescr *td)
+{
+	int vq, vl;
+
+	/*
+	 * Enumerate up to SVE_VQ_MAX vector lengths
+	 */
+	for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+		vl = prctl(PR_SVE_SET_VL, vq * 16);
+		if (vl == -1)
+			return false;
+
+		vl &= PR_SME_VL_LEN_MASK;
+
+		/* Skip missing VLs */
+		vq = sve_vq_from_vl(vl);
+
+		vls[nvls++] = vl;
+	}
+
+	/* We need at least two VLs */
+	if (nvls < 2) {
+		fprintf(stderr, "Only %d VL supported\n", nvls);
+		return false;
+	}
+
+	return true;
+}
+
+static int fake_sigreturn_ssve_change_vl(struct tdescr *td,
+					 siginfo_t *si, ucontext_t *uc)
+{
+	size_t resv_sz, offset;
+	struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
+	struct sve_context *sve;
+
+	/* Get a signal context with a SME ZA frame in it */
+	if (!get_current_context(td, &sf.uc))
+		return 1;
+
+	resv_sz = GET_SF_RESV_SIZE(sf);
+	head = get_header(head, SVE_MAGIC, resv_sz, &offset);
+	if (!head) {
+		fprintf(stderr, "No SVE context\n");
+		return 1;
+	}
+
+	if (head->size != sizeof(struct sve_context)) {
+		fprintf(stderr, "Register data present, aborting\n");
+		return 1;
+	}
+
+	sve = (struct sve_context *)head;
+
+	/* No changes are supported; init left us at minimum VL so go to max */
+	fprintf(stderr, "Attempting to change VL from %d to %d\n",
+		sve->vl, vls[0]);
+	sve->vl = vls[0];
+
+	fake_sigreturn(&sf, sizeof(sf), 0);
+
+	return 1;
+}
+
+struct tdescr tde = {
+	.name = "FAKE_SIGRETURN_SSVE_CHANGE",
+	.descr = "Attempt to change Streaming SVE VL",
+	.feats_required = FEAT_SME,
+	.sig_ok = SIGSEGV,
+	.timeout = 3,
+	.init = sme_get_vls,
+	.run = fake_sigreturn_ssve_change_vl,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/sme_trap_no_sm.c b/tools/testing/selftests/arm64/signal/testcases/sme_trap_no_sm.c
new file mode 100644
index 000000000000..f9d76ae32bba
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/sme_trap_no_sm.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that using a streaming mode instruction without enabling it
+ * generates a SIGILL.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+int sme_trap_no_sm_trigger(struct tdescr *td)
+{
+	/* SMSTART ZA ; ADDHA ZA0.S, P0/M, P0/M, Z0.S */
+	asm volatile(".inst 0xd503457f ; .inst 0xc0900000");
+
+	return 0;
+}
+
+int sme_trap_no_sm_run(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+	return 1;
+}
+
+struct tdescr tde = {
+	.name = "SME trap without SM",
+	.descr = "Check that we get a SIGILL if we use streaming mode without enabling it",
+	.timeout = 3,
+	.feats_required = FEAT_SME,   /* We need a SMSTART ZA */
+	.sanity_disabled = true,
+	.trigger = sme_trap_no_sm_trigger,
+	.run = sme_trap_no_sm_run,
+	.sig_ok = SIGILL,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/sme_trap_non_streaming.c b/tools/testing/selftests/arm64/signal/testcases/sme_trap_non_streaming.c
new file mode 100644
index 000000000000..e469ae5348e3
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/sme_trap_non_streaming.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that using an instruction not supported in streaming mode
+ * traps when in streaming mode.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+int sme_trap_non_streaming_trigger(struct tdescr *td)
+{
+	/*
+	 * The framework will handle SIGILL so we need to exit SM to
+	 * stop any other code triggering a further SIGILL down the
+	 * line from using a streaming-illegal instruction.
+	 */
+	asm volatile(".inst 0xd503437f; /* SMSTART ZA */ \
+		      cnt v0.16b, v0.16b; \
+                      .inst 0xd503447f  /* SMSTOP ZA */");
+
+	return 0;
+}
+
+int sme_trap_non_streaming_run(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+	return 1;
+}
+
+struct tdescr tde = {
+	.name = "SME SM trap unsupported instruction",
+	.descr = "Check that we get a SIGILL if we use an unsupported instruction in streaming mode",
+	.feats_required = FEAT_SME,
+	.feats_incompatible = FEAT_SME_FA64,
+	.timeout = 3,
+	.sanity_disabled = true,
+	.trigger = sme_trap_non_streaming_trigger,
+	.run = sme_trap_non_streaming_run,
+	.sig_ok = SIGILL,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/sme_trap_za.c b/tools/testing/selftests/arm64/signal/testcases/sme_trap_za.c
new file mode 100644
index 000000000000..3a7747af4715
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/sme_trap_za.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that accessing ZA without enabling it generates a SIGILL.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+int sme_trap_za_trigger(struct tdescr *td)
+{
+	/* ZERO ZA */
+	asm volatile(".inst 0xc00800ff");
+
+	return 0;
+}
+
+int sme_trap_za_run(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+	return 1;
+}
+
+struct tdescr tde = {
+	.name = "SME ZA trap",
+	.descr = "Check that we get a SIGILL if we access ZA without enabling",
+	.timeout = 3,
+	.sanity_disabled = true,
+	.trigger = sme_trap_za_trigger,
+	.run = sme_trap_za_run,
+	.sig_ok = SIGILL,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/sme_vl.c b/tools/testing/selftests/arm64/signal/testcases/sme_vl.c
new file mode 100644
index 000000000000..13ff3b35cbaf
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/sme_vl.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Check that the SME vector length reported in signal contexts is the
+ * expected one.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+unsigned int vl;
+
+static bool get_sme_vl(struct tdescr *td)
+{
+	int ret = prctl(PR_SME_GET_VL);
+	if (ret == -1)
+		return false;
+
+	vl = ret;
+
+	return true;
+}
+
+static int sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+	size_t resv_sz, offset;
+	struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
+	struct za_context *za;
+
+	/* Get a signal context which should have a ZA frame in it */
+	if (!get_current_context(td, &sf.uc))
+		return 1;
+
+	resv_sz = GET_SF_RESV_SIZE(sf);
+	head = get_header(head, ZA_MAGIC, resv_sz, &offset);
+	if (!head) {
+		fprintf(stderr, "No ZA context\n");
+		return 1;
+	}
+	za = (struct za_context *)head;
+
+	if (za->vl != vl) {
+		fprintf(stderr, "ZA sigframe VL %u, expected %u\n",
+			za->vl, vl);
+		return 1;
+	} else {
+		fprintf(stderr, "got expected VL %u\n", vl);
+	}
+
+	td->pass = 1;
+
+	return 0;
+}
+
+struct tdescr tde = {
+	.name = "SME VL",
+	.descr = "Check that we get the right SME VL reported",
+	.feats_required = FEAT_SME,
+	.timeout = 3,
+	.init = get_sme_vl,
+	.run = sme_vl,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c b/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c
new file mode 100644
index 000000000000..9022a6cab4b3
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that the streaming SVE register context in signal frames is
+ * set up as expected.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+static unsigned int vls[SVE_VQ_MAX];
+unsigned int nvls = 0;
+
+static bool sme_get_vls(struct tdescr *td)
+{
+	int vq, vl;
+
+	/*
+	 * Enumerate up to SVE_VQ_MAX vector lengths
+	 */
+	for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+		vl = prctl(PR_SME_SET_VL, vq * 16);
+		if (vl == -1)
+			return false;
+
+		vl &= PR_SME_VL_LEN_MASK;
+
+		/* Skip missing VLs */
+		vq = sve_vq_from_vl(vl);
+
+		vls[nvls++] = vl;
+	}
+
+	/* We need at least one VL */
+	if (nvls < 1) {
+		fprintf(stderr, "Only %d VL supported\n", nvls);
+		return false;
+	}
+
+	return true;
+}
+
+static void setup_ssve_regs(void)
+{
+	/* smstart sm; real data is TODO */
+	asm volatile(".inst 0xd503437f" : : : );
+}
+
+static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc,
+			 unsigned int vl)
+{
+	size_t resv_sz, offset;
+	struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
+	struct sve_context *ssve;
+	int ret;
+
+	fprintf(stderr, "Testing VL %d\n", vl);
+
+	ret = prctl(PR_SME_SET_VL, vl);
+	if (ret != vl) {
+		fprintf(stderr, "Failed to set VL, got %d\n", ret);
+		return 1;
+	}
+
+	/*
+	 * Get a signal context which should have a SVE frame and registers
+	 * in it.
+	 */
+	setup_ssve_regs();
+	if (!get_current_context(td, &sf.uc))
+		return 1;
+
+	resv_sz = GET_SF_RESV_SIZE(sf);
+	head = get_header(head, SVE_MAGIC, resv_sz, &offset);
+	if (!head) {
+		fprintf(stderr, "No SVE context\n");
+		return 1;
+	}
+
+	ssve = (struct sve_context *)head;
+	if (ssve->vl != vl) {
+		fprintf(stderr, "Got VL %d, expected %d\n", ssve->vl, vl);
+		return 1;
+	}
+
+	/* The actual size validation is done in get_current_context() */
+	fprintf(stderr, "Got expected size %u and VL %d\n",
+		head->size, ssve->vl);
+
+	return 0;
+}
+
+static int sme_regs(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+	int i;
+
+	for (i = 0; i < nvls; i++) {
+		/*
+		 * TODO: the signal test helpers can't currently cope
+		 * with signal frames bigger than struct sigcontext,
+		 * skip VLs that will trigger that.
+		 */
+		if (vls[i] > 64) {
+			printf("Skipping VL %u due to stack size\n", vls[i]);
+			continue;
+		}
+
+		if (do_one_sme_vl(td, si, uc, vls[i]))
+			return 1;
+	}
+
+	td->pass = 1;
+
+	return 0;
+}
+
+struct tdescr tde = {
+	.name = "Streaming SVE registers",
+	.descr = "Check that we get the right Streaming SVE registers reported",
+	/*
+	 * We shouldn't require FA64 but things like memset() used in the
+	 * helpers might use unsupported instructions so for now disable
+	 * the test unless we've got the full instruction set.
+	 */
+	.feats_required = FEAT_SME | FEAT_SME_FA64,
+	.timeout = 3,
+	.init = sme_get_vls,
+	.run = sme_regs,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/za_regs.c b/tools/testing/selftests/arm64/signal/testcases/za_regs.c
new file mode 100644
index 000000000000..b94e4f99fcac
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/za_regs.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that the ZA register context in signal frames is set up as
+ * expected.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+static unsigned int vls[SVE_VQ_MAX];
+unsigned int nvls = 0;
+
+static bool sme_get_vls(struct tdescr *td)
+{
+	int vq, vl;
+
+	/*
+	 * Enumerate up to SVE_VQ_MAX vector lengths
+	 */
+	for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+		vl = prctl(PR_SVE_SET_VL, vq * 16);
+		if (vl == -1)
+			return false;
+
+		vl &= PR_SME_VL_LEN_MASK;
+
+		/* Skip missing VLs */
+		vq = sve_vq_from_vl(vl);
+
+		vls[nvls++] = vl;
+	}
+
+	/* We need at least one VL */
+	if (nvls < 1) {
+		fprintf(stderr, "Only %d VL supported\n", nvls);
+		return false;
+	}
+
+	return true;
+}
+
+static void setup_za_regs(void)
+{
+	/* smstart za; real data is TODO */
+	asm volatile(".inst 0xd503457f" : : : );
+}
+
+static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc,
+			 unsigned int vl)
+{
+	size_t resv_sz, offset;
+	struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
+	struct za_context *za;
+
+	fprintf(stderr, "Testing VL %d\n", vl);
+
+	if (prctl(PR_SME_SET_VL, vl) != vl) {
+		fprintf(stderr, "Failed to set VL\n");
+		return 1;
+	}
+
+	/*
+	 * Get a signal context which should have a SVE frame and registers
+	 * in it.
+	 */
+	setup_za_regs();
+	if (!get_current_context(td, &sf.uc))
+		return 1;
+
+	resv_sz = GET_SF_RESV_SIZE(sf);
+	head = get_header(head, ZA_MAGIC, resv_sz, &offset);
+	if (!head) {
+		fprintf(stderr, "No ZA context\n");
+		return 1;
+	}
+
+	za = (struct za_context *)head;
+	if (za->vl != vl) {
+		fprintf(stderr, "Got VL %d, expected %d\n", za->vl, vl);
+		return 1;
+	}
+
+	/* The actual size validation is done in get_current_context() */
+	fprintf(stderr, "Got expected size %u and VL %d\n",
+		head->size, za->vl);
+
+	return 0;
+}
+
+static int sme_regs(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+	int i;
+
+	for (i = 0; i < nvls; i++) {
+		/*
+		 * TODO: the signal test helpers can't currently cope
+		 * with signal frames bigger than struct sigcontext,
+		 * skip VLs that will trigger that.
+		 */
+		if (vls[i] > 32) {
+			printf("Skipping VL %u due to stack size\n", vls[i]);
+			continue;
+		}
+
+		if (do_one_sme_vl(td, si, uc, vls[i]))
+			return 1;
+	}
+
+	td->pass = 1;
+
+	return 0;
+}
+
+struct tdescr tde = {
+	.name = "ZA register",
+	.descr = "Check that we get the right ZA registers reported",
+	.feats_required = FEAT_SME,
+	.timeout = 3,
+	.init = sme_get_vls,
+	.run = sme_regs,
+};
-- 
cgit 


From fa23100bbad0748f6503511b109cfec955e4183d Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 19 Apr 2022 12:22:44 +0100
Subject: kselftest/arm64: Add streaming SVE to SVE ptrace tests

In order to allow ptrace of streaming mode SVE registers we have added a
new regset for streaming mode which in isolation offers the same ABI as
regular SVE with a different vector type. Add this to the array of regsets
we handle, together with additional tests for the interoperation of the
two regsets.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220419112247.711548-37-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 tools/testing/selftests/arm64/fp/sve-ptrace.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c
index 36b6f0749f23..8c4847977583 100644
--- a/tools/testing/selftests/arm64/fp/sve-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c
@@ -26,6 +26,10 @@
 #define NT_ARM_SVE 0x405
 #endif
 
+#ifndef NT_ARM_SSVE
+#define NT_ARM_SSVE 0x40b
+#endif
+
 struct vec_type {
 	const char *name;
 	unsigned long hwcap_type;
@@ -42,6 +46,13 @@ static const struct vec_type vec_types[] = {
 		.regset = NT_ARM_SVE,
 		.prctl_set = PR_SVE_SET_VL,
 	},
+	{
+		.name = "Streaming SVE",
+		.hwcap_type = AT_HWCAP2,
+		.hwcap = HWCAP2_SME,
+		.regset = NT_ARM_SSVE,
+		.prctl_set = PR_SME_SET_VL,
+	},
 };
 
 #define VL_TESTS (((SVE_VQ_MAX - SVE_VQ_MIN) + 1) * 4)
-- 
cgit 


From 86c8888f91a95a30d8a224c0c655ddac33d04eac Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 19 Apr 2022 12:22:45 +0100
Subject: kselftest/arm64: Add coverage for the ZA ptrace interface

Add some basic coverage for the ZA ptrace interface, including walking
through all the vector lengths supported in the system.  Unlike SVE
doing syscalls does not discard the ZA state so when we set data in ZA
we run the child process briefly, having it add one to each byte in ZA
in order to validate that both the vector size and data are being read
and written as expected when the process runs.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220419112247.711548-38-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 tools/testing/selftests/arm64/fp/.gitignore  |   1 +
 tools/testing/selftests/arm64/fp/Makefile    |   3 +-
 tools/testing/selftests/arm64/fp/za-ptrace.c | 356 +++++++++++++++++++++++++++
 3 files changed, 359 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/arm64/fp/za-ptrace.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/fp/.gitignore b/tools/testing/selftests/arm64/fp/.gitignore
index ead3197e720b..d98d3d48b504 100644
--- a/tools/testing/selftests/arm64/fp/.gitignore
+++ b/tools/testing/selftests/arm64/fp/.gitignore
@@ -8,4 +8,5 @@ sve-test
 ssve-test
 vec-syscfg
 vlset
+za-ptrace
 za-test
diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile
index 38d2d0d5a0eb..807a8faf8d57 100644
--- a/tools/testing/selftests/arm64/fp/Makefile
+++ b/tools/testing/selftests/arm64/fp/Makefile
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 
 CFLAGS += -I../../../../../usr/include/
-TEST_GEN_PROGS := sve-ptrace sve-probe-vls vec-syscfg
+TEST_GEN_PROGS := sve-ptrace sve-probe-vls vec-syscfg za-ptrace
 TEST_PROGS_EXTENDED := fp-pidbench fpsimd-test fpsimd-stress \
 	rdvl-sme rdvl-sve \
 	sve-test sve-stress \
@@ -27,5 +27,6 @@ vec-syscfg: vec-syscfg.o rdvl.o
 vlset: vlset.o
 za-test: za-test.o asm-utils.o
 	$(CC) -nostdlib $^ -o $@
+za-ptrace: za-ptrace.o
 
 include ../../lib.mk
diff --git a/tools/testing/selftests/arm64/fp/za-ptrace.c b/tools/testing/selftests/arm64/fp/za-ptrace.c
new file mode 100644
index 000000000000..bf6158654056
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/za-ptrace.c
@@ -0,0 +1,356 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021 ARM Limited.
+ */
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include <asm/sigcontext.h>
+#include <asm/ptrace.h>
+
+#include "../../kselftest.h"
+
+/* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */
+#ifndef NT_ARM_ZA
+#define NT_ARM_ZA 0x40c
+#endif
+
+#define EXPECTED_TESTS (((SVE_VQ_MAX - SVE_VQ_MIN) + 1) * 3)
+
+static void fill_buf(char *buf, size_t size)
+{
+	int i;
+
+	for (i = 0; i < size; i++)
+		buf[i] = random();
+}
+
+static int do_child(void)
+{
+	if (ptrace(PTRACE_TRACEME, -1, NULL, NULL))
+		ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno));
+
+	if (raise(SIGSTOP))
+		ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno));
+
+	return EXIT_SUCCESS;
+}
+
+static struct user_za_header *get_za(pid_t pid, void **buf, size_t *size)
+{
+	struct user_za_header *za;
+	void *p;
+	size_t sz = sizeof(*za);
+	struct iovec iov;
+
+	while (1) {
+		if (*size < sz) {
+			p = realloc(*buf, sz);
+			if (!p) {
+				errno = ENOMEM;
+				goto error;
+			}
+
+			*buf = p;
+			*size = sz;
+		}
+
+		iov.iov_base = *buf;
+		iov.iov_len = sz;
+		if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_ZA, &iov))
+			goto error;
+
+		za = *buf;
+		if (za->size <= sz)
+			break;
+
+		sz = za->size;
+	}
+
+	return za;
+
+error:
+	return NULL;
+}
+
+static int set_za(pid_t pid, const struct user_za_header *za)
+{
+	struct iovec iov;
+
+	iov.iov_base = (void *)za;
+	iov.iov_len = za->size;
+	return ptrace(PTRACE_SETREGSET, pid, NT_ARM_ZA, &iov);
+}
+
+/* Validate attempting to set the specfied VL via ptrace */
+static void ptrace_set_get_vl(pid_t child, unsigned int vl, bool *supported)
+{
+	struct user_za_header za;
+	struct user_za_header *new_za = NULL;
+	size_t new_za_size = 0;
+	int ret, prctl_vl;
+
+	*supported = false;
+
+	/* Check if the VL is supported in this process */
+	prctl_vl = prctl(PR_SME_SET_VL, vl);
+	if (prctl_vl == -1)
+		ksft_exit_fail_msg("prctl(PR_SME_SET_VL) failed: %s (%d)\n",
+				   strerror(errno), errno);
+
+	/* If the VL is not supported then a supported VL will be returned */
+	*supported = (prctl_vl == vl);
+
+	/* Set the VL by doing a set with no register payload */
+	memset(&za, 0, sizeof(za));
+	za.size = sizeof(za);
+	za.vl = vl;
+	ret = set_za(child, &za);
+	if (ret != 0) {
+		ksft_test_result_fail("Failed to set VL %u\n", vl);
+		return;
+	}
+
+	/*
+	 * Read back the new register state and verify that we have the
+	 * same VL that we got from prctl() on ourselves.
+	 */
+	if (!get_za(child, (void **)&new_za, &new_za_size)) {
+		ksft_test_result_fail("Failed to read VL %u\n", vl);
+		return;
+	}
+
+	ksft_test_result(new_za->vl = prctl_vl, "Set VL %u\n", vl);
+
+	free(new_za);
+}
+
+/* Validate attempting to set no ZA data and read it back */
+static void ptrace_set_no_data(pid_t child, unsigned int vl)
+{
+	void *read_buf = NULL;
+	struct user_za_header write_za;
+	struct user_za_header *read_za;
+	size_t read_za_size = 0;
+	int ret;
+
+	/* Set up some data and write it out */
+	memset(&write_za, 0, sizeof(write_za));
+	write_za.size = ZA_PT_ZA_OFFSET;
+	write_za.vl = vl;
+
+	ret = set_za(child, &write_za);
+	if (ret != 0) {
+		ksft_test_result_fail("Failed to set VL %u no data\n", vl);
+		return;
+	}
+
+	/* Read the data back */
+	if (!get_za(child, (void **)&read_buf, &read_za_size)) {
+		ksft_test_result_fail("Failed to read VL %u no data\n", vl);
+		return;
+	}
+	read_za = read_buf;
+
+	/* We might read more data if there's extensions we don't know */
+	if (read_za->size < write_za.size) {
+		ksft_test_result_fail("VL %u wrote %d bytes, only read %d\n",
+				      vl, write_za.size, read_za->size);
+		goto out_read;
+	}
+
+	ksft_test_result(read_za->size == write_za.size,
+			 "Disabled ZA for VL %u\n", vl);
+
+out_read:
+	free(read_buf);
+}
+
+/* Validate attempting to set data and read it back */
+static void ptrace_set_get_data(pid_t child, unsigned int vl)
+{
+	void *write_buf;
+	void *read_buf = NULL;
+	struct user_za_header *write_za;
+	struct user_za_header *read_za;
+	size_t read_za_size = 0;
+	unsigned int vq = sve_vq_from_vl(vl);
+	int ret;
+	size_t data_size;
+
+	data_size = ZA_PT_SIZE(vq);
+	write_buf = malloc(data_size);
+	if (!write_buf) {
+		ksft_test_result_fail("Error allocating %d byte buffer for VL %u\n",
+				      data_size, vl);
+		return;
+	}
+	write_za = write_buf;
+
+	/* Set up some data and write it out */
+	memset(write_za, 0, data_size);
+	write_za->size = data_size;
+	write_za->vl = vl;
+
+	fill_buf(write_buf + ZA_PT_ZA_OFFSET, ZA_PT_ZA_SIZE(vq));
+
+	ret = set_za(child, write_za);
+	if (ret != 0) {
+		ksft_test_result_fail("Failed to set VL %u data\n", vl);
+		goto out;
+	}
+
+	/* Read the data back */
+	if (!get_za(child, (void **)&read_buf, &read_za_size)) {
+		ksft_test_result_fail("Failed to read VL %u data\n", vl);
+		goto out;
+	}
+	read_za = read_buf;
+
+	/* We might read more data if there's extensions we don't know */
+	if (read_za->size < write_za->size) {
+		ksft_test_result_fail("VL %u wrote %d bytes, only read %d\n",
+				      vl, write_za->size, read_za->size);
+		goto out_read;
+	}
+
+	ksft_test_result(memcmp(write_buf + ZA_PT_ZA_OFFSET,
+				read_buf + ZA_PT_ZA_OFFSET,
+				ZA_PT_ZA_SIZE(vq)) == 0,
+			 "Data match for VL %u\n", vl);
+
+out_read:
+	free(read_buf);
+out:
+	free(write_buf);
+}
+
+static int do_parent(pid_t child)
+{
+	int ret = EXIT_FAILURE;
+	pid_t pid;
+	int status;
+	siginfo_t si;
+	unsigned int vq, vl;
+	bool vl_supported;
+
+	/* Attach to the child */
+	while (1) {
+		int sig;
+
+		pid = wait(&status);
+		if (pid == -1) {
+			perror("wait");
+			goto error;
+		}
+
+		/*
+		 * This should never happen but it's hard to flag in
+		 * the framework.
+		 */
+		if (pid != child)
+			continue;
+
+		if (WIFEXITED(status) || WIFSIGNALED(status))
+			ksft_exit_fail_msg("Child died unexpectedly\n");
+
+		if (!WIFSTOPPED(status))
+			goto error;
+
+		sig = WSTOPSIG(status);
+
+		if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &si)) {
+			if (errno == ESRCH)
+				goto disappeared;
+
+			if (errno == EINVAL) {
+				sig = 0; /* bust group-stop */
+				goto cont;
+			}
+
+			ksft_test_result_fail("PTRACE_GETSIGINFO: %s\n",
+					      strerror(errno));
+			goto error;
+		}
+
+		if (sig == SIGSTOP && si.si_code == SI_TKILL &&
+		    si.si_pid == pid)
+			break;
+
+	cont:
+		if (ptrace(PTRACE_CONT, pid, NULL, sig)) {
+			if (errno == ESRCH)
+				goto disappeared;
+
+			ksft_test_result_fail("PTRACE_CONT: %s\n",
+					      strerror(errno));
+			goto error;
+		}
+	}
+
+	ksft_print_msg("Parent is %d, child is %d\n", getpid(), child);
+
+	/* Step through every possible VQ */
+	for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) {
+		vl = sve_vl_from_vq(vq);
+
+		/* First, try to set this vector length */
+		ptrace_set_get_vl(child, vl, &vl_supported);
+
+		/* If the VL is supported validate data set/get */
+		if (vl_supported) {
+			ptrace_set_no_data(child, vl);
+			ptrace_set_get_data(child, vl);
+		} else {
+			ksft_test_result_skip("Disabled ZA for VL %u\n", vl);
+			ksft_test_result_skip("Get and set data for VL %u\n",
+					      vl);
+		}
+	}
+
+	ret = EXIT_SUCCESS;
+
+error:
+	kill(child, SIGKILL);
+
+disappeared:
+	return ret;
+}
+
+int main(void)
+{
+	int ret = EXIT_SUCCESS;
+	pid_t child;
+
+	srandom(getpid());
+
+	ksft_print_header();
+
+	if (!(getauxval(AT_HWCAP2) & HWCAP2_SME)) {
+		ksft_set_plan(1);
+		ksft_exit_skip("SME not available\n");
+	}
+
+	ksft_set_plan(EXPECTED_TESTS);
+
+	child = fork();
+	if (!child)
+		return do_child();
+
+	if (do_parent(child))
+		ret = EXIT_FAILURE;
+
+	ksft_print_cnts();
+
+	return ret;
+}
-- 
cgit 


From 43e3f85523e488f8acd6b371d457818d81977934 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 19 Apr 2022 12:22:46 +0100
Subject: kselftest/arm64: Add SME support to syscall ABI test

For every possible combination of SVE and SME vector length verify that for
each possible value of SVCR after a syscall we leave streaming mode and ZA
is preserved. We don't need to take account of any streaming/non streaming
SVE vector length changes in the assembler code since the store instructions
will handle the vector length for us. We log if the system supports FA64 and
only try to set FFR in streaming mode if it does.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20220419112247.711548-39-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 .../testing/selftests/arm64/abi/syscall-abi-asm.S  |  79 +++++++-
 tools/testing/selftests/arm64/abi/syscall-abi.c    | 204 ++++++++++++++++++---
 tools/testing/selftests/arm64/abi/syscall-abi.h    |  15 ++
 3 files changed, 275 insertions(+), 23 deletions(-)
 create mode 100644 tools/testing/selftests/arm64/abi/syscall-abi.h

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/abi/syscall-abi-asm.S b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S
index 983467cfcee0..b523c21c2278 100644
--- a/tools/testing/selftests/arm64/abi/syscall-abi-asm.S
+++ b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S
@@ -9,15 +9,42 @@
 // invoked is configured in x8 of the input GPR data.
 //
 // x0:	SVE VL, 0 for FP only
+// x1:	SME VL
 //
 //	GPRs:	gpr_in, gpr_out
 //	FPRs:	fpr_in, fpr_out
 //	Zn:	z_in, z_out
 //	Pn:	p_in, p_out
 //	FFR:	ffr_in, ffr_out
+//	ZA:	za_in, za_out
+//	SVCR:	svcr_in, svcr_out
+
+#include "syscall-abi.h"
 
 .arch_extension sve
 
+/*
+ * LDR (vector to ZA array):
+ *	LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
+ */
+.macro _ldr_za nw, nxbase, offset=0
+	.inst	0xe1000000			\
+		| (((\nw) & 3) << 13)		\
+		| ((\nxbase) << 5)		\
+		| ((\offset) & 7)
+.endm
+
+/*
+ * STR (vector from ZA array):
+ *	STR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
+ */
+.macro _str_za nw, nxbase, offset=0
+	.inst	0xe1200000			\
+		| (((\nw) & 3) << 13)		\
+		| ((\nxbase) << 5)		\
+		| ((\offset) & 7)
+.endm
+
 .globl do_syscall
 do_syscall:
 	// Store callee saved registers x19-x29 (80 bytes) plus x0 and x1
@@ -30,6 +57,24 @@ do_syscall:
 	stp	x25, x26, [sp, #80]
 	stp	x27, x28, [sp, #96]
 
+	// Set SVCR if we're doing SME
+	cbz	x1, 1f
+	adrp	x2, svcr_in
+	ldr	x2, [x2, :lo12:svcr_in]
+	msr	S3_3_C4_C2_2, x2
+1:
+
+	// Load ZA if it's enabled - uses x12 as scratch due to SME LDR
+	tbz	x2, #SVCR_ZA_SHIFT, 1f
+	mov	w12, #0
+	ldr	x2, =za_in
+2:	_ldr_za 12, 2
+	add	x2, x2, x1
+	add	x12, x12, #1
+	cmp	x1, x12
+	bne	2b
+1:
+
 	// Load GPRs x8-x28, and save our SP/FP for later comparison
 	ldr	x2, =gpr_in
 	add	x2, x2, #64
@@ -68,7 +113,7 @@ do_syscall:
 	ldp	q30, q31, [x2, #16 * 30]
 1:
 
-	// Load the SVE registers if we're doing SVE
+	// Load the SVE registers if we're doing SVE/SME
 	cbz	x0, 1f
 
 	ldr	x2, =z_in
@@ -105,9 +150,14 @@ do_syscall:
 	ldr	z30, [x2, #30, MUL VL]
 	ldr	z31, [x2, #31, MUL VL]
 
+	// Only set a non-zero FFR, test patterns must be zero since the
+	// syscall should clear it - this lets us handle FA64.
 	ldr	x2, =ffr_in
 	ldr	p0, [x2, #0]
+	ldr	x2, [x2, #0]
+	cbz	x2, 2f
 	wrffr	p0.b
+2:
 
 	ldr	x2, =p_in
 	ldr	p0, [x2, #0, MUL VL]
@@ -169,6 +219,24 @@ do_syscall:
 	stp	q28, q29, [x2, #16 * 28]
 	stp	q30, q31, [x2, #16 * 30]
 
+	// Save SVCR if we're doing SME
+	cbz	x1, 1f
+	mrs	x2, S3_3_C4_C2_2
+	adrp	x3, svcr_out
+	str	x2, [x3, :lo12:svcr_out]
+1:
+
+	// Save ZA if it's enabled - uses x12 as scratch due to SME STR
+	tbz	x2, #SVCR_ZA_SHIFT, 1f
+	mov	w12, #0
+	ldr	x2, =za_out
+2:	_str_za 12, 2
+	add	x2, x2, x1
+	add	x12, x12, #1
+	cmp	x1, x12
+	bne	2b
+1:
+
 	// Save the SVE state if we have some
 	cbz	x0, 1f
 
@@ -224,6 +292,10 @@ do_syscall:
 	str	p14, [x2, #14, MUL VL]
 	str	p15, [x2, #15, MUL VL]
 
+	// Only save FFR if we wrote a value for SME
+	ldr	x2, =ffr_in
+	ldr	x2, [x2, #0]
+	cbz	x2, 1f
 	ldr	x2, =ffr_out
 	rdffr	p0.b
 	str	p0, [x2, #0]
@@ -237,4 +309,9 @@ do_syscall:
 	ldp	x27, x28, [sp, #96]
 	ldp	x29, x30, [sp], #112
 
+	// Clear SVCR if we were doing SME so future tests don't have ZA
+	cbz	x1, 1f
+	msr	S3_3_C4_C2_2, xzr
+1:
+
 	ret
diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.c b/tools/testing/selftests/arm64/abi/syscall-abi.c
index 1e13b7523918..b632bfe9e022 100644
--- a/tools/testing/selftests/arm64/abi/syscall-abi.c
+++ b/tools/testing/selftests/arm64/abi/syscall-abi.c
@@ -18,9 +18,13 @@
 
 #include "../../kselftest.h"
 
+#include "syscall-abi.h"
+
 #define NUM_VL ((SVE_VQ_MAX - SVE_VQ_MIN) + 1)
 
-extern void do_syscall(int sve_vl);
+static int default_sme_vl;
+
+extern void do_syscall(int sve_vl, int sme_vl);
 
 static void fill_random(void *buf, size_t size)
 {
@@ -48,14 +52,15 @@ static struct syscall_cfg {
 uint64_t gpr_in[NUM_GPR];
 uint64_t gpr_out[NUM_GPR];
 
-static void setup_gpr(struct syscall_cfg *cfg, int sve_vl)
+static void setup_gpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+		      uint64_t svcr)
 {
 	fill_random(gpr_in, sizeof(gpr_in));
 	gpr_in[8] = cfg->syscall_nr;
 	memset(gpr_out, 0, sizeof(gpr_out));
 }
 
-static int check_gpr(struct syscall_cfg *cfg, int sve_vl)
+static int check_gpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, uint64_t svcr)
 {
 	int errors = 0;
 	int i;
@@ -79,13 +84,15 @@ static int check_gpr(struct syscall_cfg *cfg, int sve_vl)
 uint64_t fpr_in[NUM_FPR * 2];
 uint64_t fpr_out[NUM_FPR * 2];
 
-static void setup_fpr(struct syscall_cfg *cfg, int sve_vl)
+static void setup_fpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+		      uint64_t svcr)
 {
 	fill_random(fpr_in, sizeof(fpr_in));
 	memset(fpr_out, 0, sizeof(fpr_out));
 }
 
-static int check_fpr(struct syscall_cfg *cfg, int sve_vl)
+static int check_fpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+		     uint64_t svcr)
 {
 	int errors = 0;
 	int i;
@@ -109,13 +116,15 @@ static uint8_t z_zero[__SVE_ZREG_SIZE(SVE_VQ_MAX)];
 uint8_t z_in[SVE_NUM_PREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)];
 uint8_t z_out[SVE_NUM_PREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)];
 
-static void setup_z(struct syscall_cfg *cfg, int sve_vl)
+static void setup_z(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+		    uint64_t svcr)
 {
 	fill_random(z_in, sizeof(z_in));
 	fill_random(z_out, sizeof(z_out));
 }
 
-static int check_z(struct syscall_cfg *cfg, int sve_vl)
+static int check_z(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+		   uint64_t svcr)
 {
 	size_t reg_size = sve_vl;
 	int errors = 0;
@@ -126,13 +135,17 @@ static int check_z(struct syscall_cfg *cfg, int sve_vl)
 
 	/*
 	 * After a syscall the low 128 bits of the Z registers should
-	 * be preserved and the rest be zeroed or preserved.
+	 * be preserved and the rest be zeroed or preserved, except if
+	 * we were in streaming mode in which case the low 128 bits may
+	 * also be cleared by the transition out of streaming mode.
 	 */
 	for (i = 0; i < SVE_NUM_ZREGS; i++) {
 		void *in = &z_in[reg_size * i];
 		void *out = &z_out[reg_size * i];
 
-		if (memcmp(in, out, SVE_VQ_BYTES) != 0) {
+		if ((memcmp(in, out, SVE_VQ_BYTES) != 0) &&
+		    !((svcr & SVCR_SM_MASK) &&
+		      memcmp(z_zero, out, SVE_VQ_BYTES) == 0)) {
 			ksft_print_msg("%s SVE VL %d Z%d low 128 bits changed\n",
 				       cfg->name, sve_vl, i);
 			errors++;
@@ -145,13 +158,15 @@ static int check_z(struct syscall_cfg *cfg, int sve_vl)
 uint8_t p_in[SVE_NUM_PREGS * __SVE_PREG_SIZE(SVE_VQ_MAX)];
 uint8_t p_out[SVE_NUM_PREGS * __SVE_PREG_SIZE(SVE_VQ_MAX)];
 
-static void setup_p(struct syscall_cfg *cfg, int sve_vl)
+static void setup_p(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+		    uint64_t svcr)
 {
 	fill_random(p_in, sizeof(p_in));
 	fill_random(p_out, sizeof(p_out));
 }
 
-static int check_p(struct syscall_cfg *cfg, int sve_vl)
+static int check_p(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+		   uint64_t svcr)
 {
 	size_t reg_size = sve_vq_from_vl(sve_vl) * 2; /* 1 bit per VL byte */
 
@@ -175,8 +190,19 @@ static int check_p(struct syscall_cfg *cfg, int sve_vl)
 uint8_t ffr_in[__SVE_PREG_SIZE(SVE_VQ_MAX)];
 uint8_t ffr_out[__SVE_PREG_SIZE(SVE_VQ_MAX)];
 
-static void setup_ffr(struct syscall_cfg *cfg, int sve_vl)
+static void setup_ffr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+		      uint64_t svcr)
 {
+	/*
+	 * If we are in streaming mode and do not have FA64 then FFR
+	 * is unavailable.
+	 */
+	if ((svcr & SVCR_SM_MASK) &&
+	    !(getauxval(AT_HWCAP2) & HWCAP2_SME_FA64)) {
+		memset(&ffr_in, 0, sizeof(ffr_in));
+		return;
+	}
+
 	/*
 	 * It is only valid to set a contiguous set of bits starting
 	 * at 0.  For now since we're expecting this to be cleared by
@@ -186,7 +212,8 @@ static void setup_ffr(struct syscall_cfg *cfg, int sve_vl)
 	fill_random(ffr_out, sizeof(ffr_out));
 }
 
-static int check_ffr(struct syscall_cfg *cfg, int sve_vl)
+static int check_ffr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+		     uint64_t svcr)
 {
 	size_t reg_size = sve_vq_from_vl(sve_vl) * 2;  /* 1 bit per VL byte */
 	int errors = 0;
@@ -195,6 +222,10 @@ static int check_ffr(struct syscall_cfg *cfg, int sve_vl)
 	if (!sve_vl)
 		return 0;
 
+	if ((svcr & SVCR_SM_MASK) &&
+	    !(getauxval(AT_HWCAP2) & HWCAP2_SME_FA64))
+		return 0;
+
 	/* After a syscall the P registers should be preserved or zeroed */
 	for (i = 0; i < reg_size; i++)
 		if (ffr_out[i] && (ffr_in[i] != ffr_out[i]))
@@ -206,8 +237,65 @@ static int check_ffr(struct syscall_cfg *cfg, int sve_vl)
 	return errors;
 }
 
-typedef void (*setup_fn)(struct syscall_cfg *cfg, int sve_vl);
-typedef int (*check_fn)(struct syscall_cfg *cfg, int sve_vl);
+uint64_t svcr_in, svcr_out;
+
+static void setup_svcr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+		    uint64_t svcr)
+{
+	svcr_in = svcr;
+}
+
+static int check_svcr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+		      uint64_t svcr)
+{
+	int errors = 0;
+
+	if (svcr_out & SVCR_SM_MASK) {
+		ksft_print_msg("%s Still in SM, SVCR %llx\n",
+			       cfg->name, svcr_out);
+		errors++;
+	}
+
+	if ((svcr_in & SVCR_ZA_MASK) != (svcr_out & SVCR_ZA_MASK)) {
+		ksft_print_msg("%s PSTATE.ZA changed, SVCR %llx != %llx\n",
+			       cfg->name, svcr_in, svcr_out);
+		errors++;
+	}
+
+	return errors;
+}
+
+uint8_t za_in[SVE_NUM_PREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)];
+uint8_t za_out[SVE_NUM_PREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)];
+
+static void setup_za(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+		     uint64_t svcr)
+{
+	fill_random(za_in, sizeof(za_in));
+	memset(za_out, 0, sizeof(za_out));
+}
+
+static int check_za(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+		    uint64_t svcr)
+{
+	size_t reg_size = sme_vl * sme_vl;
+	int errors = 0;
+
+	if (!(svcr & SVCR_ZA_MASK))
+		return 0;
+
+	if (memcmp(za_in, za_out, reg_size) != 0) {
+		ksft_print_msg("SME VL %d ZA does not match\n", sme_vl);
+		errors++;
+	}
+
+	return errors;
+}
+
+typedef void (*setup_fn)(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+			 uint64_t svcr);
+typedef int (*check_fn)(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+			uint64_t svcr);
 
 /*
  * Each set of registers has a setup function which is called before
@@ -225,20 +313,23 @@ static struct {
 	{ setup_z, check_z },
 	{ setup_p, check_p },
 	{ setup_ffr, check_ffr },
+	{ setup_svcr, check_svcr },
+	{ setup_za, check_za },
 };
 
-static bool do_test(struct syscall_cfg *cfg, int sve_vl)
+static bool do_test(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+		    uint64_t svcr)
 {
 	int errors = 0;
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(regset); i++)
-		regset[i].setup(cfg, sve_vl);
+		regset[i].setup(cfg, sve_vl, sme_vl, svcr);
 
-	do_syscall(sve_vl);
+	do_syscall(sve_vl, sme_vl);
 
 	for (i = 0; i < ARRAY_SIZE(regset); i++)
-		errors += regset[i].check(cfg, sve_vl);
+		errors += regset[i].check(cfg, sve_vl, sme_vl, svcr);
 
 	return errors == 0;
 }
@@ -246,9 +337,10 @@ static bool do_test(struct syscall_cfg *cfg, int sve_vl)
 static void test_one_syscall(struct syscall_cfg *cfg)
 {
 	int sve_vq, sve_vl;
+	int sme_vq, sme_vl;
 
 	/* FPSIMD only case */
-	ksft_test_result(do_test(cfg, 0),
+	ksft_test_result(do_test(cfg, 0, default_sme_vl, 0),
 			 "%s FPSIMD\n", cfg->name);
 
 	if (!(getauxval(AT_HWCAP) & HWCAP_SVE))
@@ -265,8 +357,36 @@ static void test_one_syscall(struct syscall_cfg *cfg)
 		if (sve_vq != sve_vq_from_vl(sve_vl))
 			sve_vq = sve_vq_from_vl(sve_vl);
 
-		ksft_test_result(do_test(cfg, sve_vl),
+		ksft_test_result(do_test(cfg, sve_vl, default_sme_vl, 0),
 				 "%s SVE VL %d\n", cfg->name, sve_vl);
+
+		if (!(getauxval(AT_HWCAP2) & HWCAP2_SME))
+			continue;
+
+		for (sme_vq = SVE_VQ_MAX; sme_vq > 0; --sme_vq) {
+			sme_vl = prctl(PR_SME_SET_VL, sme_vq * 16);
+			if (sme_vl == -1)
+				ksft_exit_fail_msg("PR_SME_SET_VL failed: %s (%d)\n",
+						   strerror(errno), errno);
+
+			sme_vl &= PR_SME_VL_LEN_MASK;
+
+			if (sme_vq != sve_vq_from_vl(sme_vl))
+				sme_vq = sve_vq_from_vl(sme_vl);
+
+			ksft_test_result(do_test(cfg, sve_vl, sme_vl,
+						 SVCR_ZA_MASK | SVCR_SM_MASK),
+					 "%s SVE VL %d/SME VL %d SM+ZA\n",
+					 cfg->name, sve_vl, sme_vl);
+			ksft_test_result(do_test(cfg, sve_vl, sme_vl,
+						 SVCR_SM_MASK),
+					 "%s SVE VL %d/SME VL %d SM\n",
+					 cfg->name, sve_vl, sme_vl);
+			ksft_test_result(do_test(cfg, sve_vl, sme_vl,
+						 SVCR_ZA_MASK),
+					 "%s SVE VL %d/SME VL %d ZA\n",
+					 cfg->name, sve_vl, sme_vl);
+		}
 	}
 }
 
@@ -299,14 +419,54 @@ int sve_count_vls(void)
 	return vl_count;
 }
 
+int sme_count_vls(void)
+{
+	unsigned int vq;
+	int vl_count = 0;
+	int vl;
+
+	if (!(getauxval(AT_HWCAP2) & HWCAP2_SME))
+		return 0;
+
+	/* Ensure we configure a SME VL, used to flag if SVCR is set */
+	default_sme_vl = 16;
+
+	/*
+	 * Enumerate up to SVE_VQ_MAX vector lengths
+	 */
+	for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+		vl = prctl(PR_SME_SET_VL, vq * 16);
+		if (vl == -1)
+			ksft_exit_fail_msg("PR_SME_SET_VL failed: %s (%d)\n",
+					   strerror(errno), errno);
+
+		vl &= PR_SME_VL_LEN_MASK;
+
+		if (vq != sve_vq_from_vl(vl))
+			vq = sve_vq_from_vl(vl);
+
+		vl_count++;
+	}
+
+	return vl_count;
+}
+
 int main(void)
 {
 	int i;
+	int tests = 1;  /* FPSIMD */
 
 	srandom(getpid());
 
 	ksft_print_header();
-	ksft_set_plan(ARRAY_SIZE(syscalls) * (sve_count_vls() + 1));
+	tests += sve_count_vls();
+	tests += (sve_count_vls() * sme_count_vls()) * 3;
+	ksft_set_plan(ARRAY_SIZE(syscalls) * tests);
+
+	if (getauxval(AT_HWCAP2) & HWCAP2_SME_FA64)
+		ksft_print_msg("SME with FA64\n");
+	else if (getauxval(AT_HWCAP2) & HWCAP2_SME)
+		ksft_print_msg("SME without FA64\n");
 
 	for (i = 0; i < ARRAY_SIZE(syscalls); i++)
 		test_one_syscall(&syscalls[i]);
diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.h b/tools/testing/selftests/arm64/abi/syscall-abi.h
new file mode 100644
index 000000000000..bda5a87ad381
--- /dev/null
+++ b/tools/testing/selftests/arm64/abi/syscall-abi.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2021 ARM Limited.
+ */
+
+#ifndef SYSCALL_ABI_H
+#define SYSCALL_ABI_H
+
+#define SVCR_ZA_MASK		2
+#define SVCR_SM_MASK		1
+
+#define SVCR_ZA_SHIFT		1
+#define SVCR_SM_SHIFT		0
+
+#endif
-- 
cgit 


From 212b0426bc361eede2f9ce43fb2a5b01070000a1 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 19 Apr 2022 12:22:47 +0100
Subject: selftests/arm64: Add a testcase for handling of ZA on clone()

Add a small testcase that attempts to do a clone() with ZA enabled and
verifies that it remains enabled with the same contents. We only check
one word in one horizontal vector of ZA since there's already other tests
that check for data corruption more broadly, we're just looking to make
sure that ZA is still enabled and it looks like the data got copied.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20220419112247.711548-40-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 tools/testing/selftests/arm64/fp/.gitignore    |   1 +
 tools/testing/selftests/arm64/fp/Makefile      |   9 +-
 tools/testing/selftests/arm64/fp/za-fork-asm.S |  61 ++++++++++
 tools/testing/selftests/arm64/fp/za-fork.c     | 156 +++++++++++++++++++++++++
 4 files changed, 226 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/arm64/fp/za-fork-asm.S
 create mode 100644 tools/testing/selftests/arm64/fp/za-fork.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/fp/.gitignore b/tools/testing/selftests/arm64/fp/.gitignore
index d98d3d48b504..ea947af63882 100644
--- a/tools/testing/selftests/arm64/fp/.gitignore
+++ b/tools/testing/selftests/arm64/fp/.gitignore
@@ -8,5 +8,6 @@ sve-test
 ssve-test
 vec-syscfg
 vlset
+za-fork
 za-ptrace
 za-test
diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile
index 807a8faf8d57..95e707e32247 100644
--- a/tools/testing/selftests/arm64/fp/Makefile
+++ b/tools/testing/selftests/arm64/fp/Makefile
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 
 CFLAGS += -I../../../../../usr/include/
-TEST_GEN_PROGS := sve-ptrace sve-probe-vls vec-syscfg za-ptrace
+TEST_GEN_PROGS := sve-ptrace sve-probe-vls vec-syscfg za-fork za-ptrace
 TEST_PROGS_EXTENDED := fp-pidbench fpsimd-test fpsimd-stress \
 	rdvl-sme rdvl-sve \
 	sve-test sve-stress \
@@ -11,6 +11,7 @@ TEST_PROGS_EXTENDED := fp-pidbench fpsimd-test fpsimd-stress \
 
 all: $(TEST_GEN_PROGS) $(TEST_PROGS_EXTENDED)
 
+# Build with nolibc to avoid effects due to libc's clone() support
 fp-pidbench: fp-pidbench.S asm-utils.o
 	$(CC) -nostdlib $^ -o $@
 fpsimd-test: fpsimd-test.o asm-utils.o
@@ -25,6 +26,12 @@ ssve-test: sve-test.S asm-utils.o
 	$(CC) -DSSVE -nostdlib $^ -o $@
 vec-syscfg: vec-syscfg.o rdvl.o
 vlset: vlset.o
+za-fork: za-fork.o za-fork-asm.o
+	$(CC) -nostdlib -static $^ -o $@ -lgcc
+za-fork.o: za-fork.c
+	$(CC) -c -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \
+		-include ../../../../include/nolibc/nolibc.h \
+		-ffreestanding -Wall $^ -o $@
 za-test: za-test.o asm-utils.o
 	$(CC) -nostdlib $^ -o $@
 za-ptrace: za-ptrace.o
diff --git a/tools/testing/selftests/arm64/fp/za-fork-asm.S b/tools/testing/selftests/arm64/fp/za-fork-asm.S
new file mode 100644
index 000000000000..2fafadd491c3
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/za-fork-asm.S
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2021 ARM Limited.
+
+#include "sme-inst.h"
+
+.arch_extension sve
+
+#define MAGIC     42
+
+#define MAXVL     2048
+#define MAXVL_B   (MAXVL / 8)
+
+.pushsection .text
+.data
+.align 4
+scratch:
+	.space	MAXVL_B
+.popsection
+
+.globl fork_test
+fork_test:
+	smstart_za
+
+	// For simplicity just set one word in one vector, other tests
+	// cover general data corruption issues.
+	ldr	x0, =scratch
+	mov	x1, #MAGIC
+	str	x1, [x0]
+	mov	w12, wzr
+	_ldr_za 12, 0			// ZA.H[W12] loaded from [X0]
+
+	// Tail call into the C portion that does the fork & verify
+	b	fork_test_c
+
+.globl verify_fork
+verify_fork:
+	// SVCR should have ZA=1, SM=0
+	mrs	x0, S3_3_C4_C2_2
+	and	x1, x0, #3
+	cmp	x1, #2
+	beq	1f
+	mov	x0, xzr
+	b	100f
+1:
+
+	// ZA should still have the value we loaded
+	ldr	x0, =scratch
+	mov	w12, wzr
+	_str_za 12, 0			// ZA.H[W12] stored to [X0]
+	ldr	x1, [x0]
+	cmp	x1, #MAGIC
+	beq	2f
+	mov	x0, xzr
+	b	100f
+
+2:
+	// All tests passed
+	mov	x0, #1
+100:
+	ret
+
diff --git a/tools/testing/selftests/arm64/fp/za-fork.c b/tools/testing/selftests/arm64/fp/za-fork.c
new file mode 100644
index 000000000000..ff475c649e96
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/za-fork.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022 ARM Limited.
+ * Original author: Mark Brown <broonie@kernel.org>
+ */
+
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/sched.h>
+#include <linux/wait.h>
+
+#define EXPECTED_TESTS 1
+
+static void putstr(const char *str)
+{
+	write(1, str, strlen(str));
+}
+
+static void putnum(unsigned int num)
+{
+	char c;
+
+	if (num / 10)
+		putnum(num / 10);
+
+	c = '0' + (num % 10);
+	write(1, &c, 1);
+}
+
+static int tests_run;
+static int tests_passed;
+static int tests_failed;
+static int tests_skipped;
+
+static void print_summary(void)
+{
+	if (tests_passed + tests_failed + tests_skipped != EXPECTED_TESTS)
+		putstr("# UNEXPECTED TEST COUNT: ");
+
+	putstr("# Totals: pass:");
+	putnum(tests_passed);
+	putstr(" fail:");
+	putnum(tests_failed);
+	putstr(" xfail:0 xpass:0 skip:");
+	putnum(tests_skipped);
+	putstr(" error:0\n");
+}
+
+int fork_test(void);
+int verify_fork(void);
+
+/*
+ * If we fork the value in the parent should be unchanged and the
+ * child should start with the same value.  This is called from the
+ * fork_test() asm function.
+ */
+int fork_test_c(void)
+{
+	pid_t newpid, waiting;
+	int child_status, parent_result;
+
+	newpid = fork();
+	if (newpid == 0) {
+		/* In child */
+		if (!verify_fork()) {
+			putstr("# ZA state invalid in child\n");
+			exit(0);
+		} else {
+			exit(1);
+		}
+	}
+	if (newpid < 0) {
+		putstr("# fork() failed: -");
+		putnum(-newpid);
+		putstr("\n");
+		return 0;
+	}
+
+	parent_result = verify_fork();
+	if (!parent_result)
+		putstr("# ZA state invalid in parent\n");
+
+	for (;;) {
+		waiting = waitpid(newpid, &child_status, 0);
+
+		if (waiting < 0) {
+			if (errno == EINTR)
+				continue;
+			putstr("# waitpid() failed: ");
+			putnum(errno);
+			putstr("\n");
+			return 0;
+		}
+		if (waiting != newpid) {
+			putstr("# waitpid() returned wrong PID\n");
+			return 0;
+		}
+
+		if (!WIFEXITED(child_status)) {
+			putstr("# child did not exit\n");
+			return 0;
+		}
+
+		return WEXITSTATUS(child_status) && parent_result;
+	}
+}
+
+#define run_test(name)			     \
+	if (name()) {			     \
+		tests_passed++;		     \
+	} else {			     \
+		tests_failed++;		     \
+		putstr("not ");		     \
+	}				     \
+	putstr("ok ");			     \
+	putnum(++tests_run);		     \
+	putstr(" " #name "\n");
+
+int main(int argc, char **argv)
+{
+	int ret, i;
+
+	putstr("TAP version 13\n");
+	putstr("1..");
+	putnum(EXPECTED_TESTS);
+	putstr("\n");
+
+	putstr("# PID: ");
+	putnum(getpid());
+	putstr("\n");
+
+	/*
+	 * This test is run with nolibc which doesn't support hwcap and
+	 * it's probably disproportionate to implement so instead check
+	 * for the default vector length configuration in /proc.
+	 */
+	ret = open("/proc/sys/abi/sme_default_vector_length", O_RDONLY, 0);
+	if (ret >= 0) {
+		run_test(fork_test);
+
+	} else {
+		putstr("# SME support not present\n");
+
+		for (i = 0; i < EXPECTED_TESTS; i++) {
+			putstr("ok ");
+			putnum(i);
+			putstr(" skipped\n");
+		}
+
+		tests_skipped += EXPECTED_TESTS;
+	}
+
+	print_summary();
+
+	return 0;
+}
-- 
cgit 


From f82efe5b9a3ae75a557097a074b0125032e76a83 Mon Sep 17 00:00:00 2001
From: Guo Zhengkui <guozhengkui@vivo.com>
Date: Tue, 19 Apr 2022 11:24:51 +0800
Subject: kselftest/arm64: fix array_size.cocci warning

Fix the following coccicheck warnings:
tools/testing/selftests/arm64/mte/check_child_memory.c:110:25-26:
WARNING: Use ARRAY_SIZE
tools/testing/selftests/arm64/mte/check_child_memory.c:88:24-25:
WARNING: Use ARRAY_SIZE
tools/testing/selftests/arm64/mte/check_child_memory.c:90:20-21:
WARNING: Use ARRAY_SIZE
tools/testing/selftests/arm64/mte/check_child_memory.c:147:24-25:
WARNING: Use ARRAY_SIZE

`ARRAY_SIZE` macro is defined in tools/testing/selftests/kselftest.h.

Signed-off-by: Guo Zhengkui <guozhengkui@vivo.com>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lore.kernel.org/r/20220419032501.22790-1-guozhengkui@vivo.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 tools/testing/selftests/arm64/mte/check_child_memory.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/mte/check_child_memory.c b/tools/testing/selftests/arm64/mte/check_child_memory.c
index 43bd94f853ba..7597fc632cad 100644
--- a/tools/testing/selftests/arm64/mte/check_child_memory.c
+++ b/tools/testing/selftests/arm64/mte/check_child_memory.c
@@ -85,9 +85,9 @@ static int check_child_memory_mapping(int mem_type, int mode, int mapping)
 {
 	char *ptr;
 	int run, result;
-	int item = sizeof(sizes)/sizeof(int);
+	int item = ARRAY_SIZE(sizes);
 
-	item = sizeof(sizes)/sizeof(int);
+	item = ARRAY_SIZE(sizes);
 	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
 	for (run = 0; run < item; run++) {
 		ptr = (char *)mte_allocate_memory_tag_range(sizes[run], mem_type, mapping,
@@ -107,7 +107,7 @@ static int check_child_file_mapping(int mem_type, int mode, int mapping)
 {
 	char *ptr, *map_ptr;
 	int run, fd, map_size, result = KSFT_PASS;
-	int total = sizeof(sizes)/sizeof(int);
+	int total = ARRAY_SIZE(sizes);
 
 	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
 	for (run = 0; run < total; run++) {
@@ -144,7 +144,7 @@ static int check_child_file_mapping(int mem_type, int mode, int mapping)
 int main(int argc, char *argv[])
 {
 	int err;
-	int item = sizeof(sizes)/sizeof(int);
+	int item = ARRAY_SIZE(sizes);
 
 	page_size = getpagesize();
 	if (!page_size) {
-- 
cgit 


From a59f7a7f76407da78c21c42afe6d57bd885caa53 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Wed, 27 Apr 2022 19:19:51 +0100
Subject: selftests/arm64: Use TEST_GEN_PROGS_EXTENDED in the FP Makefile

The kselftest lib.mk provides a default all target which builds additional
programs from TEST_GEN_PROGS_EXTENDED, use that rather than using
TEST_PROGS_EXTENDED which is for programs that don't need to be built like
shell scripts. Leave fpsimd-stress and sve-stress there since they are
scripts.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20220427181954.357975-2-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 tools/testing/selftests/arm64/fp/Makefile | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile
index 95e707e32247..a0b8cc59947e 100644
--- a/tools/testing/selftests/arm64/fp/Makefile
+++ b/tools/testing/selftests/arm64/fp/Makefile
@@ -2,14 +2,13 @@
 
 CFLAGS += -I../../../../../usr/include/
 TEST_GEN_PROGS := sve-ptrace sve-probe-vls vec-syscfg za-fork za-ptrace
-TEST_PROGS_EXTENDED := fp-pidbench fpsimd-test fpsimd-stress \
+TEST_GEN_PROGS_EXTENDED := fp-pidbench fpsimd-test \
 	rdvl-sme rdvl-sve \
-	sve-test sve-stress \
-	ssve-test ssve-stress \
-	za-test za-stress \
+	sve-test \
+	ssve-test \
+	za-test \
 	vlset
-
-all: $(TEST_GEN_PROGS) $(TEST_PROGS_EXTENDED)
+TEST_PROGS_EXTENDED := fpsimd-stress sve-stress ssve-stress za-stress
 
 # Build with nolibc to avoid effects due to libc's clone() support
 fp-pidbench: fp-pidbench.S asm-utils.o
-- 
cgit 


From 3a23a42d1a480095e5e6ab820594f194079b6a61 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Wed, 27 Apr 2022 19:19:52 +0100
Subject: selftests/arm64: Define top_srcdir for the fp tests

Some of the rules in lib.mk use a top_srcdir variable to figure out where
the top of the kselftest tree is, provide it.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20220427181954.357975-3-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 tools/testing/selftests/arm64/fp/Makefile | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile
index a0b8cc59947e..ba758a6c6b9a 100644
--- a/tools/testing/selftests/arm64/fp/Makefile
+++ b/tools/testing/selftests/arm64/fp/Makefile
@@ -1,6 +1,10 @@
 # SPDX-License-Identifier: GPL-2.0
 
-CFLAGS += -I../../../../../usr/include/
+# A proper top_srcdir is needed by KSFT(lib.mk)
+top_srcdir = $(realpath ../../../../../)
+
+CFLAGS += -I$(top_srcdir)/usr/include/
+
 TEST_GEN_PROGS := sve-ptrace sve-probe-vls vec-syscfg za-fork za-ptrace
 TEST_GEN_PROGS_EXTENDED := fp-pidbench fpsimd-test \
 	rdvl-sme rdvl-sve \
-- 
cgit 


From 399cf0a3e8a1a2cf93e87017282e682e7b65f01c Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Wed, 27 Apr 2022 19:19:53 +0100
Subject: selftests/arm64: Clean the fp helper libraries

We provide a couple of object files with helpers linked into several of
the test programs, ensure they are cleaned.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20220427181954.357975-4-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 tools/testing/selftests/arm64/fp/Makefile | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile
index ba758a6c6b9a..7e5d48c4a59d 100644
--- a/tools/testing/selftests/arm64/fp/Makefile
+++ b/tools/testing/selftests/arm64/fp/Makefile
@@ -14,6 +14,8 @@ TEST_GEN_PROGS_EXTENDED := fp-pidbench fpsimd-test \
 	vlset
 TEST_PROGS_EXTENDED := fpsimd-stress sve-stress ssve-stress za-stress
 
+EXTRA_CLEAN += $(OUTPUT)/asm-utils.o $(OUTPUT)/rdvl.o $(OUTPUT)/za-fork-asm.o
+
 # Build with nolibc to avoid effects due to libc's clone() support
 fp-pidbench: fp-pidbench.S asm-utils.o
 	$(CC) -nostdlib $^ -o $@
-- 
cgit 


From aca43ad51661d46b0083614a5b75b6cb90c30741 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Wed, 27 Apr 2022 19:19:54 +0100
Subject: selftests/arm64: Fix O= builds for the floating point tests

Currently the arm64 floating point tests don't support out of tree builds
due to two quirks of the kselftest build system. One is that when building
a program from multiple files we shouldn't separately compile the main
program to an object file as that will result in the pattern rule not
matching when adjusted for the output directory. The other is that we also
need to include $(OUTPUT) in the names of the binaries when specifying the
dependencies in order to ensure that they get picked up with O=.

Rewrite the dependencies for the executables to fix these issues. The
kselftest build system will ensure OUTPUT is always defined.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20220427181954.357975-5-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 tools/testing/selftests/arm64/fp/Makefile | 32 +++++++++++++++----------------
 1 file changed, 15 insertions(+), 17 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile
index 7e5d48c4a59d..a7c2286bf65b 100644
--- a/tools/testing/selftests/arm64/fp/Makefile
+++ b/tools/testing/selftests/arm64/fp/Makefile
@@ -17,28 +17,26 @@ TEST_PROGS_EXTENDED := fpsimd-stress sve-stress ssve-stress za-stress
 EXTRA_CLEAN += $(OUTPUT)/asm-utils.o $(OUTPUT)/rdvl.o $(OUTPUT)/za-fork-asm.o
 
 # Build with nolibc to avoid effects due to libc's clone() support
-fp-pidbench: fp-pidbench.S asm-utils.o
+$(OUTPUT)/fp-pidbench: fp-pidbench.S $(OUTPUT)/asm-utils.o
 	$(CC) -nostdlib $^ -o $@
-fpsimd-test: fpsimd-test.o asm-utils.o
+$(OUTPUT)/fpsimd-test: fpsimd-test.S $(OUTPUT)/asm-utils.o
 	$(CC) -nostdlib $^ -o $@
-rdvl-sme: rdvl-sme.o rdvl.o
-rdvl-sve: rdvl-sve.o rdvl.o
-sve-ptrace: sve-ptrace.o
-sve-probe-vls: sve-probe-vls.o rdvl.o
-sve-test: sve-test.o asm-utils.o
+$(OUTPUT)/rdvl-sve: rdvl-sve.c $(OUTPUT)/rdvl.o
+$(OUTPUT)/rdvl-sme: rdvl-sme.c $(OUTPUT)/rdvl.o
+$(OUTPUT)/sve-ptrace: sve-ptrace.c
+$(OUTPUT)/sve-probe-vls: sve-probe-vls.c $(OUTPUT)/rdvl.o
+$(OUTPUT)/sve-test: sve-test.S $(OUTPUT)/asm-utils.o
 	$(CC) -nostdlib $^ -o $@
-ssve-test: sve-test.S asm-utils.o
+$(OUTPUT)/ssve-test: sve-test.S $(OUTPUT)/asm-utils.o
 	$(CC) -DSSVE -nostdlib $^ -o $@
-vec-syscfg: vec-syscfg.o rdvl.o
-vlset: vlset.o
-za-fork: za-fork.o za-fork-asm.o
-	$(CC) -nostdlib -static $^ -o $@ -lgcc
-za-fork.o: za-fork.c
-	$(CC) -c -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \
+$(OUTPUT)/vec-syscfg: vec-syscfg.c $(OUTPUT)/rdvl.o
+$(OUTPUT)/vlset: vlset.c
+$(OUTPUT)/za-fork: za-fork.c $(OUTPUT)/za-fork-asm.o
+	$(CC) -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \
 		-include ../../../../include/nolibc/nolibc.h \
-		-ffreestanding -Wall $^ -o $@
-za-test: za-test.o asm-utils.o
+		-static -ffreestanding -Wall $^ -o $@
+$(OUTPUT)/za-ptrace: za-ptrace.c
+$(OUTPUT)/za-test: za-test.S $(OUTPUT)/asm-utils.o
 	$(CC) -nostdlib $^ -o $@
-za-ptrace: za-ptrace.o
 
 include ../../lib.mk
-- 
cgit 


From 32c03c4954a03d46f603f94acac9d2705bd5c9c6 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 28 Apr 2022 11:53:49 -0700
Subject: selftests/bpf: Use target-less SEC() definitions in various tests

Add new or modify existing SEC() definitions to be target-less and
validate that libbpf handles such program definitions correctly.

For kprobe/kretprobe we also add explicit test that generic
bpf_program__attach() works in cases when kprobe definition contains
proper target. It wasn't previously tested as selftests code always
explicitly specified the target regardless.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20220428185349.3799599-4-andrii@kernel.org
---
 .../selftests/bpf/prog_tests/attach_probe.c        | 10 ++++++++++
 .../selftests/bpf/prog_tests/kprobe_multi_test.c   | 14 ++++++-------
 tools/testing/selftests/bpf/progs/kprobe_multi.c   | 14 +++++++++++++
 .../selftests/bpf/progs/test_attach_probe.c        | 23 +++++++++++++++++++---
 .../selftests/bpf/progs/test_module_attach.c       |  2 +-
 5 files changed, 52 insertions(+), 11 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index c0c6d410751d..08c0601b3e84 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -55,6 +55,7 @@ void test_attach_probe(void)
 	if (!ASSERT_OK_PTR(skel->bss, "check_bss"))
 		goto cleanup;
 
+	/* manual-attach kprobe/kretprobe */
 	kprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kprobe,
 						 false /* retprobe */,
 						 SYS_NANOSLEEP_KPROBE_NAME);
@@ -69,6 +70,13 @@ void test_attach_probe(void)
 		goto cleanup;
 	skel->links.handle_kretprobe = kretprobe_link;
 
+	/* auto-attachable kprobe and kretprobe */
+	skel->links.handle_kprobe_auto = bpf_program__attach(skel->progs.handle_kprobe_auto);
+	ASSERT_OK_PTR(skel->links.handle_kprobe_auto, "attach_kprobe_auto");
+
+	skel->links.handle_kretprobe_auto = bpf_program__attach(skel->progs.handle_kretprobe_auto);
+	ASSERT_OK_PTR(skel->links.handle_kretprobe_auto, "attach_kretprobe_auto");
+
 	if (!legacy)
 		ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_before");
 
@@ -157,7 +165,9 @@ void test_attach_probe(void)
 	trigger_func2();
 
 	ASSERT_EQ(skel->bss->kprobe_res, 1, "check_kprobe_res");
+	ASSERT_EQ(skel->bss->kprobe2_res, 11, "check_kprobe_auto_res");
 	ASSERT_EQ(skel->bss->kretprobe_res, 2, "check_kretprobe_res");
+	ASSERT_EQ(skel->bss->kretprobe2_res, 22, "check_kretprobe_auto_res");
 	ASSERT_EQ(skel->bss->uprobe_res, 3, "check_uprobe_res");
 	ASSERT_EQ(skel->bss->uretprobe_res, 4, "check_uretprobe_res");
 	ASSERT_EQ(skel->bss->uprobe_byname_res, 5, "check_uprobe_byname_res");
diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
index b9876b55fc0c..c56db65d4c15 100644
--- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
@@ -140,14 +140,14 @@ test_attach_api(const char *pattern, struct bpf_kprobe_multi_opts *opts)
 		goto cleanup;
 
 	skel->bss->pid = getpid();
-	link1 = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe,
+	link1 = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
 						      pattern, opts);
 	if (!ASSERT_OK_PTR(link1, "bpf_program__attach_kprobe_multi_opts"))
 		goto cleanup;
 
 	if (opts) {
 		opts->retprobe = true;
-		link2 = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kretprobe,
+		link2 = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kretprobe_manual,
 							      pattern, opts);
 		if (!ASSERT_OK_PTR(link2, "bpf_program__attach_kprobe_multi_opts"))
 			goto cleanup;
@@ -232,7 +232,7 @@ static void test_attach_api_fails(void)
 	skel->bss->pid = getpid();
 
 	/* fail_1 - pattern and opts NULL */
-	link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe,
+	link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
 						     NULL, NULL);
 	if (!ASSERT_ERR_PTR(link, "fail_1"))
 		goto cleanup;
@@ -246,7 +246,7 @@ static void test_attach_api_fails(void)
 	opts.cnt = ARRAY_SIZE(syms);
 	opts.cookies = NULL;
 
-	link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe,
+	link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
 						     NULL, &opts);
 	if (!ASSERT_ERR_PTR(link, "fail_2"))
 		goto cleanup;
@@ -260,7 +260,7 @@ static void test_attach_api_fails(void)
 	opts.cnt = ARRAY_SIZE(syms);
 	opts.cookies = NULL;
 
-	link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe,
+	link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
 						     "ksys_*", &opts);
 	if (!ASSERT_ERR_PTR(link, "fail_3"))
 		goto cleanup;
@@ -274,7 +274,7 @@ static void test_attach_api_fails(void)
 	opts.cnt = ARRAY_SIZE(syms);
 	opts.cookies = NULL;
 
-	link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe,
+	link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
 						     "ksys_*", &opts);
 	if (!ASSERT_ERR_PTR(link, "fail_4"))
 		goto cleanup;
@@ -288,7 +288,7 @@ static void test_attach_api_fails(void)
 	opts.cnt = 0;
 	opts.cookies = cookies;
 
-	link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe,
+	link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
 						     "ksys_*", &opts);
 	if (!ASSERT_ERR_PTR(link, "fail_5"))
 		goto cleanup;
diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi.c b/tools/testing/selftests/bpf/progs/kprobe_multi.c
index 600be50800f8..93510f4f0f3a 100644
--- a/tools/testing/selftests/bpf/progs/kprobe_multi.c
+++ b/tools/testing/selftests/bpf/progs/kprobe_multi.c
@@ -98,3 +98,17 @@ int test_kretprobe(struct pt_regs *ctx)
 	kprobe_multi_check(ctx, true);
 	return 0;
 }
+
+SEC("kprobe.multi")
+int test_kprobe_manual(struct pt_regs *ctx)
+{
+	kprobe_multi_check(ctx, false);
+	return 0;
+}
+
+SEC("kretprobe.multi")
+int test_kretprobe_manual(struct pt_regs *ctx)
+{
+	kprobe_multi_check(ctx, true);
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c
index af994d16bb10..ce9acf4db8d2 100644
--- a/tools/testing/selftests/bpf/progs/test_attach_probe.c
+++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c
@@ -5,9 +5,12 @@
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
 
 int kprobe_res = 0;
+int kprobe2_res = 0;
 int kretprobe_res = 0;
+int kretprobe2_res = 0;
 int uprobe_res = 0;
 int uretprobe_res = 0;
 int uprobe_byname_res = 0;
@@ -15,20 +18,34 @@ int uretprobe_byname_res = 0;
 int uprobe_byname2_res = 0;
 int uretprobe_byname2_res = 0;
 
-SEC("kprobe/sys_nanosleep")
+SEC("kprobe")
 int handle_kprobe(struct pt_regs *ctx)
 {
 	kprobe_res = 1;
 	return 0;
 }
 
-SEC("kretprobe/sys_nanosleep")
-int BPF_KRETPROBE(handle_kretprobe)
+SEC("kprobe/" SYS_PREFIX "sys_nanosleep")
+int BPF_KPROBE(handle_kprobe_auto)
+{
+	kprobe2_res = 11;
+	return 0;
+}
+
+SEC("kretprobe")
+int handle_kretprobe(struct pt_regs *ctx)
 {
 	kretprobe_res = 2;
 	return 0;
 }
 
+SEC("kretprobe/" SYS_PREFIX "sys_nanosleep")
+int BPF_KRETPROBE(handle_kretprobe_auto)
+{
+	kretprobe2_res = 22;
+	return 0;
+}
+
 SEC("uprobe")
 int handle_uprobe(struct pt_regs *ctx)
 {
diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c
index 50ce16d02da7..08628afedb77 100644
--- a/tools/testing/selftests/bpf/progs/test_module_attach.c
+++ b/tools/testing/selftests/bpf/progs/test_module_attach.c
@@ -64,7 +64,7 @@ int BPF_PROG(handle_fentry,
 
 __u32 fentry_manual_read_sz = 0;
 
-SEC("fentry/placeholder")
+SEC("fentry")
 int BPF_PROG(handle_fentry_manual,
 	     struct file *file, struct kobject *kobj,
 	     struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len)
-- 
cgit 


From 68964e155677ab6eca21784a248b8e65fb745660 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 27 Apr 2022 21:15:23 -0700
Subject: selftests/bpf: Test bpf_map__set_autocreate() and related log fixup
 logic

Add a subtest that excercises bpf_map__set_autocreate() API and
validates that libbpf properly fixes up BPF verifier log with correct
map information.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220428041523.4089853-5-andrii@kernel.org
---
 tools/testing/selftests/bpf/prog_tests/log_fixup.c | 37 +++++++++++++++++++++-
 tools/testing/selftests/bpf/progs/test_log_fixup.c | 26 +++++++++++++++
 2 files changed, 62 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/log_fixup.c b/tools/testing/selftests/bpf/prog_tests/log_fixup.c
index be3a956cb3a5..f4ffdcabf4e4 100644
--- a/tools/testing/selftests/bpf/prog_tests/log_fixup.c
+++ b/tools/testing/selftests/bpf/prog_tests/log_fixup.c
@@ -85,7 +85,6 @@ static void bad_core_relo_subprog(void)
 	if (!ASSERT_ERR(err, "load_fail"))
 		goto cleanup;
 
-	/* there should be no prog loading log because we specified per-prog log buf */
 	ASSERT_HAS_SUBSTR(log_buf,
 			  ": <invalid CO-RE relocation>\n"
 			  "failed to resolve CO-RE relocation <byte_off> ",
@@ -101,6 +100,40 @@ cleanup:
 	test_log_fixup__destroy(skel);
 }
 
+static void missing_map(void)
+{
+	char log_buf[8 * 1024];
+	struct test_log_fixup* skel;
+	int err;
+
+	skel = test_log_fixup__open();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	bpf_map__set_autocreate(skel->maps.missing_map, false);
+
+	bpf_program__set_autoload(skel->progs.use_missing_map, true);
+	bpf_program__set_log_buf(skel->progs.use_missing_map, log_buf, sizeof(log_buf));
+
+	err = test_log_fixup__load(skel);
+	if (!ASSERT_ERR(err, "load_fail"))
+		goto cleanup;
+
+	ASSERT_TRUE(bpf_map__autocreate(skel->maps.existing_map), "existing_map_autocreate");
+	ASSERT_FALSE(bpf_map__autocreate(skel->maps.missing_map), "missing_map_autocreate");
+
+	ASSERT_HAS_SUBSTR(log_buf,
+			  "8: <invalid BPF map reference>\n"
+			  "BPF map 'missing_map' is referenced but wasn't created\n",
+			  "log_buf");
+
+	if (env.verbosity > VERBOSE_NONE)
+		printf("LOG:   \n=================\n%s=================\n", log_buf);
+
+cleanup:
+	test_log_fixup__destroy(skel);
+}
+
 void test_log_fixup(void)
 {
 	if (test__start_subtest("bad_core_relo_trunc_none"))
@@ -111,4 +144,6 @@ void test_log_fixup(void)
 		bad_core_relo(250, TRUNC_FULL  /* truncate also libbpf's message patch */);
 	if (test__start_subtest("bad_core_relo_subprog"))
 		bad_core_relo_subprog();
+	if (test__start_subtest("missing_map"))
+		missing_map();
 }
diff --git a/tools/testing/selftests/bpf/progs/test_log_fixup.c b/tools/testing/selftests/bpf/progs/test_log_fixup.c
index a78980d897b3..60450cb0e72e 100644
--- a/tools/testing/selftests/bpf/progs/test_log_fixup.c
+++ b/tools/testing/selftests/bpf/progs/test_log_fixup.c
@@ -35,4 +35,30 @@ int bad_relo_subprog(const void *ctx)
 	return bad_subprog() + bpf_core_field_size(t->pid);
 }
 
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, int);
+} existing_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, int);
+} missing_map SEC(".maps");
+
+SEC("?raw_tp/sys_enter")
+int use_missing_map(const void *ctx)
+{
+	int zero = 0, *value;
+
+	value = bpf_map_lookup_elem(&existing_map, &zero);
+
+	value = bpf_map_lookup_elem(&missing_map, &zero);
+
+	return value != NULL;
+}
+
 char _license[] SEC("license") = "GPL";
-- 
cgit 


From e96a76ee5283d509f2bf45133d147e77f73a1b15 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Fri, 18 Mar 2022 01:39:25 +1100
Subject: selftests/powerpc: Add a test of 4PB SLB handling

Add a test for a bug we had in the 4PB address space SLB handling. It
was fixed in commit 4c2de74cc869 ("powerpc/64: Interrupts save PPR on
stack rather than thread_struct").

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220317143925.1030447-1-mpe@ellerman.id.au
---
 tools/testing/selftests/powerpc/mm/.gitignore      |   1 +
 tools/testing/selftests/powerpc/mm/Makefile        |   4 +-
 .../selftests/powerpc/mm/large_vm_gpr_corruption.c | 156 +++++++++++++++++++++
 3 files changed, 160 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/mm/large_vm_gpr_corruption.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore
index aac4a59f9e28..4e1a294eec35 100644
--- a/tools/testing/selftests/powerpc/mm/.gitignore
+++ b/tools/testing/selftests/powerpc/mm/.gitignore
@@ -12,3 +12,4 @@ pkey_exec_prot
 pkey_siginfo
 stack_expansion_ldst
 stack_expansion_signal
+large_vm_gpr_corruption
diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
index 40253abc6208..27dc09d0bfee 100644
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -4,7 +4,8 @@ noarg:
 
 TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \
 		  large_vm_fork_separation bad_accesses pkey_exec_prot \
-		  pkey_siginfo stack_expansion_signal stack_expansion_ldst
+		  pkey_siginfo stack_expansion_signal stack_expansion_ldst \
+		  large_vm_gpr_corruption
 TEST_PROGS := stress_code_patching.sh
 
 TEST_GEN_PROGS_EXTENDED := tlbie_test
@@ -19,6 +20,7 @@ $(OUTPUT)/prot_sao: ../utils.c
 
 $(OUTPUT)/wild_bctr: CFLAGS += -m64
 $(OUTPUT)/large_vm_fork_separation: CFLAGS += -m64
+$(OUTPUT)/large_vm_gpr_corruption: CFLAGS += -m64
 $(OUTPUT)/bad_accesses: CFLAGS += -m64
 $(OUTPUT)/pkey_exec_prot: CFLAGS += -m64
 $(OUTPUT)/pkey_siginfo: CFLAGS += -m64
diff --git a/tools/testing/selftests/powerpc/mm/large_vm_gpr_corruption.c b/tools/testing/selftests/powerpc/mm/large_vm_gpr_corruption.c
new file mode 100644
index 000000000000..927bfae99ed9
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/large_vm_gpr_corruption.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Copyright 2022, Michael Ellerman, IBM Corp.
+//
+// Test that the 4PB address space SLB handling doesn't corrupt userspace registers
+// (r9-r13) due to a SLB fault while saving the PPR.
+//
+// The bug was introduced in f384796c4 ("powerpc/mm: Add support for handling > 512TB
+// address in SLB miss") and fixed in 4c2de74cc869 ("powerpc/64: Interrupts save PPR on
+// stack rather than thread_struct").
+//
+// To hit the bug requires the task struct and kernel stack to be in different segments.
+// Usually that requires more than 1TB of RAM, or if that's not practical, boot the kernel
+// with "disable_1tb_segments".
+//
+// The test works by creating mappings above 512TB, to trigger the large address space
+// support. It creates 64 mappings, double the size of the SLB, to cause SLB faults on
+// each access (assuming naive replacement). It then loops over those mappings touching
+// each, and checks that r9-r13 aren't corrupted.
+//
+// It then forks another child and tries again, because a new child process will get a new
+// kernel stack and thread struct allocated, which may be more optimally placed to trigger
+// the bug. It would probably be better to leave the previous child processes hanging
+// around, so that kernel stack & thread struct allocations are not reused, but that would
+// amount to a 30 second fork bomb. The current design reliably triggers the bug on
+// unpatched kernels.
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "utils.h"
+
+#ifndef MAP_FIXED_NOREPLACE
+#define MAP_FIXED_NOREPLACE MAP_FIXED // "Should be safe" above 512TB
+#endif
+
+#define BASE_ADDRESS (1ul << 50) // 1PB
+#define STRIDE	     (2ul << 40) // 2TB
+#define SLB_SIZE     32
+#define NR_MAPPINGS  (SLB_SIZE * 2)
+
+static volatile sig_atomic_t signaled;
+
+static void signal_handler(int sig)
+{
+	signaled = 1;
+}
+
+#define CHECK_REG(_reg)                                                                \
+	if (_reg != _reg##_orig) {                                                     \
+		printf(str(_reg) " corrupted! Expected 0x%lx != 0x%lx\n", _reg##_orig, \
+		       _reg);                                                          \
+		_exit(1);                                                              \
+	}
+
+static int touch_mappings(void)
+{
+	unsigned long r9_orig, r10_orig, r11_orig, r12_orig, r13_orig;
+	unsigned long r9, r10, r11, r12, r13;
+	unsigned long addr, *p;
+	int i;
+
+	for (i = 0; i < NR_MAPPINGS; i++) {
+		addr = BASE_ADDRESS + (i * STRIDE);
+		p = (unsigned long *)addr;
+
+		asm volatile("mr   %0, %%r9	;" // Read original GPR values
+			     "mr   %1, %%r10	;"
+			     "mr   %2, %%r11	;"
+			     "mr   %3, %%r12	;"
+			     "mr   %4, %%r13	;"
+			     "std %10, 0(%11)   ;" // Trigger SLB fault
+			     "mr   %5, %%r9	;" // Save possibly corrupted values
+			     "mr   %6, %%r10	;"
+			     "mr   %7, %%r11	;"
+			     "mr   %8, %%r12	;"
+			     "mr   %9, %%r13	;"
+			     "mr   %%r9,  %0	;" // Restore original values
+			     "mr   %%r10, %1	;"
+			     "mr   %%r11, %2	;"
+			     "mr   %%r12, %3	;"
+			     "mr   %%r13, %4	;"
+			     : "=&b"(r9_orig), "=&b"(r10_orig), "=&b"(r11_orig),
+			       "=&b"(r12_orig), "=&b"(r13_orig), "=&b"(r9), "=&b"(r10),
+			       "=&b"(r11), "=&b"(r12), "=&b"(r13)
+			     : "b"(i), "b"(p)
+			     : "r9", "r10", "r11", "r12", "r13");
+
+		CHECK_REG(r9);
+		CHECK_REG(r10);
+		CHECK_REG(r11);
+		CHECK_REG(r12);
+		CHECK_REG(r13);
+	}
+
+	return 0;
+}
+
+static int test(void)
+{
+	unsigned long page_size, addr, *p;
+	struct sigaction action;
+	bool hash_mmu;
+	int i, status;
+	pid_t pid;
+
+	// This tests a hash MMU specific bug.
+	FAIL_IF(using_hash_mmu(&hash_mmu));
+	SKIP_IF(!hash_mmu);
+
+	page_size = sysconf(_SC_PAGESIZE);
+
+	for (i = 0; i < NR_MAPPINGS; i++) {
+		addr = BASE_ADDRESS + (i * STRIDE);
+
+		p = mmap((void *)addr, page_size, PROT_READ | PROT_WRITE,
+			 MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE, -1, 0);
+		if (p == MAP_FAILED) {
+			perror("mmap");
+			printf("Error: couldn't mmap(), confirm kernel has 4PB support?\n");
+			return 1;
+		}
+	}
+
+	action.sa_handler = signal_handler;
+	action.sa_flags = SA_RESTART;
+	FAIL_IF(sigaction(SIGALRM, &action, NULL) < 0);
+
+	// Seen to always crash in under ~10s on affected kernels.
+	alarm(30);
+
+	while (!signaled) {
+		// Fork new processes, to increase the chance that we hit the case where
+		// the kernel stack and task struct are in different segments.
+		pid = fork();
+		if (pid == 0)
+			exit(touch_mappings());
+
+		FAIL_IF(waitpid(-1, &status, 0) == -1);
+		FAIL_IF(WIFSIGNALED(status));
+		FAIL_IF(!WIFEXITED(status));
+		FAIL_IF(WEXITSTATUS(status));
+	}
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(test, "large_vm_gpr_corruption");
+}
-- 
cgit 


From 20b87e7c29dffcfa3f96f2e99daec84fd46cabdb Mon Sep 17 00:00:00 2001
From: Mykola Lysenko <mykolal@fb.com>
Date: Thu, 28 Apr 2022 15:57:44 -0700
Subject: selftests/bpf: Fix two memory leaks in prog_tests

Fix log_fp memory leak in dispatch_thread_read_log.
Remove obsolete log_fp clean-up code in dispatch_thread.

Also, release memory of subtest_selector. This can be
reproduced with -n 2/1 parameters.

Signed-off-by: Mykola Lysenko <mykolal@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220428225744.1961643-1-mykolal@fb.com
---
 tools/testing/selftests/bpf/test_progs.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 22fe283710fa..a07da648af3b 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -77,10 +77,12 @@ static void stdio_restore_cleanup(void)
 
 	if (env.subtest_state) {
 		fclose(env.subtest_state->stdout);
+		env.subtest_state->stdout = NULL;
 		stdout = env.test_state->stdout;
 		stderr = env.test_state->stdout;
 	} else {
 		fclose(env.test_state->stdout);
+		env.test_state->stdout = NULL;
 	}
 #endif
 }
@@ -1077,6 +1079,7 @@ static int read_prog_test_msg(int sock_fd, struct msg *msg, enum msg_type type)
 static int dispatch_thread_read_log(int sock_fd, char **log_buf, size_t *log_cnt)
 {
 	FILE *log_fp = NULL;
+	int result = 0;
 
 	log_fp = open_memstream(log_buf, log_cnt);
 	if (!log_fp)
@@ -1085,16 +1088,20 @@ static int dispatch_thread_read_log(int sock_fd, char **log_buf, size_t *log_cnt
 	while (true) {
 		struct msg msg;
 
-		if (read_prog_test_msg(sock_fd, &msg, MSG_TEST_LOG))
-			return 1;
+		if (read_prog_test_msg(sock_fd, &msg, MSG_TEST_LOG)) {
+			result = 1;
+			goto out;
+		}
 
 		fprintf(log_fp, "%s", msg.test_log.log_buf);
 		if (msg.test_log.is_last)
 			break;
 	}
+
+out:
 	fclose(log_fp);
 	log_fp = NULL;
-	return 0;
+	return result;
 }
 
 static int dispatch_thread_send_subtests(int sock_fd, struct test_state *state)
@@ -1132,7 +1139,6 @@ static void *dispatch_thread(void *ctx)
 {
 	struct dispatch_data *data = ctx;
 	int sock_fd;
-	FILE *log_fp = NULL;
 
 	sock_fd = data->sock_fd;
 
@@ -1214,8 +1220,6 @@ error:
 	if (env.debug)
 		fprintf(stderr, "[%d]: Protocol/IO error: %s.\n", data->worker_id, strerror(errno));
 
-	if (log_fp)
-		fclose(log_fp);
 done:
 	{
 		struct msg msg_exit;
@@ -1685,6 +1689,7 @@ out:
 		unload_bpf_testmod();
 
 	free_test_selector(&env.test_selector);
+	free_test_selector(&env.subtest_selector);
 	free_test_states();
 
 	if (env.succ_cnt + env.fail_cnt + env.skip_cnt == 0)
-- 
cgit 


From c85bcc912f4f404bf6eaf4b6bdb8480ef2c2faa1 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <roman.gushchin@linux.dev>
Date: Thu, 28 Apr 2022 23:15:59 -0700
Subject: kselftests: memcg: update the oom group leaf events test

Patch series "mm: memcg kselftests fixes".


This patch (of 4):

Commit 9852ae3fe529 ("mm, memcg: consider subtrees in memory.events") made
memory.events recursive: all events are propagated upwards by the tree.
It was a change in semantics.

It broke the oom group leaf events test: it assumes that after an OOM the
oom_kill counter is zero on parent's level.

Let's adjust the test: it should have similar expectations for the child
and parent levels.

The test passes after this fix.

Link: https://lkml.kernel.org/r/20220415000133.3955987-2-roman.gushchin@linux.dev
Link: https://lkml.kernel.org/r/20220415000133.3955987-1-roman.gushchin@linux.dev
Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>
Reviewed-by: David Vernet <void@manifault.com>
Cc: Chris Down <chris@chrisdown.name>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Zefan Li <lizefan.x@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/cgroup/test_memcontrol.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
index 36ccf2322e21..00b430e7f2a2 100644
--- a/tools/testing/selftests/cgroup/test_memcontrol.c
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -1079,7 +1079,8 @@ cleanup:
 /*
  * This test disables swapping and tries to allocate anonymous memory
  * up to OOM with memory.group.oom set. Then it checks that all
- * processes in the leaf (but not the parent) were killed.
+ * processes in the leaf were killed. It also checks that oom_events
+ * were propagated to the parent level.
  */
 static int test_memcg_oom_group_leaf_events(const char *root)
 {
@@ -1122,7 +1123,7 @@ static int test_memcg_oom_group_leaf_events(const char *root)
 	if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0)
 		goto cleanup;
 
-	if (cg_read_key_long(parent, "memory.events", "oom_kill ") != 0)
+	if (cg_read_key_long(parent, "memory.events", "oom_kill ") <= 0)
 		goto cleanup;
 
 	ret = KSFT_PASS;
-- 
cgit 


From be74553f250fb2154375b8e14e9f9b58aafd23b0 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <roman.gushchin@linux.dev>
Date: Thu, 28 Apr 2022 23:15:59 -0700
Subject: kselftests: memcg: speed up the memory.high test

After commit 0e4b01df8659 ("mm, memcg: throttle allocators when failing
reclaim over memory.high") allocating memory over memory.high became very
time consuming.  But it's exactly what the memory.high test from cgroup
kselftests is doing: it tries to allocate 100M with 30M memory.high value.
It takes forever to complete.

In order to keep it passing (or failing) in a reasonable amount of time
let's try to allocate only a little over 30M: 31M to be precise.

With this change test_memcontrol finishes in a reasonable amount of
time:
  $ time ./test_memcontrol
  ok 1 test_memcg_subtree_control
  ok 2 test_memcg_current
  ok 3 test_memcg_min
  ok 4 test_memcg_low
  ok 5 test_memcg_high
  ok 6 test_memcg_max
  ok 7 test_memcg_oom_events
  ok 8 test_memcg_swap_max
  ok 9 test_memcg_sock
  ok 10 test_memcg_oom_group_leaf_events
  ok 11 test_memcg_oom_group_parent_events
  ok 12 test_memcg_oom_group_score_events

  real	0m2.273s
  user	0m0.064s
  sys	0m0.739s

Link: https://lkml.kernel.org/r/20220415000133.3955987-3-roman.gushchin@linux.dev
Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>
Reviewed-by: David Vernet <void@manifault.com>
Cc: Chris Down <chris@chrisdown.name>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Zefan Li <lizefan.x@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/cgroup/test_memcontrol.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
index 00b430e7f2a2..9c1f19fe2e37 100644
--- a/tools/testing/selftests/cgroup/test_memcontrol.c
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -607,7 +607,7 @@ static int test_memcg_high(const char *root)
 	if (cg_write(memcg, "memory.high", "30M"))
 		goto cleanup;
 
-	if (cg_run(memcg, alloc_anon, (void *)MB(100)))
+	if (cg_run(memcg, alloc_anon, (void *)MB(31)))
 		goto cleanup;
 
 	if (!cg_run(memcg, alloc_pagecache_50M_check, NULL))
-- 
cgit 


From 0c2d08728470b93a3f05416f53222f38a89868e2 Mon Sep 17 00:00:00 2001
From: Alistair Popple <apopple@nvidia.com>
Date: Thu, 28 Apr 2022 23:16:07 -0700
Subject: mm: add selftests for migration entries

Add some basic migration tests and in particular tests that will
stress both the pte and pmd migration entry wait paths.

Link: https://lkml.kernel.org/r/20220324014349.229253-1-apopple@nvidia.com
Signed-off-by: Alistair Popple <apopple@nvidia.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jan Kara <jack@suse.cz>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/vm/Makefile    |   3 +
 tools/testing/selftests/vm/migration.c | 193 +++++++++++++++++++++++++++++++++
 2 files changed, 196 insertions(+)
 create mode 100644 tools/testing/selftests/vm/migration.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 04a49e876a46..ff0c7a87785b 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -41,6 +41,7 @@ TEST_GEN_FILES += map_fixed_noreplace
 TEST_GEN_FILES += map_hugetlb
 TEST_GEN_FILES += map_populate
 TEST_GEN_FILES += memfd_secret
+TEST_GEN_FILES += migration
 TEST_GEN_FILES += mlock-random-test
 TEST_GEN_FILES += mlock2-tests
 TEST_GEN_FILES += mremap_dontunmap
@@ -149,6 +150,8 @@ $(OUTPUT)/hmm-tests: LDLIBS += $(HMM_EXTRA_LIBS)
 
 $(OUTPUT)/ksm_tests: LDLIBS += -lnuma
 
+$(OUTPUT)/migration: LDLIBS += -lnuma
+
 local_config.mk local_config.h: check_config.sh
 	/bin/sh ./check_config.sh $(CC)
 
diff --git a/tools/testing/selftests/vm/migration.c b/tools/testing/selftests/vm/migration.c
new file mode 100644
index 000000000000..1cec8425e3ca
--- /dev/null
+++ b/tools/testing/selftests/vm/migration.c
@@ -0,0 +1,193 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * The main purpose of the tests here is to exercise the migration entry code
+ * paths in the kernel.
+ */
+
+#include "../kselftest_harness.h"
+#include <strings.h>
+#include <pthread.h>
+#include <numa.h>
+#include <numaif.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <signal.h>
+#include <time.h>
+
+#define TWOMEG (2<<20)
+#define RUNTIME (60)
+
+#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1)))
+
+FIXTURE(migration)
+{
+	pthread_t *threads;
+	pid_t *pids;
+	int nthreads;
+	int n1;
+	int n2;
+};
+
+FIXTURE_SETUP(migration)
+{
+	int n;
+
+	ASSERT_EQ(numa_available(), 0);
+	self->nthreads = numa_num_task_cpus() - 1;
+	self->n1 = -1;
+	self->n2 = -1;
+
+	for (n = 0; n < numa_max_possible_node(); n++)
+		if (numa_bitmask_isbitset(numa_all_nodes_ptr, n)) {
+			if (self->n1 == -1) {
+				self->n1 = n;
+			} else {
+				self->n2 = n;
+				break;
+			}
+		}
+
+	self->threads = malloc(self->nthreads * sizeof(*self->threads));
+	ASSERT_NE(self->threads, NULL);
+	self->pids = malloc(self->nthreads * sizeof(*self->pids));
+	ASSERT_NE(self->pids, NULL);
+};
+
+FIXTURE_TEARDOWN(migration)
+{
+	free(self->threads);
+	free(self->pids);
+}
+
+int migrate(uint64_t *ptr, int n1, int n2)
+{
+	int ret, tmp;
+	int status = 0;
+	struct timespec ts1, ts2;
+
+	if (clock_gettime(CLOCK_MONOTONIC, &ts1))
+		return -1;
+
+	while (1) {
+		if (clock_gettime(CLOCK_MONOTONIC, &ts2))
+			return -1;
+
+		if (ts2.tv_sec - ts1.tv_sec >= RUNTIME)
+			return 0;
+
+		ret = move_pages(0, 1, (void **) &ptr, &n2, &status,
+				MPOL_MF_MOVE_ALL);
+		if (ret) {
+			if (ret > 0)
+				printf("Didn't migrate %d pages\n", ret);
+			else
+				perror("Couldn't migrate pages");
+			return -2;
+		}
+
+		tmp = n2;
+		n2 = n1;
+		n1 = tmp;
+	}
+
+	return 0;
+}
+
+void *access_mem(void *ptr)
+{
+	uint64_t y = 0;
+	volatile uint64_t *x = ptr;
+
+	while (1) {
+		pthread_testcancel();
+		y += *x;
+	}
+
+	return NULL;
+}
+
+/*
+ * Basic migration entry testing. One thread will move pages back and forth
+ * between nodes whilst other threads try and access them triggering the
+ * migration entry wait paths in the kernel.
+ */
+TEST_F_TIMEOUT(migration, private_anon, 2*RUNTIME)
+{
+	uint64_t *ptr;
+	int i;
+
+	if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0)
+		SKIP(return, "Not enough threads or NUMA nodes available");
+
+	ptr = mmap(NULL, TWOMEG, PROT_READ | PROT_WRITE,
+		MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	ASSERT_NE(ptr, MAP_FAILED);
+
+	memset(ptr, 0xde, TWOMEG);
+	for (i = 0; i < self->nthreads - 1; i++)
+		if (pthread_create(&self->threads[i], NULL, access_mem, ptr))
+			perror("Couldn't create thread");
+
+	ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0);
+	for (i = 0; i < self->nthreads - 1; i++)
+		ASSERT_EQ(pthread_cancel(self->threads[i]), 0);
+}
+
+/*
+ * Same as the previous test but with shared memory.
+ */
+TEST_F_TIMEOUT(migration, shared_anon, 2*RUNTIME)
+{
+	pid_t pid;
+	uint64_t *ptr;
+	int i;
+
+	if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0)
+		SKIP(return, "Not enough threads or NUMA nodes available");
+
+	ptr = mmap(NULL, TWOMEG, PROT_READ | PROT_WRITE,
+		MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+	ASSERT_NE(ptr, MAP_FAILED);
+
+	memset(ptr, 0xde, TWOMEG);
+	for (i = 0; i < self->nthreads - 1; i++) {
+		pid = fork();
+		if (!pid)
+			access_mem(ptr);
+		else
+			self->pids[i] = pid;
+	}
+
+	ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0);
+	for (i = 0; i < self->nthreads - 1; i++)
+		ASSERT_EQ(kill(self->pids[i], SIGTERM), 0);
+}
+
+/*
+ * Tests the pmd migration entry paths.
+ */
+TEST_F_TIMEOUT(migration, private_anon_thp, 2*RUNTIME)
+{
+	uint64_t *ptr;
+	int i;
+
+	if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0)
+		SKIP(return, "Not enough threads or NUMA nodes available");
+
+	ptr = mmap(NULL, 2*TWOMEG, PROT_READ | PROT_WRITE,
+		MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	ASSERT_NE(ptr, MAP_FAILED);
+
+	ptr = (uint64_t *) ALIGN((uintptr_t) ptr, TWOMEG);
+	ASSERT_EQ(madvise(ptr, TWOMEG, MADV_HUGEPAGE), 0);
+	memset(ptr, 0xde, TWOMEG);
+	for (i = 0; i < self->nthreads - 1; i++)
+		if (pthread_create(&self->threads[i], NULL, access_mem, ptr))
+			perror("Couldn't create thread");
+
+	ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0);
+	for (i = 0; i < self->nthreads - 1; i++)
+		ASSERT_EQ(pthread_cancel(self->threads[i]), 0);
+}
+
+TEST_HARNESS_MAIN
-- 
cgit 


From 62e80f2b5072ed80a41fc6a272e44e8e17fdcf66 Mon Sep 17 00:00:00 2001
From: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Date: Thu, 28 Apr 2022 23:16:10 -0700
Subject: tools/testing/selftests/vm/gup_test.c: clarify error statement

Print three possible reasons /sys/kernel/debug/gup_test cannot be opened
to help users of this test diagnose failures.

Link: https://lkml.kernel.org/r/20220405214809.3351223-1-sidhartha.kumar@oracle.com
Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/vm/gup_test.c     | 22 +++++++++++++++++++--
 tools/testing/selftests/vm/run_vmtests.sh | 33 ++++++++++++++++++++++---------
 2 files changed, 44 insertions(+), 11 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/gup_test.c b/tools/testing/selftests/vm/gup_test.c
index cda837a14736..593262555e18 100644
--- a/tools/testing/selftests/vm/gup_test.c
+++ b/tools/testing/selftests/vm/gup_test.c
@@ -1,7 +1,9 @@
 #include <fcntl.h>
+#include <errno.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
+#include <dirent.h>
 #include <sys/ioctl.h>
 #include <sys/mman.h>
 #include <sys/stat.h>
@@ -9,6 +11,7 @@
 #include <pthread.h>
 #include <assert.h>
 #include "../../../../mm/gup_test.h"
+#include "../kselftest.h"
 
 #include "util.h"
 
@@ -206,8 +209,23 @@ int main(int argc, char **argv)
 
 	gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
 	if (gup_fd == -1) {
-		perror("open");
-		exit(1);
+		switch (errno) {
+		case EACCES:
+			if (getuid())
+				printf("Please run this test as root\n");
+			break;
+		case ENOENT:
+			if (opendir("/sys/kernel/debug") == NULL) {
+				printf("mount debugfs at /sys/kernel/debug\n");
+				break;
+			}
+			printf("check if CONFIG_GUP_TEST is enabled in kernel config\n");
+			break;
+		default:
+			perror("failed to open /sys/kernel/debug/gup_test");
+			break;
+		}
+		exit(KSFT_SKIP);
 	}
 
 	p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, filed, 0);
diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh
index 352ba00cf26b..8865ff365cc6 100755
--- a/tools/testing/selftests/vm/run_vmtests.sh
+++ b/tools/testing/selftests/vm/run_vmtests.sh
@@ -162,22 +162,32 @@ echo "------------------------------------------------------"
 echo "running: gup_test -u # get_user_pages_fast() benchmark"
 echo "------------------------------------------------------"
 ./gup_test -u
-if [ $? -ne 0 ]; then
+ret_val=$?
+
+if [ $ret_val -eq 0 ]; then
+	echo "[PASS]"
+elif [ $ret_val -eq $ksft_skip ]; then
+	 echo "[SKIP]"
+	 exitcode=$ksft_skip
+else
 	echo "[FAIL]"
 	exitcode=1
-else
-	echo "[PASS]"
 fi
 
 echo "------------------------------------------------------"
 echo "running: gup_test -a # pin_user_pages_fast() benchmark"
 echo "------------------------------------------------------"
 ./gup_test -a
-if [ $? -ne 0 ]; then
+ret_val=$?
+
+if [ $ret_val -eq 0 ]; then
+	echo "[PASS]"
+elif [ $ret_val -eq $ksft_skip ]; then
+	 echo "[SKIP]"
+	 exitcode=$ksft_skip
+else
 	echo "[FAIL]"
 	exitcode=1
-else
-	echo "[PASS]"
 fi
 
 echo "------------------------------------------------------------"
@@ -185,11 +195,16 @@ echo "# Dump pages 0, 19, and 4096, using pin_user_pages:"
 echo "running: gup_test -ct -F 0x1 0 19 0x1000 # dump_page() test"
 echo "------------------------------------------------------------"
 ./gup_test -ct -F 0x1 0 19 0x1000
-if [ $? -ne 0 ]; then
+ret_val=$?
+
+if [ $ret_val -eq 0 ]; then
+	echo "[PASS]"
+elif [ $ret_val -eq $ksft_skip ]; then
+	 echo "[SKIP]"
+	 exitcode=$ksft_skip
+else
 	echo "[FAIL]"
 	exitcode=1
-else
-	echo "[PASS]"
 fi
 
 echo "-------------------"
-- 
cgit 


From 642bc52aed9c99e8c9c9cfb6781f77719717a36c Mon Sep 17 00:00:00 2001
From: Muhammad Usama Anjum <usama.anjum@collabora.com>
Date: Thu, 28 Apr 2022 23:16:11 -0700
Subject: selftests: vm: bring common functions to a new file

Bring common functions to a new file while keeping code as much same as
possible.  These functions can be used in the new tests.  This helps in
avoiding code duplication.

Link: https://lkml.kernel.org/r/20220420084036.4101604-1-usama.anjum@collabora.com
Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Gabriel Krisman Bertazi <krisman@collabora.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/vm/Makefile               |   7 +-
 tools/testing/selftests/vm/madv_populate.c        |  34 +------
 tools/testing/selftests/vm/split_huge_page_test.c |  79 +---------------
 tools/testing/selftests/vm/vm_util.c              | 108 ++++++++++++++++++++++
 tools/testing/selftests/vm/vm_util.h              |   9 ++
 5 files changed, 124 insertions(+), 113 deletions(-)
 create mode 100644 tools/testing/selftests/vm/vm_util.c
 create mode 100644 tools/testing/selftests/vm/vm_util.h

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index ff0c7a87785b..6fd967839ccd 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -36,7 +36,7 @@ TEST_GEN_FILES += hugepage-mremap
 TEST_GEN_FILES += hugepage-shm
 TEST_GEN_FILES += hugepage-vmemmap
 TEST_GEN_FILES += khugepaged
-TEST_GEN_FILES += madv_populate
+TEST_GEN_PROGS = madv_populate
 TEST_GEN_FILES += map_fixed_noreplace
 TEST_GEN_FILES += map_hugetlb
 TEST_GEN_FILES += map_populate
@@ -50,7 +50,7 @@ TEST_GEN_FILES += on-fault-limit
 TEST_GEN_FILES += thuge-gen
 TEST_GEN_FILES += transhuge-stress
 TEST_GEN_FILES += userfaultfd
-TEST_GEN_FILES += split_huge_page_test
+TEST_GEN_PROGS += split_huge_page_test
 TEST_GEN_FILES += ksm_tests
 
 ifeq ($(MACHINE),x86_64)
@@ -94,6 +94,9 @@ TEST_FILES := test_vmalloc.sh
 KSFT_KHDR_INSTALL := 1
 include ../lib.mk
 
+$(OUTPUT)/madv_populate: vm_util.c
+$(OUTPUT)/split_huge_page_test: vm_util.c
+
 ifeq ($(MACHINE),x86_64)
 BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32))
 BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64))
diff --git a/tools/testing/selftests/vm/madv_populate.c b/tools/testing/selftests/vm/madv_populate.c
index 3ee0e8275600..715a42e8e2cd 100644
--- a/tools/testing/selftests/vm/madv_populate.c
+++ b/tools/testing/selftests/vm/madv_populate.c
@@ -18,6 +18,7 @@
 #include <sys/mman.h>
 
 #include "../kselftest.h"
+#include "vm_util.h"
 
 /*
  * For now, we're using 2 MiB of private anonymous memory for all tests.
@@ -26,18 +27,6 @@
 
 static size_t pagesize;
 
-static uint64_t pagemap_get_entry(int fd, char *start)
-{
-	const unsigned long pfn = (unsigned long)start / pagesize;
-	uint64_t entry;
-	int ret;
-
-	ret = pread(fd, &entry, sizeof(entry), pfn * sizeof(entry));
-	if (ret != sizeof(entry))
-		ksft_exit_fail_msg("reading pagemap failed\n");
-	return entry;
-}
-
 static bool pagemap_is_populated(int fd, char *start)
 {
 	uint64_t entry = pagemap_get_entry(fd, start);
@@ -46,13 +35,6 @@ static bool pagemap_is_populated(int fd, char *start)
 	return entry & 0xc000000000000000ull;
 }
 
-static bool pagemap_is_softdirty(int fd, char *start)
-{
-	uint64_t entry = pagemap_get_entry(fd, start);
-
-	return entry & 0x0080000000000000ull;
-}
-
 static void sense_support(void)
 {
 	char *addr;
@@ -258,20 +240,6 @@ static bool range_is_not_softdirty(char *start, ssize_t size)
 	return ret;
 }
 
-static void clear_softdirty(void)
-{
-	int fd = open("/proc/self/clear_refs", O_WRONLY);
-	const char *ctrl = "4";
-	int ret;
-
-	if (fd < 0)
-		ksft_exit_fail_msg("opening clear_refs failed\n");
-	ret = write(fd, ctrl, strlen(ctrl));
-	if (ret != strlen(ctrl))
-		ksft_exit_fail_msg("writing clear_refs failed\n");
-	close(fd);
-}
-
 static void test_softdirty(void)
 {
 	char *addr;
diff --git a/tools/testing/selftests/vm/split_huge_page_test.c b/tools/testing/selftests/vm/split_huge_page_test.c
index 52497b7b9f1d..6aa2b8253aed 100644
--- a/tools/testing/selftests/vm/split_huge_page_test.c
+++ b/tools/testing/selftests/vm/split_huge_page_test.c
@@ -16,14 +16,13 @@
 #include <sys/mount.h>
 #include <malloc.h>
 #include <stdbool.h>
+#include "vm_util.h"
 
 uint64_t pagesize;
 unsigned int pageshift;
 uint64_t pmd_pagesize;
 
-#define PMD_SIZE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
 #define SPLIT_DEBUGFS "/sys/kernel/debug/split_huge_pages"
-#define SMAP_PATH "/proc/self/smaps"
 #define INPUT_MAX 80
 
 #define PID_FMT "%d,0x%lx,0x%lx"
@@ -51,30 +50,6 @@ int is_backed_by_thp(char *vaddr, int pagemap_file, int kpageflags_file)
 	return 0;
 }
 
-
-static uint64_t read_pmd_pagesize(void)
-{
-	int fd;
-	char buf[20];
-	ssize_t num_read;
-
-	fd = open(PMD_SIZE_PATH, O_RDONLY);
-	if (fd == -1) {
-		perror("Open hpage_pmd_size failed");
-		exit(EXIT_FAILURE);
-	}
-	num_read = read(fd, buf, 19);
-	if (num_read < 1) {
-		close(fd);
-		perror("Read hpage_pmd_size failed");
-		exit(EXIT_FAILURE);
-	}
-	buf[num_read] = '\0';
-	close(fd);
-
-	return strtoul(buf, NULL, 10);
-}
-
 static int write_file(const char *path, const char *buf, size_t buflen)
 {
 	int fd;
@@ -113,58 +88,6 @@ static void write_debugfs(const char *fmt, ...)
 	}
 }
 
-#define MAX_LINE_LENGTH 500
-
-static bool check_for_pattern(FILE *fp, const char *pattern, char *buf)
-{
-	while (fgets(buf, MAX_LINE_LENGTH, fp) != NULL) {
-		if (!strncmp(buf, pattern, strlen(pattern)))
-			return true;
-	}
-	return false;
-}
-
-static uint64_t check_huge(void *addr)
-{
-	uint64_t thp = 0;
-	int ret;
-	FILE *fp;
-	char buffer[MAX_LINE_LENGTH];
-	char addr_pattern[MAX_LINE_LENGTH];
-
-	ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
-		       (unsigned long) addr);
-	if (ret >= MAX_LINE_LENGTH) {
-		printf("%s: Pattern is too long\n", __func__);
-		exit(EXIT_FAILURE);
-	}
-
-
-	fp = fopen(SMAP_PATH, "r");
-	if (!fp) {
-		printf("%s: Failed to open file %s\n", __func__, SMAP_PATH);
-		exit(EXIT_FAILURE);
-	}
-	if (!check_for_pattern(fp, addr_pattern, buffer))
-		goto err_out;
-
-	/*
-	 * Fetch the AnonHugePages: in the same block and check the number of
-	 * hugepages.
-	 */
-	if (!check_for_pattern(fp, "AnonHugePages:", buffer))
-		goto err_out;
-
-	if (sscanf(buffer, "AnonHugePages:%10ld kB", &thp) != 1) {
-		printf("Reading smap error\n");
-		exit(EXIT_FAILURE);
-	}
-
-err_out:
-	fclose(fp);
-	return thp;
-}
-
 void split_pmd_thp(void)
 {
 	char *one_page;
diff --git a/tools/testing/selftests/vm/vm_util.c b/tools/testing/selftests/vm/vm_util.c
new file mode 100644
index 000000000000..b58ab11a7a30
--- /dev/null
+++ b/tools/testing/selftests/vm/vm_util.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include <fcntl.h>
+#include "../kselftest.h"
+#include "vm_util.h"
+
+#define PMD_SIZE_FILE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
+#define SMAP_FILE_PATH "/proc/self/smaps"
+#define MAX_LINE_LENGTH 500
+
+uint64_t pagemap_get_entry(int fd, char *start)
+{
+	const unsigned long pfn = (unsigned long)start / getpagesize();
+	uint64_t entry;
+	int ret;
+
+	ret = pread(fd, &entry, sizeof(entry), pfn * sizeof(entry));
+	if (ret != sizeof(entry))
+		ksft_exit_fail_msg("reading pagemap failed\n");
+	return entry;
+}
+
+bool pagemap_is_softdirty(int fd, char *start)
+{
+	uint64_t entry = pagemap_get_entry(fd, start);
+
+	// Check if dirty bit (55th bit) is set
+	return entry & 0x0080000000000000ull;
+}
+
+void clear_softdirty(void)
+{
+	int ret;
+	const char *ctrl = "4";
+	int fd = open("/proc/self/clear_refs", O_WRONLY);
+
+	if (fd < 0)
+		ksft_exit_fail_msg("opening clear_refs failed\n");
+	ret = write(fd, ctrl, strlen(ctrl));
+	close(fd);
+	if (ret != strlen(ctrl))
+		ksft_exit_fail_msg("writing clear_refs failed\n");
+}
+
+static bool check_for_pattern(FILE *fp, const char *pattern, char *buf)
+{
+	while (fgets(buf, MAX_LINE_LENGTH, fp) != NULL) {
+		if (!strncmp(buf, pattern, strlen(pattern)))
+			return true;
+	}
+	return false;
+}
+
+uint64_t read_pmd_pagesize(void)
+{
+	int fd;
+	char buf[20];
+	ssize_t num_read;
+
+	fd = open(PMD_SIZE_FILE_PATH, O_RDONLY);
+	if (fd == -1)
+		ksft_exit_fail_msg("Open hpage_pmd_size failed\n");
+
+	num_read = read(fd, buf, 19);
+	if (num_read < 1) {
+		close(fd);
+		ksft_exit_fail_msg("Read hpage_pmd_size failed\n");
+	}
+	buf[num_read] = '\0';
+	close(fd);
+
+	return strtoul(buf, NULL, 10);
+}
+
+uint64_t check_huge(void *addr)
+{
+	uint64_t thp = 0;
+	int ret;
+	FILE *fp;
+	char buffer[MAX_LINE_LENGTH];
+	char addr_pattern[MAX_LINE_LENGTH];
+
+	ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
+		       (unsigned long) addr);
+	if (ret >= MAX_LINE_LENGTH)
+		ksft_exit_fail_msg("%s: Pattern is too long\n", __func__);
+
+	fp = fopen(SMAP_FILE_PATH, "r");
+	if (!fp)
+		ksft_exit_fail_msg("%s: Failed to open file %s\n", __func__, SMAP_FILE_PATH);
+
+	if (!check_for_pattern(fp, addr_pattern, buffer))
+		goto err_out;
+
+	/*
+	 * Fetch the AnonHugePages: in the same block and check the number of
+	 * hugepages.
+	 */
+	if (!check_for_pattern(fp, "AnonHugePages:", buffer))
+		goto err_out;
+
+	if (sscanf(buffer, "AnonHugePages:%10ld kB", &thp) != 1)
+		ksft_exit_fail_msg("Reading smap error\n");
+
+err_out:
+	fclose(fp);
+	return thp;
+}
diff --git a/tools/testing/selftests/vm/vm_util.h b/tools/testing/selftests/vm/vm_util.h
new file mode 100644
index 000000000000..2e512bd57ae1
--- /dev/null
+++ b/tools/testing/selftests/vm/vm_util.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <stdint.h>
+#include <stdbool.h>
+
+uint64_t pagemap_get_entry(int fd, char *start);
+bool pagemap_is_softdirty(int fd, char *start);
+void clear_softdirty(void);
+uint64_t read_pmd_pagesize(void);
+uint64_t check_huge(void *addr);
-- 
cgit 


From 9f3265db6ae87de27a5e382410b8eb9af53b161e Mon Sep 17 00:00:00 2001
From: Gabriel Krisman Bertazi <krisman@collabora.com>
Date: Thu, 28 Apr 2022 23:16:11 -0700
Subject: selftests: vm: add test for Soft-Dirty PTE bit

This introduces three tests:

1) Sanity check soft dirty basic semantics: allocate area, clean,
   dirty, check if the SD bit is flipped.

2) Check VMA reuse: validate the VM_SOFTDIRTY usage

3) Check soft-dirty on huge pages

This was motivated by Will Deacon's fix commit 912efa17e512 ("mm: proc:
Invalidate TLB after clearing soft-dirty page state").  I was tracking the
same issue that he fixed, and this test would have caught it.

Link: https://lkml.kernel.org/r/20220420084036.4101604-2-usama.anjum@collabora.com
Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Co-developed-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Cc: Will Deacon <will@kernel.org>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/vm/.gitignore   |   1 +
 tools/testing/selftests/vm/Makefile     |   2 +
 tools/testing/selftests/vm/config       |   2 +
 tools/testing/selftests/vm/soft-dirty.c | 145 ++++++++++++++++++++++++++++++++
 4 files changed, 150 insertions(+)
 create mode 100644 tools/testing/selftests/vm/soft-dirty.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index d7507f3c7c76..3cb4fa771ec2 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -29,5 +29,6 @@ write_to_hugetlbfs
 hmm-tests
 memfd_secret
 local_config.*
+soft-dirty
 split_huge_page_test
 ksm_tests
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 6fd967839ccd..f1228370e99b 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -50,6 +50,7 @@ TEST_GEN_FILES += on-fault-limit
 TEST_GEN_FILES += thuge-gen
 TEST_GEN_FILES += transhuge-stress
 TEST_GEN_FILES += userfaultfd
+TEST_GEN_PROGS += soft-dirty
 TEST_GEN_PROGS += split_huge_page_test
 TEST_GEN_FILES += ksm_tests
 
@@ -95,6 +96,7 @@ KSFT_KHDR_INSTALL := 1
 include ../lib.mk
 
 $(OUTPUT)/madv_populate: vm_util.c
+$(OUTPUT)/soft-dirty: vm_util.c
 $(OUTPUT)/split_huge_page_test: vm_util.c
 
 ifeq ($(MACHINE),x86_64)
diff --git a/tools/testing/selftests/vm/config b/tools/testing/selftests/vm/config
index 60e82da0de85..be087c4bc396 100644
--- a/tools/testing/selftests/vm/config
+++ b/tools/testing/selftests/vm/config
@@ -4,3 +4,5 @@ CONFIG_TEST_VMALLOC=m
 CONFIG_DEVICE_PRIVATE=y
 CONFIG_TEST_HMM=m
 CONFIG_GUP_TEST=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_MEM_SOFT_DIRTY=y
diff --git a/tools/testing/selftests/vm/soft-dirty.c b/tools/testing/selftests/vm/soft-dirty.c
new file mode 100644
index 000000000000..08ab62a4a9d0
--- /dev/null
+++ b/tools/testing/selftests/vm/soft-dirty.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+#include <stdbool.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <malloc.h>
+#include <sys/mman.h>
+#include "../kselftest.h"
+#include "vm_util.h"
+
+#define PAGEMAP_FILE_PATH "/proc/self/pagemap"
+#define TEST_ITERATIONS 10000
+
+static void test_simple(int pagemap_fd, int pagesize)
+{
+	int i;
+	char *map;
+
+	map = aligned_alloc(pagesize, pagesize);
+	if (!map)
+		ksft_exit_fail_msg("mmap failed\n");
+
+	clear_softdirty();
+
+	for (i = 0 ; i < TEST_ITERATIONS; i++) {
+		if (pagemap_is_softdirty(pagemap_fd, map) == 1) {
+			ksft_print_msg("dirty bit was 1, but should be 0 (i=%d)\n", i);
+			break;
+		}
+
+		clear_softdirty();
+		// Write something to the page to get the dirty bit enabled on the page
+		map[0]++;
+
+		if (pagemap_is_softdirty(pagemap_fd, map) == 0) {
+			ksft_print_msg("dirty bit was 0, but should be 1 (i=%d)\n", i);
+			break;
+		}
+
+		clear_softdirty();
+	}
+	free(map);
+
+	ksft_test_result(i == TEST_ITERATIONS, "Test %s\n", __func__);
+}
+
+static void test_vma_reuse(int pagemap_fd, int pagesize)
+{
+	char *map, *map2;
+
+	map = mmap(NULL, pagesize, (PROT_READ | PROT_WRITE), (MAP_PRIVATE | MAP_ANON), -1, 0);
+	if (map == MAP_FAILED)
+		ksft_exit_fail_msg("mmap failed");
+
+	// The kernel always marks new regions as soft dirty
+	ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 1,
+			 "Test %s dirty bit of allocated page\n", __func__);
+
+	clear_softdirty();
+	munmap(map, pagesize);
+
+	map2 = mmap(NULL, pagesize, (PROT_READ | PROT_WRITE), (MAP_PRIVATE | MAP_ANON), -1, 0);
+	if (map2 == MAP_FAILED)
+		ksft_exit_fail_msg("mmap failed");
+
+	// Dirty bit is set for new regions even if they are reused
+	if (map == map2)
+		ksft_test_result(pagemap_is_softdirty(pagemap_fd, map2) == 1,
+				 "Test %s dirty bit of reused address page\n", __func__);
+	else
+		ksft_test_result_skip("Test %s dirty bit of reused address page\n", __func__);
+
+	munmap(map2, pagesize);
+}
+
+static void test_hugepage(int pagemap_fd, int pagesize)
+{
+	char *map;
+	int i, ret;
+	size_t hpage_len = read_pmd_pagesize();
+
+	map = memalign(hpage_len, hpage_len);
+	if (!map)
+		ksft_exit_fail_msg("memalign failed\n");
+
+	ret = madvise(map, hpage_len, MADV_HUGEPAGE);
+	if (ret)
+		ksft_exit_fail_msg("madvise failed %d\n", ret);
+
+	for (i = 0; i < hpage_len; i++)
+		map[i] = (char)i;
+
+	if (check_huge(map)) {
+		ksft_test_result_pass("Test %s huge page allocation\n", __func__);
+
+		clear_softdirty();
+		for (i = 0 ; i < TEST_ITERATIONS ; i++) {
+			if (pagemap_is_softdirty(pagemap_fd, map) == 1) {
+				ksft_print_msg("dirty bit was 1, but should be 0 (i=%d)\n", i);
+				break;
+			}
+
+			clear_softdirty();
+			// Write something to the page to get the dirty bit enabled on the page
+			map[0]++;
+
+			if (pagemap_is_softdirty(pagemap_fd, map) == 0) {
+				ksft_print_msg("dirty bit was 0, but should be 1 (i=%d)\n", i);
+				break;
+			}
+			clear_softdirty();
+		}
+
+		ksft_test_result(i == TEST_ITERATIONS, "Test %s huge page dirty bit\n", __func__);
+	} else {
+		// hugepage allocation failed. skip these tests
+		ksft_test_result_skip("Test %s huge page allocation\n", __func__);
+		ksft_test_result_skip("Test %s huge page dirty bit\n", __func__);
+	}
+	free(map);
+}
+
+int main(int argc, char **argv)
+{
+	int pagemap_fd;
+	int pagesize;
+
+	ksft_print_header();
+	ksft_set_plan(5);
+
+	pagemap_fd = open(PAGEMAP_FILE_PATH, O_RDONLY);
+	if (pagemap_fd < 0)
+		ksft_exit_fail_msg("Failed to open %s\n", PAGEMAP_FILE_PATH);
+
+	pagesize = getpagesize();
+
+	test_simple(pagemap_fd, pagesize);
+	test_vma_reuse(pagemap_fd, pagesize);
+	test_hugepage(pagemap_fd, pagesize);
+
+	close(pagemap_fd);
+
+	return ksft_exit_pass();
+}
-- 
cgit 


From b67bd551201a3e2c7e1def84980e9b2f0b3a3c77 Mon Sep 17 00:00:00 2001
From: Axel Rasmussen <axelrasmussen@google.com>
Date: Thu, 28 Apr 2022 23:16:11 -0700
Subject: selftests: vm: refactor run_vmtests.sh to reduce boilerplate

Previously, each test printed out its own header, dealt with its own
return code, etc.  By just putting this standard stuff in a function, we
can delete > 300 lines from the script.

This also makes adding future tests easier. And, it gets rid of various
inconsistencies that already exist:

- Some tests correctly deal with ksft_skip, but others don't.
- Some tests just print the executable name, others print arguments, and
  yet others print some comment in the header.
- Most tests print out a header with two separator lines, but not the
  HMM smoke test or the memfd_secret test, which only print one.
- We had a redundant "exit" at the end, with all the boilerplate it's an
  easy oversight.

Link: https://lkml.kernel.org/r/20220421224928.1848230-1-axelrasmussen@google.com
Signed-off-by: Axel Rasmussen <axelrasmussen@google.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/vm/run_vmtests.sh | 479 ++++--------------------------
 1 file changed, 64 insertions(+), 415 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh
index 8865ff365cc6..2d5a3da42cbe 100755
--- a/tools/testing/selftests/vm/run_vmtests.sh
+++ b/tools/testing/selftests/vm/run_vmtests.sh
@@ -66,467 +66,116 @@ fi
 VADDR64=0
 echo "$ARCH64STR" | grep $ARCH && VADDR64=1
 
+# Usage: run_test [test binary] [arbitrary test arguments...]
+run_test() {
+	local title="running $*"
+	local sep=$(echo -n "$title" | tr "[:graph:][:space:]" -)
+	printf "%s\n%s\n%s\n" "$sep" "$title" "$sep"
+
+	"$@"
+	local ret=$?
+	if [ $ret -eq 0 ]; then
+		echo "[PASS]"
+	elif [ $ret -eq $ksft_skip ]; then
+		echo "[SKIP]"
+		exitcode=$ksft_skip
+	else
+		echo "[FAIL]"
+		exitcode=1
+	fi
+}
+
 mkdir $mnt
 mount -t hugetlbfs none $mnt
 
-echo "---------------------"
-echo "running hugepage-mmap"
-echo "---------------------"
-./hugepage-mmap
-if [ $? -ne 0 ]; then
-	echo "[FAIL]"
-	exitcode=1
-else
-	echo "[PASS]"
-fi
+run_test ./hugepage-mmap
 
 shmmax=`cat /proc/sys/kernel/shmmax`
 shmall=`cat /proc/sys/kernel/shmall`
 echo 268435456 > /proc/sys/kernel/shmmax
 echo 4194304 > /proc/sys/kernel/shmall
-echo "--------------------"
-echo "running hugepage-shm"
-echo "--------------------"
-./hugepage-shm
-if [ $? -ne 0 ]; then
-	echo "[FAIL]"
-	exitcode=1
-else
-	echo "[PASS]"
-fi
+run_test ./hugepage-shm
 echo $shmmax > /proc/sys/kernel/shmmax
 echo $shmall > /proc/sys/kernel/shmall
 
-echo "-------------------"
-echo "running map_hugetlb"
-echo "-------------------"
-./map_hugetlb
-if [ $? -ne 0 ]; then
-	echo "[FAIL]"
-	exitcode=1
-else
-	echo "[PASS]"
-fi
+run_test ./map_hugetlb
 
-echo "-----------------------"
-echo "running hugepage-mremap"
-echo "-----------------------"
-./hugepage-mremap $mnt/huge_mremap
-if [ $? -ne 0 ]; then
-	echo "[FAIL]"
-	exitcode=1
-else
-	echo "[PASS]"
-fi
+run_test ./hugepage-mremap $mnt/huge_mremap
 rm -f $mnt/huge_mremap
 
-echo "------------------------"
-echo "running hugepage-vmemmap"
-echo "------------------------"
-./hugepage-vmemmap
-if [ $? -ne 0 ]; then
-	echo "[FAIL]"
-	exitcode=1
-else
-	echo "[PASS]"
-fi
+run_test ./hugepage-vmemmap
 
-echo "-----------------------"
-echo "running hugetlb-madvise"
-echo "-----------------------"
-./hugetlb-madvise $mnt/madvise-test
-if [ $? -ne 0 ]; then
-	echo "[FAIL]"
-	exitcode=1
-else
-	echo "[PASS]"
-fi
+run_test ./hugetlb-madvise $mnt/madvise-test
 rm -f $mnt/madvise-test
 
 echo "NOTE: The above hugetlb tests provide minimal coverage.  Use"
 echo "      https://github.com/libhugetlbfs/libhugetlbfs.git for"
 echo "      hugetlb regression testing."
 
-echo "---------------------------"
-echo "running map_fixed_noreplace"
-echo "---------------------------"
-./map_fixed_noreplace
-if [ $? -ne 0 ]; then
-	echo "[FAIL]"
-	exitcode=1
-else
-	echo "[PASS]"
-fi
-
-echo "------------------------------------------------------"
-echo "running: gup_test -u # get_user_pages_fast() benchmark"
-echo "------------------------------------------------------"
-./gup_test -u
-ret_val=$?
-
-if [ $ret_val -eq 0 ]; then
-	echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
-	 echo "[SKIP]"
-	 exitcode=$ksft_skip
-else
-	echo "[FAIL]"
-	exitcode=1
-fi
+run_test ./map_fixed_noreplace
 
-echo "------------------------------------------------------"
-echo "running: gup_test -a # pin_user_pages_fast() benchmark"
-echo "------------------------------------------------------"
-./gup_test -a
-ret_val=$?
-
-if [ $ret_val -eq 0 ]; then
-	echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
-	 echo "[SKIP]"
-	 exitcode=$ksft_skip
-else
-	echo "[FAIL]"
-	exitcode=1
-fi
+# get_user_pages_fast() benchmark
+run_test ./gup_test -u
+# pin_user_pages_fast() benchmark
+run_test ./gup_test -a
+# Dump pages 0, 19, and 4096, using pin_user_pages:
+run_test ./gup_test -ct -F 0x1 0 19 0x1000
 
-echo "------------------------------------------------------------"
-echo "# Dump pages 0, 19, and 4096, using pin_user_pages:"
-echo "running: gup_test -ct -F 0x1 0 19 0x1000 # dump_page() test"
-echo "------------------------------------------------------------"
-./gup_test -ct -F 0x1 0 19 0x1000
-ret_val=$?
-
-if [ $ret_val -eq 0 ]; then
-	echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
-	 echo "[SKIP]"
-	 exitcode=$ksft_skip
-else
-	echo "[FAIL]"
-	exitcode=1
-fi
-
-echo "-------------------"
-echo "running userfaultfd"
-echo "-------------------"
-./userfaultfd anon 20 16
-if [ $? -ne 0 ]; then
-	echo "[FAIL]"
-	exitcode=1
-else
-	echo "[PASS]"
-fi
-
-echo "---------------------------"
-echo "running userfaultfd_hugetlb"
-echo "---------------------------"
+run_test ./userfaultfd anon 20 16
 # Test requires source and destination huge pages.  Size of source
 # (half_ufd_size_MB) is passed as argument to test.
-./userfaultfd hugetlb $half_ufd_size_MB 32
-if [ $? -ne 0 ]; then
-	echo "[FAIL]"
-	exitcode=1
-else
-	echo "[PASS]"
-fi
-
-echo "-------------------------"
-echo "running userfaultfd_shmem"
-echo "-------------------------"
-./userfaultfd shmem 20 16
-if [ $? -ne 0 ]; then
-	echo "[FAIL]"
-	exitcode=1
-else
-	echo "[PASS]"
-fi
+run_test ./userfaultfd hugetlb $half_ufd_size_MB 32
+run_test ./userfaultfd shmem 20 16
 
 #cleanup
 umount $mnt
 rm -rf $mnt
 echo $nr_hugepgs > /proc/sys/vm/nr_hugepages
 
-echo "-----------------------"
-echo "running compaction_test"
-echo "-----------------------"
-./compaction_test
-if [ $? -ne 0 ]; then
-	echo "[FAIL]"
-	exitcode=1
-else
-	echo "[PASS]"
-fi
+run_test ./compaction_test
 
-echo "----------------------"
-echo "running on-fault-limit"
-echo "----------------------"
-sudo -u nobody ./on-fault-limit
-if [ $? -ne 0 ]; then
-	echo "[FAIL]"
-	exitcode=1
-else
-	echo "[PASS]"
-fi
+run_test sudo -u nobody ./on-fault-limit
 
-echo "--------------------"
-echo "running map_populate"
-echo "--------------------"
-./map_populate
-if [ $? -ne 0 ]; then
-	echo "[FAIL]"
-	exitcode=1
-else
-	echo "[PASS]"
-fi
+run_test ./map_populate
 
-echo "-------------------------"
-echo "running mlock-random-test"
-echo "-------------------------"
-./mlock-random-test
-if [ $? -ne 0 ]; then
-	echo "[FAIL]"
-	exitcode=1
-else
-	echo "[PASS]"
-fi
+run_test ./mlock-random-test
 
-echo "--------------------"
-echo "running mlock2-tests"
-echo "--------------------"
-./mlock2-tests
-if [ $? -ne 0 ]; then
-	echo "[FAIL]"
-	exitcode=1
-else
-	echo "[PASS]"
-fi
+run_test ./mlock2-tests
 
-echo "-------------------"
-echo "running mremap_test"
-echo "-------------------"
-./mremap_test
-ret_val=$?
-
-if [ $ret_val -eq 0 ]; then
-	echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
-	 echo "[SKIP]"
-	 exitcode=$ksft_skip
-else
-	echo "[FAIL]"
-	exitcode=1
-fi
+run_test ./mremap_test
 
-echo "-----------------"
-echo "running thuge-gen"
-echo "-----------------"
-./thuge-gen
-if [ $? -ne 0 ]; then
-	echo "[FAIL]"
-	exitcode=1
-else
-	echo "[PASS]"
-fi
+run_test ./thuge-gen
 
 if [ $VADDR64 -ne 0 ]; then
-echo "-----------------------------"
-echo "running virtual_address_range"
-echo "-----------------------------"
-./virtual_address_range
-if [ $? -ne 0 ]; then
-	echo "[FAIL]"
-	exitcode=1
-else
-	echo "[PASS]"
-fi
+	run_test ./virtual_address_range
 
-echo "-----------------------------"
-echo "running virtual address 128TB switch test"
-echo "-----------------------------"
-./va_128TBswitch
-if [ $? -ne 0 ]; then
-    echo "[FAIL]"
-    exitcode=1
-else
-    echo "[PASS]"
-fi
+	# virtual address 128TB switch test
+	run_test ./va_128TBswitch
 fi # VADDR64
 
-echo "------------------------------------"
-echo "running vmalloc stability smoke test"
-echo "------------------------------------"
-./test_vmalloc.sh smoke
-ret_val=$?
-
-if [ $ret_val -eq 0 ]; then
-	echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
-	 echo "[SKIP]"
-	 exitcode=$ksft_skip
-else
-	echo "[FAIL]"
-	exitcode=1
-fi
-
-echo "------------------------------------"
-echo "running MREMAP_DONTUNMAP smoke test"
-echo "------------------------------------"
-./mremap_dontunmap
-ret_val=$?
-
-if [ $ret_val -eq 0 ]; then
-	echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
-	 echo "[SKIP]"
-	 exitcode=$ksft_skip
-else
-	echo "[FAIL]"
-	exitcode=1
-fi
-
-echo "running HMM smoke test"
-echo "------------------------------------"
-./test_hmm.sh smoke
-ret_val=$?
-
-if [ $ret_val -eq 0 ]; then
-	echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
-	echo "[SKIP]"
-	exitcode=$ksft_skip
-else
-	echo "[FAIL]"
-	exitcode=1
-fi
-
-echo "--------------------------------------------------------"
-echo "running MADV_POPULATE_READ and MADV_POPULATE_WRITE tests"
-echo "--------------------------------------------------------"
-./madv_populate
-ret_val=$?
-
-if [ $ret_val -eq 0 ]; then
-	echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
-	echo "[SKIP]"
-	exitcode=$ksft_skip
-else
-	echo "[FAIL]"
-	exitcode=1
-fi
-
-echo "running memfd_secret test"
-echo "------------------------------------"
-./memfd_secret
-ret_val=$?
-
-if [ $ret_val -eq 0 ]; then
-	echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
-	echo "[SKIP]"
-	exitcode=$ksft_skip
-else
-	echo "[FAIL]"
-	exitcode=1
-fi
-
-echo "-------------------------------------------------------"
-echo "running KSM MADV_MERGEABLE test with 10 identical pages"
-echo "-------------------------------------------------------"
-./ksm_tests -M -p 10
-ret_val=$?
-
-if [ $ret_val -eq 0 ]; then
-	echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
-	 echo "[SKIP]"
-	 exitcode=$ksft_skip
-else
-	echo "[FAIL]"
-	exitcode=1
-fi
-
-echo "------------------------"
-echo "running KSM unmerge test"
-echo "------------------------"
-./ksm_tests -U
-ret_val=$?
-
-if [ $ret_val -eq 0 ]; then
-	echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
-	 echo "[SKIP]"
-	 exitcode=$ksft_skip
-else
-	echo "[FAIL]"
-	exitcode=1
-fi
+# vmalloc stability smoke test
+run_test ./test_vmalloc.sh smoke
 
-echo "----------------------------------------------------------"
-echo "running KSM test with 10 zero pages and use_zero_pages = 0"
-echo "----------------------------------------------------------"
-./ksm_tests -Z -p 10 -z 0
-ret_val=$?
-
-if [ $ret_val -eq 0 ]; then
-	echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
-	 echo "[SKIP]"
-	 exitcode=$ksft_skip
-else
-	echo "[FAIL]"
-	exitcode=1
-fi
+run_test ./mremap_dontunmap
 
-echo "----------------------------------------------------------"
-echo "running KSM test with 10 zero pages and use_zero_pages = 1"
-echo "----------------------------------------------------------"
-./ksm_tests -Z -p 10 -z 1
-ret_val=$?
-
-if [ $ret_val -eq 0 ]; then
-	echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
-	 echo "[SKIP]"
-	 exitcode=$ksft_skip
-else
-	echo "[FAIL]"
-	exitcode=1
-fi
+run_test ./test_hmm.sh smoke
 
-echo "-------------------------------------------------------------"
-echo "running KSM test with 2 NUMA nodes and merge_across_nodes = 1"
-echo "-------------------------------------------------------------"
-./ksm_tests -N -m 1
-ret_val=$?
-
-if [ $ret_val -eq 0 ]; then
-	echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
-	 echo "[SKIP]"
-	 exitcode=$ksft_skip
-else
-	echo "[FAIL]"
-	exitcode=1
-fi
+# MADV_POPULATE_READ and MADV_POPULATE_WRITE tests
+run_test ./madv_populate
 
-echo "-------------------------------------------------------------"
-echo "running KSM test with 2 NUMA nodes and merge_across_nodes = 0"
-echo "-------------------------------------------------------------"
-./ksm_tests -N -m 0
-ret_val=$?
-
-if [ $ret_val -eq 0 ]; then
-	echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
-	 echo "[SKIP]"
-	 exitcode=$ksft_skip
-else
-	echo "[FAIL]"
-	exitcode=1
-fi
+run_test ./memfd_secret
 
-exit $exitcode
+# KSM MADV_MERGEABLE test with 10 identical pages
+run_test ./ksm_tests -M -p 10
+# KSM unmerge test
+run_test ./ksm_tests -U
+# KSM test with 10 zero pages and use_zero_pages = 0
+run_test ./ksm_tests -Z -p 10 -z 0
+# KSM test with 10 zero pages and use_zero_pages = 1
+run_test ./ksm_tests -Z -p 10 -z 1
+# KSM test with 2 NUMA nodes and merge_across_nodes = 1
+run_test ./ksm_tests -N -m 1
+# KSM test with 2 NUMA nodes and merge_across_nodes = 0
+run_test ./ksm_tests -N -m 0
 
 exit $exitcode
-- 
cgit 


From 241ec63a9a0fbb39292ea1dd2d07f8dabedfe3df Mon Sep 17 00:00:00 2001
From: Axel Rasmussen <axelrasmussen@google.com>
Date: Thu, 28 Apr 2022 23:16:11 -0700
Subject: selftests: vm: fix shellcheck warnings in run_vmtests.sh

These might not be issues yet, but they make the script more fragile.
Also by fixing them we give a better example to future readers, who might
copy/paste or otherwise re-use snippets from our script.

- Use "read -r", since we don't ever want read to be interpreting '\'
  characters as escape sequences...
- Quote variables, to deal with spaces properly.
- Use $() instead of the older and harder-to-nest ``.
- Get rid of superfluous "$" prefixes inside arithmetic $(()).

Link: https://lkml.kernel.org/r/20220421224928.1848230-2-axelrasmussen@google.com
Signed-off-by: Axel Rasmussen <axelrasmussen@google.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/vm/run_vmtests.sh | 55 +++++++++++++++----------------
 1 file changed, 27 insertions(+), 28 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh
index 2d5a3da42cbe..a2302b5faaf2 100755
--- a/tools/testing/selftests/vm/run_vmtests.sh
+++ b/tools/testing/selftests/vm/run_vmtests.sh
@@ -9,12 +9,12 @@ mnt=./huge
 exitcode=0
 
 #get huge pagesize and freepages from /proc/meminfo
-while read name size unit; do
+while read -r name size unit; do
 	if [ "$name" = "HugePages_Free:" ]; then
-		freepgs=$size
+		freepgs="$size"
 	fi
 	if [ "$name" = "Hugepagesize:" ]; then
-		hpgsize_KB=$size
+		hpgsize_KB="$size"
 	fi
 done < /proc/meminfo
 
@@ -30,27 +30,26 @@ needmem_KB=$((half_ufd_size_MB * 2 * 1024))
 
 #set proper nr_hugepages
 if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then
-	nr_hugepgs=`cat /proc/sys/vm/nr_hugepages`
+	nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages)
 	needpgs=$((needmem_KB / hpgsize_KB))
 	tries=2
-	while [ $tries -gt 0 ] && [ $freepgs -lt $needpgs ]; do
-		lackpgs=$(( $needpgs - $freepgs ))
+	while [ "$tries" -gt 0 ] && [ "$freepgs" -lt "$needpgs" ]; do
+		lackpgs=$((needpgs - freepgs))
 		echo 3 > /proc/sys/vm/drop_caches
-		echo $(( $lackpgs + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages
-		if [ $? -ne 0 ]; then
+		if ! echo $((lackpgs + nr_hugepgs)) > /proc/sys/vm/nr_hugepages; then
 			echo "Please run this test as root"
 			exit $ksft_skip
 		fi
-		while read name size unit; do
+		while read -r name size unit; do
 			if [ "$name" = "HugePages_Free:" ]; then
 				freepgs=$size
 			fi
 		done < /proc/meminfo
 		tries=$((tries - 1))
 	done
-	if [ $freepgs -lt $needpgs ]; then
+	if [ "$freepgs" -lt "$needpgs" ]; then
 		printf "Not enough huge pages available (%d < %d)\n" \
-		       $freepgs $needpgs
+		       "$freepgs" "$needpgs"
 		exit 1
 	fi
 else
@@ -60,11 +59,11 @@ fi
 
 #filter 64bit architectures
 ARCH64STR="arm64 ia64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sh64 sparc64 x86_64"
-if [ -z $ARCH ]; then
-  ARCH=`uname -m 2>/dev/null | sed -e 's/aarch64.*/arm64/'`
+if [ -z "$ARCH" ]; then
+	ARCH=$(uname -m 2>/dev/null | sed -e 's/aarch64.*/arm64/')
 fi
 VADDR64=0
-echo "$ARCH64STR" | grep $ARCH && VADDR64=1
+echo "$ARCH64STR" | grep "$ARCH" && VADDR64=1
 
 # Usage: run_test [test binary] [arbitrary test arguments...]
 run_test() {
@@ -85,28 +84,28 @@ run_test() {
 	fi
 }
 
-mkdir $mnt
-mount -t hugetlbfs none $mnt
+mkdir "$mnt"
+mount -t hugetlbfs none "$mnt"
 
 run_test ./hugepage-mmap
 
-shmmax=`cat /proc/sys/kernel/shmmax`
-shmall=`cat /proc/sys/kernel/shmall`
+shmmax=$(cat /proc/sys/kernel/shmmax)
+shmall=$(cat /proc/sys/kernel/shmall)
 echo 268435456 > /proc/sys/kernel/shmmax
 echo 4194304 > /proc/sys/kernel/shmall
 run_test ./hugepage-shm
-echo $shmmax > /proc/sys/kernel/shmmax
-echo $shmall > /proc/sys/kernel/shmall
+echo "$shmmax" > /proc/sys/kernel/shmmax
+echo "$shmall" > /proc/sys/kernel/shmall
 
 run_test ./map_hugetlb
 
-run_test ./hugepage-mremap $mnt/huge_mremap
-rm -f $mnt/huge_mremap
+run_test ./hugepage-mremap "$mnt"/huge_mremap
+rm -f "$mnt"/huge_mremap
 
 run_test ./hugepage-vmemmap
 
-run_test ./hugetlb-madvise $mnt/madvise-test
-rm -f $mnt/madvise-test
+run_test ./hugetlb-madvise "$mnt"/madvise-test
+rm -f "$mnt"/madvise-test
 
 echo "NOTE: The above hugetlb tests provide minimal coverage.  Use"
 echo "      https://github.com/libhugetlbfs/libhugetlbfs.git for"
@@ -124,13 +123,13 @@ run_test ./gup_test -ct -F 0x1 0 19 0x1000
 run_test ./userfaultfd anon 20 16
 # Test requires source and destination huge pages.  Size of source
 # (half_ufd_size_MB) is passed as argument to test.
-run_test ./userfaultfd hugetlb $half_ufd_size_MB 32
+run_test ./userfaultfd hugetlb "$half_ufd_size_MB" 32
 run_test ./userfaultfd shmem 20 16
 
 #cleanup
-umount $mnt
-rm -rf $mnt
-echo $nr_hugepgs > /proc/sys/vm/nr_hugepages
+umount "$mnt"
+rm -rf "$mnt"
+echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages
 
 run_test ./compaction_test
 
-- 
cgit 


From a37ddddd86037c896c702b4df416bc4e51b2a5a0 Mon Sep 17 00:00:00 2001
From: Russ Weight <russell.h.weight@intel.com>
Date: Tue, 26 Apr 2022 09:35:32 -0700
Subject: selftests: firmware: Add firmware upload selftests

Add selftests to verify the firmware upload mechanism. These test
include simple firmware uploads as well as upload cancellation and
error injection. The test creates three firmware devices and verifies
that they all work correctly and independently.

Tested-by: Matthew Gerlach <matthew.gerlach@linux.intel.com>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Reviewed-by: Tianfei zhang <tianfei.zhang@intel.com>
Signed-off-by: Russ Weight <russell.h.weight@intel.com>
Link: https://lore.kernel.org/r/20220426163532.114961-1-russell.h.weight@intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/testing/selftests/firmware/Makefile        |   2 +-
 tools/testing/selftests/firmware/config          |   1 +
 tools/testing/selftests/firmware/fw_lib.sh       |   7 +
 tools/testing/selftests/firmware/fw_run_tests.sh |   4 +
 tools/testing/selftests/firmware/fw_upload.sh    | 214 +++++++++++++++++++++++
 5 files changed, 227 insertions(+), 1 deletion(-)
 create mode 100755 tools/testing/selftests/firmware/fw_upload.sh

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/firmware/Makefile b/tools/testing/selftests/firmware/Makefile
index 40211cd8f0e6..7992969deaa2 100644
--- a/tools/testing/selftests/firmware/Makefile
+++ b/tools/testing/selftests/firmware/Makefile
@@ -4,7 +4,7 @@ CFLAGS = -Wall \
          -O2
 
 TEST_PROGS := fw_run_tests.sh
-TEST_FILES := fw_fallback.sh fw_filesystem.sh fw_lib.sh
+TEST_FILES := fw_fallback.sh fw_filesystem.sh fw_upload.sh fw_lib.sh
 TEST_GEN_FILES := fw_namespace
 
 include ../lib.mk
diff --git a/tools/testing/selftests/firmware/config b/tools/testing/selftests/firmware/config
index bf634dda0720..6e402519b117 100644
--- a/tools/testing/selftests/firmware/config
+++ b/tools/testing/selftests/firmware/config
@@ -3,3 +3,4 @@ CONFIG_FW_LOADER=y
 CONFIG_FW_LOADER_USER_HELPER=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
+CONFIG_FW_UPLOAD=y
diff --git a/tools/testing/selftests/firmware/fw_lib.sh b/tools/testing/selftests/firmware/fw_lib.sh
index 3fa8282b053b..7bffd67800bf 100755
--- a/tools/testing/selftests/firmware/fw_lib.sh
+++ b/tools/testing/selftests/firmware/fw_lib.sh
@@ -64,6 +64,7 @@ check_setup()
 	HAS_FW_LOADER_USER_HELPER_FALLBACK="$(kconfig_has CONFIG_FW_LOADER_USER_HELPER_FALLBACK=y)"
 	HAS_FW_LOADER_COMPRESS_XZ="$(kconfig_has CONFIG_FW_LOADER_COMPRESS_XZ=y)"
 	HAS_FW_LOADER_COMPRESS_ZSTD="$(kconfig_has CONFIG_FW_LOADER_COMPRESS_ZSTD=y)"
+	HAS_FW_UPLOAD="$(kconfig_has CONFIG_FW_UPLOAD=y)"
 	PROC_FW_IGNORE_SYSFS_FALLBACK="0"
 	PROC_FW_FORCE_SYSFS_FALLBACK="0"
 
@@ -119,6 +120,12 @@ verify_reqs()
 			exit 0
 		fi
 	fi
+	if [ "$TEST_REQS_FW_UPLOAD" = "yes" ]; then
+		if [ ! "$HAS_FW_UPLOAD" = "yes" ]; then
+			echo "firmware upload disabled so ignoring test"
+			exit 0
+		fi
+	fi
 }
 
 setup_tmp_file()
diff --git a/tools/testing/selftests/firmware/fw_run_tests.sh b/tools/testing/selftests/firmware/fw_run_tests.sh
index 777377078d5e..f6d95a2d5124 100755
--- a/tools/testing/selftests/firmware/fw_run_tests.sh
+++ b/tools/testing/selftests/firmware/fw_run_tests.sh
@@ -22,6 +22,10 @@ run_tests()
 	proc_set_force_sysfs_fallback $1
 	proc_set_ignore_sysfs_fallback $2
 	$TEST_DIR/fw_fallback.sh
+
+	proc_set_force_sysfs_fallback $1
+	proc_set_ignore_sysfs_fallback $2
+	$TEST_DIR/fw_upload.sh
 }
 
 run_test_config_0001()
diff --git a/tools/testing/selftests/firmware/fw_upload.sh b/tools/testing/selftests/firmware/fw_upload.sh
new file mode 100755
index 000000000000..c7a6f06c9adb
--- /dev/null
+++ b/tools/testing/selftests/firmware/fw_upload.sh
@@ -0,0 +1,214 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# This validates the user-initiated fw upload mechanism of the firmware
+# loader. It verifies that one or more firmware devices can be created
+# for a device driver. It also verifies the data transfer, the
+# cancellation support, and the error flows.
+set -e
+
+TEST_REQS_FW_UPLOAD="yes"
+TEST_DIR=$(dirname $0)
+
+progress_states="preparing transferring  programming"
+errors="hw-error
+	timeout
+	device-busy
+	invalid-file-size
+	read-write-error
+	flash-wearout"
+error_abort="user-abort"
+fwname1=fw1
+fwname2=fw2
+fwname3=fw3
+
+source $TEST_DIR/fw_lib.sh
+
+check_mods
+check_setup
+verify_reqs
+
+trap "upload_finish" EXIT
+
+upload_finish() {
+	local fwdevs="$fwname1 $fwname2 $fwname3"
+
+	for name in $fwdevs; do
+		if [ -e "$DIR/$name" ]; then
+			echo -n "$name" > "$DIR"/upload_unregister
+		fi
+	done
+}
+
+upload_fw() {
+	local name="$1"
+	local file="$2"
+
+	echo 1 > "$DIR"/"$name"/loading
+	cat "$file" > "$DIR"/"$name"/data
+	echo 0 > "$DIR"/"$name"/loading
+}
+
+verify_fw() {
+	local name="$1"
+	local file="$2"
+
+	echo -n "$name" > "$DIR"/config_upload_name
+	if ! cmp "$file" "$DIR"/upload_read > /dev/null 2>&1; then
+		echo "$0: firmware compare for $name did not match" >&2
+		exit 1
+	fi
+
+	echo "$0: firmware upload for $name works" >&2
+	return 0
+}
+
+inject_error() {
+	local name="$1"
+	local status="$2"
+	local error="$3"
+
+	echo 1 > "$DIR"/"$name"/loading
+	echo -n "inject":"$status":"$error" > "$DIR"/"$name"/data
+	echo 0 > "$DIR"/"$name"/loading
+}
+
+await_status() {
+	local name="$1"
+	local expected="$2"
+	local status
+	local i
+
+	let i=0
+	while [ $i -lt 50 ]; do
+		status=$(cat "$DIR"/"$name"/status)
+		if [ "$status" = "$expected" ]; then
+			return 0;
+		fi
+		sleep 1e-03
+		let i=$i+1
+	done
+
+	echo "$0: Invalid status: Expected $expected, Actual $status" >&2
+	return 1;
+}
+
+await_idle() {
+	local name="$1"
+
+	await_status "$name" "idle"
+	return $?
+}
+
+expect_error() {
+	local name="$1"
+	local expected="$2"
+	local error=$(cat "$DIR"/"$name"/error)
+
+	if [ "$error" != "$expected" ]; then
+		echo "Invalid error: Expected $expected, Actual $error" >&2
+		return 1
+	fi
+
+	return 0
+}
+
+random_firmware() {
+	local bs="$1"
+	local count="$2"
+	local file=$(mktemp -p /tmp uploadfwXXX.bin)
+
+	dd if=/dev/urandom of="$file" bs="$bs" count="$count" > /dev/null 2>&1
+	echo "$file"
+}
+
+test_upload_cancel() {
+	local name="$1"
+	local status
+
+	for status in $progress_states; do
+		inject_error $name $status $error_abort
+		if ! await_status $name $status; then
+			exit 1
+		fi
+
+		echo 1 > "$DIR"/"$name"/cancel
+
+		if ! await_idle $name; then
+			exit 1
+		fi
+
+		if ! expect_error $name "$status":"$error_abort"; then
+			exit 1
+		fi
+	done
+
+	echo "$0: firmware upload cancellation works"
+	return 0
+}
+
+test_error_handling() {
+	local name=$1
+	local status
+	local error
+
+	for status in $progress_states; do
+		for error in $errors; do
+			inject_error $name $status $error
+
+			if ! await_idle $name; then
+				exit 1
+			fi
+
+			if ! expect_error $name "$status":"$error"; then
+				exit 1
+			fi
+
+		done
+	done
+	echo "$0: firmware upload error handling works"
+}
+
+test_fw_too_big() {
+	local name=$1
+	local fw_too_big=`random_firmware 512 5`
+	local expected="preparing:invalid-file-size"
+
+	upload_fw $name $fw_too_big
+	rm -f $fw_too_big
+
+	if ! await_idle $name; then
+		exit 1
+	fi
+
+	if ! expect_error $name $expected; then
+		exit 1
+	fi
+
+	echo "$0: oversized firmware error handling works"
+}
+
+echo -n "$fwname1" > "$DIR"/upload_register
+echo -n "$fwname2" > "$DIR"/upload_register
+echo -n "$fwname3" > "$DIR"/upload_register
+
+test_upload_cancel $fwname1
+test_error_handling $fwname1
+test_fw_too_big $fwname1
+
+fw_file1=`random_firmware 512 4`
+fw_file2=`random_firmware 512 3`
+fw_file3=`random_firmware 512 2`
+
+upload_fw $fwname1 $fw_file1
+upload_fw $fwname2 $fw_file2
+upload_fw $fwname3 $fw_file3
+
+verify_fw ${fwname1} ${fw_file1}
+verify_fw ${fwname2} ${fw_file2}
+verify_fw ${fwname3} ${fw_file3}
+
+echo -n "$fwname1" > "$DIR"/upload_unregister
+echo -n "$fwname2" > "$DIR"/upload_unregister
+echo -n "$fwname3" > "$DIR"/upload_unregister
+
+exit 0
-- 
cgit 


From dccfbc73a9ddd2c7232696f563c39d0ca09ae905 Mon Sep 17 00:00:00 2001
From: Michal Suchanek <msuchanek@suse.de>
Date: Fri, 29 Apr 2022 15:40:39 +0200
Subject: testing: nvdimm: iomap: make __nfit_test_ioremap a macro

The ioremap passed as argument to __nfit_test_ioremap can be a macro so
it cannot be passed as function argument. Make __nfit_test_ioremap into
a macro so that ioremap can be passed as untyped macro argument.

Signed-off-by: Michal Suchanek <msuchanek@suse.de>
Fixes: 6bc756193ff6 ("tools/testing/nvdimm: libnvdimm unit test infrastructure")
Link: https://lore.kernel.org/r/20220429134039.18252-1-msuchanek@suse.de
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 tools/testing/nvdimm/test/iomap.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
index b752ce47ead3..ea956082e6a4 100644
--- a/tools/testing/nvdimm/test/iomap.c
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -62,16 +62,14 @@ struct nfit_test_resource *get_nfit_res(resource_size_t resource)
 }
 EXPORT_SYMBOL(get_nfit_res);
 
-static void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size,
-		void __iomem *(*fallback_fn)(resource_size_t, unsigned long))
-{
-	struct nfit_test_resource *nfit_res = get_nfit_res(offset);
-
-	if (nfit_res)
-		return (void __iomem *) nfit_res->buf + offset
-			- nfit_res->res.start;
-	return fallback_fn(offset, size);
-}
+#define __nfit_test_ioremap(offset, size, fallback_fn) ({		\
+	struct nfit_test_resource *nfit_res = get_nfit_res(offset);	\
+	nfit_res ?							\
+		(void __iomem *) nfit_res->buf + (offset)		\
+			- nfit_res->res.start				\
+	:								\
+		fallback_fn((offset), (size)) ;				\
+})
 
 void __iomem *__wrap_devm_ioremap(struct device *dev,
 		resource_size_t offset, unsigned long size)
-- 
cgit 


From d43fae7c4d3e6dfee9d26287907dbe8e27ff8846 Mon Sep 17 00:00:00 2001
From: Michal Suchanek <msuchanek@suse.de>
Date: Fri, 29 Apr 2022 09:43:34 +0200
Subject: testing: nvdimm: asm/mce.h is not needed in nfit.c

asm/mce.h is not available on arm, and it is not needed to build nfit.c.
Remove the include.

It was likely needed for COPY_MC_TEST

Fixes: 3adb776384f2 ("x86, libnvdimm/test: Remove COPY_MC_TEST")
Signed-off-by: Michal Suchanek <msuchanek@suse.de>
Link: https://lore.kernel.org/r/20220429074334.21771-1-msuchanek@suse.de
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 tools/testing/nvdimm/test/nfit.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index e7e1a640e482..c75abb497a1a 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -23,8 +23,6 @@
 #include "nfit_test.h"
 #include "../watermark.h"
 
-#include <asm/mce.h>
-
 /*
  * Generate an NFIT table to describe the following topology:
  *
-- 
cgit 


From 2bfed7d2ffa5d86c462d3e2067f2832eaf8c04c7 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Sat, 19 Mar 2022 02:00:11 +0100
Subject: selftests/seccomp: Don't call read() on TTY from background pgrp

Since commit 92d25637a3a4 ("kselftest: signal all child processes"), tests
are executed in background process groups. This means that trying to read
from stdin now throws SIGTTIN when stdin is a TTY, which breaks some
seccomp selftests that try to use read(0, NULL, 0) as a dummy syscall.

The simplest way to fix that is probably to just use -1 instead of 0 as
the dummy read()'s FD.

Fixes: 92d25637a3a4 ("kselftest: signal all child processes")
Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20220319010011.1374622-1-jannh@google.com
---
 tools/testing/selftests/seccomp/seccomp_bpf.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 9d126d7fabdb..313bb0cbfb1e 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -955,7 +955,7 @@ TEST(ERRNO_valid)
 	ASSERT_EQ(0, ret);
 
 	EXPECT_EQ(parent, syscall(__NR_getppid));
-	EXPECT_EQ(-1, read(0, NULL, 0));
+	EXPECT_EQ(-1, read(-1, NULL, 0));
 	EXPECT_EQ(E2BIG, errno);
 }
 
@@ -974,7 +974,7 @@ TEST(ERRNO_zero)
 
 	EXPECT_EQ(parent, syscall(__NR_getppid));
 	/* "errno" of 0 is ok. */
-	EXPECT_EQ(0, read(0, NULL, 0));
+	EXPECT_EQ(0, read(-1, NULL, 0));
 }
 
 /*
@@ -995,7 +995,7 @@ TEST(ERRNO_capped)
 	ASSERT_EQ(0, ret);
 
 	EXPECT_EQ(parent, syscall(__NR_getppid));
-	EXPECT_EQ(-1, read(0, NULL, 0));
+	EXPECT_EQ(-1, read(-1, NULL, 0));
 	EXPECT_EQ(4095, errno);
 }
 
@@ -1026,7 +1026,7 @@ TEST(ERRNO_order)
 	ASSERT_EQ(0, ret);
 
 	EXPECT_EQ(parent, syscall(__NR_getppid));
-	EXPECT_EQ(-1, read(0, NULL, 0));
+	EXPECT_EQ(-1, read(-1, NULL, 0));
 	EXPECT_EQ(12, errno);
 }
 
@@ -2623,7 +2623,7 @@ void *tsync_sibling(void *data)
 	ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
 	if (!ret)
 		return (void *)SIBLING_EXIT_NEWPRIVS;
-	read(0, NULL, 0);
+	read(-1, NULL, 0);
 	return (void *)SIBLING_EXIT_UNKILLED;
 }
 
-- 
cgit 


From d250a3e4e5b41d9d805a8bfd2458b548d1681742 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Sat, 19 Mar 2022 02:08:38 +0100
Subject: selftests/seccomp: Test PTRACE_O_SUSPEND_SECCOMP without
 CAP_SYS_ADMIN

Add a test to check that PTRACE_O_SUSPEND_SECCOMP can't be set without
CAP_SYS_ADMIN through PTRACE_SEIZE or PTRACE_SETOPTIONS.

Signed-off-by: Jann Horn <jannh@google.com>
Co-developed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 tools/testing/selftests/seccomp/Makefile      |  1 +
 tools/testing/selftests/seccomp/seccomp_bpf.c | 63 +++++++++++++++++++++++++++
 2 files changed, 64 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/seccomp/Makefile b/tools/testing/selftests/seccomp/Makefile
index 585f7a0c10cb..f017c382c036 100644
--- a/tools/testing/selftests/seccomp/Makefile
+++ b/tools/testing/selftests/seccomp/Makefile
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 CFLAGS += -Wl,-no-as-needed -Wall -isystem ../../../../usr/include/
 LDFLAGS += -lpthread
+LDLIBS += -lcap
 
 TEST_GEN_PROGS := seccomp_bpf seccomp_benchmark
 include ../lib.mk
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 313bb0cbfb1e..4da905180f89 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -46,6 +46,7 @@
 #include <sys/ioctl.h>
 #include <linux/kcmp.h>
 #include <sys/resource.h>
+#include <sys/capability.h>
 
 #include <unistd.h>
 #include <sys/syscall.h>
@@ -4231,6 +4232,68 @@ TEST(user_notification_addfd_rlimit)
 	close(memfd);
 }
 
+/* Make sure PTRACE_O_SUSPEND_SECCOMP requires CAP_SYS_ADMIN. */
+FIXTURE(O_SUSPEND_SECCOMP) {
+	pid_t pid;
+};
+
+FIXTURE_SETUP(O_SUSPEND_SECCOMP)
+{
+	ERRNO_FILTER(block_read, E2BIG);
+	cap_value_t cap_list[] = { CAP_SYS_ADMIN };
+	cap_t caps;
+
+	self->pid = 0;
+
+	/* make sure we don't have CAP_SYS_ADMIN */
+	caps = cap_get_proc();
+	ASSERT_NE(NULL, caps);
+	ASSERT_EQ(0, cap_set_flag(caps, CAP_EFFECTIVE, 1, cap_list, CAP_CLEAR));
+	ASSERT_EQ(0, cap_set_proc(caps));
+	cap_free(caps);
+
+	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+	ASSERT_EQ(0, prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_block_read));
+
+	self->pid = fork();
+	ASSERT_GE(self->pid, 0);
+
+	if (self->pid == 0) {
+		while (1)
+			pause();
+		_exit(127);
+	}
+}
+
+FIXTURE_TEARDOWN(O_SUSPEND_SECCOMP)
+{
+	if (self->pid)
+		kill(self->pid, SIGKILL);
+}
+
+TEST_F(O_SUSPEND_SECCOMP, setoptions)
+{
+	int wstatus;
+
+	ASSERT_EQ(0, ptrace(PTRACE_ATTACH, self->pid, NULL, 0));
+	ASSERT_EQ(self->pid, wait(&wstatus));
+	ASSERT_EQ(-1, ptrace(PTRACE_SETOPTIONS, self->pid, NULL, PTRACE_O_SUSPEND_SECCOMP));
+	if (errno == EINVAL)
+		SKIP(return, "Kernel does not support PTRACE_O_SUSPEND_SECCOMP (missing CONFIG_CHECKPOINT_RESTORE?)");
+	ASSERT_EQ(EPERM, errno);
+}
+
+TEST_F(O_SUSPEND_SECCOMP, seize)
+{
+	int ret;
+
+	ret = ptrace(PTRACE_SEIZE, self->pid, NULL, PTRACE_O_SUSPEND_SECCOMP);
+	ASSERT_EQ(-1, ret);
+	if (errno == EINVAL)
+		SKIP(return, "Kernel does not support PTRACE_O_SUSPEND_SECCOMP (missing CONFIG_CHECKPOINT_RESTORE?)");
+	ASSERT_EQ(EPERM, errno);
+}
+
 /*
  * TODO:
  * - expand NNP testing
-- 
cgit 


From 95a126d9812ff51f7ff3e42d956390ff9a1801f8 Mon Sep 17 00:00:00 2001
From: Yang Guang <yang.guang5@zte.com.cn>
Date: Wed, 30 Mar 2022 08:22:10 +0800
Subject: selftests/seccomp: Add SKIP for failed unshare()

Running the seccomp tests under the kernel with "defconfig"
shouldn't fail. Because the CONFIG_USER_NS is not supported
in "defconfig". Skipping this case instead of failing it is
better.

Signed-off-by: Yang Guang <yang.guang5@zte.com.cn>
Signed-off-by: David Yang <davidcomponentone@gmail.com>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/7f7687696a5c0a2d040a24474616e945c7cf2bb5.1648599460.git.yang.guang5@zte.com.cn
---
 tools/testing/selftests/seccomp/seccomp_bpf.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 4da905180f89..38839ad939f8 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -3743,7 +3743,10 @@ TEST(user_notification_fault_recv)
 	struct seccomp_notif req = {};
 	struct seccomp_notif_resp resp = {};
 
-	ASSERT_EQ(unshare(CLONE_NEWUSER), 0);
+	ASSERT_EQ(unshare(CLONE_NEWUSER), 0) {
+		if (errno == EINVAL)
+			SKIP(return, "kernel missing CLONE_NEWUSER support");
+	}
 
 	listener = user_notif_syscall(__NR_getppid,
 				      SECCOMP_FILTER_FLAG_NEW_LISTENER);
-- 
cgit 


From 662340ef921828507c931da6db303fa3cb02228e Mon Sep 17 00:00:00 2001
From: Sargun Dhillon <sargun@sargun.me>
Date: Wed, 27 Apr 2022 18:54:47 -0700
Subject: selftests/seccomp: Ensure that notifications come in FIFO order

When multiple notifications are waiting, ensure they show up in order, as
defined by the (predictable) seccomp notification ID. This ensures FIFO
ordering of notification delivery as notification ids are monitonic and
decided when the notification is generated (as opposed to received).

Signed-off-by: Sargun Dhillon <sargun@sargun.me>
Cc: linux-kselftest@vger.kernel.org
Acked-by: Tycho Andersen <tycho@tycho.pizza>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20220428015447.13661-2-sargun@sargun.me
---
 tools/testing/selftests/seccomp/seccomp_bpf.c | 109 ++++++++++++++++++++++++++
 1 file changed, 109 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 38839ad939f8..6001e9ecfaf5 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -4297,6 +4297,115 @@ TEST_F(O_SUSPEND_SECCOMP, seize)
 	ASSERT_EQ(EPERM, errno);
 }
 
+static char get_proc_stat(int pid)
+{
+	char proc_path[100] = {0};
+	char *line = NULL;
+	size_t len = 0;
+	ssize_t nread;
+	char status;
+	FILE *f;
+	int i;
+
+	snprintf(proc_path, sizeof(proc_path), "/proc/%d/stat", pid);
+	f = fopen(proc_path, "r");
+	if (f == NULL)
+		ksft_exit_fail_msg("%s - Could not open %s\n",
+				   strerror(errno), proc_path);
+
+	for (i = 0; i < 3; i++) {
+		nread = getdelim(&line, &len, ' ', f);
+		if (nread <= 0)
+			ksft_exit_fail_msg("Failed to read status: %s\n",
+					   strerror(errno));
+	}
+
+	status = *line;
+	free(line);
+	fclose(f);
+
+	return status;
+}
+
+TEST(user_notification_fifo)
+{
+	struct seccomp_notif_resp resp = {};
+	struct seccomp_notif req = {};
+	int i, status, listener;
+	pid_t pid, pids[3];
+	__u64 baseid;
+	long ret;
+	/* 100 ms */
+	struct timespec delay = { .tv_nsec = 100000000 };
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+	}
+
+	/* Setup a listener */
+	listener = user_notif_syscall(__NR_getppid,
+				      SECCOMP_FILTER_FLAG_NEW_LISTENER);
+	ASSERT_GE(listener, 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		ret = syscall(__NR_getppid);
+		exit(ret != USER_NOTIF_MAGIC);
+	}
+
+	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+	baseid = req.id + 1;
+
+	resp.id = req.id;
+	resp.error = 0;
+	resp.val = USER_NOTIF_MAGIC;
+
+	/* check that we make sure flags == 0 */
+	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
+
+	EXPECT_EQ(waitpid(pid, &status, 0), pid);
+	EXPECT_EQ(true, WIFEXITED(status));
+	EXPECT_EQ(0, WEXITSTATUS(status));
+
+	/* Start children, and generate notifications */
+	for (i = 0; i < ARRAY_SIZE(pids); i++) {
+		pid = fork();
+		if (pid == 0) {
+			ret = syscall(__NR_getppid);
+			exit(ret != USER_NOTIF_MAGIC);
+		}
+		pids[i] = pid;
+	}
+
+	/* This spins until all of the children are sleeping */
+restart_wait:
+	for (i = 0; i < ARRAY_SIZE(pids); i++) {
+		if (get_proc_stat(pids[i]) != 'S') {
+			nanosleep(&delay, NULL);
+			goto restart_wait;
+		}
+	}
+
+	/* Read the notifications in order (and respond) */
+	for (i = 0; i < ARRAY_SIZE(pids); i++) {
+		memset(&req, 0, sizeof(req));
+		EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+		EXPECT_EQ(req.id, baseid + i);
+		resp.id = req.id;
+		EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
+	}
+
+	/* Make sure notifications were received */
+	for (i = 0; i < ARRAY_SIZE(pids); i++) {
+		EXPECT_EQ(waitpid(pids[i], &status, 0), pids[i]);
+		EXPECT_EQ(true, WIFEXITED(status));
+		EXPECT_EQ(0, WEXITSTATUS(status));
+	}
+}
+
 /*
  * TODO:
  * - expand NNP testing
-- 
cgit 


From 6c26df84e1f2f9181c0741865105a53537da842c Mon Sep 17 00:00:00 2001
From: Yosry Ahmed <yosryahmed@google.com>
Date: Fri, 29 Apr 2022 14:36:59 -0700
Subject: selftests: cgroup: return -errno from cg_read()/cg_write() on failure

Currently, cg_read()/cg_write() returns 0 on success and -1 on failure.
Modify them to return the -errno on failure.

Link: https://lkml.kernel.org/r/20220425190040.2475377-3-yosryahmed@google.com
Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
Acked-by: Shakeel Butt <shakeelb@google.com>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Chen Wandun <chenwandun@huawei.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Michal Hocko <mhocko@suse.com>
Cc: "Michal Koutn" <mkoutny@suse.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Vaibhav Jain <vaibhav@linux.ibm.com>
Cc: Wei Xu <weixugc@google.com>
Cc: Yu Zhao <yuzhao@google.com>
Cc: Zefan Li <lizefan.x@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/cgroup/cgroup_util.c | 44 ++++++++++++----------------
 1 file changed, 19 insertions(+), 25 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c
index dbaa7aabbb4a..e6f3679cdcc0 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.c
+++ b/tools/testing/selftests/cgroup/cgroup_util.c
@@ -19,6 +19,7 @@
 #include "cgroup_util.h"
 #include "../clone3/clone3_selftests.h"
 
+/* Returns read len on success, or -errno on failure. */
 static ssize_t read_text(const char *path, char *buf, size_t max_len)
 {
 	ssize_t len;
@@ -26,35 +27,29 @@ static ssize_t read_text(const char *path, char *buf, size_t max_len)
 
 	fd = open(path, O_RDONLY);
 	if (fd < 0)
-		return fd;
+		return -errno;
 
 	len = read(fd, buf, max_len - 1);
-	if (len < 0)
-		goto out;
 
-	buf[len] = 0;
-out:
+	if (len >= 0)
+		buf[len] = 0;
+
 	close(fd);
-	return len;
+	return len < 0 ? -errno : len;
 }
 
+/* Returns written len on success, or -errno on failure. */
 static ssize_t write_text(const char *path, char *buf, ssize_t len)
 {
 	int fd;
 
 	fd = open(path, O_WRONLY | O_APPEND);
 	if (fd < 0)
-		return fd;
+		return -errno;
 
 	len = write(fd, buf, len);
-	if (len < 0) {
-		close(fd);
-		return len;
-	}
-
 	close(fd);
-
-	return len;
+	return len < 0 ? -errno : len;
 }
 
 char *cg_name(const char *root, const char *name)
@@ -87,16 +82,16 @@ char *cg_control(const char *cgroup, const char *control)
 	return ret;
 }
 
+/* Returns 0 on success, or -errno on failure. */
 int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
 {
 	char path[PATH_MAX];
+	ssize_t ret;
 
 	snprintf(path, sizeof(path), "%s/%s", cgroup, control);
 
-	if (read_text(path, buf, len) >= 0)
-		return 0;
-
-	return -1;
+	ret = read_text(path, buf, len);
+	return ret >= 0 ? 0 : ret;
 }
 
 int cg_read_strcmp(const char *cgroup, const char *control,
@@ -177,17 +172,15 @@ long cg_read_lc(const char *cgroup, const char *control)
 	return cnt;
 }
 
+/* Returns 0 on success, or -errno on failure. */
 int cg_write(const char *cgroup, const char *control, char *buf)
 {
 	char path[PATH_MAX];
-	ssize_t len = strlen(buf);
+	ssize_t len = strlen(buf), ret;
 
 	snprintf(path, sizeof(path), "%s/%s", cgroup, control);
-
-	if (write_text(path, buf, len) == len)
-		return 0;
-
-	return -1;
+	ret = write_text(path, buf, len);
+	return ret == len ? 0 : ret;
 }
 
 int cg_find_unified_root(char *root, size_t len)
@@ -545,7 +538,8 @@ ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t
 	else
 		snprintf(path, sizeof(path), "/proc/%d/%s", pid, item);
 
-	return read_text(path, buf, size);
+	size = read_text(path, buf, size);
+	return size < 0 ? -1 : size;
 }
 
 int proc_read_strstr(int pid, bool thread, const char *item, const char *needle)
-- 
cgit 


From a3622a53e620700053b648478dbc638ad373be3b Mon Sep 17 00:00:00 2001
From: Yosry Ahmed <yosryahmed@google.com>
Date: Fri, 29 Apr 2022 14:36:59 -0700
Subject: selftests: cgroup: fix alloc_anon_noexit() instantly freeing memory

Currently, alloc_anon_noexit() calls alloc_anon() which instantly frees
the allocated memory. alloc_anon_noexit() is usually used with
cg_run_nowait() to run a process in the background that allocates
memory. It makes sense for the background process to keep the memory
allocated and not instantly free it (otherwise there is no point of
running it in the background).

Link: https://lkml.kernel.org/r/20220425190040.2475377-4-yosryahmed@google.com
Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
Acked-by: Shakeel Butt <shakeelb@google.com>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Chen Wandun <chenwandun@huawei.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Michal Hocko <mhocko@suse.com>
Cc: "Michal Koutn" <mkoutny@suse.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Vaibhav Jain <vaibhav@linux.ibm.com>
Cc: Wei Xu <weixugc@google.com>
Cc: Yu Zhao <yuzhao@google.com>
Cc: Zefan Li <lizefan.x@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/cgroup/test_memcontrol.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
index 9c1f19fe2e37..a639bf49d396 100644
--- a/tools/testing/selftests/cgroup/test_memcontrol.c
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -211,13 +211,17 @@ static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
 static int alloc_anon_noexit(const char *cgroup, void *arg)
 {
 	int ppid = getppid();
+	size_t size = (unsigned long)arg;
+	char *buf, *ptr;
 
-	if (alloc_anon(cgroup, arg))
-		return -1;
+	buf = malloc(size);
+	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
+		*ptr = 0;
 
 	while (getppid() == ppid)
 		sleep(1);
 
+	free(buf);
 	return 0;
 }
 
-- 
cgit 


From eae3cb2e87ff84547e66211b81301a8f9122840f Mon Sep 17 00:00:00 2001
From: Yosry Ahmed <yosryahmed@google.com>
Date: Fri, 29 Apr 2022 14:37:00 -0700
Subject: selftests: cgroup: add a selftest for memory.reclaim

Add a new test for memory.reclaim that verifies that the interface
correctly reclaims memory as intended, from both anon and file pages.

Link: https://lkml.kernel.org/r/20220425190040.2475377-5-yosryahmed@google.com
Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Chen Wandun <chenwandun@huawei.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Michal Hocko <mhocko@suse.com>
Cc: "Michal Koutn" <mkoutny@suse.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Vaibhav Jain <vaibhav@linux.ibm.com>
Cc: Wei Xu <weixugc@google.com>
Cc: Yu Zhao <yuzhao@google.com>
Cc: Zefan Li <lizefan.x@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/cgroup/test_memcontrol.c | 106 +++++++++++++++++++++++
 1 file changed, 106 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
index a639bf49d396..ee7defb17280 100644
--- a/tools/testing/selftests/cgroup/test_memcontrol.c
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -760,6 +760,111 @@ cleanup:
 	return ret;
 }
 
+/*
+ * This test checks that memory.reclaim reclaims the given
+ * amount of memory (from both anon and file, if possible).
+ */
+static int test_memcg_reclaim(const char *root)
+{
+	int ret = KSFT_FAIL, fd, retries;
+	char *memcg;
+	long current, expected_usage, to_reclaim;
+	char buf[64];
+
+	memcg = cg_name(root, "memcg_test");
+	if (!memcg)
+		goto cleanup;
+
+	if (cg_create(memcg))
+		goto cleanup;
+
+	current = cg_read_long(memcg, "memory.current");
+	if (current != 0)
+		goto cleanup;
+
+	fd = get_temp_fd();
+	if (fd < 0)
+		goto cleanup;
+
+	cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd);
+
+	/*
+	 * If swap is enabled, try to reclaim from both anon and file, else try
+	 * to reclaim from file only.
+	 */
+	if (is_swap_enabled()) {
+		cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50));
+		expected_usage = MB(100);
+	} else
+		expected_usage = MB(50);
+
+	/*
+	 * Wait until current usage reaches the expected usage (or we run out of
+	 * retries).
+	 */
+	retries = 5;
+	while (!values_close(cg_read_long(memcg, "memory.current"),
+			    expected_usage, 10)) {
+		if (retries--) {
+			sleep(1);
+			continue;
+		} else {
+			fprintf(stderr,
+				"failed to allocate %ld for memcg reclaim test\n",
+				expected_usage);
+			goto cleanup;
+		}
+	}
+
+	/*
+	 * Reclaim until current reaches 30M, this makes sure we hit both anon
+	 * and file if swap is enabled.
+	 */
+	retries = 5;
+	while (true) {
+		int err;
+
+		current = cg_read_long(memcg, "memory.current");
+		to_reclaim = current - MB(30);
+
+		/*
+		 * We only keep looping if we get EAGAIN, which means we could
+		 * not reclaim the full amount.
+		 */
+		if (to_reclaim <= 0)
+			goto cleanup;
+
+
+		snprintf(buf, sizeof(buf), "%ld", to_reclaim);
+		err = cg_write(memcg, "memory.reclaim", buf);
+		if (!err) {
+			/*
+			 * If writing succeeds, then the written amount should have been
+			 * fully reclaimed (and maybe more).
+			 */
+			current = cg_read_long(memcg, "memory.current");
+			if (!values_close(current, MB(30), 3) && current > MB(30))
+				goto cleanup;
+			break;
+		}
+
+		/* The kernel could not reclaim the full amount, try again. */
+		if (err == -EAGAIN && retries--)
+			continue;
+
+		/* We got an unexpected error or ran out of retries. */
+		goto cleanup;
+	}
+
+	ret = KSFT_PASS;
+cleanup:
+	cg_destroy(memcg);
+	free(memcg);
+	close(fd);
+
+	return ret;
+}
+
 static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
 {
 	long mem_max = (long)arg;
@@ -1264,6 +1369,7 @@ struct memcg_test {
 	T(test_memcg_high),
 	T(test_memcg_high_sync),
 	T(test_memcg_max),
+	T(test_memcg_reclaim),
 	T(test_memcg_oom_events),
 	T(test_memcg_swap_max),
 	T(test_memcg_sock),
-- 
cgit 


From 5ac1d2d6345190907e260daedd980ab3be512cf0 Mon Sep 17 00:00:00 2001
From: Mat Martineau <mathew.j.martineau@linux.intel.com>
Date: Wed, 27 Apr 2022 15:50:02 -0700
Subject: selftests: mptcp: Add tests for userspace PM type

These tests ensure that the in-kernel path manager is bypassed when
the userspace path manager is configured. Kernel code is still
responsible for ADD_ADDR echo, so also make sure that's working.

Tested-by: Geliang Tang <geliang.tang@suse.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Co-developed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 66 +++++++++++++++++++++++++
 1 file changed, 66 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index e5c8fc2816fb..b27854f976f7 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -70,6 +70,7 @@ init_partial()
 		ip netns add $netns || exit $ksft_skip
 		ip -net $netns link set lo up
 		ip netns exec $netns sysctl -q net.mptcp.enabled=1
+		ip netns exec $netns sysctl -q net.mptcp.pm_type=0
 		ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0
 		ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0
 		if [ $checksum -eq 1 ]; then
@@ -1611,6 +1612,13 @@ wait_attempt_fail()
 	return 1
 }
 
+set_userspace_pm()
+{
+	local ns=$1
+
+	ip netns exec $ns sysctl -q net.mptcp.pm_type=1
+}
+
 subflows_tests()
 {
 	if reset "no JOIN"; then
@@ -2698,6 +2706,63 @@ fail_tests()
 	fi
 }
 
+userspace_tests()
+{
+	# userspace pm type prevents add_addr
+	if reset "userspace pm type prevents add_addr"; then
+		set_userspace_pm $ns1
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 0 2
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 0 0 0
+		chk_add_nr 0 0
+	fi
+
+	# userspace pm type rejects join
+	if reset "userspace pm type rejects join"; then
+		set_userspace_pm $ns1
+		pm_nl_set_limits $ns1 1 1
+		pm_nl_set_limits $ns2 1 1
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 1 1 0
+	fi
+
+	# userspace pm type does not send join
+	if reset "userspace pm type does not send join"; then
+		set_userspace_pm $ns2
+		pm_nl_set_limits $ns1 1 1
+		pm_nl_set_limits $ns2 1 1
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 0 0 0
+	fi
+
+	# userspace pm type prevents mp_prio
+	if reset "userspace pm type prevents mp_prio"; then
+		set_userspace_pm $ns1
+		pm_nl_set_limits $ns1 1 1
+		pm_nl_set_limits $ns2 1 1
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup
+		chk_join_nr 1 1 0
+		chk_prio_nr 0 0
+	fi
+
+	# userspace pm type prevents rm_addr
+	if reset "userspace pm type prevents rm_addr"; then
+		set_userspace_pm $ns1
+		set_userspace_pm $ns2
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1 0 0 -1 slow
+		chk_join_nr 0 0 0
+		chk_rm_nr 0 0
+	fi
+}
+
 implicit_tests()
 {
 	# userspace pm type prevents add_addr
@@ -2767,6 +2832,7 @@ all_tests_sorted=(
 	m@fullmesh_tests
 	z@fastclose_tests
 	F@fail_tests
+	u@userspace_tests
 	I@implicit_tests
 )
 
-- 
cgit 


From 38dcd9570d6fced1dcfee226d5b29722b070ce90 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Thu, 28 Apr 2022 12:45:10 +0800
Subject: selftests/net: add missing tests to Makefile

When generating the selftests to another folder, the fixed tests are
missing as they are not in Makefile, e.g.

  make -C tools/testing/selftests/ install \
  	TARGETS="net" INSTALL_PATH=/tmp/kselftests

Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 3fe2515aa616..0f2ebc38d893 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -30,7 +30,7 @@ TEST_PROGS += ioam6.sh
 TEST_PROGS += gro.sh
 TEST_PROGS += gre_gso.sh
 TEST_PROGS += cmsg_so_mark.sh
-TEST_PROGS += cmsg_time.sh
+TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh
 TEST_PROGS += srv6_end_dt46_l3vpn_test.sh
 TEST_PROGS += srv6_end_dt4_l3vpn_test.sh
 TEST_PROGS += srv6_end_dt6_l3vpn_test.sh
@@ -54,6 +54,7 @@ TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
 TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
 TEST_GEN_FILES += toeplitz
 TEST_GEN_FILES += cmsg_sender
+TEST_PROGS += test_vxlan_vnifiltering.sh
 
 TEST_FILES := settings
 
-- 
cgit 


From f62c5acc800eebfe25bf6738b2cc8aa5d6aa7598 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Thu, 28 Apr 2022 12:45:11 +0800
Subject: selftests/net/forwarding: add missing tests to Makefile

When generating the selftests to another folder, the fixed tests are
missing as they are not in Makefile, e.g.

  make -C tools/testing/selftests/ install \
  	TARGETS="net/forwarding" INSTALL_PATH=/tmp/kselftests

Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/forwarding/Makefile | 33 +++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index 8fa97ae9af9e..c87e674b61b1 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -2,15 +2,31 @@
 
 TEST_PROGS = bridge_igmp.sh \
 	bridge_locked_port.sh \
+	bridge_mld.sh \
 	bridge_port_isolation.sh \
 	bridge_sticky_fdb.sh \
 	bridge_vlan_aware.sh \
+	bridge_vlan_mcast.sh \
 	bridge_vlan_unaware.sh \
+	custom_multipath_hash.sh \
+	dual_vxlan_bridge.sh \
+	ethtool_extended_state.sh \
 	ethtool.sh \
+	gre_custom_multipath_hash.sh \
 	gre_inner_v4_multipath.sh \
 	gre_inner_v6_multipath.sh \
+	gre_multipath_nh_res.sh \
+	gre_multipath_nh.sh \
 	gre_multipath.sh \
+	hw_stats_l3.sh \
 	ip6_forward_instats_vrf.sh \
+	ip6gre_custom_multipath_hash.sh \
+	ip6gre_flat_key.sh \
+	ip6gre_flat_keys.sh \
+	ip6gre_flat.sh \
+	ip6gre_hier_key.sh \
+	ip6gre_hier_keys.sh \
+	ip6gre_hier.sh \
 	ip6gre_inner_v4_multipath.sh \
 	ip6gre_inner_v6_multipath.sh \
 	ipip_flat_gre_key.sh \
@@ -34,36 +50,53 @@ TEST_PROGS = bridge_igmp.sh \
 	mirror_gre_vlan_bridge_1q.sh \
 	mirror_gre_vlan.sh \
 	mirror_vlan.sh \
+	pedit_dsfield.sh \
+	pedit_ip.sh \
+	pedit_l4port.sh \
+	q_in_vni_ipv6.sh \
+	q_in_vni.sh \
 	router_bridge.sh \
 	router_bridge_vlan.sh \
 	router_broadcast.sh \
+	router_mpath_nh_res.sh \
 	router_mpath_nh.sh \
 	router_multicast.sh \
 	router_multipath.sh \
+	router_nh.sh \
 	router.sh \
 	router_vid_1.sh \
 	sch_ets.sh \
+	sch_red.sh \
 	sch_tbf_ets.sh \
 	sch_tbf_prio.sh \
 	sch_tbf_root.sh \
+	skbedit_priority.sh \
 	tc_actions.sh \
 	tc_chains.sh \
 	tc_flower_router.sh \
 	tc_flower.sh \
 	tc_mpls_l2vpn.sh \
+	tc_police.sh \
 	tc_shblocks.sh \
 	tc_vlan_modify.sh \
+	vxlan_asymmetric_ipv6.sh \
 	vxlan_asymmetric.sh \
+	vxlan_bridge_1d_ipv6.sh \
+	vxlan_bridge_1d_port_8472_ipv6.sh \
 	vxlan_bridge_1d_port_8472.sh \
 	vxlan_bridge_1d.sh \
+	vxlan_bridge_1q_ipv6.sh \
+	vxlan_bridge_1q_port_8472_ipv6.sh
 	vxlan_bridge_1q_port_8472.sh \
 	vxlan_bridge_1q.sh \
+	vxlan_symmetric_ipv6.sh \
 	vxlan_symmetric.sh
 
 TEST_PROGS_EXTENDED := devlink_lib.sh \
 	ethtool_lib.sh \
 	fib_offload_lib.sh \
 	forwarding.config.sample \
+	ip6gre_lib.sh \
 	ipip_lib.sh \
 	lib.sh \
 	mirror_gre_lib.sh \
-- 
cgit 


From a313f858ed368d11579df62e8f7d0ac65f853bc9 Mon Sep 17 00:00:00 2001
From: Jaehee Park <jhpark1013@gmail.com>
Date: Fri, 29 Apr 2022 12:46:58 -0400
Subject: selftests: net: vrf_strict_mode_test: add support to select a test to
 run

Add a boilerplate test loop to run all tests in
vrf_strict_mode_test.sh. Add a -t flag that allows a selected test to
run. Remove the vrf_strict_mode_tests function which is now unused.

Signed-off-by: Jaehee Park <jhpark1013@gmail.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://lore.kernel.org/r/20220429164658.GA656707@jaehee-ThinkPad-X1-Extreme
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 .../testing/selftests/net/vrf_strict_mode_test.sh  | 48 ++++++++++++++++++----
 1 file changed, 39 insertions(+), 9 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/vrf_strict_mode_test.sh b/tools/testing/selftests/net/vrf_strict_mode_test.sh
index 865d53c1781c..417d214264f3 100755
--- a/tools/testing/selftests/net/vrf_strict_mode_test.sh
+++ b/tools/testing/selftests/net/vrf_strict_mode_test.sh
@@ -14,6 +14,8 @@ INIT_NETNS_NAME="init"
 
 PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
 
+TESTS="init testns mix"
+
 log_test()
 {
 	local rc=$1
@@ -262,6 +264,8 @@ cleanup()
 
 vrf_strict_mode_tests_init()
 {
+	log_section "VRF strict_mode test on init network namespace"
+
 	vrf_strict_mode_check_support init
 
 	strict_mode_check_default init
@@ -292,6 +296,8 @@ vrf_strict_mode_tests_init()
 
 vrf_strict_mode_tests_testns()
 {
+	log_section "VRF strict_mode test on testns network namespace"
+
 	vrf_strict_mode_check_support testns
 
 	strict_mode_check_default testns
@@ -318,6 +324,8 @@ vrf_strict_mode_tests_testns()
 
 vrf_strict_mode_tests_mix()
 {
+	log_section "VRF strict_mode test mixing init and testns network namespaces"
+
 	read_strict_mode_compare_and_check init 1
 
 	read_strict_mode_compare_and_check testns 0
@@ -341,18 +349,30 @@ vrf_strict_mode_tests_mix()
 	read_strict_mode_compare_and_check testns 0
 }
 
-vrf_strict_mode_tests()
-{
-	log_section "VRF strict_mode test on init network namespace"
-	vrf_strict_mode_tests_init
+################################################################################
+# usage
 
-	log_section "VRF strict_mode test on testns network namespace"
-	vrf_strict_mode_tests_testns
+usage()
+{
+	cat <<EOF
+usage: ${0##*/} OPTS
 
-	log_section "VRF strict_mode test mixing init and testns network namespaces"
-	vrf_strict_mode_tests_mix
+	-t <test>	Test(s) to run (default: all)
+			(options: $TESTS)
+EOF
 }
 
+################################################################################
+# main
+
+while getopts ":t:h" opt; do
+	case $opt in
+		t) TESTS=$OPTARG;;
+		h) usage; exit 0;;
+		*) usage; exit 1;;
+	esac
+done
+
 vrf_strict_mode_check_support()
 {
 	local nsname=$1
@@ -391,7 +411,17 @@ fi
 cleanup &> /dev/null
 
 setup
-vrf_strict_mode_tests
+for t in $TESTS
+do
+	case $t in
+	vrf_strict_mode_tests_init|init) vrf_strict_mode_tests_init;;
+	vrf_strict_mode_tests_testns|testns) vrf_strict_mode_tests_testns;;
+	vrf_strict_mode_tests_mix|mix) vrf_strict_mode_tests_mix;;
+
+	help) echo "Test names: $TESTS"; exit 0;;
+
+	esac
+done
 cleanup
 
 print_log_test_results
-- 
cgit 


From 954f46d2f0b4a76405a5e0788e3f80a24c657fd4 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sun, 1 May 2022 14:29:53 +0300
Subject: selftests: forwarding: add Per-Stream Filtering and Policing test for
 Ocelot

The Felix VSC9959 switch in NXP LS1028A supports the tc-gate action
which enforced time-based access control per stream. A stream as seen by
this switch is identified by {MAC DA, VID}.

We use the standard forwarding selftest topology with 2 host interfaces
and 2 switch interfaces. The host ports must require timestamping non-IP
packets and supporting tc-etf offload, for isochron to work. The
isochron program monitors network sync status (ptp4l, phc2sys) and
deterministically transmits packets to the switch such that the tc-gate
action either (a) always accepts them based on its schedule, or
(b) always drops them.

I tried to keep as much of the logic that isn't specific to the NXP
LS1028A in a new tsn_lib.sh, for future reuse. This covers
synchronization using ptp4l and phc2sys, and isochron.

The cycle-time chosen for this selftest isn't particularly impressive
(and the focus is the functionality of the switch), but I didn't really
know what to do better, considering that it will mostly be run during
debugging sessions, various kernel bloatware would be enabled, like
lockdep, KASAN, etc, and we certainly can't run any races with those on.

I tried to look through the kselftest framework for other real time
applications and didn't really find any, so I'm not sure how better to
prepare the environment in case we want to go for a lower cycle time.
At the moment, the only thing the selftest is ensuring is that dynamic
frequency scaling is disabled on the CPU that isochron runs on. It would
probably be useful to have a blacklist of kernel config options (checked
through zcat /proc/config.gz) and some cyclictest scripts to run
beforehand, but I saw none of those.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de>
Link: https://lore.kernel.org/r/20220501112953.3298973-1-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/drivers/net/ocelot/psfp.sh | 327 +++++++++++++++++++++
 tools/testing/selftests/net/forwarding/tsn_lib.sh  | 235 +++++++++++++++
 2 files changed, 562 insertions(+)
 create mode 100755 tools/testing/selftests/drivers/net/ocelot/psfp.sh
 create mode 100644 tools/testing/selftests/net/forwarding/tsn_lib.sh

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/drivers/net/ocelot/psfp.sh b/tools/testing/selftests/drivers/net/ocelot/psfp.sh
new file mode 100755
index 000000000000..5a5cee92c665
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/ocelot/psfp.sh
@@ -0,0 +1,327 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright 2021-2022 NXP
+
+# Note: On LS1028A, in lack of enough user ports, this setup requires patching
+# the device tree to use the second CPU port as a user port
+
+WAIT_TIME=1
+NUM_NETIFS=4
+STABLE_MAC_ADDRS=yes
+NETIF_CREATE=no
+lib_dir=$(dirname $0)/../../../net/forwarding
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+source $lib_dir/tsn_lib.sh
+
+UDS_ADDRESS_H1="/var/run/ptp4l_h1"
+UDS_ADDRESS_SWP1="/var/run/ptp4l_swp1"
+
+# Tunables
+NUM_PKTS=1000
+STREAM_VID=100
+STREAM_PRIO=6
+# Use a conservative cycle of 10 ms to allow the test to still pass when the
+# kernel has some extra overhead like lockdep etc
+CYCLE_TIME_NS=10000000
+# Create two Gate Control List entries, one OPEN and one CLOSE, of equal
+# durations
+GATE_DURATION_NS=$((${CYCLE_TIME_NS} / 2))
+# Give 2/3 of the cycle time to user space and 1/3 to the kernel
+FUDGE_FACTOR=$((${CYCLE_TIME_NS} / 3))
+# Shift the isochron base time by half the gate time, so that packets are
+# always received by swp1 close to the middle of the time slot, to minimize
+# inaccuracies due to network sync
+SHIFT_TIME_NS=$((${GATE_DURATION_NS} / 2))
+
+h1=${NETIFS[p1]}
+swp1=${NETIFS[p2]}
+swp2=${NETIFS[p3]}
+h2=${NETIFS[p4]}
+
+H1_IPV4="192.0.2.1"
+H2_IPV4="192.0.2.2"
+H1_IPV6="2001:db8:1::1"
+H2_IPV6="2001:db8:1::2"
+
+# Chain number exported by the ocelot driver for
+# Per-Stream Filtering and Policing filters
+PSFP()
+{
+	echo 30000
+}
+
+psfp_chain_create()
+{
+	local if_name=$1
+
+	tc qdisc add dev $if_name clsact
+
+	tc filter add dev $if_name ingress chain 0 pref 49152 flower \
+		skip_sw action goto chain $(PSFP)
+}
+
+psfp_chain_destroy()
+{
+	local if_name=$1
+
+	tc qdisc del dev $if_name clsact
+}
+
+psfp_filter_check()
+{
+	local expected=$1
+	local packets=""
+	local drops=""
+	local stats=""
+
+	stats=$(tc -j -s filter show dev ${swp1} ingress chain $(PSFP) pref 1)
+	packets=$(echo ${stats} | jq ".[1].options.actions[].stats.packets")
+	drops=$(echo ${stats} | jq ".[1].options.actions[].stats.drops")
+
+	if ! [ "${packets}" = "${expected}" ]; then
+		printf "Expected filter to match on %d packets but matched on %d instead\n" \
+			"${expected}" "${packets}"
+	fi
+
+	echo "Hardware filter reports ${drops} drops"
+}
+
+h1_create()
+{
+	simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+switch_create()
+{
+	local h2_mac_addr=$(mac_get $h2)
+
+	ip link set ${swp1} up
+	ip link set ${swp2} up
+
+	ip link add br0 type bridge vlan_filtering 1
+	ip link set ${swp1} master br0
+	ip link set ${swp2} master br0
+	ip link set br0 up
+
+	bridge vlan add dev ${swp2} vid ${STREAM_VID}
+	bridge vlan add dev ${swp1} vid ${STREAM_VID}
+	# PSFP on Ocelot requires the filter to also be added to the bridge
+	# FDB, and not be removed
+	bridge fdb add dev ${swp2} \
+		${h2_mac_addr} vlan ${STREAM_VID} static master
+
+	psfp_chain_create ${swp1}
+
+	tc filter add dev ${swp1} ingress chain $(PSFP) pref 1 \
+		protocol 802.1Q flower skip_sw \
+		dst_mac ${h2_mac_addr} vlan_id ${STREAM_VID} \
+		action gate base-time 0.000000000 \
+		sched-entry OPEN  ${GATE_DURATION_NS} -1 -1 \
+		sched-entry CLOSE ${GATE_DURATION_NS} -1 -1
+}
+
+switch_destroy()
+{
+	psfp_chain_destroy ${swp1}
+	ip link del br0
+}
+
+txtime_setup()
+{
+	local if_name=$1
+
+	tc qdisc add dev ${if_name} clsact
+	# Classify PTP on TC 7 and isochron on TC 6
+	tc filter add dev ${if_name} egress protocol 0x88f7 \
+		flower action skbedit priority 7
+	tc filter add dev ${if_name} egress protocol 802.1Q \
+		flower vlan_ethtype 0xdead action skbedit priority 6
+	tc qdisc add dev ${if_name} handle 100: parent root mqprio num_tc 8 \
+		queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \
+		map 0 1 2 3 4 5 6 7 \
+		hw 1
+	# Set up TC 6 for SO_TXTIME. tc-mqprio queues count from 1.
+	tc qdisc replace dev ${if_name} parent 100:$((${STREAM_PRIO} + 1)) etf \
+		clockid CLOCK_TAI offload delta ${FUDGE_FACTOR}
+}
+
+txtime_cleanup()
+{
+	local if_name=$1
+
+	tc qdisc del dev ${if_name} root
+	tc qdisc del dev ${if_name} clsact
+}
+
+setup_prepare()
+{
+	vrf_prepare
+
+	h1_create
+	h2_create
+	switch_create
+
+	txtime_setup ${h1}
+
+	# Set up swp1 as a master PHC for h1, synchronized to the local
+	# CLOCK_REALTIME.
+	phc2sys_start ${swp1} ${UDS_ADDRESS_SWP1}
+
+	# Assumption true for LS1028A: h1 and h2 use the same PHC. So by
+	# synchronizing h1 to swp1 via PTP, h2 is also implicitly synchronized
+	# to swp1 (and both to CLOCK_REALTIME).
+	ptp4l_start ${h1} true ${UDS_ADDRESS_H1}
+	ptp4l_start ${swp1} false ${UDS_ADDRESS_SWP1}
+
+	# Make sure there are no filter matches at the beginning of the test
+	psfp_filter_check 0
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ptp4l_stop ${swp1}
+	ptp4l_stop ${h1}
+	phc2sys_stop
+	isochron_recv_stop
+
+	txtime_cleanup ${h1}
+
+	h2_destroy
+	h1_destroy
+	switch_destroy
+
+	vrf_cleanup
+}
+
+debug_incorrectly_dropped_packets()
+{
+	local isochron_dat=$1
+	local dropped_seqids
+	local seqid
+
+	echo "Packets incorrectly dropped:"
+
+	dropped_seqids=$(isochron report \
+		--input-file "${isochron_dat}" \
+		--printf-format "%u RX hw %T\n" \
+		--printf-args "qR" | \
+		grep 'RX hw 0.000000000' | \
+		awk '{print $1}')
+
+	for seqid in ${dropped_seqids}; do
+		isochron report \
+			--input-file "${isochron_dat}" \
+			--start ${seqid} --stop ${seqid} \
+			--printf-format "seqid %u scheduled for %T, HW TX timestamp %T\n" \
+			--printf-args "qST"
+	done
+}
+
+debug_incorrectly_received_packets()
+{
+	local isochron_dat=$1
+
+	echo "Packets incorrectly received:"
+
+	isochron report \
+		--input-file "${isochron_dat}" \
+		--printf-format "seqid %u scheduled for %T, HW TX timestamp %T, HW RX timestamp %T\n" \
+		--printf-args "qSTR" |
+		grep -v 'HW RX timestamp 0.000000000'
+}
+
+run_test()
+{
+	local base_time=$1
+	local expected=$2
+	local test_name=$3
+	local debug=$4
+	local isochron_dat="$(mktemp)"
+	local extra_args=""
+	local received
+
+	isochron_do \
+		"${h1}" \
+		"${h2}" \
+		"${UDS_ADDRESS_H1}" \
+		"" \
+		"${base_time}" \
+		"${CYCLE_TIME_NS}" \
+		"${SHIFT_TIME_NS}" \
+		"${NUM_PKTS}" \
+		"${STREAM_VID}" \
+		"${STREAM_PRIO}" \
+		"" \
+		"${isochron_dat}"
+
+	# Count all received packets by looking at the non-zero RX timestamps
+	received=$(isochron report \
+		--input-file "${isochron_dat}" \
+		--printf-format "%u\n" --printf-args "R" | \
+		grep -w -v '0' | wc -l)
+
+	if [ "${received}" = "${expected}" ]; then
+		RET=0
+	else
+		RET=1
+		echo "Expected isochron to receive ${expected} packets but received ${received}"
+	fi
+
+	log_test "${test_name}"
+
+	if [ "$RET" = "1" ]; then
+		${debug} "${isochron_dat}"
+	fi
+
+	rm ${isochron_dat} 2> /dev/null
+}
+
+test_gate_in_band()
+{
+	# Send packets in-band with the OPEN gate entry
+	run_test 0.000000000 ${NUM_PKTS} "In band" \
+		debug_incorrectly_dropped_packets
+
+	psfp_filter_check ${NUM_PKTS}
+}
+
+test_gate_out_of_band()
+{
+	# Send packets in-band with the CLOSE gate entry
+	run_test 0.005000000 0 "Out of band" \
+		debug_incorrectly_received_packets
+
+	psfp_filter_check $((2 * ${NUM_PKTS}))
+}
+
+trap cleanup EXIT
+
+ALL_TESTS="
+	test_gate_in_band
+	test_gate_out_of_band
+"
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tsn_lib.sh b/tools/testing/selftests/net/forwarding/tsn_lib.sh
new file mode 100644
index 000000000000..60a1423e8116
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tsn_lib.sh
@@ -0,0 +1,235 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright 2021-2022 NXP
+
+REQUIRE_ISOCHRON=${REQUIRE_ISOCHRON:=yes}
+REQUIRE_LINUXPTP=${REQUIRE_LINUXPTP:=yes}
+
+# Tunables
+UTC_TAI_OFFSET=37
+ISOCHRON_CPU=1
+
+if [[ "$REQUIRE_ISOCHRON" = "yes" ]]; then
+	# https://github.com/vladimiroltean/tsn-scripts
+	# WARNING: isochron versions pre-1.0 are unstable,
+	# always use the latest version
+	require_command isochron
+fi
+if [[ "$REQUIRE_LINUXPTP" = "yes" ]]; then
+	require_command phc2sys
+	require_command ptp4l
+fi
+
+phc2sys_start()
+{
+	local if_name=$1
+	local uds_address=$2
+	local extra_args=""
+
+	if ! [ -z "${uds_address}" ]; then
+		extra_args="${extra_args} -z ${uds_address}"
+	fi
+
+	phc2sys_log="$(mktemp)"
+
+	chrt -f 10 phc2sys -m \
+		-c ${if_name} \
+		-s CLOCK_REALTIME \
+		-O ${UTC_TAI_OFFSET} \
+		--step_threshold 0.00002 \
+		--first_step_threshold 0.00002 \
+		${extra_args} \
+		> "${phc2sys_log}" 2>&1 &
+	phc2sys_pid=$!
+
+	echo "phc2sys logs to ${phc2sys_log} and has pid ${phc2sys_pid}"
+
+	sleep 1
+}
+
+phc2sys_stop()
+{
+	{ kill ${phc2sys_pid} && wait ${phc2sys_pid}; } 2> /dev/null
+	rm "${phc2sys_log}" 2> /dev/null
+}
+
+ptp4l_start()
+{
+	local if_name=$1
+	local slave_only=$2
+	local uds_address=$3
+	local log="ptp4l_log_${if_name}"
+	local pid="ptp4l_pid_${if_name}"
+	local extra_args=""
+
+	if [ "${slave_only}" = true ]; then
+		extra_args="${extra_args} -s"
+	fi
+
+	# declare dynamic variables ptp4l_log_${if_name} and ptp4l_pid_${if_name}
+	# as global, so that they can be referenced later
+	declare -g "${log}=$(mktemp)"
+
+	chrt -f 10 ptp4l -m -2 -P \
+		-i ${if_name} \
+		--step_threshold 0.00002 \
+		--first_step_threshold 0.00002 \
+		--tx_timestamp_timeout 100 \
+		--uds_address="${uds_address}" \
+		${extra_args} \
+		> "${!log}" 2>&1 &
+	declare -g "${pid}=$!"
+
+	echo "ptp4l for interface ${if_name} logs to ${!log} and has pid ${!pid}"
+
+	sleep 1
+}
+
+ptp4l_stop()
+{
+	local if_name=$1
+	local log="ptp4l_log_${if_name}"
+	local pid="ptp4l_pid_${if_name}"
+
+	{ kill ${!pid} && wait ${!pid}; } 2> /dev/null
+	rm "${!log}" 2> /dev/null
+}
+
+cpufreq_max()
+{
+	local cpu=$1
+	local freq="cpu${cpu}_freq"
+	local governor="cpu${cpu}_governor"
+
+	# Kernel may be compiled with CONFIG_CPU_FREQ disabled
+	if ! [ -d /sys/bus/cpu/devices/cpu${cpu}/cpufreq ]; then
+		return
+	fi
+
+	# declare dynamic variables cpu${cpu}_freq and cpu${cpu}_governor as
+	# global, so they can be referenced later
+	declare -g "${freq}=$(cat /sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_min_freq)"
+	declare -g "${governor}=$(cat /sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_governor)"
+
+	cat /sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_max_freq > \
+		/sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_min_freq
+	echo -n "performance" > \
+		/sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_governor
+}
+
+cpufreq_restore()
+{
+	local cpu=$1
+	local freq="cpu${cpu}_freq"
+	local governor="cpu${cpu}_governor"
+
+	if ! [ -d /sys/bus/cpu/devices/cpu${cpu}/cpufreq ]; then
+		return
+	fi
+
+	echo "${!freq}" > /sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_min_freq
+	echo -n "${!governor}" > \
+		/sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_governor
+}
+
+isochron_recv_start()
+{
+	local if_name=$1
+	local uds=$2
+	local extra_args=$3
+
+	if ! [ -z "${uds}" ]; then
+		extra_args="--unix-domain-socket ${uds}"
+	fi
+
+	isochron rcv \
+		--interface ${if_name} \
+		--sched-priority 98 \
+		--sched-fifo \
+		--utc-tai-offset ${UTC_TAI_OFFSET} \
+		--quiet \
+		${extra_args} & \
+	isochron_pid=$!
+
+	sleep 1
+}
+
+isochron_recv_stop()
+{
+	{ kill ${isochron_pid} && wait ${isochron_pid}; } 2> /dev/null
+}
+
+isochron_do()
+{
+	local sender_if_name=$1; shift
+	local receiver_if_name=$1; shift
+	local sender_uds=$1; shift
+	local receiver_uds=$1; shift
+	local base_time=$1; shift
+	local cycle_time=$1; shift
+	local shift_time=$1; shift
+	local num_pkts=$1; shift
+	local vid=$1; shift
+	local priority=$1; shift
+	local dst_ip=$1; shift
+	local isochron_dat=$1; shift
+	local extra_args=""
+	local receiver_extra_args=""
+	local vrf="$(master_name_get ${sender_if_name})"
+	local use_l2="true"
+
+	if ! [ -z "${dst_ip}" ]; then
+		use_l2="false"
+	fi
+
+	if ! [ -z "${vrf}" ]; then
+		dst_ip="${dst_ip}%${vrf}"
+	fi
+
+	if ! [ -z "${vid}" ]; then
+		vid="--vid=${vid}"
+	fi
+
+	if [ -z "${receiver_uds}" ]; then
+		extra_args="${extra_args} --omit-remote-sync"
+	fi
+
+	if ! [ -z "${shift_time}" ]; then
+		extra_args="${extra_args} --shift-time=${shift_time}"
+	fi
+
+	if [ "${use_l2}" = "true" ]; then
+		extra_args="${extra_args} --l2 --etype=0xdead ${vid}"
+		receiver_extra_args="--l2 --etype=0xdead"
+	else
+		extra_args="${extra_args} --l4 --ip-destination=${dst_ip}"
+		receiver_extra_args="--l4"
+	fi
+
+	cpufreq_max ${ISOCHRON_CPU}
+
+	isochron_recv_start "${h2}" "${receiver_uds}" "${receiver_extra_args}"
+
+	isochron send \
+		--interface ${sender_if_name} \
+		--unix-domain-socket ${sender_uds} \
+		--priority ${priority} \
+		--base-time ${base_time} \
+		--cycle-time ${cycle_time} \
+		--num-frames ${num_pkts} \
+		--frame-size 64 \
+		--txtime \
+		--utc-tai-offset ${UTC_TAI_OFFSET} \
+		--cpu-mask $((1 << ${ISOCHRON_CPU})) \
+		--sched-fifo \
+		--sched-priority 98 \
+		--client 127.0.0.1 \
+		--sync-threshold 5000 \
+		--output-file ${isochron_dat} \
+		${extra_args} \
+		--quiet
+
+	isochron_recv_stop
+
+	cpufreq_restore ${ISOCHRON_CPU}
+}
-- 
cgit 


From 57b19468b369c5f70068540d698ea2a5f4598945 Mon Sep 17 00:00:00 2001
From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Date: Sun, 1 May 2022 11:55:24 +0800
Subject: selftests/sysctl: add sysctl macro test

Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Iurii Zaikin <yzaikin@google.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
Cc: David Ahern <dsahern@kernel.org>
Cc: Simon Horman <horms@verge.net.au>
Cc: Julian Anastasov <ja@ssi.bg>
Cc: Pablo Neira Ayuso <pablo@netfilter.org>
Cc: Jozsef Kadlecsik <kadlec@netfilter.org>
Cc: Florian Westphal <fw@strlen.de>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Lorenz Bauer <lmb@cloudflare.com>
Cc: Akhmat Karakotov <hmukos@yandex-team.ru>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 lib/test_sysctl.c                        | 32 ++++++++++++++++++++++++++++++++
 tools/testing/selftests/sysctl/sysctl.sh | 23 +++++++++++++++++++++++
 2 files changed, 55 insertions(+)

(limited to 'tools/testing')

diff --git a/lib/test_sysctl.c b/lib/test_sysctl.c
index a5a3d6c27e1f..9a564971f539 100644
--- a/lib/test_sysctl.c
+++ b/lib/test_sysctl.c
@@ -38,6 +38,7 @@
 
 static int i_zero;
 static int i_one_hundred = 100;
+static int match_int_ok = 1;
 
 struct test_sysctl_data {
 	int int_0001;
@@ -95,6 +96,13 @@ static struct ctl_table test_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+	{
+		.procname	= "match_int",
+		.data		= &match_int_ok,
+		.maxlen		= sizeof(match_int_ok),
+		.mode		= 0444,
+		.proc_handler	= proc_dointvec,
+	},
 	{
 		.procname	= "boot_int",
 		.data		= &test_data.boot_int,
@@ -132,6 +140,30 @@ static struct ctl_table_header *test_sysctl_header;
 
 static int __init test_sysctl_init(void)
 {
+	int i;
+
+	struct {
+		int defined;
+		int wanted;
+	} match_int[] = {
+		{.defined = *(int *)SYSCTL_ZERO,	.wanted = 0},
+		{.defined = *(int *)SYSCTL_ONE,		.wanted = 1},
+		{.defined = *(int *)SYSCTL_TWO,		.wanted = 2},
+		{.defined = *(int *)SYSCTL_THREE,	.wanted = 3},
+		{.defined = *(int *)SYSCTL_FOUR,	.wanted = 4},
+		{.defined = *(int *)SYSCTL_ONE_HUNDRED, .wanted = 100},
+		{.defined = *(int *)SYSCTL_TWO_HUNDRED,	.wanted = 200},
+		{.defined = *(int *)SYSCTL_ONE_THOUSAND, .wanted = 1000},
+		{.defined = *(int *)SYSCTL_THREE_THOUSAND, .wanted = 3000},
+		{.defined = *(int *)SYSCTL_INT_MAX,	.wanted = INT_MAX},
+		{.defined = *(int *)SYSCTL_MAXOLDUID,	.wanted = 65535},
+		{.defined = *(int *)SYSCTL_NEG_ONE,	.wanted = -1},
+	};
+
+	for (i = 0; i < ARRAY_SIZE(match_int); i++)
+		if (match_int[i].defined != match_int[i].wanted)
+			match_int_ok = 0;
+
 	test_data.bitmap_0001 = kzalloc(SYSCTL_TEST_BITMAP_SIZE/8, GFP_KERNEL);
 	if (!test_data.bitmap_0001)
 		return -ENOMEM;
diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh
index 19515dcb7d04..f50778a3d744 100755
--- a/tools/testing/selftests/sysctl/sysctl.sh
+++ b/tools/testing/selftests/sysctl/sysctl.sh
@@ -40,6 +40,7 @@ ALL_TESTS="$ALL_TESTS 0004:1:1:uint_0001"
 ALL_TESTS="$ALL_TESTS 0005:3:1:int_0003"
 ALL_TESTS="$ALL_TESTS 0006:50:1:bitmap_0001"
 ALL_TESTS="$ALL_TESTS 0007:1:1:boot_int"
+ALL_TESTS="$ALL_TESTS 0008:1:1:match_int"
 
 function allow_user_defaults()
 {
@@ -785,6 +786,27 @@ sysctl_test_0007()
 	return $ksft_skip
 }
 
+sysctl_test_0008()
+{
+	TARGET="${SYSCTL}/match_int"
+	if [ ! -f $TARGET ]; then
+		echo "Skipping test for $TARGET as it is not present ..."
+		return $ksft_skip
+	fi
+
+	echo -n "Testing if $TARGET is matched in kernel"
+	ORIG_VALUE=$(cat "${TARGET}")
+
+	if [ $ORIG_VALUE -ne 1 ]; then
+		echo "TEST FAILED"
+		rc=1
+		test_rc
+	fi
+
+	echo "ok"
+	return 0
+}
+
 list_tests()
 {
 	echo "Test ID list:"
@@ -800,6 +822,7 @@ list_tests()
 	echo "0005 x $(get_test_count 0005) - tests proc_douintvec() array"
 	echo "0006 x $(get_test_count 0006) - tests proc_do_large_bitmap()"
 	echo "0007 x $(get_test_count 0007) - tests setting sysctl from kernel boot param"
+	echo "0008 x $(get_test_count 0008) - tests sysctl macro values match"
 }
 
 usage()
-- 
cgit 


From 3122257c02afd9f199a8fc84ae981e1fc4958532 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Mon, 2 May 2022 11:45:07 +0300
Subject: selftests: mirror_gre_bridge_1q: Avoid changing PVID while interface
 is operational

In emulated environments, the bridge ports enslaved to br1 get a carrier
before changing br1's PVID. This means that by the time the PVID is
changed, br1 is already operational and configured with an IPv6
link-local address.

When the test is run with netdevs registered by mlxsw, changing the PVID
is vetoed, as changing the VID associated with an existing L3 interface
is forbidden. This restriction is similar to the 8021q driver's
restriction of changing the VID of an existing interface.

Fix this by taking br1 down and bringing it back up when it is fully
configured.

With this fix, the test reliably passes on top of both the SW and HW
data paths (emulated or not).

Fixes: 239e754af854 ("selftests: forwarding: Test mirror-to-gretap w/ UL 802.1q")
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Link: https://lore.kernel.org/r/20220502084507.364774-1-idosch@nvidia.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh
index a3402cd8d5b6..9ff22f28032d 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh
@@ -61,9 +61,12 @@ setup_prepare()
 
 	vrf_prepare
 	mirror_gre_topo_create
+	# Avoid changing br1's PVID while it is operational as a L3 interface.
+	ip link set dev br1 down
 
 	ip link set dev $swp3 master br1
 	bridge vlan add dev br1 vid 555 pvid untagged self
+	ip link set dev br1 up
 	ip address add dev br1 192.0.2.129/28
 	ip address add dev br1 2001:db8:2::1/64
 
-- 
cgit 


From 1531cc632d13af2846bd2b533cea1c4aaee8cd90 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Mon, 2 May 2022 11:49:25 +0300
Subject: selftests: forwarding: lib: Add start_traffic_pktsize() helpers

Add two helpers, start_traffic_pktsize() and start_tcp_traffic_pktsize(),
that allow explicit overriding of packet size. Change start_traffic() and
start_tcp_traffic() to dispatch through these helpers with the default
packet size.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 tools/testing/selftests/net/forwarding/lib.sh | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 5386c826e46a..66681a2bcdd3 100755
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -1375,25 +1375,40 @@ flood_test()
 
 __start_traffic()
 {
+	local pktsize=$1; shift
 	local proto=$1; shift
 	local h_in=$1; shift    # Where the traffic egresses the host
 	local sip=$1; shift
 	local dip=$1; shift
 	local dmac=$1; shift
 
-	$MZ $h_in -p 8000 -A $sip -B $dip -c 0 \
+	$MZ $h_in -p $pktsize -A $sip -B $dip -c 0 \
 		-a own -b $dmac -t "$proto" -q "$@" &
 	sleep 1
 }
 
+start_traffic_pktsize()
+{
+	local pktsize=$1; shift
+
+	__start_traffic $pktsize udp "$@"
+}
+
+start_tcp_traffic_pktsize()
+{
+	local pktsize=$1; shift
+
+	__start_traffic $pktsize tcp "$@"
+}
+
 start_traffic()
 {
-	__start_traffic udp "$@"
+	start_traffic_pktsize 8000 "$@"
 }
 
 start_tcp_traffic()
 {
-	__start_traffic tcp "$@"
+	start_tcp_traffic_pktsize 8000 "$@"
 }
 
 stop_traffic()
-- 
cgit 


From 1d267aa8699b5985a297f697c493aadc507711a9 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Mon, 2 May 2022 11:49:26 +0300
Subject: selftests: mlxsw: Add a test for soaking up a burst of traffic

Add a test that sends 1Gbps of traffic through the switch, into which it
then injects a burst of traffic and tests that there are no drops.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 .../selftests/drivers/net/mlxsw/qos_burst.sh       | 480 +++++++++++++++++++++
 1 file changed, 480 insertions(+)
 create mode 100755 tools/testing/selftests/drivers/net/mlxsw/qos_burst.sh

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_burst.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_burst.sh
new file mode 100755
index 000000000000..82a47b903f92
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_burst.sh
@@ -0,0 +1,480 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test sends 1Gbps of traffic through the switch, into which it then
+# injects a burst of traffic and tests that there are no drops.
+#
+# The 1Gbps stream is created by sending >1Gbps stream from H1. This stream
+# ingresses through $swp1, and is forwarded thtrough a small temporary pool to a
+# 1Gbps $swp3.
+#
+# Thus a 1Gbps stream enters $swp4, and is forwarded through a large pool to
+# $swp2, and eventually to H2. Since $swp2 is a 1Gbps port as well, no backlog
+# is generated.
+#
+# At this point, a burst of traffic is forwarded from H3. This enters $swp5, is
+# forwarded to $swp2, which is fully subscribed by the 1Gbps stream. The
+# expectation is that the burst is wholly absorbed by the large pool and no
+# drops are caused. After the burst, there should be a backlog that is hard to
+# get rid of, because $sw2 is fully subscribed. But because each individual
+# packet is scheduled soon after getting enqueued, SLL and HLL do not impact the
+# test.
+#
+# +-----------------------+                           +-----------------------+
+# | H1                    |			      | H3                    |
+# |   + $h1.111           |			      |          $h3.111 +    |
+# |   | 192.0.2.33/28     |			      |    192.0.2.35/28 |    |
+# |   |                   |			      |                  |    |
+# |   + $h1               |			      |              $h3 +    |
+# +---|-------------------+  +--------------------+   +------------------|----+
+#     |                      |                    |       		 |
+# +---|----------------------|--------------------|----------------------|----+
+# |   + $swp1          $swp3 +                    + $swp4          $swp5 |    |
+# |   | iPOOL1        iPOOL0 |                    | iPOOL2        iPOOL2 |    |
+# |   | ePOOL4        ePOOL5 |                    | ePOOL4        ePOOL4 |    |
+# |   |                1Gbps |                    | 1Gbps                |    |
+# | +-|----------------------|-+                +-|----------------------|-+  |
+# | | + $swp1.111  $swp3.111 + |                | + $swp4.111  $swp5.111 + |  |
+# | |                          |                |                          |  |
+# | | BR1                      |                | BR2                      |  |
+# | |                          |                |                          |  |
+# | |                          |                |         + $swp2.111      |  |
+# | +--------------------------+                +---------|----------------+  |
+# |                                                       |                   |
+# | iPOOL0: 500KB dynamic                                 |                   |
+# | iPOOL1: 500KB dynamic                                 |                   |
+# | iPOOL2: 10MB dynamic                                  + $swp2             |
+# | ePOOL4: 500KB dynamic                                 | iPOOL0            |
+# | ePOOL5: 500KB dnamic                                  | ePOOL6            |
+# | ePOOL6: 10MB dynamic                                  | 1Gbps             |
+# +-------------------------------------------------------|-------------------+
+#                                                         |
+#                                                     +---|-------------------+
+#                                                     |   + $h2            H2 |
+#                                                     |   | 1Gbps             |
+#                                                     |   |                   |
+#                                                     |   + $h2.111           |
+#                                                     |     192.0.2.34/28     |
+#                                                     +-----------------------+
+#
+# iPOOL0+ePOOL4 are helper pools for control traffic etc.
+# iPOOL1+ePOOL5 are helper pools for modeling the 1Gbps stream
+# iPOOL2+ePOOL6 are pools for soaking the burst traffic
+
+ALL_TESTS="
+	ping_ipv4
+	test_8K
+	test_800
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=8
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+source qos_lib.sh
+source mlxsw_lib.sh
+
+_1KB=1000
+_500KB=$((500 * _1KB))
+_1MB=$((1000 * _1KB))
+
+# The failure mode that this specifically tests is exhaustion of descriptor
+# buffer. The point is to produce a burst that shared buffer should be able
+# to accommodate, but produce it with small enough packets that the machine
+# runs out of the descriptor buffer space with default configuration.
+#
+# The machine therefore needs to be able to produce line rate with as small
+# packets as possible, and at the same time have large enough buffer that
+# when filled with these small packets, it runs out of descriptors.
+# Spectrum-2 is very close, but cannot perform this test. Therefore use
+# Spectrum-3 as a minimum, and permit larger burst size, and therefore
+# larger packets, to reduce spurious failures.
+#
+mlxsw_only_on_spectrum 3+ || exit
+
+BURST_SIZE=$((50000000))
+POOL_SIZE=$BURST_SIZE
+
+h1_create()
+{
+	simple_if_init $h1
+	mtu_set $h1 10000
+
+	vlan_create $h1 111 v$h1 192.0.2.33/28
+	ip link set dev $h1.111 type vlan egress-qos-map 0:1
+}
+
+h1_destroy()
+{
+	vlan_destroy $h1 111
+
+	mtu_restore $h1
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2
+	mtu_set $h2 10000
+	ethtool -s $h2 speed 1000 autoneg off
+
+	vlan_create $h2 111 v$h2 192.0.2.34/28
+}
+
+h2_destroy()
+{
+	vlan_destroy $h2 111
+
+	ethtool -s $h2 autoneg on
+	mtu_restore $h2
+	simple_if_fini $h2
+}
+
+h3_create()
+{
+	simple_if_init $h3
+	mtu_set $h3 10000
+
+	vlan_create $h3 111 v$h3 192.0.2.35/28
+}
+
+h3_destroy()
+{
+	vlan_destroy $h3 111
+
+	mtu_restore $h3
+	simple_if_fini $h3
+}
+
+switch_create()
+{
+	# pools
+	# -----
+
+	devlink_pool_size_thtype_save 0
+	devlink_pool_size_thtype_save 4
+	devlink_pool_size_thtype_save 1
+	devlink_pool_size_thtype_save 5
+	devlink_pool_size_thtype_save 2
+	devlink_pool_size_thtype_save 6
+
+	devlink_port_pool_th_save $swp1 1
+	devlink_port_pool_th_save $swp2 6
+	devlink_port_pool_th_save $swp3 5
+	devlink_port_pool_th_save $swp4 2
+	devlink_port_pool_th_save $swp5 2
+
+	devlink_tc_bind_pool_th_save $swp1 1 ingress
+	devlink_tc_bind_pool_th_save $swp2 1 egress
+	devlink_tc_bind_pool_th_save $swp3 1 egress
+	devlink_tc_bind_pool_th_save $swp4 1 ingress
+	devlink_tc_bind_pool_th_save $swp5 1 ingress
+
+	# Control traffic pools. Just reduce the size.
+	devlink_pool_size_thtype_set 0 dynamic $_500KB
+	devlink_pool_size_thtype_set 4 dynamic $_500KB
+
+	# Stream modeling pools.
+	devlink_pool_size_thtype_set 1 dynamic $_500KB
+	devlink_pool_size_thtype_set 5 dynamic $_500KB
+
+	# Burst soak pools.
+	devlink_pool_size_thtype_set 2 static $POOL_SIZE
+	devlink_pool_size_thtype_set 6 static $POOL_SIZE
+
+	# $swp1
+	# -----
+
+	ip link set dev $swp1 up
+	mtu_set $swp1 10000
+	vlan_create $swp1 111
+	ip link set dev $swp1.111 type vlan ingress-qos-map 0:0 1:1
+
+	devlink_port_pool_th_set $swp1 1 16
+	devlink_tc_bind_pool_th_set $swp1 1 ingress 1 16
+
+	# Configure qdisc...
+	tc qdisc replace dev $swp1 root handle 1: \
+	   ets bands 8 strict 8 priomap 7 6
+	# ... so that we can assign prio1 traffic to PG1.
+	dcb buffer set dev $swp1 prio-buffer all:0 1:1
+
+	# $swp2
+	# -----
+
+	ip link set dev $swp2 up
+	mtu_set $swp2 10000
+	ethtool -s $swp2 speed 1000 autoneg off
+	vlan_create $swp2 111
+	ip link set dev $swp2.111 type vlan egress-qos-map 0:0 1:1
+
+	devlink_port_pool_th_set $swp2 6 $POOL_SIZE
+	devlink_tc_bind_pool_th_set $swp2 1 egress 6 $POOL_SIZE
+
+	# prio 0->TC0 (band 7), 1->TC1 (band 6)
+	tc qdisc replace dev $swp2 root handle 1: \
+	   ets bands 8 strict 8 priomap 7 6
+
+	# $swp3
+	# -----
+
+	ip link set dev $swp3 up
+	mtu_set $swp3 10000
+	ethtool -s $swp3 speed 1000 autoneg off
+	vlan_create $swp3 111
+	ip link set dev $swp3.111 type vlan egress-qos-map 0:0 1:1
+
+	devlink_port_pool_th_set $swp3 5 16
+	devlink_tc_bind_pool_th_set $swp3 1 egress 5 16
+
+	# prio 0->TC0 (band 7), 1->TC1 (band 6)
+	tc qdisc replace dev $swp3 root handle 1: \
+	   ets bands 8 strict 8 priomap 7 6
+
+	# $swp4
+	# -----
+
+	ip link set dev $swp4 up
+	mtu_set $swp4 10000
+	ethtool -s $swp4 speed 1000 autoneg off
+	vlan_create $swp4 111
+	ip link set dev $swp4.111 type vlan ingress-qos-map 0:0 1:1
+
+	devlink_port_pool_th_set $swp4 2 $POOL_SIZE
+	devlink_tc_bind_pool_th_set $swp4 1 ingress 2 $POOL_SIZE
+
+	# Configure qdisc...
+	tc qdisc replace dev $swp4 root handle 1: \
+	   ets bands 8 strict 8 priomap 7 6
+	# ... so that we can assign prio1 traffic to PG1.
+	dcb buffer set dev $swp4 prio-buffer all:0 1:1
+
+	# $swp5
+	# -----
+
+	ip link set dev $swp5 up
+	mtu_set $swp5 10000
+	vlan_create $swp5 111
+	ip link set dev $swp5.111 type vlan ingress-qos-map 0:0 1:1
+
+	devlink_port_pool_th_set $swp5 2 $POOL_SIZE
+	devlink_tc_bind_pool_th_set $swp5 1 ingress 2 $POOL_SIZE
+
+	# Configure qdisc...
+	tc qdisc replace dev $swp5 root handle 1: \
+	   ets bands 8 strict 8 priomap 7 6
+	# ... so that we can assign prio1 traffic to PG1.
+	dcb buffer set dev $swp5 prio-buffer all:0 1:1
+
+	# bridges
+	# -------
+
+	ip link add name br1 type bridge vlan_filtering 0
+	ip link set dev $swp1.111 master br1
+	ip link set dev $swp3.111 master br1
+	ip link set dev br1 up
+
+	ip link add name br2 type bridge vlan_filtering 0
+	ip link set dev $swp2.111 master br2
+	ip link set dev $swp4.111 master br2
+	ip link set dev $swp5.111 master br2
+	ip link set dev br2 up
+}
+
+switch_destroy()
+{
+	# Do this first so that we can reset the limits to values that are only
+	# valid for the original static / dynamic setting.
+	devlink_pool_size_thtype_restore 6
+	devlink_pool_size_thtype_restore 5
+	devlink_pool_size_thtype_restore 4
+	devlink_pool_size_thtype_restore 2
+	devlink_pool_size_thtype_restore 1
+	devlink_pool_size_thtype_restore 0
+
+	# bridges
+	# -------
+
+	ip link set dev br2 down
+	ip link set dev $swp5.111 nomaster
+	ip link set dev $swp4.111 nomaster
+	ip link set dev $swp2.111 nomaster
+	ip link del dev br2
+
+	ip link set dev br1 down
+	ip link set dev $swp3.111 nomaster
+	ip link set dev $swp1.111 nomaster
+	ip link del dev br1
+
+	# $swp5
+	# -----
+
+	dcb buffer set dev $swp5 prio-buffer all:0
+	tc qdisc del dev $swp5 root
+
+	devlink_tc_bind_pool_th_restore $swp5 1 ingress
+	devlink_port_pool_th_restore $swp5 2
+
+	vlan_destroy $swp5 111
+	mtu_restore $swp5
+	ip link set dev $swp5 down
+
+	# $swp4
+	# -----
+
+	dcb buffer set dev $swp4 prio-buffer all:0
+	tc qdisc del dev $swp4 root
+
+	devlink_tc_bind_pool_th_restore $swp4 1 ingress
+	devlink_port_pool_th_restore $swp4 2
+
+	vlan_destroy $swp4 111
+	ethtool -s $swp4 autoneg on
+	mtu_restore $swp4
+	ip link set dev $swp4 down
+
+	# $swp3
+	# -----
+
+	tc qdisc del dev $swp3 root
+
+	devlink_tc_bind_pool_th_restore $swp3 1 egress
+	devlink_port_pool_th_restore $swp3 5
+
+	vlan_destroy $swp3 111
+	ethtool -s $swp3 autoneg on
+	mtu_restore $swp3
+	ip link set dev $swp3 down
+
+	# $swp2
+	# -----
+
+	tc qdisc del dev $swp2 root
+
+	devlink_tc_bind_pool_th_restore $swp2 1 egress
+	devlink_port_pool_th_restore $swp2 6
+
+	vlan_destroy $swp2 111
+	ethtool -s $swp2 autoneg on
+	mtu_restore $swp2
+	ip link set dev $swp2 down
+
+	# $swp1
+	# -----
+
+	dcb buffer set dev $swp1 prio-buffer all:0
+	tc qdisc del dev $swp1 root
+
+	devlink_tc_bind_pool_th_restore $swp1 1 ingress
+	devlink_port_pool_th_restore $swp1 1
+
+	vlan_destroy $swp1 111
+	mtu_restore $swp1
+	ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	swp4=${NETIFS[p6]}
+
+	swp5=${NETIFS[p7]}
+	h3=${NETIFS[p8]}
+
+	h2mac=$(mac_get $h2)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+	h3_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h3_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.34 " h1->h2"
+	ping_test $h3 192.0.2.34 " h3->h2"
+}
+
+__test_qos_burst()
+{
+	local pktsize=$1; shift
+
+	RET=0
+
+	start_traffic_pktsize $pktsize $h1.111 192.0.2.33 192.0.2.34 $h2mac
+	sleep 1
+
+	local q0=$(ethtool_stats_get $swp2 tc_transmit_queue_tc_1)
+	((q0 == 0))
+	check_err $? "Transmit queue non-zero?"
+
+	local d0=$(ethtool_stats_get $swp2 tc_no_buffer_discard_uc_tc_1)
+
+	local cell_size=$(devlink_cell_size_get)
+	local cells=$((BURST_SIZE / cell_size))
+	# Each packet is $pktsize of payload + headers.
+	local pkt_cells=$(((pktsize + 50 + cell_size - 1)  / cell_size))
+	# How many packets can we admit:
+	local pkts=$((cells / pkt_cells))
+
+	$MZ $h3 -p $pktsize -Q 1:111 -A 192.0.2.35 -B 192.0.2.34 \
+		-a own -b $h2mac -c $pkts -t udp -q
+	sleep 1
+
+	local d1=$(ethtool_stats_get $swp2 tc_no_buffer_discard_uc_tc_1)
+	((d1 == d0))
+	check_err $? "Drops seen on egress port: $d0 -> $d1 ($((d1 - d0)))"
+
+	# Check that the queue is somewhat close to the burst size This
+	# makes sure that the lack of drops above was not due to port
+	# undersubscribtion.
+	local q0=$(ethtool_stats_get $swp2 tc_transmit_queue_tc_1)
+	local qe=$((90 * BURST_SIZE / 100))
+	((q0 > qe))
+	check_err $? "Queue size expected >$qe, got $q0"
+
+	stop_traffic
+	sleep 2
+
+	log_test "Burst: absorb $pkts ${pktsize}-B packets"
+}
+
+test_8K()
+{
+	__test_qos_burst 8000
+}
+
+test_800()
+{
+	__test_qos_burst 800
+}
+
+bail_on_lldpad
+
+trap cleanup EXIT
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit 


From 97926d5a847ca1758ad8702ce591e3b05a701e0d Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Mon, 2 May 2022 11:46:37 +0200
Subject: selftests/net: so_txtime: fix parsing of start time stamp on 32 bit
 systems

This patch fixes the parsing of the cmd line supplied start time on 32
bit systems. A "long" on 32 bit systems is only 32 bit wide and cannot
hold a timestamp in nano second resolution.

Fixes: 040806343bb4 ("selftests/net: so_txtime multi-host support")
Cc: Carlos Llamas <cmllamas@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
Acked-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Carlos Llamas <cmllamas@google.com>
Link: https://lore.kernel.org/r/20220502094638.1921702-2-mkl@pengutronix.de
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 tools/testing/selftests/net/so_txtime.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c
index 59067f64b775..103f6bf28e35 100644
--- a/tools/testing/selftests/net/so_txtime.c
+++ b/tools/testing/selftests/net/so_txtime.c
@@ -475,7 +475,7 @@ static void parse_opts(int argc, char **argv)
 			cfg_rx = true;
 			break;
 		case 't':
-			cfg_start_time_ns = strtol(optarg, NULL, 0);
+			cfg_start_time_ns = strtoll(optarg, NULL, 0);
 			break;
 		case 'm':
 			cfg_mark = strtol(optarg, NULL, 0);
-- 
cgit 


From f5c2174a3775491e890ce285df52f5715fbef875 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Mon, 2 May 2022 11:46:38 +0200
Subject: selftests/net: so_txtime: usage(): fix documentation of default clock

The program uses CLOCK_TAI as default clock since it was added to the
Linux repo. In commit:
| 040806343bb4 ("selftests/net: so_txtime multi-host support")
a help text stating the wrong default clock was added.

This patch fixes the help text.

Fixes: 040806343bb4 ("selftests/net: so_txtime multi-host support")
Cc: Carlos Llamas <cmllamas@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
Acked-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Carlos Llamas <cmllamas@google.com>
Link: https://lore.kernel.org/r/20220502094638.1921702-3-mkl@pengutronix.de
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 tools/testing/selftests/net/so_txtime.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c
index 103f6bf28e35..2672ac0b6d1f 100644
--- a/tools/testing/selftests/net/so_txtime.c
+++ b/tools/testing/selftests/net/so_txtime.c
@@ -421,7 +421,7 @@ static void usage(const char *progname)
 			"Options:\n"
 			"  -4            only IPv4\n"
 			"  -6            only IPv6\n"
-			"  -c <clock>    monotonic (default) or tai\n"
+			"  -c <clock>    monotonic or tai (default)\n"
 			"  -D <addr>     destination IP address (server)\n"
 			"  -S <addr>     source IP address (client)\n"
 			"  -r            run rx mode\n"
-- 
cgit 


From bf08515d39cb843c81f991ee67ff543eecdba0c3 Mon Sep 17 00:00:00 2001
From: Oliver Upton <oupton@google.com>
Date: Sat, 9 Apr 2022 18:45:45 +0000
Subject: selftests: KVM: Rename psci_cpu_on_test to psci_test

There are other interactions with PSCI worth testing; rename the PSCI
test to make it more generic.

No functional change intended.

Signed-off-by: Oliver Upton <oupton@google.com>
Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220409184549.1681189-10-oupton@google.com
---
 tools/testing/selftests/kvm/.gitignore             |   2 +-
 tools/testing/selftests/kvm/Makefile               |   2 +-
 .../selftests/kvm/aarch64/psci_cpu_on_test.c       | 121 ---------------------
 tools/testing/selftests/kvm/aarch64/psci_test.c    | 121 +++++++++++++++++++++
 4 files changed, 123 insertions(+), 123 deletions(-)
 delete mode 100644 tools/testing/selftests/kvm/aarch64/psci_cpu_on_test.c
 create mode 100644 tools/testing/selftests/kvm/aarch64/psci_test.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 0b0e4402bba6..1bb575dfc42e 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -2,7 +2,7 @@
 /aarch64/arch_timer
 /aarch64/debug-exceptions
 /aarch64/get-reg-list
-/aarch64/psci_cpu_on_test
+/aarch64/psci_test
 /aarch64/vcpu_width_config
 /aarch64/vgic_init
 /aarch64/vgic_irq
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 681b173aa87c..c2cf4d318296 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -105,7 +105,7 @@ TEST_GEN_PROGS_x86_64 += system_counter_offset_test
 TEST_GEN_PROGS_aarch64 += aarch64/arch_timer
 TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
 TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
-TEST_GEN_PROGS_aarch64 += aarch64/psci_cpu_on_test
+TEST_GEN_PROGS_aarch64 += aarch64/psci_test
 TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config
 TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
 TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq
diff --git a/tools/testing/selftests/kvm/aarch64/psci_cpu_on_test.c b/tools/testing/selftests/kvm/aarch64/psci_cpu_on_test.c
deleted file mode 100644
index 4c5f6814030f..000000000000
--- a/tools/testing/selftests/kvm/aarch64/psci_cpu_on_test.c
+++ /dev/null
@@ -1,121 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * psci_cpu_on_test - Test that the observable state of a vCPU targeted by the
- * CPU_ON PSCI call matches what the caller requested.
- *
- * Copyright (c) 2021 Google LLC.
- *
- * This is a regression test for a race between KVM servicing the PSCI call and
- * userspace reading the vCPUs registers.
- */
-
-#define _GNU_SOURCE
-
-#include <linux/psci.h>
-
-#include "kvm_util.h"
-#include "processor.h"
-#include "test_util.h"
-
-#define VCPU_ID_SOURCE 0
-#define VCPU_ID_TARGET 1
-
-#define CPU_ON_ENTRY_ADDR 0xfeedf00dul
-#define CPU_ON_CONTEXT_ID 0xdeadc0deul
-
-static uint64_t psci_cpu_on(uint64_t target_cpu, uint64_t entry_addr,
-			    uint64_t context_id)
-{
-	register uint64_t x0 asm("x0") = PSCI_0_2_FN64_CPU_ON;
-	register uint64_t x1 asm("x1") = target_cpu;
-	register uint64_t x2 asm("x2") = entry_addr;
-	register uint64_t x3 asm("x3") = context_id;
-
-	asm("hvc #0"
-	    : "=r"(x0)
-	    : "r"(x0), "r"(x1), "r"(x2), "r"(x3)
-	    : "memory");
-
-	return x0;
-}
-
-static uint64_t psci_affinity_info(uint64_t target_affinity,
-				   uint64_t lowest_affinity_level)
-{
-	register uint64_t x0 asm("x0") = PSCI_0_2_FN64_AFFINITY_INFO;
-	register uint64_t x1 asm("x1") = target_affinity;
-	register uint64_t x2 asm("x2") = lowest_affinity_level;
-
-	asm("hvc #0"
-	    : "=r"(x0)
-	    : "r"(x0), "r"(x1), "r"(x2)
-	    : "memory");
-
-	return x0;
-}
-
-static void guest_main(uint64_t target_cpu)
-{
-	GUEST_ASSERT(!psci_cpu_on(target_cpu, CPU_ON_ENTRY_ADDR, CPU_ON_CONTEXT_ID));
-	uint64_t target_state;
-
-	do {
-		target_state = psci_affinity_info(target_cpu, 0);
-
-		GUEST_ASSERT((target_state == PSCI_0_2_AFFINITY_LEVEL_ON) ||
-			     (target_state == PSCI_0_2_AFFINITY_LEVEL_OFF));
-	} while (target_state != PSCI_0_2_AFFINITY_LEVEL_ON);
-
-	GUEST_DONE();
-}
-
-int main(void)
-{
-	uint64_t target_mpidr, obs_pc, obs_x0;
-	struct kvm_vcpu_init init;
-	struct kvm_vm *vm;
-	struct ucall uc;
-
-	vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
-	kvm_vm_elf_load(vm, program_invocation_name);
-	ucall_init(vm, NULL);
-
-	vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init);
-	init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2);
-
-	aarch64_vcpu_add_default(vm, VCPU_ID_SOURCE, &init, guest_main);
-
-	/*
-	 * make sure the target is already off when executing the test.
-	 */
-	init.features[0] |= (1 << KVM_ARM_VCPU_POWER_OFF);
-	aarch64_vcpu_add_default(vm, VCPU_ID_TARGET, &init, guest_main);
-
-	get_reg(vm, VCPU_ID_TARGET, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), &target_mpidr);
-	vcpu_args_set(vm, VCPU_ID_SOURCE, 1, target_mpidr & MPIDR_HWID_BITMASK);
-	vcpu_run(vm, VCPU_ID_SOURCE);
-
-	switch (get_ucall(vm, VCPU_ID_SOURCE, &uc)) {
-	case UCALL_DONE:
-		break;
-	case UCALL_ABORT:
-		TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], __FILE__,
-			  uc.args[1]);
-		break;
-	default:
-		TEST_FAIL("Unhandled ucall: %lu", uc.cmd);
-	}
-
-	get_reg(vm, VCPU_ID_TARGET, ARM64_CORE_REG(regs.pc), &obs_pc);
-	get_reg(vm, VCPU_ID_TARGET, ARM64_CORE_REG(regs.regs[0]), &obs_x0);
-
-	TEST_ASSERT(obs_pc == CPU_ON_ENTRY_ADDR,
-		    "unexpected target cpu pc: %lx (expected: %lx)",
-		    obs_pc, CPU_ON_ENTRY_ADDR);
-	TEST_ASSERT(obs_x0 == CPU_ON_CONTEXT_ID,
-		    "unexpected target context id: %lx (expected: %lx)",
-		    obs_x0, CPU_ON_CONTEXT_ID);
-
-	kvm_vm_free(vm);
-	return 0;
-}
diff --git a/tools/testing/selftests/kvm/aarch64/psci_test.c b/tools/testing/selftests/kvm/aarch64/psci_test.c
new file mode 100644
index 000000000000..4c5f6814030f
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/psci_test.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * psci_cpu_on_test - Test that the observable state of a vCPU targeted by the
+ * CPU_ON PSCI call matches what the caller requested.
+ *
+ * Copyright (c) 2021 Google LLC.
+ *
+ * This is a regression test for a race between KVM servicing the PSCI call and
+ * userspace reading the vCPUs registers.
+ */
+
+#define _GNU_SOURCE
+
+#include <linux/psci.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+#define VCPU_ID_SOURCE 0
+#define VCPU_ID_TARGET 1
+
+#define CPU_ON_ENTRY_ADDR 0xfeedf00dul
+#define CPU_ON_CONTEXT_ID 0xdeadc0deul
+
+static uint64_t psci_cpu_on(uint64_t target_cpu, uint64_t entry_addr,
+			    uint64_t context_id)
+{
+	register uint64_t x0 asm("x0") = PSCI_0_2_FN64_CPU_ON;
+	register uint64_t x1 asm("x1") = target_cpu;
+	register uint64_t x2 asm("x2") = entry_addr;
+	register uint64_t x3 asm("x3") = context_id;
+
+	asm("hvc #0"
+	    : "=r"(x0)
+	    : "r"(x0), "r"(x1), "r"(x2), "r"(x3)
+	    : "memory");
+
+	return x0;
+}
+
+static uint64_t psci_affinity_info(uint64_t target_affinity,
+				   uint64_t lowest_affinity_level)
+{
+	register uint64_t x0 asm("x0") = PSCI_0_2_FN64_AFFINITY_INFO;
+	register uint64_t x1 asm("x1") = target_affinity;
+	register uint64_t x2 asm("x2") = lowest_affinity_level;
+
+	asm("hvc #0"
+	    : "=r"(x0)
+	    : "r"(x0), "r"(x1), "r"(x2)
+	    : "memory");
+
+	return x0;
+}
+
+static void guest_main(uint64_t target_cpu)
+{
+	GUEST_ASSERT(!psci_cpu_on(target_cpu, CPU_ON_ENTRY_ADDR, CPU_ON_CONTEXT_ID));
+	uint64_t target_state;
+
+	do {
+		target_state = psci_affinity_info(target_cpu, 0);
+
+		GUEST_ASSERT((target_state == PSCI_0_2_AFFINITY_LEVEL_ON) ||
+			     (target_state == PSCI_0_2_AFFINITY_LEVEL_OFF));
+	} while (target_state != PSCI_0_2_AFFINITY_LEVEL_ON);
+
+	GUEST_DONE();
+}
+
+int main(void)
+{
+	uint64_t target_mpidr, obs_pc, obs_x0;
+	struct kvm_vcpu_init init;
+	struct kvm_vm *vm;
+	struct ucall uc;
+
+	vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+	kvm_vm_elf_load(vm, program_invocation_name);
+	ucall_init(vm, NULL);
+
+	vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init);
+	init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2);
+
+	aarch64_vcpu_add_default(vm, VCPU_ID_SOURCE, &init, guest_main);
+
+	/*
+	 * make sure the target is already off when executing the test.
+	 */
+	init.features[0] |= (1 << KVM_ARM_VCPU_POWER_OFF);
+	aarch64_vcpu_add_default(vm, VCPU_ID_TARGET, &init, guest_main);
+
+	get_reg(vm, VCPU_ID_TARGET, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), &target_mpidr);
+	vcpu_args_set(vm, VCPU_ID_SOURCE, 1, target_mpidr & MPIDR_HWID_BITMASK);
+	vcpu_run(vm, VCPU_ID_SOURCE);
+
+	switch (get_ucall(vm, VCPU_ID_SOURCE, &uc)) {
+	case UCALL_DONE:
+		break;
+	case UCALL_ABORT:
+		TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], __FILE__,
+			  uc.args[1]);
+		break;
+	default:
+		TEST_FAIL("Unhandled ucall: %lu", uc.cmd);
+	}
+
+	get_reg(vm, VCPU_ID_TARGET, ARM64_CORE_REG(regs.pc), &obs_pc);
+	get_reg(vm, VCPU_ID_TARGET, ARM64_CORE_REG(regs.regs[0]), &obs_x0);
+
+	TEST_ASSERT(obs_pc == CPU_ON_ENTRY_ADDR,
+		    "unexpected target cpu pc: %lx (expected: %lx)",
+		    obs_pc, CPU_ON_ENTRY_ADDR);
+	TEST_ASSERT(obs_x0 == CPU_ON_CONTEXT_ID,
+		    "unexpected target context id: %lx (expected: %lx)",
+		    obs_x0, CPU_ON_CONTEXT_ID);
+
+	kvm_vm_free(vm);
+	return 0;
+}
-- 
cgit 


From e918e2bc52c8ac1cccd6ef822ac23eded41761b6 Mon Sep 17 00:00:00 2001
From: Oliver Upton <oupton@google.com>
Date: Sat, 9 Apr 2022 18:45:46 +0000
Subject: selftests: KVM: Create helper for making SMCCC calls

The PSCI and PV stolen time tests both need to make SMCCC calls within
the guest. Create a helper for making SMCCC calls and rework the
existing tests to use the library function.

Signed-off-by: Oliver Upton <oupton@google.com>
Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220409184549.1681189-11-oupton@google.com
---
 tools/testing/selftests/kvm/aarch64/psci_test.c    | 25 +++++++---------------
 .../selftests/kvm/include/aarch64/processor.h      | 22 +++++++++++++++++++
 .../testing/selftests/kvm/lib/aarch64/processor.c  | 25 ++++++++++++++++++++++
 tools/testing/selftests/kvm/steal_time.c           | 13 +++--------
 4 files changed, 58 insertions(+), 27 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/aarch64/psci_test.c b/tools/testing/selftests/kvm/aarch64/psci_test.c
index 4c5f6814030f..8c998f0b802c 100644
--- a/tools/testing/selftests/kvm/aarch64/psci_test.c
+++ b/tools/testing/selftests/kvm/aarch64/psci_test.c
@@ -26,32 +26,23 @@
 static uint64_t psci_cpu_on(uint64_t target_cpu, uint64_t entry_addr,
 			    uint64_t context_id)
 {
-	register uint64_t x0 asm("x0") = PSCI_0_2_FN64_CPU_ON;
-	register uint64_t x1 asm("x1") = target_cpu;
-	register uint64_t x2 asm("x2") = entry_addr;
-	register uint64_t x3 asm("x3") = context_id;
+	struct arm_smccc_res res;
 
-	asm("hvc #0"
-	    : "=r"(x0)
-	    : "r"(x0), "r"(x1), "r"(x2), "r"(x3)
-	    : "memory");
+	smccc_hvc(PSCI_0_2_FN64_CPU_ON, target_cpu, entry_addr, context_id,
+		  0, 0, 0, 0, &res);
 
-	return x0;
+	return res.a0;
 }
 
 static uint64_t psci_affinity_info(uint64_t target_affinity,
 				   uint64_t lowest_affinity_level)
 {
-	register uint64_t x0 asm("x0") = PSCI_0_2_FN64_AFFINITY_INFO;
-	register uint64_t x1 asm("x1") = target_affinity;
-	register uint64_t x2 asm("x2") = lowest_affinity_level;
+	struct arm_smccc_res res;
 
-	asm("hvc #0"
-	    : "=r"(x0)
-	    : "r"(x0), "r"(x1), "r"(x2)
-	    : "memory");
+	smccc_hvc(PSCI_0_2_FN64_AFFINITY_INFO, target_affinity, lowest_affinity_level,
+		  0, 0, 0, 0, 0, &res);
 
-	return x0;
+	return res.a0;
 }
 
 static void guest_main(uint64_t target_cpu)
diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h
index 8f9f46979a00..59ece9d4e0d1 100644
--- a/tools/testing/selftests/kvm/include/aarch64/processor.h
+++ b/tools/testing/selftests/kvm/include/aarch64/processor.h
@@ -185,4 +185,26 @@ static inline void local_irq_disable(void)
 	asm volatile("msr daifset, #3" : : : "memory");
 }
 
+/**
+ * struct arm_smccc_res - Result from SMC/HVC call
+ * @a0-a3 result values from registers 0 to 3
+ */
+struct arm_smccc_res {
+	unsigned long a0;
+	unsigned long a1;
+	unsigned long a2;
+	unsigned long a3;
+};
+
+/**
+ * smccc_hvc - Invoke a SMCCC function using the hvc conduit
+ * @function_id: the SMCCC function to be called
+ * @arg0-arg6: SMCCC function arguments, corresponding to registers x1-x7
+ * @res: pointer to write the return values from registers x0-x3
+ *
+ */
+void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
+	       uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
+	       uint64_t arg6, struct arm_smccc_res *res);
+
 #endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index 9343d82519b4..6a041289fa80 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -500,3 +500,28 @@ void __attribute__((constructor)) init_guest_modes(void)
 {
        guest_modes_append_default();
 }
+
+void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
+	       uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
+	       uint64_t arg6, struct arm_smccc_res *res)
+{
+	asm volatile("mov   w0, %w[function_id]\n"
+		     "mov   x1, %[arg0]\n"
+		     "mov   x2, %[arg1]\n"
+		     "mov   x3, %[arg2]\n"
+		     "mov   x4, %[arg3]\n"
+		     "mov   x5, %[arg4]\n"
+		     "mov   x6, %[arg5]\n"
+		     "mov   x7, %[arg6]\n"
+		     "hvc   #0\n"
+		     "mov   %[res0], x0\n"
+		     "mov   %[res1], x1\n"
+		     "mov   %[res2], x2\n"
+		     "mov   %[res3], x3\n"
+		     : [res0] "=r"(res->a0), [res1] "=r"(res->a1),
+		       [res2] "=r"(res->a2), [res3] "=r"(res->a3)
+		     : [function_id] "r"(function_id), [arg0] "r"(arg0),
+		       [arg1] "r"(arg1), [arg2] "r"(arg2), [arg3] "r"(arg3),
+		       [arg4] "r"(arg4), [arg5] "r"(arg5), [arg6] "r"(arg6)
+		     : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7");
+}
diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c
index 62f2eb9ee3d5..8c4e811bd586 100644
--- a/tools/testing/selftests/kvm/steal_time.c
+++ b/tools/testing/selftests/kvm/steal_time.c
@@ -118,17 +118,10 @@ struct st_time {
 
 static int64_t smccc(uint32_t func, uint64_t arg)
 {
-	unsigned long ret;
+	struct arm_smccc_res res;
 
-	asm volatile(
-		"mov	w0, %w1\n"
-		"mov	x1, %2\n"
-		"hvc	#0\n"
-		"mov	%0, x0\n"
-	: "=r" (ret) : "r" (func), "r" (arg) :
-	  "x0", "x1", "x2", "x3");
-
-	return ret;
+	smccc_hvc(func, arg, 0, 0, 0, 0, 0, 0, &res);
+	return res.a0;
 }
 
 static void check_status(struct st_time *st)
-- 
cgit 


From 5ca24697d54027c1c94c94a5b920a75448108ed0 Mon Sep 17 00:00:00 2001
From: Raghavendra Rao Ananta <rananta@google.com>
Date: Mon, 2 May 2022 23:38:52 +0000
Subject: selftests: KVM: aarch64: Introduce hypercall ABI test

Introduce a KVM selftest to check the hypercall interface
for arm64 platforms. The test validates the user-space'
[GET|SET]_ONE_REG interface to read/write the psuedo-firmware
registers as well as its effects on the guest upon certain
configurations.

Signed-off-by: Raghavendra Rao Ananta <rananta@google.com>
Reviewed-by: Gavin Shan <gshan@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220502233853.1233742-9-rananta@google.com
---
 tools/testing/selftests/kvm/.gitignore           |   1 +
 tools/testing/selftests/kvm/Makefile             |   1 +
 tools/testing/selftests/kvm/aarch64/hypercalls.c | 336 +++++++++++++++++++++++
 3 files changed, 338 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/aarch64/hypercalls.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 1bb575dfc42e..b17e464ec661 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -2,6 +2,7 @@
 /aarch64/arch_timer
 /aarch64/debug-exceptions
 /aarch64/get-reg-list
+/aarch64/hypercalls
 /aarch64/psci_test
 /aarch64/vcpu_width_config
 /aarch64/vgic_init
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index c2cf4d318296..97eef0c03d3b 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -105,6 +105,7 @@ TEST_GEN_PROGS_x86_64 += system_counter_offset_test
 TEST_GEN_PROGS_aarch64 += aarch64/arch_timer
 TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
 TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
+TEST_GEN_PROGS_aarch64 += aarch64/hypercalls
 TEST_GEN_PROGS_aarch64 += aarch64/psci_test
 TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config
 TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
diff --git a/tools/testing/selftests/kvm/aarch64/hypercalls.c b/tools/testing/selftests/kvm/aarch64/hypercalls.c
new file mode 100644
index 000000000000..41e0210b7a5e
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/hypercalls.c
@@ -0,0 +1,336 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/* hypercalls: Check the ARM64's psuedo-firmware bitmap register interface.
+ *
+ * The test validates the basic hypercall functionalities that are exposed
+ * via the psuedo-firmware bitmap register. This includes the registers'
+ * read/write behavior before and after the VM has started, and if the
+ * hypercalls are properly masked or unmasked to the guest when disabled or
+ * enabled from the KVM userspace, respectively.
+ */
+
+#include <errno.h>
+#include <linux/arm-smccc.h>
+#include <asm/kvm.h>
+#include <kvm_util.h>
+
+#include "processor.h"
+
+#define FW_REG_ULIMIT_VAL(max_feat_bit) (GENMASK(max_feat_bit, 0))
+
+/* Last valid bits of the bitmapped firmware registers */
+#define KVM_REG_ARM_STD_BMAP_BIT_MAX		0
+#define KVM_REG_ARM_STD_HYP_BMAP_BIT_MAX	0
+#define KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_MAX	1
+
+struct kvm_fw_reg_info {
+	uint64_t reg;		/* Register definition */
+	uint64_t max_feat_bit;	/* Bit that represents the upper limit of the feature-map */
+};
+
+#define FW_REG_INFO(r)			\
+	{					\
+		.reg = r,			\
+		.max_feat_bit = r##_BIT_MAX,	\
+	}
+
+static const struct kvm_fw_reg_info fw_reg_info[] = {
+	FW_REG_INFO(KVM_REG_ARM_STD_BMAP),
+	FW_REG_INFO(KVM_REG_ARM_STD_HYP_BMAP),
+	FW_REG_INFO(KVM_REG_ARM_VENDOR_HYP_BMAP),
+};
+
+enum test_stage {
+	TEST_STAGE_REG_IFACE,
+	TEST_STAGE_HVC_IFACE_FEAT_DISABLED,
+	TEST_STAGE_HVC_IFACE_FEAT_ENABLED,
+	TEST_STAGE_HVC_IFACE_FALSE_INFO,
+	TEST_STAGE_END,
+};
+
+static int stage = TEST_STAGE_REG_IFACE;
+
+struct test_hvc_info {
+	uint32_t func_id;
+	uint64_t arg1;
+};
+
+#define TEST_HVC_INFO(f, a1)	\
+	{			\
+		.func_id = f,	\
+		.arg1 = a1,	\
+	}
+
+static const struct test_hvc_info hvc_info[] = {
+	/* KVM_REG_ARM_STD_BMAP */
+	TEST_HVC_INFO(ARM_SMCCC_TRNG_VERSION, 0),
+	TEST_HVC_INFO(ARM_SMCCC_TRNG_FEATURES, ARM_SMCCC_TRNG_RND64),
+	TEST_HVC_INFO(ARM_SMCCC_TRNG_GET_UUID, 0),
+	TEST_HVC_INFO(ARM_SMCCC_TRNG_RND32, 0),
+	TEST_HVC_INFO(ARM_SMCCC_TRNG_RND64, 0),
+
+	/* KVM_REG_ARM_STD_HYP_BMAP */
+	TEST_HVC_INFO(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_HV_PV_TIME_FEATURES),
+	TEST_HVC_INFO(ARM_SMCCC_HV_PV_TIME_FEATURES, ARM_SMCCC_HV_PV_TIME_ST),
+	TEST_HVC_INFO(ARM_SMCCC_HV_PV_TIME_ST, 0),
+
+	/* KVM_REG_ARM_VENDOR_HYP_BMAP */
+	TEST_HVC_INFO(ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID,
+			ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID),
+	TEST_HVC_INFO(ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID, 0),
+	TEST_HVC_INFO(ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID, KVM_PTP_VIRT_COUNTER),
+};
+
+/* Feed false hypercall info to test the KVM behavior */
+static const struct test_hvc_info false_hvc_info[] = {
+	/* Feature support check against a different family of hypercalls */
+	TEST_HVC_INFO(ARM_SMCCC_TRNG_FEATURES, ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID),
+	TEST_HVC_INFO(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_TRNG_RND64),
+	TEST_HVC_INFO(ARM_SMCCC_HV_PV_TIME_FEATURES, ARM_SMCCC_TRNG_RND64),
+};
+
+static void guest_test_hvc(const struct test_hvc_info *hc_info)
+{
+	unsigned int i;
+	struct arm_smccc_res res;
+	unsigned int hvc_info_arr_sz;
+
+	hvc_info_arr_sz =
+	hc_info == hvc_info ? ARRAY_SIZE(hvc_info) : ARRAY_SIZE(false_hvc_info);
+
+	for (i = 0; i < hvc_info_arr_sz; i++, hc_info++) {
+		memset(&res, 0, sizeof(res));
+		smccc_hvc(hc_info->func_id, hc_info->arg1, 0, 0, 0, 0, 0, 0, &res);
+
+		switch (stage) {
+		case TEST_STAGE_HVC_IFACE_FEAT_DISABLED:
+		case TEST_STAGE_HVC_IFACE_FALSE_INFO:
+			GUEST_ASSERT_3(res.a0 == SMCCC_RET_NOT_SUPPORTED,
+					res.a0, hc_info->func_id, hc_info->arg1);
+			break;
+		case TEST_STAGE_HVC_IFACE_FEAT_ENABLED:
+			GUEST_ASSERT_3(res.a0 != SMCCC_RET_NOT_SUPPORTED,
+					res.a0, hc_info->func_id, hc_info->arg1);
+			break;
+		default:
+			GUEST_ASSERT_1(0, stage);
+		}
+	}
+}
+
+static void guest_code(void)
+{
+	while (stage != TEST_STAGE_END) {
+		switch (stage) {
+		case TEST_STAGE_REG_IFACE:
+			break;
+		case TEST_STAGE_HVC_IFACE_FEAT_DISABLED:
+		case TEST_STAGE_HVC_IFACE_FEAT_ENABLED:
+			guest_test_hvc(hvc_info);
+			break;
+		case TEST_STAGE_HVC_IFACE_FALSE_INFO:
+			guest_test_hvc(false_hvc_info);
+			break;
+		default:
+			GUEST_ASSERT_1(0, stage);
+		}
+
+		GUEST_SYNC(stage);
+	}
+
+	GUEST_DONE();
+}
+
+static int set_fw_reg(struct kvm_vm *vm, uint64_t id, uint64_t val)
+{
+	struct kvm_one_reg reg = {
+		.id = id,
+		.addr = (uint64_t)&val,
+	};
+
+	return _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg);
+}
+
+static void get_fw_reg(struct kvm_vm *vm, uint64_t id, uint64_t *addr)
+{
+	struct kvm_one_reg reg = {
+		.id = id,
+		.addr = (uint64_t)addr,
+	};
+
+	vcpu_ioctl(vm, 0, KVM_GET_ONE_REG, &reg);
+}
+
+struct st_time {
+	uint32_t rev;
+	uint32_t attr;
+	uint64_t st_time;
+};
+
+#define STEAL_TIME_SIZE		((sizeof(struct st_time) + 63) & ~63)
+#define ST_GPA_BASE		(1 << 30)
+
+static void steal_time_init(struct kvm_vm *vm)
+{
+	uint64_t st_ipa = (ulong)ST_GPA_BASE;
+	unsigned int gpages;
+	struct kvm_device_attr dev = {
+		.group = KVM_ARM_VCPU_PVTIME_CTRL,
+		.attr = KVM_ARM_VCPU_PVTIME_IPA,
+		.addr = (uint64_t)&st_ipa,
+	};
+
+	gpages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, STEAL_TIME_SIZE);
+	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, gpages, 0);
+
+	vcpu_ioctl(vm, 0, KVM_SET_DEVICE_ATTR, &dev);
+}
+
+static void test_fw_regs_before_vm_start(struct kvm_vm *vm)
+{
+	uint64_t val;
+	unsigned int i;
+	int ret;
+
+	for (i = 0; i < ARRAY_SIZE(fw_reg_info); i++) {
+		const struct kvm_fw_reg_info *reg_info = &fw_reg_info[i];
+
+		/* First 'read' should be an upper limit of the features supported */
+		get_fw_reg(vm, reg_info->reg, &val);
+		TEST_ASSERT(val == FW_REG_ULIMIT_VAL(reg_info->max_feat_bit),
+			"Expected all the features to be set for reg: 0x%lx; expected: 0x%lx; read: 0x%lx\n",
+			reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), val);
+
+		/* Test a 'write' by disabling all the features of the register map */
+		ret = set_fw_reg(vm, reg_info->reg, 0);
+		TEST_ASSERT(ret == 0,
+			"Failed to clear all the features of reg: 0x%lx; ret: %d\n",
+			reg_info->reg, errno);
+
+		get_fw_reg(vm, reg_info->reg, &val);
+		TEST_ASSERT(val == 0,
+			"Expected all the features to be cleared for reg: 0x%lx\n", reg_info->reg);
+
+		/*
+		 * Test enabling a feature that's not supported.
+		 * Avoid this check if all the bits are occupied.
+		 */
+		if (reg_info->max_feat_bit < 63) {
+			ret = set_fw_reg(vm, reg_info->reg, BIT(reg_info->max_feat_bit + 1));
+			TEST_ASSERT(ret != 0 && errno == EINVAL,
+			"Unexpected behavior or return value (%d) while setting an unsupported feature for reg: 0x%lx\n",
+			errno, reg_info->reg);
+		}
+	}
+}
+
+static void test_fw_regs_after_vm_start(struct kvm_vm *vm)
+{
+	uint64_t val;
+	unsigned int i;
+	int ret;
+
+	for (i = 0; i < ARRAY_SIZE(fw_reg_info); i++) {
+		const struct kvm_fw_reg_info *reg_info = &fw_reg_info[i];
+
+		/*
+		 * Before starting the VM, the test clears all the bits.
+		 * Check if that's still the case.
+		 */
+		get_fw_reg(vm, reg_info->reg, &val);
+		TEST_ASSERT(val == 0,
+			"Expected all the features to be cleared for reg: 0x%lx\n",
+			reg_info->reg);
+
+		/*
+		 * Since the VM has run at least once, KVM shouldn't allow modification of
+		 * the registers and should return EBUSY. Set the registers and check for
+		 * the expected errno.
+		 */
+		ret = set_fw_reg(vm, reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit));
+		TEST_ASSERT(ret != 0 && errno == EBUSY,
+		"Unexpected behavior or return value (%d) while setting a feature while VM is running for reg: 0x%lx\n",
+		errno, reg_info->reg);
+	}
+}
+
+static struct kvm_vm *test_vm_create(void)
+{
+	struct kvm_vm *vm;
+
+	vm = vm_create_default(0, 0, guest_code);
+
+	ucall_init(vm, NULL);
+	steal_time_init(vm);
+
+	return vm;
+}
+
+static struct kvm_vm *test_guest_stage(struct kvm_vm *vm)
+{
+	struct kvm_vm *ret_vm = vm;
+
+	pr_debug("Stage: %d\n", stage);
+
+	switch (stage) {
+	case TEST_STAGE_REG_IFACE:
+		test_fw_regs_after_vm_start(vm);
+		break;
+	case TEST_STAGE_HVC_IFACE_FEAT_DISABLED:
+		/* Start a new VM so that all the features are now enabled by default */
+		kvm_vm_free(vm);
+		ret_vm = test_vm_create();
+		break;
+	case TEST_STAGE_HVC_IFACE_FEAT_ENABLED:
+	case TEST_STAGE_HVC_IFACE_FALSE_INFO:
+		break;
+	default:
+		TEST_FAIL("Unknown test stage: %d\n", stage);
+	}
+
+	stage++;
+	sync_global_to_guest(vm, stage);
+
+	return ret_vm;
+}
+
+static void test_run(void)
+{
+	struct kvm_vm *vm;
+	struct ucall uc;
+	bool guest_done = false;
+
+	vm = test_vm_create();
+
+	test_fw_regs_before_vm_start(vm);
+
+	while (!guest_done) {
+		vcpu_run(vm, 0);
+
+		switch (get_ucall(vm, 0, &uc)) {
+		case UCALL_SYNC:
+			vm = test_guest_stage(vm);
+			break;
+		case UCALL_DONE:
+			guest_done = true;
+			break;
+		case UCALL_ABORT:
+			TEST_FAIL("%s at %s:%ld\n\tvalues: 0x%lx, 0x%lx; 0x%lx, stage: %u",
+			(const char *)uc.args[0], __FILE__, uc.args[1],
+			uc.args[2], uc.args[3], uc.args[4], stage);
+			break;
+		default:
+			TEST_FAIL("Unexpected guest exit\n");
+		}
+	}
+
+	kvm_vm_free(vm);
+}
+
+int main(void)
+{
+	setbuf(stdout, NULL);
+
+	test_run();
+	return 0;
+}
-- 
cgit 


From 920f4a55fdaa6f68b31c50cca6e51fecac5857a0 Mon Sep 17 00:00:00 2001
From: Raghavendra Rao Ananta <rananta@google.com>
Date: Mon, 2 May 2022 23:38:53 +0000
Subject: selftests: KVM: aarch64: Add the bitmap firmware registers to
 get-reg-list

Add the psuedo-firmware registers KVM_REG_ARM_STD_BMAP,
KVM_REG_ARM_STD_HYP_BMAP, and KVM_REG_ARM_VENDOR_HYP_BMAP to
the base_regs[] list.

Also, add the COPROC support for KVM_REG_ARM_FW_FEAT_BMAP.

Signed-off-by: Raghavendra Rao Ananta <rananta@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220502233853.1233742-10-rananta@google.com
---
 tools/testing/selftests/kvm/aarch64/get-reg-list.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
index 0b571f3fe64c..d3a7dbfcbb3d 100644
--- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
@@ -294,6 +294,11 @@ static void print_reg(struct vcpu_config *c, __u64 id)
 			    "%s: Unexpected bits set in FW reg id: 0x%llx", config_name(c), id);
 		printf("\tKVM_REG_ARM_FW_REG(%lld),\n", id & 0xffff);
 		break;
+	case KVM_REG_ARM_FW_FEAT_BMAP:
+		TEST_ASSERT(id == KVM_REG_ARM_FW_FEAT_BMAP_REG(id & 0xffff),
+			    "%s: Unexpected bits set in the bitmap feature FW reg id: 0x%llx", config_name(c), id);
+		printf("\tKVM_REG_ARM_FW_FEAT_BMAP_REG(%lld),\n", id & 0xffff);
+		break;
 	case KVM_REG_ARM64_SVE:
 		if (has_cap(c, KVM_CAP_ARM_SVE))
 			printf("\t%s,\n", sve_id_to_str(c, id));
@@ -692,6 +697,9 @@ static __u64 base_regs[] = {
 	KVM_REG_ARM_FW_REG(1),		/* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1 */
 	KVM_REG_ARM_FW_REG(2),		/* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 */
 	KVM_REG_ARM_FW_REG(3),		/* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3 */
+	KVM_REG_ARM_FW_FEAT_BMAP_REG(0),	/* KVM_REG_ARM_STD_BMAP */
+	KVM_REG_ARM_FW_FEAT_BMAP_REG(1),	/* KVM_REG_ARM_STD_HYP_BMAP */
+	KVM_REG_ARM_FW_FEAT_BMAP_REG(2),	/* KVM_REG_ARM_VENDOR_HYP_BMAP */
 	ARM64_SYS_REG(3, 3, 14, 3, 1),	/* CNTV_CTL_EL0 */
 	ARM64_SYS_REG(3, 3, 14, 3, 2),	/* CNTV_CVAL_EL0 */
 	ARM64_SYS_REG(3, 3, 14, 0, 2),
-- 
cgit 


From 922a1b520c5ffb09079dddeb0c686f9c008a9923 Mon Sep 17 00:00:00 2001
From: Sargun Dhillon <sargun@sargun.me>
Date: Tue, 3 May 2022 01:09:57 -0700
Subject: selftests/seccomp: Refactor get_proc_stat to split out file reading
 code

This splits up the get_proc_stat function to make it so we can use it as a
generic helper to read the nth field from multiple different files, versus
replicating the logic in multiple places.

Signed-off-by: Sargun Dhillon <sargun@sargun.me>
Cc: linux-kselftest@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20220503080958.20220-3-sargun@sargun.me
---
 tools/testing/selftests/seccomp/seccomp_bpf.c | 54 +++++++++++++++++++--------
 1 file changed, 38 insertions(+), 16 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 6001e9ecfaf5..e282f521f8d7 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -4297,32 +4297,54 @@ TEST_F(O_SUSPEND_SECCOMP, seize)
 	ASSERT_EQ(EPERM, errno);
 }
 
-static char get_proc_stat(int pid)
+/*
+ * get_nth - Get the nth, space separated entry in a file.
+ *
+ * Returns the length of the read field.
+ * Throws error if field is zero-lengthed.
+ */
+static ssize_t get_nth(struct __test_metadata *_metadata, const char *path,
+		     const unsigned int position, char **entry)
 {
-	char proc_path[100] = {0};
 	char *line = NULL;
-	size_t len = 0;
+	unsigned int i;
 	ssize_t nread;
-	char status;
+	size_t len = 0;
 	FILE *f;
-	int i;
 
-	snprintf(proc_path, sizeof(proc_path), "/proc/%d/stat", pid);
-	f = fopen(proc_path, "r");
-	if (f == NULL)
-		ksft_exit_fail_msg("%s - Could not open %s\n",
-				   strerror(errno), proc_path);
+	f = fopen(path, "r");
+	ASSERT_NE(f, NULL) {
+		TH_LOG("Coud not open %s: %s", path, strerror(errno));
+	}
 
-	for (i = 0; i < 3; i++) {
+	for (i = 0; i < position; i++) {
 		nread = getdelim(&line, &len, ' ', f);
-		if (nread <= 0)
-			ksft_exit_fail_msg("Failed to read status: %s\n",
-					   strerror(errno));
+		ASSERT_GE(nread, 0) {
+			TH_LOG("Failed to read %d entry in file %s", i, path);
+		}
 	}
+	fclose(f);
+
+	ASSERT_GT(nread, 0) {
+		TH_LOG("Entry in file %s had zero length", path);
+	}
+
+	*entry = line;
+	return nread - 1;
+}
+
+/* For a given PID, get the task state (D, R, etc...) */
+static char get_proc_stat(struct __test_metadata *_metadata, pid_t pid)
+{
+	char proc_path[100] = {0};
+	char status;
+	char *line;
+
+	snprintf(proc_path, sizeof(proc_path), "/proc/%d/stat", pid);
+	ASSERT_EQ(get_nth(_metadata, proc_path, 3, &line), 1);
 
 	status = *line;
 	free(line);
-	fclose(f);
 
 	return status;
 }
@@ -4383,7 +4405,7 @@ TEST(user_notification_fifo)
 	/* This spins until all of the children are sleeping */
 restart_wait:
 	for (i = 0; i < ARRAY_SIZE(pids); i++) {
-		if (get_proc_stat(pids[i]) != 'S') {
+		if (get_proc_stat(_metadata, pids[i]) != 'S') {
 			nanosleep(&delay, NULL);
 			goto restart_wait;
 		}
-- 
cgit 


From 3b96a9c522b2ee267fa1f46943ebc5d9cdd7b3dc Mon Sep 17 00:00:00 2001
From: Sargun Dhillon <sargun@sargun.me>
Date: Tue, 3 May 2022 01:09:58 -0700
Subject: selftests/seccomp: Add test for wait killable notifier

This verifies that if a filter is set up with the wait killable feature
that it obeys the semantics that non-fatal signals are ignored during
a notification after the notification is received.

Cases tested:
 * Non-fatal signal prior to receive
 * Non-fatal signal during receive
 * Fatal signal after receive

The normal signal handling is tested in user_notification_signal. That
behaviour remains unchanged.

On an unsupported kernel, these tests will immediately bail as it relies
on a new seccomp flag.

Signed-off-by: Sargun Dhillon <sargun@sargun.me>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20220503080958.20220-4-sargun@sargun.me
---
 tools/testing/selftests/seccomp/seccomp_bpf.c | 228 ++++++++++++++++++++++++++
 1 file changed, 228 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index e282f521f8d7..29c973f606b2 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -60,6 +60,8 @@
 #define SKIP(s, ...)	XFAIL(s, ##__VA_ARGS__)
 #endif
 
+#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
+
 #ifndef PR_SET_PTRACER
 # define PR_SET_PTRACER 0x59616d61
 #endif
@@ -269,6 +271,10 @@ struct seccomp_notif_addfd_big {
 #define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4)
 #endif
 
+#ifndef SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV
+#define SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV (1UL << 5)
+#endif
+
 #ifndef seccomp
 int seccomp(unsigned int op, unsigned int flags, void *args)
 {
@@ -4428,6 +4434,228 @@ restart_wait:
 	}
 }
 
+/* get_proc_syscall - Get the syscall in progress for a given pid
+ *
+ * Returns the current syscall number for a given process
+ * Returns -1 if not in syscall (running or blocked)
+ */
+static long get_proc_syscall(struct __test_metadata *_metadata, int pid)
+{
+	char proc_path[100] = {0};
+	long ret = -1;
+	ssize_t nread;
+	char *line;
+
+	snprintf(proc_path, sizeof(proc_path), "/proc/%d/syscall", pid);
+	nread = get_nth(_metadata, proc_path, 1, &line);
+	ASSERT_GT(nread, 0);
+
+	if (!strncmp("running", line, MIN(7, nread)))
+		ret = strtol(line, NULL, 16);
+
+	free(line);
+	return ret;
+}
+
+/* Ensure non-fatal signals prior to receive are unmodified */
+TEST(user_notification_wait_killable_pre_notification)
+{
+	struct sigaction new_action = {
+		.sa_handler = signal_handler,
+	};
+	int listener, status, sk_pair[2];
+	pid_t pid;
+	long ret;
+	char c;
+	/* 100 ms */
+	struct timespec delay = { .tv_nsec = 100000000 };
+
+	ASSERT_EQ(sigemptyset(&new_action.sa_mask), 0);
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret)
+	{
+		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+	}
+
+	ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0);
+
+	listener = user_notif_syscall(
+		__NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER |
+				      SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV);
+	ASSERT_GE(listener, 0);
+
+	/*
+	 * Check that we can kill the process with SIGUSR1 prior to receiving
+	 * the notification. SIGUSR1 is wired up to a custom signal handler,
+	 * and make sure it gets called.
+	 */
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		close(sk_pair[0]);
+		handled = sk_pair[1];
+
+		/* Setup the non-fatal sigaction without SA_RESTART */
+		if (sigaction(SIGUSR1, &new_action, NULL)) {
+			perror("sigaction");
+			exit(1);
+		}
+
+		ret = syscall(__NR_getppid);
+		/* Make sure we got a return from a signal interruption */
+		exit(ret != -1 || errno != EINTR);
+	}
+
+	/*
+	 * Make sure we've gotten to the seccomp user notification wait
+	 * from getppid prior to sending any signals
+	 */
+	while (get_proc_syscall(_metadata, pid) != __NR_getppid &&
+	       get_proc_stat(_metadata, pid) != 'S')
+		nanosleep(&delay, NULL);
+
+	/* Send non-fatal kill signal */
+	EXPECT_EQ(kill(pid, SIGUSR1), 0);
+
+	/* wait for process to exit (exit checks for EINTR) */
+	EXPECT_EQ(waitpid(pid, &status, 0), pid);
+	EXPECT_EQ(true, WIFEXITED(status));
+	EXPECT_EQ(0, WEXITSTATUS(status));
+
+	EXPECT_EQ(read(sk_pair[0], &c, 1), 1);
+}
+
+/* Ensure non-fatal signals after receive are blocked */
+TEST(user_notification_wait_killable)
+{
+	struct sigaction new_action = {
+		.sa_handler = signal_handler,
+	};
+	struct seccomp_notif_resp resp = {};
+	struct seccomp_notif req = {};
+	int listener, status, sk_pair[2];
+	pid_t pid;
+	long ret;
+	char c;
+	/* 100 ms */
+	struct timespec delay = { .tv_nsec = 100000000 };
+
+	ASSERT_EQ(sigemptyset(&new_action.sa_mask), 0);
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret)
+	{
+		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+	}
+
+	ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0);
+
+	listener = user_notif_syscall(
+		__NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER |
+				      SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV);
+	ASSERT_GE(listener, 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		close(sk_pair[0]);
+		handled = sk_pair[1];
+
+		/* Setup the sigaction without SA_RESTART */
+		if (sigaction(SIGUSR1, &new_action, NULL)) {
+			perror("sigaction");
+			exit(1);
+		}
+
+		/* Make sure that the syscall is completed (no EINTR) */
+		ret = syscall(__NR_getppid);
+		exit(ret != USER_NOTIF_MAGIC);
+	}
+
+	/*
+	 * Get the notification, to make move the notifying process into a
+	 * non-preemptible (TASK_KILLABLE) state.
+	 */
+	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+	/* Send non-fatal kill signal */
+	EXPECT_EQ(kill(pid, SIGUSR1), 0);
+
+	/*
+	 * Make sure the task enters moves to TASK_KILLABLE by waiting for
+	 * D (Disk Sleep) state after receiving non-fatal signal.
+	 */
+	while (get_proc_stat(_metadata, pid) != 'D')
+		nanosleep(&delay, NULL);
+
+	resp.id = req.id;
+	resp.val = USER_NOTIF_MAGIC;
+	/* Make sure the notification is found and able to be replied to */
+	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
+
+	/*
+	 * Make sure that the signal handler does get called once we're back in
+	 * userspace.
+	 */
+	EXPECT_EQ(read(sk_pair[0], &c, 1), 1);
+	/* wait for process to exit (exit checks for USER_NOTIF_MAGIC) */
+	EXPECT_EQ(waitpid(pid, &status, 0), pid);
+	EXPECT_EQ(true, WIFEXITED(status));
+	EXPECT_EQ(0, WEXITSTATUS(status));
+}
+
+/* Ensure fatal signals after receive are not blocked */
+TEST(user_notification_wait_killable_fatal)
+{
+	struct seccomp_notif req = {};
+	int listener, status;
+	pid_t pid;
+	long ret;
+	/* 100 ms */
+	struct timespec delay = { .tv_nsec = 100000000 };
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret)
+	{
+		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+	}
+
+	listener = user_notif_syscall(
+		__NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER |
+				      SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV);
+	ASSERT_GE(listener, 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		/* This should never complete as it should get a SIGTERM */
+		syscall(__NR_getppid);
+		exit(1);
+	}
+
+	while (get_proc_stat(_metadata, pid) != 'S')
+		nanosleep(&delay, NULL);
+
+	/*
+	 * Get the notification, to make move the notifying process into a
+	 * non-preemptible (TASK_KILLABLE) state.
+	 */
+	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+	/* Kill the process with a fatal signal */
+	EXPECT_EQ(kill(pid, SIGTERM), 0);
+
+	/*
+	 * Wait for the process to exit, and make sure the process terminated
+	 * due to the SIGTERM signal.
+	 */
+	EXPECT_EQ(waitpid(pid, &status, 0), pid);
+	EXPECT_EQ(true, WIFSIGNALED(status));
+	EXPECT_EQ(SIGTERM, WTERMSIG(status));
+}
+
 /*
  * TODO:
  * - expand NNP testing
-- 
cgit 


From b3b71bf915210969ef4807e39c8f037c40a05e76 Mon Sep 17 00:00:00 2001
From: Mat Martineau <mathew.j.martineau@linux.intel.com>
Date: Mon, 2 May 2022 13:52:32 -0700
Subject: selftests: mptcp: ADD_ADDR echo test with missing userspace daemon

Check userspace PM behavior to ensure ADD_ADDR echoes are only sent when
there is an active userspace daemon. If the daemon is restarting or
hasn't loaded yet, the missing echo will cause the peer to retransmit
the ADD_ADDR - and hopefully the daemon will be ready to receive it at
that later time.

Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index b27854f976f7..d1de1e7702fb 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -2719,6 +2719,17 @@ userspace_tests()
 		chk_add_nr 0 0
 	fi
 
+	# userspace pm type does not echo add_addr without daemon
+	if reset "userspace pm no echo w/o daemon"; then
+		set_userspace_pm $ns2
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 0 2
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 0 0 0
+		chk_add_nr 1 0
+	fi
+
 	# userspace pm type rejects join
 	if reset "userspace pm type rejects join"; then
 		set_userspace_pm $ns1
-- 
cgit 


From 7d4e91e06486a9adb9126ce14ccb4f564ebeceae Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 2 May 2022 18:54:24 +0300
Subject: selftests: forwarding: add basic QoS classification test for Ocelot
 switches

Test basic (port-default, VLAN PCP and IP DSCP) QoS classification for
Ocelot switches. Advanced QoS classification using tc filters is covered
by tc_flower_chains.sh in the same directory.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://lore.kernel.org/r/20220502155424.4098917-1-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/drivers/net/ocelot/basic_qos.sh      | 253 +++++++++++++++++++++
 1 file changed, 253 insertions(+)
 create mode 100755 tools/testing/selftests/drivers/net/ocelot/basic_qos.sh

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/drivers/net/ocelot/basic_qos.sh b/tools/testing/selftests/drivers/net/ocelot/basic_qos.sh
new file mode 100755
index 000000000000..c51c83421c61
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/ocelot/basic_qos.sh
@@ -0,0 +1,253 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright 2022 NXP
+
+# The script is mostly generic, with the exception of the
+# ethtool per-TC counter names ("rx_green_prio_${tc}")
+
+WAIT_TIME=1
+NUM_NETIFS=4
+STABLE_MAC_ADDRS=yes
+NETIF_CREATE=no
+lib_dir=$(dirname $0)/../../../net/forwarding
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+
+require_command dcb
+
+h1=${NETIFS[p1]}
+swp1=${NETIFS[p2]}
+swp2=${NETIFS[p3]}
+h2=${NETIFS[p4]}
+
+H1_IPV4="192.0.2.1"
+H2_IPV4="192.0.2.2"
+H1_IPV6="2001:db8:1::1"
+H2_IPV6="2001:db8:1::2"
+
+h1_create()
+{
+	simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h1_vlan_create()
+{
+	local vid=$1
+
+	vlan_create $h1 $vid
+	simple_if_init $h1.$vid $H1_IPV4/24 $H1_IPV6/64
+	ip link set $h1.$vid type vlan \
+		egress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 \
+		ingress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+}
+
+h1_vlan_destroy()
+{
+	local vid=$1
+
+	simple_if_fini $h1.$vid $H1_IPV4/24 $H1_IPV6/64
+	vlan_destroy $h1 $vid
+}
+
+h2_vlan_create()
+{
+	local vid=$1
+
+	vlan_create $h2 $vid
+	simple_if_init $h2.$vid $H2_IPV4/24 $H2_IPV6/64
+	ip link set $h2.$vid type vlan \
+		egress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 \
+		ingress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+}
+
+h2_vlan_destroy()
+{
+	local vid=$1
+
+	simple_if_fini $h2.$vid $H2_IPV4/24 $H2_IPV6/64
+	vlan_destroy $h2 $vid
+}
+
+vlans_prepare()
+{
+	h1_vlan_create 100
+	h2_vlan_create 100
+
+	tc qdisc add dev ${h1}.100 clsact
+	tc filter add dev ${h1}.100 egress protocol ipv4 \
+		flower ip_proto icmp action skbedit priority 3
+	tc filter add dev ${h1}.100 egress protocol ipv6 \
+		flower ip_proto icmpv6 action skbedit priority 3
+}
+
+vlans_destroy()
+{
+	tc qdisc del dev ${h1}.100 clsact
+
+	h1_vlan_destroy 100
+	h2_vlan_destroy 100
+}
+
+switch_create()
+{
+	ip link set ${swp1} up
+	ip link set ${swp2} up
+
+	# Ports should trust VLAN PCP even with vlan_filtering=0
+	ip link add br0 type bridge
+	ip link set ${swp1} master br0
+	ip link set ${swp2} master br0
+	ip link set br0 up
+}
+
+switch_destroy()
+{
+	ip link del br0
+}
+
+setup_prepare()
+{
+	vrf_prepare
+
+	h1_create
+	h2_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	h2_destroy
+	h1_destroy
+	switch_destroy
+
+	vrf_cleanup
+}
+
+dscp_cs_to_tos()
+{
+	local dscp_cs=$1
+
+	# https://datatracker.ietf.org/doc/html/rfc2474
+	# 4.2.2.1  The Class Selector Codepoints
+	echo $((${dscp_cs} << 5))
+}
+
+run_test()
+{
+	local test_name=$1; shift
+	local if_name=$1; shift
+	local tc=$1; shift
+	local tos=$1; shift
+	local counter_name="rx_green_prio_${tc}"
+	local ipv4_before
+	local ipv4_after
+	local ipv6_before
+	local ipv6_after
+
+	ipv4_before=$(ethtool_stats_get ${swp1} "${counter_name}")
+	ping_do ${if_name} $H2_IPV4 "-Q ${tos}"
+	ipv4_after=$(ethtool_stats_get ${swp1} "${counter_name}")
+
+	if [ $((${ipv4_after} - ${ipv4_before})) -lt ${PING_COUNT} ]; then
+		RET=1
+	else
+		RET=0
+	fi
+	log_test "IPv4 ${test_name}"
+
+	ipv6_before=$(ethtool_stats_get ${swp1} "${counter_name}")
+	ping_do ${if_name} $H2_IPV6 "-Q ${tos}"
+	ipv6_after=$(ethtool_stats_get ${swp1} "${counter_name}")
+
+	if [ $((${ipv6_after} - ${ipv6_before})) -lt ${PING_COUNT} ]; then
+		RET=1
+	else
+		RET=0
+	fi
+	log_test "IPv6 ${test_name}"
+}
+
+port_default_prio_get()
+{
+	local if_name=$1
+	local prio
+
+	prio="$(dcb -j app show dev ${if_name} default-prio | \
+		jq '.default_prio[]')"
+	if [ -z "${prio}" ]; then
+		prio=0
+	fi
+
+	echo ${prio}
+}
+
+test_port_default()
+{
+	local orig=$(port_default_prio_get ${swp1})
+	local dmac=$(mac_get ${h2})
+
+	dcb app replace dev ${swp1} default-prio 5
+
+	run_test "Port-default QoS classification" ${h1} 5 0
+
+	dcb app replace dev ${swp1} default-prio ${orig}
+}
+
+test_vlan_pcp()
+{
+	vlans_prepare
+
+	run_test "Trusted VLAN PCP QoS classification" ${h1}.100 3 0
+
+	vlans_destroy
+}
+
+test_ip_dscp()
+{
+	local port_default=$(port_default_prio_get ${swp1})
+	local tos=$(dscp_cs_to_tos 4)
+
+	dcb app add dev ${swp1} dscp-prio CS4:4
+	run_test "Trusted DSCP QoS classification" ${h1} 4 ${tos}
+	dcb app del dev ${swp1} dscp-prio CS4:4
+
+	vlans_prepare
+	run_test "Untrusted DSCP QoS classification follows VLAN PCP" \
+		${h1}.100 3 ${tos}
+	vlans_destroy
+
+	run_test "Untrusted DSCP QoS classification follows port default" \
+		${h1} ${port_default} ${tos}
+}
+
+trap cleanup EXIT
+
+ALL_TESTS="
+	test_port_default
+	test_vlan_pcp
+	test_ip_dscp
+"
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit 


From 6689fb8f21ecf5fd99278b622c9579ffbc2742c2 Mon Sep 17 00:00:00 2001
From: Oliver Upton <oupton@google.com>
Date: Wed, 4 May 2022 03:24:42 +0000
Subject: selftests: KVM: Rename psci_cpu_on_test to psci_test

There are other interactions with PSCI worth testing; rename the PSCI
test to make it more generic.

No functional change intended.

Signed-off-by: Oliver Upton <oupton@google.com>
Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220504032446.4133305-9-oupton@google.com
---
 tools/testing/selftests/kvm/.gitignore             |   2 +-
 tools/testing/selftests/kvm/Makefile               |   2 +-
 .../selftests/kvm/aarch64/psci_cpu_on_test.c       | 121 ---------------------
 tools/testing/selftests/kvm/aarch64/psci_test.c    | 121 +++++++++++++++++++++
 4 files changed, 123 insertions(+), 123 deletions(-)
 delete mode 100644 tools/testing/selftests/kvm/aarch64/psci_cpu_on_test.c
 create mode 100644 tools/testing/selftests/kvm/aarch64/psci_test.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 0b0e4402bba6..1bb575dfc42e 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -2,7 +2,7 @@
 /aarch64/arch_timer
 /aarch64/debug-exceptions
 /aarch64/get-reg-list
-/aarch64/psci_cpu_on_test
+/aarch64/psci_test
 /aarch64/vcpu_width_config
 /aarch64/vgic_init
 /aarch64/vgic_irq
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 681b173aa87c..c2cf4d318296 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -105,7 +105,7 @@ TEST_GEN_PROGS_x86_64 += system_counter_offset_test
 TEST_GEN_PROGS_aarch64 += aarch64/arch_timer
 TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
 TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
-TEST_GEN_PROGS_aarch64 += aarch64/psci_cpu_on_test
+TEST_GEN_PROGS_aarch64 += aarch64/psci_test
 TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config
 TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
 TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq
diff --git a/tools/testing/selftests/kvm/aarch64/psci_cpu_on_test.c b/tools/testing/selftests/kvm/aarch64/psci_cpu_on_test.c
deleted file mode 100644
index 4c5f6814030f..000000000000
--- a/tools/testing/selftests/kvm/aarch64/psci_cpu_on_test.c
+++ /dev/null
@@ -1,121 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * psci_cpu_on_test - Test that the observable state of a vCPU targeted by the
- * CPU_ON PSCI call matches what the caller requested.
- *
- * Copyright (c) 2021 Google LLC.
- *
- * This is a regression test for a race between KVM servicing the PSCI call and
- * userspace reading the vCPUs registers.
- */
-
-#define _GNU_SOURCE
-
-#include <linux/psci.h>
-
-#include "kvm_util.h"
-#include "processor.h"
-#include "test_util.h"
-
-#define VCPU_ID_SOURCE 0
-#define VCPU_ID_TARGET 1
-
-#define CPU_ON_ENTRY_ADDR 0xfeedf00dul
-#define CPU_ON_CONTEXT_ID 0xdeadc0deul
-
-static uint64_t psci_cpu_on(uint64_t target_cpu, uint64_t entry_addr,
-			    uint64_t context_id)
-{
-	register uint64_t x0 asm("x0") = PSCI_0_2_FN64_CPU_ON;
-	register uint64_t x1 asm("x1") = target_cpu;
-	register uint64_t x2 asm("x2") = entry_addr;
-	register uint64_t x3 asm("x3") = context_id;
-
-	asm("hvc #0"
-	    : "=r"(x0)
-	    : "r"(x0), "r"(x1), "r"(x2), "r"(x3)
-	    : "memory");
-
-	return x0;
-}
-
-static uint64_t psci_affinity_info(uint64_t target_affinity,
-				   uint64_t lowest_affinity_level)
-{
-	register uint64_t x0 asm("x0") = PSCI_0_2_FN64_AFFINITY_INFO;
-	register uint64_t x1 asm("x1") = target_affinity;
-	register uint64_t x2 asm("x2") = lowest_affinity_level;
-
-	asm("hvc #0"
-	    : "=r"(x0)
-	    : "r"(x0), "r"(x1), "r"(x2)
-	    : "memory");
-
-	return x0;
-}
-
-static void guest_main(uint64_t target_cpu)
-{
-	GUEST_ASSERT(!psci_cpu_on(target_cpu, CPU_ON_ENTRY_ADDR, CPU_ON_CONTEXT_ID));
-	uint64_t target_state;
-
-	do {
-		target_state = psci_affinity_info(target_cpu, 0);
-
-		GUEST_ASSERT((target_state == PSCI_0_2_AFFINITY_LEVEL_ON) ||
-			     (target_state == PSCI_0_2_AFFINITY_LEVEL_OFF));
-	} while (target_state != PSCI_0_2_AFFINITY_LEVEL_ON);
-
-	GUEST_DONE();
-}
-
-int main(void)
-{
-	uint64_t target_mpidr, obs_pc, obs_x0;
-	struct kvm_vcpu_init init;
-	struct kvm_vm *vm;
-	struct ucall uc;
-
-	vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
-	kvm_vm_elf_load(vm, program_invocation_name);
-	ucall_init(vm, NULL);
-
-	vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init);
-	init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2);
-
-	aarch64_vcpu_add_default(vm, VCPU_ID_SOURCE, &init, guest_main);
-
-	/*
-	 * make sure the target is already off when executing the test.
-	 */
-	init.features[0] |= (1 << KVM_ARM_VCPU_POWER_OFF);
-	aarch64_vcpu_add_default(vm, VCPU_ID_TARGET, &init, guest_main);
-
-	get_reg(vm, VCPU_ID_TARGET, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), &target_mpidr);
-	vcpu_args_set(vm, VCPU_ID_SOURCE, 1, target_mpidr & MPIDR_HWID_BITMASK);
-	vcpu_run(vm, VCPU_ID_SOURCE);
-
-	switch (get_ucall(vm, VCPU_ID_SOURCE, &uc)) {
-	case UCALL_DONE:
-		break;
-	case UCALL_ABORT:
-		TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], __FILE__,
-			  uc.args[1]);
-		break;
-	default:
-		TEST_FAIL("Unhandled ucall: %lu", uc.cmd);
-	}
-
-	get_reg(vm, VCPU_ID_TARGET, ARM64_CORE_REG(regs.pc), &obs_pc);
-	get_reg(vm, VCPU_ID_TARGET, ARM64_CORE_REG(regs.regs[0]), &obs_x0);
-
-	TEST_ASSERT(obs_pc == CPU_ON_ENTRY_ADDR,
-		    "unexpected target cpu pc: %lx (expected: %lx)",
-		    obs_pc, CPU_ON_ENTRY_ADDR);
-	TEST_ASSERT(obs_x0 == CPU_ON_CONTEXT_ID,
-		    "unexpected target context id: %lx (expected: %lx)",
-		    obs_x0, CPU_ON_CONTEXT_ID);
-
-	kvm_vm_free(vm);
-	return 0;
-}
diff --git a/tools/testing/selftests/kvm/aarch64/psci_test.c b/tools/testing/selftests/kvm/aarch64/psci_test.c
new file mode 100644
index 000000000000..4c5f6814030f
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/psci_test.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * psci_cpu_on_test - Test that the observable state of a vCPU targeted by the
+ * CPU_ON PSCI call matches what the caller requested.
+ *
+ * Copyright (c) 2021 Google LLC.
+ *
+ * This is a regression test for a race between KVM servicing the PSCI call and
+ * userspace reading the vCPUs registers.
+ */
+
+#define _GNU_SOURCE
+
+#include <linux/psci.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+#define VCPU_ID_SOURCE 0
+#define VCPU_ID_TARGET 1
+
+#define CPU_ON_ENTRY_ADDR 0xfeedf00dul
+#define CPU_ON_CONTEXT_ID 0xdeadc0deul
+
+static uint64_t psci_cpu_on(uint64_t target_cpu, uint64_t entry_addr,
+			    uint64_t context_id)
+{
+	register uint64_t x0 asm("x0") = PSCI_0_2_FN64_CPU_ON;
+	register uint64_t x1 asm("x1") = target_cpu;
+	register uint64_t x2 asm("x2") = entry_addr;
+	register uint64_t x3 asm("x3") = context_id;
+
+	asm("hvc #0"
+	    : "=r"(x0)
+	    : "r"(x0), "r"(x1), "r"(x2), "r"(x3)
+	    : "memory");
+
+	return x0;
+}
+
+static uint64_t psci_affinity_info(uint64_t target_affinity,
+				   uint64_t lowest_affinity_level)
+{
+	register uint64_t x0 asm("x0") = PSCI_0_2_FN64_AFFINITY_INFO;
+	register uint64_t x1 asm("x1") = target_affinity;
+	register uint64_t x2 asm("x2") = lowest_affinity_level;
+
+	asm("hvc #0"
+	    : "=r"(x0)
+	    : "r"(x0), "r"(x1), "r"(x2)
+	    : "memory");
+
+	return x0;
+}
+
+static void guest_main(uint64_t target_cpu)
+{
+	GUEST_ASSERT(!psci_cpu_on(target_cpu, CPU_ON_ENTRY_ADDR, CPU_ON_CONTEXT_ID));
+	uint64_t target_state;
+
+	do {
+		target_state = psci_affinity_info(target_cpu, 0);
+
+		GUEST_ASSERT((target_state == PSCI_0_2_AFFINITY_LEVEL_ON) ||
+			     (target_state == PSCI_0_2_AFFINITY_LEVEL_OFF));
+	} while (target_state != PSCI_0_2_AFFINITY_LEVEL_ON);
+
+	GUEST_DONE();
+}
+
+int main(void)
+{
+	uint64_t target_mpidr, obs_pc, obs_x0;
+	struct kvm_vcpu_init init;
+	struct kvm_vm *vm;
+	struct ucall uc;
+
+	vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+	kvm_vm_elf_load(vm, program_invocation_name);
+	ucall_init(vm, NULL);
+
+	vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init);
+	init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2);
+
+	aarch64_vcpu_add_default(vm, VCPU_ID_SOURCE, &init, guest_main);
+
+	/*
+	 * make sure the target is already off when executing the test.
+	 */
+	init.features[0] |= (1 << KVM_ARM_VCPU_POWER_OFF);
+	aarch64_vcpu_add_default(vm, VCPU_ID_TARGET, &init, guest_main);
+
+	get_reg(vm, VCPU_ID_TARGET, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), &target_mpidr);
+	vcpu_args_set(vm, VCPU_ID_SOURCE, 1, target_mpidr & MPIDR_HWID_BITMASK);
+	vcpu_run(vm, VCPU_ID_SOURCE);
+
+	switch (get_ucall(vm, VCPU_ID_SOURCE, &uc)) {
+	case UCALL_DONE:
+		break;
+	case UCALL_ABORT:
+		TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], __FILE__,
+			  uc.args[1]);
+		break;
+	default:
+		TEST_FAIL("Unhandled ucall: %lu", uc.cmd);
+	}
+
+	get_reg(vm, VCPU_ID_TARGET, ARM64_CORE_REG(regs.pc), &obs_pc);
+	get_reg(vm, VCPU_ID_TARGET, ARM64_CORE_REG(regs.regs[0]), &obs_x0);
+
+	TEST_ASSERT(obs_pc == CPU_ON_ENTRY_ADDR,
+		    "unexpected target cpu pc: %lx (expected: %lx)",
+		    obs_pc, CPU_ON_ENTRY_ADDR);
+	TEST_ASSERT(obs_x0 == CPU_ON_CONTEXT_ID,
+		    "unexpected target context id: %lx (expected: %lx)",
+		    obs_x0, CPU_ON_CONTEXT_ID);
+
+	kvm_vm_free(vm);
+	return 0;
+}
-- 
cgit 


From 694e3dcc47471b8b409a0ef647319b746eabcb3a Mon Sep 17 00:00:00 2001
From: Oliver Upton <oupton@google.com>
Date: Wed, 4 May 2022 03:24:43 +0000
Subject: selftests: KVM: Create helper for making SMCCC calls

The PSCI and PV stolen time tests both need to make SMCCC calls within
the guest. Create a helper for making SMCCC calls and rework the
existing tests to use the library function.

Signed-off-by: Oliver Upton <oupton@google.com>
Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220504032446.4133305-10-oupton@google.com
---
 tools/testing/selftests/kvm/aarch64/psci_test.c    | 25 +++++++---------------
 .../selftests/kvm/include/aarch64/processor.h      | 22 +++++++++++++++++++
 .../testing/selftests/kvm/lib/aarch64/processor.c  | 25 ++++++++++++++++++++++
 tools/testing/selftests/kvm/steal_time.c           | 13 +++--------
 4 files changed, 58 insertions(+), 27 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/aarch64/psci_test.c b/tools/testing/selftests/kvm/aarch64/psci_test.c
index 4c5f6814030f..8c998f0b802c 100644
--- a/tools/testing/selftests/kvm/aarch64/psci_test.c
+++ b/tools/testing/selftests/kvm/aarch64/psci_test.c
@@ -26,32 +26,23 @@
 static uint64_t psci_cpu_on(uint64_t target_cpu, uint64_t entry_addr,
 			    uint64_t context_id)
 {
-	register uint64_t x0 asm("x0") = PSCI_0_2_FN64_CPU_ON;
-	register uint64_t x1 asm("x1") = target_cpu;
-	register uint64_t x2 asm("x2") = entry_addr;
-	register uint64_t x3 asm("x3") = context_id;
+	struct arm_smccc_res res;
 
-	asm("hvc #0"
-	    : "=r"(x0)
-	    : "r"(x0), "r"(x1), "r"(x2), "r"(x3)
-	    : "memory");
+	smccc_hvc(PSCI_0_2_FN64_CPU_ON, target_cpu, entry_addr, context_id,
+		  0, 0, 0, 0, &res);
 
-	return x0;
+	return res.a0;
 }
 
 static uint64_t psci_affinity_info(uint64_t target_affinity,
 				   uint64_t lowest_affinity_level)
 {
-	register uint64_t x0 asm("x0") = PSCI_0_2_FN64_AFFINITY_INFO;
-	register uint64_t x1 asm("x1") = target_affinity;
-	register uint64_t x2 asm("x2") = lowest_affinity_level;
+	struct arm_smccc_res res;
 
-	asm("hvc #0"
-	    : "=r"(x0)
-	    : "r"(x0), "r"(x1), "r"(x2)
-	    : "memory");
+	smccc_hvc(PSCI_0_2_FN64_AFFINITY_INFO, target_affinity, lowest_affinity_level,
+		  0, 0, 0, 0, 0, &res);
 
-	return x0;
+	return res.a0;
 }
 
 static void guest_main(uint64_t target_cpu)
diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h
index 8f9f46979a00..59ece9d4e0d1 100644
--- a/tools/testing/selftests/kvm/include/aarch64/processor.h
+++ b/tools/testing/selftests/kvm/include/aarch64/processor.h
@@ -185,4 +185,26 @@ static inline void local_irq_disable(void)
 	asm volatile("msr daifset, #3" : : : "memory");
 }
 
+/**
+ * struct arm_smccc_res - Result from SMC/HVC call
+ * @a0-a3 result values from registers 0 to 3
+ */
+struct arm_smccc_res {
+	unsigned long a0;
+	unsigned long a1;
+	unsigned long a2;
+	unsigned long a3;
+};
+
+/**
+ * smccc_hvc - Invoke a SMCCC function using the hvc conduit
+ * @function_id: the SMCCC function to be called
+ * @arg0-arg6: SMCCC function arguments, corresponding to registers x1-x7
+ * @res: pointer to write the return values from registers x0-x3
+ *
+ */
+void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
+	       uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
+	       uint64_t arg6, struct arm_smccc_res *res);
+
 #endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index 9343d82519b4..6a041289fa80 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -500,3 +500,28 @@ void __attribute__((constructor)) init_guest_modes(void)
 {
        guest_modes_append_default();
 }
+
+void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
+	       uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
+	       uint64_t arg6, struct arm_smccc_res *res)
+{
+	asm volatile("mov   w0, %w[function_id]\n"
+		     "mov   x1, %[arg0]\n"
+		     "mov   x2, %[arg1]\n"
+		     "mov   x3, %[arg2]\n"
+		     "mov   x4, %[arg3]\n"
+		     "mov   x5, %[arg4]\n"
+		     "mov   x6, %[arg5]\n"
+		     "mov   x7, %[arg6]\n"
+		     "hvc   #0\n"
+		     "mov   %[res0], x0\n"
+		     "mov   %[res1], x1\n"
+		     "mov   %[res2], x2\n"
+		     "mov   %[res3], x3\n"
+		     : [res0] "=r"(res->a0), [res1] "=r"(res->a1),
+		       [res2] "=r"(res->a2), [res3] "=r"(res->a3)
+		     : [function_id] "r"(function_id), [arg0] "r"(arg0),
+		       [arg1] "r"(arg1), [arg2] "r"(arg2), [arg3] "r"(arg3),
+		       [arg4] "r"(arg4), [arg5] "r"(arg5), [arg6] "r"(arg6)
+		     : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7");
+}
diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c
index 62f2eb9ee3d5..8c4e811bd586 100644
--- a/tools/testing/selftests/kvm/steal_time.c
+++ b/tools/testing/selftests/kvm/steal_time.c
@@ -118,17 +118,10 @@ struct st_time {
 
 static int64_t smccc(uint32_t func, uint64_t arg)
 {
-	unsigned long ret;
+	struct arm_smccc_res res;
 
-	asm volatile(
-		"mov	w0, %w1\n"
-		"mov	x1, %2\n"
-		"hvc	#0\n"
-		"mov	%0, x0\n"
-	: "=r" (ret) : "r" (func), "r" (arg) :
-	  "x0", "x1", "x2", "x3");
-
-	return ret;
+	smccc_hvc(func, arg, 0, 0, 0, 0, 0, 0, &res);
+	return res.a0;
 }
 
 static void check_status(struct st_time *st)
-- 
cgit 


From d135399a97cc3e27716a8e468a5fd1a209346831 Mon Sep 17 00:00:00 2001
From: Oliver Upton <oupton@google.com>
Date: Wed, 4 May 2022 03:24:44 +0000
Subject: selftests: KVM: Use KVM_SET_MP_STATE to power off vCPU in psci_test

Setting a vCPU's MP state to KVM_MP_STATE_STOPPED has the effect of
powering off the vCPU. Rather than using the vCPU init feature flag, use
the KVM_SET_MP_STATE ioctl to power off the target vCPU.

Signed-off-by: Oliver Upton <oupton@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220504032446.4133305-11-oupton@google.com
---
 tools/testing/selftests/kvm/aarch64/psci_test.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/aarch64/psci_test.c b/tools/testing/selftests/kvm/aarch64/psci_test.c
index 8c998f0b802c..fe1d5d343a2f 100644
--- a/tools/testing/selftests/kvm/aarch64/psci_test.c
+++ b/tools/testing/selftests/kvm/aarch64/psci_test.c
@@ -60,6 +60,15 @@ static void guest_main(uint64_t target_cpu)
 	GUEST_DONE();
 }
 
+static void vcpu_power_off(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	struct kvm_mp_state mp_state = {
+		.mp_state = KVM_MP_STATE_STOPPED,
+	};
+
+	vcpu_set_mp_state(vm, vcpuid, &mp_state);
+}
+
 int main(void)
 {
 	uint64_t target_mpidr, obs_pc, obs_x0;
@@ -75,12 +84,12 @@ int main(void)
 	init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2);
 
 	aarch64_vcpu_add_default(vm, VCPU_ID_SOURCE, &init, guest_main);
+	aarch64_vcpu_add_default(vm, VCPU_ID_TARGET, &init, guest_main);
 
 	/*
 	 * make sure the target is already off when executing the test.
 	 */
-	init.features[0] |= (1 << KVM_ARM_VCPU_POWER_OFF);
-	aarch64_vcpu_add_default(vm, VCPU_ID_TARGET, &init, guest_main);
+	vcpu_power_off(vm, VCPU_ID_TARGET);
 
 	get_reg(vm, VCPU_ID_TARGET, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), &target_mpidr);
 	vcpu_args_set(vm, VCPU_ID_SOURCE, 1, target_mpidr & MPIDR_HWID_BITMASK);
-- 
cgit 


From 67a36a821312e9c0d2a2f7e6c2225204500cc01c Mon Sep 17 00:00:00 2001
From: Oliver Upton <oupton@google.com>
Date: Wed, 4 May 2022 03:24:45 +0000
Subject: selftests: KVM: Refactor psci_test to make it amenable to new tests

Split up the current test into several helpers that will be useful to
subsequent test cases added to the PSCI test suite.

Signed-off-by: Oliver Upton <oupton@google.com>
Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220504032446.4133305-12-oupton@google.com
---
 tools/testing/selftests/kvm/aarch64/psci_test.c | 97 +++++++++++++++----------
 1 file changed, 60 insertions(+), 37 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/aarch64/psci_test.c b/tools/testing/selftests/kvm/aarch64/psci_test.c
index fe1d5d343a2f..535130d5e97f 100644
--- a/tools/testing/selftests/kvm/aarch64/psci_test.c
+++ b/tools/testing/selftests/kvm/aarch64/psci_test.c
@@ -45,21 +45,6 @@ static uint64_t psci_affinity_info(uint64_t target_affinity,
 	return res.a0;
 }
 
-static void guest_main(uint64_t target_cpu)
-{
-	GUEST_ASSERT(!psci_cpu_on(target_cpu, CPU_ON_ENTRY_ADDR, CPU_ON_CONTEXT_ID));
-	uint64_t target_state;
-
-	do {
-		target_state = psci_affinity_info(target_cpu, 0);
-
-		GUEST_ASSERT((target_state == PSCI_0_2_AFFINITY_LEVEL_ON) ||
-			     (target_state == PSCI_0_2_AFFINITY_LEVEL_OFF));
-	} while (target_state != PSCI_0_2_AFFINITY_LEVEL_ON);
-
-	GUEST_DONE();
-}
-
 static void vcpu_power_off(struct kvm_vm *vm, uint32_t vcpuid)
 {
 	struct kvm_mp_state mp_state = {
@@ -69,12 +54,10 @@ static void vcpu_power_off(struct kvm_vm *vm, uint32_t vcpuid)
 	vcpu_set_mp_state(vm, vcpuid, &mp_state);
 }
 
-int main(void)
+static struct kvm_vm *setup_vm(void *guest_code)
 {
-	uint64_t target_mpidr, obs_pc, obs_x0;
 	struct kvm_vcpu_init init;
 	struct kvm_vm *vm;
-	struct ucall uc;
 
 	vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
 	kvm_vm_elf_load(vm, program_invocation_name);
@@ -83,31 +66,28 @@ int main(void)
 	vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init);
 	init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2);
 
-	aarch64_vcpu_add_default(vm, VCPU_ID_SOURCE, &init, guest_main);
-	aarch64_vcpu_add_default(vm, VCPU_ID_TARGET, &init, guest_main);
+	aarch64_vcpu_add_default(vm, VCPU_ID_SOURCE, &init, guest_code);
+	aarch64_vcpu_add_default(vm, VCPU_ID_TARGET, &init, guest_code);
 
-	/*
-	 * make sure the target is already off when executing the test.
-	 */
-	vcpu_power_off(vm, VCPU_ID_TARGET);
+	return vm;
+}
 
-	get_reg(vm, VCPU_ID_TARGET, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), &target_mpidr);
-	vcpu_args_set(vm, VCPU_ID_SOURCE, 1, target_mpidr & MPIDR_HWID_BITMASK);
-	vcpu_run(vm, VCPU_ID_SOURCE);
+static void enter_guest(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	struct ucall uc;
 
-	switch (get_ucall(vm, VCPU_ID_SOURCE, &uc)) {
-	case UCALL_DONE:
-		break;
-	case UCALL_ABORT:
+	vcpu_run(vm, vcpuid);
+	if (get_ucall(vm, vcpuid, &uc) == UCALL_ABORT)
 		TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], __FILE__,
 			  uc.args[1]);
-		break;
-	default:
-		TEST_FAIL("Unhandled ucall: %lu", uc.cmd);
-	}
+}
+
+static void assert_vcpu_reset(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	uint64_t obs_pc, obs_x0;
 
-	get_reg(vm, VCPU_ID_TARGET, ARM64_CORE_REG(regs.pc), &obs_pc);
-	get_reg(vm, VCPU_ID_TARGET, ARM64_CORE_REG(regs.regs[0]), &obs_x0);
+	get_reg(vm, vcpuid, ARM64_CORE_REG(regs.pc), &obs_pc);
+	get_reg(vm, vcpuid, ARM64_CORE_REG(regs.regs[0]), &obs_x0);
 
 	TEST_ASSERT(obs_pc == CPU_ON_ENTRY_ADDR,
 		    "unexpected target cpu pc: %lx (expected: %lx)",
@@ -115,7 +95,50 @@ int main(void)
 	TEST_ASSERT(obs_x0 == CPU_ON_CONTEXT_ID,
 		    "unexpected target context id: %lx (expected: %lx)",
 		    obs_x0, CPU_ON_CONTEXT_ID);
+}
+
+static void guest_test_cpu_on(uint64_t target_cpu)
+{
+	uint64_t target_state;
+
+	GUEST_ASSERT(!psci_cpu_on(target_cpu, CPU_ON_ENTRY_ADDR, CPU_ON_CONTEXT_ID));
+
+	do {
+		target_state = psci_affinity_info(target_cpu, 0);
+
+		GUEST_ASSERT((target_state == PSCI_0_2_AFFINITY_LEVEL_ON) ||
+			     (target_state == PSCI_0_2_AFFINITY_LEVEL_OFF));
+	} while (target_state != PSCI_0_2_AFFINITY_LEVEL_ON);
+
+	GUEST_DONE();
+}
+
+static void host_test_cpu_on(void)
+{
+	uint64_t target_mpidr;
+	struct kvm_vm *vm;
+	struct ucall uc;
+
+	vm = setup_vm(guest_test_cpu_on);
+
+	/*
+	 * make sure the target is already off when executing the test.
+	 */
+	vcpu_power_off(vm, VCPU_ID_TARGET);
+
+	get_reg(vm, VCPU_ID_TARGET, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), &target_mpidr);
+	vcpu_args_set(vm, VCPU_ID_SOURCE, 1, target_mpidr & MPIDR_HWID_BITMASK);
+	enter_guest(vm, VCPU_ID_SOURCE);
+
+	if (get_ucall(vm, VCPU_ID_SOURCE, &uc) != UCALL_DONE)
+		TEST_FAIL("Unhandled ucall: %lu", uc.cmd);
 
+	assert_vcpu_reset(vm, VCPU_ID_TARGET);
 	kvm_vm_free(vm);
+}
+
+int main(void)
+{
+	host_test_cpu_on();
 	return 0;
 }
-- 
cgit 


From b26dafc8a9e74254a390e8f21ff028a2573ee4fc Mon Sep 17 00:00:00 2001
From: Oliver Upton <oupton@google.com>
Date: Wed, 4 May 2022 03:24:46 +0000
Subject: selftests: KVM: Test SYSTEM_SUSPEND PSCI call

Assert that the vCPU exits to userspace with KVM_SYSTEM_EVENT_SUSPEND if
the guest calls PSCI SYSTEM_SUSPEND. Additionally, guarantee that the
SMC32 and SMC64 flavors of this call are discoverable with the
PSCI_FEATURES call.

Signed-off-by: Oliver Upton <oupton@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220504032446.4133305-13-oupton@google.com
---
 tools/testing/selftests/kvm/aarch64/psci_test.c | 69 +++++++++++++++++++++++++
 1 file changed, 69 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/aarch64/psci_test.c b/tools/testing/selftests/kvm/aarch64/psci_test.c
index 535130d5e97f..88541de21c41 100644
--- a/tools/testing/selftests/kvm/aarch64/psci_test.c
+++ b/tools/testing/selftests/kvm/aarch64/psci_test.c
@@ -45,6 +45,25 @@ static uint64_t psci_affinity_info(uint64_t target_affinity,
 	return res.a0;
 }
 
+static uint64_t psci_system_suspend(uint64_t entry_addr, uint64_t context_id)
+{
+	struct arm_smccc_res res;
+
+	smccc_hvc(PSCI_1_0_FN64_SYSTEM_SUSPEND, entry_addr, context_id,
+		  0, 0, 0, 0, 0, &res);
+
+	return res.a0;
+}
+
+static uint64_t psci_features(uint32_t func_id)
+{
+	struct arm_smccc_res res;
+
+	smccc_hvc(PSCI_1_0_FN_PSCI_FEATURES, func_id, 0, 0, 0, 0, 0, 0, &res);
+
+	return res.a0;
+}
+
 static void vcpu_power_off(struct kvm_vm *vm, uint32_t vcpuid)
 {
 	struct kvm_mp_state mp_state = {
@@ -137,8 +156,58 @@ static void host_test_cpu_on(void)
 	kvm_vm_free(vm);
 }
 
+static void enable_system_suspend(struct kvm_vm *vm)
+{
+	struct kvm_enable_cap cap = {
+		.cap = KVM_CAP_ARM_SYSTEM_SUSPEND,
+	};
+
+	vm_enable_cap(vm, &cap);
+}
+
+static void guest_test_system_suspend(void)
+{
+	uint64_t ret;
+
+	/* assert that SYSTEM_SUSPEND is discoverable */
+	GUEST_ASSERT(!psci_features(PSCI_1_0_FN_SYSTEM_SUSPEND));
+	GUEST_ASSERT(!psci_features(PSCI_1_0_FN64_SYSTEM_SUSPEND));
+
+	ret = psci_system_suspend(CPU_ON_ENTRY_ADDR, CPU_ON_CONTEXT_ID);
+	GUEST_SYNC(ret);
+}
+
+static void host_test_system_suspend(void)
+{
+	struct kvm_run *run;
+	struct kvm_vm *vm;
+
+	vm = setup_vm(guest_test_system_suspend);
+	enable_system_suspend(vm);
+
+	vcpu_power_off(vm, VCPU_ID_TARGET);
+	run = vcpu_state(vm, VCPU_ID_SOURCE);
+
+	enter_guest(vm, VCPU_ID_SOURCE);
+
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_SYSTEM_EVENT,
+		    "Unhandled exit reason: %u (%s)",
+		    run->exit_reason, exit_reason_str(run->exit_reason));
+	TEST_ASSERT(run->system_event.type == KVM_SYSTEM_EVENT_SUSPEND,
+		    "Unhandled system event: %u (expected: %u)",
+		    run->system_event.type, KVM_SYSTEM_EVENT_SUSPEND);
+
+	kvm_vm_free(vm);
+}
+
 int main(void)
 {
+	if (!kvm_check_cap(KVM_CAP_ARM_SYSTEM_SUSPEND)) {
+		print_skip("KVM_CAP_ARM_SYSTEM_SUSPEND not supported");
+		exit(KSFT_SKIP);
+	}
+
 	host_test_cpu_on();
+	host_test_system_suspend();
 	return 0;
 }
-- 
cgit 


From ae60e0763e97e977b03af1ac6ba782a4a86c3a5a Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Wed, 4 May 2022 00:16:55 +0100
Subject: kselftest/arm64: Fix ABI header directory location

Currently the arm64 kselftests attempt to locate the ABI headers using
custom logic which doesn't work correctly in the case of out of tree builds
if KBUILD_OUTPUT is not specified. Since lib.mk defines KHDR_INCLUDES with
the appropriate flags we can simply remove the custom logic and use that
instead.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20220503231655.211346-1-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 tools/testing/selftests/arm64/Makefile | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile
index 1e8d9a8f59df..9460cbe81bcc 100644
--- a/tools/testing/selftests/arm64/Makefile
+++ b/tools/testing/selftests/arm64/Makefile
@@ -17,16 +17,7 @@ top_srcdir = $(realpath ../../../../)
 # Additional include paths needed by kselftest.h and local headers
 CFLAGS += -I$(top_srcdir)/tools/testing/selftests/
 
-# Guessing where the Kernel headers could have been installed
-# depending on ENV config
-ifeq ($(KBUILD_OUTPUT),)
-khdr_dir = $(top_srcdir)/usr/include
-else
-# the KSFT preferred location when KBUILD_OUTPUT is set
-khdr_dir = $(KBUILD_OUTPUT)/kselftest/usr/include
-endif
-
-CFLAGS += -I$(khdr_dir)
+CFLAGS += $(KHDR_INCLUDES)
 
 export CFLAGS
 export top_srcdir
-- 
cgit 


From dcbff9ad418497c2608d4b4d9423efc8e87e130e Mon Sep 17 00:00:00 2001
From: Russell Currey <ruscur@russell.cc>
Date: Tue, 8 Jun 2021 15:48:51 +1000
Subject: selftests/powerpc: Fix typo in spectre_v2

Signed-off-by: Russell Currey <ruscur@russell.cc>
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210608054851.164659-1-ruscur@russell.cc
---
 tools/testing/selftests/powerpc/security/spectre_v2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/powerpc/security/spectre_v2.c b/tools/testing/selftests/powerpc/security/spectre_v2.c
index d42ca8c676c3..80dc97e3ec57 100644
--- a/tools/testing/selftests/powerpc/security/spectre_v2.c
+++ b/tools/testing/selftests/powerpc/security/spectre_v2.c
@@ -207,7 +207,7 @@ int spectre_v2_test(void)
 		break;
 	case COUNT_CACHE_DISABLED:
 		if (miss_percent < 95) {
-			printf("Branch misses < 20%% unexpected in this configuration!\n");
+			printf("Branch misses < 95%% unexpected in this configuration!\n");
 			printf("Possible mis-match between reported & actual mitigation\n");
 			return 1;
 		}
-- 
cgit 


From 9a0b36509df07f2e3b24d3ae6f222a4f099e0709 Mon Sep 17 00:00:00 2001
From: Kishen Maloor <kishen.maloor@intel.com>
Date: Tue, 3 May 2022 19:38:53 -0700
Subject: selftests: mptcp: support MPTCP_PM_CMD_ANNOUNCE

This change updates the "pm_nl_ctl" testing sample with an "ann"
(announce) option to support the newly added netlink interface command
MPTCP_PM_CMD_ANNOUNCE to issue ADD_ADDR advertisements over the
chosen MPTCP connection.

E.g. ./pm_nl_ctl ann 192.168.122.75 token 823274047 id 25 dev enp1s0

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Kishen Maloor <kishen.maloor@intel.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/mptcp/pm_nl_ctl.c | 131 ++++++++++++++++++++++++++
 1 file changed, 131 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index a75a68ad652e..0ef35c3f6419 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -6,6 +6,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <limits.h>
 
 #include <sys/socket.h>
 #include <sys/types.h>
@@ -26,6 +27,7 @@ static void syntax(char *argv[])
 {
 	fprintf(stderr, "%s add|get|set|del|flush|dump|accept [<args>]\n", argv[0]);
 	fprintf(stderr, "\tadd [flags signal|subflow|backup|fullmesh] [id <nr>] [dev <name>] <ip>\n");
+	fprintf(stderr, "\tann <local-ip> id <local-id> token <token> [port <local-port>] [dev <name>]\n");
 	fprintf(stderr, "\tdel <id> [<ip>]\n");
 	fprintf(stderr, "\tget <id>\n");
 	fprintf(stderr, "\tset [<ip>] [id <nr>] flags [no]backup|[no]fullmesh [port <nr>]\n");
@@ -170,6 +172,133 @@ static int resolve_mptcp_pm_netlink(int fd)
 	return genl_parse_getfamily((void *)data);
 }
 
+int announce_addr(int fd, int pm_family, int argc, char *argv[])
+{
+	char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+		  NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+		  1024];
+	u_int32_t flags = MPTCP_PM_ADDR_FLAG_SIGNAL;
+	u_int32_t token = UINT_MAX;
+	struct rtattr *rta, *addr;
+	u_int32_t id = UINT_MAX;
+	struct nlmsghdr *nh;
+	u_int16_t family;
+	int addr_start;
+	int off = 0;
+	int arg;
+
+	memset(data, 0, sizeof(data));
+	nh = (void *)data;
+	off = init_genl_req(data, pm_family, MPTCP_PM_CMD_ANNOUNCE,
+			    MPTCP_PM_VER);
+
+	if (argc < 7)
+		syntax(argv);
+
+	/* local-ip header */
+	addr_start = off;
+	addr = (void *)(data + off);
+	addr->rta_type = NLA_F_NESTED | MPTCP_PM_ATTR_ADDR;
+	addr->rta_len = RTA_LENGTH(0);
+	off += NLMSG_ALIGN(addr->rta_len);
+
+	/* local-ip data */
+	/* record addr type */
+	rta = (void *)(data + off);
+	if (inet_pton(AF_INET, argv[2], RTA_DATA(rta))) {
+		family = AF_INET;
+		rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4;
+		rta->rta_len = RTA_LENGTH(4);
+	} else if (inet_pton(AF_INET6, argv[2], RTA_DATA(rta))) {
+		family = AF_INET6;
+		rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6;
+		rta->rta_len = RTA_LENGTH(16);
+	} else
+		error(1, errno, "can't parse ip %s", argv[2]);
+	off += NLMSG_ALIGN(rta->rta_len);
+
+	/* addr family */
+	rta = (void *)(data + off);
+	rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY;
+	rta->rta_len = RTA_LENGTH(2);
+	memcpy(RTA_DATA(rta), &family, 2);
+	off += NLMSG_ALIGN(rta->rta_len);
+
+	for (arg = 3; arg < argc; arg++) {
+		if (!strcmp(argv[arg], "id")) {
+			/* local-id */
+			if (++arg >= argc)
+				error(1, 0, " missing id value");
+
+			id = atoi(argv[arg]);
+			rta = (void *)(data + off);
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_ID;
+			rta->rta_len = RTA_LENGTH(1);
+			memcpy(RTA_DATA(rta), &id, 1);
+			off += NLMSG_ALIGN(rta->rta_len);
+		} else if (!strcmp(argv[arg], "dev")) {
+			/* for the if_index */
+			int32_t ifindex;
+
+			if (++arg >= argc)
+				error(1, 0, " missing dev name");
+
+			ifindex = if_nametoindex(argv[arg]);
+			if (!ifindex)
+				error(1, errno, "unknown device %s", argv[arg]);
+
+			rta = (void *)(data + off);
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_IF_IDX;
+			rta->rta_len = RTA_LENGTH(4);
+			memcpy(RTA_DATA(rta), &ifindex, 4);
+			off += NLMSG_ALIGN(rta->rta_len);
+		} else if (!strcmp(argv[arg], "port")) {
+			/* local-port (optional) */
+			u_int16_t port;
+
+			if (++arg >= argc)
+				error(1, 0, " missing port value");
+
+			port = atoi(argv[arg]);
+			rta = (void *)(data + off);
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT;
+			rta->rta_len = RTA_LENGTH(2);
+			memcpy(RTA_DATA(rta), &port, 2);
+			off += NLMSG_ALIGN(rta->rta_len);
+		} else if (!strcmp(argv[arg], "token")) {
+			/* MPTCP connection token */
+			if (++arg >= argc)
+				error(1, 0, " missing token value");
+
+			token = atoi(argv[arg]);
+		} else
+			error(1, 0, "unknown keyword %s", argv[arg]);
+	}
+
+	/* addr flags */
+	rta = (void *)(data + off);
+	rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS;
+	rta->rta_len = RTA_LENGTH(4);
+	memcpy(RTA_DATA(rta), &flags, 4);
+	off += NLMSG_ALIGN(rta->rta_len);
+
+	addr->rta_len = off - addr_start;
+
+	if (id == UINT_MAX || token == UINT_MAX)
+		error(1, 0, " missing mandatory inputs");
+
+	/* token */
+	rta = (void *)(data + off);
+	rta->rta_type = MPTCP_PM_ATTR_TOKEN;
+	rta->rta_len = RTA_LENGTH(4);
+	memcpy(RTA_DATA(rta), &token, 4);
+	off += NLMSG_ALIGN(rta->rta_len);
+
+	do_nl_req(fd, nh, off, 0);
+
+	return 0;
+}
+
 int add_addr(int fd, int pm_family, int argc, char *argv[])
 {
 	char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
@@ -786,6 +915,8 @@ int main(int argc, char *argv[])
 
 	if (!strcmp(argv[1], "add"))
 		return add_addr(fd, pm_family, argc, argv);
+	else if (!strcmp(argv[1], "ann"))
+		return announce_addr(fd, pm_family, argc, argv);
 	else if (!strcmp(argv[1], "del"))
 		return del_addr(fd, pm_family, argc, argv);
 	else if (!strcmp(argv[1], "flush"))
-- 
cgit 


From ecd2a77d672f0ea954b00bb5700f25bb1f9a32be Mon Sep 17 00:00:00 2001
From: Kishen Maloor <kishen.maloor@intel.com>
Date: Tue, 3 May 2022 19:38:55 -0700
Subject: selftests: mptcp: support MPTCP_PM_CMD_REMOVE

This change updates the "pm_nl_ctl" testing sample with a "rem"
(remove) option to support the newly added netlink interface command
MPTCP_PM_CMD_REMOVE to issue a REMOVE_ADDR signal over the
chosen MPTCP connection.

E.g. ./pm_nl_ctl rem token 823274047 id 23

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Kishen Maloor <kishen.maloor@intel.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/mptcp/pm_nl_ctl.c | 52 +++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index 0ef35c3f6419..3506b0416c41 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -28,6 +28,7 @@ static void syntax(char *argv[])
 	fprintf(stderr, "%s add|get|set|del|flush|dump|accept [<args>]\n", argv[0]);
 	fprintf(stderr, "\tadd [flags signal|subflow|backup|fullmesh] [id <nr>] [dev <name>] <ip>\n");
 	fprintf(stderr, "\tann <local-ip> id <local-id> token <token> [port <local-port>] [dev <name>]\n");
+	fprintf(stderr, "\trem id <local-id> token <token>\n");
 	fprintf(stderr, "\tdel <id> [<ip>]\n");
 	fprintf(stderr, "\tget <id>\n");
 	fprintf(stderr, "\tset [<ip>] [id <nr>] flags [no]backup|[no]fullmesh [port <nr>]\n");
@@ -172,6 +173,55 @@ static int resolve_mptcp_pm_netlink(int fd)
 	return genl_parse_getfamily((void *)data);
 }
 
+int remove_addr(int fd, int pm_family, int argc, char *argv[])
+{
+	char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+		  NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+		  1024];
+	struct nlmsghdr *nh;
+	struct rtattr *rta;
+	u_int32_t token;
+	u_int8_t id;
+	int off = 0;
+	int arg;
+
+	memset(data, 0, sizeof(data));
+	nh = (void *)data;
+	off = init_genl_req(data, pm_family, MPTCP_PM_CMD_REMOVE,
+			    MPTCP_PM_VER);
+
+	if (argc < 6)
+		syntax(argv);
+
+	for (arg = 2; arg < argc; arg++) {
+		if (!strcmp(argv[arg], "id")) {
+			if (++arg >= argc)
+				error(1, 0, " missing id value");
+
+			id = atoi(argv[arg]);
+			rta = (void *)(data + off);
+			rta->rta_type = MPTCP_PM_ATTR_LOC_ID;
+			rta->rta_len = RTA_LENGTH(1);
+			memcpy(RTA_DATA(rta), &id, 1);
+			off += NLMSG_ALIGN(rta->rta_len);
+		} else if (!strcmp(argv[arg], "token")) {
+			if (++arg >= argc)
+				error(1, 0, " missing token value");
+
+			token = atoi(argv[arg]);
+			rta = (void *)(data + off);
+			rta->rta_type = MPTCP_PM_ATTR_TOKEN;
+			rta->rta_len = RTA_LENGTH(4);
+			memcpy(RTA_DATA(rta), &token, 4);
+			off += NLMSG_ALIGN(rta->rta_len);
+		} else
+			error(1, 0, "unknown keyword %s", argv[arg]);
+	}
+
+	do_nl_req(fd, nh, off, 0);
+	return 0;
+}
+
 int announce_addr(int fd, int pm_family, int argc, char *argv[])
 {
 	char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
@@ -917,6 +967,8 @@ int main(int argc, char *argv[])
 		return add_addr(fd, pm_family, argc, argv);
 	else if (!strcmp(argv[1], "ann"))
 		return announce_addr(fd, pm_family, argc, argv);
+	else if (!strcmp(argv[1], "rem"))
+		return remove_addr(fd, pm_family, argc, argv);
 	else if (!strcmp(argv[1], "del"))
 		return del_addr(fd, pm_family, argc, argv);
 	else if (!strcmp(argv[1], "flush"))
-- 
cgit 


From cf8d0a6dfd649acd6b450444b56eb244cd2e86c1 Mon Sep 17 00:00:00 2001
From: Kishen Maloor <kishen.maloor@intel.com>
Date: Tue, 3 May 2022 19:38:57 -0700
Subject: selftests: mptcp: support MPTCP_PM_CMD_SUBFLOW_CREATE

This change updates the "pm_nl_ctl" testing sample with a "csf"
(create subflow) option to support the newly added netlink interface
command MPTCP_PM_CMD_SUBFLOW_CREATE over the chosen MPTCP connection.

E.g. ./pm_nl_ctl csf lip 10.0.2.1 lid 23 rip 10.0.2.2 rport 56789
token 823274047

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Kishen Maloor <kishen.maloor@intel.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/mptcp/pm_nl_ctl.c | 129 ++++++++++++++++++++++++++
 1 file changed, 129 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index 3506b0416c41..e2437bacd133 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -29,6 +29,7 @@ static void syntax(char *argv[])
 	fprintf(stderr, "\tadd [flags signal|subflow|backup|fullmesh] [id <nr>] [dev <name>] <ip>\n");
 	fprintf(stderr, "\tann <local-ip> id <local-id> token <token> [port <local-port>] [dev <name>]\n");
 	fprintf(stderr, "\trem id <local-id> token <token>\n");
+	fprintf(stderr, "\tcsf lip <local-ip> lid <local-id> rip <remote-ip> rport <remote-port> token <token>\n");
 	fprintf(stderr, "\tdel <id> [<ip>]\n");
 	fprintf(stderr, "\tget <id>\n");
 	fprintf(stderr, "\tset [<ip>] [id <nr>] flags [no]backup|[no]fullmesh [port <nr>]\n");
@@ -173,6 +174,132 @@ static int resolve_mptcp_pm_netlink(int fd)
 	return genl_parse_getfamily((void *)data);
 }
 
+int csf(int fd, int pm_family, int argc, char *argv[])
+{
+	char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+		  NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+		  1024];
+	const char *params[5];
+	struct nlmsghdr *nh;
+	struct rtattr *addr;
+	struct rtattr *rta;
+	u_int16_t family;
+	u_int32_t token;
+	u_int16_t port;
+	int addr_start;
+	u_int8_t id;
+	int off = 0;
+	int arg;
+
+	memset(params, 0, 5 * sizeof(const char *));
+
+	memset(data, 0, sizeof(data));
+	nh = (void *)data;
+	off = init_genl_req(data, pm_family, MPTCP_PM_CMD_SUBFLOW_CREATE,
+			    MPTCP_PM_VER);
+
+	if (argc < 12)
+		syntax(argv);
+
+	/* Params recorded in this order:
+	 * <local-ip>, <local-id>, <remote-ip>, <remote-port>, <token>
+	 */
+	for (arg = 2; arg < argc; arg++) {
+		if (!strcmp(argv[arg], "lip")) {
+			if (++arg >= argc)
+				error(1, 0, " missing local IP");
+
+			params[0] = argv[arg];
+		} else if (!strcmp(argv[arg], "lid")) {
+			if (++arg >= argc)
+				error(1, 0, " missing local id");
+
+			params[1] = argv[arg];
+		} else if (!strcmp(argv[arg], "rip")) {
+			if (++arg >= argc)
+				error(1, 0, " missing remote ip");
+
+			params[2] = argv[arg];
+		} else if (!strcmp(argv[arg], "rport")) {
+			if (++arg >= argc)
+				error(1, 0, " missing remote port");
+
+			params[3] = argv[arg];
+		} else if (!strcmp(argv[arg], "token")) {
+			if (++arg >= argc)
+				error(1, 0, " missing token");
+
+			params[4] = argv[arg];
+		} else
+			error(1, 0, "unknown param %s", argv[arg]);
+	}
+
+	for (arg = 0; arg < 4; arg = arg + 2) {
+		/*  addr header */
+		addr_start = off;
+		addr = (void *)(data + off);
+		addr->rta_type = NLA_F_NESTED |
+			((arg == 0) ? MPTCP_PM_ATTR_ADDR : MPTCP_PM_ATTR_ADDR_REMOTE);
+		addr->rta_len = RTA_LENGTH(0);
+		off += NLMSG_ALIGN(addr->rta_len);
+
+		/*  addr data */
+		rta = (void *)(data + off);
+		if (inet_pton(AF_INET, params[arg], RTA_DATA(rta))) {
+			family = AF_INET;
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4;
+			rta->rta_len = RTA_LENGTH(4);
+		} else if (inet_pton(AF_INET6, params[arg], RTA_DATA(rta))) {
+			family = AF_INET6;
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6;
+			rta->rta_len = RTA_LENGTH(16);
+		} else
+			error(1, errno, "can't parse ip %s", params[arg]);
+		off += NLMSG_ALIGN(rta->rta_len);
+
+		/* family */
+		rta = (void *)(data + off);
+		rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY;
+		rta->rta_len = RTA_LENGTH(2);
+		memcpy(RTA_DATA(rta), &family, 2);
+		off += NLMSG_ALIGN(rta->rta_len);
+
+		if (arg == 2) {
+			/*  port */
+			port = atoi(params[arg + 1]);
+			rta = (void *)(data + off);
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT;
+			rta->rta_len = RTA_LENGTH(2);
+			memcpy(RTA_DATA(rta), &port, 2);
+			off += NLMSG_ALIGN(rta->rta_len);
+		}
+
+		if (arg == 0) {
+			/* id */
+			id = atoi(params[arg + 1]);
+			rta = (void *)(data + off);
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_ID;
+			rta->rta_len = RTA_LENGTH(1);
+			memcpy(RTA_DATA(rta), &id, 1);
+			off += NLMSG_ALIGN(rta->rta_len);
+		}
+
+		addr->rta_len = off - addr_start;
+	}
+
+	/* token */
+	token = atoi(params[4]);
+	rta = (void *)(data + off);
+	rta->rta_type = MPTCP_PM_ATTR_TOKEN;
+	rta->rta_len = RTA_LENGTH(4);
+	memcpy(RTA_DATA(rta), &token, 4);
+	off += NLMSG_ALIGN(rta->rta_len);
+
+	do_nl_req(fd, nh, off, 0);
+
+	return 0;
+}
+
 int remove_addr(int fd, int pm_family, int argc, char *argv[])
 {
 	char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
@@ -969,6 +1096,8 @@ int main(int argc, char *argv[])
 		return announce_addr(fd, pm_family, argc, argv);
 	else if (!strcmp(argv[1], "rem"))
 		return remove_addr(fd, pm_family, argc, argv);
+	else if (!strcmp(argv[1], "csf"))
+		return csf(fd, pm_family, argc, argv);
 	else if (!strcmp(argv[1], "del"))
 		return del_addr(fd, pm_family, argc, argv);
 	else if (!strcmp(argv[1], "flush"))
-- 
cgit 


From 57cc361b8d38d5fc4c97abe13094d72ea0cfe8c4 Mon Sep 17 00:00:00 2001
From: Kishen Maloor <kishen.maloor@intel.com>
Date: Tue, 3 May 2022 19:38:58 -0700
Subject: selftests: mptcp: support MPTCP_PM_CMD_SUBFLOW_DESTROY

This change updates the "pm_nl_ctl" testing sample with a "dsf"
(destroy subflow) option to support the newly added netlink interface
command MPTCP_PM_CMD_SUBFLOW_DESTROY over the chosen MPTCP connection.

E.g. ./pm_nl_ctl dsf lip 10.0.2.1 lport 44567 rip 10.0.2.2 rport 56789
token 823274047

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Kishen Maloor <kishen.maloor@intel.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/mptcp/pm_nl_ctl.c | 115 ++++++++++++++++++++++++++
 1 file changed, 115 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index e2437bacd133..8d74fcb04929 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -30,6 +30,7 @@ static void syntax(char *argv[])
 	fprintf(stderr, "\tann <local-ip> id <local-id> token <token> [port <local-port>] [dev <name>]\n");
 	fprintf(stderr, "\trem id <local-id> token <token>\n");
 	fprintf(stderr, "\tcsf lip <local-ip> lid <local-id> rip <remote-ip> rport <remote-port> token <token>\n");
+	fprintf(stderr, "\tdsf lip <local-ip> lport <local-port> rip <remote-ip> rport <remote-port> token <token>\n");
 	fprintf(stderr, "\tdel <id> [<ip>]\n");
 	fprintf(stderr, "\tget <id>\n");
 	fprintf(stderr, "\tset [<ip>] [id <nr>] flags [no]backup|[no]fullmesh [port <nr>]\n");
@@ -174,6 +175,118 @@ static int resolve_mptcp_pm_netlink(int fd)
 	return genl_parse_getfamily((void *)data);
 }
 
+int dsf(int fd, int pm_family, int argc, char *argv[])
+{
+	char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+		  NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+		  1024];
+	struct rtattr *rta, *addr;
+	u_int16_t family, port;
+	struct nlmsghdr *nh;
+	u_int32_t token;
+	int addr_start;
+	int off = 0;
+	int arg;
+
+	const char *params[5];
+
+	memset(params, 0, 5 * sizeof(const char *));
+
+	memset(data, 0, sizeof(data));
+	nh = (void *)data;
+	off = init_genl_req(data, pm_family, MPTCP_PM_CMD_SUBFLOW_DESTROY,
+			    MPTCP_PM_VER);
+
+	if (argc < 12)
+		syntax(argv);
+
+	/* Params recorded in this order:
+	 * <local-ip>, <local-port>, <remote-ip>, <remote-port>, <token>
+	 */
+	for (arg = 2; arg < argc; arg++) {
+		if (!strcmp(argv[arg], "lip")) {
+			if (++arg >= argc)
+				error(1, 0, " missing local IP");
+
+			params[0] = argv[arg];
+		} else if (!strcmp(argv[arg], "lport")) {
+			if (++arg >= argc)
+				error(1, 0, " missing local port");
+
+			params[1] = argv[arg];
+		} else if (!strcmp(argv[arg], "rip")) {
+			if (++arg >= argc)
+				error(1, 0, " missing remote IP");
+
+			params[2] = argv[arg];
+		} else if (!strcmp(argv[arg], "rport")) {
+			if (++arg >= argc)
+				error(1, 0, " missing remote port");
+
+			params[3] = argv[arg];
+		} else if (!strcmp(argv[arg], "token")) {
+			if (++arg >= argc)
+				error(1, 0, " missing token");
+
+			params[4] = argv[arg];
+		} else
+			error(1, 0, "unknown keyword %s", argv[arg]);
+	}
+
+	for (arg = 0; arg < 4; arg = arg + 2) {
+		/*  addr header */
+		addr_start = off;
+		addr = (void *)(data + off);
+		addr->rta_type = NLA_F_NESTED |
+			((arg == 0) ? MPTCP_PM_ATTR_ADDR : MPTCP_PM_ATTR_ADDR_REMOTE);
+		addr->rta_len = RTA_LENGTH(0);
+		off += NLMSG_ALIGN(addr->rta_len);
+
+		/*  addr data */
+		rta = (void *)(data + off);
+		if (inet_pton(AF_INET, params[arg], RTA_DATA(rta))) {
+			family = AF_INET;
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4;
+			rta->rta_len = RTA_LENGTH(4);
+		} else if (inet_pton(AF_INET6, params[arg], RTA_DATA(rta))) {
+			family = AF_INET6;
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6;
+			rta->rta_len = RTA_LENGTH(16);
+		} else
+			error(1, errno, "can't parse ip %s", params[arg]);
+		off += NLMSG_ALIGN(rta->rta_len);
+
+		/* family */
+		rta = (void *)(data + off);
+		rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY;
+		rta->rta_len = RTA_LENGTH(2);
+		memcpy(RTA_DATA(rta), &family, 2);
+		off += NLMSG_ALIGN(rta->rta_len);
+
+		/*  port */
+		port = atoi(params[arg + 1]);
+		rta = (void *)(data + off);
+		rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT;
+		rta->rta_len = RTA_LENGTH(2);
+		memcpy(RTA_DATA(rta), &port, 2);
+		off += NLMSG_ALIGN(rta->rta_len);
+
+		addr->rta_len = off - addr_start;
+	}
+
+	/* token */
+	token = atoi(params[4]);
+	rta = (void *)(data + off);
+	rta->rta_type = MPTCP_PM_ATTR_TOKEN;
+	rta->rta_len = RTA_LENGTH(4);
+	memcpy(RTA_DATA(rta), &token, 4);
+	off += NLMSG_ALIGN(rta->rta_len);
+
+	do_nl_req(fd, nh, off, 0);
+
+	return 0;
+}
+
 int csf(int fd, int pm_family, int argc, char *argv[])
 {
 	char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
@@ -1098,6 +1211,8 @@ int main(int argc, char *argv[])
 		return remove_addr(fd, pm_family, argc, argv);
 	else if (!strcmp(argv[1], "csf"))
 		return csf(fd, pm_family, argc, argv);
+	else if (!strcmp(argv[1], "dsf"))
+		return dsf(fd, pm_family, argc, argv);
 	else if (!strcmp(argv[1], "del"))
 		return del_addr(fd, pm_family, argc, argv);
 	else if (!strcmp(argv[1], "flush"))
-- 
cgit 


From b3e5fd653d3959f2845018f60db497a056aa41b2 Mon Sep 17 00:00:00 2001
From: Kishen Maloor <kishen.maloor@intel.com>
Date: Tue, 3 May 2022 19:38:59 -0700
Subject: selftests: mptcp: capture netlink events

This change adds to self-testing support for the MPTCP netlink interface
by capturing various MPTCP netlink events (and all their metadata)
associated with connections, subflows and address announcements.
It is used in self-testing scripts that exercise MPTCP netlink commands
to precisely validate those operations by examining the dispatched
MPTCP netlink events in response to those commands.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Kishen Maloor <kishen.maloor@intel.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/mptcp/pm_nl_ctl.c | 164 ++++++++++++++++++++++++--
 1 file changed, 157 insertions(+), 7 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index 8d74fcb04929..f881d8548153 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -22,6 +22,9 @@
 #ifndef MPTCP_PM_NAME
 #define MPTCP_PM_NAME		"mptcp_pm"
 #endif
+#ifndef MPTCP_PM_EVENTS
+#define MPTCP_PM_EVENTS		"mptcp_pm_events"
+#endif
 
 static void syntax(char *argv[])
 {
@@ -37,6 +40,7 @@ static void syntax(char *argv[])
 	fprintf(stderr, "\tflush\n");
 	fprintf(stderr, "\tdump\n");
 	fprintf(stderr, "\tlimits [<rcv addr max> <subflow max>]\n");
+	fprintf(stderr, "\tevents\n");
 	exit(0);
 }
 
@@ -88,6 +92,108 @@ static void nl_error(struct nlmsghdr *nh)
 	}
 }
 
+static int capture_events(int fd, int event_group)
+{
+	u_int8_t buffer[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+			NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1024];
+	struct genlmsghdr *ghdr;
+	struct rtattr *attrs;
+	struct nlmsghdr *nh;
+	int ret = 0;
+	int res_len;
+	int msg_len;
+	fd_set rfds;
+
+	if (setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
+		       &event_group, sizeof(event_group)) < 0)
+		error(1, errno, "could not join the " MPTCP_PM_EVENTS " mcast group");
+
+	do {
+		FD_ZERO(&rfds);
+		FD_SET(fd, &rfds);
+		res_len = NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+		  NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1024;
+
+		ret = select(FD_SETSIZE, &rfds, NULL, NULL, NULL);
+
+		if (ret < 0)
+			error(1, ret, "error in select() on NL socket");
+
+		res_len = recv(fd, buffer, res_len, 0);
+		if (res_len < 0)
+			error(1, res_len, "error on recv() from NL socket");
+
+		nh = (struct nlmsghdr *)buffer;
+
+		for (; NLMSG_OK(nh, res_len); nh = NLMSG_NEXT(nh, res_len)) {
+			if (nh->nlmsg_type == NLMSG_ERROR)
+				error(1, NLMSG_ERROR, "received invalid NL message");
+
+			ghdr = (struct genlmsghdr *)NLMSG_DATA(nh);
+
+			if (ghdr->cmd == 0)
+				continue;
+
+			fprintf(stderr, "type:%d", ghdr->cmd);
+
+			msg_len = nh->nlmsg_len - NLMSG_LENGTH(GENL_HDRLEN);
+
+			attrs = (struct rtattr *) ((char *) ghdr + GENL_HDRLEN);
+			while (RTA_OK(attrs, msg_len)) {
+				if (attrs->rta_type == MPTCP_ATTR_TOKEN)
+					fprintf(stderr, ",token:%u", *(__u32 *)RTA_DATA(attrs));
+				else if (attrs->rta_type == MPTCP_ATTR_FAMILY)
+					fprintf(stderr, ",family:%u", *(__u16 *)RTA_DATA(attrs));
+				else if (attrs->rta_type == MPTCP_ATTR_LOC_ID)
+					fprintf(stderr, ",loc_id:%u", *(__u8 *)RTA_DATA(attrs));
+				else if (attrs->rta_type == MPTCP_ATTR_REM_ID)
+					fprintf(stderr, ",rem_id:%u", *(__u8 *)RTA_DATA(attrs));
+				else if (attrs->rta_type == MPTCP_ATTR_SADDR4) {
+					u_int32_t saddr4 = ntohl(*(__u32 *)RTA_DATA(attrs));
+
+					fprintf(stderr, ",saddr4:%u.%u.%u.%u", saddr4 >> 24,
+					       (saddr4 >> 16) & 0xFF, (saddr4 >> 8) & 0xFF,
+					       (saddr4 & 0xFF));
+				} else if (attrs->rta_type == MPTCP_ATTR_SADDR6) {
+					char buf[INET6_ADDRSTRLEN];
+
+					if (inet_ntop(AF_INET6, RTA_DATA(attrs), buf,
+						      sizeof(buf)) != NULL)
+						fprintf(stderr, ",saddr6:%s", buf);
+				} else if (attrs->rta_type == MPTCP_ATTR_DADDR4) {
+					u_int32_t daddr4 = ntohl(*(__u32 *)RTA_DATA(attrs));
+
+					fprintf(stderr, ",daddr4:%u.%u.%u.%u", daddr4 >> 24,
+					       (daddr4 >> 16) & 0xFF, (daddr4 >> 8) & 0xFF,
+					       (daddr4 & 0xFF));
+				} else if (attrs->rta_type == MPTCP_ATTR_DADDR6) {
+					char buf[INET6_ADDRSTRLEN];
+
+					if (inet_ntop(AF_INET6, RTA_DATA(attrs), buf,
+						      sizeof(buf)) != NULL)
+						fprintf(stderr, ",daddr6:%s", buf);
+				} else if (attrs->rta_type == MPTCP_ATTR_SPORT)
+					fprintf(stderr, ",sport:%u",
+						ntohs(*(__u16 *)RTA_DATA(attrs)));
+				else if (attrs->rta_type == MPTCP_ATTR_DPORT)
+					fprintf(stderr, ",dport:%u",
+						ntohs(*(__u16 *)RTA_DATA(attrs)));
+				else if (attrs->rta_type == MPTCP_ATTR_BACKUP)
+					fprintf(stderr, ",backup:%u", *(__u8 *)RTA_DATA(attrs));
+				else if (attrs->rta_type == MPTCP_ATTR_ERROR)
+					fprintf(stderr, ",error:%u", *(__u8 *)RTA_DATA(attrs));
+				else if (attrs->rta_type == MPTCP_ATTR_SERVER_SIDE)
+					fprintf(stderr, ",server_side:%u", *(__u8 *)RTA_DATA(attrs));
+
+				attrs = RTA_NEXT(attrs, msg_len);
+			}
+		}
+		fprintf(stderr, "\n");
+	} while (1);
+
+	return 0;
+}
+
 /* do a netlink command and, if max > 0, fetch the reply  */
 static int do_nl_req(int fd, struct nlmsghdr *nh, int len, int max)
 {
@@ -121,11 +227,18 @@ static int do_nl_req(int fd, struct nlmsghdr *nh, int len, int max)
 	return ret;
 }
 
-static int genl_parse_getfamily(struct nlmsghdr *nlh)
+static int genl_parse_getfamily(struct nlmsghdr *nlh, int *pm_family,
+				int *events_mcast_grp)
 {
 	struct genlmsghdr *ghdr = NLMSG_DATA(nlh);
 	int len = nlh->nlmsg_len;
 	struct rtattr *attrs;
+	struct rtattr *grps;
+	struct rtattr *grp;
+	int got_events_grp;
+	int got_family;
+	int grps_len;
+	int grp_len;
 
 	if (nlh->nlmsg_type != GENL_ID_CTRL)
 		error(1, errno, "Not a controller message, len=%d type=0x%x\n",
@@ -140,9 +253,42 @@ static int genl_parse_getfamily(struct nlmsghdr *nlh)
 		error(1, errno, "Unknown controller command %d\n", ghdr->cmd);
 
 	attrs = (struct rtattr *) ((char *) ghdr + GENL_HDRLEN);
+	got_family = 0;
+	got_events_grp = 0;
+
 	while (RTA_OK(attrs, len)) {
-		if (attrs->rta_type == CTRL_ATTR_FAMILY_ID)
-			return *(__u16 *)RTA_DATA(attrs);
+		if (attrs->rta_type == CTRL_ATTR_FAMILY_ID) {
+			*pm_family = *(__u16 *)RTA_DATA(attrs);
+			got_family = 1;
+		} else if (attrs->rta_type == CTRL_ATTR_MCAST_GROUPS) {
+			grps = RTA_DATA(attrs);
+			grps_len = RTA_PAYLOAD(attrs);
+
+			while (RTA_OK(grps, grps_len)) {
+				grp = RTA_DATA(grps);
+				grp_len = RTA_PAYLOAD(grps);
+				got_events_grp = 0;
+
+				while (RTA_OK(grp, grp_len)) {
+					if (grp->rta_type == CTRL_ATTR_MCAST_GRP_ID)
+						*events_mcast_grp = *(__u32 *)RTA_DATA(grp);
+					else if (grp->rta_type == CTRL_ATTR_MCAST_GRP_NAME &&
+						 !strcmp(RTA_DATA(grp), MPTCP_PM_EVENTS))
+						got_events_grp = 1;
+
+					grp = RTA_NEXT(grp, grp_len);
+				}
+
+				if (got_events_grp)
+					break;
+
+				grps = RTA_NEXT(grps, grps_len);
+			}
+		}
+
+		if (got_family && got_events_grp)
+			return 0;
+
 		attrs = RTA_NEXT(attrs, len);
 	}
 
@@ -150,7 +296,7 @@ static int genl_parse_getfamily(struct nlmsghdr *nlh)
 	return -1;
 }
 
-static int resolve_mptcp_pm_netlink(int fd)
+static int resolve_mptcp_pm_netlink(int fd, int *pm_family, int *events_mcast_grp)
 {
 	char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
 		  NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
@@ -172,7 +318,7 @@ static int resolve_mptcp_pm_netlink(int fd)
 	off += NLMSG_ALIGN(rta->rta_len);
 
 	do_nl_req(fd, nh, off, sizeof(data));
-	return genl_parse_getfamily((void *)data);
+	return genl_parse_getfamily((void *)data, pm_family, events_mcast_grp);
 }
 
 int dsf(int fd, int pm_family, int argc, char *argv[])
@@ -1192,7 +1338,9 @@ int set_flags(int fd, int pm_family, int argc, char *argv[])
 
 int main(int argc, char *argv[])
 {
-	int fd, pm_family;
+	int events_mcast_grp;
+	int pm_family;
+	int fd;
 
 	if (argc < 2)
 		syntax(argv);
@@ -1201,7 +1349,7 @@ int main(int argc, char *argv[])
 	if (fd == -1)
 		error(1, errno, "socket netlink");
 
-	pm_family = resolve_mptcp_pm_netlink(fd);
+	resolve_mptcp_pm_netlink(fd, &pm_family, &events_mcast_grp);
 
 	if (!strcmp(argv[1], "add"))
 		return add_addr(fd, pm_family, argc, argv);
@@ -1225,6 +1373,8 @@ int main(int argc, char *argv[])
 		return get_set_limits(fd, pm_family, argc, argv);
 	else if (!strcmp(argv[1], "set"))
 		return set_flags(fd, pm_family, argc, argv);
+	else if (!strcmp(argv[1], "events"))
+		return capture_events(fd, events_mcast_grp);
 
 	fprintf(stderr, "unknown sub-command: %s", argv[1]);
 	syntax(argv);
-- 
cgit 


From bdde081d728ab86812947a62bf84a3b4dfeb2635 Mon Sep 17 00:00:00 2001
From: Kishen Maloor <kishen.maloor@intel.com>
Date: Tue, 3 May 2022 19:39:00 -0700
Subject: selftests: mptcp: create listeners to receive MPJs

This change updates the "pm_nl_ctl" testing sample with a
"listen" option to bind a MPTCP listening socket to the
provided addr+port. This option is exercised in testing
subflow initiation scenarios in conjunction with userspace
path managers where the MPTCP application does not hold an
active listener to accept requests for new subflows.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Kishen Maloor <kishen.maloor@intel.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/mptcp/pm_nl_ctl.c | 54 +++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index f881d8548153..6a2f4b981e1d 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -25,6 +25,9 @@
 #ifndef MPTCP_PM_EVENTS
 #define MPTCP_PM_EVENTS		"mptcp_pm_events"
 #endif
+#ifndef IPPROTO_MPTCP
+#define IPPROTO_MPTCP 262
+#endif
 
 static void syntax(char *argv[])
 {
@@ -41,6 +44,7 @@ static void syntax(char *argv[])
 	fprintf(stderr, "\tdump\n");
 	fprintf(stderr, "\tlimits [<rcv addr max> <subflow max>]\n");
 	fprintf(stderr, "\tevents\n");
+	fprintf(stderr, "\tlisten <local-ip> <local-port>\n");
 	exit(0);
 }
 
@@ -1219,6 +1223,54 @@ int get_set_limits(int fd, int pm_family, int argc, char *argv[])
 	return 0;
 }
 
+int add_listener(int argc, char *argv[])
+{
+	struct sockaddr_storage addr;
+	struct sockaddr_in6 *a6;
+	struct sockaddr_in *a4;
+	u_int16_t family;
+	int enable = 1;
+	int sock;
+	int err;
+
+	if (argc < 4)
+		syntax(argv);
+
+	memset(&addr, 0, sizeof(struct sockaddr_storage));
+	a4 = (struct sockaddr_in *)&addr;
+	a6 = (struct sockaddr_in6 *)&addr;
+
+	if (inet_pton(AF_INET, argv[2], &a4->sin_addr)) {
+		family = AF_INET;
+		a4->sin_family = family;
+		a4->sin_port = htons(atoi(argv[3]));
+	} else if (inet_pton(AF_INET6, argv[2], &a6->sin6_addr)) {
+		family = AF_INET6;
+		a6->sin6_family = family;
+		a6->sin6_port = htons(atoi(argv[3]));
+	} else
+		error(1, errno, "can't parse ip %s", argv[2]);
+
+	sock = socket(family, SOCK_STREAM, IPPROTO_MPTCP);
+	if (sock < 0)
+		error(1, errno, "can't create listener sock\n");
+
+	if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(enable))) {
+		close(sock);
+		error(1, errno, "can't set SO_REUSEADDR on listener sock\n");
+	}
+
+	err = bind(sock, (struct sockaddr *)&addr,
+		   ((family == AF_INET) ? sizeof(struct sockaddr_in) :
+		    sizeof(struct sockaddr_in6)));
+
+	if (err == 0 && listen(sock, 30) == 0)
+		pause();
+
+	close(sock);
+	return 0;
+}
+
 int set_flags(int fd, int pm_family, int argc, char *argv[])
 {
 	char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
@@ -1375,6 +1427,8 @@ int main(int argc, char *argv[])
 		return set_flags(fd, pm_family, argc, argv);
 	else if (!strcmp(argv[1], "events"))
 		return capture_events(fd, events_mcast_grp);
+	else if (!strcmp(argv[1], "listen"))
+		return add_listener(argc, argv);
 
 	fprintf(stderr, "unknown sub-command: %s", argv[1]);
 	syntax(argv);
-- 
cgit 


From 259a834fadda06db430bcd4ab95e1fcf5e63c4cb Mon Sep 17 00:00:00 2001
From: Kishen Maloor <kishen.maloor@intel.com>
Date: Tue, 3 May 2022 19:39:01 -0700
Subject: selftests: mptcp: functional tests for the userspace PM type

This change adds a selftest script that performs a comprehensive
behavioral/functional test of all userspace PM capabilities by exercising
all the newly added APIs and changes to support said capabilities.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Kishen Maloor <kishen.maloor@intel.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/mptcp/userspace_pm.sh | 779 ++++++++++++++++++++++
 1 file changed, 779 insertions(+)
 create mode 100755 tools/testing/selftests/net/mptcp/userspace_pm.sh

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh
new file mode 100755
index 000000000000..78d0bb640b11
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
@@ -0,0 +1,779 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Cannot not run test without ip tool"
+	exit 1
+fi
+
+ANNOUNCED=6        # MPTCP_EVENT_ANNOUNCED
+REMOVED=7          # MPTCP_EVENT_REMOVED
+SUB_ESTABLISHED=10 # MPTCP_EVENT_SUB_ESTABLISHED
+SUB_CLOSED=11      # MPTCP_EVENT_SUB_CLOSED
+
+AF_INET=2
+AF_INET6=10
+
+evts_pid=0
+client4_pid=0
+server4_pid=0
+client6_pid=0
+server6_pid=0
+client4_token=""
+server4_token=""
+client6_token=""
+server6_token=""
+client4_port=0;
+client6_port=0;
+app4_port=50002
+new4_port=50003
+app6_port=50004
+client_addr_id=${RANDOM:0:2}
+server_addr_id=${RANDOM:0:2}
+
+sec=$(date +%s)
+rndh=$(stdbuf -o0 -e0 printf %x "$sec")-$(mktemp -u XXXXXX)
+ns1="ns1-$rndh"
+ns2="ns2-$rndh"
+
+cleanup()
+{
+	echo "cleanup"
+
+	rm -rf $file
+
+	# Terminate the MPTCP connection and related processes
+	if [ $client4_pid -ne 0 ]; then
+		kill -SIGUSR1 $client4_pid > /dev/null 2>&1
+	fi
+	if [ $server4_pid -ne 0 ]; then
+		kill $server4_pid > /dev/null 2>&1
+	fi
+	if [ $client6_pid -ne 0 ]; then
+		kill -SIGUSR1 $client6_pid > /dev/null 2>&1
+	fi
+	if [ $server6_pid -ne 0 ]; then
+		kill $server6_pid > /dev/null 2>&1
+	fi
+	if [ $evts_pid -ne 0 ]; then
+		kill $evts_pid > /dev/null 2>&1
+	fi
+	local netns
+	for netns in "$ns1" "$ns2" ;do
+		ip netns del "$netns"
+	done
+}
+
+trap cleanup EXIT
+
+# Create and configure network namespaces for testing
+for i in "$ns1" "$ns2" ;do
+	ip netns add "$i" || exit 1
+	ip -net "$i" link set lo up
+	ip netns exec "$i" sysctl -q net.mptcp.enabled=1
+	ip netns exec "$i" sysctl -q net.mptcp.pm_type=1
+done
+
+#  "$ns1"              ns2
+#     ns1eth2    ns2eth1
+
+ip link add ns1eth2 netns "$ns1" type veth peer name ns2eth1 netns "$ns2"
+
+# Add IPv4/v6 addresses to the namespaces
+ip -net "$ns1" addr add 10.0.1.1/24 dev ns1eth2
+ip -net "$ns1" addr add 10.0.2.1/24 dev ns1eth2
+ip -net "$ns1" addr add dead:beef:1::1/64 dev ns1eth2 nodad
+ip -net "$ns1" addr add dead:beef:2::1/64 dev ns1eth2 nodad
+ip -net "$ns1" link set ns1eth2 up
+
+ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1
+ip -net "$ns2" addr add 10.0.2.2/24 dev ns2eth1
+ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad
+ip -net "$ns2" addr add dead:beef:2::2/64 dev ns2eth1 nodad
+ip -net "$ns2" link set ns2eth1 up
+
+stdbuf -o0 -e0 printf "Created network namespaces ns1, ns2         \t\t\t[OK]\n"
+
+make_file()
+{
+	# Store a chunk of data in a file to transmit over an MPTCP connection
+	local name=$1
+	local ksize=1
+
+	dd if=/dev/urandom of="$name" bs=2 count=$ksize 2> /dev/null
+	echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name"
+}
+
+make_connection()
+{
+	local file
+	file=$(mktemp)
+	make_file "$file" "client"
+
+	local is_v6=$1
+	local app_port=$app4_port
+	local connect_addr="10.0.1.1"
+	local listen_addr="0.0.0.0"
+	if [ "$is_v6" = "v6" ]
+	then
+		connect_addr="dead:beef:1::1"
+		listen_addr="::"
+		app_port=$app6_port
+	else
+		is_v6="v4"
+	fi
+
+	# Capture netlink events over the two network namespaces running
+	# the MPTCP client and server
+	local client_evts
+	client_evts=$(mktemp)
+	:>"$client_evts"
+	ip netns exec "$ns2" ./pm_nl_ctl events >> "$client_evts" 2>&1 &
+	local client_evts_pid=$!
+	local server_evts
+	server_evts=$(mktemp)
+	:>"$server_evts"
+	ip netns exec "$ns1" ./pm_nl_ctl events >> "$server_evts" 2>&1 &
+	local server_evts_pid=$!
+	sleep 0.5
+
+	# Run the server
+	ip netns exec "$ns1" \
+	   ./mptcp_connect -s MPTCP -w 300 -p $app_port -l $listen_addr > /dev/null 2>&1 &
+	local server_pid=$!
+	sleep 0.5
+
+	# Run the client, transfer $file and stay connected to the server
+	# to conduct tests
+	ip netns exec "$ns2" \
+	   ./mptcp_connect -s MPTCP -w 300 -m sendfile -p $app_port $connect_addr\
+	   2>&1 > /dev/null < "$file" &
+	local client_pid=$!
+	sleep 1
+
+	# Capture client/server attributes from MPTCP connection netlink events
+	kill $client_evts_pid
+
+	local client_token
+	local client_port
+	local client_serverside
+	local server_token
+	local server_serverside
+
+	client_token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts")
+	client_port=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts")
+	client_serverside=$(sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q'\
+				      "$client_evts")
+	kill $server_evts_pid
+	server_token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts")
+	server_serverside=$(sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q'\
+				      "$server_evts")
+	rm -f "$client_evts" "$server_evts" "$file"
+
+	if [ "$client_token" != "" ] && [ "$server_token" != "" ] && [ "$client_serverside" = 0 ] &&
+		   [ "$server_serverside" = 1 ]
+	then
+		stdbuf -o0 -e0 printf "Established IP%s MPTCP Connection ns2 => ns1    \t\t[OK]\n" $is_v6
+	else
+		exit 1
+	fi
+
+	if [ "$is_v6" = "v6" ]
+	then
+		client6_token=$client_token
+		server6_token=$server_token
+		client6_port=$client_port
+		client6_pid=$client_pid
+		server6_pid=$server_pid
+	else
+		client4_token=$client_token
+		server4_token=$server_token
+		client4_port=$client_port
+		client4_pid=$client_pid
+		server4_pid=$server_pid
+	fi
+}
+
+verify_announce_event()
+{
+	local evt=$1
+	local e_type=$2
+	local e_token=$3
+	local e_addr=$4
+	local e_id=$5
+	local e_dport=$6
+	local e_af=$7
+	local type
+	local token
+	local addr
+	local dport
+	local id
+
+	type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+	token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+	if [ "$e_af" = "v6" ]
+	then
+		addr=$(sed --unbuffered -n 's/.*\(daddr6:\)\([0-9a-f:.]*\).*$/\2/p;q' "$evt")
+	else
+		addr=$(sed --unbuffered -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evt")
+	fi
+	dport=$(sed --unbuffered -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+	id=$(sed --unbuffered -n 's/.*\(rem_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+	if [ "$type" = "$e_type" ] && [ "$token" = "$e_token" ] &&
+		   [ "$addr" = "$e_addr" ] && [ "$dport" = "$e_dport" ] &&
+		   [ "$id" = "$e_id" ]
+	then
+		stdbuf -o0 -e0 printf "[OK]\n"
+		return 0
+	fi
+	stdbuf -o0 -e0 printf "[FAIL]\n"
+	exit 1
+}
+
+test_announce()
+{
+	local evts
+	evts=$(mktemp)
+	# Capture events on the network namespace running the server
+	:>"$evts"
+	ip netns exec "$ns1" ./pm_nl_ctl events >> "$evts" 2>&1 &
+	evts_pid=$!
+	sleep 0.5
+
+	# ADD_ADDR using an invalid token should result in no action
+	local invalid_token=$(( client4_token - 1))
+	ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token $invalid_token id\
+	   $client_addr_id dev ns2eth1 > /dev/null 2>&1
+
+	local type
+	type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evts")
+	stdbuf -o0 -e0 printf "ADD_ADDR 10.0.2.2 (ns2) => ns1, invalid token    \t\t"
+	if [ "$type" = "" ]
+	then
+		stdbuf -o0 -e0 printf "[OK]\n"
+	else
+		stdbuf -o0 -e0 printf "[FAIL]\n"
+		exit 1
+	fi
+
+	# ADD_ADDR from the client to server machine reusing the subflow port
+	:>"$evts"
+	ip netns exec "$ns2"\
+	   ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id $client_addr_id dev\
+	   ns2eth1 > /dev/null 2>&1
+	stdbuf -o0 -e0 printf "ADD_ADDR id:%d 10.0.2.2 (ns2) => ns1, reuse port \t\t" $client_addr_id
+	sleep 0.5
+	verify_announce_event "$evts" "$ANNOUNCED" "$server4_token" "10.0.2.2" "$client_addr_id"\
+			      "$client4_port"
+
+	# ADD_ADDR6 from the client to server machine reusing the subflow port
+	:>"$evts"
+	ip netns exec "$ns2" ./pm_nl_ctl ann\
+	   dead:beef:2::2 token "$client6_token" id $client_addr_id dev ns2eth1 > /dev/null 2>&1
+	stdbuf -o0 -e0 printf "ADD_ADDR6 id:%d dead:beef:2::2 (ns2) => ns1, reuse port\t\t" $client_addr_id
+	sleep 0.5
+	verify_announce_event "$evts" "$ANNOUNCED" "$server6_token" "dead:beef:2::2"\
+			      "$client_addr_id" "$client6_port" "v6"
+
+	# ADD_ADDR from the client to server machine using a new port
+	:>"$evts"
+	client_addr_id=$((client_addr_id+1))
+	ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id\
+	   $client_addr_id dev ns2eth1 port $new4_port > /dev/null 2>&1
+	stdbuf -o0 -e0 printf "ADD_ADDR id:%d 10.0.2.2 (ns2) => ns1, new port \t\t\t" $client_addr_id
+	sleep 0.5
+	verify_announce_event "$evts" "$ANNOUNCED" "$server4_token" "10.0.2.2"\
+			      "$client_addr_id" "$new4_port"
+
+	kill $evts_pid
+
+	# Capture events on the network namespace running the client
+	:>"$evts"
+	ip netns exec "$ns2" ./pm_nl_ctl events >> "$evts" 2>&1 &
+	evts_pid=$!
+	sleep 0.5
+
+	# ADD_ADDR from the server to client machine reusing the subflow port
+	ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\
+	   $server_addr_id dev ns1eth2 > /dev/null 2>&1
+	stdbuf -o0 -e0 printf "ADD_ADDR id:%d 10.0.2.1 (ns1) => ns2, reuse port \t\t" $server_addr_id
+	sleep 0.5
+	verify_announce_event "$evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\
+			      "$server_addr_id" "$app4_port"
+
+	# ADD_ADDR6 from the server to client machine reusing the subflow port
+	:>"$evts"
+	ip netns exec "$ns1" ./pm_nl_ctl ann dead:beef:2::1 token "$server6_token" id\
+	   $server_addr_id dev ns1eth2 > /dev/null 2>&1
+	stdbuf -o0 -e0 printf "ADD_ADDR6 id:%d dead:beef:2::1 (ns1) => ns2, reuse port\t\t" $server_addr_id
+	sleep 0.5
+	verify_announce_event "$evts" "$ANNOUNCED" "$client6_token" "dead:beef:2::1"\
+			      "$server_addr_id" "$app6_port" "v6"
+
+	# ADD_ADDR from the server to client machine using a new port
+	:>"$evts"
+	server_addr_id=$((server_addr_id+1))
+	ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\
+	   $server_addr_id dev ns1eth2 port $new4_port > /dev/null 2>&1
+	stdbuf -o0 -e0 printf "ADD_ADDR id:%d 10.0.2.1 (ns1) => ns2, new port \t\t\t" $server_addr_id
+	sleep 0.5
+	verify_announce_event "$evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\
+			      "$server_addr_id" "$new4_port"
+
+	kill $evts_pid
+	rm -f "$evts"
+}
+
+verify_remove_event()
+{
+	local evt=$1
+	local e_type=$2
+	local e_token=$3
+	local e_id=$4
+	local type
+	local token
+	local id
+
+	type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+	token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+	id=$(sed --unbuffered -n 's/.*\(rem_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+	if [ "$type" = "$e_type" ] && [ "$token" = "$e_token" ] &&
+		   [ "$id" = "$e_id" ]
+	then
+		stdbuf -o0 -e0 printf "[OK]\n"
+		return 0
+	fi
+	stdbuf -o0 -e0 printf "[FAIL]\n"
+	exit 1
+}
+
+test_remove()
+{
+	local evts
+	evts=$(mktemp)
+
+	# Capture events on the network namespace running the server
+	:>"$evts"
+	ip netns exec "$ns1" ./pm_nl_ctl events >> "$evts" 2>&1 &
+	evts_pid=$!
+	sleep 0.5
+
+	# RM_ADDR using an invalid token should result in no action
+	local invalid_token=$(( client4_token - 1 ))
+	ip netns exec "$ns2" ./pm_nl_ctl rem token $invalid_token id\
+	   $client_addr_id > /dev/null 2>&1
+	stdbuf -o0 -e0 printf "RM_ADDR id:%d ns2 => ns1, invalid token                    \t"\
+	       $client_addr_id
+	local type
+	type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evts")
+	if [ "$type" = "" ]
+	then
+		stdbuf -o0 -e0 printf "[OK]\n"
+	else
+		stdbuf -o0 -e0 printf "[FAIL]\n"
+	fi
+
+	# RM_ADDR using an invalid addr id should result in no action
+	local invalid_id=$(( client_addr_id + 1 ))
+	ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\
+	   $invalid_id > /dev/null 2>&1
+	stdbuf -o0 -e0 printf "RM_ADDR id:%d ns2 => ns1, invalid id                    \t"\
+	       $invalid_id
+	type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evts")
+	if [ "$type" = "" ]
+	then
+		stdbuf -o0 -e0 printf "[OK]\n"
+	else
+		stdbuf -o0 -e0 printf "[FAIL]\n"
+	fi
+
+	# RM_ADDR from the client to server machine
+	:>"$evts"
+	ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\
+	   $client_addr_id > /dev/null 2>&1
+	stdbuf -o0 -e0 printf "RM_ADDR id:%d ns2 => ns1                                \t"\
+	       $client_addr_id
+	sleep 0.5
+	verify_remove_event "$evts" "$REMOVED" "$server4_token" "$client_addr_id"
+
+	# RM_ADDR from the client to server machine
+	:>"$evts"
+	client_addr_id=$(( client_addr_id - 1 ))
+	ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\
+	   $client_addr_id > /dev/null 2>&1
+	stdbuf -o0 -e0 printf "RM_ADDR id:%d ns2 => ns1                                \t"\
+	       $client_addr_id
+	sleep 0.5
+	verify_remove_event "$evts" "$REMOVED" "$server4_token" "$client_addr_id"
+
+	# RM_ADDR6 from the client to server machine
+	:>"$evts"
+	ip netns exec "$ns2" ./pm_nl_ctl rem token "$client6_token" id\
+	   $client_addr_id > /dev/null 2>&1
+	stdbuf -o0 -e0 printf "RM_ADDR6 id:%d ns2 => ns1                               \t"\
+	       $client_addr_id
+	sleep 0.5
+	verify_remove_event "$evts" "$REMOVED" "$server6_token" "$client_addr_id"
+
+	kill $evts_pid
+
+	# Capture events on the network namespace running the client
+	:>"$evts"
+	ip netns exec "$ns2" ./pm_nl_ctl events >> "$evts" 2>&1 &
+	evts_pid=$!
+	sleep 0.5
+
+	# RM_ADDR from the server to client machine
+	ip netns exec "$ns1" ./pm_nl_ctl rem token "$server4_token" id\
+	   $server_addr_id > /dev/null 2>&1
+	stdbuf -o0 -e0 printf "RM_ADDR id:%d ns1 => ns2                                \t"\
+	       $server_addr_id
+	sleep 0.5
+	verify_remove_event "$evts" "$REMOVED" "$client4_token" "$server_addr_id"
+
+	# RM_ADDR from the server to client machine
+	:>"$evts"
+	server_addr_id=$(( server_addr_id - 1 ))
+	ip netns exec "$ns1" ./pm_nl_ctl rem token "$server4_token" id\
+	   $server_addr_id > /dev/null 2>&1
+	stdbuf -o0 -e0 printf "RM_ADDR id:%d ns1 => ns2                                \t" $server_addr_id
+	sleep 0.5
+	verify_remove_event "$evts" "$REMOVED" "$client4_token" "$server_addr_id"
+
+	# RM_ADDR6 from the server to client machine
+	:>"$evts"
+	ip netns exec "$ns1" ./pm_nl_ctl rem token "$server6_token" id\
+	   $server_addr_id > /dev/null 2>&1
+	stdbuf -o0 -e0 printf "RM_ADDR6 id:%d ns1 => ns2                               \t" $server_addr_id
+	sleep 0.5
+	verify_remove_event "$evts" "$REMOVED" "$client6_token" "$server_addr_id"
+
+	kill $evts_pid
+	rm -f "$evts"
+}
+
+verify_subflow_events()
+{
+	local evt=$1
+	local e_type=$2
+	local e_token=$3
+	local e_family=$4
+	local e_saddr=$5
+	local e_daddr=$6
+	local e_dport=$7
+	local e_locid=$8
+	local e_remid=$9
+	shift 2
+	local e_from=$8
+	local e_to=$9
+	local type
+	local token
+	local family
+	local saddr
+	local daddr
+	local dport
+	local locid
+	local remid
+
+	if [ "$e_type" = "$SUB_ESTABLISHED" ]
+	then
+		if [ "$e_family" = "$AF_INET6" ]
+		then
+			stdbuf -o0 -e0 printf "CREATE_SUBFLOW6 %s (%s) => %s (%s)    "\
+			       "$e_saddr" "$e_from" "$e_daddr" "$e_to"
+		else
+			stdbuf -o0 -e0 printf "CREATE_SUBFLOW %s (%s) => %s (%s)         \t"\
+			       "$e_saddr" "$e_from" "$e_daddr" "$e_to"
+		fi
+	else
+		if [ "$e_family" = "$AF_INET6" ]
+		then
+			stdbuf -o0 -e0 printf "DESTROY_SUBFLOW6 %s (%s) => %s (%s)   "\
+			       "$e_saddr" "$e_from" "$e_daddr" "$e_to"
+		else
+			stdbuf -o0 -e0 printf "DESTROY_SUBFLOW %s (%s) => %s (%s)         \t"\
+			       "$e_saddr" "$e_from" "$e_daddr" "$e_to"
+		fi
+	fi
+
+	type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+	token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+	family=$(sed --unbuffered -n 's/.*\(family:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+	dport=$(sed --unbuffered -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+	locid=$(sed --unbuffered -n 's/.*\(loc_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+	remid=$(sed --unbuffered -n 's/.*\(rem_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+	if [ "$family" = "$AF_INET6" ]
+	then
+		saddr=$(sed --unbuffered -n 's/.*\(saddr6:\)\([0-9a-f:.]*\).*$/\2/p;q' "$evt")
+		daddr=$(sed --unbuffered -n 's/.*\(daddr6:\)\([0-9a-f:.]*\).*$/\2/p;q' "$evt")
+	else
+		saddr=$(sed --unbuffered -n 's/.*\(saddr4:\)\([0-9.]*\).*$/\2/p;q' "$evt")
+		daddr=$(sed --unbuffered -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evt")
+	fi
+
+	if [ "$type" = "$e_type" ] && [ "$token" = "$e_token" ] &&
+		   [ "$daddr" = "$e_daddr" ] && [ "$e_dport" = "$dport" ] &&
+		   [ "$family" = "$e_family" ] && [ "$saddr" = "$e_saddr" ] &&
+		   [ "$e_locid" = "$locid" ] && [ "$e_remid" = "$remid" ]
+	then
+		stdbuf -o0 -e0 printf "[OK]\n"
+		return 0
+	fi
+	stdbuf -o0 -e0 printf "[FAIL]\n"
+	exit 1
+}
+
+test_subflows()
+{
+	local evts
+	evts=$(mktemp)
+	# Capture events on the network namespace running the server
+	:>"$evts"
+	ip netns exec "$ns1" ./pm_nl_ctl events >> "$evts" 2>&1 &
+	evts_pid=$!
+	sleep 0.5
+
+	# Attempt to add a listener at 10.0.2.2:<subflow-port>
+	ip netns exec "$ns2" ./pm_nl_ctl listen 10.0.2.2\
+	   "$client4_port" > /dev/null 2>&1 &
+	local listener_pid=$!
+
+	# ADD_ADDR from client to server machine reusing the subflow port
+	ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id\
+	   $client_addr_id > /dev/null 2>&1
+	sleep 0.5
+
+	# CREATE_SUBFLOW from server to client machine
+	:>"$evts"
+	ip netns exec "$ns1" ./pm_nl_ctl csf lip 10.0.2.1 lid 23 rip 10.0.2.2\
+	   rport "$client4_port" token "$server4_token" > /dev/null 2>&1
+	sleep 0.5
+	verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$server4_token" "$AF_INET" "10.0.2.1"\
+			      "10.0.2.2" "$client4_port" "23" "$client_addr_id" "ns1" "ns2"
+
+	# Delete the listener from the client ns, if one was created
+	kill $listener_pid > /dev/null 2>&1
+
+	local sport
+	sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts")
+
+	# DESTROY_SUBFLOW from server to client machine
+	:>"$evts"
+	ip netns exec "$ns1" ./pm_nl_ctl dsf lip 10.0.2.1 lport "$sport" rip 10.0.2.2 rport\
+	   "$client4_port" token "$server4_token" > /dev/null 2>&1
+	sleep 0.5
+	verify_subflow_events "$evts" "$SUB_CLOSED" "$server4_token" "$AF_INET" "10.0.2.1"\
+			      "10.0.2.2" "$client4_port" "23" "$client_addr_id" "ns1" "ns2"
+
+	# RM_ADDR from client to server machine
+	ip netns exec "$ns2" ./pm_nl_ctl rem id $client_addr_id token\
+	   "$client4_token" > /dev/null 2>&1
+	sleep 0.5
+
+	# Attempt to add a listener at dead:beef:2::2:<subflow-port>
+	ip netns exec "$ns2" ./pm_nl_ctl listen dead:beef:2::2\
+	   "$client6_port" > /dev/null 2>&1 &
+	listener_pid=$!
+
+	# ADD_ADDR6 from client to server machine reusing the subflow port
+	:>"$evts"
+	ip netns exec "$ns2" ./pm_nl_ctl ann dead:beef:2::2 token "$client6_token" id\
+	   $client_addr_id > /dev/null 2>&1
+	sleep 0.5
+
+	# CREATE_SUBFLOW6 from server to client machine
+	:>"$evts"
+	ip netns exec "$ns1" ./pm_nl_ctl csf lip dead:beef:2::1 lid 23 rip\
+	   dead:beef:2::2 rport "$client6_port" token "$server6_token" > /dev/null 2>&1
+	sleep 0.5
+	verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$server6_token" "$AF_INET6"\
+			      "dead:beef:2::1" "dead:beef:2::2" "$client6_port" "23"\
+			      "$client_addr_id" "ns1" "ns2"
+
+	# Delete the listener from the client ns, if one was created
+	kill $listener_pid > /dev/null 2>&1
+
+	sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts")
+
+	# DESTROY_SUBFLOW6 from server to client machine
+	:>"$evts"
+	ip netns exec "$ns1" ./pm_nl_ctl dsf lip dead:beef:2::1 lport "$sport" rip\
+	   dead:beef:2::2 rport "$client6_port" token "$server6_token" > /dev/null 2>&1
+	sleep 0.5
+	verify_subflow_events "$evts" "$SUB_CLOSED" "$server6_token" "$AF_INET6"\
+			      "dead:beef:2::1" "dead:beef:2::2" "$client6_port" "23"\
+			      "$client_addr_id" "ns1" "ns2"
+
+	# RM_ADDR from client to server machine
+	ip netns exec "$ns2" ./pm_nl_ctl rem id $client_addr_id token\
+	   "$client6_token" > /dev/null 2>&1
+	sleep 0.5
+
+	# Attempt to add a listener at 10.0.2.2:<new-port>
+	ip netns exec "$ns2" ./pm_nl_ctl listen 10.0.2.2\
+	   $new4_port > /dev/null 2>&1 &
+	listener_pid=$!
+
+	# ADD_ADDR from client to server machine using a new port
+	:>"$evts"
+	ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id\
+	   $client_addr_id port $new4_port > /dev/null 2>&1
+	sleep 0.5
+
+	# CREATE_SUBFLOW from server to client machine
+	:>"$evts"
+	ip netns exec "$ns1" ./pm_nl_ctl csf lip 10.0.2.1 lid 23 rip 10.0.2.2 rport\
+	   $new4_port token "$server4_token" > /dev/null 2>&1
+	sleep 0.5
+	verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$server4_token" "$AF_INET"\
+			      "10.0.2.1" "10.0.2.2" "$new4_port" "23"\
+			      "$client_addr_id" "ns1" "ns2"
+
+	# Delete the listener from the client ns, if one was created
+	kill $listener_pid > /dev/null 2>&1
+
+	sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts")
+
+	# DESTROY_SUBFLOW from server to client machine
+	:>"$evts"
+	ip netns exec "$ns1" ./pm_nl_ctl dsf lip 10.0.2.1 lport "$sport" rip 10.0.2.2 rport\
+	   $new4_port token "$server4_token" > /dev/null 2>&1
+	sleep 0.5
+	verify_subflow_events "$evts" "$SUB_CLOSED" "$server4_token" "$AF_INET" "10.0.2.1"\
+			      "10.0.2.2" "$new4_port" "23" "$client_addr_id" "ns1" "ns2"
+
+	# RM_ADDR from client to server machine
+	ip netns exec "$ns2" ./pm_nl_ctl rem id $client_addr_id token\
+	   "$client4_token" > /dev/null 2>&1
+
+	kill $evts_pid
+
+	# Capture events on the network namespace running the client
+	:>"$evts"
+	ip netns exec "$ns2" ./pm_nl_ctl events >> "$evts" 2>&1 &
+	evts_pid=$!
+	sleep 0.5
+
+	# Attempt to add a listener at 10.0.2.1:<subflow-port>
+	ip netns exec "$ns1" ./pm_nl_ctl listen 10.0.2.1\
+	   $app4_port > /dev/null 2>&1 &
+	listener_pid=$!
+
+	# ADD_ADDR from server to client machine reusing the subflow port
+	ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\
+	   $server_addr_id > /dev/null 2>&1
+	sleep 0.5
+
+	# CREATE_SUBFLOW from client to server machine
+	:>"$evts"
+	ip netns exec "$ns2" ./pm_nl_ctl csf lip 10.0.2.2 lid 23 rip 10.0.2.1 rport\
+	   $app4_port token "$client4_token" > /dev/null 2>&1
+	sleep 0.5
+	verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$client4_token" "$AF_INET" "10.0.2.2"\
+			      "10.0.2.1" "$app4_port" "23" "$server_addr_id" "ns2" "ns1"
+
+	# Delete the listener from the server ns, if one was created
+	kill $listener_pid> /dev/null 2>&1
+
+	sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts")
+
+	# DESTROY_SUBFLOW from client to server machine
+	:>"$evts"
+	ip netns exec "$ns2" ./pm_nl_ctl dsf lip 10.0.2.2 lport "$sport" rip 10.0.2.1 rport\
+	   $app4_port token "$client4_token" > /dev/null 2>&1
+	sleep 0.5
+	verify_subflow_events "$evts" "$SUB_CLOSED" "$client4_token" "$AF_INET" "10.0.2.2"\
+			      "10.0.2.1" "$app4_port" "23" "$server_addr_id" "ns2" "ns1"
+
+	# RM_ADDR from server to client machine
+	ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\
+	   "$server4_token" > /dev/null 2>&1
+	sleep 0.5
+
+	# Attempt to add a listener at dead:beef:2::1:<subflow-port>
+	ip netns exec "$ns1" ./pm_nl_ctl listen dead:beef:2::1\
+	   $app6_port > /dev/null 2>&1 &
+	listener_pid=$!
+
+	# ADD_ADDR6 from server to client machine reusing the subflow port
+	:>"$evts"
+	ip netns exec "$ns1" ./pm_nl_ctl ann dead:beef:2::1 token "$server6_token" id\
+	   $server_addr_id > /dev/null 2>&1
+	sleep 0.5
+
+	# CREATE_SUBFLOW6 from client to server machine
+	:>"$evts"
+	ip netns exec "$ns2" ./pm_nl_ctl csf lip dead:beef:2::2 lid 23 rip\
+	   dead:beef:2::1 rport $app6_port token "$client6_token" > /dev/null 2>&1
+	sleep 0.5
+	verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$client6_token"\
+			      "$AF_INET6" "dead:beef:2::2"\
+			      "dead:beef:2::1" "$app6_port" "23"\
+			      "$server_addr_id" "ns2" "ns1"
+
+	# Delete the listener from the server ns, if one was created
+	kill $listener_pid > /dev/null 2>&1
+
+	sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts")
+
+	# DESTROY_SUBFLOW6 from client to server machine
+	:>"$evts"
+	ip netns exec "$ns2" ./pm_nl_ctl dsf lip dead:beef:2::2 lport "$sport" rip\
+	   dead:beef:2::1 rport $app6_port token "$client6_token" > /dev/null 2>&1
+	sleep 0.5
+	verify_subflow_events "$evts" "$SUB_CLOSED" "$client6_token" "$AF_INET6" "dead:beef:2::2"\
+			      "dead:beef:2::1" "$app6_port" "23" "$server_addr_id" "ns2" "ns1"
+
+	# RM_ADDR6 from server to client machine
+	ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\
+	   "$server6_token" > /dev/null 2>&1
+	sleep 0.5
+
+	# Attempt to add a listener at 10.0.2.1:<new-port>
+	ip netns exec "$ns1" ./pm_nl_ctl listen 10.0.2.1\
+	   $new4_port > /dev/null 2>&1 &
+	listener_pid=$!
+
+	# ADD_ADDR from server to client machine using a new port
+	:>"$evts"
+	ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\
+	   $server_addr_id port $new4_port > /dev/null 2>&1
+	sleep 0.5
+
+	# CREATE_SUBFLOW from client to server machine
+	:>"$evts"
+	ip netns exec "$ns2" ./pm_nl_ctl csf lip 10.0.2.2 lid 23 rip 10.0.2.1 rport\
+	   $new4_port token "$client4_token" > /dev/null 2>&1
+	sleep 0.5
+	verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$client4_token" "$AF_INET"\
+			      "10.0.2.2" "10.0.2.1" "$new4_port" "23" "$server_addr_id" "ns2" "ns1"
+
+	# Delete the listener from the server ns, if one was created
+	kill $listener_pid > /dev/null 2>&1
+
+	sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts")
+
+	# DESTROY_SUBFLOW from client to server machine
+	:>"$evts"
+	ip netns exec "$ns2" ./pm_nl_ctl dsf lip 10.0.2.2 lport "$sport" rip 10.0.2.1 rport\
+	   $new4_port token "$client4_token" > /dev/null 2>&1
+	sleep 0.5
+	verify_subflow_events "$evts" "$SUB_CLOSED" "$client4_token" "$AF_INET" "10.0.2.2"\
+			      "10.0.2.1" "$new4_port" "23" "$server_addr_id" "ns2" "ns1"
+
+	# RM_ADDR from server to client machine
+	ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\
+	   "$server4_token" > /dev/null 2>&1
+
+	kill $evts_pid
+	rm -f "$evts"
+}
+
+make_connection
+make_connection "v6"
+test_announce
+test_remove
+test_subflows
+
+exit 0
-- 
cgit 


From 18d2c710e5dfa0cad7d8e170496dafe0939d26a2 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Wed, 4 May 2022 09:29:02 +0300
Subject: selftests: mlxsw: bail_on_lldpad before installing the cleanup trap

A number of mlxsw-specific QoS tests use manual QoS DCB management. As
such, they need to make sure lldpad is not running, because it would
override the configuration the test has applied using other tools. To that
end, these selftests invoke the bail_on_lldpad() helper, which terminates
the selftest if th lldpad is running.

Some of these tests however first install the bash exit trap, which invokes
a cleanup() at the test exit. If bail_on_lldpad() has terminated the script
even before the setup part was run, the cleanup part will be very confused.

Therefore make sure bail_on_lldpad() is invoked before the cleanup is
registered.

While there are still edge cases where the user terminates the script
before the setup was fully done, this takes care of a common situation
where the cleanup would be invoked in an inconsistent state.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh | 4 ++--
 tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh      | 4 ++--
 tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh  | 5 ++---
 tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh | 5 ++---
 4 files changed, 8 insertions(+), 10 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
index f4493ef9cca1..3569ff45f7d5 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
@@ -371,9 +371,9 @@ test_tc_int_buf()
 	tc qdisc delete dev $swp root
 }
 
-trap cleanup EXIT
-
 bail_on_lldpad
+
+trap cleanup EXIT
 setup_wait
 tests_run
 
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
index 5d5622fc2758..f9858e221996 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
@@ -393,9 +393,9 @@ test_qos_pfc()
 	log_test "PFC"
 }
 
-trap cleanup EXIT
-
 bail_on_lldpad
+
+trap cleanup EXIT
 setup_prepare
 setup_wait
 tests_run
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
index 1e5ad3209436..7a73057206cd 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
@@ -166,12 +166,11 @@ ecn_mirror_test()
 	uninstall_qdisc
 }
 
-trap cleanup EXIT
+bail_on_lldpad
 
+trap cleanup EXIT
 setup_prepare
 setup_wait
-
-bail_on_lldpad
 tests_run
 
 exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
index d79a82f317d2..501d192529ac 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
@@ -73,12 +73,11 @@ red_mirror_test()
 	uninstall_qdisc
 }
 
-trap cleanup EXIT
+bail_on_lldpad
 
+trap cleanup EXIT
 setup_prepare
 setup_wait
-
-bail_on_lldpad
 tests_run
 
 exit $EXIT_STATUS
-- 
cgit 


From 5ade50e2df2ba93fae29b0aaeb8a71f6721b6a06 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Wed, 4 May 2022 09:29:03 +0300
Subject: selftests: router_vid_1: Add a diagram, fix coding style

It is customary for selftests to have a comment with a topology diagram,
which serves to illustrate the situation in which the test is done. This
selftest lacks it. Add it.

While at it, fix the list of tests so that the test names are enumerated
one at a line.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/net/forwarding/router_vid_1.sh       | 27 +++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/forwarding/router_vid_1.sh b/tools/testing/selftests/net/forwarding/router_vid_1.sh
index a7306c7ac06d..865c9f7d8143 100755
--- a/tools/testing/selftests/net/forwarding/router_vid_1.sh
+++ b/tools/testing/selftests/net/forwarding/router_vid_1.sh
@@ -1,7 +1,32 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-ALL_TESTS="ping_ipv4 ping_ipv6"
+# +--------------------+                     +----------------------+
+# | H1                 |                     |                   H2 |
+# |                    |                     |                      |
+# |            $h1.1 + |                     | + $h2.1              |
+# |     192.0.2.2/24 | |                     | | 198.51.100.2/24    |
+# | 2001:db8:1::2/64 | |                     | | 2001:db8:2::2/64   |
+# |                  | |                     | |                    |
+# |              $h1 + |                     | + $h2                |
+# |                  | |                     | |                    |
+# +------------------|-+                     +-|--------------------+
+#                    |                         |
+# +------------------|-------------------------|--------------------+
+# | SW               |                         |                    |
+# |                  |                         |                    |
+# |             $rp1 +                         + $rp2               |
+# |                  |                         |                    |
+# |           $rp1.1 +                         + $rp2.1             |
+# |     192.0.2.1/24                             198.51.100.1/24    |
+# | 2001:db8:1::1/64                             2001:db8:2::1/64   |
+# |                                                                 |
+# +-----------------------------------------------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+"
 NUM_NETIFS=4
 source lib.sh
 
-- 
cgit 


From faa7521add89416983e61283057a101d135e9174 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Wed, 4 May 2022 09:29:04 +0300
Subject: selftests: router.sh: Add a diagram

It is customary for selftests to have a comment with a topology diagram,
which serves to illustrate the situation in which the test is done. This
selftest lacks it. Add it.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/router.sh | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh
index 057f91b05098..b98ea9449b8b 100755
--- a/tools/testing/selftests/net/forwarding/router.sh
+++ b/tools/testing/selftests/net/forwarding/router.sh
@@ -1,6 +1,24 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+# +--------------------+                     +----------------------+
+# | H1                 |                     |                   H2 |
+# |                    |                     |                      |
+# |              $h1 + |                     | + $h2                |
+# |     192.0.2.2/24 | |                     | | 198.51.100.2/24    |
+# | 2001:db8:1::2/64 | |                     | | 2001:db8:2::2/64   |
+# |                  | |                     | |                    |
+# +------------------|-+                     +-|--------------------+
+#                    |                         |
+# +------------------|-------------------------|--------------------+
+# | SW               |                         |                    |
+# |                  |                         |                    |
+# |             $rp1 +                         + $rp2               |
+# |     192.0.2.1/24                             198.51.100.1/24    |
+# | 2001:db8:1::1/64                             2001:db8:2::1/64   |
+# |                                                                 |
+# +-----------------------------------------------------------------+
+
 ALL_TESTS="
 	ping_ipv4
 	ping_ipv6
-- 
cgit 


From 5e91d2a4146946ea0abc984ca957f12b70632901 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Wed, 4 May 2022 16:55:35 +0100
Subject: selftests/seccomp: Fix spelling mistake "Coud" -> "Could"

There is a spelling mistake in an error message. Fix it.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20220504155535.239180-1-colin.i.king@gmail.com
---
 tools/testing/selftests/seccomp/seccomp_bpf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 29c973f606b2..136df5b76319 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -4320,7 +4320,7 @@ static ssize_t get_nth(struct __test_metadata *_metadata, const char *path,
 
 	f = fopen(path, "r");
 	ASSERT_NE(f, NULL) {
-		TH_LOG("Coud not open %s: %s", path, strerror(errno));
+		TH_LOG("Could not open %s: %s", path, strerror(errno));
 	}
 
 	for (i = 0; i < position; i++) {
-- 
cgit 


From ae2de669c14a18b5144cdacf49933ad400ed7e1c Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 4 May 2022 22:29:15 +0200
Subject: wireguard: selftests: make routing loop test non-fatal

I hate to do this, but I still do not have a good solution to actually
fix this bug across architectures. So just disable it for now, so that
the CI can still deliver actionable results. This commit adds a large
red warning, so that at least the failure isn't lost forever, and
hopefully this can be revisited down the line.

Link: https://lore.kernel.org/netdev/CAHmME9pv1x6C4TNdL6648HydD8r+txpV4hTUXOBVkrapBXH4QQ@mail.gmail.com/
Link: https://lore.kernel.org/netdev/YmszSXueTxYOC41G@zx2c4.com/
Link: https://lore.kernel.org/wireguard/CAHmME9rNnBiNvBstb7MPwK-7AmAN0sOfnhdR=eeLrowWcKxaaQ@mail.gmail.com/
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/wireguard/netns.sh | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
index 8a9461aa0878..8a543200a61a 100755
--- a/tools/testing/selftests/wireguard/netns.sh
+++ b/tools/testing/selftests/wireguard/netns.sh
@@ -280,7 +280,19 @@ read _ _ tx_bytes_before < <(n0 wg show wg1 transfer)
 ! n0 ping -W 1 -c 10 -f 192.168.241.2 || false
 sleep 1
 read _ _ tx_bytes_after < <(n0 wg show wg1 transfer)
-(( tx_bytes_after - tx_bytes_before < 70000 ))
+if ! (( tx_bytes_after - tx_bytes_before < 70000 )); then
+	errstart=$'\x1b[37m\x1b[41m\x1b[1m'
+	errend=$'\x1b[0m'
+	echo "${errstart}                                                ${errend}"
+	echo "${errstart}                   E  R  R  O  R                ${errend}"
+	echo "${errstart}                                                ${errend}"
+	echo "${errstart} This architecture does not do the right thing  ${errend}"
+	echo "${errstart} with cross-namespace routing loops. This test  ${errend}"
+	echo "${errstart} has thus technically failed but, as this issue ${errend}"
+	echo "${errstart} is as yet unsolved, these tests will continue  ${errend}"
+	echo "${errstart} onward. :(                                     ${errend}"
+	echo "${errstart}                                                ${errend}"
+fi
 
 ip0 link del wg1
 ip1 link del wg0
-- 
cgit 


From 39f02bf1e5ce9d72045de01e3d618ade1067158c Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 4 May 2022 22:29:16 +0200
Subject: wireguard: selftests: limit parallelism to $(nproc) tests at once

The parallel tests were added to catch queueing issues from multiple
cores. But what happens in reality when testing tons of processes is
that these separate threads wind up fighting with the scheduler, and we
wind up with contention in places we don't care about that decrease the
chances of hitting a bug. So just do a test with the number of CPU
cores, rather than trying to scale up arbitrarily.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/wireguard/netns.sh | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
index 8a543200a61a..69c7796c7ca9 100755
--- a/tools/testing/selftests/wireguard/netns.sh
+++ b/tools/testing/selftests/wireguard/netns.sh
@@ -22,10 +22,12 @@
 # interfaces in $ns1 and $ns2. See https://www.wireguard.com/netns/ for further
 # details on how this is accomplished.
 set -e
+shopt -s extglob
 
 exec 3>&1
 export LANG=C
 export WG_HIDE_KEYS=never
+NPROC=( /sys/devices/system/cpu/cpu+([0-9]) ); NPROC=${#NPROC[@]}
 netns0="wg-test-$$-0"
 netns1="wg-test-$$-1"
 netns2="wg-test-$$-2"
@@ -143,17 +145,15 @@ tests() {
 	n1 iperf3 -Z -t 3 -b 0 -u -c fd00::2
 
 	# TCP over IPv4, in parallel
-	for max in 4 5 50; do
-		local pids=( )
-		for ((i=0; i < max; ++i)) do
-			n2 iperf3 -p $(( 5200 + i )) -s -1 -B 192.168.241.2 &
-			pids+=( $! ); waitiperf $netns2 $! $(( 5200 + i ))
-		done
-		for ((i=0; i < max; ++i)) do
-			n1 iperf3 -Z -t 3 -p $(( 5200 + i )) -c 192.168.241.2 &
-		done
-		wait "${pids[@]}"
+	local pids=( ) i
+	for ((i=0; i < NPROC; ++i)) do
+		n2 iperf3 -p $(( 5200 + i )) -s -1 -B 192.168.241.2 &
+		pids+=( $! ); waitiperf $netns2 $! $(( 5200 + i ))
 	done
+	for ((i=0; i < NPROC; ++i)) do
+		n1 iperf3 -Z -t 3 -p $(( 5200 + i )) -c 192.168.241.2 &
+	done
+	wait "${pids[@]}"
 }
 
 [[ $(ip1 link show dev wg0) =~ mtu\ ([0-9]+) ]] && orig_mtu="${BASH_REMATCH[1]}"
-- 
cgit 


From d5d9b29bc963cc084c5c0f3a7c28e2632a22e0c4 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 4 May 2022 22:29:17 +0200
Subject: wireguard: selftests: use newer toolchains to fill out architectures

Rather than relying on the system to have cross toolchains available,
simply download musl.cc's ones and use that libc.so, and then we use it
to fill in a few missing platforms, such as riscv64, riscv64, powerpc64,
and s390x.

Since riscv doesn't have a second serial port in its device description,
we have to use virtio's vport. This is actually the same situation on
ARM, but we were previously hacking QEMU up to work around this, which
required a custom QEMU. Instead just do the vport trick on ARM too.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/wireguard/qemu/Makefile    | 169 ++++++++++++++-------
 .../selftests/wireguard/qemu/arch/aarch64.config   |   5 +-
 .../wireguard/qemu/arch/aarch64_be.config          |   5 +-
 .../selftests/wireguard/qemu/arch/arm.config       |   5 +-
 .../selftests/wireguard/qemu/arch/armeb.config     |   5 +-
 .../selftests/wireguard/qemu/arch/powerpc64.config |  13 ++
 .../selftests/wireguard/qemu/arch/riscv32.config   |  12 ++
 .../selftests/wireguard/qemu/arch/riscv64.config   |  12 ++
 .../selftests/wireguard/qemu/arch/s390x.config     |   6 +
 9 files changed, 169 insertions(+), 63 deletions(-)
 create mode 100644 tools/testing/selftests/wireguard/qemu/arch/powerpc64.config
 create mode 100644 tools/testing/selftests/wireguard/qemu/arch/riscv32.config
 create mode 100644 tools/testing/selftests/wireguard/qemu/arch/riscv64.config
 create mode 100644 tools/testing/selftests/wireguard/qemu/arch/s390x.config

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile
index 4bdd6c1a19d3..930f59c9e714 100644
--- a/tools/testing/selftests/wireguard/qemu/Makefile
+++ b/tools/testing/selftests/wireguard/qemu/Makefile
@@ -4,26 +4,24 @@
 
 PWD := $(shell pwd)
 
-CHOST := $(shell gcc -dumpmachine)
-HOST_ARCH := $(firstword $(subst -, ,$(CHOST)))
-ifneq (,$(ARCH))
-CBUILD := $(subst -gcc,,$(lastword $(subst /, ,$(firstword $(wildcard $(foreach bindir,$(subst :, ,$(PATH)),$(bindir)/$(ARCH)-*-gcc))))))
-ifeq (,$(CBUILD))
-$(error The toolchain for $(ARCH) is not installed)
-endif
-else
-CBUILD := $(CHOST)
-ARCH := $(firstword $(subst -, ,$(CBUILD)))
-endif
-
 # Set these from the environment to override
 KERNEL_PATH ?= $(PWD)/../../../../..
 BUILD_PATH ?= $(PWD)/build/$(ARCH)
 DISTFILES_PATH ?= $(PWD)/distfiles
 NR_CPUS ?= 4
+ARCH ?=
+CBUILD := $(shell gcc -dumpmachine)
+HOST_ARCH := $(firstword $(subst -, ,$(CBUILD)))
+ifeq ($(ARCH),)
+ARCH := $(HOST_ARCH)
+endif
 
 MIRROR := https://download.wireguard.com/qemu-test/distfiles/
 
+KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug)
+rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d))
+WIREGUARD_SOURCES := $(call rwildcard,$(KERNEL_PATH)/drivers/net/wireguard/,*)
+
 default: qemu
 
 # variable name, tarball project name, version, tarball extension, default URI base
@@ -36,12 +34,11 @@ $(call file_download,$$($(1)_NAME)$(4),$(5),$(6))
 endef
 
 define file_download =
-$(DISTFILES_PATH)/$(1):
+$(DISTFILES_PATH)/$(1): | $(4)
 	mkdir -p $(DISTFILES_PATH)
-	flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp; [ -f $$@.tmp ] || exit 1; if echo "$(3)  $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi'
+	flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp; [ -f $$@.tmp ] || exit 1; if ([ -n "$(4)" ] && sed -n "s#^\([a-f0-9]\{64\}\)  \($(1)\)\$$$$#\1  $(DISTFILES_PATH)/\2.tmp#p" "$(4)" || echo "$(3)  $$@.tmp") | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi'
 endef
 
-$(eval $(call tar_download,MUSL,musl,1.2.0,.tar.gz,https://musl.libc.org/releases/,c6de7b191139142d3f9a7b5b702c9cae1b5ee6e7f57e582da9328629408fd4e8))
 $(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c))
 $(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d))
 $(eval $(call tar_download,IPROUTE2,iproute2,5.6.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,1b5b0e25ce6e23da7526ea1da044e814ad85ba761b10dd29c2b027c056b04692))
@@ -50,28 +47,20 @@ $(eval $(call tar_download,NMAP,nmap,7.80,.tar.bz2,https://nmap.org/dist/,fcfa5a
 $(eval $(call tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a))
 $(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20200206,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,f5207248c6a3c3e3bfc9ab30b91c1897b00802ed861e1f9faaed873366078c64))
 
-KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug)
-rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d))
-WIREGUARD_SOURCES := $(call rwildcard,$(KERNEL_PATH)/drivers/net/wireguard/,*)
-
-export CFLAGS ?= -O3 -pipe
-export LDFLAGS ?=
-export CPPFLAGS := -I$(BUILD_PATH)/include
-
+export CFLAGS := -O3 -pipe
 ifeq ($(HOST_ARCH),$(ARCH))
-CROSS_COMPILE_FLAG := --host=$(CHOST)
 CFLAGS += -march=native
-STRIP := strip
-else
-$(info Cross compilation: building for $(CBUILD) using $(CHOST))
-CROSS_COMPILE_FLAG := --build=$(CBUILD) --host=$(CHOST)
-export CROSS_COMPILE=$(CBUILD)-
-STRIP := $(CBUILD)-strip
 endif
+export LDFLAGS :=
+export CPPFLAGS :=
+
+QEMU_VPORT_RESULT :=
 ifeq ($(ARCH),aarch64)
+CHOST := aarch64-linux-musl
 QEMU_ARCH := aarch64
 KERNEL_ARCH := arm64
 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image
+QEMU_VPORT_RESULT := virtio-serial-device
 ifeq ($(HOST_ARCH),$(ARCH))
 QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
 else
@@ -79,9 +68,11 @@ QEMU_MACHINE := -cpu cortex-a53 -machine virt
 CFLAGS += -march=armv8-a -mtune=cortex-a53
 endif
 else ifeq ($(ARCH),aarch64_be)
+CHOST := aarch64_be-linux-musl
 QEMU_ARCH := aarch64
 KERNEL_ARCH := arm64
 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image
+QEMU_VPORT_RESULT := virtio-serial-device
 ifeq ($(HOST_ARCH),$(ARCH))
 QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
 else
@@ -89,9 +80,11 @@ QEMU_MACHINE := -cpu cortex-a53 -machine virt
 CFLAGS += -march=armv8-a -mtune=cortex-a53
 endif
 else ifeq ($(ARCH),arm)
+CHOST := arm-linux-musleabi
 QEMU_ARCH := arm
 KERNEL_ARCH := arm
 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage
+QEMU_VPORT_RESULT := virtio-serial-device
 ifeq ($(HOST_ARCH),$(ARCH))
 QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
 else
@@ -99,9 +92,11 @@ QEMU_MACHINE := -cpu cortex-a15 -machine virt
 CFLAGS += -march=armv7-a -mtune=cortex-a15 -mabi=aapcs-linux
 endif
 else ifeq ($(ARCH),armeb)
+CHOST := armeb-linux-musleabi
 QEMU_ARCH := arm
 KERNEL_ARCH := arm
 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage
+QEMU_VPORT_RESULT := virtio-serial-device
 ifeq ($(HOST_ARCH),$(ARCH))
 QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
 else
@@ -110,6 +105,7 @@ CFLAGS += -march=armv7-a -mabi=aapcs-linux # We don't pass -mtune=cortex-a15 due
 LDFLAGS += -Wl,--be8
 endif
 else ifeq ($(ARCH),x86_64)
+CHOST := x86_64-linux-musl
 QEMU_ARCH := x86_64
 KERNEL_ARCH := x86_64
 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
@@ -120,6 +116,7 @@ QEMU_MACHINE := -cpu Skylake-Server -machine q35
 CFLAGS += -march=skylake-avx512
 endif
 else ifeq ($(ARCH),i686)
+CHOST := i686-linux-musl
 QEMU_ARCH := i386
 KERNEL_ARCH := x86
 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
@@ -130,6 +127,7 @@ QEMU_MACHINE := -cpu coreduo -machine q35
 CFLAGS += -march=prescott
 endif
 else ifeq ($(ARCH),mips64)
+CHOST := mips64-linux-musl
 QEMU_ARCH := mips64
 KERNEL_ARCH := mips
 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
@@ -141,6 +139,7 @@ QEMU_MACHINE := -cpu MIPS64R2-generic -machine malta -smp 1
 CFLAGS += -march=mips64r2 -EB
 endif
 else ifeq ($(ARCH),mips64el)
+CHOST := mips64el-linux-musl
 QEMU_ARCH := mips64el
 KERNEL_ARCH := mips
 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
@@ -152,6 +151,7 @@ QEMU_MACHINE := -cpu MIPS64R2-generic -machine malta -smp 1
 CFLAGS += -march=mips64r2 -EL
 endif
 else ifeq ($(ARCH),mips)
+CHOST := mips-linux-musl
 QEMU_ARCH := mips
 KERNEL_ARCH := mips
 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
@@ -163,6 +163,7 @@ QEMU_MACHINE := -cpu 24Kf -machine malta -smp 1
 CFLAGS += -march=mips32r2 -EB
 endif
 else ifeq ($(ARCH),mipsel)
+CHOST := mipsel-linux-musl
 QEMU_ARCH := mipsel
 KERNEL_ARCH := mips
 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
@@ -173,7 +174,18 @@ else
 QEMU_MACHINE := -cpu 24Kf -machine malta -smp 1
 CFLAGS += -march=mips32r2 -EL
 endif
+else ifeq ($(ARCH),powerpc64)
+CHOST := powerpc64-linux-musl
+QEMU_ARCH := ppc64
+KERNEL_ARCH := powerpc
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host,accel=kvm -machine pseries
+else
+QEMU_MACHINE := -machine pseries
+endif
 else ifeq ($(ARCH),powerpc64le)
+CHOST := powerpc64le-linux-musl
 QEMU_ARCH := ppc64
 KERNEL_ARCH := powerpc
 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
@@ -182,8 +194,8 @@ QEMU_MACHINE := -cpu host,accel=kvm -machine pseries
 else
 QEMU_MACHINE := -machine pseries
 endif
-CFLAGS += -mcpu=powerpc64le -mlong-double-64
 else ifeq ($(ARCH),powerpc)
+CHOST := powerpc-linux-musl
 QEMU_ARCH := ppc
 KERNEL_ARCH := powerpc
 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/powerpc/boot/uImage
@@ -192,26 +204,72 @@ QEMU_MACHINE := -cpu host,accel=kvm -machine ppce500
 else
 QEMU_MACHINE := -machine ppce500
 endif
-CFLAGS += -mcpu=powerpc -mlong-double-64 -msecure-plt
 else ifeq ($(ARCH),m68k)
+CHOST := m68k-linux-musl
 QEMU_ARCH := m68k
 KERNEL_ARCH := m68k
 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
 KERNEL_CMDLINE := $(shell sed -n 's/CONFIG_CMDLINE=\(.*\)/\1/p' arch/m68k.config)
 ifeq ($(HOST_ARCH),$(ARCH))
-QEMU_MACHINE := -cpu host,accel=kvm -machine q800 -smp 1 -append $(KERNEL_CMDLINE)
+QEMU_MACHINE := -cpu host,accel=kvm -machine q800 -append $(KERNEL_CMDLINE)
 else
 QEMU_MACHINE := -machine q800 -smp 1 -append $(KERNEL_CMDLINE)
 endif
+else ifeq ($(ARCH),riscv64)
+CHOST := riscv64-linux-musl
+QEMU_ARCH := riscv64
+KERNEL_ARCH := riscv
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/riscv/boot/Image
+QEMU_VPORT_RESULT := virtio-serial-device
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host,accel=kvm -machine virt
+else
+QEMU_MACHINE := -cpu rv64 -machine virt
+endif
+else ifeq ($(ARCH),riscv32)
+CHOST := riscv32-linux-musl
+QEMU_ARCH := riscv32
+KERNEL_ARCH := riscv
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/riscv/boot/Image
+QEMU_VPORT_RESULT := virtio-serial-device
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host,accel=kvm -machine virt
+else
+QEMU_MACHINE := -cpu rv32 -machine virt
+endif
+else ifeq ($(ARCH),s390x)
+CHOST := s390x-linux-musl
+QEMU_ARCH := s390x
+KERNEL_ARCH := s390
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/s390/boot/bzImage
+KERNEL_CMDLINE := $(shell sed -n 's/CONFIG_CMDLINE=\(.*\)/\1/p' arch/s390x.config)
+QEMU_VPORT_RESULT := virtio-serial-ccw
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host,accel=kvm -machine s390-ccw-virtio -append $(KERNEL_CMDLINE)
 else
-$(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64le, powerpc, m68k)
+QEMU_MACHINE := -machine s390-ccw-virtio -append $(KERNEL_CMDLINE)
 endif
+else
+$(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64, powerpc64le, powerpc, m68k, riscv64, riscv32, s390x)
+endif
+
+TOOLCHAIN_FILENAME := $(CHOST)-cross.tgz
+TOOLCHAIN_TAR := $(DISTFILES_PATH)/$(TOOLCHAIN_FILENAME)
+TOOLCHAIN_PATH := $(BUILD_PATH)/$(CHOST)-cross
+TOOLCHAIN_DIR := https://download.wireguard.com/qemu-test/toolchains/20211123/
+$(eval $(call file_download,toolchain-sha256sums-20211123,$(TOOLCHAIN_DIR)SHA256SUMS#,83da033fd8c798df476c21d9612da2dfb896ec62fbed4ceec5eefc0e56b3f0c8))
+$(eval $(call file_download,$(TOOLCHAIN_FILENAME),$(TOOLCHAIN_DIR),,$(DISTFILES_PATH)/toolchain-sha256sums-20211123))
 
-REAL_CC := $(CBUILD)-gcc
-MUSL_CC := $(BUILD_PATH)/musl-gcc
-export CC := $(MUSL_CC)
-USERSPACE_DEPS := $(MUSL_CC) $(BUILD_PATH)/include/.installed $(BUILD_PATH)/include/linux/.installed
+STRIP := $(CHOST)-strip
+CROSS_COMPILE_FLAG := --build=$(CBUILD) --host=$(CHOST)
+$(info Building for $(CHOST) using $(CBUILD))
+export CROSS_COMPILE := $(CHOST)-
+export PATH := $(TOOLCHAIN_PATH)/bin:$(PATH)
+export CC := $(CHOST)-gcc
+
+USERSPACE_DEPS := $(TOOLCHAIN_PATH)/.installed $(TOOLCHAIN_PATH)/$(CHOST)/include/linux/.installed
 
+comma := ,
 build: $(KERNEL_BZIMAGE)
 qemu: $(KERNEL_BZIMAGE)
 	rm -f $(BUILD_PATH)/result
@@ -222,13 +280,14 @@ qemu: $(KERNEL_BZIMAGE)
 		$(QEMU_MACHINE) \
 		-m $$(grep -q CONFIG_DEBUG_KMEMLEAK=y $(KERNEL_BUILD_PATH)/.config && echo 1G || echo 256M) \
 		-serial stdio \
-		-serial file:$(BUILD_PATH)/result \
+		-chardev file,path=$(BUILD_PATH)/result,id=result \
+		$(if $(QEMU_VPORT_RESULT),-device $(QEMU_VPORT_RESULT) -device virtserialport$(comma)chardev=result,-serial chardev:result) \
 		-no-reboot \
 		-monitor none \
 		-kernel $<
 	grep -Fq success $(BUILD_PATH)/result
 
-$(BUILD_PATH)/init-cpio-spec.txt:
+$(BUILD_PATH)/init-cpio-spec.txt: $(TOOLCHAIN_PATH)/.installed $(BUILD_PATH)/init
 	mkdir -p $(BUILD_PATH)
 	echo "file /init $(BUILD_PATH)/init 755 0 0" > $@
 	echo "file /init.sh $(PWD)/../netns.sh 755 0 0" >> $@
@@ -246,10 +305,10 @@ $(BUILD_PATH)/init-cpio-spec.txt:
 	echo "slink /bin/iptables xtables-legacy-multi 777 0 0" >> $@
 	echo "slink /bin/ping6 ping 777 0 0" >> $@
 	echo "dir /lib 755 0 0" >> $@
-	echo "file /lib/libc.so $(MUSL_PATH)/lib/libc.so 755 0 0" >> $@
-	echo "slink /lib/ld-linux.so.1 libc.so 777 0 0" >> $@
+	echo "file /lib/libc.so $(TOOLCHAIN_PATH)/$(CHOST)/lib/libc.so 755 0 0" >> $@
+	echo "slink $$($(CHOST)-readelf -p .interp '$(BUILD_PATH)/init'| grep -o '/lib/.*') libc.so 777 0 0" >> $@
 
-$(KERNEL_BUILD_PATH)/.config: kernel.config arch/$(ARCH).config
+$(KERNEL_BUILD_PATH)/.config: $(TOOLCHAIN_PATH)/.installed kernel.config arch/$(ARCH).config
 	mkdir -p $(KERNEL_BUILD_PATH)
 	cp kernel.config $(KERNEL_BUILD_PATH)/minimal.config
 	printf 'CONFIG_NR_CPUS=$(NR_CPUS)\nCONFIG_INITRAMFS_SOURCE="$(BUILD_PATH)/init-cpio-spec.txt"\n' >> $(KERNEL_BUILD_PATH)/minimal.config
@@ -258,29 +317,20 @@ $(KERNEL_BUILD_PATH)/.config: kernel.config arch/$(ARCH).config
 	cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config $(KERNEL_BUILD_PATH)/minimal.config
 	$(if $(findstring yes,$(DEBUG_KERNEL)),cp debug.config $(KERNEL_BUILD_PATH) && cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config debug.config,)
 
-$(KERNEL_BZIMAGE): $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(MUSL_PATH)/lib/libc.so $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-legacy-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/wg $(BUILD_PATH)/init ../netns.sh $(WIREGUARD_SOURCES)
+$(KERNEL_BZIMAGE): $(TOOLCHAIN_PATH)/.installed $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-legacy-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/wg $(BUILD_PATH)/init ../netns.sh $(WIREGUARD_SOURCES)
 	$(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE)
 
-$(BUILD_PATH)/include/linux/.installed: | $(KERNEL_BUILD_PATH)/.config
-	$(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) INSTALL_HDR_PATH=$(BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) headers_install
+$(TOOLCHAIN_PATH)/$(CHOST)/include/linux/.installed: | $(KERNEL_BUILD_PATH)/.config $(TOOLCHAIN_PATH)/.installed
+	rm -rf $(TOOLCHAIN_PATH)/$(CHOST)/include/linux
+	$(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) INSTALL_HDR_PATH=$(TOOLCHAIN_PATH)/$(CHOST) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) headers_install
 	touch $@
 
-$(MUSL_PATH)/lib/libc.so: $(MUSL_TAR)
+$(TOOLCHAIN_PATH)/.installed: $(TOOLCHAIN_TAR)
 	mkdir -p $(BUILD_PATH)
 	flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
-	cd $(MUSL_PATH) && CC=$(REAL_CC) ./configure --prefix=/ --disable-static --build=$(CBUILD)
-	$(MAKE) -C $(MUSL_PATH)
-	$(STRIP) -s $@
-
-$(BUILD_PATH)/include/.installed: $(MUSL_PATH)/lib/libc.so
-	$(MAKE) -C $(MUSL_PATH) DESTDIR=$(BUILD_PATH) install-headers
+	$(STRIP) -s $(TOOLCHAIN_PATH)/$(CHOST)/lib/libc.so
 	touch $@
 
-$(MUSL_CC): $(MUSL_PATH)/lib/libc.so
-	sh $(MUSL_PATH)/tools/musl-gcc.specs.sh $(BUILD_PATH)/include $(MUSL_PATH)/lib /lib/ld-linux.so.1 > $(BUILD_PATH)/musl-gcc.specs
-	printf '#!/bin/sh\nexec "$(REAL_CC)" --specs="$(BUILD_PATH)/musl-gcc.specs" "$$@"\n' > $(BUILD_PATH)/musl-gcc
-	chmod +x $(BUILD_PATH)/musl-gcc
-
 $(IPERF_PATH)/.installed: $(IPERF_TAR)
 	mkdir -p $(BUILD_PATH)
 	flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
@@ -289,6 +339,7 @@ $(IPERF_PATH)/.installed: $(IPERF_TAR)
 	touch $@
 
 $(IPERF_PATH)/src/iperf3: | $(IPERF_PATH)/.installed $(USERSPACE_DEPS)
+	cd $(IPERF_PATH) && autoreconf -fi
 	cd $(IPERF_PATH) && CFLAGS="$(CFLAGS) -D_GNU_SOURCE" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --with-openssl=no
 	$(MAKE) -C $(IPERF_PATH)
 	$(STRIP) -s $@
@@ -304,7 +355,7 @@ $(WIREGUARD_TOOLS_PATH)/src/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(USERSPACE
 
 $(BUILD_PATH)/init: init.c | $(USERSPACE_DEPS)
 	mkdir -p $(BUILD_PATH)
-	$(MUSL_CC) -o $@ $(CFLAGS) $(LDFLAGS) -std=gnu11 $<
+	$(CC) -o $@ $(CFLAGS) $(LDFLAGS) -std=gnu11 $<
 	$(STRIP) -s $@
 
 $(IPUTILS_PATH)/.installed: $(IPUTILS_TAR)
diff --git a/tools/testing/selftests/wireguard/qemu/arch/aarch64.config b/tools/testing/selftests/wireguard/qemu/arch/aarch64.config
index 3d063bb247bb..e9ac41f6b3ae 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/aarch64.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/aarch64.config
@@ -1,5 +1,8 @@
 CONFIG_SERIAL_AMBA_PL011=y
 CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_CONSOLE=y
 CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1"
+CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1"
 CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config b/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config
index dbdc7e406a7b..03609a203e8c 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config
@@ -1,6 +1,9 @@
 CONFIG_CPU_BIG_ENDIAN=y
 CONFIG_SERIAL_AMBA_PL011=y
 CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_CONSOLE=y
 CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1"
+CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1"
 CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/arch/arm.config b/tools/testing/selftests/wireguard/qemu/arch/arm.config
index 148f49905418..c616124fdd59 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/arm.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/arm.config
@@ -4,6 +4,9 @@ CONFIG_ARCH_VIRT=y
 CONFIG_THUMB2_KERNEL=n
 CONFIG_SERIAL_AMBA_PL011=y
 CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_CONSOLE=y
 CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1"
+CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1"
 CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/armeb.config b/tools/testing/selftests/wireguard/qemu/arch/armeb.config
index bd76b07d00a2..d3a40a974e16 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/armeb.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/armeb.config
@@ -4,7 +4,10 @@ CONFIG_ARCH_VIRT=y
 CONFIG_THUMB2_KERNEL=n
 CONFIG_SERIAL_AMBA_PL011=y
 CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_CONSOLE=y
 CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1"
+CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1"
 CONFIG_CPU_BIG_ENDIAN=y
 CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc64.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc64.config
new file mode 100644
index 000000000000..c19c7b519d8b
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc64.config
@@ -0,0 +1,13 @@
+CONFIG_PPC64=y
+CONFIG_PPC_PSERIES=y
+CONFIG_ALTIVEC=y
+CONFIG_VSX=y
+CONFIG_PPC_OF_BOOT_TRAMPOLINE=y
+CONFIG_PPC_RADIX_MMU=y
+CONFIG_HVC_CONSOLE=y
+CONFIG_CPU_BIG_ENDIAN=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=hvc0 wg.success=hvc1"
+CONFIG_SECTION_MISMATCH_WARN_ONLY=y
+CONFIG_FRAME_WARN=1280
+CONFIG_THREAD_SHIFT=14
diff --git a/tools/testing/selftests/wireguard/qemu/arch/riscv32.config b/tools/testing/selftests/wireguard/qemu/arch/riscv32.config
new file mode 100644
index 000000000000..ea53e78e923e
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/riscv32.config
@@ -0,0 +1,12 @@
+CONFIG_ARCH_RV32I=y
+CONFIG_MMU=y
+CONFIG_FPU=y
+CONFIG_SOC_VIRT=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_CMDLINE="console=ttyS0 wg.success=vport0p1"
+CONFIG_CMDLINE_FORCE=y
diff --git a/tools/testing/selftests/wireguard/qemu/arch/riscv64.config b/tools/testing/selftests/wireguard/qemu/arch/riscv64.config
new file mode 100644
index 000000000000..4a08d8c1d4af
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/riscv64.config
@@ -0,0 +1,12 @@
+CONFIG_ARCH_RV64I=y
+CONFIG_MMU=y
+CONFIG_FPU=y
+CONFIG_SOC_VIRT=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_CMDLINE="console=ttyS0 wg.success=vport0p1"
+CONFIG_CMDLINE_FORCE=y
diff --git a/tools/testing/selftests/wireguard/qemu/arch/s390x.config b/tools/testing/selftests/wireguard/qemu/arch/s390x.config
new file mode 100644
index 000000000000..274a44f4e49c
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/s390x.config
@@ -0,0 +1,6 @@
+CONFIG_SCLP_VT220_TTY=y
+CONFIG_SCLP_VT220_CONSOLE=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_S390_GUEST=y
+CONFIG_CMDLINE="console=ttysclp0 wg.success=vport0p1"
-- 
cgit 


From d261ba6aa411e03c27da266b7df4bef771e8105e Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 4 May 2022 22:29:18 +0200
Subject: wireguard: selftests: restore support for ccache

When moving to non-system toolchains, we inadvertantly killed the
ability to use ccache. So instead, build ccache support into the test
harness directly.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/wireguard/qemu/.gitignore |  1 +
 tools/testing/selftests/wireguard/qemu/Makefile   | 18 +++++++++++++++++-
 2 files changed, 18 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/wireguard/qemu/.gitignore b/tools/testing/selftests/wireguard/qemu/.gitignore
index bfa15e6feb2f..42ab9d72b37b 100644
--- a/tools/testing/selftests/wireguard/qemu/.gitignore
+++ b/tools/testing/selftests/wireguard/qemu/.gitignore
@@ -1,3 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
 build/
 distfiles/
+ccache/
diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile
index 930f59c9e714..e121ef3878af 100644
--- a/tools/testing/selftests/wireguard/qemu/Makefile
+++ b/tools/testing/selftests/wireguard/qemu/Makefile
@@ -266,6 +266,13 @@ $(info Building for $(CHOST) using $(CBUILD))
 export CROSS_COMPILE := $(CHOST)-
 export PATH := $(TOOLCHAIN_PATH)/bin:$(PATH)
 export CC := $(CHOST)-gcc
+CCACHE_PATH := $(shell which ccache 2>/dev/null)
+ifneq ($(CCACHE_PATH),)
+export KBUILD_BUILD_TIMESTAMP := Fri Jun  5 15:58:00 CEST 2015
+export PATH := $(TOOLCHAIN_PATH)/bin/ccache:$(PATH)
+export CCACHE_SLOPPINESS := file_macro,time_macros
+export CCACHE_DIR ?= $(PWD)/ccache
+endif
 
 USERSPACE_DEPS := $(TOOLCHAIN_PATH)/.installed $(TOOLCHAIN_PATH)/$(CHOST)/include/linux/.installed
 
@@ -329,6 +336,10 @@ $(TOOLCHAIN_PATH)/.installed: $(TOOLCHAIN_TAR)
 	mkdir -p $(BUILD_PATH)
 	flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
 	$(STRIP) -s $(TOOLCHAIN_PATH)/$(CHOST)/lib/libc.so
+ifneq ($(CCACHE_PATH),)
+	mkdir -p $(TOOLCHAIN_PATH)/bin/ccache
+	ln -s $(CCACHE_PATH) $(TOOLCHAIN_PATH)/bin/ccache/$(CC)
+endif
 	touch $@
 
 $(IPERF_PATH)/.installed: $(IPERF_TAR)
@@ -421,8 +432,13 @@ clean:
 distclean: clean
 	rm -rf $(DISTFILES_PATH)
 
+cacheclean: clean
+ifneq ($(CCACHE_DIR),)
+	rm -rf $(CCACHE_DIR)
+endif
+
 menuconfig: $(KERNEL_BUILD_PATH)/.config
 	$(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) menuconfig
 
-.PHONY: qemu build clean distclean menuconfig
+.PHONY: qemu build clean distclean cacheclean menuconfig
 .DELETE_ON_ERROR:
-- 
cgit 


From a6b8ea9144340c0aaa66c817a3bbb6bca47f0321 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 4 May 2022 22:29:19 +0200
Subject: wireguard: selftests: bump package deps

Use newer, more reliable package dependencies. These should hopefully
reduce flakes. However, we keep the old iputils package, as it
accumulated bugs after resulting in flakes on slow machines.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/wireguard/qemu/Makefile | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile
index e121ef3878af..bca07b93eeb0 100644
--- a/tools/testing/selftests/wireguard/qemu/Makefile
+++ b/tools/testing/selftests/wireguard/qemu/Makefile
@@ -39,13 +39,13 @@ $(DISTFILES_PATH)/$(1): | $(4)
 	flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp; [ -f $$@.tmp ] || exit 1; if ([ -n "$(4)" ] && sed -n "s#^\([a-f0-9]\{64\}\)  \($(1)\)\$$$$#\1  $(DISTFILES_PATH)/\2.tmp#p" "$(4)" || echo "$(3)  $$@.tmp") | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi'
 endef
 
-$(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c))
-$(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d))
-$(eval $(call tar_download,IPROUTE2,iproute2,5.6.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,1b5b0e25ce6e23da7526ea1da044e814ad85ba761b10dd29c2b027c056b04692))
-$(eval $(call tar_download,IPTABLES,iptables,1.8.4,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,993a3a5490a544c2cbf2ef15cf7e7ed21af1845baf228318d5c36ef8827e157c))
-$(eval $(call tar_download,NMAP,nmap,7.80,.tar.bz2,https://nmap.org/dist/,fcfa5a0e42099e12e4bf7a68ebe6fde05553383a682e816a7ec9256ab4773faa))
+$(eval $(call tar_download,IPERF,iperf,3.11,.tar.gz,https://downloads.es.net/pub/iperf/,de8cb409fad61a0574f4cb07eb19ce1159707403ac2dc01b5d175e91240b7e5f))
+$(eval $(call tar_download,BASH,bash,5.1.16,.tar.gz,https://ftp.gnu.org/gnu/bash/,5bac17218d3911834520dad13cd1f85ab944e1c09ae1aba55906be1f8192f558))
+$(eval $(call tar_download,IPROUTE2,iproute2,5.17.0,.tar.gz,https://www.kernel.org/pub/linux/utils/net/iproute2/,bda331d5c4606138892f23a565d78fca18919b4d508a0b7ca8391c2da2db68b9))
+$(eval $(call tar_download,IPTABLES,iptables,1.8.7,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,c109c96bb04998cd44156622d36f8e04b140701ec60531a10668cfdff5e8d8f0))
+$(eval $(call tar_download,NMAP,nmap,7.92,.tgz,https://nmap.org/dist/,064183ea642dc4c12b1ab3b5358ce1cef7d2e7e11ffa2849f16d339f5b717117))
 $(eval $(call tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a))
-$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20200206,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,f5207248c6a3c3e3bfc9ab30b91c1897b00802ed861e1f9faaed873366078c64))
+$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20210914,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,97ff31489217bb265b7ae850d3d0f335ab07d2652ba1feec88b734bc96bd05ac))
 
 export CFLAGS := -O3 -pipe
 ifeq ($(HOST_ARCH),$(ARCH))
@@ -385,15 +385,15 @@ $(BASH_PATH)/.installed: $(BASH_TAR)
 	touch $@
 
 $(BASH_PATH)/bash: | $(BASH_PATH)/.installed $(USERSPACE_DEPS)
-	cd $(BASH_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --without-bash-malloc --disable-debugger --disable-help-builtin --disable-history --disable-multibyte --disable-progcomp --disable-readline --disable-mem-scramble
+	cd $(BASH_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --without-bash-malloc --disable-debugger --disable-help-builtin --disable-history --disable-progcomp --disable-readline --disable-mem-scramble
 	$(MAKE) -C $(BASH_PATH)
 	$(STRIP) -s $@
 
 $(IPROUTE2_PATH)/.installed: $(IPROUTE2_TAR)
 	mkdir -p $(BUILD_PATH)
 	flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
-	printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=n\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS\n' > $(IPROUTE2_PATH)/config.mk
-	printf 'lib: snapshot\n\t$$(MAKE) -C lib\nip/ip: lib\n\t$$(MAKE) -C ip ip\nmisc/ss: lib\n\t$$(MAKE) -C misc ss\n' >> $(IPROUTE2_PATH)/Makefile
+	printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=n\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS -DHAVE_HANDLE_AT\n' > $(IPROUTE2_PATH)/config.mk
+	printf 'libutil.a.done:\n\tflock -x $$@.lock $$(MAKE) -C lib\n\ttouch $$@\nip/ip: libutil.a.done\n\t$$(MAKE) -C ip ip\nmisc/ss: libutil.a.done\n\t$$(MAKE) -C misc ss\n' >> $(IPROUTE2_PATH)/Makefile
 	touch $@
 
 $(IPROUTE2_PATH)/ip/ip: | $(IPROUTE2_PATH)/.installed $(USERSPACE_DEPS)
-- 
cgit 


From 3fc1b11e5d7278437bdfff0e01f51e777eefb222 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 4 May 2022 22:29:20 +0200
Subject: wireguard: selftests: set panic_on_warn=1 from cmdline

Rather than setting this once init is running, set panic_on_warn from
the kernel command line, so that it catches splats from WireGuard
initialization code and the various crypto selftests.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/wireguard/qemu/arch/aarch64.config     | 2 +-
 tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config  | 2 +-
 tools/testing/selftests/wireguard/qemu/arch/arm.config         | 2 +-
 tools/testing/selftests/wireguard/qemu/arch/armeb.config       | 2 +-
 tools/testing/selftests/wireguard/qemu/arch/i686.config        | 2 +-
 tools/testing/selftests/wireguard/qemu/arch/m68k.config        | 2 +-
 tools/testing/selftests/wireguard/qemu/arch/mips.config        | 2 +-
 tools/testing/selftests/wireguard/qemu/arch/mips64.config      | 2 +-
 tools/testing/selftests/wireguard/qemu/arch/mips64el.config    | 2 +-
 tools/testing/selftests/wireguard/qemu/arch/mipsel.config      | 2 +-
 tools/testing/selftests/wireguard/qemu/arch/powerpc.config     | 2 +-
 tools/testing/selftests/wireguard/qemu/arch/powerpc64.config   | 2 +-
 tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config | 2 +-
 tools/testing/selftests/wireguard/qemu/arch/riscv32.config     | 2 +-
 tools/testing/selftests/wireguard/qemu/arch/riscv64.config     | 2 +-
 tools/testing/selftests/wireguard/qemu/arch/s390x.config       | 2 +-
 tools/testing/selftests/wireguard/qemu/arch/x86_64.config      | 2 +-
 tools/testing/selftests/wireguard/qemu/init.c                  | 6 ------
 18 files changed, 17 insertions(+), 23 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/wireguard/qemu/arch/aarch64.config b/tools/testing/selftests/wireguard/qemu/arch/aarch64.config
index e9ac41f6b3ae..09016880ce03 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/aarch64.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/aarch64.config
@@ -4,5 +4,5 @@ CONFIG_VIRTIO_MENU=y
 CONFIG_VIRTIO_MMIO=y
 CONFIG_VIRTIO_CONSOLE=y
 CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1"
+CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1"
 CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config b/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config
index 03609a203e8c..19ff66e4c602 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config
@@ -5,5 +5,5 @@ CONFIG_VIRTIO_MENU=y
 CONFIG_VIRTIO_MMIO=y
 CONFIG_VIRTIO_CONSOLE=y
 CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1"
+CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1"
 CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/arch/arm.config b/tools/testing/selftests/wireguard/qemu/arch/arm.config
index c616124fdd59..fc7959bef9c2 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/arm.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/arm.config
@@ -8,5 +8,5 @@ CONFIG_VIRTIO_MENU=y
 CONFIG_VIRTIO_MMIO=y
 CONFIG_VIRTIO_CONSOLE=y
 CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1"
+CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1"
 CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/armeb.config b/tools/testing/selftests/wireguard/qemu/arch/armeb.config
index d3a40a974e16..f3066be81c19 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/armeb.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/armeb.config
@@ -8,6 +8,6 @@ CONFIG_VIRTIO_MENU=y
 CONFIG_VIRTIO_MMIO=y
 CONFIG_VIRTIO_CONSOLE=y
 CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1"
+CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1"
 CONFIG_CPU_BIG_ENDIAN=y
 CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/i686.config b/tools/testing/selftests/wireguard/qemu/arch/i686.config
index a9b4fe795048..6d90892a85a2 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/i686.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/i686.config
@@ -2,5 +2,5 @@ CONFIG_ACPI=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
 CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/m68k.config b/tools/testing/selftests/wireguard/qemu/arch/m68k.config
index 62a15bdb877e..82c925e49beb 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/m68k.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/m68k.config
@@ -5,5 +5,5 @@ CONFIG_MAC=y
 CONFIG_SERIAL_PMACZILOG=y
 CONFIG_SERIAL_PMACZILOG_TTYS=y
 CONFIG_SERIAL_PMACZILOG_CONSOLE=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
 CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips.config b/tools/testing/selftests/wireguard/qemu/arch/mips.config
index df71d6b95546..d7ec63c17b30 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/mips.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/mips.config
@@ -7,5 +7,5 @@ CONFIG_POWER_RESET_SYSCON=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
 CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips64.config b/tools/testing/selftests/wireguard/qemu/arch/mips64.config
index 90c783f725c4..0994947e3392 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/mips64.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/mips64.config
@@ -10,5 +10,5 @@ CONFIG_POWER_RESET_SYSCON=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
 CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips64el.config b/tools/testing/selftests/wireguard/qemu/arch/mips64el.config
index 435b0b43e00c..591184342f47 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/mips64el.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/mips64el.config
@@ -11,5 +11,5 @@ CONFIG_POWER_RESET_SYSCON=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
 CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/arch/mipsel.config b/tools/testing/selftests/wireguard/qemu/arch/mipsel.config
index 62bb50c4a85f..18a498293737 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/mipsel.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/mipsel.config
@@ -8,5 +8,5 @@ CONFIG_POWER_RESET_SYSCON=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
 CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc.config
index 57957093b71b..5e04882e8e35 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/powerpc.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc.config
@@ -6,5 +6,5 @@ CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_MATH_EMULATION=y
 CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
 CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc64.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc64.config
index c19c7b519d8b..737194b7619e 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/powerpc64.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc64.config
@@ -7,7 +7,7 @@ CONFIG_PPC_RADIX_MMU=y
 CONFIG_HVC_CONSOLE=y
 CONFIG_CPU_BIG_ENDIAN=y
 CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=hvc0 wg.success=hvc1"
+CONFIG_CMDLINE="console=hvc0 wg.success=hvc1 panic_on_warn=1"
 CONFIG_SECTION_MISMATCH_WARN_ONLY=y
 CONFIG_FRAME_WARN=1280
 CONFIG_THREAD_SHIFT=14
diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config
index f52f1e2bc7f6..8148b9d1220a 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config
@@ -7,7 +7,7 @@ CONFIG_PPC_RADIX_MMU=y
 CONFIG_HVC_CONSOLE=y
 CONFIG_CPU_LITTLE_ENDIAN=y
 CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=hvc0 wg.success=hvc1"
+CONFIG_CMDLINE="console=hvc0 wg.success=hvc1 panic_on_warn=1"
 CONFIG_SECTION_MISMATCH_WARN_ONLY=y
 CONFIG_FRAME_WARN=1280
 CONFIG_THREAD_SHIFT=14
diff --git a/tools/testing/selftests/wireguard/qemu/arch/riscv32.config b/tools/testing/selftests/wireguard/qemu/arch/riscv32.config
index ea53e78e923e..0bd0e72d95d4 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/riscv32.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/riscv32.config
@@ -8,5 +8,5 @@ CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_VIRTIO_MENU=y
 CONFIG_VIRTIO_MMIO=y
 CONFIG_VIRTIO_CONSOLE=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=vport0p1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=vport0p1 panic_on_warn=1"
 CONFIG_CMDLINE_FORCE=y
diff --git a/tools/testing/selftests/wireguard/qemu/arch/riscv64.config b/tools/testing/selftests/wireguard/qemu/arch/riscv64.config
index 4a08d8c1d4af..dc266f3b1915 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/riscv64.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/riscv64.config
@@ -8,5 +8,5 @@ CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_VIRTIO_MENU=y
 CONFIG_VIRTIO_MMIO=y
 CONFIG_VIRTIO_CONSOLE=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=vport0p1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=vport0p1 panic_on_warn=1"
 CONFIG_CMDLINE_FORCE=y
diff --git a/tools/testing/selftests/wireguard/qemu/arch/s390x.config b/tools/testing/selftests/wireguard/qemu/arch/s390x.config
index 274a44f4e49c..a7b44dca0b0a 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/s390x.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/s390x.config
@@ -3,4 +3,4 @@ CONFIG_SCLP_VT220_CONSOLE=y
 CONFIG_VIRTIO_MENU=y
 CONFIG_VIRTIO_CONSOLE=y
 CONFIG_S390_GUEST=y
-CONFIG_CMDLINE="console=ttysclp0 wg.success=vport0p1"
+CONFIG_CMDLINE="console=ttysclp0 wg.success=vport0p1 panic_on_warn=1"
diff --git a/tools/testing/selftests/wireguard/qemu/arch/x86_64.config b/tools/testing/selftests/wireguard/qemu/arch/x86_64.config
index 45dd53a0d760..efa00693e08b 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/x86_64.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/x86_64.config
@@ -2,5 +2,5 @@ CONFIG_ACPI=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
 CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/init.c b/tools/testing/selftests/wireguard/qemu/init.c
index 0b45055d9de0..2a0f48fac925 100644
--- a/tools/testing/selftests/wireguard/qemu/init.c
+++ b/tools/testing/selftests/wireguard/qemu/init.c
@@ -110,12 +110,6 @@ static void enable_logging(void)
 			panic("write(exception-trace)");
 		close(fd);
 	}
-	fd = open("/proc/sys/kernel/panic_on_warn", O_WRONLY);
-	if (fd >= 0) {
-		if (write(fd, "1\n", 2) != 2)
-			panic("write(panic_on_warn)");
-		close(fd);
-	}
 }
 
 static void kmod_selftests(void)
-- 
cgit 


From 5a7c5f70c743c6cf32b44b05bd6b19d4ad82f49d Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 3 May 2022 15:14:28 +0300
Subject: selftests: ocelot: tc_flower_chains: specify conform-exceed action
 for policer

As discussed here with Ido Schimmel:
https://patchwork.kernel.org/project/netdevbpf/patch/20220224102908.5255-2-jianbol@nvidia.com/

the default conform-exceed action is "reclassify", for a reason we don't
really understand.

The point is that hardware can't offload that police action, so not
specifying "conform-exceed" was always wrong, even though the command
used to work in hardware (but not in software) until the kernel started
adding validation for it.

Fix the command used by the selftest by making the policer drop on
exceed, and pass the packet to the next action (goto) on conform.

Fixes: 8cd6b020b644 ("selftests: ocelot: add some example VCAP IS1, IS2 and ES0 tc offloads")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Link: https://lore.kernel.org/r/20220503121428.842906-1-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
index eaf8a04a7ca5..10e54bcca7a9 100755
--- a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
+++ b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
@@ -190,7 +190,7 @@ setup_prepare()
 
 	tc filter add dev $eth0 ingress chain $(IS2 0 0) pref 1 \
 		protocol ipv4 flower skip_sw ip_proto udp dst_port 5201 \
-		action police rate 50mbit burst 64k \
+		action police rate 50mbit burst 64k conform-exceed drop/pipe \
 		action goto chain $(IS2 1 0)
 }
 
-- 
cgit 


From c4a67a21a6d255ddcbaa076c0412aad73c7e0c02 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 4 May 2022 08:40:37 -0700
Subject: Revert "Merge branch 'mlxsw-line-card-model'"

This reverts commit 5e927a9f4b9f29d78a7c7d66ea717bb5c8bbad8e, reversing
changes made to cfc1d91a7d78cf9de25b043d81efcc16966d55b3.

The discussion is still ongoing so let's remove the uAPI
until the discussion settles.

Link: https://lore.kernel.org/all/20220425090021.32e9a98f@kernel.org/
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Link: https://lore.kernel.org/r/20220504154037.539442-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../networking/devlink/devlink-linecard.rst        |   4 -
 Documentation/networking/devlink/mlxsw.rst         |  33 ---
 drivers/net/ethernet/mellanox/mlxsw/core.h         |   1 -
 .../net/ethernet/mellanox/mlxsw/core_linecards.c   | 237 +---------------
 drivers/net/ethernet/mellanox/mlxsw/reg.h          |  87 +-----
 include/net/devlink.h                              |  18 +-
 include/uapi/linux/devlink.h                       |   5 -
 net/core/devlink.c                                 | 303 +--------------------
 .../drivers/net/mlxsw/devlink_linecard.sh          |  61 -----
 9 files changed, 10 insertions(+), 739 deletions(-)

(limited to 'tools/testing')

diff --git a/Documentation/networking/devlink/devlink-linecard.rst b/Documentation/networking/devlink/devlink-linecard.rst
index a98b468ad479..6c0b8928bc13 100644
--- a/Documentation/networking/devlink/devlink-linecard.rst
+++ b/Documentation/networking/devlink/devlink-linecard.rst
@@ -14,7 +14,6 @@ system. Following operations are provided:
   * Get a list of supported line card types.
   * Provision of a slot with specific line card type.
   * Get and monitor of line card state and its change.
-  * Get information about line card versions and devices.
 
 Line card according to the type may contain one or more gearboxes
 to mux the lanes with certain speed to multiple ports with lanes
@@ -121,6 +120,3 @@ Example usage
 
     # Set slot 8 to be unprovisioned:
     $ devlink lc set pci/0000:01:00.0 lc 8 notype
-
-    # Set info for slot 8:
-    $ devlink lc info pci/0000:01:00.0 lc 8
diff --git a/Documentation/networking/devlink/mlxsw.rst b/Documentation/networking/devlink/mlxsw.rst
index 0af345680510..cf857cb4ba8f 100644
--- a/Documentation/networking/devlink/mlxsw.rst
+++ b/Documentation/networking/devlink/mlxsw.rst
@@ -58,39 +58,6 @@ The ``mlxsw`` driver reports the following versions
      - running
      - Three digit firmware version
 
-Line card info versions
-=======================
-
-The ``mlxsw`` driver reports the following versions for line cards
-
-.. list-table:: devlink line card info versions implemented
-   :widths: 5 5 90
-
-   * - Name
-     - Type
-     - Description
-   * - ``hw.revision``
-     - fixed
-     - The hardware revision for this line card
-   * - ``ini.version``
-     - running
-     - Version of line card INI loaded
-
-Line card device info versions
-==============================
-
-The ``mlxsw`` driver reports the following versions for line card devices
-
-.. list-table:: devlink line card device info versions implemented
-   :widths: 5 5 90
-
-   * - Name
-     - Type
-     - Description
-   * - ``fw.version``
-     - running
-     - Three digit firmware version
-
 Driver-specific Traps
 =====================
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h
index d008282d7f2e..c2a891287047 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -581,7 +581,6 @@ struct mlxsw_linecard {
 	   active:1;
 	u16 hw_revision;
 	u16 ini_version;
-	struct list_head device_list;
 };
 
 struct mlxsw_linecard_types_info;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c b/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c
index 2abd31a62776..5c9869dcf674 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c
@@ -87,191 +87,11 @@ static const char *mlxsw_linecard_type_name(struct mlxsw_linecard *linecard)
 	return linecard->name;
 }
 
-struct mlxsw_linecard_device_info {
-	u16 fw_major;
-	u16 fw_minor;
-	u16 fw_sub_minor;
-};
-
-struct mlxsw_linecard_device {
-	struct list_head list;
-	u8 index;
-	struct mlxsw_linecard *linecard;
-	struct devlink_linecard_device *devlink_device;
-	struct mlxsw_linecard_device_info info;
-};
-
-static struct mlxsw_linecard_device *
-mlxsw_linecard_device_lookup(struct mlxsw_linecard *linecard, u8 index)
-{
-	struct mlxsw_linecard_device *device;
-
-	list_for_each_entry(device, &linecard->device_list, list)
-		if (device->index == index)
-			return device;
-	return NULL;
-}
-
-static int mlxsw_linecard_device_attach(struct mlxsw_core *mlxsw_core,
-					struct mlxsw_linecard *linecard,
-					u8 device_index, bool flash_owner)
-{
-	struct mlxsw_linecard_device *device;
-	int err;
-
-	device = kzalloc(sizeof(*device), GFP_KERNEL);
-	if (!device)
-		return -ENOMEM;
-	device->index = device_index;
-	device->linecard = linecard;
-
-	device->devlink_device = devlink_linecard_device_create(linecard->devlink_linecard,
-								device_index, device);
-	if (IS_ERR(device->devlink_device)) {
-		err = PTR_ERR(device->devlink_device);
-		goto err_devlink_linecard_device_attach;
-	}
-
-	list_add_tail(&device->list, &linecard->device_list);
-	return 0;
-
-err_devlink_linecard_device_attach:
-	kfree(device);
-	return err;
-}
-
-static void mlxsw_linecard_device_detach(struct mlxsw_core *mlxsw_core,
-					 struct mlxsw_linecard *linecard,
-					 struct mlxsw_linecard_device *device)
-{
-	list_del(&device->list);
-	devlink_linecard_device_destroy(linecard->devlink_linecard,
-					device->devlink_device);
-	kfree(device);
-}
-
-static void mlxsw_linecard_devices_detach(struct mlxsw_linecard *linecard)
-{
-	struct mlxsw_core *mlxsw_core = linecard->linecards->mlxsw_core;
-	struct mlxsw_linecard_device *device, *tmp;
-
-	list_for_each_entry_safe(device, tmp, &linecard->device_list, list)
-		mlxsw_linecard_device_detach(mlxsw_core, linecard, device);
-}
-
-static int mlxsw_linecard_devices_attach(struct mlxsw_linecard *linecard)
-{
-	struct mlxsw_core *mlxsw_core = linecard->linecards->mlxsw_core;
-	u8 msg_seq = 0;
-	int err;
-
-	do {
-		char mddq_pl[MLXSW_REG_MDDQ_LEN];
-		bool flash_owner;
-		bool data_valid;
-		u8 device_index;
-
-		mlxsw_reg_mddq_device_info_pack(mddq_pl, linecard->slot_index,
-						msg_seq);
-		err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mddq), mddq_pl);
-		if (err)
-			return err;
-		mlxsw_reg_mddq_device_info_unpack(mddq_pl, &msg_seq,
-						  &data_valid, &flash_owner,
-						  &device_index, NULL,
-						  NULL, NULL);
-		if (!data_valid)
-			break;
-		err = mlxsw_linecard_device_attach(mlxsw_core, linecard,
-						   device_index, flash_owner);
-		if (err)
-			goto rollback;
-	} while (msg_seq);
-
-	return 0;
-
-rollback:
-	mlxsw_linecard_devices_detach(linecard);
-	return err;
-}
-
-static void mlxsw_linecard_device_update(struct mlxsw_linecard *linecard,
-					 u8 device_index,
-					 struct mlxsw_linecard_device_info *info)
-{
-	struct mlxsw_linecard_device *device;
-
-	device = mlxsw_linecard_device_lookup(linecard, device_index);
-	if (!device)
-		return;
-	device->info = *info;
-}
-
-static int mlxsw_linecard_devices_update(struct mlxsw_linecard *linecard)
-{
-	struct mlxsw_core *mlxsw_core = linecard->linecards->mlxsw_core;
-	u8 msg_seq = 0;
-
-	do {
-		struct mlxsw_linecard_device_info info;
-		char mddq_pl[MLXSW_REG_MDDQ_LEN];
-		bool data_valid;
-		u8 device_index;
-		int err;
-
-		mlxsw_reg_mddq_device_info_pack(mddq_pl, linecard->slot_index,
-						msg_seq);
-		err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mddq), mddq_pl);
-		if (err)
-			return err;
-		mlxsw_reg_mddq_device_info_unpack(mddq_pl, &msg_seq,
-						  &data_valid, NULL,
-						  &device_index,
-						  &info.fw_major,
-						  &info.fw_minor,
-						  &info.fw_sub_minor);
-		if (!data_valid)
-			break;
-		mlxsw_linecard_device_update(linecard, device_index, &info);
-	} while (msg_seq);
-
-	return 0;
-}
-
-static int
-mlxsw_linecard_device_info_get(struct devlink_linecard_device *devlink_linecard_device,
-			       void *priv, struct devlink_info_req *req,
-			       struct netlink_ext_ack *extack)
-{
-	struct mlxsw_linecard_device *device = priv;
-	struct mlxsw_linecard_device_info *info;
-	struct mlxsw_linecard *linecard;
-	char buf[32];
-
-	linecard = device->linecard;
-	mutex_lock(&linecard->lock);
-	if (!linecard->active) {
-		mutex_unlock(&linecard->lock);
-		return 0;
-	}
-
-	info = &device->info;
-
-	sprintf(buf, "%u.%u.%u", info->fw_major, info->fw_minor,
-		info->fw_sub_minor);
-	mutex_unlock(&linecard->lock);
-
-	return devlink_info_version_running_put(req,
-						DEVLINK_INFO_VERSION_GENERIC_FW,
-						buf);
-}
-
 static void mlxsw_linecard_provision_fail(struct mlxsw_linecard *linecard)
 {
 	linecard->provisioned = false;
 	linecard->ready = false;
 	linecard->active = false;
-	mlxsw_linecard_devices_detach(linecard);
 	devlink_linecard_provision_fail(linecard->devlink_linecard);
 }
 
@@ -412,7 +232,6 @@ mlxsw_linecard_provision_set(struct mlxsw_linecard *linecard, u8 card_type,
 {
 	struct mlxsw_linecards *linecards = linecard->linecards;
 	const char *type;
-	int err;
 
 	type = mlxsw_linecard_types_lookup(linecards, card_type);
 	mlxsw_linecard_status_event_done(linecard,
@@ -430,11 +249,6 @@ mlxsw_linecard_provision_set(struct mlxsw_linecard *linecard, u8 card_type,
 			return PTR_ERR(type);
 		}
 	}
-	err = mlxsw_linecard_devices_attach(linecard);
-	if (err) {
-		mlxsw_linecard_provision_fail(linecard);
-		return err;
-	}
 	linecard->provisioned = true;
 	linecard->hw_revision = hw_revision;
 	linecard->ini_version = ini_version;
@@ -447,7 +261,6 @@ static void mlxsw_linecard_provision_clear(struct mlxsw_linecard *linecard)
 	mlxsw_linecard_status_event_done(linecard,
 					 MLXSW_LINECARD_STATUS_EVENT_TYPE_UNPROVISION);
 	linecard->provisioned = false;
-	mlxsw_linecard_devices_detach(linecard);
 	devlink_linecard_provision_clear(linecard->devlink_linecard);
 }
 
@@ -479,18 +292,11 @@ static int mlxsw_linecard_ready_clear(struct mlxsw_linecard *linecard)
 	return 0;
 }
 
-static int mlxsw_linecard_active_set(struct mlxsw_linecard *linecard)
+static void mlxsw_linecard_active_set(struct mlxsw_linecard *linecard)
 {
-	int err;
-
-	err = mlxsw_linecard_devices_update(linecard);
-	if (err)
-		return err;
-
 	mlxsw_linecard_active_ops_call(linecard);
 	linecard->active = true;
 	devlink_linecard_activate(linecard->devlink_linecard);
-	return 0;
 }
 
 static void mlxsw_linecard_active_clear(struct mlxsw_linecard *linecard)
@@ -539,11 +345,8 @@ static int mlxsw_linecard_status_process(struct mlxsw_linecards *linecards,
 			goto out;
 	}
 
-	if (active && linecard->active != active) {
-		err = mlxsw_linecard_active_set(linecard);
-		if (err)
-			goto out;
-	}
+	if (active && linecard->active != active)
+		mlxsw_linecard_active_set(linecard);
 
 	if (!active && linecard->active != active)
 		mlxsw_linecard_active_clear(linecard);
@@ -934,44 +737,12 @@ static void mlxsw_linecard_types_get(struct devlink_linecard *devlink_linecard,
 	*type_priv = ini_file;
 }
 
-static int
-mlxsw_linecard_info_get(struct devlink_linecard *devlink_linecard, void *priv,
-			struct devlink_info_req *req,
-			struct netlink_ext_ack *extack)
-{
-	struct mlxsw_linecard *linecard = priv;
-	char buf[32];
-	int err;
-
-	mutex_lock(&linecard->lock);
-	if (!linecard->provisioned) {
-		err = 0;
-		goto unlock;
-	}
-
-	sprintf(buf, "%d", linecard->hw_revision);
-	err = devlink_info_version_fixed_put(req, "hw.revision", buf);
-	if (err)
-		goto unlock;
-
-	sprintf(buf, "%d", linecard->ini_version);
-	err = devlink_info_version_running_put(req, "ini.version", buf);
-	if (err)
-		goto unlock;
-
-unlock:
-	mutex_unlock(&linecard->lock);
-	return err;
-}
-
 static const struct devlink_linecard_ops mlxsw_linecard_ops = {
 	.provision = mlxsw_linecard_provision,
 	.unprovision = mlxsw_linecard_unprovision,
 	.same_provision = mlxsw_linecard_same_provision,
 	.types_count = mlxsw_linecard_types_count,
 	.types_get = mlxsw_linecard_types_get,
-	.info_get = mlxsw_linecard_info_get,
-	.device_info_get = mlxsw_linecard_device_info_get,
 };
 
 struct mlxsw_linecard_status_event {
@@ -1069,7 +840,6 @@ static int mlxsw_linecard_init(struct mlxsw_core *mlxsw_core,
 	linecard->slot_index = slot_index;
 	linecard->linecards = linecards;
 	mutex_init(&linecard->lock);
-	INIT_LIST_HEAD(&linecard->device_list);
 
 	devlink_linecard = devlink_linecard_create(priv_to_devlink(mlxsw_core),
 						   slot_index, &mlxsw_linecard_ops,
@@ -1115,7 +885,6 @@ static void mlxsw_linecard_fini(struct mlxsw_core *mlxsw_core,
 	mlxsw_core_flush_owq();
 	if (linecard->active)
 		mlxsw_linecard_active_clear(linecard);
-	mlxsw_linecard_devices_detach(linecard);
 	devlink_linecard_destroy(linecard->devlink_linecard);
 	mutex_destroy(&linecard->lock);
 }
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 078e3aa04383..93af6c974ece 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -11643,11 +11643,7 @@ MLXSW_ITEM32(reg, mddq, sie, 0x00, 31, 1);
 
 enum mlxsw_reg_mddq_query_type {
 	MLXSW_REG_MDDQ_QUERY_TYPE_SLOT_INFO = 1,
-	MLXSW_REG_MDDQ_QUERY_TYPE_DEVICE_INFO, /* If there are no devices
-						* on the slot, data_valid
-						* will be '0'.
-						*/
-	MLXSW_REG_MDDQ_QUERY_TYPE_SLOT_NAME,
+	MLXSW_REG_MDDQ_QUERY_TYPE_SLOT_NAME = 3,
 };
 
 /* reg_mddq_query_type
@@ -11661,28 +11657,6 @@ MLXSW_ITEM32(reg, mddq, query_type, 0x00, 16, 8);
  */
 MLXSW_ITEM32(reg, mddq, slot_index, 0x00, 0, 4);
 
-/* reg_mddq_response_msg_seq
- * Response message sequential number. For a specific request, the response
- * message sequential number is the following one. In addition, the last
- * message should be 0.
- * Access: RO
- */
-MLXSW_ITEM32(reg, mddq, response_msg_seq, 0x04, 16, 8);
-
-/* reg_mddq_request_msg_seq
- * Request message sequential number.
- * The first message number should be 0.
- * Access: Index
- */
-MLXSW_ITEM32(reg, mddq, request_msg_seq, 0x04, 0, 8);
-
-/* reg_mddq_data_valid
- * If set, the data in the data field is valid and contain the information
- * for the queried index.
- * Access: RO
- */
-MLXSW_ITEM32(reg, mddq, data_valid, 0x08, 31, 1);
-
 /* reg_mddq_slot_info_provisioned
  * If set, the INI file is applied and the card is provisioned.
  * Access: RO
@@ -11769,65 +11743,6 @@ mlxsw_reg_mddq_slot_info_unpack(const char *payload, u8 *p_slot_index,
 	*p_card_type = mlxsw_reg_mddq_slot_info_card_type_get(payload);
 }
 
-/* reg_mddq_device_info_flash_owner
- * If set, the device is the flash owner. Otherwise, a shared flash
- * is used by this device (another device is the flash owner).
- * Access: RO
- */
-MLXSW_ITEM32(reg, mddq, device_info_flash_owner, 0x10, 30, 1);
-
-/* reg_mddq_device_info_device_index
- * Device index. The first device should number 0.
- * Access: RO
- */
-MLXSW_ITEM32(reg, mddq, device_info_device_index, 0x10, 0, 8);
-
-/* reg_mddq_device_info_fw_major
- * Major FW version number.
- * Access: RO
- */
-MLXSW_ITEM32(reg, mddq, device_info_fw_major, 0x14, 16, 16);
-
-/* reg_mddq_device_info_fw_minor
- * Minor FW version number.
- * Access: RO
- */
-MLXSW_ITEM32(reg, mddq, device_info_fw_minor, 0x18, 16, 16);
-
-/* reg_mddq_device_info_fw_sub_minor
- * Sub-minor FW version number.
- * Access: RO
- */
-MLXSW_ITEM32(reg, mddq, device_info_fw_sub_minor, 0x18, 0, 16);
-
-static inline void
-mlxsw_reg_mddq_device_info_pack(char *payload, u8 slot_index,
-				u8 request_msg_seq)
-{
-	__mlxsw_reg_mddq_pack(payload, slot_index,
-			      MLXSW_REG_MDDQ_QUERY_TYPE_DEVICE_INFO);
-	mlxsw_reg_mddq_request_msg_seq_set(payload, request_msg_seq);
-}
-
-static inline void
-mlxsw_reg_mddq_device_info_unpack(const char *payload, u8 *p_response_msg_seq,
-				  bool *p_data_valid, bool *p_flash_owner,
-				  u8 *p_device_index, u16 *p_fw_major,
-				  u16 *p_fw_minor, u16 *p_fw_sub_minor)
-{
-	*p_response_msg_seq = mlxsw_reg_mddq_response_msg_seq_get(payload);
-	*p_data_valid = mlxsw_reg_mddq_data_valid_get(payload);
-	if (p_flash_owner)
-		*p_flash_owner = mlxsw_reg_mddq_device_info_flash_owner_get(payload);
-	*p_device_index = mlxsw_reg_mddq_device_info_device_index_get(payload);
-	if (p_fw_major)
-		*p_fw_major = mlxsw_reg_mddq_device_info_fw_major_get(payload);
-	if (p_fw_minor)
-		*p_fw_minor = mlxsw_reg_mddq_device_info_fw_minor_get(payload);
-	if (p_fw_sub_minor)
-		*p_fw_sub_minor = mlxsw_reg_mddq_device_info_fw_sub_minor_get(payload);
-}
-
 #define MLXSW_REG_MDDQ_SLOT_ASCII_NAME_LEN 20
 
 /* reg_mddq_slot_ascii_name
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 062895973656..2a2a2a0c93f7 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -150,9 +150,6 @@ struct devlink_port_new_attrs {
 	   sfnum_valid:1;
 };
 
-struct devlink_info_req;
-struct devlink_linecard_device;
-
 /**
  * struct devlink_linecard_ops - Linecard operations
  * @provision: callback to provision the linecard slot with certain
@@ -171,8 +168,6 @@ struct devlink_linecard_device;
  *                  provisioned.
  * @types_count: callback to get number of supported types
  * @types_get: callback to get next type in list
- * @info_get: callback to get linecard info
- * @device_info_get: callback to get linecard device info
  */
 struct devlink_linecard_ops {
 	int (*provision)(struct devlink_linecard *linecard, void *priv,
@@ -187,12 +182,6 @@ struct devlink_linecard_ops {
 	void (*types_get)(struct devlink_linecard *linecard,
 			  void *priv, unsigned int index, const char **type,
 			  const void **type_priv);
-	int (*info_get)(struct devlink_linecard *linecard, void *priv,
-			struct devlink_info_req *req,
-			struct netlink_ext_ack *extack);
-	int (*device_info_get)(struct devlink_linecard_device *device,
-			       void *priv, struct devlink_info_req *req,
-			       struct netlink_ext_ack *extack);
 };
 
 struct devlink_sb_pool_info {
@@ -639,6 +628,7 @@ struct devlink_flash_update_params {
 #define DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK	BIT(1)
 
 struct devlink_region;
+struct devlink_info_req;
 
 /**
  * struct devlink_region_ops - Region operations
@@ -1588,12 +1578,6 @@ struct devlink_linecard *
 devlink_linecard_create(struct devlink *devlink, unsigned int linecard_index,
 			const struct devlink_linecard_ops *ops, void *priv);
 void devlink_linecard_destroy(struct devlink_linecard *linecard);
-struct devlink_linecard_device *
-devlink_linecard_device_create(struct devlink_linecard *linecard,
-			       unsigned int device_index, void *priv);
-void
-devlink_linecard_device_destroy(struct devlink_linecard *linecard,
-				struct devlink_linecard_device *linecard_device);
 void devlink_linecard_provision_set(struct devlink_linecard *linecard,
 				    const char *type);
 void devlink_linecard_provision_clear(struct devlink_linecard *linecard);
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index fb8c3864457f..b3d40a5d72ff 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -136,8 +136,6 @@ enum devlink_command {
 	DEVLINK_CMD_LINECARD_NEW,
 	DEVLINK_CMD_LINECARD_DEL,
 
-	DEVLINK_CMD_LINECARD_INFO_GET,		/* can dump */
-
 	/* add new commands above here */
 	__DEVLINK_CMD_MAX,
 	DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1
@@ -577,9 +575,6 @@ enum devlink_attr {
 	DEVLINK_ATTR_LINECARD_STATE,		/* u8 */
 	DEVLINK_ATTR_LINECARD_TYPE,		/* string */
 	DEVLINK_ATTR_LINECARD_SUPPORTED_TYPES,	/* nested */
-	DEVLINK_ATTR_LINECARD_DEVICE_LIST,	/* nested */
-	DEVLINK_ATTR_LINECARD_DEVICE,		/* nested */
-	DEVLINK_ATTR_LINECARD_DEVICE_INDEX,	/* u32 */
 
 	/* add new attributes above here, update the policy in devlink.c */
 
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 5f441a0e34f4..5cc88490f18f 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -83,11 +83,10 @@ struct devlink_linecard {
 	const struct devlink_linecard_ops *ops;
 	void *priv;
 	enum devlink_linecard_state state;
-	struct mutex state_lock; /* Protects state and device_list */
+	struct mutex state_lock; /* Protects state */
 	const char *type;
 	struct devlink_linecard_type *types;
 	unsigned int types_count;
-	struct list_head device_list;
 };
 
 /**
@@ -2059,56 +2058,6 @@ struct devlink_linecard_type {
 	const void *priv;
 };
 
-struct devlink_linecard_device {
-	struct list_head list;
-	unsigned int index;
-	void *priv;
-};
-
-static int
-devlink_nl_linecard_device_fill(struct sk_buff *msg,
-				struct devlink_linecard_device *linecard_device)
-{
-	struct nlattr *attr;
-
-	attr = nla_nest_start(msg, DEVLINK_ATTR_LINECARD_DEVICE);
-	if (!attr)
-		return -EMSGSIZE;
-	if (nla_put_u32(msg, DEVLINK_ATTR_LINECARD_DEVICE_INDEX,
-			linecard_device->index)) {
-		nla_nest_cancel(msg, attr);
-		return -EMSGSIZE;
-	}
-	nla_nest_end(msg, attr);
-
-	return 0;
-}
-
-static int devlink_nl_linecard_devices_fill(struct sk_buff *msg,
-					    struct devlink_linecard *linecard)
-{
-	struct devlink_linecard_device *linecard_device;
-	struct nlattr *attr;
-	int err;
-
-	if (list_empty(&linecard->device_list))
-		return 0;
-
-	attr = nla_nest_start(msg, DEVLINK_ATTR_LINECARD_DEVICE_LIST);
-	if (!attr)
-		return -EMSGSIZE;
-	list_for_each_entry(linecard_device, &linecard->device_list, list) {
-		err = devlink_nl_linecard_device_fill(msg, linecard_device);
-		if (err) {
-			nla_nest_cancel(msg, attr);
-			return err;
-		}
-	}
-	nla_nest_end(msg, attr);
-
-	return 0;
-}
-
 static int devlink_nl_linecard_fill(struct sk_buff *msg,
 				    struct devlink *devlink,
 				    struct devlink_linecard *linecard,
@@ -2119,7 +2068,6 @@ static int devlink_nl_linecard_fill(struct sk_buff *msg,
 	struct devlink_linecard_type *linecard_type;
 	struct nlattr *attr;
 	void *hdr;
-	int err;
 	int i;
 
 	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
@@ -2152,10 +2100,6 @@ static int devlink_nl_linecard_fill(struct sk_buff *msg,
 		nla_nest_end(msg, attr);
 	}
 
-	err = devlink_nl_linecard_devices_fill(msg, linecard);
-	if (err)
-		goto nla_put_failure;
-
 	genlmsg_end(msg, hdr);
 	return 0;
 
@@ -2425,191 +2369,6 @@ static int devlink_nl_cmd_linecard_set_doit(struct sk_buff *skb,
 	return 0;
 }
 
-struct devlink_info_req {
-	struct sk_buff *msg;
-};
-
-static int
-devlink_nl_linecard_device_info_fill(struct sk_buff *msg,
-				     struct devlink_linecard *linecard,
-				     struct devlink_linecard_device *linecard_device,
-				     struct netlink_ext_ack *extack)
-{
-	struct nlattr *attr;
-
-	attr = nla_nest_start(msg, DEVLINK_ATTR_LINECARD_DEVICE);
-	if (!attr)
-		return -EMSGSIZE;
-	if (nla_put_u32(msg, DEVLINK_ATTR_LINECARD_DEVICE_INDEX,
-			linecard_device->index)) {
-		nla_nest_cancel(msg, attr);
-		return -EMSGSIZE;
-	}
-	if (linecard->ops->device_info_get) {
-		struct devlink_info_req req;
-		int err;
-
-		req.msg = msg;
-		err = linecard->ops->device_info_get(linecard_device,
-						     linecard_device->priv,
-						     &req, extack);
-		if (err) {
-			nla_nest_cancel(msg, attr);
-			return err;
-		}
-	}
-	nla_nest_end(msg, attr);
-
-	return 0;
-}
-
-static int devlink_nl_linecard_devices_info_fill(struct sk_buff *msg,
-						 struct devlink_linecard *linecard,
-						 struct netlink_ext_ack *extack)
-{
-	struct devlink_linecard_device *linecard_device;
-	struct nlattr *attr;
-	int err;
-
-	if (list_empty(&linecard->device_list))
-		return 0;
-
-	attr = nla_nest_start(msg, DEVLINK_ATTR_LINECARD_DEVICE_LIST);
-	if (!attr)
-		return -EMSGSIZE;
-	list_for_each_entry(linecard_device, &linecard->device_list, list) {
-		err = devlink_nl_linecard_device_info_fill(msg, linecard,
-							   linecard_device,
-							   extack);
-		if (err) {
-			nla_nest_cancel(msg, attr);
-			return err;
-		}
-	}
-	nla_nest_end(msg, attr);
-
-	return 0;
-}
-
-static int
-devlink_nl_linecard_info_fill(struct sk_buff *msg, struct devlink *devlink,
-			      struct devlink_linecard *linecard,
-			      enum devlink_command cmd, u32 portid,
-			      u32 seq, int flags, struct netlink_ext_ack *extack)
-{
-	struct devlink_info_req req;
-	void *hdr;
-	int err;
-
-	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
-	if (!hdr)
-		return -EMSGSIZE;
-
-	err = -EMSGSIZE;
-	if (devlink_nl_put_handle(msg, devlink))
-		goto nla_put_failure;
-	if (nla_put_u32(msg, DEVLINK_ATTR_LINECARD_INDEX, linecard->index))
-		goto nla_put_failure;
-
-	req.msg = msg;
-	err = linecard->ops->info_get(linecard, linecard->priv, &req, extack);
-	if (err)
-		goto nla_put_failure;
-
-	err = devlink_nl_linecard_devices_info_fill(msg, linecard, extack);
-	if (err)
-		goto nla_put_failure;
-
-	genlmsg_end(msg, hdr);
-	return 0;
-
-nla_put_failure:
-	genlmsg_cancel(msg, hdr);
-	return err;
-}
-
-static int devlink_nl_cmd_linecard_info_get_doit(struct sk_buff *skb,
-						 struct genl_info *info)
-{
-	struct devlink_linecard *linecard = info->user_ptr[1];
-	struct devlink *devlink = linecard->devlink;
-	struct sk_buff *msg;
-	int err;
-
-	if (!linecard->ops->info_get)
-		return -EOPNOTSUPP;
-
-	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg)
-		return -ENOMEM;
-
-	mutex_lock(&linecard->state_lock);
-	err = devlink_nl_linecard_info_fill(msg, devlink, linecard,
-					    DEVLINK_CMD_LINECARD_INFO_GET,
-					    info->snd_portid, info->snd_seq, 0,
-					    info->extack);
-	mutex_unlock(&linecard->state_lock);
-	if (err) {
-		nlmsg_free(msg);
-		return err;
-	}
-
-	return genlmsg_reply(msg, info);
-}
-
-static int devlink_nl_cmd_linecard_info_get_dumpit(struct sk_buff *msg,
-						   struct netlink_callback *cb)
-{
-	struct devlink_linecard *linecard;
-	struct devlink *devlink;
-	int start = cb->args[0];
-	unsigned long index;
-	int idx = 0;
-	int err = 0;
-
-	mutex_lock(&devlink_mutex);
-	xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
-		if (!devlink_try_get(devlink))
-			continue;
-
-		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
-			goto retry;
-
-		mutex_lock(&devlink->linecards_lock);
-		list_for_each_entry(linecard, &devlink->linecard_list, list) {
-			if (idx < start || !linecard->ops->info_get) {
-				idx++;
-				continue;
-			}
-			mutex_lock(&linecard->state_lock);
-			err = devlink_nl_linecard_info_fill(msg, devlink, linecard,
-							    DEVLINK_CMD_LINECARD_INFO_GET,
-							    NETLINK_CB(cb->skb).portid,
-							    cb->nlh->nlmsg_seq,
-							    NLM_F_MULTI,
-							    cb->extack);
-			mutex_unlock(&linecard->state_lock);
-			if (err) {
-				mutex_unlock(&devlink->linecards_lock);
-				devlink_put(devlink);
-				goto out;
-			}
-			idx++;
-		}
-		mutex_unlock(&devlink->linecards_lock);
-retry:
-		devlink_put(devlink);
-	}
-out:
-	mutex_unlock(&devlink_mutex);
-
-	if (err != -EMSGSIZE)
-		return err;
-
-	cb->args[0] = idx;
-	return msg->len;
-}
-
 static int devlink_nl_sb_fill(struct sk_buff *msg, struct devlink *devlink,
 			      struct devlink_sb *devlink_sb,
 			      enum devlink_command cmd, u32 portid,
@@ -6602,6 +6361,10 @@ out_dev:
 	return err;
 }
 
+struct devlink_info_req {
+	struct sk_buff *msg;
+};
+
 int devlink_info_driver_name_put(struct devlink_info_req *req, const char *name)
 {
 	return nla_put_string(req->msg, DEVLINK_ATTR_INFO_DRIVER_NAME, name);
@@ -9321,13 +9084,6 @@ static const struct genl_small_ops devlink_nl_ops[] = {
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_LINECARD,
 	},
-	{
-		.cmd = DEVLINK_CMD_LINECARD_INFO_GET,
-		.doit = devlink_nl_cmd_linecard_info_get_doit,
-		.dumpit = devlink_nl_cmd_linecard_info_get_dumpit,
-		.internal_flags = DEVLINK_NL_FLAG_NEED_LINECARD,
-		/* can be retrieved by unprivileged users */
-	},
 	{
 		.cmd = DEVLINK_CMD_SB_GET,
 		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
@@ -10508,7 +10264,6 @@ devlink_linecard_create(struct devlink *devlink, unsigned int linecard_index,
 	linecard->priv = priv;
 	linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
 	mutex_init(&linecard->state_lock);
-	INIT_LIST_HEAD(&linecard->device_list);
 
 	err = devlink_linecard_types_init(linecard);
 	if (err) {
@@ -10536,7 +10291,6 @@ void devlink_linecard_destroy(struct devlink_linecard *linecard)
 	struct devlink *devlink = linecard->devlink;
 
 	devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_DEL);
-	WARN_ON(!list_empty(&linecard->device_list));
 	mutex_lock(&devlink->linecards_lock);
 	list_del(&linecard->list);
 	devlink_linecard_types_fini(linecard);
@@ -10545,52 +10299,6 @@ void devlink_linecard_destroy(struct devlink_linecard *linecard)
 }
 EXPORT_SYMBOL_GPL(devlink_linecard_destroy);
 
-/**
- *	devlink_linecard_device_create - Create a device on linecard
- *
- *	@linecard: devlink linecard
- *	@device_index: index of the linecard device
- *	@priv: user priv pointer
- *
- *	Return: Line card device structure or an ERR_PTR() encoded error code.
- */
-struct devlink_linecard_device *
-devlink_linecard_device_create(struct devlink_linecard *linecard,
-			       unsigned int device_index, void *priv)
-{
-	struct devlink_linecard_device *linecard_device;
-
-	linecard_device = kzalloc(sizeof(*linecard_device), GFP_KERNEL);
-	if (!linecard_device)
-		return ERR_PTR(-ENOMEM);
-	linecard_device->index = device_index;
-	linecard_device->priv = priv;
-	mutex_lock(&linecard->state_lock);
-	list_add_tail(&linecard_device->list, &linecard->device_list);
-	devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
-	mutex_unlock(&linecard->state_lock);
-	return linecard_device;
-}
-EXPORT_SYMBOL_GPL(devlink_linecard_device_create);
-
-/**
- *	devlink_linecard_device_destroy - Destroy device on linecard
- *
- *	@linecard: devlink linecard
- *	@linecard_device: devlink linecard device
- */
-void
-devlink_linecard_device_destroy(struct devlink_linecard *linecard,
-				struct devlink_linecard_device *linecard_device)
-{
-	mutex_lock(&linecard->state_lock);
-	list_del(&linecard_device->list);
-	devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
-	mutex_unlock(&linecard->state_lock);
-	kfree(linecard_device);
-}
-EXPORT_SYMBOL_GPL(devlink_linecard_device_destroy);
-
 /**
  *	devlink_linecard_provision_set - Set provisioning on linecard
  *
@@ -10623,7 +10331,6 @@ EXPORT_SYMBOL_GPL(devlink_linecard_provision_set);
 void devlink_linecard_provision_clear(struct devlink_linecard *linecard)
 {
 	mutex_lock(&linecard->state_lock);
-	WARN_ON(!list_empty(&linecard->device_list));
 	linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
 	linecard->type = NULL;
 	devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh
index 53a65f416770..08a922d8b86a 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh
@@ -152,7 +152,6 @@ unprovision_test()
 
 LC_16X100G_TYPE="16x100G"
 LC_16X100G_PORT_COUNT=16
-LC_16X100G_DEVICE_COUNT=4
 
 supported_types_check()
 {
@@ -178,42 +177,6 @@ supported_types_check()
 	check_err $? "16X100G not found between supported types of linecard $lc"
 }
 
-lc_info_check()
-{
-	local lc=$1
-	local fixed_hw_revision
-	local running_ini_version
-
-	fixed_hw_revision=$(devlink lc -v info $DEVLINK_DEV lc $lc -j | \
-			    jq -e -r '.[][][].versions.fixed."hw.revision"')
-	check_err $? "Failed to get linecard $lc fixed.hw.revision"
-	log_info "Linecard $lc fixed.hw.revision: \"$fixed_hw_revision\""
-	running_ini_version=$(devlink lc -v info $DEVLINK_DEV lc $lc -j | \
-			      jq -e -r '.[][][].versions.running."ini.version"')
-	check_err $? "Failed to get linecard $lc running.ini.version"
-	log_info "Linecard $lc running.ini.version: \"$running_ini_version\""
-}
-
-lc_devices_check()
-{
-	local lc=$1
-	local expected_device_count=$2
-	local device_count
-	local device
-
-	device_count=$(devlink lc show $DEVLINK_DEV lc $lc -j | \
-		       jq -e -r ".[][][].devices |length")
-	check_err $? "Failed to get linecard $lc device count"
-	[ $device_count != 0 ]
-	check_err $? "No device found on linecard $lc"
-	[ $device_count == $expected_device_count ]
-	check_err $? "Unexpected device count on linecard $lc (got $expected_device_count, expected $device_count)"
-	for (( device=0; device<device_count; device++ ))
-	do
-		log_info "Linecard $lc device $device"
-	done
-}
-
 ports_check()
 {
 	local lc=$1
@@ -243,8 +206,6 @@ provision_test()
 		unprovision_one $lc
 	fi
 	provision_one $lc $LC_16X100G_TYPE
-	lc_devices_check $lc $LC_16X100G_DEVICE_COUNT
-	lc_info_check $lc
 	ports_check $lc $LC_16X100G_PORT_COUNT
 	log_test "Provision"
 }
@@ -259,26 +220,6 @@ interface_check()
 	setup_wait
 }
 
-lc_devices_info_check()
-{
-	local lc=$1
-	local expected_device_count=$2
-	local device_count
-	local device
-	local running_device_fw
-
-	device_count=$(devlink lc info $DEVLINK_DEV lc $lc -j | \
-		       jq -e -r ".[][][].devices |length")
-	check_err $? "Failed to get linecard $lc device count"
-	for (( device=0; device<device_count; device++ ))
-	do
-		running_device_fw=$(devlink lc -v info $DEVLINK_DEV lc $lc -j | \
-				    jq -e -r ".[][][].devices[$device].versions.running.fw")
-		check_err $? "Failed to get linecard $lc device $device running fw version"
-		log_info "Linecard $lc device $device running.fw: \"$running_device_fw\""
-	done
-}
-
 activation_16x100G_test()
 {
 	RET=0
@@ -295,8 +236,6 @@ activation_16x100G_test()
 		$ACTIVATION_TIMEOUT)
 	check_err $? "Failed to get linecard $lc activated (timeout)"
 
-	lc_devices_info_check $lc $LC_16X100G_DEVICE_COUNT
-
 	interface_check
 
 	log_test "Activation 16x100G"
-- 
cgit 


From e71b7f1f44d3d88c677769c85ef0171caf9fc89f Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Wed, 4 May 2022 11:07:39 +0200
Subject: selftests: add ping test with ping_group_range tuned

The 'ping' utility is able to manage two kind of sockets (raw or icmp),
depending on the sysctl ping_group_range. By default, ping_group_range is
set to '1 0', which forces ping to use an ip raw socket.

Let's replay the ping tests by allowing 'ping' to use the ip icmp socket.
After the previous patch, ipv4 tests results are the same with both kinds
of socket. For ipv6, there are a lot a new failures (the previous patch
fixes only two cases).

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/fcnal-test.sh | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
index 47c4d4b4a44a..54701c8b0cd7 100755
--- a/tools/testing/selftests/net/fcnal-test.sh
+++ b/tools/testing/selftests/net/fcnal-test.sh
@@ -810,10 +810,16 @@ ipv4_ping()
 	setup
 	set_sysctl net.ipv4.raw_l3mdev_accept=1 2>/dev/null
 	ipv4_ping_novrf
+	setup
+	set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
+	ipv4_ping_novrf
 
 	log_subsection "With VRF"
 	setup "yes"
 	ipv4_ping_vrf
+	setup "yes"
+	set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
+	ipv4_ping_vrf
 }
 
 ################################################################################
@@ -2348,10 +2354,16 @@ ipv6_ping()
 	log_subsection "No VRF"
 	setup
 	ipv6_ping_novrf
+	setup
+	set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
+	ipv6_ping_novrf
 
 	log_subsection "With VRF"
 	setup "yes"
 	ipv6_ping_vrf
+	setup "yes"
+	set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
+	ipv6_ping_vrf
 }
 
 ################################################################################
-- 
cgit 


From 32fb67a3e7a65171cd790ff0e65fcee49cae7cf5 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Sun, 8 May 2022 11:08:22 +0300
Subject: selftests: lib: Add a generic helper for obtaining HW stats

The function get_l3_stats() from the test hw_stats_l3.sh will be useful for
any test that wishes to work with L3 stats. Furthermore, it is easy to
generalize to other HW stats suites (for when such are added). Therefore,
move the code to lib.sh, rewrite it to have the same interface as the other
stats-collecting functions, and generalize to take the name of the HW stats
suite to collect as an argument.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/hw_stats_l3.sh | 16 ++++------------
 tools/testing/selftests/net/forwarding/lib.sh         | 11 +++++++++++
 2 files changed, 15 insertions(+), 12 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/forwarding/hw_stats_l3.sh b/tools/testing/selftests/net/forwarding/hw_stats_l3.sh
index 1c11c4256d06..9c1f76e108af 100755
--- a/tools/testing/selftests/net/forwarding/hw_stats_l3.sh
+++ b/tools/testing/selftests/net/forwarding/hw_stats_l3.sh
@@ -162,14 +162,6 @@ ping_ipv6()
 	ping_test $h1.200 2001:db8:2::1 " IPv6"
 }
 
-get_l3_stat()
-{
-	local selector=$1; shift
-
-	ip -j stats show dev $rp1.200 group offload subgroup l3_stats |
-		  jq '.[0].stats64.'$selector
-}
-
 send_packets_rx_ipv4()
 {
 	# Send 21 packets instead of 20, because the first one might trap and go
@@ -208,11 +200,11 @@ ___test_stats()
 	local a
 	local b
 
-	a=$(get_l3_stat ${dir}.packets)
+	a=$(hw_stats_get l3_stats $rp1.200 ${dir} packets)
 	send_packets_${dir}_${prot}
 	"$@"
 	b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \
-		       get_l3_stat ${dir}.packets)
+		       hw_stats_get l3_stats $rp1.200 ${dir} packets)
 	check_err $? "Traffic not reflected in the counter: $a -> $b"
 }
 
@@ -281,11 +273,11 @@ __test_stats_report()
 
 	RET=0
 
-	a=$(get_l3_stat ${dir}.packets)
+	a=$(hw_stats_get l3_stats $rp1.200 ${dir} packets)
 	send_packets_${dir}_${prot}
 	ip address flush dev $rp1.200
 	b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \
-		       get_l3_stat ${dir}.packets)
+		       hw_stats_get l3_stats $rp1.200 ${dir} packets)
 	check_err $? "Traffic not reflected in the counter: $a -> $b"
 	log_test "Test ${dir} packets: stats pushed on loss of L3"
 
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 66681a2bcdd3..37ae49d47853 100755
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -828,6 +828,17 @@ ipv6_stats_get()
 	cat /proc/net/dev_snmp6/$dev | grep "^$stat" | cut -f2
 }
 
+hw_stats_get()
+{
+	local suite=$1; shift
+	local if_name=$1; shift
+	local dir=$1; shift
+	local stat=$1; shift
+
+	ip -j stats show dev $if_name group offload subgroup $suite |
+		jq ".[0].stats64.$dir.$stat"
+}
+
 humanize()
 {
 	local speed=$1; shift
-- 
cgit 


From 813f97a2686086464f59a433c1bf3413cf504757 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Sun, 8 May 2022 11:08:23 +0300
Subject: selftests: forwarding: Add a tunnel-based test for L3 HW stats

Add a selftest that uses an IPIP topology and tests that L3 HW stats
reflect the traffic in the tunnel.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/Makefile    |   1 +
 .../selftests/net/forwarding/hw_stats_l3_gre.sh    | 109 +++++++++++++++++++++
 2 files changed, 110 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index 0912f5ae7f6b..b5181b5a8e29 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -20,6 +20,7 @@ TEST_PROGS = bridge_igmp.sh \
 	gre_multipath_nh.sh \
 	gre_multipath.sh \
 	hw_stats_l3.sh \
+	hw_stats_l3_gre.sh \
 	ip6_forward_instats_vrf.sh \
 	ip6gre_custom_multipath_hash.sh \
 	ip6gre_flat_key.sh \
diff --git a/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh b/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh
new file mode 100755
index 000000000000..eb9ec4a68f84
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh
@@ -0,0 +1,109 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test L3 stats on IP-in-IP GRE tunnel without key.
+
+# This test uses flat topology for IP tunneling tests. See ipip_lib.sh for more
+# details.
+
+ALL_TESTS="
+	ping_ipv4
+	test_stats_rx
+	test_stats_tx
+"
+NUM_NETIFS=6
+source lib.sh
+source ipip_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	ol1=${NETIFS[p2]}
+
+	ul1=${NETIFS[p3]}
+	ul2=${NETIFS[p4]}
+
+	ol2=${NETIFS[p5]}
+	h2=${NETIFS[p6]}
+
+	ol1mac=$(mac_get $ol1)
+
+	forwarding_enable
+	vrf_prepare
+	h1_create
+	h2_create
+	sw1_flat_create gre $ol1 $ul1
+	sw2_flat_create gre $ol2 $ul2
+	ip stats set dev g1a l3_stats on
+	ip stats set dev g2a l3_stats on
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip stats set dev g1a l3_stats off
+	ip stats set dev g2a l3_stats off
+
+	sw2_flat_destroy $ol2 $ul2
+	sw1_flat_destroy $ol1 $ul1
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+	forwarding_restore
+}
+
+ping_ipv4()
+{
+	RET=0
+
+	ping_test $h1 192.0.2.18 " gre flat"
+}
+
+send_packets_ipv4()
+{
+	# Send 21 packets instead of 20, because the first one might trap and go
+	# through the SW datapath, which might not bump the HW counter.
+	$MZ $h1 -c 21 -d 20msec -p 100 \
+	    -a own -b $ol1mac -A 192.0.2.1 -B 192.0.2.18 \
+	    -q -t udp sp=54321,dp=12345
+}
+
+test_stats()
+{
+	local dev=$1; shift
+	local dir=$1; shift
+
+	local a
+	local b
+
+	RET=0
+
+	a=$(hw_stats_get l3_stats $dev $dir packets)
+	send_packets_ipv4
+	b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \
+		     hw_stats_get l3_stats $dev $dir packets)
+	check_err $? "Traffic not reflected in the counter: $a -> $b"
+
+	log_test "Test $dir packets: $prot"
+}
+
+test_stats_tx()
+{
+	test_stats g1a tx
+}
+
+test_stats_rx()
+{
+	test_stats g2a rx
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit 


From edae34a3ed9293b5077dddf9e51a3d86c95dc76a Mon Sep 17 00:00:00 2001
From: Lina Wang <lina.wang@mediatek.com>
Date: Thu, 5 May 2022 13:48:50 +0800
Subject: selftests net: add UDP GRO fraglist + bpf self-tests

When NET_F_F_GRO_FRAGLIST is enabled and bpf_skb_change_proto is used,
check if udp packets and tcp packets are successfully delivered to user
space. If wrong udp packets are delivered, udpgso_bench_rx will exit
with "Initial byte out of range"

Signed-off-by: Maciej enczykowski <maze@google.com>
Signed-off-by: Lina Wang <lina.wang@mediatek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/Makefile          |   3 +
 tools/testing/selftests/net/bpf/Makefile      |  14 ++
 tools/testing/selftests/net/bpf/nat6to4.c     | 285 ++++++++++++++++++++++++++
 tools/testing/selftests/net/udpgro_frglist.sh | 101 +++++++++
 4 files changed, 403 insertions(+)
 create mode 100644 tools/testing/selftests/net/bpf/Makefile
 create mode 100644 tools/testing/selftests/net/bpf/nat6to4.c
 create mode 100755 tools/testing/selftests/net/udpgro_frglist.sh

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 0f2ebc38d893..e1f998defd10 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -25,6 +25,7 @@ TEST_PROGS += bareudp.sh
 TEST_PROGS += amt.sh
 TEST_PROGS += unicast_extensions.sh
 TEST_PROGS += udpgro_fwd.sh
+TEST_PROGS += udpgro_frglist.sh
 TEST_PROGS += veth.sh
 TEST_PROGS += ioam6.sh
 TEST_PROGS += gro.sh
@@ -61,6 +62,8 @@ TEST_FILES := settings
 KSFT_KHDR_INSTALL := 1
 include ../lib.mk
 
+include bpf/Makefile
+
 $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma
 $(OUTPUT)/tcp_mmap: LDLIBS += -lpthread
 $(OUTPUT)/tcp_inq: LDLIBS += -lpthread
diff --git a/tools/testing/selftests/net/bpf/Makefile b/tools/testing/selftests/net/bpf/Makefile
new file mode 100644
index 000000000000..f91bf14bbee7
--- /dev/null
+++ b/tools/testing/selftests/net/bpf/Makefile
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CLANG ?= clang
+CCINCLUDE += -I../../bpf
+CCINCLUDE += -I../../../../../usr/include/
+
+TEST_CUSTOM_PROGS = $(OUTPUT)/bpf/nat6to4.o
+all: $(TEST_CUSTOM_PROGS)
+
+$(OUTPUT)/%.o: %.c
+	$(CLANG) -O2 -target bpf -c $< $(CCINCLUDE) -o $@
+
+clean:
+	rm -f $(TEST_CUSTOM_PROGS)
diff --git a/tools/testing/selftests/net/bpf/nat6to4.c b/tools/testing/selftests/net/bpf/nat6to4.c
new file mode 100644
index 000000000000..ac54c36b25fc
--- /dev/null
+++ b/tools/testing/selftests/net/bpf/nat6to4.c
@@ -0,0 +1,285 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This code is taken from the Android Open Source Project and the author
+ * (Maciej Żenczykowski) has gave permission to relicense it under the
+ * GPLv2. Therefore this program is free software;
+ * You can redistribute it and/or modify it under the terms of the GNU
+ * General Public License version 2 as published by the Free Software
+ * Foundation
+
+ * The original headers, including the original license headers, are
+ * included below for completeness.
+ *
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <linux/bpf.h>
+#include <linux/if.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/pkt_cls.h>
+#include <linux/swab.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+
+#include <linux/udp.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define IP_DF 0x4000  // Flag: "Don't Fragment"
+
+SEC("schedcls/ingress6/nat_6")
+int sched_cls_ingress6_nat_6_prog(struct __sk_buff *skb)
+{
+	const int l2_header_size =  sizeof(struct ethhdr);
+	void *data = (void *)(long)skb->data;
+	const void *data_end = (void *)(long)skb->data_end;
+	const struct ethhdr * const eth = data;  // used iff is_ethernet
+	const struct ipv6hdr * const ip6 =  (void *)(eth + 1);
+
+	// Require ethernet dst mac address to be our unicast address.
+	if  (skb->pkt_type != PACKET_HOST)
+		return TC_ACT_OK;
+
+	// Must be meta-ethernet IPv6 frame
+	if (skb->protocol != bpf_htons(ETH_P_IPV6))
+		return TC_ACT_OK;
+
+	// Must have (ethernet and) ipv6 header
+	if (data + l2_header_size + sizeof(*ip6) > data_end)
+		return TC_ACT_OK;
+
+	// Ethertype - if present - must be IPv6
+	if (eth->h_proto != bpf_htons(ETH_P_IPV6))
+		return TC_ACT_OK;
+
+	// IP version must be 6
+	if (ip6->version != 6)
+		return TC_ACT_OK;
+	// Maximum IPv6 payload length that can be translated to IPv4
+	if (bpf_ntohs(ip6->payload_len) > 0xFFFF - sizeof(struct iphdr))
+		return TC_ACT_OK;
+	switch (ip6->nexthdr) {
+	case IPPROTO_TCP:  // For TCP & UDP the checksum neutrality of the chosen IPv6
+	case IPPROTO_UDP:  // address means there is no need to update their checksums.
+	case IPPROTO_GRE:  // We do not need to bother looking at GRE/ESP headers,
+	case IPPROTO_ESP:  // since there is never a checksum to update.
+		break;
+	default:  // do not know how to handle anything else
+		return TC_ACT_OK;
+	}
+
+	struct ethhdr eth2;  // used iff is_ethernet
+
+	eth2 = *eth;                     // Copy over the ethernet header (src/dst mac)
+	eth2.h_proto = bpf_htons(ETH_P_IP);  // But replace the ethertype
+
+	struct iphdr ip = {
+		.version = 4,                                                      // u4
+		.ihl = sizeof(struct iphdr) / sizeof(__u32),                       // u4
+		.tos = (ip6->priority << 4) + (ip6->flow_lbl[0] >> 4),             // u8
+		.tot_len = bpf_htons(bpf_ntohs(ip6->payload_len) + sizeof(struct iphdr)),  // u16
+		.id = 0,                                                           // u16
+		.frag_off = bpf_htons(IP_DF),                                          // u16
+		.ttl = ip6->hop_limit,                                             // u8
+		.protocol = ip6->nexthdr,                                          // u8
+		.check = 0,                                                        // u16
+		.saddr = 0x0201a8c0,                            // u32
+		.daddr = 0x0101a8c0,                                         // u32
+	};
+
+	// Calculate the IPv4 one's complement checksum of the IPv4 header.
+	__wsum sum4 = 0;
+
+	for (int i = 0; i < sizeof(ip) / sizeof(__u16); ++i)
+		sum4 += ((__u16 *)&ip)[i];
+
+	// Note that sum4 is guaranteed to be non-zero by virtue of ip.version == 4
+	sum4 = (sum4 & 0xFFFF) + (sum4 >> 16);  // collapse u32 into range 1 .. 0x1FFFE
+	sum4 = (sum4 & 0xFFFF) + (sum4 >> 16);  // collapse any potential carry into u16
+	ip.check = (__u16)~sum4;                // sum4 cannot be zero, so this is never 0xFFFF
+
+	// Calculate the *negative* IPv6 16-bit one's complement checksum of the IPv6 header.
+	__wsum sum6 = 0;
+	// We'll end up with a non-zero sum due to ip6->version == 6 (which has '0' bits)
+	for (int i = 0; i < sizeof(*ip6) / sizeof(__u16); ++i)
+		sum6 += ~((__u16 *)ip6)[i];  // note the bitwise negation
+
+	// Note that there is no L4 checksum update: we are relying on the checksum neutrality
+	// of the ipv6 address chosen by netd's ClatdController.
+
+	// Packet mutations begin - point of no return, but if this first modification fails
+	// the packet is probably still pristine, so let clatd handle it.
+	if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IP), 0))
+		return TC_ACT_OK;
+	bpf_csum_update(skb, sum6);
+
+	data = (void *)(long)skb->data;
+	data_end = (void *)(long)skb->data_end;
+	if (data + l2_header_size + sizeof(struct iphdr) > data_end)
+		return TC_ACT_SHOT;
+
+	struct ethhdr *new_eth = data;
+
+	// Copy over the updated ethernet header
+	*new_eth = eth2;
+
+	// Copy over the new ipv4 header.
+	*(struct iphdr *)(new_eth + 1) = ip;
+	return bpf_redirect(skb->ifindex, BPF_F_INGRESS);
+}
+
+SEC("schedcls/egress4/snat4")
+int sched_cls_egress4_snat4_prog(struct __sk_buff *skb)
+{
+	const int l2_header_size =  sizeof(struct ethhdr);
+	void *data = (void *)(long)skb->data;
+	const void *data_end = (void *)(long)skb->data_end;
+	const struct ethhdr *const eth = data;  // used iff is_ethernet
+	const struct iphdr *const ip4 = (void *)(eth + 1);
+
+	// Must be meta-ethernet IPv4 frame
+	if (skb->protocol != bpf_htons(ETH_P_IP))
+		return TC_ACT_OK;
+
+	// Must have ipv4 header
+	if (data + l2_header_size + sizeof(struct ipv6hdr) > data_end)
+		return TC_ACT_OK;
+
+	// Ethertype - if present - must be IPv4
+	if (eth->h_proto != bpf_htons(ETH_P_IP))
+		return TC_ACT_OK;
+
+	// IP version must be 4
+	if (ip4->version != 4)
+		return TC_ACT_OK;
+
+	// We cannot handle IP options, just standard 20 byte == 5 dword minimal IPv4 header
+	if (ip4->ihl != 5)
+		return TC_ACT_OK;
+
+	// Maximum IPv6 payload length that can be translated to IPv4
+	if (bpf_htons(ip4->tot_len) > 0xFFFF - sizeof(struct ipv6hdr))
+		return TC_ACT_OK;
+
+	// Calculate the IPv4 one's complement checksum of the IPv4 header.
+	__wsum sum4 = 0;
+
+	for (int i = 0; i < sizeof(*ip4) / sizeof(__u16); ++i)
+		sum4 += ((__u16 *)ip4)[i];
+
+	// Note that sum4 is guaranteed to be non-zero by virtue of ip4->version == 4
+	sum4 = (sum4 & 0xFFFF) + (sum4 >> 16);  // collapse u32 into range 1 .. 0x1FFFE
+	sum4 = (sum4 & 0xFFFF) + (sum4 >> 16);  // collapse any potential carry into u16
+	// for a correct checksum we should get *a* zero, but sum4 must be positive, ie 0xFFFF
+	if (sum4 != 0xFFFF)
+		return TC_ACT_OK;
+
+	// Minimum IPv4 total length is the size of the header
+	if (bpf_ntohs(ip4->tot_len) < sizeof(*ip4))
+		return TC_ACT_OK;
+
+	// We are incapable of dealing with IPv4 fragments
+	if (ip4->frag_off & ~bpf_htons(IP_DF))
+		return TC_ACT_OK;
+
+	switch (ip4->protocol) {
+	case IPPROTO_TCP:  // For TCP & UDP the checksum neutrality of the chosen IPv6
+	case IPPROTO_GRE:  // address means there is no need to update their checksums.
+	case IPPROTO_ESP:  // We do not need to bother looking at GRE/ESP headers,
+		break;         // since there is never a checksum to update.
+
+	case IPPROTO_UDP:  // See above comment, but must also have UDP header...
+		if (data + sizeof(*ip4) + sizeof(struct udphdr) > data_end)
+			return TC_ACT_OK;
+		const struct udphdr *uh = (const struct udphdr *)(ip4 + 1);
+		// If IPv4/UDP checksum is 0 then fallback to clatd so it can calculate the
+		// checksum.  Otherwise the network or more likely the NAT64 gateway might
+		// drop the packet because in most cases IPv6/UDP packets with a zero checksum
+		// are invalid. See RFC 6935.  TODO: calculate checksum via bpf_csum_diff()
+		if (!uh->check)
+			return TC_ACT_OK;
+		break;
+
+	default:  // do not know how to handle anything else
+		return TC_ACT_OK;
+	}
+	struct ethhdr eth2;  // used iff is_ethernet
+
+	eth2 = *eth;                     // Copy over the ethernet header (src/dst mac)
+	eth2.h_proto = bpf_htons(ETH_P_IPV6);  // But replace the ethertype
+
+	struct ipv6hdr ip6 = {
+		.version = 6,                                    // __u8:4
+		.priority = ip4->tos >> 4,                       // __u8:4
+		.flow_lbl = {(ip4->tos & 0xF) << 4, 0, 0},       // __u8[3]
+		.payload_len = bpf_htons(bpf_ntohs(ip4->tot_len) - 20),  // __be16
+		.nexthdr = ip4->protocol,                        // __u8
+		.hop_limit = ip4->ttl,                           // __u8
+	};
+	ip6.saddr.in6_u.u6_addr32[0] = bpf_htonl(0x20010db8);
+	ip6.saddr.in6_u.u6_addr32[1] = 0;
+	ip6.saddr.in6_u.u6_addr32[2] = 0;
+	ip6.saddr.in6_u.u6_addr32[3] = bpf_htonl(1);
+	ip6.daddr.in6_u.u6_addr32[0] = bpf_htonl(0x20010db8);
+	ip6.daddr.in6_u.u6_addr32[1] = 0;
+	ip6.daddr.in6_u.u6_addr32[2] = 0;
+	ip6.daddr.in6_u.u6_addr32[3] = bpf_htonl(2);
+
+	// Calculate the IPv6 16-bit one's complement checksum of the IPv6 header.
+	__wsum sum6 = 0;
+	// We'll end up with a non-zero sum due to ip6.version == 6
+	for (int i = 0; i < sizeof(ip6) / sizeof(__u16); ++i)
+		sum6 += ((__u16 *)&ip6)[i];
+
+	// Packet mutations begin - point of no return, but if this first modification fails
+	// the packet is probably still pristine, so let clatd handle it.
+	if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IPV6), 0))
+		return TC_ACT_OK;
+
+	// This takes care of updating the skb->csum field for a CHECKSUM_COMPLETE packet.
+	// In such a case, skb->csum is a 16-bit one's complement sum of the entire payload,
+	// thus we need to subtract out the ipv4 header's sum, and add in the ipv6 header's sum.
+	// However, we've already verified the ipv4 checksum is correct and thus 0.
+	// Thus we only need to add the ipv6 header's sum.
+	//
+	// bpf_csum_update() always succeeds if the skb is CHECKSUM_COMPLETE and returns an error
+	// (-ENOTSUPP) if it isn't.  So we just ignore the return code (see above for more details).
+	bpf_csum_update(skb, sum6);
+
+	// bpf_skb_change_proto() invalidates all pointers - reload them.
+	data = (void *)(long)skb->data;
+	data_end = (void *)(long)skb->data_end;
+
+	// I cannot think of any valid way for this error condition to trigger, however I do
+	// believe the explicit check is required to keep the in kernel ebpf verifier happy.
+	if (data + l2_header_size + sizeof(ip6) > data_end)
+		return TC_ACT_SHOT;
+
+	struct ethhdr *new_eth = data;
+
+	// Copy over the updated ethernet header
+	*new_eth = eth2;
+	// Copy over the new ipv4 header.
+	*(struct ipv6hdr *)(new_eth + 1) = ip6;
+	return TC_ACT_OK;
+}
+
+char _license[] SEC("license") = ("GPL");
diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh
new file mode 100755
index 000000000000..807b74c8fd80
--- /dev/null
+++ b/tools/testing/selftests/net/udpgro_frglist.sh
@@ -0,0 +1,101 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a series of udpgro benchmarks
+
+readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
+
+cleanup() {
+	local -r jobs="$(jobs -p)"
+	local -r ns="$(ip netns list|grep $PEER_NS)"
+
+	[ -n "${jobs}" ] && kill -INT ${jobs} 2>/dev/null
+	[ -n "$ns" ] && ip netns del $ns 2>/dev/null
+}
+trap cleanup EXIT
+
+run_one() {
+	# use 'rx' as separator between sender args and receiver args
+	local -r all="$@"
+	local -r tx_args=${all%rx*}
+	local rx_args=${all#*rx}
+
+
+
+	ip netns add "${PEER_NS}"
+	ip -netns "${PEER_NS}" link set lo up
+	ip link add type veth
+	ip link set dev veth0 up
+	ip addr add dev veth0 192.168.1.2/24
+	ip addr add dev veth0 2001:db8::2/64 nodad
+
+	ip link set dev veth1 netns "${PEER_NS}"
+	ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24
+	ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad
+	ip -netns "${PEER_NS}" link set dev veth1 up
+	ip netns exec "${PEER_NS}" ethtool -K veth1 rx-gro-list on
+
+
+	ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp_dummy
+	tc -n "${PEER_NS}" qdisc add dev veth1 clsact
+	tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file ../bpf/nat6to4.o section schedcls/ingress6/nat_6  direct-action
+	tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file ../bpf/nat6to4.o section schedcls/egress4/snat4 direct-action
+        echo ${rx_args}
+	ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r &
+
+	# Hack: let bg programs complete the startup
+	sleep 0.1
+	./udpgso_bench_tx ${tx_args}
+}
+
+run_in_netns() {
+	local -r args=$@
+  echo ${args}
+	./in_netns.sh $0 __subprocess ${args}
+}
+
+run_udp() {
+	local -r args=$@
+
+	echo "udp gso - over veth touching data"
+	run_in_netns ${args} -u -S 0 rx -4 -v
+
+	echo "udp gso and gro - over veth touching data"
+	run_in_netns ${args} -S 0 rx -4 -G
+}
+
+run_tcp() {
+	local -r args=$@
+
+	echo "tcp - over veth touching data"
+	run_in_netns ${args} -t rx -4 -t
+}
+
+run_all() {
+	local -r core_args="-l 4"
+	local -r ipv4_args="${core_args} -4  -D 192.168.1.1"
+	local -r ipv6_args="${core_args} -6  -D 2001:db8::1"
+
+	echo "ipv6"
+	run_tcp "${ipv6_args}"
+	run_udp "${ipv6_args}"
+}
+
+if [ ! -f ../bpf/xdp_dummy.o ]; then
+	echo "Missing xdp_dummy helper. Build bpf selftest first"
+	exit -1
+fi
+
+if [ ! -f bpf/nat6to4.o ]; then
+	echo "Missing nat6to4 helper. Build bpfnat6to4.o selftest first"
+	exit -1
+fi
+
+if [[ $# -eq 0 ]]; then
+	run_all
+elif [[ $1 == "__subprocess" ]]; then
+	shift
+	run_one $@
+else
+	run_in_netns $@
+fi
-- 
cgit 


From 23b5c7961f7597ba063969dfd79da3f4e19c792f Mon Sep 17 00:00:00 2001
From: Rebecca Mckeever <remckee0@gmail.com>
Date: Sat, 30 Apr 2022 17:49:36 -0500
Subject: memblock tests: update style of comments for memblock_add_*()
 functions

Update comments in memblock_add_*() functions to match the style used
in tests/alloc_*.c by rewording to make the expected outcome more apparent
and, if more than one memblock is involved, adding a visual of the
memory blocks.

Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Rebecca Mckeever <remckee0@gmail.com>
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
---
 tools/testing/memblock/tests/basic_api.c | 85 ++++++++++++++++++++++----------
 1 file changed, 59 insertions(+), 26 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c
index fbc1ce160303..ab8d71b1e24b 100644
--- a/tools/testing/memblock/tests/basic_api.c
+++ b/tools/testing/memblock/tests/basic_api.c
@@ -26,8 +26,8 @@ static int memblock_initialization_check(void)
 /*
  * A simple test that adds a memory block of a specified base address
  * and size to the collection of available memory regions (memblock.memory).
- * It checks if a new entry was created and if region counter and total memory
- * were correctly updated.
+ * Expect to create a new entry. The region counter and total memory get
+ * updated.
  */
 static int memblock_add_simple_check(void)
 {
@@ -53,10 +53,10 @@ static int memblock_add_simple_check(void)
 }
 
 /*
- * A simple test that adds a memory block of a specified base address, size
+ * A simple test that adds a memory block of a specified base address, size,
  * NUMA node and memory flags to the collection of available memory regions.
- * It checks if the new entry, region counter and total memory size have
- * expected values.
+ * Expect to create a new entry. The region counter and total memory get
+ * updated.
  */
 static int memblock_add_node_simple_check(void)
 {
@@ -87,9 +87,15 @@ static int memblock_add_node_simple_check(void)
 
 /*
  * A test that tries to add two memory blocks that don't overlap with one
- * another. It checks if two correctly initialized entries were added to the
- * collection of available memory regions (memblock.memory) and if this
- * change was reflected in memblock.memory's total size and region counter.
+ * another:
+ *
+ *  |        +--------+        +--------+  |
+ *  |        |   r1   |        |   r2   |  |
+ *  +--------+--------+--------+--------+--+
+ *
+ * Expect to add two correctly initialized entries to the collection of
+ * available memory regions (memblock.memory). The total size and
+ * region counter fields get updated.
  */
 static int memblock_add_disjoint_check(void)
 {
@@ -124,11 +130,21 @@ static int memblock_add_disjoint_check(void)
 }
 
 /*
- * A test that tries to add two memory blocks, where the second one overlaps
- * with the beginning of the first entry (that is r1.base < r2.base + r2.size).
- * After this, it checks if two entries are merged into one region that starts
- * at r2.base and has size of two regions minus their intersection. It also
- * verifies the reported total size of the available memory and region counter.
+ * A test that tries to add two memory blocks r1 and r2, where r2 overlaps
+ * with the beginning of r1 (that is r1.base < r2.base + r2.size):
+ *
+ *  |    +----+----+------------+          |
+ *  |    |    |r2  |   r1       |          |
+ *  +----+----+----+------------+----------+
+ *       ^    ^
+ *       |    |
+ *       |    r1.base
+ *       |
+ *       r2.base
+ *
+ * Expect to merge the two entries into one region that starts at r2.base
+ * and has size of two regions minus their intersection. The total size of
+ * the available memory is updated, and the region counter stays the same.
  */
 static int memblock_add_overlap_top_check(void)
 {
@@ -162,12 +178,21 @@ static int memblock_add_overlap_top_check(void)
 }
 
 /*
- * A test that tries to add two memory blocks, where the second one overlaps
- * with the end of the first entry (that is r2.base < r1.base + r1.size).
- * After this, it checks if two entries are merged into one region that starts
- * at r1.base and has size of two regions minus their intersection. It verifies
- * that memblock can still see only one entry and has a correct total size of
- * the available memory.
+ * A test that tries to add two memory blocks r1 and r2, where r2 overlaps
+ * with the end of r1 (that is r2.base < r1.base + r1.size):
+ *
+ *  |  +--+------+----------+              |
+ *  |  |  | r1   | r2       |              |
+ *  +--+--+------+----------+--------------+
+ *     ^  ^
+ *     |  |
+ *     |  r2.base
+ *     |
+ *     r1.base
+ *
+ * Expect to merge the two entries into one region that starts at r1.base
+ * and has size of two regions minus their intersection. The total size of
+ * the available memory is updated, and the region counter stays the same.
  */
 static int memblock_add_overlap_bottom_check(void)
 {
@@ -201,11 +226,19 @@ static int memblock_add_overlap_bottom_check(void)
 }
 
 /*
- * A test that tries to add two memory blocks, where the second one is
- * within the range of the first entry (that is r1.base < r2.base &&
- * r2.base + r2.size < r1.base + r1.size). It checks if two entries are merged
- * into one region that stays the same. The counter and total size of available
- * memory are expected to not be updated.
+ * A test that tries to add two memory blocks r1 and r2, where r2 is
+ * within the range of r1 (that is r1.base < r2.base &&
+ * r2.base + r2.size < r1.base + r1.size):
+ *
+ *  |   +-------+--+-----------------------+
+ *  |   |       |r2|      r1               |
+ *  +---+-------+--+-----------------------+
+ *      ^
+ *      |
+ *      r1.base
+ *
+ * Expect to merge two entries into one region that stays the same.
+ * The counter and total size of available memory are not updated.
  */
 static int memblock_add_within_check(void)
 {
@@ -236,8 +269,8 @@ static int memblock_add_within_check(void)
 }
 
 /*
- * A simple test that tries to add the same memory block twice. The counter
- * and total size of available memory are expected to not be updated.
+ * A simple test that tries to add the same memory block twice. Expect
+ * the counter and total size of available memory to not be updated.
  */
 static int memblock_add_twice_check(void)
 {
-- 
cgit 


From e4f76c8d217e6b8c689ab31e0546888c2df016e4 Mon Sep 17 00:00:00 2001
From: Rebecca Mckeever <remckee0@gmail.com>
Date: Sat, 30 Apr 2022 17:49:37 -0500
Subject: memblock tests: update style of comments for memblock_reserve_*()
 functions

Update comments in memblock_reserve_*() functions to match the style used
in tests/alloc_*.c by rewording to make the expected outcome more apparent
and, if more than one memblock is involved, adding a visual of the
memory blocks.

If the comment has an extra column of spaces, remove the extra space at
the beginning of each line for consistency and to conform to Linux kernel
coding style.

Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Rebecca Mckeever <remckee0@gmail.com>
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
---
 tools/testing/memblock/tests/basic_api.c | 94 +++++++++++++++++++++-----------
 1 file changed, 63 insertions(+), 31 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c
index ab8d71b1e24b..6054b83962ca 100644
--- a/tools/testing/memblock/tests/basic_api.c
+++ b/tools/testing/memblock/tests/basic_api.c
@@ -303,12 +303,12 @@ static int memblock_add_checks(void)
 	return 0;
 }
 
- /*
-  * A simple test that marks a memory block of a specified base address
-  * and size as reserved and to the collection of reserved memory regions
-  * (memblock.reserved). It checks if a new entry was created and if region
-  * counter and total memory size were correctly updated.
-  */
+/*
+ * A simple test that marks a memory block of a specified base address
+ * and size as reserved and to the collection of reserved memory regions
+ * (memblock.reserved). Expect to create a new entry. The region counter
+ * and total memory size are updated.
+ */
 static int memblock_reserve_simple_check(void)
 {
 	struct memblock_region *rgn;
@@ -330,10 +330,15 @@ static int memblock_reserve_simple_check(void)
 }
 
 /*
- * A test that tries to mark two memory blocks that don't overlap as reserved
- * and checks if two entries were correctly added to the collection of reserved
- * memory regions (memblock.reserved) and if this change was reflected in
- * memblock.reserved's total size and region counter.
+ * A test that tries to mark two memory blocks that don't overlap as reserved:
+ *
+ *  |        +--+      +----------------+  |
+ *  |        |r1|      |       r2       |  |
+ *  +--------+--+------+----------------+--+
+ *
+ * Expect to add two entries to the collection of reserved memory regions
+ * (memblock.reserved). The total size and region counter for
+ * memblock.reserved are updated.
  */
 static int memblock_reserve_disjoint_check(void)
 {
@@ -368,13 +373,22 @@ static int memblock_reserve_disjoint_check(void)
 }
 
 /*
- * A test that tries to mark two memory blocks as reserved, where the
- * second one overlaps with the beginning of the first (that is
- * r1.base < r2.base + r2.size).
- * It checks if two entries are merged into one region that starts at r2.base
- * and has size of two regions minus their intersection. The test also verifies
- * that memblock can still see only one entry and has a correct total size of
- * the reserved memory.
+ * A test that tries to mark two memory blocks r1 and r2 as reserved,
+ * where r2 overlaps with the beginning of r1 (that is
+ * r1.base < r2.base + r2.size):
+ *
+ *  |  +--------------+--+--------------+  |
+ *  |  |       r2     |  |     r1       |  |
+ *  +--+--------------+--+--------------+--+
+ *     ^              ^
+ *     |              |
+ *     |              r1.base
+ *     |
+ *     r2.base
+ *
+ * Expect to merge two entries into one region that starts at r2.base and
+ * has size of two regions minus their intersection. The total size of the
+ * reserved memory is updated, and the region counter is not updated.
  */
 static int memblock_reserve_overlap_top_check(void)
 {
@@ -408,13 +422,22 @@ static int memblock_reserve_overlap_top_check(void)
 }
 
 /*
- * A test that tries to mark two memory blocks as reserved, where the
- * second one overlaps with the end of the first entry (that is
- * r2.base < r1.base + r1.size).
- * It checks if two entries are merged into one region that starts at r1.base
- * and has size of two regions minus their intersection. It verifies that
- * memblock can still see only one entry and has a correct total size of the
- * reserved memory.
+ * A test that tries to mark two memory blocks r1 and r2 as reserved,
+ * where r2 overlaps with the end of r1 (that is
+ * r2.base < r1.base + r1.size):
+ *
+ *  |  +--------------+--+--------------+  |
+ *  |  |       r1     |  |     r2       |  |
+ *  +--+--------------+--+--------------+--+
+ *     ^              ^
+ *     |              |
+ *     |              r2.base
+ *     |
+ *     r1.base
+ *
+ * Expect to merge two entries into one region that starts at r1.base and
+ * has size of two regions minus their intersection. The total size of the
+ * reserved memory is updated, and the region counter is not updated.
  */
 static int memblock_reserve_overlap_bottom_check(void)
 {
@@ -448,12 +471,21 @@ static int memblock_reserve_overlap_bottom_check(void)
 }
 
 /*
- * A test that tries to mark two memory blocks as reserved, where the second
- * one is within the range of the first entry (that is
- * (r1.base < r2.base) && (r2.base + r2.size < r1.base + r1.size)).
- * It checks if two entries are merged into one region that stays the
- * same. The counter and total size of available memory are expected to not be
- * updated.
+ * A test that tries to mark two memory blocks r1 and r2 as reserved,
+ * where r2 is within the range of r1 (that is
+ * (r1.base < r2.base) && (r2.base + r2.size < r1.base + r1.size)):
+ *
+ *  | +-----+--+---------------------------|
+ *  | |     |r2|          r1               |
+ *  +-+-----+--+---------------------------+
+ *    ^     ^
+ *    |     |
+ *    |     r2.base
+ *    |
+ *    r1.base
+ *
+ * Expect to merge two entries into one region that stays the same. The
+ * counter and total size of available memory are not updated.
  */
 static int memblock_reserve_within_check(void)
 {
@@ -485,7 +517,7 @@ static int memblock_reserve_within_check(void)
 
 /*
  * A simple test that tries to reserve the same memory block twice.
- * The region counter and total size of reserved memory are expected to not
+ * Expect the region counter and total size of reserved memory to not
  * be updated.
  */
 static int memblock_reserve_twice_check(void)
-- 
cgit 


From 60bba7b193cc8241b464b4616cfece9353086eeb Mon Sep 17 00:00:00 2001
From: Rebecca Mckeever <remckee0@gmail.com>
Date: Sat, 30 Apr 2022 17:49:38 -0500
Subject: memblock tests: update style of comments for memblock_remove_*()
 functions

Update comments in memblock_remove_*() functions to match the style used
in tests/alloc_*.c by rewording to make the expected outcome more apparent
and, if more than one memblock is involved, adding a visual of the
memory blocks.

If the comment has an extra column of spaces, remove the extra space at
the beginning of each line for consistency and to conform to Linux kernel
coding style.

Signed-off-by: Rebecca Mckeever <remckee0@gmail.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
---
 tools/testing/memblock/tests/basic_api.c | 111 ++++++++++++++++++++++---------
 1 file changed, 81 insertions(+), 30 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c
index 6054b83962ca..37b7cc075b0f 100644
--- a/tools/testing/memblock/tests/basic_api.c
+++ b/tools/testing/memblock/tests/basic_api.c
@@ -550,14 +550,22 @@ static int memblock_reserve_checks(void)
 	return 0;
 }
 
- /*
-  * A simple test that tries to remove the first entry of the array of
-  * available memory regions. By "removing" a region we mean overwriting it
-  * with the next region in memblock.memory. To check this is the case, the
-  * test adds two memory blocks and verifies that the value of the latter
-  * was used to erase r1 region.  It also checks if the region counter and
-  * total size were updated to expected values.
-  */
+/*
+ * A simple test that tries to remove a region r1 from the array of
+ * available memory regions. By "removing" a region we mean overwriting it
+ * with the next region r2 in memblock.memory:
+ *
+ *  |  ......          +----------------+  |
+ *  |  : r1 :          |       r2       |  |
+ *  +--+----+----------+----------------+--+
+ *                     ^
+ *                     |
+ *                     rgn.base
+ *
+ * Expect to add two memory blocks r1 and r2 and then remove r1 so that
+ * r2 is the first available region. The region counter and total size
+ * are updated.
+ */
 static int memblock_remove_simple_check(void)
 {
 	struct memblock_region *rgn;
@@ -587,11 +595,22 @@ static int memblock_remove_simple_check(void)
 	return 0;
 }
 
- /*
-  * A test that tries to remove a region that was not registered as available
-  * memory (i.e. has no corresponding entry in memblock.memory). It verifies
-  * that array, regions counter and total size were not modified.
-  */
+/*
+ * A test that tries to remove a region r2 that was not registered as
+ * available memory (i.e. has no corresponding entry in memblock.memory):
+ *
+ *                     +----------------+
+ *                     |       r2       |
+ *                     +----------------+
+ *  |  +----+                              |
+ *  |  | r1 |                              |
+ *  +--+----+------------------------------+
+ *     ^
+ *     |
+ *     rgn.base
+ *
+ * Expect the array, regions counter and total size to not be modified.
+ */
 static int memblock_remove_absent_check(void)
 {
 	struct memblock_region *rgn;
@@ -621,11 +640,23 @@ static int memblock_remove_absent_check(void)
 }
 
 /*
- * A test that tries to remove a region which overlaps with the beginning of
- * the already existing entry r1 (that is r1.base < r2.base + r2.size). It
- * checks if only the intersection of both regions is removed from the available
- * memory pool. The test also checks if the regions counter and total size are
- * updated to expected values.
+ * A test that tries to remove a region r2 that overlaps with the
+ * beginning of the already existing entry r1
+ * (that is r1.base < r2.base + r2.size):
+ *
+ *           +-----------------+
+ *           |       r2        |
+ *           +-----------------+
+ *  |                 .........+--------+  |
+ *  |                 :     r1 |  rgn   |  |
+ *  +-----------------+--------+--------+--+
+ *                    ^        ^
+ *                    |        |
+ *                    |        rgn.base
+ *                    r1.base
+ *
+ * Expect that only the intersection of both regions is removed from the
+ * available memory pool. The regions counter and total size are updated.
  */
 static int memblock_remove_overlap_top_check(void)
 {
@@ -661,11 +692,21 @@ static int memblock_remove_overlap_top_check(void)
 }
 
 /*
- * A test that tries to remove a region which overlaps with the end of the
- * first entry (that is r2.base < r1.base + r1.size). It checks if only the
- * intersection of both regions is removed from the available memory pool.
- * The test also checks if the regions counter and total size are updated to
- * expected values.
+ * A test that tries to remove a region r2 that overlaps with the end of
+ * the already existing region r1 (that is r2.base < r1.base + r1.size):
+ *
+ *        +--------------------------------+
+ *        |               r2               |
+ *        +--------------------------------+
+ *  | +---+.....                           |
+ *  | |rgn| r1 :                           |
+ *  +-+---+----+---------------------------+
+ *    ^
+ *    |
+ *    r1.base
+ *
+ * Expect that only the intersection of both regions is removed from the
+ * available memory pool. The regions counter and total size are updated.
  */
 static int memblock_remove_overlap_bottom_check(void)
 {
@@ -698,13 +739,23 @@ static int memblock_remove_overlap_bottom_check(void)
 }
 
 /*
- * A test that tries to remove a region which is within the range of the
- * already existing entry (that is
- * (r1.base < r2.base) && (r2.base + r2.size < r1.base + r1.size)).
- * It checks if the region is split into two - one that ends at r2.base and
- * second that starts at r2.base + size, with appropriate sizes. The test
- * also checks if the region counter and total size were updated to
- * expected values.
+ * A test that tries to remove a region r2 that is within the range of
+ * the already existing entry r1 (that is
+ * (r1.base < r2.base) && (r2.base + r2.size < r1.base + r1.size)):
+ *
+ *                  +----+
+ *                  | r2 |
+ *                  +----+
+ *  | +-------------+....+---------------+ |
+ *  | |     rgn1    | r1 |     rgn2      | |
+ *  +-+-------------+----+---------------+-+
+ *    ^
+ *    |
+ *    r1.base
+ *
+ * Expect that the region is split into two - one that ends at r2.base and
+ * another that starts at r2.base + r2.size, with appropriate sizes. The
+ * region counter and total size are updated.
  */
 static int memblock_remove_within_check(void)
 {
-- 
cgit 


From a5550c053f6cf9993119f5c82ffc25ea80364b64 Mon Sep 17 00:00:00 2001
From: Rebecca Mckeever <remckee0@gmail.com>
Date: Sat, 30 Apr 2022 17:49:39 -0500
Subject: memblock tests: update style of comments for memblock_free_*()
 functions

Update comments in memblock_free_*() functions to match the style used
in tests/alloc_*.c by rewording to make the expected outcome more apparent
and, if more than one memblock is involved, adding a visual of the
memory blocks.

If the comment has an extra column of spaces, remove the extra space at
the beginning of each line for consistency and to conform to Linux kernel
coding style.

Signed-off-by: Rebecca Mckeever <remckee0@gmail.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
---
 tools/testing/memblock/tests/basic_api.c | 102 +++++++++++++++++++++++--------
 1 file changed, 75 insertions(+), 27 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c
index 37b7cc075b0f..a7bc180316d6 100644
--- a/tools/testing/memblock/tests/basic_api.c
+++ b/tools/testing/memblock/tests/basic_api.c
@@ -806,12 +806,19 @@ static int memblock_remove_checks(void)
 }
 
 /*
- * A simple test that tries to free a memory block that was marked earlier
- * as reserved. By "freeing" a region we mean overwriting it with the next
- * entry in memblock.reserved. To check this is the case, the test reserves
- * two memory regions and verifies that the value of the latter was used to
- * erase r1 region.
- * The test also checks if the region counter and total size were updated.
+ * A simple test that tries to free a memory block r1 that was marked
+ * earlier as reserved. By "freeing" a region we mean overwriting it with
+ * the next entry r2 in memblock.reserved:
+ *
+ *  |              ......           +----+ |
+ *  |              : r1 :           | r2 | |
+ *  +--------------+----+-----------+----+-+
+ *                                  ^
+ *                                  |
+ *                                  rgn.base
+ *
+ * Expect to reserve two memory regions and then erase r1 region with the
+ * value of r2. The region counter and total size are updated.
  */
 static int memblock_free_simple_check(void)
 {
@@ -842,11 +849,22 @@ static int memblock_free_simple_check(void)
 	return 0;
 }
 
- /*
-  * A test that tries to free a region that was not marked as reserved
-  * (i.e. has no corresponding entry in memblock.reserved). It verifies
-  * that array, regions counter and total size were not modified.
-  */
+/*
+ * A test that tries to free a region r2 that was not marked as reserved
+ * (i.e. has no corresponding entry in memblock.reserved):
+ *
+ *                     +----------------+
+ *                     |       r2       |
+ *                     +----------------+
+ *  |  +----+                              |
+ *  |  | r1 |                              |
+ *  +--+----+------------------------------+
+ *     ^
+ *     |
+ *     rgn.base
+ *
+ * The array, regions counter and total size are not modified.
+ */
 static int memblock_free_absent_check(void)
 {
 	struct memblock_region *rgn;
@@ -876,11 +894,23 @@ static int memblock_free_absent_check(void)
 }
 
 /*
- * A test that tries to free a region which overlaps with the beginning of
- * the already existing entry r1 (that is r1.base < r2.base + r2.size). It
- * checks if only the intersection of both regions is freed. The test also
- * checks if the regions counter and total size are updated to expected
- * values.
+ * A test that tries to free a region r2 that overlaps with the beginning
+ * of the already existing entry r1 (that is r1.base < r2.base + r2.size):
+ *
+ *     +----+
+ *     | r2 |
+ *     +----+
+ *  |    ...+--------------+               |
+ *  |    :  |    r1        |               |
+ *  +----+--+--------------+---------------+
+ *       ^  ^
+ *       |  |
+ *       |  rgn.base
+ *       |
+ *       r1.base
+ *
+ * Expect that only the intersection of both regions is freed. The
+ * regions counter and total size are updated.
  */
 static int memblock_free_overlap_top_check(void)
 {
@@ -914,10 +944,18 @@ static int memblock_free_overlap_top_check(void)
 }
 
 /*
- * A test that tries to free a region which overlaps with the end of the
- * first entry (that is r2.base < r1.base + r1.size). It checks if only the
- * intersection of both regions is freed. The test also checks if the
- * regions counter and total size are updated to expected values.
+ * A test that tries to free a region r2 that overlaps with the end of
+ * the already existing entry r1 (that is r2.base < r1.base + r1.size):
+ *
+ *                   +----------------+
+ *                   |       r2       |
+ *                   +----------------+
+ *  |    +-----------+.....                |
+ *  |    |       r1  |    :                |
+ *  +----+-----------+----+----------------+
+ *
+ * Expect that only the intersection of both regions is freed. The
+ * regions counter and total size are updated.
  */
 static int memblock_free_overlap_bottom_check(void)
 {
@@ -951,13 +989,23 @@ static int memblock_free_overlap_bottom_check(void)
 }
 
 /*
- * A test that tries to free a region which is within the range of the
- * already existing entry (that is
- * (r1.base < r2.base) && (r2.base + r2.size < r1.base + r1.size)).
- * It checks if the region is split into two - one that ends at r2.base and
- * second that starts at r2.base + size, with appropriate sizes. It is
- * expected that the region counter and total size fields were updated t
- * reflect that change.
+ * A test that tries to free a region r2 that is within the range of the
+ * already existing entry r1 (that is
+ * (r1.base < r2.base) && (r2.base + r2.size < r1.base + r1.size)):
+ *
+ *                    +----+
+ *                    | r2 |
+ *                    +----+
+ *  |    +------------+....+---------------+
+ *  |    |    rgn1    | r1 |     rgn2      |
+ *  +----+------------+----+---------------+
+ *       ^
+ *       |
+ *       r1.base
+ *
+ * Expect that the region is split into two - one that ends at r2.base and
+ * another that starts at r2.base + r2.size, with appropriate sizes. The
+ * region counter and total size fields are updated.
  */
 static int memblock_free_within_check(void)
 {
-- 
cgit 


From 000605cd1b14f0970465a44bfe89da93cca66348 Mon Sep 17 00:00:00 2001
From: Rebecca Mckeever <remckee0@gmail.com>
Date: Sat, 30 Apr 2022 17:49:40 -0500
Subject: memblock tests: remove completed TODO item

Remove completed item from TODO list.

Signed-off-by: Rebecca Mckeever <remckee0@gmail.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
---
 tools/testing/memblock/TODO | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/memblock/TODO b/tools/testing/memblock/TODO
index c25b2fdec45e..cd1a30d5acc9 100644
--- a/tools/testing/memblock/TODO
+++ b/tools/testing/memblock/TODO
@@ -23,6 +23,3 @@ TODO
 
 5. Add tests for memblock_alloc_node() to check if the correct NUMA node is set
    for the new region
-
-6. Update comments in tests/basic_api.c to match the style used in
-   tests/alloc_*.c
-- 
cgit 


From 4598d9abf4215e1e371a35683350d50122793c80 Mon Sep 17 00:00:00 2001
From: Mickaël Salaün <mic@digikod.net>
Date: Fri, 6 May 2022 18:05:09 +0200
Subject: selftests/landlock: Add clang-format exceptions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In preparation to a following commit, add clang-format on and
clang-format off stanzas around constant definitions and the TEST_F_FORK
macro.  This enables to keep aligned values, which is much more readable
than packed definitions.

Add other clang-format exceptions for FIXTURE() and
FIXTURE_VARIANT_ADD() declarations to force space before open brace,
which is reported by checkpatch.pl .

Link: https://lore.kernel.org/r/20220506160513.523257-4-mic@digikod.net
Cc: stable@vger.kernel.org
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---
 tools/testing/selftests/landlock/common.h      |  2 ++
 tools/testing/selftests/landlock/fs_test.c     | 23 +++++++++++++++++------
 tools/testing/selftests/landlock/ptrace_test.c | 20 +++++++++++++++++++-
 3 files changed, 38 insertions(+), 7 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h
index 183b7e8e1b95..435ba6963756 100644
--- a/tools/testing/selftests/landlock/common.h
+++ b/tools/testing/selftests/landlock/common.h
@@ -25,6 +25,7 @@
  * this to be possible, we must not call abort() but instead exit smoothly
  * (hence the step print).
  */
+/* clang-format off */
 #define TEST_F_FORK(fixture_name, test_name) \
 	static void fixture_name##_##test_name##_child( \
 		struct __test_metadata *_metadata, \
@@ -71,6 +72,7 @@
 		FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \
 		const FIXTURE_VARIANT(fixture_name) \
 			__attribute__((unused)) *variant)
+/* clang-format on */
 
 #ifndef landlock_create_ruleset
 static inline int landlock_create_ruleset(
diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
index 10c9a1e4ebd9..aef2eb3d07cd 100644
--- a/tools/testing/selftests/landlock/fs_test.c
+++ b/tools/testing/selftests/landlock/fs_test.c
@@ -221,8 +221,9 @@ static void remove_layout1(struct __test_metadata *const _metadata)
 	EXPECT_EQ(0, remove_path(dir_s3d2));
 }
 
-FIXTURE(layout1) {
-};
+/* clang-format off */
+FIXTURE(layout1) {};
+/* clang-format on */
 
 FIXTURE_SETUP(layout1)
 {
@@ -376,6 +377,8 @@ TEST_F_FORK(layout1, inval)
 	ASSERT_EQ(0, close(ruleset_fd));
 }
 
+/* clang-format off */
+
 #define ACCESS_FILE ( \
 	LANDLOCK_ACCESS_FS_EXECUTE | \
 	LANDLOCK_ACCESS_FS_WRITE_FILE | \
@@ -396,6 +399,8 @@ TEST_F_FORK(layout1, inval)
 	LANDLOCK_ACCESS_FS_MAKE_BLOCK | \
 	ACCESS_LAST)
 
+/* clang-format on */
+
 TEST_F_FORK(layout1, file_access_rights)
 {
 	__u64 access;
@@ -452,6 +457,8 @@ struct rule {
 	__u64 access;
 };
 
+/* clang-format off */
+
 #define ACCESS_RO ( \
 	LANDLOCK_ACCESS_FS_READ_FILE | \
 	LANDLOCK_ACCESS_FS_READ_DIR)
@@ -460,6 +467,8 @@ struct rule {
 	ACCESS_RO | \
 	LANDLOCK_ACCESS_FS_WRITE_FILE)
 
+/* clang-format on */
+
 static int create_ruleset(struct __test_metadata *const _metadata,
 		const __u64 handled_access_fs, const struct rule rules[])
 {
@@ -2070,8 +2079,9 @@ TEST_F_FORK(layout1, proc_pipe)
 	ASSERT_EQ(0, close(pipe_fds[1]));
 }
 
-FIXTURE(layout1_bind) {
-};
+/* clang-format off */
+FIXTURE(layout1_bind) {};
+/* clang-format on */
 
 FIXTURE_SETUP(layout1_bind)
 {
@@ -2411,8 +2421,9 @@ static const char (*merge_sub_files[])[] = {
  *         └── work
  */
 
-FIXTURE(layout2_overlay) {
-};
+/* clang-format off */
+FIXTURE(layout2_overlay) {};
+/* clang-format on */
 
 FIXTURE_SETUP(layout2_overlay)
 {
diff --git a/tools/testing/selftests/landlock/ptrace_test.c b/tools/testing/selftests/landlock/ptrace_test.c
index 15fbef9cc849..090adadfe2dc 100644
--- a/tools/testing/selftests/landlock/ptrace_test.c
+++ b/tools/testing/selftests/landlock/ptrace_test.c
@@ -59,7 +59,9 @@ static int test_ptrace_read(const pid_t pid)
 	return 0;
 }
 
-FIXTURE(hierarchy) { };
+/* clang-format off */
+FIXTURE(hierarchy) {};
+/* clang-format on */
 
 FIXTURE_VARIANT(hierarchy) {
 	const bool domain_both;
@@ -83,7 +85,9 @@ FIXTURE_VARIANT(hierarchy) {
  *       \              P2 -> P1 : allow
  *        'P2
  */
+/* clang-format off */
 FIXTURE_VARIANT_ADD(hierarchy, allow_without_domain) {
+	/* clang-format on */
 	.domain_both = false,
 	.domain_parent = false,
 	.domain_child = false,
@@ -98,7 +102,9 @@ FIXTURE_VARIANT_ADD(hierarchy, allow_without_domain) {
  *        |  P2  |
  *        '------'
  */
+/* clang-format off */
 FIXTURE_VARIANT_ADD(hierarchy, allow_with_one_domain) {
+	/* clang-format on */
 	.domain_both = false,
 	.domain_parent = false,
 	.domain_child = true,
@@ -112,7 +118,9 @@ FIXTURE_VARIANT_ADD(hierarchy, allow_with_one_domain) {
  *            '
  *            P2
  */
+/* clang-format off */
 FIXTURE_VARIANT_ADD(hierarchy, deny_with_parent_domain) {
+	/* clang-format on */
 	.domain_both = false,
 	.domain_parent = true,
 	.domain_child = false,
@@ -127,7 +135,9 @@ FIXTURE_VARIANT_ADD(hierarchy, deny_with_parent_domain) {
  *         |  P2  |
  *         '------'
  */
+/* clang-format off */
 FIXTURE_VARIANT_ADD(hierarchy, deny_with_sibling_domain) {
+	/* clang-format on */
 	.domain_both = false,
 	.domain_parent = true,
 	.domain_child = true,
@@ -142,7 +152,9 @@ FIXTURE_VARIANT_ADD(hierarchy, deny_with_sibling_domain) {
  * |         P2  |
  * '-------------'
  */
+/* clang-format off */
 FIXTURE_VARIANT_ADD(hierarchy, allow_sibling_domain) {
+	/* clang-format on */
 	.domain_both = true,
 	.domain_parent = false,
 	.domain_child = false,
@@ -158,7 +170,9 @@ FIXTURE_VARIANT_ADD(hierarchy, allow_sibling_domain) {
  * |        '------' |
  * '-----------------'
  */
+/* clang-format off */
 FIXTURE_VARIANT_ADD(hierarchy, allow_with_nested_domain) {
+	/* clang-format on */
 	.domain_both = true,
 	.domain_parent = false,
 	.domain_child = true,
@@ -174,7 +188,9 @@ FIXTURE_VARIANT_ADD(hierarchy, allow_with_nested_domain) {
  * |             P2  |
  * '-----------------'
  */
+/* clang-format off */
 FIXTURE_VARIANT_ADD(hierarchy, deny_with_nested_and_parent_domain) {
+	/* clang-format on */
 	.domain_both = true,
 	.domain_parent = true,
 	.domain_child = false,
@@ -192,7 +208,9 @@ FIXTURE_VARIANT_ADD(hierarchy, deny_with_nested_and_parent_domain) {
  * |        '------' |
  * '-----------------'
  */
+/* clang-format off */
 FIXTURE_VARIANT_ADD(hierarchy, deny_with_forked_domain) {
+	/* clang-format on */
 	.domain_both = true,
 	.domain_parent = true,
 	.domain_child = true,
-- 
cgit 


From 135464f9d29c5b306d7201220f1d00dab30fea89 Mon Sep 17 00:00:00 2001
From: Mickaël Salaün <mic@digikod.net>
Date: Fri, 6 May 2022 18:05:10 +0200
Subject: selftests/landlock: Normalize array assignment
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a comma after each array value to make clang-format keep the
current array formatting.  See the following commit.

Automatically modified with:
sed -i 's/\t\({}\|NULL\)$/\0,/' tools/testing/selftests/landlock/fs_test.c

Link: https://lore.kernel.org/r/20220506160513.523257-5-mic@digikod.net
Cc: stable@vger.kernel.org
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---
 tools/testing/selftests/landlock/fs_test.c | 112 ++++++++++++++---------------
 1 file changed, 56 insertions(+), 56 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
index aef2eb3d07cd..198184ca0396 100644
--- a/tools/testing/selftests/landlock/fs_test.c
+++ b/tools/testing/selftests/landlock/fs_test.c
@@ -514,7 +514,7 @@ TEST_F_FORK(layout1, proc_nsfs)
 			.access = LANDLOCK_ACCESS_FS_READ_FILE |
 				LANDLOCK_ACCESS_FS_WRITE_FILE,
 		},
-		{}
+		{},
 	};
 	struct landlock_path_beneath_attr path_beneath;
 	const int ruleset_fd = create_ruleset(_metadata, rules[0].access |
@@ -560,7 +560,7 @@ TEST_F_FORK(layout1, unpriv) {
 			.path = dir_s1d2,
 			.access = ACCESS_RO,
 		},
-		{}
+		{},
 	};
 	int ruleset_fd;
 
@@ -588,7 +588,7 @@ TEST_F_FORK(layout1, effective_access)
 			.access = LANDLOCK_ACCESS_FS_READ_FILE |
 				LANDLOCK_ACCESS_FS_WRITE_FILE,
 		},
-		{}
+		{},
 	};
 	const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
 	char buf;
@@ -635,7 +635,7 @@ TEST_F_FORK(layout1, unhandled_access)
 			.path = dir_s1d2,
 			.access = ACCESS_RO,
 		},
-		{}
+		{},
 	};
 	/* Here, we only handle read accesses, not write accesses. */
 	const int ruleset_fd = create_ruleset(_metadata, ACCESS_RO, rules);
@@ -669,7 +669,7 @@ TEST_F_FORK(layout1, ruleset_overlap)
 			.access = LANDLOCK_ACCESS_FS_READ_FILE |
 				LANDLOCK_ACCESS_FS_READ_DIR,
 		},
-		{}
+		{},
 	};
 	const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
 
@@ -703,14 +703,14 @@ TEST_F_FORK(layout1, non_overlapping_accesses)
 			.path = dir_s1d2,
 			.access = LANDLOCK_ACCESS_FS_MAKE_REG,
 		},
-		{}
+		{},
 	};
 	const struct rule layer2[] = {
 		{
 			.path = dir_s1d3,
 			.access = LANDLOCK_ACCESS_FS_REMOVE_FILE,
 		},
-		{}
+		{},
 	};
 	int ruleset_fd;
 
@@ -767,7 +767,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
 			.path = file1_s1d3,
 			.access = LANDLOCK_ACCESS_FS_READ_FILE,
 		},
-		{}
+		{},
 	};
 	/* First rule with write restrictions. */
 	const struct rule layer2_read_write[] = {
@@ -782,7 +782,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
 			.path = dir_s1d2,
 			.access = LANDLOCK_ACCESS_FS_WRITE_FILE,
 		},
-		{}
+		{},
 	};
 	const struct rule layer3_read[] = {
 		/* Allows read access via its great-grandparent directory. */
@@ -790,7 +790,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
 			.path = dir_s1d1,
 			.access = LANDLOCK_ACCESS_FS_READ_FILE,
 		},
-		{}
+		{},
 	};
 	const struct rule layer4_read_write[] = {
 		/*
@@ -801,7 +801,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
 			.path = dir_s1d2,
 			.access = LANDLOCK_ACCESS_FS_READ_FILE,
 		},
-		{}
+		{},
 	};
 	const struct rule layer5_read[] = {
 		/*
@@ -812,7 +812,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
 			.path = dir_s1d2,
 			.access = LANDLOCK_ACCESS_FS_READ_FILE,
 		},
-		{}
+		{},
 	};
 	const struct rule layer6_execute[] = {
 		/*
@@ -823,7 +823,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
 			.path = dir_s2d1,
 			.access = LANDLOCK_ACCESS_FS_EXECUTE,
 		},
-		{}
+		{},
 	};
 	const struct rule layer7_read_write[] = {
 		/*
@@ -834,7 +834,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
 			.path = dir_s1d2,
 			.access = LANDLOCK_ACCESS_FS_WRITE_FILE,
 		},
-		{}
+		{},
 	};
 	int ruleset_fd;
 
@@ -932,7 +932,7 @@ TEST_F_FORK(layout1, inherit_subset)
 			.access = LANDLOCK_ACCESS_FS_READ_FILE |
 				LANDLOCK_ACCESS_FS_READ_DIR,
 		},
-		{}
+		{},
 	};
 	const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
 
@@ -1048,7 +1048,7 @@ TEST_F_FORK(layout1, inherit_superset)
 			.path = dir_s1d3,
 			.access = ACCESS_RO,
 		},
-		{}
+		{},
 	};
 	const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
 
@@ -1084,7 +1084,7 @@ TEST_F_FORK(layout1, max_layers)
 			.path = dir_s1d2,
 			.access = ACCESS_RO,
 		},
-		{}
+		{},
 	};
 	const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
 
@@ -1146,7 +1146,7 @@ TEST_F_FORK(layout1, rule_on_mountpoint)
 			.path = dir_s3d2,
 			.access = ACCESS_RO,
 		},
-		{}
+		{},
 	};
 	const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
 
@@ -1175,7 +1175,7 @@ TEST_F_FORK(layout1, rule_over_mountpoint)
 			.path = dir_s3d1,
 			.access = ACCESS_RO,
 		},
-		{}
+		{},
 	};
 	const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
 
@@ -1203,7 +1203,7 @@ TEST_F_FORK(layout1, rule_over_root_allow_then_deny)
 			.path = "/",
 			.access = ACCESS_RO,
 		},
-		{}
+		{},
 	};
 	int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
 
@@ -1233,7 +1233,7 @@ TEST_F_FORK(layout1, rule_over_root_deny)
 			.path = "/",
 			.access = LANDLOCK_ACCESS_FS_READ_FILE,
 		},
-		{}
+		{},
 	};
 	const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
 
@@ -1253,7 +1253,7 @@ TEST_F_FORK(layout1, rule_inside_mount_ns)
 			.path = "s3d3",
 			.access = ACCESS_RO,
 		},
-		{}
+		{},
 	};
 	int ruleset_fd;
 
@@ -1280,7 +1280,7 @@ TEST_F_FORK(layout1, mount_and_pivot)
 			.path = dir_s3d2,
 			.access = ACCESS_RO,
 		},
-		{}
+		{},
 	};
 	const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
 
@@ -1303,7 +1303,7 @@ TEST_F_FORK(layout1, move_mount)
 			.path = dir_s3d2,
 			.access = ACCESS_RO,
 		},
-		{}
+		{},
 	};
 	const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
 
@@ -1344,7 +1344,7 @@ TEST_F_FORK(layout1, release_inodes)
 			.path = dir_s3d3,
 			.access = ACCESS_RO,
 		},
-		{}
+		{},
 	};
 	const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
 
@@ -1382,7 +1382,7 @@ static void test_relative_path(struct __test_metadata *const _metadata,
 			.path = TMP_DIR,
 			.access = ACCESS_RO,
 		},
-		{}
+		{},
 	};
 	const struct rule layer2_subs[] = {
 		{
@@ -1393,7 +1393,7 @@ static void test_relative_path(struct __test_metadata *const _metadata,
 			.path = dir_s2d2,
 			.access = ACCESS_RO,
 		},
-		{}
+		{},
 	};
 	int dirfd, ruleset_fd;
 
@@ -1558,7 +1558,7 @@ TEST_F_FORK(layout1, execute)
 			.path = dir_s1d2,
 			.access = LANDLOCK_ACCESS_FS_EXECUTE,
 		},
-		{}
+		{},
 	};
 	const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
 			rules);
@@ -1591,7 +1591,7 @@ TEST_F_FORK(layout1, link)
 			.path = dir_s1d2,
 			.access = LANDLOCK_ACCESS_FS_MAKE_REG,
 		},
-		{}
+		{},
 	};
 	const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
 			rules);
@@ -1628,7 +1628,7 @@ TEST_F_FORK(layout1, rename_file)
 			.path = dir_s2d2,
 			.access = LANDLOCK_ACCESS_FS_REMOVE_FILE,
 		},
-		{}
+		{},
 	};
 	const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
 			rules);
@@ -1705,7 +1705,7 @@ TEST_F_FORK(layout1, rename_dir)
 			.path = dir_s2d1,
 			.access = LANDLOCK_ACCESS_FS_REMOVE_DIR,
 		},
-		{}
+		{},
 	};
 	const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
 			rules);
@@ -1759,7 +1759,7 @@ TEST_F_FORK(layout1, remove_dir)
 			.path = dir_s1d2,
 			.access = LANDLOCK_ACCESS_FS_REMOVE_DIR,
 		},
-		{}
+		{},
 	};
 	const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
 			rules);
@@ -1796,7 +1796,7 @@ TEST_F_FORK(layout1, remove_file)
 			.path = dir_s1d2,
 			.access = LANDLOCK_ACCESS_FS_REMOVE_FILE,
 		},
-		{}
+		{},
 	};
 	const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
 			rules);
@@ -1821,7 +1821,7 @@ static void test_make_file(struct __test_metadata *const _metadata,
 			.path = dir_s1d2,
 			.access = access,
 		},
-		{}
+		{},
 	};
 	const int ruleset_fd = create_ruleset(_metadata, access, rules);
 
@@ -1907,7 +1907,7 @@ TEST_F_FORK(layout1, make_sym)
 			.path = dir_s1d2,
 			.access = LANDLOCK_ACCESS_FS_MAKE_SYM,
 		},
-		{}
+		{},
 	};
 	const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
 			rules);
@@ -1952,7 +1952,7 @@ TEST_F_FORK(layout1, make_dir)
 			.path = dir_s1d2,
 			.access = LANDLOCK_ACCESS_FS_MAKE_DIR,
 		},
-		{}
+		{},
 	};
 	const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
 			rules);
@@ -1992,7 +1992,7 @@ TEST_F_FORK(layout1, proc_unlinked_file)
 			.path = file1_s1d2,
 			.access = LANDLOCK_ACCESS_FS_READ_FILE,
 		},
-		{}
+		{},
 	};
 	int reg_fd, proc_fd;
 	const int ruleset_fd = create_ruleset(_metadata,
@@ -2034,7 +2034,7 @@ TEST_F_FORK(layout1, proc_pipe)
 			.access = LANDLOCK_ACCESS_FS_READ_FILE |
 				LANDLOCK_ACCESS_FS_WRITE_FILE,
 		},
-		{}
+		{},
 	};
 	/* Limits read and write access to files tied to the filesystem. */
 	const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
@@ -2171,7 +2171,7 @@ TEST_F_FORK(layout1_bind, same_content_same_file)
 			.path = dir_s2d1,
 			.access = ACCESS_RW,
 		},
-		{}
+		{},
 	};
 	/*
 	 * Sets access rights on the same bind-mounted directories.  The result
@@ -2187,7 +2187,7 @@ TEST_F_FORK(layout1_bind, same_content_same_file)
 			.path = dir_s2d2,
 			.access = ACCESS_RW,
 		},
-		{}
+		{},
 	};
 	/* Only allow read-access to the s1d3 hierarchies. */
 	const struct rule layer3_source[] = {
@@ -2195,7 +2195,7 @@ TEST_F_FORK(layout1_bind, same_content_same_file)
 			.path = dir_s1d3,
 			.access = LANDLOCK_ACCESS_FS_READ_FILE,
 		},
-		{}
+		{},
 	};
 	/* Removes all access rights. */
 	const struct rule layer4_destination[] = {
@@ -2203,7 +2203,7 @@ TEST_F_FORK(layout1_bind, same_content_same_file)
 			.path = bind_file1_s1d3,
 			.access = LANDLOCK_ACCESS_FS_WRITE_FILE,
 		},
-		{}
+		{},
 	};
 	int ruleset_fd;
 
@@ -2305,18 +2305,18 @@ static const char lower_do1_fl3[] = LOWER_DATA "/do1/fl3";
 static const char (*lower_base_files[])[] = {
 	&lower_fl1,
 	&lower_fo1,
-	NULL
+	NULL,
 };
 static const char (*lower_base_directories[])[] = {
 	&lower_dl1,
 	&lower_do1,
-	NULL
+	NULL,
 };
 static const char (*lower_sub_files[])[] = {
 	&lower_dl1_fl2,
 	&lower_do1_fo2,
 	&lower_do1_fl3,
-	NULL
+	NULL,
 };
 
 #define UPPER_BASE	TMP_DIR "/upper"
@@ -2333,18 +2333,18 @@ static const char upper_do1_fu3[] = UPPER_DATA "/do1/fu3";
 static const char (*upper_base_files[])[] = {
 	&upper_fu1,
 	&upper_fo1,
-	NULL
+	NULL,
 };
 static const char (*upper_base_directories[])[] = {
 	&upper_du1,
 	&upper_do1,
-	NULL
+	NULL,
 };
 static const char (*upper_sub_files[])[] = {
 	&upper_du1_fu2,
 	&upper_do1_fo2,
 	&upper_do1_fu3,
-	NULL
+	NULL,
 };
 
 #define MERGE_BASE	TMP_DIR "/merge"
@@ -2365,13 +2365,13 @@ static const char (*merge_base_files[])[] = {
 	&merge_fl1,
 	&merge_fu1,
 	&merge_fo1,
-	NULL
+	NULL,
 };
 static const char (*merge_base_directories[])[] = {
 	&merge_dl1,
 	&merge_du1,
 	&merge_do1,
-	NULL
+	NULL,
 };
 static const char (*merge_sub_files[])[] = {
 	&merge_dl1_fl2,
@@ -2379,7 +2379,7 @@ static const char (*merge_sub_files[])[] = {
 	&merge_do1_fo2,
 	&merge_do1_fl3,
 	&merge_do1_fu3,
-	NULL
+	NULL,
 };
 
 /*
@@ -2544,7 +2544,7 @@ TEST_F_FORK(layout2_overlay, same_content_different_file)
 			.path = MERGE_BASE,
 			.access = ACCESS_RW,
 		},
-		{}
+		{},
 	};
 	const struct rule layer2_data[] = {
 		{
@@ -2559,7 +2559,7 @@ TEST_F_FORK(layout2_overlay, same_content_different_file)
 			.path = MERGE_DATA,
 			.access = ACCESS_RW,
 		},
-		{}
+		{},
 	};
 	/* Sets access right on directories inside both layers. */
 	const struct rule layer3_subdirs[] = {
@@ -2591,7 +2591,7 @@ TEST_F_FORK(layout2_overlay, same_content_different_file)
 			.path = merge_do1,
 			.access = ACCESS_RW,
 		},
-		{}
+		{},
 	};
 	/* Tighten access rights to the files. */
 	const struct rule layer4_files[] = {
@@ -2644,7 +2644,7 @@ TEST_F_FORK(layout2_overlay, same_content_different_file)
 			.access = LANDLOCK_ACCESS_FS_READ_FILE |
 				LANDLOCK_ACCESS_FS_WRITE_FILE,
 		},
-		{}
+		{},
 	};
 	const struct rule layer5_merge_only[] = {
 		{
@@ -2652,7 +2652,7 @@ TEST_F_FORK(layout2_overlay, same_content_different_file)
 			.access = LANDLOCK_ACCESS_FS_READ_FILE |
 				LANDLOCK_ACCESS_FS_WRITE_FILE,
 		},
-		{}
+		{},
 	};
 	int ruleset_fd;
 	size_t i;
-- 
cgit 


From d23386ed7019d50164fa2066aae8656097a02425 Mon Sep 17 00:00:00 2001
From: Carlos Llamas <cmllamas@google.com>
Date: Fri, 29 Apr 2022 23:56:42 +0000
Subject: binderfs: add extended_error feature entry

Add extended_error to the binderfs feature list, to help userspace
determine whether the BINDER_GET_EXTENDED_ERROR ioctl is supported by
the binder driver.

Reviewed-by: Christian Brauner (Microsoft) <brauner@kernel.org>
Acked-by: Todd Kjos <tkjos@google.com>
Signed-off-by: Carlos Llamas <cmllamas@google.com>
Link: https://lore.kernel.org/r/20220429235644.697372-4-cmllamas@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/android/binderfs.c                                   | 8 ++++++++
 tools/testing/selftests/filesystems/binderfs/binderfs_test.c | 1 +
 2 files changed, 9 insertions(+)

(limited to 'tools/testing')

diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c
index e3605cdd4335..6c5e94f6cb3a 100644
--- a/drivers/android/binderfs.c
+++ b/drivers/android/binderfs.c
@@ -60,6 +60,7 @@ enum binderfs_stats_mode {
 
 struct binder_features {
 	bool oneway_spam_detection;
+	bool extended_error;
 };
 
 static const struct constant_table binderfs_param_stats[] = {
@@ -75,6 +76,7 @@ static const struct fs_parameter_spec binderfs_fs_parameters[] = {
 
 static struct binder_features binder_features = {
 	.oneway_spam_detection = true,
+	.extended_error = true,
 };
 
 static inline struct binderfs_info *BINDERFS_SB(const struct super_block *sb)
@@ -615,6 +617,12 @@ static int init_binder_features(struct super_block *sb)
 	if (IS_ERR(dentry))
 		return PTR_ERR(dentry);
 
+	dentry = binderfs_create_file(dir, "extended_error",
+				      &binder_features_fops,
+				      &binder_features.extended_error);
+	if (IS_ERR(dentry))
+		return PTR_ERR(dentry);
+
 	return 0;
 }
 
diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
index 0315955ff0f4..9409bb136d95 100644
--- a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
+++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
@@ -64,6 +64,7 @@ static int __do_binderfs_test(struct __test_metadata *_metadata)
 		device_path[sizeof(P_tmpdir "/binderfs_XXXXXX/") + BINDERFS_MAX_NAME];
 	static const char * const binder_features[] = {
 		"oneway_spam_detection",
+		"extended_error",
 	};
 
 	change_mountns(_metadata);
-- 
cgit 


From 1e2666e029e5cc2b81dbd7c85af5bcc8c80524e0 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Sun, 8 May 2022 17:41:40 -0700
Subject: selftests/bpf: Prevent skeleton generation race

Prevent "classic" and light skeleton generation rules from stomping on
each other's toes due to the use of the same <obj>.linked{1,2,3}.o
naming pattern. There is no coordination and synchronizataion between
.skel.h and .lskel.h rules, so they can easily overwrite each other's
intermediate object files, leading to errors like:

  /bin/sh: line 1: 170928 Bus error               (core dumped)
  /data/users/andriin/linux/tools/testing/selftests/bpf/tools/sbin/bpftool gen skeleton
  /data/users/andriin/linux/tools/testing/selftests/bpf/test_ksyms_weak.linked3.o
  name test_ksyms_weak
  > /data/users/andriin/linux/tools/testing/selftests/bpf/test_ksyms_weak.skel.h
  make: *** [Makefile:507: /data/users/andriin/linux/tools/testing/selftests/bpf/test_ksyms_weak.skel.h] Error 135
  make: *** Deleting file '/data/users/andriin/linux/tools/testing/selftests/bpf/test_ksyms_weak.skel.h'

Fix by using different suffix for light skeleton rule.

Fixes: c48e51c8b07a ("bpf: selftests: Add selftests for module kfunc support")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20220509004148.1801791-2-andrii@kernel.org
---
 tools/testing/selftests/bpf/Makefile | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index bafdc5373a13..6bbc03161544 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -423,11 +423,11 @@ $(TRUNNER_BPF_SKELS): %.skel.h: %.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
 
 $(TRUNNER_BPF_LSKELS): %.lskel.h: %.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
 	$$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@)
-	$(Q)$$(BPFTOOL) gen object $$(<:.o=.linked1.o) $$<
-	$(Q)$$(BPFTOOL) gen object $$(<:.o=.linked2.o) $$(<:.o=.linked1.o)
-	$(Q)$$(BPFTOOL) gen object $$(<:.o=.linked3.o) $$(<:.o=.linked2.o)
-	$(Q)diff $$(<:.o=.linked2.o) $$(<:.o=.linked3.o)
-	$(Q)$$(BPFTOOL) gen skeleton -L $$(<:.o=.linked3.o) name $$(notdir $$(<:.o=_lskel)) > $$@
+	$(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked1.o) $$<
+	$(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked2.o) $$(<:.o=.llinked1.o)
+	$(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked3.o) $$(<:.o=.llinked2.o)
+	$(Q)diff $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o)
+	$(Q)$$(BPFTOOL) gen skeleton -L $$(<:.o=.llinked3.o) name $$(notdir $$(<:.o=_lskel)) > $$@
 
 $(TRUNNER_BPF_SKELS_LINKED): $(TRUNNER_BPF_OBJS) $(BPFTOOL) | $(TRUNNER_OUTPUT)
 	$$(call msg,LINK-BPF,$(TRUNNER_BINARY),$$(@:.skel.h=.o))
-- 
cgit 


From 2a4ca46b7d2a6f7ba7359e8d7fafe9ad378fa18e Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Sun, 8 May 2022 17:41:43 -0700
Subject: selftests/bpf: Use both syntaxes for field-based CO-RE helpers

Excercise both supported forms of bpf_core_field_exists() and
bpf_core_field_size() helpers: variable-based field reference and
type/field name-based one.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20220509004148.1801791-5-andrii@kernel.org
---
 tools/testing/selftests/bpf/progs/test_core_reloc_existence.c | 11 +++++------
 tools/testing/selftests/bpf/progs/test_core_reloc_size.c      |  8 ++++----
 2 files changed, 9 insertions(+), 10 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_existence.c b/tools/testing/selftests/bpf/progs/test_core_reloc_existence.c
index 7e45e2bdf6cd..5b8a75097ea3 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_existence.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_existence.c
@@ -45,35 +45,34 @@ int test_core_existence(void *ctx)
 	struct core_reloc_existence_output *out = (void *)&data.out;
 
 	out->a_exists = bpf_core_field_exists(in->a);
-	if (bpf_core_field_exists(in->a))
+	if (bpf_core_field_exists(struct core_reloc_existence, a))
 		out->a_value = BPF_CORE_READ(in, a);
 	else
 		out->a_value = 0xff000001u;
 
 	out->b_exists = bpf_core_field_exists(in->b);
-	if (bpf_core_field_exists(in->b))
+	if (bpf_core_field_exists(struct core_reloc_existence, b))
 		out->b_value = BPF_CORE_READ(in, b);
 	else
 		out->b_value = 0xff000002u;
 
 	out->c_exists = bpf_core_field_exists(in->c);
-	if (bpf_core_field_exists(in->c))
+	if (bpf_core_field_exists(struct core_reloc_existence, c))
 		out->c_value = BPF_CORE_READ(in, c);
 	else
 		out->c_value = 0xff000003u;
 
 	out->arr_exists = bpf_core_field_exists(in->arr);
-	if (bpf_core_field_exists(in->arr))
+	if (bpf_core_field_exists(struct core_reloc_existence, arr))
 		out->arr_value = BPF_CORE_READ(in, arr[0]);
 	else
 		out->arr_value = 0xff000004u;
 
 	out->s_exists = bpf_core_field_exists(in->s);
-	if (bpf_core_field_exists(in->s))
+	if (bpf_core_field_exists(struct core_reloc_existence, s))
 		out->s_value = BPF_CORE_READ(in, s.x);
 	else
 		out->s_value = 0xff000005u;
 
 	return 0;
 }
-
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_size.c b/tools/testing/selftests/bpf/progs/test_core_reloc_size.c
index 7b2d576aeea1..6766addd2583 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_size.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_size.c
@@ -44,10 +44,10 @@ int test_core_size(void *ctx)
 	out->struct_sz = bpf_core_field_size(in->struct_field);
 	out->union_sz = bpf_core_field_size(in->union_field);
 	out->arr_sz = bpf_core_field_size(in->arr_field);
-	out->arr_elem_sz = bpf_core_field_size(in->arr_field[0]);
-	out->ptr_sz = bpf_core_field_size(in->ptr_field);
-	out->enum_sz = bpf_core_field_size(in->enum_field);
-	out->float_sz = bpf_core_field_size(in->float_field);
+	out->arr_elem_sz = bpf_core_field_size(struct core_reloc_size, arr_field[0]);
+	out->ptr_sz = bpf_core_field_size(struct core_reloc_size, ptr_field);
+	out->enum_sz = bpf_core_field_size(struct core_reloc_size, enum_field);
+	out->float_sz = bpf_core_field_size(struct core_reloc_size, float_field);
 
 	return 0;
 }
-- 
cgit 


From 785c3342cf6c520250258d8d7bc0cae1d4461dc8 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Sun, 8 May 2022 17:41:45 -0700
Subject: selftests/bpf: Add bpf_core_field_offset() tests

Add test cases for bpf_core_field_offset() helper.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20220509004148.1801791-7-andrii@kernel.org
---
 .../testing/selftests/bpf/prog_tests/core_reloc.c  | 13 +++++++++--
 .../bpf/progs/btf__core_reloc_size___diff_offs.c   |  3 +++
 .../testing/selftests/bpf/progs/core_reloc_types.h | 18 ++++++++++++++++
 .../selftests/bpf/progs/test_core_reloc_size.c     | 25 +++++++++++++++++++++-
 4 files changed, 56 insertions(+), 3 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_size___diff_offs.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index f28f75aa9154..3712dfe1be59 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -277,13 +277,21 @@ static int duration = 0;
 #define SIZE_OUTPUT_DATA(type)						\
 	STRUCT_TO_CHAR_PTR(core_reloc_size_output) {			\
 		.int_sz = sizeof(((type *)0)->int_field),		\
+		.int_off = offsetof(type, int_field),			\
 		.struct_sz = sizeof(((type *)0)->struct_field),		\
+		.struct_off = offsetof(type, struct_field),		\
 		.union_sz = sizeof(((type *)0)->union_field),		\
+		.union_off = offsetof(type, union_field),		\
 		.arr_sz = sizeof(((type *)0)->arr_field),		\
-		.arr_elem_sz = sizeof(((type *)0)->arr_field[0]),	\
+		.arr_off = offsetof(type, arr_field),			\
+		.arr_elem_sz = sizeof(((type *)0)->arr_field[1]),	\
+		.arr_elem_off = offsetof(type, arr_field[1]),		\
 		.ptr_sz = 8, /* always 8-byte pointer for BPF */	\
+		.ptr_off = offsetof(type, ptr_field),			\
 		.enum_sz = sizeof(((type *)0)->enum_field),		\
+		.enum_off = offsetof(type, enum_field),			\
 		.float_sz = sizeof(((type *)0)->float_field),		\
+		.float_off = offsetof(type, float_field),		\
 	}
 
 #define SIZE_CASE(name) {						\
@@ -714,9 +722,10 @@ static const struct core_reloc_test_case test_cases[] = {
 	}),
 	BITFIELDS_ERR_CASE(bitfields___err_too_big_bitfield),
 
-	/* size relocation checks */
+	/* field size and offset relocation checks */
 	SIZE_CASE(size),
 	SIZE_CASE(size___diff_sz),
+	SIZE_CASE(size___diff_offs),
 	SIZE_ERR_CASE(size___err_ambiguous),
 
 	/* validate type existence and size relocations */
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_size___diff_offs.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_size___diff_offs.c
new file mode 100644
index 000000000000..3824345d82ab
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_size___diff_offs.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_size___diff_offs x) {}
diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h
index c95c0cabe951..f9dc9766546e 100644
--- a/tools/testing/selftests/bpf/progs/core_reloc_types.h
+++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h
@@ -785,13 +785,21 @@ struct core_reloc_bitfields___err_too_big_bitfield {
  */
 struct core_reloc_size_output {
 	int int_sz;
+	int int_off;
 	int struct_sz;
+	int struct_off;
 	int union_sz;
+	int union_off;
 	int arr_sz;
+	int arr_off;
 	int arr_elem_sz;
+	int arr_elem_off;
 	int ptr_sz;
+	int ptr_off;
 	int enum_sz;
+	int enum_off;
 	int float_sz;
+	int float_off;
 };
 
 struct core_reloc_size {
@@ -814,6 +822,16 @@ struct core_reloc_size___diff_sz {
 	double float_field;
 };
 
+struct core_reloc_size___diff_offs {
+	float float_field;
+	enum { YET_OTHER_VALUE = 123 } enum_field;
+	void *ptr_field;
+	int arr_field[4];
+	union { int x; } union_field;
+	struct { int x; } struct_field;
+	int int_field;
+};
+
 /* Error case of two candidates with the fields (int_field) at the same
  * offset, but with differing final relocation values: size 4 vs size 1
  */
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_size.c b/tools/testing/selftests/bpf/progs/test_core_reloc_size.c
index 6766addd2583..5b686053ce42 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_size.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_size.c
@@ -15,13 +15,21 @@ struct {
 
 struct core_reloc_size_output {
 	int int_sz;
+	int int_off;
 	int struct_sz;
+	int struct_off;
 	int union_sz;
+	int union_off;
 	int arr_sz;
+	int arr_off;
 	int arr_elem_sz;
+	int arr_elem_off;
 	int ptr_sz;
+	int ptr_off;
 	int enum_sz;
+	int enum_off;
 	int float_sz;
+	int float_off;
 };
 
 struct core_reloc_size {
@@ -41,13 +49,28 @@ int test_core_size(void *ctx)
 	struct core_reloc_size_output *out = (void *)&data.out;
 
 	out->int_sz = bpf_core_field_size(in->int_field);
+	out->int_off = bpf_core_field_offset(in->int_field);
+
 	out->struct_sz = bpf_core_field_size(in->struct_field);
+	out->struct_off = bpf_core_field_offset(in->struct_field);
+
 	out->union_sz = bpf_core_field_size(in->union_field);
+	out->union_off = bpf_core_field_offset(in->union_field);
+
 	out->arr_sz = bpf_core_field_size(in->arr_field);
-	out->arr_elem_sz = bpf_core_field_size(struct core_reloc_size, arr_field[0]);
+	out->arr_off = bpf_core_field_offset(in->arr_field);
+
+	out->arr_elem_sz = bpf_core_field_size(struct core_reloc_size, arr_field[1]);
+	out->arr_elem_off = bpf_core_field_offset(struct core_reloc_size, arr_field[1]);
+
 	out->ptr_sz = bpf_core_field_size(struct core_reloc_size, ptr_field);
+	out->ptr_off = bpf_core_field_offset(struct core_reloc_size, ptr_field);
+
 	out->enum_sz = bpf_core_field_size(struct core_reloc_size, enum_field);
+	out->enum_off = bpf_core_field_offset(struct core_reloc_size, enum_field);
+
 	out->float_sz = bpf_core_field_size(struct core_reloc_size, float_field);
+	out->float_off = bpf_core_field_offset(struct core_reloc_size, float_field);
 
 	return 0;
 }
-- 
cgit 


From f760d0537925e2973ed3adc2e590aa2968d0e8dc Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Sun, 8 May 2022 17:41:46 -0700
Subject: libbpf: Provide barrier() and barrier_var() in bpf_helpers.h

Add barrier() and barrier_var() macros into bpf_helpers.h to be used by
end users. While a bit advanced and specialized instruments, they are
sometimes indispensable. Instead of requiring each user to figure out
exact asm volatile incantations for themselves, provide them from
bpf_helpers.h.

Also remove conflicting definitions from selftests. Some tests rely on
barrier_var() definition being nothing, those will still work as libbpf
does the #ifndef/#endif guarding for barrier() and barrier_var(),
allowing users to redefine them, if necessary.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20220509004148.1801791-8-andrii@kernel.org
---
 tools/lib/bpf/bpf_helpers.h                        | 24 ++++++++++++++++++++++
 tools/testing/selftests/bpf/progs/exhandler_kern.c |  2 --
 tools/testing/selftests/bpf/progs/loop5.c          |  1 -
 tools/testing/selftests/bpf/progs/profiler1.c      |  1 -
 tools/testing/selftests/bpf/progs/pyperf.h         |  2 --
 .../testing/selftests/bpf/progs/test_pkt_access.c  |  2 --
 6 files changed, 24 insertions(+), 8 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index bbae9a057bc8..fb04eaf367f1 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -75,6 +75,30 @@
 	})
 #endif
 
+/*
+ * Compiler (optimization) barrier.
+ */
+#ifndef barrier
+#define barrier() asm volatile("" ::: "memory")
+#endif
+
+/* Variable-specific compiler (optimization) barrier. It's a no-op which makes
+ * compiler believe that there is some black box modification of a given
+ * variable and thus prevents compiler from making extra assumption about its
+ * value and potential simplifications and optimizations on this variable.
+ *
+ * E.g., compiler might often delay or even omit 32-bit to 64-bit casting of
+ * a variable, making some code patterns unverifiable. Putting barrier_var()
+ * in place will ensure that cast is performed before the barrier_var()
+ * invocation, because compiler has to pessimistically assume that embedded
+ * asm section might perform some extra operations on that variable.
+ *
+ * This is a variable-specific variant of more global barrier().
+ */
+#ifndef barrier_var
+#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var))
+#endif
+
 /*
  * Helper macro to throw a compilation error if __bpf_unreachable() gets
  * built into the resulting code. This works given BPF back end does not
diff --git a/tools/testing/selftests/bpf/progs/exhandler_kern.c b/tools/testing/selftests/bpf/progs/exhandler_kern.c
index dd9b30a0f0fc..20d009e2d266 100644
--- a/tools/testing/selftests/bpf/progs/exhandler_kern.c
+++ b/tools/testing/selftests/bpf/progs/exhandler_kern.c
@@ -7,8 +7,6 @@
 #include <bpf/bpf_tracing.h>
 #include <bpf/bpf_core_read.h>
 
-#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var))
-
 char _license[] SEC("license") = "GPL";
 
 unsigned int exception_triggered;
diff --git a/tools/testing/selftests/bpf/progs/loop5.c b/tools/testing/selftests/bpf/progs/loop5.c
index 913791923fa3..1b13f37f85ec 100644
--- a/tools/testing/selftests/bpf/progs/loop5.c
+++ b/tools/testing/selftests/bpf/progs/loop5.c
@@ -2,7 +2,6 @@
 // Copyright (c) 2019 Facebook
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
-#define barrier() __asm__ __volatile__("": : :"memory")
 
 char _license[] SEC("license") = "GPL";
 
diff --git a/tools/testing/selftests/bpf/progs/profiler1.c b/tools/testing/selftests/bpf/progs/profiler1.c
index 4df9088bfc00..fb6b13522949 100644
--- a/tools/testing/selftests/bpf/progs/profiler1.c
+++ b/tools/testing/selftests/bpf/progs/profiler1.c
@@ -1,6 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
-#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var))
 #define UNROLL
 #define INLINE __always_inline
 #include "profiler.inc.h"
diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h
index 5d3dc4d66d47..6c7b1fb268d6 100644
--- a/tools/testing/selftests/bpf/progs/pyperf.h
+++ b/tools/testing/selftests/bpf/progs/pyperf.h
@@ -171,8 +171,6 @@ struct process_frame_ctx {
 	bool done;
 };
 
-#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var))
-
 static int process_frame_callback(__u32 i, struct process_frame_ctx *ctx)
 {
 	int zero = 0;
diff --git a/tools/testing/selftests/bpf/progs/test_pkt_access.c b/tools/testing/selftests/bpf/progs/test_pkt_access.c
index 0558544e1ff0..5cd7c096f62d 100644
--- a/tools/testing/selftests/bpf/progs/test_pkt_access.c
+++ b/tools/testing/selftests/bpf/progs/test_pkt_access.c
@@ -14,8 +14,6 @@
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
 
-#define barrier() __asm__ __volatile__("": : :"memory")
-
 /* llvm will optimize both subprograms into exactly the same BPF assembly
  *
  * Disassembly of section .text:
-- 
cgit 


From 7b3a06382442c4d83c9d35253638cb3f561da9b9 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Sun, 8 May 2022 17:41:48 -0700
Subject: selftests/bpf: Test libbpf's ringbuf size fix up logic

Make sure we always excercise libbpf's ringbuf map size adjustment logic
by specifying non-zero size that's definitely not a page size multiple.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20220509004148.1801791-10-andrii@kernel.org
---
 tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c | 12 ------------
 tools/testing/selftests/bpf/progs/test_ringbuf_multi.c |  2 ++
 2 files changed, 2 insertions(+), 12 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
index e945195b24c9..eb5f7f5aa81a 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
@@ -50,18 +50,6 @@ void test_ringbuf_multi(void)
 	if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
 		return;
 
-	err = bpf_map__set_max_entries(skel->maps.ringbuf1, page_size);
-	if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
-		goto cleanup;
-
-	err = bpf_map__set_max_entries(skel->maps.ringbuf2, page_size);
-	if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
-		goto cleanup;
-
-	err = bpf_map__set_max_entries(bpf_map__inner_map(skel->maps.ringbuf_arr), page_size);
-	if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
-		goto cleanup;
-
 	proto_fd = bpf_map_create(BPF_MAP_TYPE_RINGBUF, NULL, 0, 0, page_size, NULL);
 	if (CHECK(proto_fd < 0, "bpf_map_create", "bpf_map_create failed\n"))
 		goto cleanup;
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c
index 197b86546dca..e416e0ce12b7 100644
--- a/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c
@@ -15,6 +15,8 @@ struct sample {
 
 struct ringbuf_map {
 	__uint(type, BPF_MAP_TYPE_RINGBUF);
+	/* libbpf will adjust to valid page size */
+	__uint(max_entries, 1000);
 } ringbuf1 SEC(".maps"),
   ringbuf2 SEC(".maps");
 
-- 
cgit 


From 41c240099fe09377b6b9f8272e45d2267c843d3e Mon Sep 17 00:00:00 2001
From: Joel Savitz <jsavitz@redhat.com>
Date: Mon, 9 May 2022 17:34:29 -0700
Subject: selftests: vm: Makefile: rename TARGETS to VMTARGETS

The tools/testing/selftests/vm/Makefile uses the variable TARGETS
internally to generate a list of platform-specific binary build targets
suffixed with _{32,64}.  When building the selftests using its own
Makefile directly, such as via the following command run in a kernel tree:

One receives an error such as the following:

make: Entering directory '/root/linux/tools/testing/selftests'
make --no-builtin-rules ARCH=x86 -C ../../.. headers_install
make[1]: Entering directory '/root/linux'
  INSTALL ./usr/include
make[1]: Leaving directory '/root/linux'
make[1]: Entering directory '/root/linux/tools/testing/selftests/vm'
make[1]: *** No rule to make target 'vm.c', needed by '/root/linux/tools/testing/selftests/vm/vm_64'.  Stop.
make[1]: Leaving directory '/root/linux/tools/testing/selftests/vm'
make: *** [Makefile:175: all] Error 2
make: Leaving directory '/root/linux/tools/testing/selftests'

The TARGETS variable passed to tools/testing/selftests/Makefile collides
with the TARGETS used in tools/testing/selftests/vm/Makefile, so rename
the latter to VMTARGETS, eliminating the collision with no functional
change.

Link: https://lkml.kernel.org/r/20220504213454.1282532-1-jsavitz@redhat.com
Fixes: f21fda8f6453 ("selftests: vm: pkeys: fix multilib builds for x86")
Signed-off-by: Joel Savitz <jsavitz@redhat.com>
Acked-by: Nico Pache <npache@redhat.com>
Cc: Joel Savitz <jsavitz@redhat.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Sandipan Das <sandipan@linux.ibm.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/vm/Makefile | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 04a49e876a46..5b1ecd00695b 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -57,9 +57,9 @@ CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_32bit_prog
 CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_64bit_program.c)
 CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_program.c -no-pie)
 
-TARGETS := protection_keys
-BINARIES_32 := $(TARGETS:%=%_32)
-BINARIES_64 := $(TARGETS:%=%_64)
+VMTARGETS := protection_keys
+BINARIES_32 := $(VMTARGETS:%=%_32)
+BINARIES_64 := $(VMTARGETS:%=%_64)
 
 ifeq ($(CAN_BUILD_WITH_NOPIE),1)
 CFLAGS += -no-pie
@@ -112,7 +112,7 @@ $(BINARIES_32): CFLAGS += -m32 -mxsave
 $(BINARIES_32): LDLIBS += -lrt -ldl -lm
 $(BINARIES_32): $(OUTPUT)/%_32: %.c
 	$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
-$(foreach t,$(TARGETS),$(eval $(call gen-target-rule-32,$(t))))
+$(foreach t,$(VMTARGETS),$(eval $(call gen-target-rule-32,$(t))))
 endif
 
 ifeq ($(CAN_BUILD_X86_64),1)
@@ -120,7 +120,7 @@ $(BINARIES_64): CFLAGS += -m64 -mxsave
 $(BINARIES_64): LDLIBS += -lrt -ldl
 $(BINARIES_64): $(OUTPUT)/%_64: %.c
 	$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
-$(foreach t,$(TARGETS),$(eval $(call gen-target-rule-64,$(t))))
+$(foreach t,$(VMTARGETS),$(eval $(call gen-target-rule-64,$(t))))
 endif
 
 # x86_64 users should be encouraged to install 32-bit libraries
-- 
cgit 


From 17de1e559cf1eb01d5d90afd3064d5a280060f6f Mon Sep 17 00:00:00 2001
From: Joel Savitz <jsavitz@redhat.com>
Date: Mon, 9 May 2022 18:20:47 -0700
Subject: selftests: clarify common error when running gup_test

The gup_test binary will fail showing only the output of perror("open") in
the case that /sys/kernel/debug/gup_test is not found. This will almost
always be due to CONFIG_GUP_TEST not being set, which enables
compilation of a kernel that provides this file.

Add a short error message to clarify this failure and point the user to
the solution.

Link: https://lkml.kernel.org/r/20220502224942.995427-1-jsavitz@redhat.com
Signed-off-by: Joel Savitz <jsavitz@redhat.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Nico Pache <npache@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/vm/gup_test.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/gup_test.c b/tools/testing/selftests/vm/gup_test.c
index 593262555e18..6bb36ca71cb5 100644
--- a/tools/testing/selftests/vm/gup_test.c
+++ b/tools/testing/selftests/vm/gup_test.c
@@ -21,6 +21,8 @@
 #define FOLL_WRITE	0x01	/* check pte is writable */
 #define FOLL_TOUCH	0x02	/* mark page accessed */
 
+#define GUP_TEST_FILE "/sys/kernel/debug/gup_test"
+
 static unsigned long cmd = GUP_FAST_BENCHMARK;
 static int gup_fd, repeats = 1;
 static unsigned long size = 128 * MB;
-- 
cgit 


From a82ebb093fc7bdd88f8df17b2fa303d7535fa43b Mon Sep 17 00:00:00 2001
From: Takshak Chahande <ctakshak@fb.com>
Date: Tue, 10 May 2022 01:22:21 -0700
Subject: selftests/bpf: Handle batch operations for map-in-map bpf-maps

This patch adds up test cases that handles 4 combinations:
 a) outer map: BPF_MAP_TYPE_ARRAY_OF_MAPS
    inner maps: BPF_MAP_TYPE_ARRAY and BPF_MAP_TYPE_HASH
 b) outer map: BPF_MAP_TYPE_HASH_OF_MAPS
    inner maps: BPF_MAP_TYPE_ARRAY and BPF_MAP_TYPE_HASH

Signed-off-by: Takshak Chahande <ctakshak@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20220510082221.2390540-2-ctakshak@fb.com
---
 .../selftests/bpf/map_tests/map_in_map_batch_ops.c | 252 +++++++++++++++++++++
 1 file changed, 252 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c
new file mode 100644
index 000000000000..f472d28ad11a
--- /dev/null
+++ b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c
@@ -0,0 +1,252 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include <test_maps.h>
+
+#define OUTER_MAP_ENTRIES 10
+
+static __u32 get_map_id_from_fd(int map_fd)
+{
+	struct bpf_map_info map_info = {};
+	uint32_t info_len = sizeof(map_info);
+	int ret;
+
+	ret = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len);
+	CHECK(ret < 0, "Finding map info failed", "error:%s\n",
+	      strerror(errno));
+
+	return map_info.id;
+}
+
+/* This creates number of OUTER_MAP_ENTRIES maps that will be stored
+ * in outer map and return the created map_fds
+ */
+static void create_inner_maps(enum bpf_map_type map_type,
+			      __u32 *inner_map_fds)
+{
+	int map_fd, map_index, ret;
+	__u32 map_key = 0, map_id;
+	char map_name[15];
+
+	for (map_index = 0; map_index < OUTER_MAP_ENTRIES; map_index++) {
+		memset(map_name, 0, sizeof(map_name));
+		sprintf(map_name, "inner_map_fd_%d", map_index);
+		map_fd = bpf_map_create(map_type, map_name, sizeof(__u32),
+					sizeof(__u32), 1, NULL);
+		CHECK(map_fd < 0,
+		      "inner bpf_map_create() failed",
+		      "map_type=(%d) map_name(%s), error:%s\n",
+		      map_type, map_name, strerror(errno));
+
+		/* keep track of the inner map fd as it is required
+		 * to add records in outer map
+		 */
+		inner_map_fds[map_index] = map_fd;
+
+		/* Add entry into this created map
+		 * eg: map1 key = 0, value = map1's map id
+		 *     map2 key = 0, value = map2's map id
+		 */
+		map_id = get_map_id_from_fd(map_fd);
+		ret = bpf_map_update_elem(map_fd, &map_key, &map_id, 0);
+		CHECK(ret != 0,
+		      "bpf_map_update_elem failed",
+		      "map_type=(%d) map_name(%s), error:%s\n",
+		      map_type, map_name, strerror(errno));
+	}
+}
+
+static int create_outer_map(enum bpf_map_type map_type, __u32 inner_map_fd)
+{
+	int outer_map_fd;
+	LIBBPF_OPTS(bpf_map_create_opts, attr);
+
+	attr.inner_map_fd = inner_map_fd;
+	outer_map_fd = bpf_map_create(map_type, "outer_map", sizeof(__u32),
+				      sizeof(__u32), OUTER_MAP_ENTRIES,
+				      &attr);
+	CHECK(outer_map_fd < 0,
+	      "outer bpf_map_create()",
+	      "map_type=(%d), error:%s\n",
+	      map_type, strerror(errno));
+
+	return outer_map_fd;
+}
+
+static void validate_fetch_results(int outer_map_fd,
+				   __u32 *fetched_keys, __u32 *fetched_values,
+				   __u32 max_entries_fetched)
+{
+	__u32 inner_map_key, inner_map_value;
+	int inner_map_fd, entry, err;
+	__u32 outer_map_value;
+
+	for (entry = 0; entry < max_entries_fetched; ++entry) {
+		outer_map_value = fetched_values[entry];
+		inner_map_fd = bpf_map_get_fd_by_id(outer_map_value);
+		CHECK(inner_map_fd < 0,
+		      "Failed to get inner map fd",
+		      "from id(%d), error=%s\n",
+		      outer_map_value, strerror(errno));
+		err = bpf_map_get_next_key(inner_map_fd, NULL, &inner_map_key);
+		CHECK(err != 0,
+		      "Failed to get inner map key",
+		      "error=%s\n", strerror(errno));
+
+		err = bpf_map_lookup_elem(inner_map_fd, &inner_map_key,
+					  &inner_map_value);
+
+		close(inner_map_fd);
+
+		CHECK(err != 0,
+		      "Failed to get inner map value",
+		      "for key(%d), error=%s\n",
+		      inner_map_key, strerror(errno));
+
+		/* Actual value validation */
+		CHECK(outer_map_value != inner_map_value,
+		      "Failed to validate inner map value",
+		      "fetched(%d) and lookedup(%d)!\n",
+		      outer_map_value, inner_map_value);
+	}
+}
+
+static void fetch_and_validate(int outer_map_fd,
+			       struct bpf_map_batch_opts *opts,
+			       __u32 batch_size, bool delete_entries)
+{
+	__u32 *fetched_keys, *fetched_values, total_fetched = 0;
+	__u32 batch_key = 0, fetch_count, step_size;
+	int err, max_entries = OUTER_MAP_ENTRIES;
+	__u32 value_size = sizeof(__u32);
+
+	/* Total entries needs to be fetched */
+	fetched_keys = calloc(max_entries, value_size);
+	fetched_values = calloc(max_entries, value_size);
+	CHECK((!fetched_keys || !fetched_values),
+	      "Memory allocation failed for fetched_keys or fetched_values",
+	      "error=%s\n", strerror(errno));
+
+	for (step_size = batch_size;
+	     step_size <= max_entries;
+	     step_size += batch_size) {
+		fetch_count = step_size;
+		err = delete_entries
+		      ? bpf_map_lookup_and_delete_batch(outer_map_fd,
+				      total_fetched ? &batch_key : NULL,
+				      &batch_key,
+				      fetched_keys + total_fetched,
+				      fetched_values + total_fetched,
+				      &fetch_count, opts)
+		      : bpf_map_lookup_batch(outer_map_fd,
+				      total_fetched ? &batch_key : NULL,
+				      &batch_key,
+				      fetched_keys + total_fetched,
+				      fetched_values + total_fetched,
+				      &fetch_count, opts);
+
+		if (err && errno == ENOSPC) {
+			/* Fetch again with higher batch size */
+			total_fetched = 0;
+			continue;
+		}
+
+		CHECK((err < 0 && (errno != ENOENT)),
+		      "lookup with steps failed",
+		      "error: %s\n", strerror(errno));
+
+		/* Update the total fetched number */
+		total_fetched += fetch_count;
+		if (err)
+			break;
+	}
+
+	CHECK((total_fetched != max_entries),
+	      "Unable to fetch expected entries !",
+	      "total_fetched(%d) and max_entries(%d) error: (%d):%s\n",
+	      total_fetched, max_entries, errno, strerror(errno));
+
+	/* validate the fetched entries */
+	validate_fetch_results(outer_map_fd, fetched_keys,
+			       fetched_values, total_fetched);
+	printf("batch_op(%s) is successful with batch_size(%d)\n",
+	       delete_entries ? "LOOKUP_AND_DELETE" : "LOOKUP", batch_size);
+
+	free(fetched_keys);
+	free(fetched_values);
+}
+
+static void _map_in_map_batch_ops(enum bpf_map_type outer_map_type,
+				  enum bpf_map_type inner_map_type)
+{
+	__u32 *outer_map_keys, *inner_map_fds;
+	__u32 max_entries = OUTER_MAP_ENTRIES;
+	LIBBPF_OPTS(bpf_map_batch_opts, opts);
+	__u32 value_size = sizeof(__u32);
+	int batch_size[2] = {5, 10};
+	__u32 map_index, op_index;
+	int outer_map_fd, ret;
+
+	outer_map_keys = calloc(max_entries, value_size);
+	inner_map_fds = calloc(max_entries, value_size);
+	CHECK((!outer_map_keys || !inner_map_fds),
+	      "Memory allocation failed for outer_map_keys or inner_map_fds",
+	      "error=%s\n", strerror(errno));
+
+	create_inner_maps(inner_map_type, inner_map_fds);
+
+	outer_map_fd = create_outer_map(outer_map_type, *inner_map_fds);
+	/* create outer map keys */
+	for (map_index = 0; map_index < max_entries; map_index++)
+		outer_map_keys[map_index] =
+			((outer_map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
+			 ? 9 : 1000) - map_index;
+
+	/* batch operation - map_update */
+	ret = bpf_map_update_batch(outer_map_fd, outer_map_keys,
+				   inner_map_fds, &max_entries, &opts);
+	CHECK(ret != 0,
+	      "Failed to update the outer map batch ops",
+	      "error=%s\n", strerror(errno));
+
+	/* batch operation - map_lookup */
+	for (op_index = 0; op_index < 2; ++op_index)
+		fetch_and_validate(outer_map_fd, &opts,
+				   batch_size[op_index], false);
+
+	/* batch operation - map_lookup_delete */
+	if (outer_map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
+		fetch_and_validate(outer_map_fd, &opts,
+				   max_entries, true /*delete*/);
+
+	/* close all map fds */
+	for (map_index = 0; map_index < max_entries; map_index++)
+		close(inner_map_fds[map_index]);
+	close(outer_map_fd);
+
+	free(inner_map_fds);
+	free(outer_map_keys);
+}
+
+void test_map_in_map_batch_ops_array(void)
+{
+	_map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_ARRAY);
+	printf("%s:PASS with inner ARRAY map\n", __func__);
+	_map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH);
+	printf("%s:PASS with inner HASH map\n", __func__);
+}
+
+void test_map_in_map_batch_ops_hash(void)
+{
+	_map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_ARRAY);
+	printf("%s:PASS with inner ARRAY map\n", __func__);
+	_map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_HASH);
+	printf("%s:PASS with inner HASH map\n", __func__);
+}
-- 
cgit 


From 1ee7efd40abf3ab01e67ff4be15d4385d5fa52c1 Mon Sep 17 00:00:00 2001
From: Kaixi Fan <fankaixi.li@bytedance.com>
Date: Sat, 30 Apr 2022 15:48:43 +0800
Subject: selftests/bpf: Move vxlan tunnel testcases to test_progs

Move vxlan tunnel testcases from test_tunnel.sh to test_progs.
And add vxlan tunnel source testcases also. Other tunnel testcases
will be moved to test_progs step by step in the future.
Rename bpf program section name as SEC("tc") because test_progs
bpf loader could not load sections with name SEC("gre_set_tunnel").
Because of this, add bpftool to load bpf programs in test_tunnel.sh.

Signed-off-by: Kaixi Fan <fankaixi.li@bytedance.com>
Link: https://lore.kernel.org/r/20220430074844.69214-3-fankaixi.li@bytedance.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../testing/selftests/bpf/prog_tests/test_tunnel.c | 423 +++++++++++++++++++++
 .../testing/selftests/bpf/progs/test_tunnel_kern.c | 226 ++++++++---
 tools/testing/selftests/bpf/test_tunnel.sh         | 124 +-----
 3 files changed, 610 insertions(+), 163 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/test_tunnel.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
new file mode 100644
index 000000000000..071c9c91b50f
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
@@ -0,0 +1,423 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/*
+ * End-to-end eBPF tunnel test suite
+ *   The file tests BPF network tunnel implementation.
+ *
+ * Topology:
+ * ---------
+ *     root namespace   |     at_ns0 namespace
+ *                       |
+ *       -----------     |     -----------
+ *       | tnl dev |     |     | tnl dev |  (overlay network)
+ *       -----------     |     -----------
+ *       metadata-mode   |     metadata-mode
+ *        with bpf       |       with bpf
+ *                       |
+ *       ----------      |     ----------
+ *       |  veth1  | --------- |  veth0  |  (underlay network)
+ *       ----------    peer    ----------
+ *
+ *
+ *  Device Configuration
+ *  --------------------
+ *  root namespace with metadata-mode tunnel + BPF
+ *  Device names and addresses:
+ *	veth1 IP 1: 172.16.1.200, IPv6: 00::22 (underlay)
+ *		IP 2: 172.16.1.20, IPv6: 00::bb (underlay)
+ *	tunnel dev <type>11, ex: gre11, IPv4: 10.1.1.200, IPv6: 1::22 (overlay)
+ *
+ *  Namespace at_ns0 with native tunnel
+ *  Device names and addresses:
+ *	veth0 IPv4: 172.16.1.100, IPv6: 00::11 (underlay)
+ *	tunnel dev <type>00, ex: gre00, IPv4: 10.1.1.100, IPv6: 1::11 (overlay)
+ *
+ *
+ * End-to-end ping packet flow
+ *  ---------------------------
+ *  Most of the tests start by namespace creation, device configuration,
+ *  then ping the underlay and overlay network.  When doing 'ping 10.1.1.100'
+ *  from root namespace, the following operations happen:
+ *  1) Route lookup shows 10.1.1.100/24 belongs to tnl dev, fwd to tnl dev.
+ *  2) Tnl device's egress BPF program is triggered and set the tunnel metadata,
+ *     with local_ip=172.16.1.200, remote_ip=172.16.1.100. BPF program choose
+ *     the primary or secondary ip of veth1 as the local ip of tunnel. The
+ *     choice is made based on the value of bpf map local_ip_map.
+ *  3) Outer tunnel header is prepended and route the packet to veth1's egress.
+ *  4) veth0's ingress queue receive the tunneled packet at namespace at_ns0.
+ *  5) Tunnel protocol handler, ex: vxlan_rcv, decap the packet.
+ *  6) Forward the packet to the overlay tnl dev.
+ */
+
+#include <arpa/inet.h>
+#include <linux/if_tun.h>
+#include <linux/limits.h>
+#include <linux/sysctl.h>
+#include <linux/time_types.h>
+#include <linux/net_tstamp.h>
+#include <net/if.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "test_tunnel_kern.skel.h"
+
+#define IP4_ADDR_VETH0 "172.16.1.100"
+#define IP4_ADDR1_VETH1 "172.16.1.200"
+#define IP4_ADDR2_VETH1 "172.16.1.20"
+#define IP4_ADDR_TUNL_DEV0 "10.1.1.100"
+#define IP4_ADDR_TUNL_DEV1 "10.1.1.200"
+
+#define IP6_ADDR_VETH0 "::11"
+#define IP6_ADDR1_VETH1 "::22"
+#define IP6_ADDR2_VETH1 "::bb"
+
+#define IP4_ADDR1_HEX_VETH1 0xac1001c8
+#define IP4_ADDR2_HEX_VETH1 0xac100114
+#define IP6_ADDR1_HEX_VETH1 0x22
+#define IP6_ADDR2_HEX_VETH1 0xbb
+
+#define MAC_TUNL_DEV0 "52:54:00:d9:01:00"
+#define MAC_TUNL_DEV1 "52:54:00:d9:02:00"
+
+#define VXLAN_TUNL_DEV0 "vxlan00"
+#define VXLAN_TUNL_DEV1 "vxlan11"
+#define IP6VXLAN_TUNL_DEV0 "ip6vxlan00"
+#define IP6VXLAN_TUNL_DEV1 "ip6vxlan11"
+
+#define PING_ARGS "-i 0.01 -c 3 -w 10 -q"
+
+#define SYS(fmt, ...)						\
+	({							\
+		char cmd[1024];					\
+		snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__);	\
+		if (!ASSERT_OK(system(cmd), cmd))		\
+			goto fail;				\
+	})
+
+#define SYS_NOFAIL(fmt, ...)					\
+	({							\
+		char cmd[1024];					\
+		snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__);	\
+		system(cmd);					\
+	})
+
+static int config_device(void)
+{
+	SYS("ip netns add at_ns0");
+	SYS("ip link add veth0 type veth peer name veth1");
+	SYS("ip link set veth0 netns at_ns0");
+	SYS("ip addr add " IP4_ADDR1_VETH1 "/24 dev veth1");
+	SYS("ip addr add " IP4_ADDR2_VETH1 "/24 dev veth1");
+	SYS("ip link set dev veth1 up mtu 1500");
+	SYS("ip netns exec at_ns0 ip addr add " IP4_ADDR_VETH0 "/24 dev veth0");
+	SYS("ip netns exec at_ns0 ip link set dev veth0 up mtu 1500");
+
+	return 0;
+fail:
+	return -1;
+}
+
+static void cleanup(void)
+{
+	SYS_NOFAIL("test -f /var/run/netns/at_ns0 && ip netns delete at_ns0");
+	SYS_NOFAIL("ip link del veth1 2> /dev/null");
+	SYS_NOFAIL("ip link del %s 2> /dev/null", VXLAN_TUNL_DEV1);
+	SYS_NOFAIL("ip link del %s 2> /dev/null", IP6VXLAN_TUNL_DEV1);
+}
+
+static int add_vxlan_tunnel(void)
+{
+	/* at_ns0 namespace */
+	SYS("ip netns exec at_ns0 ip link add dev %s type vxlan external gbp dstport 4789",
+	    VXLAN_TUNL_DEV0);
+	SYS("ip netns exec at_ns0 ip link set dev %s address %s up",
+	    VXLAN_TUNL_DEV0, MAC_TUNL_DEV0);
+	SYS("ip netns exec at_ns0 ip addr add dev %s %s/24",
+	    VXLAN_TUNL_DEV0, IP4_ADDR_TUNL_DEV0);
+	SYS("ip netns exec at_ns0 ip neigh add %s lladdr %s dev %s",
+	    IP4_ADDR_TUNL_DEV1, MAC_TUNL_DEV1, VXLAN_TUNL_DEV0);
+
+	/* root namespace */
+	SYS("ip link add dev %s type vxlan external gbp dstport 4789",
+	    VXLAN_TUNL_DEV1);
+	SYS("ip link set dev %s address %s up", VXLAN_TUNL_DEV1, MAC_TUNL_DEV1);
+	SYS("ip addr add dev %s %s/24", VXLAN_TUNL_DEV1, IP4_ADDR_TUNL_DEV1);
+	SYS("ip neigh add %s lladdr %s dev %s",
+	    IP4_ADDR_TUNL_DEV0, MAC_TUNL_DEV0, VXLAN_TUNL_DEV1);
+
+	return 0;
+fail:
+	return -1;
+}
+
+static void delete_vxlan_tunnel(void)
+{
+	SYS_NOFAIL("ip netns exec at_ns0 ip link delete dev %s",
+		   VXLAN_TUNL_DEV0);
+	SYS_NOFAIL("ip link delete dev %s", VXLAN_TUNL_DEV1);
+}
+
+static int add_ip6vxlan_tunnel(void)
+{
+	SYS("ip netns exec at_ns0 ip -6 addr add %s/96 dev veth0",
+	    IP6_ADDR_VETH0);
+	SYS("ip netns exec at_ns0 ip link set dev veth0 up");
+	SYS("ip -6 addr add %s/96 dev veth1", IP6_ADDR1_VETH1);
+	SYS("ip -6 addr add %s/96 dev veth1", IP6_ADDR2_VETH1);
+	SYS("ip link set dev veth1 up");
+
+	/* at_ns0 namespace */
+	SYS("ip netns exec at_ns0 ip link add dev %s type vxlan external dstport 4789",
+	    IP6VXLAN_TUNL_DEV0);
+	SYS("ip netns exec at_ns0 ip addr add dev %s %s/24",
+	    IP6VXLAN_TUNL_DEV0, IP4_ADDR_TUNL_DEV0);
+	SYS("ip netns exec at_ns0 ip link set dev %s address %s up",
+	    IP6VXLAN_TUNL_DEV0, MAC_TUNL_DEV0);
+
+	/* root namespace */
+	SYS("ip link add dev %s type vxlan external dstport 4789",
+	    IP6VXLAN_TUNL_DEV1);
+	SYS("ip addr add dev %s %s/24", IP6VXLAN_TUNL_DEV1, IP4_ADDR_TUNL_DEV1);
+	SYS("ip link set dev %s address %s up",
+	    IP6VXLAN_TUNL_DEV1, MAC_TUNL_DEV1);
+
+	return 0;
+fail:
+	return -1;
+}
+
+static void delete_ip6vxlan_tunnel(void)
+{
+	SYS_NOFAIL("ip netns exec at_ns0 ip -6 addr delete %s/96 dev veth0",
+		   IP6_ADDR_VETH0);
+	SYS_NOFAIL("ip -6 addr delete %s/96 dev veth1", IP6_ADDR1_VETH1);
+	SYS_NOFAIL("ip -6 addr delete %s/96 dev veth1", IP6_ADDR2_VETH1);
+	SYS_NOFAIL("ip netns exec at_ns0 ip link delete dev %s",
+		   IP6VXLAN_TUNL_DEV0);
+	SYS_NOFAIL("ip link delete dev %s", IP6VXLAN_TUNL_DEV1);
+}
+
+static int test_ping(int family, const char *addr)
+{
+	SYS("%s %s %s > /dev/null", ping_command(family), PING_ARGS, addr);
+	return 0;
+fail:
+	return -1;
+}
+
+static int attach_tc_prog(struct bpf_tc_hook *hook, int igr_fd, int egr_fd)
+{
+	DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1,
+			    .priority = 1, .prog_fd = igr_fd);
+	DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts2, .handle = 1,
+			    .priority = 1, .prog_fd = egr_fd);
+	int ret;
+
+	ret = bpf_tc_hook_create(hook);
+	if (!ASSERT_OK(ret, "create tc hook"))
+		return ret;
+
+	if (igr_fd >= 0) {
+		hook->attach_point = BPF_TC_INGRESS;
+		ret = bpf_tc_attach(hook, &opts1);
+		if (!ASSERT_OK(ret, "bpf_tc_attach")) {
+			bpf_tc_hook_destroy(hook);
+			return ret;
+		}
+	}
+
+	if (egr_fd >= 0) {
+		hook->attach_point = BPF_TC_EGRESS;
+		ret = bpf_tc_attach(hook, &opts2);
+		if (!ASSERT_OK(ret, "bpf_tc_attach")) {
+			bpf_tc_hook_destroy(hook);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static void test_vxlan_tunnel(void)
+{
+	struct test_tunnel_kern *skel = NULL;
+	struct nstoken *nstoken;
+	int local_ip_map_fd;
+	int set_src_prog_fd, get_src_prog_fd;
+	int set_dst_prog_fd;
+	int key = 0, ifindex = -1;
+	uint local_ip;
+	int err;
+	DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
+			    .attach_point = BPF_TC_INGRESS);
+
+	/* add vxlan tunnel */
+	err = add_vxlan_tunnel();
+	if (!ASSERT_OK(err, "add vxlan tunnel"))
+		goto done;
+
+	/* load and attach bpf prog to tunnel dev tc hook point */
+	skel = test_tunnel_kern__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+		goto done;
+	ifindex = if_nametoindex(VXLAN_TUNL_DEV1);
+	if (!ASSERT_NEQ(ifindex, 0, "vxlan11 ifindex"))
+		goto done;
+	tc_hook.ifindex = ifindex;
+	get_src_prog_fd = bpf_program__fd(skel->progs.vxlan_get_tunnel_src);
+	set_src_prog_fd = bpf_program__fd(skel->progs.vxlan_set_tunnel_src);
+	if (!ASSERT_GE(get_src_prog_fd, 0, "bpf_program__fd"))
+		goto done;
+	if (!ASSERT_GE(set_src_prog_fd, 0, "bpf_program__fd"))
+		goto done;
+	if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd))
+		goto done;
+
+	/* load and attach prog set_md to tunnel dev tc hook point at_ns0 */
+	nstoken = open_netns("at_ns0");
+	if (!ASSERT_OK_PTR(nstoken, "setns src"))
+		goto done;
+	ifindex = if_nametoindex(VXLAN_TUNL_DEV0);
+	if (!ASSERT_NEQ(ifindex, 0, "vxlan00 ifindex"))
+		goto done;
+	tc_hook.ifindex = ifindex;
+	set_dst_prog_fd = bpf_program__fd(skel->progs.vxlan_set_tunnel_dst);
+	if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd"))
+		goto done;
+	if (attach_tc_prog(&tc_hook, -1, set_dst_prog_fd))
+		goto done;
+	close_netns(nstoken);
+
+	/* use veth1 ip 2 as tunnel source ip */
+	local_ip_map_fd = bpf_map__fd(skel->maps.local_ip_map);
+	if (!ASSERT_GE(local_ip_map_fd, 0, "bpf_map__fd"))
+		goto done;
+	local_ip = IP4_ADDR2_HEX_VETH1;
+	err = bpf_map_update_elem(local_ip_map_fd, &key, &local_ip, BPF_ANY);
+	if (!ASSERT_OK(err, "update bpf local_ip_map"))
+		goto done;
+
+	/* ping test */
+	err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV0);
+	if (!ASSERT_OK(err, "test_ping"))
+		goto done;
+
+done:
+	/* delete vxlan tunnel */
+	delete_vxlan_tunnel();
+	if (local_ip_map_fd >= 0)
+		close(local_ip_map_fd);
+	if (skel)
+		test_tunnel_kern__destroy(skel);
+}
+
+static void test_ip6vxlan_tunnel(void)
+{
+	struct test_tunnel_kern *skel = NULL;
+	struct nstoken *nstoken;
+	int local_ip_map_fd;
+	int set_src_prog_fd, get_src_prog_fd;
+	int set_dst_prog_fd;
+	int key = 0, ifindex = -1;
+	uint local_ip;
+	int err;
+	DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
+			    .attach_point = BPF_TC_INGRESS);
+
+	/* add vxlan tunnel */
+	err = add_ip6vxlan_tunnel();
+	if (!ASSERT_OK(err, "add_ip6vxlan_tunnel"))
+		goto done;
+
+	/* load and attach bpf prog to tunnel dev tc hook point */
+	skel = test_tunnel_kern__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+		goto done;
+	ifindex = if_nametoindex(IP6VXLAN_TUNL_DEV1);
+	if (!ASSERT_NEQ(ifindex, 0, "ip6vxlan11 ifindex"))
+		goto done;
+	tc_hook.ifindex = ifindex;
+	get_src_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_get_tunnel_src);
+	set_src_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_set_tunnel_src);
+	if (!ASSERT_GE(set_src_prog_fd, 0, "bpf_program__fd"))
+		goto done;
+	if (!ASSERT_GE(get_src_prog_fd, 0, "bpf_program__fd"))
+		goto done;
+	if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd))
+		goto done;
+
+	/* load and attach prog set_md to tunnel dev tc hook point at_ns0 */
+	nstoken = open_netns("at_ns0");
+	if (!ASSERT_OK_PTR(nstoken, "setns src"))
+		goto done;
+	ifindex = if_nametoindex(IP6VXLAN_TUNL_DEV0);
+	if (!ASSERT_NEQ(ifindex, 0, "ip6vxlan00 ifindex"))
+		goto done;
+	tc_hook.ifindex = ifindex;
+	set_dst_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_set_tunnel_dst);
+	if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd"))
+		goto done;
+	if (attach_tc_prog(&tc_hook, -1, set_dst_prog_fd))
+		goto done;
+	close_netns(nstoken);
+
+	/* use veth1 ip 2 as tunnel source ip */
+	local_ip_map_fd = bpf_map__fd(skel->maps.local_ip_map);
+	if (!ASSERT_GE(local_ip_map_fd, 0, "get local_ip_map fd"))
+		goto done;
+	local_ip = IP6_ADDR2_HEX_VETH1;
+	err = bpf_map_update_elem(local_ip_map_fd, &key, &local_ip, BPF_ANY);
+	if (!ASSERT_OK(err, "update bpf local_ip_map"))
+		goto done;
+
+	/* ping test */
+	err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV0);
+	if (!ASSERT_OK(err, "test_ping"))
+		goto done;
+
+done:
+	/* delete ipv6 vxlan tunnel */
+	delete_ip6vxlan_tunnel();
+	if (local_ip_map_fd >= 0)
+		close(local_ip_map_fd);
+	if (skel)
+		test_tunnel_kern__destroy(skel);
+}
+
+#define RUN_TEST(name)							\
+	({								\
+		if (test__start_subtest(#name)) {			\
+			test_ ## name();				\
+		}							\
+	})
+
+static void *test_tunnel_run_tests(void *arg)
+{
+	cleanup();
+	config_device();
+
+	RUN_TEST(vxlan_tunnel);
+	RUN_TEST(ip6vxlan_tunnel);
+
+	cleanup();
+
+	return NULL;
+}
+
+void serial_test_tunnel(void)
+{
+	pthread_t test_thread;
+	int err;
+
+	/* Run the tests in their own thread to isolate the namespace changes
+	 * so they do not affect the environment of other tests.
+	 * (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
+	 */
+	err = pthread_create(&test_thread, NULL, &test_tunnel_run_tests, NULL);
+	if (ASSERT_OK(err, "pthread_create"))
+		ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join");
+}
diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
index ef0dde83b85a..b33ceff2f74d 100644
--- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
@@ -40,8 +40,16 @@ struct vxlan_metadata {
 	__u32     gbp;
 };
 
-SEC("gre_set_tunnel")
-int _gre_set_tunnel(struct __sk_buff *skb)
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u32);
+} local_ip_map SEC(".maps");
+
+
+SEC("tc")
+int gre_set_tunnel(struct __sk_buff *skb)
 {
 	int ret;
 	struct bpf_tunnel_key key;
@@ -62,8 +70,8 @@ int _gre_set_tunnel(struct __sk_buff *skb)
 	return TC_ACT_OK;
 }
 
-SEC("gre_get_tunnel")
-int _gre_get_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int gre_get_tunnel(struct __sk_buff *skb)
 {
 	int ret;
 	struct bpf_tunnel_key key;
@@ -79,8 +87,8 @@ int _gre_get_tunnel(struct __sk_buff *skb)
 	return TC_ACT_OK;
 }
 
-SEC("ip6gretap_set_tunnel")
-int _ip6gretap_set_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ip6gretap_set_tunnel(struct __sk_buff *skb)
 {
 	struct bpf_tunnel_key key;
 	int ret;
@@ -103,8 +111,8 @@ int _ip6gretap_set_tunnel(struct __sk_buff *skb)
 	return TC_ACT_OK;
 }
 
-SEC("ip6gretap_get_tunnel")
-int _ip6gretap_get_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ip6gretap_get_tunnel(struct __sk_buff *skb)
 {
 	char fmt[] = "key %d remote ip6 ::%x label %x\n";
 	struct bpf_tunnel_key key;
@@ -123,8 +131,8 @@ int _ip6gretap_get_tunnel(struct __sk_buff *skb)
 	return TC_ACT_OK;
 }
 
-SEC("erspan_set_tunnel")
-int _erspan_set_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int erspan_set_tunnel(struct __sk_buff *skb)
 {
 	struct bpf_tunnel_key key;
 	struct erspan_metadata md;
@@ -166,8 +174,8 @@ int _erspan_set_tunnel(struct __sk_buff *skb)
 	return TC_ACT_OK;
 }
 
-SEC("erspan_get_tunnel")
-int _erspan_get_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int erspan_get_tunnel(struct __sk_buff *skb)
 {
 	char fmt[] = "key %d remote ip 0x%x erspan version %d\n";
 	struct bpf_tunnel_key key;
@@ -207,8 +215,8 @@ int _erspan_get_tunnel(struct __sk_buff *skb)
 	return TC_ACT_OK;
 }
 
-SEC("ip4ip6erspan_set_tunnel")
-int _ip4ip6erspan_set_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ip4ip6erspan_set_tunnel(struct __sk_buff *skb)
 {
 	struct bpf_tunnel_key key;
 	struct erspan_metadata md;
@@ -251,8 +259,8 @@ int _ip4ip6erspan_set_tunnel(struct __sk_buff *skb)
 	return TC_ACT_OK;
 }
 
-SEC("ip4ip6erspan_get_tunnel")
-int _ip4ip6erspan_get_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ip4ip6erspan_get_tunnel(struct __sk_buff *skb)
 {
 	char fmt[] = "ip6erspan get key %d remote ip6 ::%x erspan version %d\n";
 	struct bpf_tunnel_key key;
@@ -293,14 +301,62 @@ int _ip4ip6erspan_get_tunnel(struct __sk_buff *skb)
 	return TC_ACT_OK;
 }
 
-SEC("vxlan_set_tunnel")
-int _vxlan_set_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int vxlan_set_tunnel_dst(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+	struct vxlan_metadata md;
+	__u32 index = 0;
+	__u32 *local_ip = NULL;
+
+	local_ip = bpf_map_lookup_elem(&local_ip_map, &index);
+	if (!local_ip) {
+		log_err(ret);
+		return TC_ACT_SHOT;
+	}
+
+	__builtin_memset(&key, 0x0, sizeof(key));
+	key.local_ipv4 = 0xac100164; /* 172.16.1.100 */
+	key.remote_ipv4 = *local_ip;
+	key.tunnel_id = 2;
+	key.tunnel_tos = 0;
+	key.tunnel_ttl = 64;
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_ZERO_CSUM_TX);
+	if (ret < 0) {
+		log_err(ret);
+		return TC_ACT_SHOT;
+	}
+
+	md.gbp = 0x800FF; /* Set VXLAN Group Policy extension */
+	ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
+	if (ret < 0) {
+		log_err(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("tc")
+int vxlan_set_tunnel_src(struct __sk_buff *skb)
 {
 	int ret;
 	struct bpf_tunnel_key key;
 	struct vxlan_metadata md;
+	__u32 index = 0;
+	__u32 *local_ip = NULL;
+
+	local_ip = bpf_map_lookup_elem(&local_ip_map, &index);
+	if (!local_ip) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
 
 	__builtin_memset(&key, 0x0, sizeof(key));
+	key.local_ipv4 = *local_ip;
 	key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
 	key.tunnel_id = 2;
 	key.tunnel_tos = 0;
@@ -323,13 +379,22 @@ int _vxlan_set_tunnel(struct __sk_buff *skb)
 	return TC_ACT_OK;
 }
 
-SEC("vxlan_get_tunnel")
-int _vxlan_get_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int vxlan_get_tunnel_src(struct __sk_buff *skb)
 {
 	int ret;
 	struct bpf_tunnel_key key;
 	struct vxlan_metadata md;
 	char fmt[] = "key %d remote ip 0x%x vxlan gbp 0x%x\n";
+	char fmt2[] = "local ip 0x%x\n";
+	__u32 index = 0;
+	__u32 *local_ip = NULL;
+
+	local_ip = bpf_map_lookup_elem(&local_ip_map, &index);
+	if (!local_ip) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
 
 	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
 	if (ret < 0) {
@@ -343,19 +408,65 @@ int _vxlan_get_tunnel(struct __sk_buff *skb)
 		return TC_ACT_SHOT;
 	}
 
-	bpf_trace_printk(fmt, sizeof(fmt),
-			key.tunnel_id, key.remote_ipv4, md.gbp);
+	if (key.local_ipv4 != *local_ip || md.gbp != 0x800FF) {
+		bpf_printk("vxlan key %d local ip 0x%x remote ip 0x%x gbp 0x%x\n",
+			   key.tunnel_id, key.local_ipv4,
+			   key.remote_ipv4, md.gbp);
+		bpf_printk("local_ip 0x%x\n", *local_ip);
+		log_err(ret);
+		return TC_ACT_SHOT;
+	}
 
 	return TC_ACT_OK;
 }
 
-SEC("ip6vxlan_set_tunnel")
-int _ip6vxlan_set_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ip6vxlan_set_tunnel_dst(struct __sk_buff *skb)
 {
 	struct bpf_tunnel_key key;
 	int ret;
+	__u32 index = 0;
+	__u32 *local_ip;
+
+	local_ip = bpf_map_lookup_elem(&local_ip_map, &index);
+	if (!local_ip) {
+		log_err(ret);
+		return TC_ACT_SHOT;
+	}
 
 	__builtin_memset(&key, 0x0, sizeof(key));
+	key.local_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+	key.remote_ipv6[3] = bpf_htonl(*local_ip);
+	key.tunnel_id = 22;
+	key.tunnel_tos = 0;
+	key.tunnel_ttl = 64;
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		log_err(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("tc")
+int ip6vxlan_set_tunnel_src(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key key;
+	int ret;
+	__u32 index = 0;
+	__u32 *local_ip;
+
+	local_ip = bpf_map_lookup_elem(&local_ip_map, &index);
+	if (!local_ip) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	__builtin_memset(&key, 0x0, sizeof(key));
+	key.local_ipv6[3] = bpf_htonl(*local_ip);
 	key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
 	key.tunnel_id = 22;
 	key.tunnel_tos = 0;
@@ -371,12 +482,21 @@ int _ip6vxlan_set_tunnel(struct __sk_buff *skb)
 	return TC_ACT_OK;
 }
 
-SEC("ip6vxlan_get_tunnel")
-int _ip6vxlan_get_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ip6vxlan_get_tunnel_src(struct __sk_buff *skb)
 {
 	char fmt[] = "key %d remote ip6 ::%x label %x\n";
+	char fmt2[] = "local ip6 ::%x\n";
 	struct bpf_tunnel_key key;
 	int ret;
+	__u32 index = 0;
+	__u32 *local_ip;
+
+	local_ip = bpf_map_lookup_elem(&local_ip_map, &index);
+	if (!local_ip) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
 
 	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
 				     BPF_F_TUNINFO_IPV6);
@@ -385,14 +505,20 @@ int _ip6vxlan_get_tunnel(struct __sk_buff *skb)
 		return TC_ACT_SHOT;
 	}
 
-	bpf_trace_printk(fmt, sizeof(fmt),
-			 key.tunnel_id, key.remote_ipv6[3], key.tunnel_label);
+	if (bpf_ntohl(key.local_ipv6[3]) != *local_ip) {
+		bpf_trace_printk(fmt, sizeof(fmt),
+				 key.tunnel_id,
+				 key.remote_ipv6[3], key.tunnel_label);
+		bpf_trace_printk(fmt2, sizeof(fmt2), key.local_ipv6[3]);
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
 
 	return TC_ACT_OK;
 }
 
-SEC("geneve_set_tunnel")
-int _geneve_set_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int geneve_set_tunnel(struct __sk_buff *skb)
 {
 	int ret;
 	struct bpf_tunnel_key key;
@@ -429,8 +555,8 @@ int _geneve_set_tunnel(struct __sk_buff *skb)
 	return TC_ACT_OK;
 }
 
-SEC("geneve_get_tunnel")
-int _geneve_get_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int geneve_get_tunnel(struct __sk_buff *skb)
 {
 	int ret;
 	struct bpf_tunnel_key key;
@@ -452,8 +578,8 @@ int _geneve_get_tunnel(struct __sk_buff *skb)
 	return TC_ACT_OK;
 }
 
-SEC("ip6geneve_set_tunnel")
-int _ip6geneve_set_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ip6geneve_set_tunnel(struct __sk_buff *skb)
 {
 	struct bpf_tunnel_key key;
 	struct geneve_opt gopt;
@@ -490,8 +616,8 @@ int _ip6geneve_set_tunnel(struct __sk_buff *skb)
 	return TC_ACT_OK;
 }
 
-SEC("ip6geneve_get_tunnel")
-int _ip6geneve_get_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ip6geneve_get_tunnel(struct __sk_buff *skb)
 {
 	char fmt[] = "key %d remote ip 0x%x geneve class 0x%x\n";
 	struct bpf_tunnel_key key;
@@ -515,8 +641,8 @@ int _ip6geneve_get_tunnel(struct __sk_buff *skb)
 	return TC_ACT_OK;
 }
 
-SEC("ipip_set_tunnel")
-int _ipip_set_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ipip_set_tunnel(struct __sk_buff *skb)
 {
 	struct bpf_tunnel_key key = {};
 	void *data = (void *)(long)skb->data;
@@ -544,8 +670,8 @@ int _ipip_set_tunnel(struct __sk_buff *skb)
 	return TC_ACT_OK;
 }
 
-SEC("ipip_get_tunnel")
-int _ipip_get_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ipip_get_tunnel(struct __sk_buff *skb)
 {
 	int ret;
 	struct bpf_tunnel_key key;
@@ -561,8 +687,8 @@ int _ipip_get_tunnel(struct __sk_buff *skb)
 	return TC_ACT_OK;
 }
 
-SEC("ipip6_set_tunnel")
-int _ipip6_set_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ipip6_set_tunnel(struct __sk_buff *skb)
 {
 	struct bpf_tunnel_key key = {};
 	void *data = (void *)(long)skb->data;
@@ -592,8 +718,8 @@ int _ipip6_set_tunnel(struct __sk_buff *skb)
 	return TC_ACT_OK;
 }
 
-SEC("ipip6_get_tunnel")
-int _ipip6_get_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ipip6_get_tunnel(struct __sk_buff *skb)
 {
 	int ret;
 	struct bpf_tunnel_key key;
@@ -611,8 +737,8 @@ int _ipip6_get_tunnel(struct __sk_buff *skb)
 	return TC_ACT_OK;
 }
 
-SEC("ip6ip6_set_tunnel")
-int _ip6ip6_set_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ip6ip6_set_tunnel(struct __sk_buff *skb)
 {
 	struct bpf_tunnel_key key = {};
 	void *data = (void *)(long)skb->data;
@@ -641,8 +767,8 @@ int _ip6ip6_set_tunnel(struct __sk_buff *skb)
 	return TC_ACT_OK;
 }
 
-SEC("ip6ip6_get_tunnel")
-int _ip6ip6_get_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ip6ip6_get_tunnel(struct __sk_buff *skb)
 {
 	int ret;
 	struct bpf_tunnel_key key;
@@ -660,8 +786,8 @@ int _ip6ip6_get_tunnel(struct __sk_buff *skb)
 	return TC_ACT_OK;
 }
 
-SEC("xfrm_get_state")
-int _xfrm_get_state(struct __sk_buff *skb)
+SEC("tc")
+int xfrm_get_state(struct __sk_buff *skb)
 {
 	struct bpf_xfrm_state x;
 	char fmt[] = "reqid %d spi 0x%x remote ip 0x%x\n";
diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh
index 2817d9948d59..e9ebc67d73f7 100755
--- a/tools/testing/selftests/bpf/test_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tunnel.sh
@@ -45,6 +45,7 @@
 # 5) Tunnel protocol handler, ex: vxlan_rcv, decap the packet
 # 6) Forward the packet to the overlay tnl dev
 
+BPF_PIN_TUNNEL_DIR="/sys/fs/bpf/tc/tunnel"
 PING_ARG="-c 3 -w 10 -q"
 ret=0
 GREEN='\033[0;92m'
@@ -155,52 +156,6 @@ add_ip6erspan_tunnel()
 	ip link set dev $DEV up
 }
 
-add_vxlan_tunnel()
-{
-	# Set static ARP entry here because iptables set-mark works
-	# on L3 packet, as a result not applying to ARP packets,
-	# causing errors at get_tunnel_{key/opt}.
-
-	# at_ns0 namespace
-	ip netns exec at_ns0 \
-		ip link add dev $DEV_NS type $TYPE \
-		id 2 dstport 4789 gbp remote 172.16.1.200
-	ip netns exec at_ns0 \
-		ip link set dev $DEV_NS address 52:54:00:d9:01:00 up
-	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-	ip netns exec at_ns0 \
-		ip neigh add 10.1.1.200 lladdr 52:54:00:d9:02:00 dev $DEV_NS
-	ip netns exec at_ns0 iptables -A OUTPUT -j MARK --set-mark 0x800FF
-
-	# root namespace
-	ip link add dev $DEV type $TYPE external gbp dstport 4789
-	ip link set dev $DEV address 52:54:00:d9:02:00 up
-	ip addr add dev $DEV 10.1.1.200/24
-	ip neigh add 10.1.1.100 lladdr 52:54:00:d9:01:00 dev $DEV
-}
-
-add_ip6vxlan_tunnel()
-{
-	#ip netns exec at_ns0 ip -4 addr del 172.16.1.100 dev veth0
-	ip netns exec at_ns0 ip -6 addr add ::11/96 dev veth0
-	ip netns exec at_ns0 ip link set dev veth0 up
-	#ip -4 addr del 172.16.1.200 dev veth1
-	ip -6 addr add dev veth1 ::22/96
-	ip link set dev veth1 up
-
-	# at_ns0 namespace
-	ip netns exec at_ns0 \
-		ip link add dev $DEV_NS type $TYPE id 22 dstport 4789 \
-		local ::11 remote ::22
-	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-	ip netns exec at_ns0 ip link set dev $DEV_NS up
-
-	# root namespace
-	ip link add dev $DEV type $TYPE external dstport 4789
-	ip addr add dev $DEV 10.1.1.200/24
-	ip link set dev $DEV up
-}
-
 add_geneve_tunnel()
 {
 	# at_ns0 namespace
@@ -403,58 +358,6 @@ test_ip6erspan()
         echo -e ${GREEN}"PASS: $TYPE"${NC}
 }
 
-test_vxlan()
-{
-	TYPE=vxlan
-	DEV_NS=vxlan00
-	DEV=vxlan11
-	ret=0
-
-	check $TYPE
-	config_device
-	add_vxlan_tunnel
-	attach_bpf $DEV vxlan_set_tunnel vxlan_get_tunnel
-	ping $PING_ARG 10.1.1.100
-	check_err $?
-	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
-	check_err $?
-	cleanup
-
-	if [ $ret -ne 0 ]; then
-                echo -e ${RED}"FAIL: $TYPE"${NC}
-                return 1
-        fi
-        echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_ip6vxlan()
-{
-	TYPE=vxlan
-	DEV_NS=ip6vxlan00
-	DEV=ip6vxlan11
-	ret=0
-
-	check $TYPE
-	config_device
-	add_ip6vxlan_tunnel
-	ip link set dev veth1 mtu 1500
-	attach_bpf $DEV ip6vxlan_set_tunnel ip6vxlan_get_tunnel
-	# underlay
-	ping6 $PING_ARG ::11
-	# ip4 over ip6
-	ping $PING_ARG 10.1.1.100
-	check_err $?
-	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
-	check_err $?
-	cleanup
-
-	if [ $ret -ne 0 ]; then
-                echo -e ${RED}"FAIL: ip6$TYPE"${NC}
-                return 1
-        fi
-        echo -e ${GREEN}"PASS: ip6$TYPE"${NC}
-}
-
 test_geneve()
 {
 	TYPE=geneve
@@ -641,9 +544,11 @@ test_xfrm_tunnel()
 	config_device
 	> /sys/kernel/debug/tracing/trace
 	setup_xfrm_tunnel
+	mkdir -p ${BPF_PIN_TUNNEL_DIR}
+	bpftool prog loadall ./test_tunnel_kern.o ${BPF_PIN_TUNNEL_DIR}
 	tc qdisc add dev veth1 clsact
-	tc filter add dev veth1 proto ip ingress bpf da obj test_tunnel_kern.o \
-		sec xfrm_get_state
+	tc filter add dev veth1 proto ip ingress bpf da object-pinned \
+		${BPF_PIN_TUNNEL_DIR}/xfrm_get_state
 	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
 	sleep 1
 	grep "reqid 1" /sys/kernel/debug/tracing/trace
@@ -666,13 +571,17 @@ attach_bpf()
 	DEV=$1
 	SET=$2
 	GET=$3
+	mkdir -p ${BPF_PIN_TUNNEL_DIR}
+	bpftool prog loadall ./test_tunnel_kern.o ${BPF_PIN_TUNNEL_DIR}/
 	tc qdisc add dev $DEV clsact
-	tc filter add dev $DEV egress bpf da obj test_tunnel_kern.o sec $SET
-	tc filter add dev $DEV ingress bpf da obj test_tunnel_kern.o sec $GET
+	tc filter add dev $DEV egress bpf da object-pinned ${BPF_PIN_TUNNEL_DIR}/$SET
+	tc filter add dev $DEV ingress bpf da object-pinned ${BPF_PIN_TUNNEL_DIR}/$GET
 }
 
 cleanup()
 {
+        rm -rf ${BPF_PIN_TUNNEL_DIR}
+
 	ip netns delete at_ns0 2> /dev/null
 	ip link del veth1 2> /dev/null
 	ip link del ipip11 2> /dev/null
@@ -681,8 +590,6 @@ cleanup()
 	ip link del gretap11 2> /dev/null
 	ip link del ip6gre11 2> /dev/null
 	ip link del ip6gretap11 2> /dev/null
-	ip link del vxlan11 2> /dev/null
-	ip link del ip6vxlan11 2> /dev/null
 	ip link del geneve11 2> /dev/null
 	ip link del ip6geneve11 2> /dev/null
 	ip link del erspan11 2> /dev/null
@@ -714,7 +621,6 @@ enable_debug()
 {
 	echo 'file ip_gre.c +p' > /sys/kernel/debug/dynamic_debug/control
 	echo 'file ip6_gre.c +p' > /sys/kernel/debug/dynamic_debug/control
-	echo 'file vxlan.c +p' > /sys/kernel/debug/dynamic_debug/control
 	echo 'file geneve.c +p' > /sys/kernel/debug/dynamic_debug/control
 	echo 'file ipip.c +p' > /sys/kernel/debug/dynamic_debug/control
 }
@@ -750,14 +656,6 @@ bpf_tunnel_test()
 	test_ip6erspan v2
 	errors=$(( $errors + $? ))
 
-	echo "Testing VXLAN tunnel..."
-	test_vxlan
-	errors=$(( $errors + $? ))
-
-	echo "Testing IP6VXLAN tunnel..."
-	test_ip6vxlan
-	errors=$(( $errors + $? ))
-
 	echo "Testing GENEVE tunnel..."
 	test_geneve
 	errors=$(( $errors + $? ))
-- 
cgit 


From 71b2ec21c3313e4cea38d5a6b009e99df30e540a Mon Sep 17 00:00:00 2001
From: Kaixi Fan <fankaixi.li@bytedance.com>
Date: Sat, 30 Apr 2022 15:48:44 +0800
Subject: selftests/bpf: Replace bpf_trace_printk in tunnel kernel code

Replace bpf_trace_printk with bpf_printk in test_tunnel_kern.c.
function bpf_printk is more easier and useful than bpf_trace_printk.

Signed-off-by: Kaixi Fan <fankaixi.li@bytedance.com>
Link: https://lore.kernel.org/r/20220430074844.69214-4-fankaixi.li@bytedance.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../testing/selftests/bpf/progs/test_tunnel_kern.c | 169 +++++++++------------
 1 file changed, 72 insertions(+), 97 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
index b33ceff2f74d..17f2f325b3f3 100644
--- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
@@ -21,10 +21,7 @@
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
 
-#define ERROR(ret) do {\
-		char fmt[] = "ERROR line:%d ret:%d\n";\
-		bpf_trace_printk(fmt, sizeof(fmt), __LINE__, ret); \
-	} while (0)
+#define log_err(__ret) bpf_printk("ERROR line:%d ret:%d\n", __LINE__, __ret)
 
 struct geneve_opt {
 	__be16	opt_class;
@@ -47,7 +44,6 @@ struct {
 	__type(value, __u32);
 } local_ip_map SEC(".maps");
 
-
 SEC("tc")
 int gre_set_tunnel(struct __sk_buff *skb)
 {
@@ -63,7 +59,7 @@ int gre_set_tunnel(struct __sk_buff *skb)
 	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
 				     BPF_F_ZERO_CSUM_TX | BPF_F_SEQ_NUMBER);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
@@ -75,15 +71,14 @@ int gre_get_tunnel(struct __sk_buff *skb)
 {
 	int ret;
 	struct bpf_tunnel_key key;
-	char fmt[] = "key %d remote ip 0x%x\n";
 
 	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
-	bpf_trace_printk(fmt, sizeof(fmt), key.tunnel_id, key.remote_ipv4);
+	bpf_printk("key %d remote ip 0x%x\n", key.tunnel_id, key.remote_ipv4);
 	return TC_ACT_OK;
 }
 
@@ -104,7 +99,7 @@ int ip6gretap_set_tunnel(struct __sk_buff *skb)
 				     BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
 				     BPF_F_SEQ_NUMBER);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
@@ -114,19 +109,18 @@ int ip6gretap_set_tunnel(struct __sk_buff *skb)
 SEC("tc")
 int ip6gretap_get_tunnel(struct __sk_buff *skb)
 {
-	char fmt[] = "key %d remote ip6 ::%x label %x\n";
 	struct bpf_tunnel_key key;
 	int ret;
 
 	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
 				     BPF_F_TUNINFO_IPV6);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
-	bpf_trace_printk(fmt, sizeof(fmt),
-			 key.tunnel_id, key.remote_ipv6[3], key.tunnel_label);
+	bpf_printk("key %d remote ip6 ::%x label %x\n",
+		   key.tunnel_id, key.remote_ipv6[3], key.tunnel_label);
 
 	return TC_ACT_OK;
 }
@@ -147,7 +141,7 @@ int erspan_set_tunnel(struct __sk_buff *skb)
 	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
 				     BPF_F_ZERO_CSUM_TX);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
@@ -167,7 +161,7 @@ int erspan_set_tunnel(struct __sk_buff *skb)
 
 	ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
@@ -177,7 +171,6 @@ int erspan_set_tunnel(struct __sk_buff *skb)
 SEC("tc")
 int erspan_get_tunnel(struct __sk_buff *skb)
 {
-	char fmt[] = "key %d remote ip 0x%x erspan version %d\n";
 	struct bpf_tunnel_key key;
 	struct erspan_metadata md;
 	__u32 index;
@@ -185,31 +178,27 @@ int erspan_get_tunnel(struct __sk_buff *skb)
 
 	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
 	ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
-	bpf_trace_printk(fmt, sizeof(fmt),
-			key.tunnel_id, key.remote_ipv4, md.version);
+	bpf_printk("key %d remote ip 0x%x erspan version %d\n",
+		   key.tunnel_id, key.remote_ipv4, md.version);
 
 #ifdef ERSPAN_V1
-	char fmt2[] = "\tindex %x\n";
-
 	index = bpf_ntohl(md.u.index);
-	bpf_trace_printk(fmt2, sizeof(fmt2), index);
+	bpf_printk("\tindex %x\n", index);
 #else
-	char fmt2[] = "\tdirection %d hwid %x timestamp %u\n";
-
-	bpf_trace_printk(fmt2, sizeof(fmt2),
-			 md.u.md2.dir,
-			 (md.u.md2.hwid_upper << 4) + md.u.md2.hwid,
-			 bpf_ntohl(md.u.md2.timestamp));
+	bpf_printk("\tdirection %d hwid %x timestamp %u\n",
+		   md.u.md2.dir,
+		   (md.u.md2.hwid_upper << 4) + md.u.md2.hwid,
+		   bpf_ntohl(md.u.md2.timestamp));
 #endif
 
 	return TC_ACT_OK;
@@ -231,7 +220,7 @@ int ip4ip6erspan_set_tunnel(struct __sk_buff *skb)
 	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
 				     BPF_F_TUNINFO_IPV6);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
@@ -252,7 +241,7 @@ int ip4ip6erspan_set_tunnel(struct __sk_buff *skb)
 
 	ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
@@ -262,7 +251,6 @@ int ip4ip6erspan_set_tunnel(struct __sk_buff *skb)
 SEC("tc")
 int ip4ip6erspan_get_tunnel(struct __sk_buff *skb)
 {
-	char fmt[] = "ip6erspan get key %d remote ip6 ::%x erspan version %d\n";
 	struct bpf_tunnel_key key;
 	struct erspan_metadata md;
 	__u32 index;
@@ -271,31 +259,27 @@ int ip4ip6erspan_get_tunnel(struct __sk_buff *skb)
 	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
 				     BPF_F_TUNINFO_IPV6);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
 	ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
-	bpf_trace_printk(fmt, sizeof(fmt),
-			key.tunnel_id, key.remote_ipv4, md.version);
+	bpf_printk("ip6erspan get key %d remote ip6 ::%x erspan version %d\n",
+		   key.tunnel_id, key.remote_ipv4, md.version);
 
 #ifdef ERSPAN_V1
-	char fmt2[] = "\tindex %x\n";
-
 	index = bpf_ntohl(md.u.index);
-	bpf_trace_printk(fmt2, sizeof(fmt2), index);
+	bpf_printk("\tindex %x\n", index);
 #else
-	char fmt2[] = "\tdirection %d hwid %x timestamp %u\n";
-
-	bpf_trace_printk(fmt2, sizeof(fmt2),
-			 md.u.md2.dir,
-			 (md.u.md2.hwid_upper << 4) + md.u.md2.hwid,
-			 bpf_ntohl(md.u.md2.timestamp));
+	bpf_printk("\tdirection %d hwid %x timestamp %u\n",
+		   md.u.md2.dir,
+		   (md.u.md2.hwid_upper << 4) + md.u.md2.hwid,
+		   bpf_ntohl(md.u.md2.timestamp));
 #endif
 
 	return TC_ACT_OK;
@@ -351,7 +335,7 @@ int vxlan_set_tunnel_src(struct __sk_buff *skb)
 
 	local_ip = bpf_map_lookup_elem(&local_ip_map, &index);
 	if (!local_ip) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
@@ -365,14 +349,14 @@ int vxlan_set_tunnel_src(struct __sk_buff *skb)
 	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
 				     BPF_F_ZERO_CSUM_TX);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
 	md.gbp = 0x800FF; /* Set VXLAN Group Policy extension */
 	ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
@@ -385,26 +369,24 @@ int vxlan_get_tunnel_src(struct __sk_buff *skb)
 	int ret;
 	struct bpf_tunnel_key key;
 	struct vxlan_metadata md;
-	char fmt[] = "key %d remote ip 0x%x vxlan gbp 0x%x\n";
-	char fmt2[] = "local ip 0x%x\n";
 	__u32 index = 0;
 	__u32 *local_ip = NULL;
 
 	local_ip = bpf_map_lookup_elem(&local_ip_map, &index);
 	if (!local_ip) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
 	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
 	ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
@@ -461,7 +443,7 @@ int ip6vxlan_set_tunnel_src(struct __sk_buff *skb)
 
 	local_ip = bpf_map_lookup_elem(&local_ip_map, &index);
 	if (!local_ip) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
@@ -475,7 +457,7 @@ int ip6vxlan_set_tunnel_src(struct __sk_buff *skb)
 	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
 				     BPF_F_TUNINFO_IPV6);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
@@ -485,8 +467,6 @@ int ip6vxlan_set_tunnel_src(struct __sk_buff *skb)
 SEC("tc")
 int ip6vxlan_get_tunnel_src(struct __sk_buff *skb)
 {
-	char fmt[] = "key %d remote ip6 ::%x label %x\n";
-	char fmt2[] = "local ip6 ::%x\n";
 	struct bpf_tunnel_key key;
 	int ret;
 	__u32 index = 0;
@@ -494,23 +474,23 @@ int ip6vxlan_get_tunnel_src(struct __sk_buff *skb)
 
 	local_ip = bpf_map_lookup_elem(&local_ip_map, &index);
 	if (!local_ip) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
 	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
 				     BPF_F_TUNINFO_IPV6);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
 	if (bpf_ntohl(key.local_ipv6[3]) != *local_ip) {
-		bpf_trace_printk(fmt, sizeof(fmt),
-				 key.tunnel_id,
-				 key.remote_ipv6[3], key.tunnel_label);
-		bpf_trace_printk(fmt2, sizeof(fmt2), key.local_ipv6[3]);
-		ERROR(ret);
+		bpf_printk("ip6vxlan key %d local ip6 ::%x remote ip6 ::%x label 0x%x\n",
+			   key.tunnel_id, bpf_ntohl(key.local_ipv6[3]),
+			   bpf_ntohl(key.remote_ipv6[3]), key.tunnel_label);
+		bpf_printk("local_ip 0x%x\n", *local_ip);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
@@ -542,13 +522,13 @@ int geneve_set_tunnel(struct __sk_buff *skb)
 	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
 				     BPF_F_ZERO_CSUM_TX);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
 	ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt));
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
@@ -561,11 +541,10 @@ int geneve_get_tunnel(struct __sk_buff *skb)
 	int ret;
 	struct bpf_tunnel_key key;
 	struct geneve_opt gopt;
-	char fmt[] = "key %d remote ip 0x%x geneve class 0x%x\n";
 
 	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
@@ -573,8 +552,8 @@ int geneve_get_tunnel(struct __sk_buff *skb)
 	if (ret < 0)
 		gopt.opt_class = 0;
 
-	bpf_trace_printk(fmt, sizeof(fmt),
-			key.tunnel_id, key.remote_ipv4, gopt.opt_class);
+	bpf_printk("key %d remote ip 0x%x geneve class 0x%x\n",
+		   key.tunnel_id, key.remote_ipv4, gopt.opt_class);
 	return TC_ACT_OK;
 }
 
@@ -594,7 +573,7 @@ int ip6geneve_set_tunnel(struct __sk_buff *skb)
 	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
 				     BPF_F_TUNINFO_IPV6);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
@@ -609,7 +588,7 @@ int ip6geneve_set_tunnel(struct __sk_buff *skb)
 
 	ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt));
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
@@ -619,7 +598,6 @@ int ip6geneve_set_tunnel(struct __sk_buff *skb)
 SEC("tc")
 int ip6geneve_get_tunnel(struct __sk_buff *skb)
 {
-	char fmt[] = "key %d remote ip 0x%x geneve class 0x%x\n";
 	struct bpf_tunnel_key key;
 	struct geneve_opt gopt;
 	int ret;
@@ -627,7 +605,7 @@ int ip6geneve_get_tunnel(struct __sk_buff *skb)
 	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
 				     BPF_F_TUNINFO_IPV6);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
@@ -635,8 +613,8 @@ int ip6geneve_get_tunnel(struct __sk_buff *skb)
 	if (ret < 0)
 		gopt.opt_class = 0;
 
-	bpf_trace_printk(fmt, sizeof(fmt),
-			key.tunnel_id, key.remote_ipv4, gopt.opt_class);
+	bpf_printk("key %d remote ip 0x%x geneve class 0x%x\n",
+		   key.tunnel_id, key.remote_ipv4, gopt.opt_class);
 
 	return TC_ACT_OK;
 }
@@ -652,7 +630,7 @@ int ipip_set_tunnel(struct __sk_buff *skb)
 
 	/* single length check */
 	if (data + sizeof(*iph) > data_end) {
-		ERROR(1);
+		log_err(1);
 		return TC_ACT_SHOT;
 	}
 
@@ -663,7 +641,7 @@ int ipip_set_tunnel(struct __sk_buff *skb)
 
 	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
@@ -675,15 +653,14 @@ int ipip_get_tunnel(struct __sk_buff *skb)
 {
 	int ret;
 	struct bpf_tunnel_key key;
-	char fmt[] = "remote ip 0x%x\n";
 
 	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
-	bpf_trace_printk(fmt, sizeof(fmt), key.remote_ipv4);
+	bpf_printk("remote ip 0x%x\n", key.remote_ipv4);
 	return TC_ACT_OK;
 }
 
@@ -698,7 +675,7 @@ int ipip6_set_tunnel(struct __sk_buff *skb)
 
 	/* single length check */
 	if (data + sizeof(*iph) > data_end) {
-		ERROR(1);
+		log_err(1);
 		return TC_ACT_SHOT;
 	}
 
@@ -711,7 +688,7 @@ int ipip6_set_tunnel(struct __sk_buff *skb)
 	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
 				     BPF_F_TUNINFO_IPV6);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
@@ -723,17 +700,16 @@ int ipip6_get_tunnel(struct __sk_buff *skb)
 {
 	int ret;
 	struct bpf_tunnel_key key;
-	char fmt[] = "remote ip6 %x::%x\n";
 
 	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
 				     BPF_F_TUNINFO_IPV6);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
-	bpf_trace_printk(fmt, sizeof(fmt), bpf_htonl(key.remote_ipv6[0]),
-			 bpf_htonl(key.remote_ipv6[3]));
+	bpf_printk("remote ip6 %x::%x\n", bpf_htonl(key.remote_ipv6[0]),
+		   bpf_htonl(key.remote_ipv6[3]));
 	return TC_ACT_OK;
 }
 
@@ -748,7 +724,7 @@ int ip6ip6_set_tunnel(struct __sk_buff *skb)
 
 	/* single length check */
 	if (data + sizeof(*iph) > data_end) {
-		ERROR(1);
+		log_err(1);
 		return TC_ACT_SHOT;
 	}
 
@@ -760,7 +736,7 @@ int ip6ip6_set_tunnel(struct __sk_buff *skb)
 	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
 				     BPF_F_TUNINFO_IPV6);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
@@ -772,17 +748,16 @@ int ip6ip6_get_tunnel(struct __sk_buff *skb)
 {
 	int ret;
 	struct bpf_tunnel_key key;
-	char fmt[] = "remote ip6 %x::%x\n";
 
 	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
 				     BPF_F_TUNINFO_IPV6);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
-	bpf_trace_printk(fmt, sizeof(fmt), bpf_htonl(key.remote_ipv6[0]),
-			 bpf_htonl(key.remote_ipv6[3]));
+	bpf_printk("remote ip6 %x::%x\n", bpf_htonl(key.remote_ipv6[0]),
+		   bpf_htonl(key.remote_ipv6[3]));
 	return TC_ACT_OK;
 }
 
@@ -790,15 +765,15 @@ SEC("tc")
 int xfrm_get_state(struct __sk_buff *skb)
 {
 	struct bpf_xfrm_state x;
-	char fmt[] = "reqid %d spi 0x%x remote ip 0x%x\n";
 	int ret;
 
 	ret = bpf_skb_get_xfrm_state(skb, 0, &x, sizeof(x), 0);
 	if (ret < 0)
 		return TC_ACT_OK;
 
-	bpf_trace_printk(fmt, sizeof(fmt), x.reqid, bpf_ntohl(x.spi),
-			 bpf_ntohl(x.remote_ipv4));
+	bpf_printk("reqid %d spi 0x%x remote ip 0x%x\n",
+		   x.reqid, bpf_ntohl(x.spi),
+		   bpf_ntohl(x.remote_ipv4));
 	return TC_ACT_OK;
 }
 
-- 
cgit 


From 6b2d16b6579acf63456817fda3bc9bf4a35c6a60 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Tue, 10 May 2022 17:52:31 +0200
Subject: selftests/bpf: Fix result check for test_bpf_hash_map

The original condition looks like a typo, verify the skeleton loading
result instead.

Signed-off-by: Dmitrii Dolgov <9erthalion6@gmail.com>
Link: https://lore.kernel.org/r/20220510155233.9815-3-9erthalion6@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 2c403ddc8076..6943209b7457 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -629,8 +629,7 @@ static void test_bpf_hash_map(void)
 	skel->bss->in_test_mode = true;
 
 	err = bpf_iter_bpf_hash_map__load(skel);
-	if (CHECK(!skel, "bpf_iter_bpf_hash_map__load",
-		  "skeleton load failed\n"))
+	if (!ASSERT_OK(err, "bpf_iter_bpf_hash_map__load"))
 		goto out;
 
 	/* iterator with hashmap2 and hashmap3 should fail */
-- 
cgit 


From f78625fdc95efeaac6deea5a4ed992dff7358c1e Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Tue, 10 May 2022 17:52:32 +0200
Subject: selftests/bpf: Use ASSERT_* instead of CHECK

Replace usage of CHECK with a corresponding ASSERT_* macro for bpf_iter
tests. Only done if the final result is equivalent, no changes when
replacement means loosing some information, e.g. from formatting string.

Signed-off-by: Dmitrii Dolgov <9erthalion6@gmail.com>
Link: https://lore.kernel.org/r/20220510155233.9815-4-9erthalion6@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 242 ++++++++--------------
 1 file changed, 88 insertions(+), 154 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 6943209b7457..48289c886058 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -34,8 +34,7 @@ static void test_btf_id_or_null(void)
 	struct bpf_iter_test_kern3 *skel;
 
 	skel = bpf_iter_test_kern3__open_and_load();
-	if (CHECK(skel, "bpf_iter_test_kern3__open_and_load",
-		  "skeleton open_and_load unexpectedly succeeded\n")) {
+	if (!ASSERT_ERR_PTR(skel, "bpf_iter_test_kern3__open_and_load")) {
 		bpf_iter_test_kern3__destroy(skel);
 		return;
 	}
@@ -52,7 +51,7 @@ static void do_dummy_read(struct bpf_program *prog)
 		return;
 
 	iter_fd = bpf_iter_create(bpf_link__fd(link));
-	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+	if (!ASSERT_GE(iter_fd, 0, "create_iter"))
 		goto free_link;
 
 	/* not check contents, but ensure read() ends without error */
@@ -87,8 +86,7 @@ static void test_ipv6_route(void)
 	struct bpf_iter_ipv6_route *skel;
 
 	skel = bpf_iter_ipv6_route__open_and_load();
-	if (CHECK(!skel, "bpf_iter_ipv6_route__open_and_load",
-		  "skeleton open_and_load failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_ipv6_route__open_and_load"))
 		return;
 
 	do_dummy_read(skel->progs.dump_ipv6_route);
@@ -101,8 +99,7 @@ static void test_netlink(void)
 	struct bpf_iter_netlink *skel;
 
 	skel = bpf_iter_netlink__open_and_load();
-	if (CHECK(!skel, "bpf_iter_netlink__open_and_load",
-		  "skeleton open_and_load failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_netlink__open_and_load"))
 		return;
 
 	do_dummy_read(skel->progs.dump_netlink);
@@ -115,8 +112,7 @@ static void test_bpf_map(void)
 	struct bpf_iter_bpf_map *skel;
 
 	skel = bpf_iter_bpf_map__open_and_load();
-	if (CHECK(!skel, "bpf_iter_bpf_map__open_and_load",
-		  "skeleton open_and_load failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_map__open_and_load"))
 		return;
 
 	do_dummy_read(skel->progs.dump_bpf_map);
@@ -129,8 +125,7 @@ static void test_task(void)
 	struct bpf_iter_task *skel;
 
 	skel = bpf_iter_task__open_and_load();
-	if (CHECK(!skel, "bpf_iter_task__open_and_load",
-		  "skeleton open_and_load failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_task__open_and_load"))
 		return;
 
 	do_dummy_read(skel->progs.dump_task);
@@ -161,8 +156,7 @@ static void test_task_stack(void)
 	struct bpf_iter_task_stack *skel;
 
 	skel = bpf_iter_task_stack__open_and_load();
-	if (CHECK(!skel, "bpf_iter_task_stack__open_and_load",
-		  "skeleton open_and_load failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_task_stack__open_and_load"))
 		return;
 
 	do_dummy_read(skel->progs.dump_task_stack);
@@ -183,24 +177,22 @@ static void test_task_file(void)
 	void *ret;
 
 	skel = bpf_iter_task_file__open_and_load();
-	if (CHECK(!skel, "bpf_iter_task_file__open_and_load",
-		  "skeleton open_and_load failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_task_file__open_and_load"))
 		return;
 
 	skel->bss->tgid = getpid();
 
-	if (CHECK(pthread_create(&thread_id, NULL, &do_nothing, NULL),
-		  "pthread_create", "pthread_create failed\n"))
+	if (!ASSERT_OK(pthread_create(&thread_id, NULL, &do_nothing, NULL),
+		  "pthread_create"))
 		goto done;
 
 	do_dummy_read(skel->progs.dump_task_file);
 
-	if (CHECK(pthread_join(thread_id, &ret) || ret != NULL,
-		  "pthread_join", "pthread_join failed\n"))
+	if (!ASSERT_FALSE(pthread_join(thread_id, &ret) || ret != NULL,
+		  "pthread_join"))
 		goto done;
 
-	CHECK(skel->bss->count != 0, "check_count",
-	      "invalid non pthread file visit count %d\n", skel->bss->count);
+	ASSERT_EQ(skel->bss->count, 0, "check_count");
 
 done:
 	bpf_iter_task_file__destroy(skel);
@@ -224,7 +216,7 @@ static int do_btf_read(struct bpf_iter_task_btf *skel)
 		return ret;
 
 	iter_fd = bpf_iter_create(bpf_link__fd(link));
-	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+	if (!ASSERT_GE(iter_fd, 0, "create_iter"))
 		goto free_link;
 
 	err = read_fd_into_buffer(iter_fd, buf, TASKBUFSZ);
@@ -238,9 +230,8 @@ static int do_btf_read(struct bpf_iter_task_btf *skel)
 	if (CHECK(err < 0, "read", "read failed: %s\n", strerror(errno)))
 		goto free_link;
 
-	CHECK(strstr(taskbuf, "(struct task_struct)") == NULL,
-	      "check for btf representation of task_struct in iter data",
-	      "struct task_struct not found");
+	ASSERT_HAS_SUBSTR(taskbuf, "(struct task_struct)",
+	      "check for btf representation of task_struct in iter data");
 free_link:
 	if (iter_fd > 0)
 		close(iter_fd);
@@ -255,8 +246,7 @@ static void test_task_btf(void)
 	int ret;
 
 	skel = bpf_iter_task_btf__open_and_load();
-	if (CHECK(!skel, "bpf_iter_task_btf__open_and_load",
-		  "skeleton open_and_load failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_task_btf__open_and_load"))
 		return;
 
 	bss = skel->bss;
@@ -265,12 +255,10 @@ static void test_task_btf(void)
 	if (ret)
 		goto cleanup;
 
-	if (CHECK(bss->tasks == 0, "check if iterated over tasks",
-		  "no task iteration, did BPF program run?\n"))
+	if (!ASSERT_NEQ(bss->tasks, 0, "no task iteration, did BPF program run?"))
 		goto cleanup;
 
-	CHECK(bss->seq_err != 0, "check for unexpected err",
-	      "bpf_seq_printf_btf returned %ld", bss->seq_err);
+	ASSERT_EQ(bss->seq_err, 0, "check for unexpected err");
 
 cleanup:
 	bpf_iter_task_btf__destroy(skel);
@@ -281,8 +269,7 @@ static void test_tcp4(void)
 	struct bpf_iter_tcp4 *skel;
 
 	skel = bpf_iter_tcp4__open_and_load();
-	if (CHECK(!skel, "bpf_iter_tcp4__open_and_load",
-		  "skeleton open_and_load failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_tcp4__open_and_load"))
 		return;
 
 	do_dummy_read(skel->progs.dump_tcp4);
@@ -295,8 +282,7 @@ static void test_tcp6(void)
 	struct bpf_iter_tcp6 *skel;
 
 	skel = bpf_iter_tcp6__open_and_load();
-	if (CHECK(!skel, "bpf_iter_tcp6__open_and_load",
-		  "skeleton open_and_load failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_tcp6__open_and_load"))
 		return;
 
 	do_dummy_read(skel->progs.dump_tcp6);
@@ -309,8 +295,7 @@ static void test_udp4(void)
 	struct bpf_iter_udp4 *skel;
 
 	skel = bpf_iter_udp4__open_and_load();
-	if (CHECK(!skel, "bpf_iter_udp4__open_and_load",
-		  "skeleton open_and_load failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_udp4__open_and_load"))
 		return;
 
 	do_dummy_read(skel->progs.dump_udp4);
@@ -323,8 +308,7 @@ static void test_udp6(void)
 	struct bpf_iter_udp6 *skel;
 
 	skel = bpf_iter_udp6__open_and_load();
-	if (CHECK(!skel, "bpf_iter_udp6__open_and_load",
-		  "skeleton open_and_load failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_udp6__open_and_load"))
 		return;
 
 	do_dummy_read(skel->progs.dump_udp6);
@@ -349,7 +333,7 @@ static void test_unix(void)
 static int do_read_with_fd(int iter_fd, const char *expected,
 			   bool read_one_char)
 {
-	int err = -1, len, read_buf_len, start;
+	int len, read_buf_len, start;
 	char buf[16] = {};
 
 	read_buf_len = read_one_char ? 1 : 16;
@@ -363,9 +347,7 @@ static int do_read_with_fd(int iter_fd, const char *expected,
 	if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
 		return -1;
 
-	err = strcmp(buf, expected);
-	if (CHECK(err, "read", "incorrect read result: buf %s, expected %s\n",
-		  buf, expected))
+	if (!ASSERT_STREQ(buf, expected, "read"))
 		return -1;
 
 	return 0;
@@ -378,19 +360,17 @@ static void test_anon_iter(bool read_one_char)
 	int iter_fd, err;
 
 	skel = bpf_iter_test_kern1__open_and_load();
-	if (CHECK(!skel, "bpf_iter_test_kern1__open_and_load",
-		  "skeleton open_and_load failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_test_kern1__open_and_load"))
 		return;
 
 	err = bpf_iter_test_kern1__attach(skel);
-	if (CHECK(err, "bpf_iter_test_kern1__attach",
-		  "skeleton attach failed\n")) {
+	if (!ASSERT_OK(err, "bpf_iter_test_kern1__attach")) {
 		goto out;
 	}
 
 	link = skel->links.dump_task;
 	iter_fd = bpf_iter_create(bpf_link__fd(link));
-	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+	if (!ASSERT_GE(iter_fd, 0, "create_iter"))
 		goto out;
 
 	do_read_with_fd(iter_fd, "abcd", read_one_char);
@@ -423,8 +403,7 @@ static void test_file_iter(void)
 	int err;
 
 	skel1 = bpf_iter_test_kern1__open_and_load();
-	if (CHECK(!skel1, "bpf_iter_test_kern1__open_and_load",
-		  "skeleton open_and_load failed\n"))
+	if (!ASSERT_OK_PTR(skel1, "bpf_iter_test_kern1__open_and_load"))
 		return;
 
 	link = bpf_program__attach_iter(skel1->progs.dump_task, NULL);
@@ -447,12 +426,11 @@ static void test_file_iter(void)
 	 * should change.
 	 */
 	skel2 = bpf_iter_test_kern2__open_and_load();
-	if (CHECK(!skel2, "bpf_iter_test_kern2__open_and_load",
-		  "skeleton open_and_load failed\n"))
+	if (!ASSERT_OK_PTR(skel2, "bpf_iter_test_kern2__open_and_load"))
 		goto unlink_path;
 
 	err = bpf_link__update_program(link, skel2->progs.dump_task);
-	if (CHECK(err, "update_prog", "update_prog failed\n"))
+	if (!ASSERT_OK(err, "update_prog"))
 		goto destroy_skel2;
 
 	do_read(path, "ABCD");
@@ -478,8 +456,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
 	char *buf;
 
 	skel = bpf_iter_test_kern4__open();
-	if (CHECK(!skel, "bpf_iter_test_kern4__open",
-		  "skeleton open failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_test_kern4__open"))
 		return;
 
 	/* create two maps: bpf program will only do bpf_seq_write
@@ -515,8 +492,8 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
 	}
 	skel->rodata->ret1 = ret1;
 
-	if (CHECK(bpf_iter_test_kern4__load(skel),
-		  "bpf_iter_test_kern4__load", "skeleton load failed\n"))
+	if (!ASSERT_OK(bpf_iter_test_kern4__load(skel),
+		  "bpf_iter_test_kern4__load"))
 		goto free_map2;
 
 	/* setup filtering map_id in bpf program */
@@ -538,7 +515,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
 		goto free_map2;
 
 	iter_fd = bpf_iter_create(bpf_link__fd(link));
-	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+	if (!ASSERT_GE(iter_fd, 0, "create_iter"))
 		goto free_link;
 
 	buf = malloc(expected_read_len);
@@ -574,22 +551,16 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
 			goto free_buf;
 	}
 
-	if (CHECK(total_read_len != expected_read_len, "read",
-		  "total len %u, expected len %u\n", total_read_len,
-		  expected_read_len))
+	if (!ASSERT_EQ(total_read_len, expected_read_len, "read"))
 		goto free_buf;
 
-	if (CHECK(skel->bss->map1_accessed != 1, "map1_accessed",
-		  "expected 1 actual %d\n", skel->bss->map1_accessed))
+	if (!ASSERT_EQ(skel->bss->map1_accessed, 1, "map1_accessed"))
 		goto free_buf;
 
-	if (CHECK(skel->bss->map2_accessed != 2, "map2_accessed",
-		  "expected 2 actual %d\n", skel->bss->map2_accessed))
+	if (!ASSERT_EQ(skel->bss->map2_accessed, 2, "map2_accessed"))
 		goto free_buf;
 
-	CHECK(skel->bss->map2_seqnum1 != skel->bss->map2_seqnum2,
-	      "map2_seqnum", "two different seqnum %lld %lld\n",
-	      skel->bss->map2_seqnum1, skel->bss->map2_seqnum2);
+	ASSERT_EQ(skel->bss->map2_seqnum1, skel->bss->map2_seqnum2, "map2_seqnum");
 
 free_buf:
 	free(buf);
@@ -622,8 +593,7 @@ static void test_bpf_hash_map(void)
 	char buf[64];
 
 	skel = bpf_iter_bpf_hash_map__open();
-	if (CHECK(!skel, "bpf_iter_bpf_hash_map__open",
-		  "skeleton open failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_hash_map__open"))
 		return;
 
 	skel->bss->in_test_mode = true;
@@ -658,7 +628,7 @@ static void test_bpf_hash_map(void)
 		expected_val += val;
 
 		err = bpf_map_update_elem(map_fd, &key, &val, BPF_ANY);
-		if (CHECK(err, "map_update", "map_update failed\n"))
+		if (!ASSERT_OK(err, "map_update"))
 			goto out;
 	}
 
@@ -668,7 +638,7 @@ static void test_bpf_hash_map(void)
 		goto out;
 
 	iter_fd = bpf_iter_create(bpf_link__fd(link));
-	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+	if (!ASSERT_GE(iter_fd, 0, "create_iter"))
 		goto free_link;
 
 	/* do some tests */
@@ -678,17 +648,11 @@ static void test_bpf_hash_map(void)
 		goto close_iter;
 
 	/* test results */
-	if (CHECK(skel->bss->key_sum_a != expected_key_a,
-		  "key_sum_a", "got %u expected %u\n",
-		  skel->bss->key_sum_a, expected_key_a))
+	if (!ASSERT_EQ(skel->bss->key_sum_a, expected_key_a, "key_sum_a"))
 		goto close_iter;
-	if (CHECK(skel->bss->key_sum_b != expected_key_b,
-		  "key_sum_b", "got %u expected %u\n",
-		  skel->bss->key_sum_b, expected_key_b))
+	if (!ASSERT_EQ(skel->bss->key_sum_b, expected_key_b, "key_sum_b"))
 		goto close_iter;
-	if (CHECK(skel->bss->val_sum != expected_val,
-		  "val_sum", "got %llu expected %llu\n",
-		  skel->bss->val_sum, expected_val))
+	if (!ASSERT_EQ(skel->bss->val_sum, expected_val, "val_sum"))
 		goto close_iter;
 
 close_iter:
@@ -717,16 +681,14 @@ static void test_bpf_percpu_hash_map(void)
 	void *val;
 
 	skel = bpf_iter_bpf_percpu_hash_map__open();
-	if (CHECK(!skel, "bpf_iter_bpf_percpu_hash_map__open",
-		  "skeleton open failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_hash_map__open"))
 		return;
 
 	skel->rodata->num_cpus = bpf_num_possible_cpus();
 	val = malloc(8 * bpf_num_possible_cpus());
 
 	err = bpf_iter_bpf_percpu_hash_map__load(skel);
-	if (CHECK(!skel, "bpf_iter_bpf_percpu_hash_map__load",
-		  "skeleton load failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_hash_map__load"))
 		goto out;
 
 	/* update map values here */
@@ -744,7 +706,7 @@ static void test_bpf_percpu_hash_map(void)
 		}
 
 		err = bpf_map_update_elem(map_fd, &key, val, BPF_ANY);
-		if (CHECK(err, "map_update", "map_update failed\n"))
+		if (!ASSERT_OK(err, "map_update"))
 			goto out;
 	}
 
@@ -757,7 +719,7 @@ static void test_bpf_percpu_hash_map(void)
 		goto out;
 
 	iter_fd = bpf_iter_create(bpf_link__fd(link));
-	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+	if (!ASSERT_GE(iter_fd, 0, "create_iter"))
 		goto free_link;
 
 	/* do some tests */
@@ -767,17 +729,11 @@ static void test_bpf_percpu_hash_map(void)
 		goto close_iter;
 
 	/* test results */
-	if (CHECK(skel->bss->key_sum_a != expected_key_a,
-		  "key_sum_a", "got %u expected %u\n",
-		  skel->bss->key_sum_a, expected_key_a))
+	if (!ASSERT_EQ(skel->bss->key_sum_a, expected_key_a, "key_sum_a"))
 		goto close_iter;
-	if (CHECK(skel->bss->key_sum_b != expected_key_b,
-		  "key_sum_b", "got %u expected %u\n",
-		  skel->bss->key_sum_b, expected_key_b))
+	if (!ASSERT_EQ(skel->bss->key_sum_b, expected_key_b, "key_sum_b"))
 		goto close_iter;
-	if (CHECK(skel->bss->val_sum != expected_val,
-		  "val_sum", "got %u expected %u\n",
-		  skel->bss->val_sum, expected_val))
+	if (!ASSERT_EQ(skel->bss->val_sum, expected_val, "val_sum"))
 		goto close_iter;
 
 close_iter:
@@ -802,8 +758,7 @@ static void test_bpf_array_map(void)
 	int len, start;
 
 	skel = bpf_iter_bpf_array_map__open_and_load();
-	if (CHECK(!skel, "bpf_iter_bpf_array_map__open_and_load",
-		  "skeleton open_and_load failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_array_map__open_and_load"))
 		return;
 
 	map_fd = bpf_map__fd(skel->maps.arraymap1);
@@ -816,7 +771,7 @@ static void test_bpf_array_map(void)
 			first_val = val;
 
 		err = bpf_map_update_elem(map_fd, &i, &val, BPF_ANY);
-		if (CHECK(err, "map_update", "map_update failed\n"))
+		if (!ASSERT_OK(err, "map_update"))
 			goto out;
 	}
 
@@ -829,7 +784,7 @@ static void test_bpf_array_map(void)
 		goto out;
 
 	iter_fd = bpf_iter_create(bpf_link__fd(link));
-	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+	if (!ASSERT_GE(iter_fd, 0, "create_iter"))
 		goto free_link;
 
 	/* do some tests */
@@ -849,21 +804,16 @@ static void test_bpf_array_map(void)
 		  res_first_key, res_first_val, first_val))
 		goto close_iter;
 
-	if (CHECK(skel->bss->key_sum != expected_key,
-		  "key_sum", "got %u expected %u\n",
-		  skel->bss->key_sum, expected_key))
+	if (!ASSERT_EQ(skel->bss->key_sum, expected_key, "key_sum"))
 		goto close_iter;
-	if (CHECK(skel->bss->val_sum != expected_val,
-		  "val_sum", "got %llu expected %llu\n",
-		  skel->bss->val_sum, expected_val))
+	if (!ASSERT_EQ(skel->bss->val_sum, expected_val, "val_sum"))
 		goto close_iter;
 
 	for (i = 0; i < bpf_map__max_entries(skel->maps.arraymap1); i++) {
 		err = bpf_map_lookup_elem(map_fd, &i, &val);
-		if (CHECK(err, "map_lookup", "map_lookup failed\n"))
+		if (!ASSERT_OK(err, "map_lookup"))
 			goto out;
-		if (CHECK(i != val, "invalid_val",
-			  "got value %llu expected %u\n", val, i))
+		if (!ASSERT_EQ(i, val, "invalid_val"))
 			goto out;
 	}
 
@@ -888,16 +838,14 @@ static void test_bpf_percpu_array_map(void)
 	int len;
 
 	skel = bpf_iter_bpf_percpu_array_map__open();
-	if (CHECK(!skel, "bpf_iter_bpf_percpu_array_map__open",
-		  "skeleton open failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_array_map__open"))
 		return;
 
 	skel->rodata->num_cpus = bpf_num_possible_cpus();
 	val = malloc(8 * bpf_num_possible_cpus());
 
 	err = bpf_iter_bpf_percpu_array_map__load(skel);
-	if (CHECK(!skel, "bpf_iter_bpf_percpu_array_map__load",
-		  "skeleton load failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_array_map__load"))
 		goto out;
 
 	/* update map values here */
@@ -911,7 +859,7 @@ static void test_bpf_percpu_array_map(void)
 		}
 
 		err = bpf_map_update_elem(map_fd, &i, val, BPF_ANY);
-		if (CHECK(err, "map_update", "map_update failed\n"))
+		if (!ASSERT_OK(err, "map_update"))
 			goto out;
 	}
 
@@ -924,7 +872,7 @@ static void test_bpf_percpu_array_map(void)
 		goto out;
 
 	iter_fd = bpf_iter_create(bpf_link__fd(link));
-	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+	if (!ASSERT_GE(iter_fd, 0, "create_iter"))
 		goto free_link;
 
 	/* do some tests */
@@ -934,13 +882,9 @@ static void test_bpf_percpu_array_map(void)
 		goto close_iter;
 
 	/* test results */
-	if (CHECK(skel->bss->key_sum != expected_key,
-		  "key_sum", "got %u expected %u\n",
-		  skel->bss->key_sum, expected_key))
+	if (!ASSERT_EQ(skel->bss->key_sum, expected_key, "key_sum"))
 		goto close_iter;
-	if (CHECK(skel->bss->val_sum != expected_val,
-		  "val_sum", "got %u expected %u\n",
-		  skel->bss->val_sum, expected_val))
+	if (!ASSERT_EQ(skel->bss->val_sum, expected_val, "val_sum"))
 		goto close_iter;
 
 close_iter:
@@ -965,17 +909,16 @@ static void test_bpf_sk_storage_delete(void)
 	char buf[64];
 
 	skel = bpf_iter_bpf_sk_storage_helpers__open_and_load();
-	if (CHECK(!skel, "bpf_iter_bpf_sk_storage_helpers__open_and_load",
-		  "skeleton open_and_load failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_sk_storage_helpers__open_and_load"))
 		return;
 
 	map_fd = bpf_map__fd(skel->maps.sk_stg_map);
 
 	sock_fd = socket(AF_INET6, SOCK_STREAM, 0);
-	if (CHECK(sock_fd < 0, "socket", "errno: %d\n", errno))
+	if (!ASSERT_GE(sock_fd, 0, "socket"))
 		goto out;
 	err = bpf_map_update_elem(map_fd, &sock_fd, &val, BPF_NOEXIST);
-	if (CHECK(err, "map_update", "map_update failed\n"))
+	if (!ASSERT_OK(err, "map_update"))
 		goto out;
 
 	memset(&linfo, 0, sizeof(linfo));
@@ -988,7 +931,7 @@ static void test_bpf_sk_storage_delete(void)
 		goto out;
 
 	iter_fd = bpf_iter_create(bpf_link__fd(link));
-	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+	if (!ASSERT_GE(iter_fd, 0, "create_iter"))
 		goto free_link;
 
 	/* do some tests */
@@ -1026,22 +969,21 @@ static void test_bpf_sk_storage_get(void)
 	int sock_fd = -1;
 
 	skel = bpf_iter_bpf_sk_storage_helpers__open_and_load();
-	if (CHECK(!skel, "bpf_iter_bpf_sk_storage_helpers__open_and_load",
-		  "skeleton open_and_load failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_sk_storage_helpers__open_and_load"))
 		return;
 
 	sock_fd = socket(AF_INET6, SOCK_STREAM, 0);
-	if (CHECK(sock_fd < 0, "socket", "errno: %d\n", errno))
+	if (!ASSERT_GE(sock_fd, 0, "socket"))
 		goto out;
 
 	err = listen(sock_fd, 1);
-	if (CHECK(err != 0, "listen", "errno: %d\n", errno))
+	if (!ASSERT_OK(err, "listen"))
 		goto close_socket;
 
 	map_fd = bpf_map__fd(skel->maps.sk_stg_map);
 
 	err = bpf_map_update_elem(map_fd, &sock_fd, &val, BPF_NOEXIST);
-	if (CHECK(err, "bpf_map_update_elem", "map_update_failed\n"))
+	if (!ASSERT_OK(err, "bpf_map_update_elem"))
 		goto close_socket;
 
 	do_dummy_read(skel->progs.fill_socket_owner);
@@ -1077,15 +1019,14 @@ static void test_bpf_sk_storage_map(void)
 	char buf[64];
 
 	skel = bpf_iter_bpf_sk_storage_map__open_and_load();
-	if (CHECK(!skel, "bpf_iter_bpf_sk_storage_map__open_and_load",
-		  "skeleton open_and_load failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_sk_storage_map__open_and_load"))
 		return;
 
 	map_fd = bpf_map__fd(skel->maps.sk_stg_map);
 	num_sockets = ARRAY_SIZE(sock_fd);
 	for (i = 0; i < num_sockets; i++) {
 		sock_fd[i] = socket(AF_INET6, SOCK_STREAM, 0);
-		if (CHECK(sock_fd[i] < 0, "socket", "errno: %d\n", errno))
+		if (!ASSERT_GE(sock_fd[i], 0, "socket"))
 			goto out;
 
 		val = i + 1;
@@ -1093,7 +1034,7 @@ static void test_bpf_sk_storage_map(void)
 
 		err = bpf_map_update_elem(map_fd, &sock_fd[i], &val,
 					  BPF_NOEXIST);
-		if (CHECK(err, "map_update", "map_update failed\n"))
+		if (!ASSERT_OK(err, "map_update"))
 			goto out;
 	}
 
@@ -1106,7 +1047,7 @@ static void test_bpf_sk_storage_map(void)
 		goto out;
 
 	iter_fd = bpf_iter_create(bpf_link__fd(link));
-	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+	if (!ASSERT_GE(iter_fd, 0, "create_iter"))
 		goto free_link;
 
 	/* do some tests */
@@ -1116,14 +1057,10 @@ static void test_bpf_sk_storage_map(void)
 		goto close_iter;
 
 	/* test results */
-	if (CHECK(skel->bss->ipv6_sk_count != num_sockets,
-		  "ipv6_sk_count", "got %u expected %u\n",
-		  skel->bss->ipv6_sk_count, num_sockets))
+	if (!ASSERT_EQ(skel->bss->ipv6_sk_count, num_sockets, "ipv6_sk_count"))
 		goto close_iter;
 
-	if (CHECK(skel->bss->val_sum != expected_val,
-		  "val_sum", "got %u expected %u\n",
-		  skel->bss->val_sum, expected_val))
+	if (!ASSERT_EQ(skel->bss->val_sum, expected_val, "val_sum"))
 		goto close_iter;
 
 close_iter:
@@ -1146,8 +1083,7 @@ static void test_rdonly_buf_out_of_bound(void)
 	struct bpf_link *link;
 
 	skel = bpf_iter_test_kern5__open_and_load();
-	if (CHECK(!skel, "bpf_iter_test_kern5__open_and_load",
-		  "skeleton open_and_load failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_test_kern5__open_and_load"))
 		return;
 
 	memset(&linfo, 0, sizeof(linfo));
@@ -1166,8 +1102,7 @@ static void test_buf_neg_offset(void)
 	struct bpf_iter_test_kern6 *skel;
 
 	skel = bpf_iter_test_kern6__open_and_load();
-	if (CHECK(skel, "bpf_iter_test_kern6__open_and_load",
-		  "skeleton open_and_load unexpected success\n"))
+	if (!ASSERT_ERR_PTR(skel, "bpf_iter_test_kern6__open_and_load"))
 		bpf_iter_test_kern6__destroy(skel);
 }
 
@@ -1199,13 +1134,13 @@ static void test_task_vma(void)
 	char maps_path[64];
 
 	skel = bpf_iter_task_vma__open();
-	if (CHECK(!skel, "bpf_iter_task_vma__open", "skeleton open failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_task_vma__open"))
 		return;
 
 	skel->bss->pid = getpid();
 
 	err = bpf_iter_task_vma__load(skel);
-	if (CHECK(err, "bpf_iter_task_vma__load", "skeleton load failed\n"))
+	if (!ASSERT_OK(err, "bpf_iter_task_vma__load"))
 		goto out;
 
 	skel->links.proc_maps = bpf_program__attach_iter(
@@ -1217,7 +1152,7 @@ static void test_task_vma(void)
 	}
 
 	iter_fd = bpf_iter_create(bpf_link__fd(skel->links.proc_maps));
-	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+	if (!ASSERT_GE(iter_fd, 0, "create_iter"))
 		goto out;
 
 	/* Read CMP_BUFFER_SIZE (1kB) from bpf_iter. Read in small chunks
@@ -1229,7 +1164,7 @@ static void test_task_vma(void)
 					  MIN(read_size, CMP_BUFFER_SIZE - len));
 		if (!err)
 			break;
-		if (CHECK(err < 0, "read_iter_fd", "read_iter_fd failed\n"))
+		if (!ASSERT_GE(err, 0, "read_iter_fd"))
 			goto out;
 		len += err;
 	}
@@ -1237,18 +1172,17 @@ static void test_task_vma(void)
 	/* read CMP_BUFFER_SIZE (1kB) from /proc/pid/maps */
 	snprintf(maps_path, 64, "/proc/%u/maps", skel->bss->pid);
 	proc_maps_fd = open(maps_path, O_RDONLY);
-	if (CHECK(proc_maps_fd < 0, "open_proc_maps", "open_proc_maps failed\n"))
+	if (!ASSERT_GE(proc_maps_fd, 0, "open_proc_maps"))
 		goto out;
 	err = read_fd_into_buffer(proc_maps_fd, proc_maps_output, CMP_BUFFER_SIZE);
-	if (CHECK(err < 0, "read_prog_maps_fd", "read_prog_maps_fd failed\n"))
+	if (!ASSERT_GE(err, 0, "read_prog_maps_fd"))
 		goto out;
 
 	/* strip and compare the first line of the two files */
 	str_strip_first_line(task_vma_output);
 	str_strip_first_line(proc_maps_output);
 
-	CHECK(strcmp(task_vma_output, proc_maps_output), "compare_output",
-	      "found mismatch\n");
+	ASSERT_STREQ(task_vma_output, proc_maps_output, "compare_output");
 out:
 	close(proc_maps_fd);
 	close(iter_fd);
-- 
cgit 


From 5a9b8e2c1ad44ade2f0e5419de223425bd380bda Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Tue, 10 May 2022 17:52:33 +0200
Subject: selftests/bpf: Add bpf link iter test

Add a simple test for bpf link iterator

Signed-off-by: Dmitrii Dolgov <9erthalion6@gmail.com>
Link: https://lore.kernel.org/r/20220510155233.9815-5-9erthalion6@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/bpf_iter.c   | 16 ++++++++++++++++
 tools/testing/selftests/bpf/progs/bpf_iter.h        |  7 +++++++
 .../testing/selftests/bpf/progs/bpf_iter_bpf_link.c | 21 +++++++++++++++++++++
 3 files changed, 44 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_bpf_link.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 48289c886058..7ff5fa93d056 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -26,6 +26,7 @@
 #include "bpf_iter_bpf_sk_storage_map.skel.h"
 #include "bpf_iter_test_kern5.skel.h"
 #include "bpf_iter_test_kern6.skel.h"
+#include "bpf_iter_bpf_link.skel.h"
 
 static int duration;
 
@@ -1106,6 +1107,19 @@ static void test_buf_neg_offset(void)
 		bpf_iter_test_kern6__destroy(skel);
 }
 
+static void test_link_iter(void)
+{
+	struct bpf_iter_bpf_link *skel;
+
+	skel = bpf_iter_bpf_link__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_link__open_and_load"))
+		return;
+
+	do_dummy_read(skel->progs.dump_bpf_link);
+
+	bpf_iter_bpf_link__destroy(skel);
+}
+
 #define CMP_BUFFER_SIZE 1024
 static char task_vma_output[CMP_BUFFER_SIZE];
 static char proc_maps_output[CMP_BUFFER_SIZE];
@@ -1251,4 +1265,6 @@ void test_bpf_iter(void)
 		test_rdonly_buf_out_of_bound();
 	if (test__start_subtest("buf-neg-offset"))
 		test_buf_neg_offset();
+	if (test__start_subtest("link-iter"))
+		test_link_iter();
 }
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h
index 8cfaeba1ddbf..97ec8bc76ae6 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter.h
+++ b/tools/testing/selftests/bpf/progs/bpf_iter.h
@@ -16,6 +16,7 @@
 #define bpf_iter__bpf_map_elem bpf_iter__bpf_map_elem___not_used
 #define bpf_iter__bpf_sk_storage_map bpf_iter__bpf_sk_storage_map___not_used
 #define bpf_iter__sockmap bpf_iter__sockmap___not_used
+#define bpf_iter__bpf_link bpf_iter__bpf_link___not_used
 #define btf_ptr btf_ptr___not_used
 #define BTF_F_COMPACT BTF_F_COMPACT___not_used
 #define BTF_F_NONAME BTF_F_NONAME___not_used
@@ -37,6 +38,7 @@
 #undef bpf_iter__bpf_map_elem
 #undef bpf_iter__bpf_sk_storage_map
 #undef bpf_iter__sockmap
+#undef bpf_iter__bpf_link
 #undef btf_ptr
 #undef BTF_F_COMPACT
 #undef BTF_F_NONAME
@@ -132,6 +134,11 @@ struct bpf_iter__sockmap {
 	struct sock *sk;
 };
 
+struct bpf_iter__bpf_link {
+	struct bpf_iter_meta *meta;
+	struct bpf_link *link;
+};
+
 struct btf_ptr {
 	void *ptr;
 	__u32 type_id;
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_link.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_link.c
new file mode 100644
index 000000000000..e1af2f8f75a6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_link.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Red Hat, Inc. */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("iter/bpf_link")
+int dump_bpf_link(struct bpf_iter__bpf_link *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	struct bpf_link *link = ctx->link;
+	int link_id;
+
+	if (!link)
+		return 0;
+
+	link_id = link->id;
+	bpf_seq_write(seq, &link_id, sizeof(link_id));
+	return 0;
+}
-- 
cgit 


From 5b6c7e5c44349b29c614e1b61f80c6849fc72ccf Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Tue, 10 May 2022 14:26:16 +0200
Subject: selftests/bpf: Add attach bench test

Adding test that reads all functions from ftrace available_filter_functions
file and attach them all through kprobe_multi API.

It also prints stats info with -v option, like on my setup:

  test_bench_attach: found 48712 functions
  test_bench_attach: attached in   1.069s
  test_bench_attach: detached in   0.373s

Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/r/20220510122616.2652285-6-jolsa@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../selftests/bpf/prog_tests/kprobe_multi_test.c   | 143 +++++++++++++++++++++
 .../selftests/bpf/progs/kprobe_multi_empty.c       |  12 ++
 2 files changed, 155 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/kprobe_multi_empty.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
index c56db65d4c15..816eacededd1 100644
--- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
@@ -2,6 +2,9 @@
 #include <test_progs.h>
 #include "kprobe_multi.skel.h"
 #include "trace_helpers.h"
+#include "kprobe_multi_empty.skel.h"
+#include "bpf/libbpf_internal.h"
+#include "bpf/hashmap.h"
 
 static void kprobe_multi_test_run(struct kprobe_multi *skel, bool test_return)
 {
@@ -301,6 +304,144 @@ cleanup:
 	kprobe_multi__destroy(skel);
 }
 
+static inline __u64 get_time_ns(void)
+{
+	struct timespec t;
+
+	clock_gettime(CLOCK_MONOTONIC, &t);
+	return (__u64) t.tv_sec * 1000000000 + t.tv_nsec;
+}
+
+static size_t symbol_hash(const void *key, void *ctx __maybe_unused)
+{
+	return str_hash((const char *) key);
+}
+
+static bool symbol_equal(const void *key1, const void *key2, void *ctx __maybe_unused)
+{
+	return strcmp((const char *) key1, (const char *) key2) == 0;
+}
+
+static int get_syms(char ***symsp, size_t *cntp)
+{
+	size_t cap = 0, cnt = 0, i;
+	char *name, **syms = NULL;
+	struct hashmap *map;
+	char buf[256];
+	FILE *f;
+	int err;
+
+	/*
+	 * The available_filter_functions contains many duplicates,
+	 * but other than that all symbols are usable in kprobe multi
+	 * interface.
+	 * Filtering out duplicates by using hashmap__add, which won't
+	 * add existing entry.
+	 */
+	f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r");
+	if (!f)
+		return -EINVAL;
+
+	map = hashmap__new(symbol_hash, symbol_equal, NULL);
+	if (IS_ERR(map))
+		goto error;
+
+	while (fgets(buf, sizeof(buf), f)) {
+		/* skip modules */
+		if (strchr(buf, '['))
+			continue;
+		if (sscanf(buf, "%ms$*[^\n]\n", &name) != 1)
+			continue;
+		/*
+		 * We attach to almost all kernel functions and some of them
+		 * will cause 'suspicious RCU usage' when fprobe is attached
+		 * to them. Filter out the current culprits - arch_cpu_idle
+		 * and rcu_* functions.
+		 */
+		if (!strcmp(name, "arch_cpu_idle"))
+			continue;
+		if (!strncmp(name, "rcu_", 4))
+			continue;
+		err = hashmap__add(map, name, NULL);
+		if (err) {
+			free(name);
+			if (err == -EEXIST)
+				continue;
+			goto error;
+		}
+		err = libbpf_ensure_mem((void **) &syms, &cap,
+					sizeof(*syms), cnt + 1);
+		if (err) {
+			free(name);
+			goto error;
+		}
+		syms[cnt] = name;
+		cnt++;
+	}
+
+	*symsp = syms;
+	*cntp = cnt;
+
+error:
+	fclose(f);
+	hashmap__free(map);
+	if (err) {
+		for (i = 0; i < cnt; i++)
+			free(syms[cnt]);
+		free(syms);
+	}
+	return err;
+}
+
+static void test_bench_attach(void)
+{
+	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+	struct kprobe_multi_empty *skel = NULL;
+	long attach_start_ns, attach_end_ns;
+	long detach_start_ns, detach_end_ns;
+	double attach_delta, detach_delta;
+	struct bpf_link *link = NULL;
+	char **syms = NULL;
+	size_t cnt, i;
+
+	if (!ASSERT_OK(get_syms(&syms, &cnt), "get_syms"))
+		return;
+
+	skel = kprobe_multi_empty__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "kprobe_multi_empty__open_and_load"))
+		goto cleanup;
+
+	opts.syms = (const char **) syms;
+	opts.cnt = cnt;
+
+	attach_start_ns = get_time_ns();
+	link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_empty,
+						     NULL, &opts);
+	attach_end_ns = get_time_ns();
+
+	if (!ASSERT_OK_PTR(link, "bpf_program__attach_kprobe_multi_opts"))
+		goto cleanup;
+
+	detach_start_ns = get_time_ns();
+	bpf_link__destroy(link);
+	detach_end_ns = get_time_ns();
+
+	attach_delta = (attach_end_ns - attach_start_ns) / 1000000000.0;
+	detach_delta = (detach_end_ns - detach_start_ns) / 1000000000.0;
+
+	printf("%s: found %lu functions\n", __func__, cnt);
+	printf("%s: attached in %7.3lfs\n", __func__, attach_delta);
+	printf("%s: detached in %7.3lfs\n", __func__, detach_delta);
+
+cleanup:
+	kprobe_multi_empty__destroy(skel);
+	if (syms) {
+		for (i = 0; i < cnt; i++)
+			free(syms[i]);
+		free(syms);
+	}
+}
+
 void test_kprobe_multi_test(void)
 {
 	if (!ASSERT_OK(load_kallsyms(), "load_kallsyms"))
@@ -320,4 +461,6 @@ void test_kprobe_multi_test(void)
 		test_attach_api_syms();
 	if (test__start_subtest("attach_api_fails"))
 		test_attach_api_fails();
+	if (test__start_subtest("bench_attach"))
+		test_bench_attach();
 }
diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi_empty.c b/tools/testing/selftests/bpf/progs/kprobe_multi_empty.c
new file mode 100644
index 000000000000..e76e499aca39
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kprobe_multi_empty.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("kprobe.multi/")
+int test_kprobe_empty(struct pt_regs *ctx)
+{
+	return 0;
+}
-- 
cgit 


From ddc0027a4c3f0cf07a5d54178f016535ef58bca5 Mon Sep 17 00:00:00 2001
From: Kui-Feng Lee <kuifeng@fb.com>
Date: Tue, 10 May 2022 13:59:23 -0700
Subject: selftest/bpf: The test cases of BPF cookie for
 fentry/fexit/fmod_ret/lsm.

Make sure BPF cookies are correct for fentry/fexit/fmod_ret/lsm.

Signed-off-by: Kui-Feng Lee <kuifeng@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220510205923.3206889-6-kuifeng@fb.com
---
 .../testing/selftests/bpf/prog_tests/bpf_cookie.c  | 89 ++++++++++++++++++++++
 .../testing/selftests/bpf/progs/test_bpf_cookie.c  | 52 +++++++++++--
 2 files changed, 133 insertions(+), 8 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
index 923a6139b2d8..83ef55e3caa4 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
@@ -4,8 +4,11 @@
 #include <pthread.h>
 #include <sched.h>
 #include <sys/syscall.h>
+#include <sys/mman.h>
 #include <unistd.h>
 #include <test_progs.h>
+#include <network_helpers.h>
+#include <bpf/btf.h>
 #include "test_bpf_cookie.skel.h"
 #include "kprobe_multi.skel.h"
 
@@ -410,6 +413,88 @@ cleanup:
 	bpf_link__destroy(link);
 }
 
+static void tracing_subtest(struct test_bpf_cookie *skel)
+{
+	__u64 cookie;
+	int prog_fd;
+	int fentry_fd = -1, fexit_fd = -1, fmod_ret_fd = -1;
+	LIBBPF_OPTS(bpf_test_run_opts, opts);
+	LIBBPF_OPTS(bpf_link_create_opts, link_opts);
+
+	skel->bss->fentry_res = 0;
+	skel->bss->fexit_res = 0;
+
+	cookie = 0x10000000000000L;
+	prog_fd = bpf_program__fd(skel->progs.fentry_test1);
+	link_opts.tracing.cookie = cookie;
+	fentry_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_FENTRY, &link_opts);
+	if (!ASSERT_GE(fentry_fd, 0, "fentry.link_create"))
+		goto cleanup;
+
+	cookie = 0x20000000000000L;
+	prog_fd = bpf_program__fd(skel->progs.fexit_test1);
+	link_opts.tracing.cookie = cookie;
+	fexit_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_FEXIT, &link_opts);
+	if (!ASSERT_GE(fexit_fd, 0, "fexit.link_create"))
+		goto cleanup;
+
+	cookie = 0x30000000000000L;
+	prog_fd = bpf_program__fd(skel->progs.fmod_ret_test);
+	link_opts.tracing.cookie = cookie;
+	fmod_ret_fd = bpf_link_create(prog_fd, 0, BPF_MODIFY_RETURN, &link_opts);
+	if (!ASSERT_GE(fmod_ret_fd, 0, "fmod_ret.link_create"))
+		goto cleanup;
+
+	prog_fd = bpf_program__fd(skel->progs.fentry_test1);
+	bpf_prog_test_run_opts(prog_fd, &opts);
+
+	prog_fd = bpf_program__fd(skel->progs.fmod_ret_test);
+	bpf_prog_test_run_opts(prog_fd, &opts);
+
+	ASSERT_EQ(skel->bss->fentry_res, 0x10000000000000L, "fentry_res");
+	ASSERT_EQ(skel->bss->fexit_res, 0x20000000000000L, "fexit_res");
+	ASSERT_EQ(skel->bss->fmod_ret_res, 0x30000000000000L, "fmod_ret_res");
+
+cleanup:
+	if (fentry_fd >= 0)
+		close(fentry_fd);
+	if (fexit_fd >= 0)
+		close(fexit_fd);
+	if (fmod_ret_fd >= 0)
+		close(fmod_ret_fd);
+}
+
+int stack_mprotect(void);
+
+static void lsm_subtest(struct test_bpf_cookie *skel)
+{
+	__u64 cookie;
+	int prog_fd;
+	int lsm_fd = -1;
+	LIBBPF_OPTS(bpf_link_create_opts, link_opts);
+
+	skel->bss->lsm_res = 0;
+
+	cookie = 0x90000000000090L;
+	prog_fd = bpf_program__fd(skel->progs.test_int_hook);
+	link_opts.tracing.cookie = cookie;
+	lsm_fd = bpf_link_create(prog_fd, 0, BPF_LSM_MAC, &link_opts);
+	if (!ASSERT_GE(lsm_fd, 0, "lsm.link_create"))
+		goto cleanup;
+
+	stack_mprotect();
+	if (!ASSERT_EQ(errno, EPERM, "stack_mprotect"))
+		goto cleanup;
+
+	usleep(1);
+
+	ASSERT_EQ(skel->bss->lsm_res, 0x90000000000090L, "fentry_res");
+
+cleanup:
+	if (lsm_fd >= 0)
+		close(lsm_fd);
+}
+
 void test_bpf_cookie(void)
 {
 	struct test_bpf_cookie *skel;
@@ -432,6 +517,10 @@ void test_bpf_cookie(void)
 		tp_subtest(skel);
 	if (test__start_subtest("perf_event"))
 		pe_subtest(skel);
+	if (test__start_subtest("trampoline"))
+		tracing_subtest(skel);
+	if (test__start_subtest("lsm"))
+		lsm_subtest(skel);
 
 	test_bpf_cookie__destroy(skel);
 }
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c
index 0e2222968918..22d0ac8709b4 100644
--- a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c
+++ b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c
@@ -4,18 +4,23 @@
 #include "vmlinux.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
+#include <errno.h>
 
 int my_tid;
 
-int kprobe_res;
-int kprobe_multi_res;
-int kretprobe_res;
-int uprobe_res;
-int uretprobe_res;
-int tp_res;
-int pe_res;
+__u64 kprobe_res;
+__u64 kprobe_multi_res;
+__u64 kretprobe_res;
+__u64 uprobe_res;
+__u64 uretprobe_res;
+__u64 tp_res;
+__u64 pe_res;
+__u64 fentry_res;
+__u64 fexit_res;
+__u64 fmod_ret_res;
+__u64 lsm_res;
 
-static void update(void *ctx, int *res)
+static void update(void *ctx, __u64 *res)
 {
 	if (my_tid != (u32)bpf_get_current_pid_tgid())
 		return;
@@ -82,4 +87,35 @@ int handle_pe(struct pt_regs *ctx)
 	return 0;
 }
 
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(fentry_test1, int a)
+{
+	update(ctx, &fentry_res);
+	return 0;
+}
+
+SEC("fexit/bpf_fentry_test1")
+int BPF_PROG(fexit_test1, int a, int ret)
+{
+	update(ctx, &fexit_res);
+	return 0;
+}
+
+SEC("fmod_ret/bpf_modify_return_test")
+int BPF_PROG(fmod_ret_test, int _a, int *_b, int _ret)
+{
+	update(ctx, &fmod_ret_res);
+	return 1234;
+}
+
+SEC("lsm/file_mprotect")
+int BPF_PROG(test_int_hook, struct vm_area_struct *vma,
+	     unsigned long reqprot, unsigned long prot, int ret)
+{
+	if (my_tid != (u32)bpf_get_current_pid_tgid())
+		return ret;
+	update(ctx, &lsm_res);
+	return -EPERM;
+}
+
 char _license[] SEC("license") = "GPL";
-- 
cgit 


From 685e64a3c91df3d169be9e3e37862f118280927f Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 10 May 2022 13:55:56 +0200
Subject: selftests: xsk: cleanup bash scripts

Remove the spec-file that is not used any longer from the shell
scripts. Also remove an unused option.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Link: https://lore.kernel.org/r/20220510115604.8717-2-magnus.karlsson@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_xsk.sh    |  8 +-------
 tools/testing/selftests/bpf/xsk_prereqs.sh | 11 -----------
 2 files changed, 1 insertion(+), 18 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
index cd7bf32e6a17..2bd12c16fbb7 100755
--- a/tools/testing/selftests/bpf/test_xsk.sh
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -43,7 +43,6 @@
 #   ** veth<xxxx> in root namespace
 #   ** veth<yyyy> in af_xdp<xxxx> namespace
 #   ** namespace af_xdp<xxxx>
-#   * create a spec file veth.spec that includes this run-time configuration
 #   *** xxxx and yyyy are randomly generated 4 digit numbers used to avoid
 #       conflict with any existing interface
 #   * tests the veth and xsk layers of the topology
@@ -77,7 +76,7 @@
 
 . xsk_prereqs.sh
 
-while getopts "cvD" flag
+while getopts "vD" flag
 do
 	case "${flag}" in
 		v) verbose=1;;
@@ -130,12 +129,7 @@ if [ $retval -ne 0 ]; then
 	exit $retval
 fi
 
-echo "${VETH0}:${VETH1},${NS1}" > ${SPECFILE}
-
-validate_veth_spec_file
-
 if [[ $verbose -eq 1 ]]; then
-        echo "Spec file created: ${SPECFILE}"
 	VERBOSE_ARG="-v"
 fi
 
diff --git a/tools/testing/selftests/bpf/xsk_prereqs.sh b/tools/testing/selftests/bpf/xsk_prereqs.sh
index bf29d2549bee..7606d59b06bd 100755
--- a/tools/testing/selftests/bpf/xsk_prereqs.sh
+++ b/tools/testing/selftests/bpf/xsk_prereqs.sh
@@ -8,7 +8,6 @@ ksft_xfail=2
 ksft_xpass=3
 ksft_skip=4
 
-SPECFILE=veth.spec
 XSKOBJ=xdpxceiver
 
 validate_root_exec()
@@ -34,13 +33,6 @@ validate_veth_support()
 	fi
 }
 
-validate_veth_spec_file()
-{
-	if [ ! -f ${SPECFILE} ]; then
-		test_exit $ksft_skip 1
-	fi
-}
-
 test_status()
 {
 	statusval=$1
@@ -74,9 +66,6 @@ clear_configs()
 	#veth node inside NS won't get removed so we explicitly remove it
 	[ $(ip link show $1 &>/dev/null; echo $?;) == 0 ] &&
 		{ ip link del $1; }
-	if [ -f ${SPECFILE} ]; then
-		rm -f ${SPECFILE}
-	fi
 }
 
 cleanup_exit()
-- 
cgit 


From f3e619bb34d37c4c7ee80647e68b533d1f357927 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 10 May 2022 13:55:57 +0200
Subject: selftests: xsk: do not send zero-length packets

Do not try to send packets of zero length since they are dropped by
veth after commit 726e2c5929de84 ("veth: Ensure eth header is in skb's
linear part"). Replace these two packets with packets of length 60 so
that they are not dropped.

Also clean up the confusing naming. MIN_PKT_SIZE was really
MIN_ETH_PKT_SIZE and PKT_SIZE was both MIN_ETH_SIZE and the default
packet size called just PKT_SIZE. Make it consistent by using the
right define in the right place.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Link: https://lore.kernel.org/r/20220510115604.8717-3-magnus.karlsson@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/xdpxceiver.c | 14 +++++++-------
 tools/testing/selftests/bpf/xdpxceiver.h |  5 +++--
 2 files changed, 10 insertions(+), 9 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index cfcb031323c5..218f20f135c9 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -575,7 +575,7 @@ static struct pkt *pkt_generate(struct ifobject *ifobject, u32 pkt_nb)
 
 	if (!pkt)
 		return NULL;
-	if (!pkt->valid || pkt->len < PKT_SIZE)
+	if (!pkt->valid || pkt->len < MIN_PKT_SIZE)
 		return pkt;
 
 	data = xsk_umem__get_data(ifobject->umem->buffer, pkt->addr);
@@ -677,8 +677,8 @@ static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len)
 		return false;
 	}
 
-	if (len < PKT_SIZE) {
-		/*Do not try to verify packets that are smaller than minimum size. */
+	if (len < MIN_PKT_SIZE || pkt->len < MIN_PKT_SIZE) {
+		/* Do not try to verify packets that are smaller than minimum size. */
 		return true;
 	}
 
@@ -1282,10 +1282,10 @@ static void testapp_single_pkt(struct test_spec *test)
 static void testapp_invalid_desc(struct test_spec *test)
 {
 	struct pkt pkts[] = {
-		/* Zero packet length at address zero allowed */
-		{0, 0, 0, true},
-		/* Zero packet length allowed */
-		{0x1000, 0, 0, true},
+		/* Zero packet address allowed */
+		{0, PKT_SIZE, 0, true},
+		/* Allowed packet */
+		{0x1000, PKT_SIZE, 0, true},
 		/* Straddling the start of umem */
 		{-2, PKT_SIZE, 0, false},
 		/* Packet too large */
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 62a3e6388632..37ab549ce5fe 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -25,9 +25,10 @@
 #define MAX_TEARDOWN_ITER 10
 #define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \
 			sizeof(struct udphdr))
-#define MIN_PKT_SIZE 64
+#define MIN_ETH_PKT_SIZE 64
 #define ETH_FCS_SIZE 4
-#define PKT_SIZE (MIN_PKT_SIZE - ETH_FCS_SIZE)
+#define MIN_PKT_SIZE (MIN_ETH_PKT_SIZE - ETH_FCS_SIZE)
+#define PKT_SIZE (MIN_PKT_SIZE)
 #define IP_PKT_SIZE (PKT_SIZE - sizeof(struct ethhdr))
 #define IP_PKT_VER 0x4
 #define IP_PKT_TOS 0x9
-- 
cgit 


From f90062b53229aeb51ed71388f8864442d9e2c6ab Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 10 May 2022 13:55:58 +0200
Subject: selftests: xsk: run all tests for busy-poll

Execute all xsk selftests for busy-poll mode too. Currently they were
only run for the standard interrupt driven softirq mode. Replace the
unused option queue-id with the new option busy-poll.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Link: https://lore.kernel.org/r/20220510115604.8717-4-magnus.karlsson@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_xsk.sh    | 25 +++++++++--
 tools/testing/selftests/bpf/xdpxceiver.c   | 68 +++++++++++++++++++++++++-----
 tools/testing/selftests/bpf/xdpxceiver.h   |  9 ++++
 tools/testing/selftests/bpf/xsk_prereqs.sh |  6 ++-
 4 files changed, 94 insertions(+), 14 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
index 2bd12c16fbb7..7989a9376f0e 100755
--- a/tools/testing/selftests/bpf/test_xsk.sh
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -109,6 +109,14 @@ setup_vethPairs() {
 	if [[ $verbose -eq 1 ]]; then
 	        echo "setting up ${VETH1}: namespace: ${NS1}"
 	fi
+
+	if [[ $busy_poll -eq 1 ]]; then
+	        echo 2 > /sys/class/net/${VETH0}/napi_defer_hard_irqs
+		echo 200000 > /sys/class/net/${VETH0}/gro_flush_timeout
+		echo 2 > /sys/class/net/${VETH1}/napi_defer_hard_irqs
+		echo 200000 > /sys/class/net/${VETH1}/gro_flush_timeout
+	fi
+
 	ip link set ${VETH1} netns ${NS1}
 	ip netns exec ${NS1} ip link set ${VETH1} mtu ${MTU}
 	ip link set ${VETH0} mtu ${MTU}
@@ -130,11 +138,11 @@ if [ $retval -ne 0 ]; then
 fi
 
 if [[ $verbose -eq 1 ]]; then
-	VERBOSE_ARG="-v"
+	ARGS+="-v "
 fi
 
 if [[ $dump_pkts -eq 1 ]]; then
-	DUMP_PKTS_ARG="-D"
+	ARGS="-D "
 fi
 
 test_status $retval "${TEST_NAME}"
@@ -143,8 +151,19 @@ test_status $retval "${TEST_NAME}"
 
 statusList=()
 
-TEST_NAME="XSK KSELFTESTS"
+TEST_NAME="XSK_SELFTESTS_SOFTIRQ"
+
+execxdpxceiver
+
+retval=$?
+test_status $retval "${TEST_NAME}"
+statusList+=($retval)
 
+cleanup_exit ${VETH0} ${VETH1} ${NS1}
+TEST_NAME="XSK_SELFTESTS_BUSY_POLL"
+busy_poll=1
+
+setup_vethPairs
 execxdpxceiver
 
 retval=$?
diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 218f20f135c9..6efac9e35c2e 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -90,6 +90,7 @@
 #include <string.h>
 #include <stddef.h>
 #include <sys/mman.h>
+#include <sys/socket.h>
 #include <sys/types.h>
 #include <sys/queue.h>
 #include <time.h>
@@ -122,9 +123,11 @@ static void __exit_with_error(int error, const char *file, const char *func, int
 #define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
 
 #define mode_string(test) (test)->ifobj_tx->xdp_flags & XDP_FLAGS_SKB_MODE ? "SKB" : "DRV"
+#define busy_poll_string(test) (test)->ifobj_tx->busy_poll ? "BUSY-POLL " : ""
 
 #define print_ksft_result(test)						\
-	(ksft_test_result_pass("PASS: %s %s\n", mode_string(test), (test)->name))
+	(ksft_test_result_pass("PASS: %s %s%s\n", mode_string(test), busy_poll_string(test), \
+			       (test)->name))
 
 static void memset32_htonl(void *dest, u32 val, u32 size)
 {
@@ -264,6 +267,26 @@ static int xsk_configure_umem(struct xsk_umem_info *umem, void *buffer, u64 size
 	return 0;
 }
 
+static void enable_busy_poll(struct xsk_socket_info *xsk)
+{
+	int sock_opt;
+
+	sock_opt = 1;
+	if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_PREFER_BUSY_POLL,
+		       (void *)&sock_opt, sizeof(sock_opt)) < 0)
+		exit_with_error(errno);
+
+	sock_opt = 20;
+	if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL,
+		       (void *)&sock_opt, sizeof(sock_opt)) < 0)
+		exit_with_error(errno);
+
+	sock_opt = BATCH_SIZE;
+	if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL_BUDGET,
+		       (void *)&sock_opt, sizeof(sock_opt)) < 0)
+		exit_with_error(errno);
+}
+
 static int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem,
 				struct ifobject *ifobject, bool shared)
 {
@@ -287,8 +310,8 @@ static int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_inf
 
 static struct option long_options[] = {
 	{"interface", required_argument, 0, 'i'},
-	{"queue", optional_argument, 0, 'q'},
-	{"dump-pkts", optional_argument, 0, 'D'},
+	{"busy-poll", no_argument, 0, 'b'},
+	{"dump-pkts", no_argument, 0, 'D'},
 	{"verbose", no_argument, 0, 'v'},
 	{0, 0, 0, 0}
 };
@@ -299,9 +322,9 @@ static void usage(const char *prog)
 		"  Usage: %s [OPTIONS]\n"
 		"  Options:\n"
 		"  -i, --interface      Use interface\n"
-		"  -q, --queue=n        Use queue n (default 0)\n"
 		"  -D, --dump-pkts      Dump packets L2 - L5\n"
-		"  -v, --verbose        Verbose output\n";
+		"  -v, --verbose        Verbose output\n"
+		"  -b, --busy-poll      Enable busy poll\n";
 
 	ksft_print_msg(str, prog);
 }
@@ -347,7 +370,7 @@ static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj
 	for (;;) {
 		char *sptr, *token;
 
-		c = getopt_long(argc, argv, "i:Dv", long_options, &option_index);
+		c = getopt_long(argc, argv, "i:Dvb", long_options, &option_index);
 		if (c == -1)
 			break;
 
@@ -373,6 +396,10 @@ static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj
 		case 'v':
 			opt_verbose = true;
 			break;
+		case 'b':
+			ifobj_tx->busy_poll = true;
+			ifobj_rx->busy_poll = true;
+			break;
 		default:
 			usage(basename(argv[0]));
 			ksft_exit_xfail();
@@ -716,11 +743,24 @@ static void kick_tx(struct xsk_socket_info *xsk)
 	int ret;
 
 	ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
-	if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN)
+	if (ret >= 0)
+		return;
+	if (errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN) {
+		usleep(100);
 		return;
+	}
 	exit_with_error(errno);
 }
 
+static void kick_rx(struct xsk_socket_info *xsk)
+{
+	int ret;
+
+	ret = recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL);
+	if (ret < 0)
+		exit_with_error(errno);
+}
+
 static void complete_pkts(struct xsk_socket_info *xsk, int batch_size)
 {
 	unsigned int rcvd;
@@ -745,15 +785,18 @@ static void complete_pkts(struct xsk_socket_info *xsk, int batch_size)
 	}
 }
 
-static void receive_pkts(struct pkt_stream *pkt_stream, struct xsk_socket_info *xsk,
-			 struct pollfd *fds)
+static void receive_pkts(struct ifobject *ifobj, struct pollfd *fds)
 {
+	struct pkt_stream *pkt_stream = ifobj->pkt_stream;
 	struct pkt *pkt = pkt_stream_get_next_rx_pkt(pkt_stream);
+	struct xsk_socket_info *xsk = ifobj->xsk;
 	struct xsk_umem_info *umem = xsk->umem;
 	u32 idx_rx = 0, idx_fq = 0, rcvd, i;
 	int ret;
 
 	while (pkt) {
+		kick_rx(xsk);
+
 		rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
 		if (!rcvd) {
 			if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
@@ -890,6 +933,8 @@ static bool rx_stats_are_valid(struct ifobject *ifobject)
 	socklen_t optlen;
 	int err;
 
+	kick_rx(ifobject->xsk);
+
 	optlen = sizeof(stats);
 	err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
 	if (err) {
@@ -984,6 +1029,9 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
 				exit_with_error(-ret);
 			usleep(USLEEP_MAX);
 		}
+
+		if (ifobject->busy_poll)
+			enable_busy_poll(&ifobject->xsk_arr[i]);
 	}
 
 	ifobject->xsk = &ifobject->xsk_arr[0];
@@ -1083,7 +1131,7 @@ static void *worker_testapp_validate_rx(void *arg)
 		while (!rx_stats_are_valid(ifobject))
 			continue;
 	else
-		receive_pkts(ifobject->pkt_stream, ifobject->xsk, &fds);
+		receive_pkts(ifobject, &fds);
 
 	if (test->total_steps == test->current_step)
 		testapp_cleanup_xsk_res(ifobject);
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 37ab549ce5fe..0eea0e1495ba 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -17,6 +17,14 @@
 #define PF_XDP AF_XDP
 #endif
 
+#ifndef SO_BUSY_POLL_BUDGET
+#define SO_BUSY_POLL_BUDGET 70
+#endif
+
+#ifndef SO_PREFER_BUSY_POLL
+#define SO_PREFER_BUSY_POLL 69
+#endif
+
 #define MAX_INTERFACES 2
 #define MAX_INTERFACE_NAME_CHARS 7
 #define MAX_INTERFACES_NAMESPACE_CHARS 10
@@ -139,6 +147,7 @@ struct ifobject {
 	bool tx_on;
 	bool rx_on;
 	bool use_poll;
+	bool busy_poll;
 	bool pacing_on;
 	u8 dst_mac[ETH_ALEN];
 	u8 src_mac[ETH_ALEN];
diff --git a/tools/testing/selftests/bpf/xsk_prereqs.sh b/tools/testing/selftests/bpf/xsk_prereqs.sh
index 7606d59b06bd..8b77d4c78aba 100755
--- a/tools/testing/selftests/bpf/xsk_prereqs.sh
+++ b/tools/testing/selftests/bpf/xsk_prereqs.sh
@@ -80,5 +80,9 @@ validate_ip_utility()
 
 execxdpxceiver()
 {
-	./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} ${VERBOSE_ARG} ${DUMP_PKTS_ARG}
+        if [[ $busy_poll -eq 1 ]]; then
+	        ARGS+="-b "
+	fi
+
+	./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} ${ARGS}
 }
-- 
cgit 


From 895b62eed2ab48c314653bcf3459521b42f9db4e Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 10 May 2022 13:55:59 +0200
Subject: selftests: xsk: fix reporting of failed tests

Fix the reporting of failed tests as it was broken in several
ways. First, a failed test was reported as both failed and passed
messing up the count. Second, tests were not aborted after a failure
and could generate more "failures" messing up the count even
more. Third, the failure reporting from the application to the shell
script was wrong. It always reported pass. And finally, the handling
of the failures in the launch script was not correct.

Correct all this by propagating the failure up through the function
calls to a calling function that can abort the test. A receiver or
sender thread will mark the new variable in the test spec called fail,
if a test has failed. This is then picked up by the main thread when
everyone else has exited and this is then marked and propagated up to
the calling script.

Also add a summary function in the calling script so that a user
does not have to go through the sub tests to see if something has
failed.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Link: https://lore.kernel.org/r/20220510115604.8717-5-magnus.karlsson@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_xsk.sh    |  23 ++--
 tools/testing/selftests/bpf/xdpxceiver.c   | 173 ++++++++++++++++++-----------
 tools/testing/selftests/bpf/xdpxceiver.h   |   4 +
 tools/testing/selftests/bpf/xsk_prereqs.sh |  30 ++---
 4 files changed, 141 insertions(+), 89 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
index 7989a9376f0e..d06215ee843d 100755
--- a/tools/testing/selftests/bpf/test_xsk.sh
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -87,7 +87,7 @@ done
 TEST_NAME="PREREQUISITES"
 
 URANDOM=/dev/urandom
-[ ! -e "${URANDOM}" ] && { echo "${URANDOM} not found. Skipping tests."; test_exit 1 1; }
+[ ! -e "${URANDOM}" ] && { echo "${URANDOM} not found. Skipping tests."; test_exit $ksft_fail; }
 
 VETH0_POSTFIX=$(cat ${URANDOM} | tr -dc '0-9' | fold -w 256 | head -n 1 | head --bytes 4)
 VETH0=ve${VETH0_POSTFIX}
@@ -155,10 +155,6 @@ TEST_NAME="XSK_SELFTESTS_SOFTIRQ"
 
 execxdpxceiver
 
-retval=$?
-test_status $retval "${TEST_NAME}"
-statusList+=($retval)
-
 cleanup_exit ${VETH0} ${VETH1} ${NS1}
 TEST_NAME="XSK_SELFTESTS_BUSY_POLL"
 busy_poll=1
@@ -166,19 +162,20 @@ busy_poll=1
 setup_vethPairs
 execxdpxceiver
 
-retval=$?
-test_status $retval "${TEST_NAME}"
-statusList+=($retval)
-
 ## END TESTS
 
 cleanup_exit ${VETH0} ${VETH1} ${NS1}
 
-for _status in "${statusList[@]}"
+failures=0
+echo -e "\nSummary:"
+for i in "${!statusList[@]}"
 do
-	if [ $_status -ne 0 ]; then
-		test_exit $ksft_fail 0
+	if [ ${statusList[$i]} -ne 0 ]; then
+	        test_status ${statusList[$i]} ${nameList[$i]}
+		failures=1
 	fi
 done
 
-test_exit $ksft_pass 0
+if [ $failures -eq 0 ]; then
+        echo "All tests successful!"
+fi
diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 6efac9e35c2e..ebbab8f967c1 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -125,9 +125,15 @@ static void __exit_with_error(int error, const char *file, const char *func, int
 #define mode_string(test) (test)->ifobj_tx->xdp_flags & XDP_FLAGS_SKB_MODE ? "SKB" : "DRV"
 #define busy_poll_string(test) (test)->ifobj_tx->busy_poll ? "BUSY-POLL " : ""
 
-#define print_ksft_result(test)						\
-	(ksft_test_result_pass("PASS: %s %s%s\n", mode_string(test), busy_poll_string(test), \
-			       (test)->name))
+static void report_failure(struct test_spec *test)
+{
+	if (test->fail)
+		return;
+
+	ksft_test_result_fail("FAIL: %s %s%s\n", mode_string(test), busy_poll_string(test),
+			      test->name);
+	test->fail = true;
+}
 
 static void memset32_htonl(void *dest, u32 val, u32 size)
 {
@@ -443,6 +449,7 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
 	test->current_step = 0;
 	test->total_steps = 1;
 	test->nb_sockets = 1;
+	test->fail = false;
 }
 
 static void test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
@@ -689,8 +696,7 @@ static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt_stream *pkt
 	if (offset == expected_offset)
 		return true;
 
-	ksft_test_result_fail("ERROR: [%s] expected [%u], got [%u]\n", __func__, expected_offset,
-			      offset);
+	ksft_print_msg("[%s] expected [%u], got [%u]\n", __func__, expected_offset, offset);
 	return false;
 }
 
@@ -700,7 +706,7 @@ static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len)
 	struct iphdr *iphdr = (struct iphdr *)(data + sizeof(struct ethhdr));
 
 	if (!pkt) {
-		ksft_test_result_fail("ERROR: [%s] too many packets received\n", __func__);
+		ksft_print_msg("[%s] too many packets received\n", __func__);
 		return false;
 	}
 
@@ -710,9 +716,8 @@ static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len)
 	}
 
 	if (pkt->len != len) {
-		ksft_test_result_fail
-			("ERROR: [%s] expected length [%d], got length [%d]\n",
-			 __func__, pkt->len, len);
+		ksft_print_msg("[%s] expected length [%d], got length [%d]\n",
+			       __func__, pkt->len, len);
 		return false;
 	}
 
@@ -723,9 +728,8 @@ static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len)
 			pkt_dump(data, PKT_SIZE);
 
 		if (pkt->payload != seqnum) {
-			ksft_test_result_fail
-				("ERROR: [%s] expected seqnum [%d], got seqnum [%d]\n",
-					__func__, pkt->payload, seqnum);
+			ksft_print_msg("[%s] expected seqnum [%d], got seqnum [%d]\n",
+				       __func__, pkt->payload, seqnum);
 			return false;
 		}
 	} else {
@@ -761,7 +765,7 @@ static void kick_rx(struct xsk_socket_info *xsk)
 		exit_with_error(errno);
 }
 
-static void complete_pkts(struct xsk_socket_info *xsk, int batch_size)
+static int complete_pkts(struct xsk_socket_info *xsk, int batch_size)
 {
 	unsigned int rcvd;
 	u32 idx;
@@ -774,18 +778,19 @@ static void complete_pkts(struct xsk_socket_info *xsk, int batch_size)
 		if (rcvd > xsk->outstanding_tx) {
 			u64 addr = *xsk_ring_cons__comp_addr(&xsk->umem->cq, idx + rcvd - 1);
 
-			ksft_test_result_fail("ERROR: [%s] Too many packets completed\n",
-					      __func__);
+			ksft_print_msg("[%s] Too many packets completed\n", __func__);
 			ksft_print_msg("Last completion address: %llx\n", addr);
-			return;
+			return TEST_FAILURE;
 		}
 
 		xsk_ring_cons__release(&xsk->umem->cq, rcvd);
 		xsk->outstanding_tx -= rcvd;
 	}
+
+	return TEST_PASS;
 }
 
-static void receive_pkts(struct ifobject *ifobj, struct pollfd *fds)
+static int receive_pkts(struct ifobject *ifobj, struct pollfd *fds)
 {
 	struct pkt_stream *pkt_stream = ifobj->pkt_stream;
 	struct pkt *pkt = pkt_stream_get_next_rx_pkt(pkt_stream);
@@ -824,20 +829,19 @@ static void receive_pkts(struct ifobject *ifobj, struct pollfd *fds)
 			u64 addr = desc->addr, orig;
 
 			if (!pkt) {
-				ksft_test_result_fail("ERROR: [%s] Received too many packets.\n",
-						      __func__);
+				ksft_print_msg("[%s] Received too many packets.\n",
+					       __func__);
 				ksft_print_msg("Last packet has addr: %llx len: %u\n",
 					       addr, desc->len);
-				return;
+				return TEST_FAILURE;
 			}
 
 			orig = xsk_umem__extract_addr(addr);
 			addr = xsk_umem__add_offset_to_addr(addr);
 
-			if (!is_pkt_valid(pkt, umem->buffer, addr, desc->len))
-				return;
-			if (!is_offset_correct(umem, pkt_stream, addr, pkt->addr))
-				return;
+			if (!is_pkt_valid(pkt, umem->buffer, addr, desc->len) ||
+			    !is_offset_correct(umem, pkt_stream, addr, pkt->addr))
+				return TEST_FAILURE;
 
 			*xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig;
 			pkt = pkt_stream_get_next_rx_pkt(pkt_stream);
@@ -852,9 +856,11 @@ static void receive_pkts(struct ifobject *ifobj, struct pollfd *fds)
 			pthread_cond_signal(&pacing_cond);
 		pthread_mutex_unlock(&pacing_mutex);
 	}
+
+	return TEST_PASS;
 }
 
-static u32 __send_pkts(struct ifobject *ifobject, u32 pkt_nb)
+static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb)
 {
 	struct xsk_socket_info *xsk = ifobject->xsk;
 	u32 i, idx, valid_pkts = 0;
@@ -864,14 +870,14 @@ static u32 __send_pkts(struct ifobject *ifobject, u32 pkt_nb)
 
 	for (i = 0; i < BATCH_SIZE; i++) {
 		struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i);
-		struct pkt *pkt = pkt_generate(ifobject, pkt_nb);
+		struct pkt *pkt = pkt_generate(ifobject, *pkt_nb);
 
 		if (!pkt)
 			break;
 
 		tx_desc->addr = pkt->addr;
 		tx_desc->len = pkt->len;
-		pkt_nb++;
+		(*pkt_nb)++;
 		if (pkt->valid)
 			valid_pkts++;
 	}
@@ -886,10 +892,11 @@ static u32 __send_pkts(struct ifobject *ifobject, u32 pkt_nb)
 
 	xsk_ring_prod__submit(&xsk->tx, i);
 	xsk->outstanding_tx += valid_pkts;
-	complete_pkts(xsk, i);
+	if (complete_pkts(xsk, i))
+		return TEST_FAILURE;
 
 	usleep(10);
-	return i;
+	return TEST_PASS;
 }
 
 static void wait_for_tx_completion(struct xsk_socket_info *xsk)
@@ -898,7 +905,7 @@ static void wait_for_tx_completion(struct xsk_socket_info *xsk)
 		complete_pkts(xsk, BATCH_SIZE);
 }
 
-static void send_pkts(struct ifobject *ifobject)
+static int send_pkts(struct test_spec *test, struct ifobject *ifobject)
 {
 	struct pollfd fds = { };
 	u32 pkt_cnt = 0;
@@ -907,6 +914,8 @@ static void send_pkts(struct ifobject *ifobject)
 	fds.events = POLLOUT;
 
 	while (pkt_cnt < ifobject->pkt_stream->nb_pkts) {
+		int err;
+
 		if (ifobject->use_poll) {
 			int ret;
 
@@ -918,13 +927,16 @@ static void send_pkts(struct ifobject *ifobject)
 				continue;
 		}
 
-		pkt_cnt += __send_pkts(ifobject, pkt_cnt);
+		err = __send_pkts(ifobject, &pkt_cnt);
+		if (err || test->fail)
+			return TEST_FAILURE;
 	}
 
 	wait_for_tx_completion(ifobject->xsk);
+	return TEST_PASS;
 }
 
-static bool rx_stats_are_valid(struct ifobject *ifobject)
+static int rx_stats_validate(struct ifobject *ifobject)
 {
 	u32 xsk_stat = 0, expected_stat = ifobject->pkt_stream->nb_pkts;
 	struct xsk_socket *xsk = ifobject->xsk->xsk;
@@ -938,9 +950,9 @@ static bool rx_stats_are_valid(struct ifobject *ifobject)
 	optlen = sizeof(stats);
 	err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
 	if (err) {
-		ksft_test_result_fail("ERROR Rx: [%s] getsockopt(XDP_STATISTICS) error %u %s\n",
-				      __func__, -err, strerror(-err));
-		return true;
+		ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n",
+			       __func__, -err, strerror(-err));
+		return TEST_FAILURE;
 	}
 
 	if (optlen == sizeof(struct xdp_statistics)) {
@@ -965,13 +977,13 @@ static bool rx_stats_are_valid(struct ifobject *ifobject)
 		}
 
 		if (xsk_stat == expected_stat)
-			return true;
+			return TEST_PASS;
 	}
 
-	return false;
+	return TEST_CONTINUE;
 }
 
-static void tx_stats_validate(struct ifobject *ifobject)
+static int tx_stats_validate(struct ifobject *ifobject)
 {
 	struct xsk_socket *xsk = ifobject->xsk->xsk;
 	int fd = xsk_socket__fd(xsk);
@@ -982,16 +994,18 @@ static void tx_stats_validate(struct ifobject *ifobject)
 	optlen = sizeof(stats);
 	err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
 	if (err) {
-		ksft_test_result_fail("ERROR Tx: [%s] getsockopt(XDP_STATISTICS) error %u %s\n",
-				      __func__, -err, strerror(-err));
-		return;
+		ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n",
+			       __func__, -err, strerror(-err));
+		return TEST_FAILURE;
 	}
 
-	if (stats.tx_invalid_descs == ifobject->pkt_stream->nb_pkts)
-		return;
+	if (stats.tx_invalid_descs != ifobject->pkt_stream->nb_pkts) {
+		ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%u] expected [%u]\n",
+			       __func__, stats.tx_invalid_descs, ifobject->pkt_stream->nb_pkts);
+		return TEST_FAILURE;
+	}
 
-	ksft_test_result_fail("ERROR: [%s] tx_invalid_descs incorrect. Got [%u] expected [%u]\n",
-			      __func__, stats.tx_invalid_descs, ifobject->pkt_stream->nb_pkts);
+	return TEST_PASS;
 }
 
 static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
@@ -1064,18 +1078,26 @@ static void *worker_testapp_validate_tx(void *arg)
 {
 	struct test_spec *test = (struct test_spec *)arg;
 	struct ifobject *ifobject = test->ifobj_tx;
+	int err;
 
 	if (test->current_step == 1)
 		thread_common_ops(test, ifobject);
 
 	print_verbose("Sending %d packets on interface %s\n", ifobject->pkt_stream->nb_pkts,
 		      ifobject->ifname);
-	send_pkts(ifobject);
+	err = send_pkts(test, ifobject);
+	if (err) {
+		report_failure(test);
+		goto out;
+	}
 
-	if (stat_test_type == STAT_TEST_TX_INVALID)
-		tx_stats_validate(ifobject);
+	if (stat_test_type == STAT_TEST_TX_INVALID) {
+		err = tx_stats_validate(ifobject);
+		report_failure(test);
+	}
 
-	if (test->total_steps == test->current_step)
+out:
+	if (test->total_steps == test->current_step || err)
 		testapp_cleanup_xsk_res(ifobject);
 	pthread_exit(NULL);
 }
@@ -1116,6 +1138,7 @@ static void *worker_testapp_validate_rx(void *arg)
 	struct test_spec *test = (struct test_spec *)arg;
 	struct ifobject *ifobject = test->ifobj_rx;
 	struct pollfd fds = { };
+	int err;
 
 	if (test->current_step == 1)
 		thread_common_ops(test, ifobject);
@@ -1127,18 +1150,27 @@ static void *worker_testapp_validate_rx(void *arg)
 
 	pthread_barrier_wait(&barr);
 
-	if (test_type == TEST_TYPE_STATS)
-		while (!rx_stats_are_valid(ifobject))
-			continue;
-	else
-		receive_pkts(ifobject, &fds);
+	if (test_type == TEST_TYPE_STATS) {
+		do {
+			err = rx_stats_validate(ifobject);
+		} while (err == TEST_CONTINUE);
+	} else {
+		err = receive_pkts(ifobject, &fds);
+	}
+
+	if (err) {
+		report_failure(test);
+		pthread_mutex_lock(&pacing_mutex);
+		pthread_cond_signal(&pacing_cond);
+		pthread_mutex_unlock(&pacing_mutex);
+	}
 
-	if (test->total_steps == test->current_step)
+	if (test->total_steps == test->current_step || err)
 		testapp_cleanup_xsk_res(ifobject);
 	pthread_exit(NULL);
 }
 
-static void testapp_validate_traffic(struct test_spec *test)
+static int testapp_validate_traffic(struct test_spec *test)
 {
 	struct ifobject *ifobj_tx = test->ifobj_tx;
 	struct ifobject *ifobj_rx = test->ifobj_rx;
@@ -1163,6 +1195,8 @@ static void testapp_validate_traffic(struct test_spec *test)
 
 	pthread_join(t1, NULL);
 	pthread_join(t0, NULL);
+
+	return !!test->fail;
 }
 
 static void testapp_teardown(struct test_spec *test)
@@ -1171,7 +1205,8 @@ static void testapp_teardown(struct test_spec *test)
 
 	test_spec_set_name(test, "TEARDOWN");
 	for (i = 0; i < MAX_TEARDOWN_ITER; i++) {
-		testapp_validate_traffic(test);
+		if (testapp_validate_traffic(test))
+			return;
 		test_spec_reset(test);
 	}
 }
@@ -1194,7 +1229,8 @@ static void testapp_bidi(struct test_spec *test)
 	test->ifobj_tx->rx_on = true;
 	test->ifobj_rx->tx_on = true;
 	test->total_steps = 2;
-	testapp_validate_traffic(test);
+	if (testapp_validate_traffic(test))
+		return;
 
 	print_verbose("Switching Tx/Rx vectors\n");
 	swap_directions(&test->ifobj_rx, &test->ifobj_tx);
@@ -1222,7 +1258,8 @@ static void testapp_bpf_res(struct test_spec *test)
 	test_spec_set_name(test, "BPF_RES");
 	test->total_steps = 2;
 	test->nb_sockets = 2;
-	testapp_validate_traffic(test);
+	if (testapp_validate_traffic(test))
+		return;
 
 	swap_xsk_resources(test->ifobj_tx, test->ifobj_rx);
 	testapp_validate_traffic(test);
@@ -1278,6 +1315,9 @@ static void testapp_stats(struct test_spec *test)
 		default:
 			break;
 		}
+
+		if (test->fail)
+			break;
 	}
 
 	/* To only see the whole stat set being completed unless an individual test fails. */
@@ -1458,7 +1498,9 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_
 		break;
 	}
 
-	print_ksft_result(test);
+	if (!test->fail)
+		ksft_test_result_pass("PASS: %s %s%s\n", mode_string(test), busy_poll_string(test),
+				      test->name);
 }
 
 static struct ifobject *ifobject_create(void)
@@ -1497,8 +1539,8 @@ int main(int argc, char **argv)
 {
 	struct pkt_stream *pkt_stream_default;
 	struct ifobject *ifobj_tx, *ifobj_rx;
+	u32 i, j, failed_tests = 0;
 	struct test_spec test;
-	u32 i, j;
 
 	/* Use libbpf 1.0 API mode */
 	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
@@ -1537,12 +1579,17 @@ int main(int argc, char **argv)
 			test_spec_init(&test, ifobj_tx, ifobj_rx, i);
 			run_pkt_test(&test, i, j);
 			usleep(USLEEP_MAX);
+
+			if (test.fail)
+				failed_tests++;
 		}
 
 	pkt_stream_delete(pkt_stream_default);
 	ifobject_delete(ifobj_tx);
 	ifobject_delete(ifobj_rx);
 
-	ksft_exit_pass();
-	return 0;
+	if (failed_tests)
+		ksft_exit_fail();
+	else
+		ksft_exit_pass();
 }
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 0eea0e1495ba..7c6bf5ed594d 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -25,6 +25,9 @@
 #define SO_PREFER_BUSY_POLL 69
 #endif
 
+#define TEST_PASS 0
+#define TEST_FAILURE -1
+#define TEST_CONTINUE -2
 #define MAX_INTERFACES 2
 #define MAX_INTERFACE_NAME_CHARS 7
 #define MAX_INTERFACES_NAMESPACE_CHARS 10
@@ -160,6 +163,7 @@ struct test_spec {
 	u16 total_steps;
 	u16 current_step;
 	u16 nb_sockets;
+	bool fail;
 	char name[MAX_TEST_NAME_SIZE];
 };
 
diff --git a/tools/testing/selftests/bpf/xsk_prereqs.sh b/tools/testing/selftests/bpf/xsk_prereqs.sh
index 8b77d4c78aba..684e813803ec 100755
--- a/tools/testing/selftests/bpf/xsk_prereqs.sh
+++ b/tools/testing/selftests/bpf/xsk_prereqs.sh
@@ -15,7 +15,7 @@ validate_root_exec()
 	msg="skip all tests:"
 	if [ $UID != 0 ]; then
 		echo $msg must be run as root >&2
-		test_exit $ksft_fail 2
+		test_exit $ksft_fail
 	else
 		return $ksft_pass
 	fi
@@ -26,7 +26,7 @@ validate_veth_support()
 	msg="skip all tests:"
 	if [ $(ip link add $1 type veth 2>/dev/null; echo $?;) != 0 ]; then
 		echo $msg veth kernel support not available >&2
-		test_exit $ksft_skip 1
+		test_exit $ksft_skip
 	else
 		ip link del $1
 		return $ksft_pass
@@ -36,22 +36,21 @@ validate_veth_support()
 test_status()
 {
 	statusval=$1
-	if [ $statusval -eq 2 ]; then
-		echo -e "$2: [ FAIL ]"
-	elif [ $statusval -eq 1 ]; then
-		echo -e "$2: [ SKIPPED ]"
-	elif [ $statusval -eq 0 ]; then
-		echo -e "$2: [ PASS ]"
+	if [ $statusval -eq $ksft_fail ]; then
+		echo "$2: [ FAIL ]"
+	elif [ $statusval -eq $ksft_skip ]; then
+		echo "$2: [ SKIPPED ]"
+	elif [ $statusval -eq $ksft_pass ]; then
+		echo "$2: [ PASS ]"
 	fi
 }
 
 test_exit()
 {
-	retval=$1
-	if [ $2 -ne 0 ]; then
-		test_status $2 $(basename $0)
+	if [ $1 -ne 0 ]; then
+		test_status $1 $(basename $0)
 	fi
-	exit $retval
+	exit 1
 }
 
 clear_configs()
@@ -75,7 +74,7 @@ cleanup_exit()
 
 validate_ip_utility()
 {
-	[ ! $(type -P ip) ] && { echo "'ip' not found. Skipping tests."; test_exit $ksft_skip 1; }
+	[ ! $(type -P ip) ] && { echo "'ip' not found. Skipping tests."; test_exit $ksft_skip; }
 }
 
 execxdpxceiver()
@@ -85,4 +84,9 @@ execxdpxceiver()
 	fi
 
 	./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} ${ARGS}
+
+	retval=$?
+	test_status $retval "${TEST_NAME}"
+	statusList+=($retval)
+	nameList+=(${TEST_NAME})
 }
-- 
cgit 


From db1bd7a99454db5d9003ddcd64e771e265170f5b Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 10 May 2022 13:56:00 +0200
Subject: selftests: xsk: add timeout to tests

Add a timeout to the tests so that if all packets have not been
received within 3 seconds, fail the ongoing test. Hinders a test from
dead-locking if there is something wrong.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Link: https://lore.kernel.org/r/20220510115604.8717-6-magnus.karlsson@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/xdpxceiver.c | 15 +++++++++++++++
 tools/testing/selftests/bpf/xdpxceiver.h |  1 +
 2 files changed, 16 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index ebbab8f967c1..dc21951a1b0a 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -91,6 +91,7 @@
 #include <stddef.h>
 #include <sys/mman.h>
 #include <sys/socket.h>
+#include <sys/time.h>
 #include <sys/types.h>
 #include <sys/queue.h>
 #include <time.h>
@@ -792,6 +793,7 @@ static int complete_pkts(struct xsk_socket_info *xsk, int batch_size)
 
 static int receive_pkts(struct ifobject *ifobj, struct pollfd *fds)
 {
+	struct timeval tv_end, tv_now, tv_timeout = {RECV_TMOUT, 0};
 	struct pkt_stream *pkt_stream = ifobj->pkt_stream;
 	struct pkt *pkt = pkt_stream_get_next_rx_pkt(pkt_stream);
 	struct xsk_socket_info *xsk = ifobj->xsk;
@@ -799,7 +801,20 @@ static int receive_pkts(struct ifobject *ifobj, struct pollfd *fds)
 	u32 idx_rx = 0, idx_fq = 0, rcvd, i;
 	int ret;
 
+	ret = gettimeofday(&tv_now, NULL);
+	if (ret)
+		exit_with_error(errno);
+	timeradd(&tv_now, &tv_timeout, &tv_end);
+
 	while (pkt) {
+		ret = gettimeofday(&tv_now, NULL);
+		if (ret)
+			exit_with_error(errno);
+		if (timercmp(&tv_now, &tv_end, >)) {
+			ksft_print_msg("ERROR: [%s] Receive loop timed out\n", __func__);
+			return TEST_FAILURE;
+		}
+
 		kick_rx(xsk);
 
 		rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 7c6bf5ed594d..79ba344d2765 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -49,6 +49,7 @@
 #define SOCK_RECONF_CTR 10
 #define BATCH_SIZE 64
 #define POLL_TMOUT 1000
+#define RECV_TMOUT 3
 #define DEFAULT_PKT_CNT (4 * 1024)
 #define DEFAULT_UMEM_BUFFERS (DEFAULT_PKT_CNT / 4)
 #define UMEM_SIZE (DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE)
-- 
cgit 


From d41cb6c47474484c647933702e9fa8fb5054cdf2 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 10 May 2022 13:56:01 +0200
Subject: selftests: xsk: cleanup veth pair at ctrl-c

Remove the veth pair when the tests are aborted by pressing
ctrl-c. Currently in this situation, the veth pair is left on the
system polluting the netdev space.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Link: https://lore.kernel.org/r/20220510115604.8717-7-magnus.karlsson@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_xsk.sh | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
index d06215ee843d..567500299231 100755
--- a/tools/testing/selftests/bpf/test_xsk.sh
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -97,6 +97,13 @@ NS0=root
 NS1=af_xdp${VETH1_POSTFIX}
 MTU=1500
 
+trap ctrl_c INT
+
+function ctrl_c() {
+        cleanup_exit ${VETH0} ${VETH1} ${NS1}
+	exit 1
+}
+
 setup_vethPairs() {
 	if [[ $verbose -eq 1 ]]; then
 	        echo "setting up ${VETH0}: namespace: ${NS0}"
-- 
cgit 


From 76c576638f5d8dcfd86df4dcb0641133a662b4d5 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 10 May 2022 13:56:02 +0200
Subject: selftests: xsk: introduce validation functions

Introduce validation functions that can be optionally called by the Rx
and Tx threads. These are then used to replace the Rx and Tx stats
dispatchers. This so that we in the next commit can make the stats
tests proper normal tests and not be some special case, as today.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Link: https://lore.kernel.org/r/20220510115604.8717-8-magnus.karlsson@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/xdpxceiver.c | 117 +++++++++++++++++++++----------
 tools/testing/selftests/bpf/xdpxceiver.h |   3 +
 2 files changed, 82 insertions(+), 38 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index dc21951a1b0a..3eef29cacf94 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -426,6 +426,7 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
 		ifobj->use_poll = false;
 		ifobj->pacing_on = true;
 		ifobj->pkt_stream = test->pkt_stream_default;
+		ifobj->validation_func = NULL;
 
 		if (i == 0) {
 			ifobj->rx_on = false;
@@ -951,54 +952,90 @@ static int send_pkts(struct test_spec *test, struct ifobject *ifobject)
 	return TEST_PASS;
 }
 
-static int rx_stats_validate(struct ifobject *ifobject)
+static int get_xsk_stats(struct xsk_socket *xsk, struct xdp_statistics *stats)
+{
+	int fd = xsk_socket__fd(xsk), err;
+	socklen_t optlen, expected_len;
+
+	optlen = sizeof(*stats);
+	err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, stats, &optlen);
+	if (err) {
+		ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n",
+			       __func__, -err, strerror(-err));
+		return TEST_FAILURE;
+	}
+
+	expected_len = sizeof(struct xdp_statistics);
+	if (optlen != expected_len) {
+		ksft_print_msg("[%s] getsockopt optlen error. Expected: %u got: %u\n",
+			       __func__, expected_len, optlen);
+		return TEST_FAILURE;
+	}
+
+	return TEST_PASS;
+}
+
+static int validate_rx_dropped(struct ifobject *ifobject)
 {
-	u32 xsk_stat = 0, expected_stat = ifobject->pkt_stream->nb_pkts;
 	struct xsk_socket *xsk = ifobject->xsk->xsk;
-	int fd = xsk_socket__fd(xsk);
 	struct xdp_statistics stats;
-	socklen_t optlen;
 	int err;
 
 	kick_rx(ifobject->xsk);
 
-	optlen = sizeof(stats);
-	err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
-	if (err) {
-		ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n",
-			       __func__, -err, strerror(-err));
+	err = get_xsk_stats(xsk, &stats);
+	if (err)
 		return TEST_FAILURE;
-	}
 
-	if (optlen == sizeof(struct xdp_statistics)) {
-		switch (stat_test_type) {
-		case STAT_TEST_RX_DROPPED:
-			xsk_stat = stats.rx_dropped;
-			break;
-		case STAT_TEST_TX_INVALID:
-			return true;
-		case STAT_TEST_RX_FULL:
-			xsk_stat = stats.rx_ring_full;
-			if (ifobject->umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS)
-				expected_stat = ifobject->umem->num_frames - RX_FULL_RXQSIZE;
-			else
-				expected_stat = XSK_RING_PROD__DEFAULT_NUM_DESCS - RX_FULL_RXQSIZE;
-			break;
-		case STAT_TEST_RX_FILL_EMPTY:
-			xsk_stat = stats.rx_fill_ring_empty_descs;
-			break;
-		default:
-			break;
-		}
+	if (stats.rx_dropped == ifobject->pkt_stream->nb_pkts)
+		return TEST_PASS;
 
-		if (xsk_stat == expected_stat)
-			return TEST_PASS;
-	}
+	return TEST_CONTINUE;
+}
+
+static int validate_rx_full(struct ifobject *ifobject)
+{
+	struct xsk_socket *xsk = ifobject->xsk->xsk;
+	struct xdp_statistics stats;
+	u32 expected_stat;
+	int err;
+
+	kick_rx(ifobject->xsk);
+
+	err = get_xsk_stats(xsk, &stats);
+	if (err)
+		return TEST_FAILURE;
+
+	if (ifobject->umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS)
+		expected_stat = ifobject->umem->num_frames - RX_FULL_RXQSIZE;
+	else
+		expected_stat = XSK_RING_PROD__DEFAULT_NUM_DESCS - RX_FULL_RXQSIZE;
+
+	if (stats.rx_ring_full == expected_stat)
+		return TEST_PASS;
+
+	return TEST_CONTINUE;
+}
+
+static int validate_fill_empty(struct ifobject *ifobject)
+{
+	struct xsk_socket *xsk = ifobject->xsk->xsk;
+	struct xdp_statistics stats;
+	int err;
+
+	kick_rx(ifobject->xsk);
+
+	err = get_xsk_stats(xsk, &stats);
+	if (err)
+		return TEST_FAILURE;
+
+	if (stats.rx_fill_ring_empty_descs == ifobject->pkt_stream->nb_pkts)
+		return TEST_PASS;
 
 	return TEST_CONTINUE;
 }
 
-static int tx_stats_validate(struct ifobject *ifobject)
+static int validate_tx_invalid_descs(struct ifobject *ifobject)
 {
 	struct xsk_socket *xsk = ifobject->xsk->xsk;
 	int fd = xsk_socket__fd(xsk);
@@ -1106,8 +1143,8 @@ static void *worker_testapp_validate_tx(void *arg)
 		goto out;
 	}
 
-	if (stat_test_type == STAT_TEST_TX_INVALID) {
-		err = tx_stats_validate(ifobject);
+	if (ifobject->validation_func) {
+		err = ifobject->validation_func(ifobject);
 		report_failure(test);
 	}
 
@@ -1165,9 +1202,9 @@ static void *worker_testapp_validate_rx(void *arg)
 
 	pthread_barrier_wait(&barr);
 
-	if (test_type == TEST_TYPE_STATS) {
+	if (ifobject->validation_func) {
 		do {
-			err = rx_stats_validate(ifobject);
+			err = ifobject->validation_func(ifobject);
 		} while (err == TEST_CONTINUE);
 	} else {
 		err = receive_pkts(ifobject, &fds);
@@ -1302,16 +1339,19 @@ static void testapp_stats(struct test_spec *test)
 			test_spec_set_name(test, "STAT_RX_DROPPED");
 			test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size -
 				XDP_PACKET_HEADROOM - 1;
+			test->ifobj_rx->validation_func = validate_rx_dropped;
 			testapp_validate_traffic(test);
 			break;
 		case STAT_TEST_RX_FULL:
 			test_spec_set_name(test, "STAT_RX_FULL");
 			test->ifobj_rx->xsk->rxqsize = RX_FULL_RXQSIZE;
+			test->ifobj_rx->validation_func = validate_rx_full;
 			testapp_validate_traffic(test);
 			break;
 		case STAT_TEST_TX_INVALID:
 			test_spec_set_name(test, "STAT_TX_INVALID");
 			pkt_stream_replace(test, DEFAULT_PKT_CNT, XSK_UMEM__INVALID_FRAME_SIZE);
+			test->ifobj_tx->validation_func = validate_tx_invalid_descs;
 			testapp_validate_traffic(test);
 
 			pkt_stream_restore_default(test);
@@ -1323,6 +1363,7 @@ static void testapp_stats(struct test_spec *test)
 			if (!test->ifobj_rx->pkt_stream)
 				exit_with_error(ENOMEM);
 			test->ifobj_rx->pkt_stream->use_addr_for_fill = true;
+			test->ifobj_rx->validation_func = validate_fill_empty;
 			testapp_validate_traffic(test);
 
 			pkt_stream_restore_default(test);
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 79ba344d2765..f271c7b35a2c 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -130,6 +130,8 @@ struct pkt_stream {
 	bool use_addr_for_fill;
 };
 
+struct ifobject;
+typedef int (*validation_func_t)(struct ifobject *ifobj);
 typedef void *(*thread_func_t)(void *arg);
 
 struct ifobject {
@@ -139,6 +141,7 @@ struct ifobject {
 	struct xsk_socket_info *xsk_arr;
 	struct xsk_umem_info *umem;
 	thread_func_t func_ptr;
+	validation_func_t validation_func;
 	struct pkt_stream *pkt_stream;
 	int ns_fd;
 	int xsk_map_fd;
-- 
cgit 


From 4fec7028ffeaa7d8b55d65d3d1e75202196ee441 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 10 May 2022 13:56:03 +0200
Subject: selftests: xsk: make the stats tests normal tests

Make the stats tests look and feel just like normal tests instead of
bunched under the umbrella of TEST_STATS. This means we will always
run each of them even if one fails. Also gets rid of some special case
code.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Link: https://lore.kernel.org/r/20220510115604.8717-9-magnus.karlsson@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/xdpxceiver.c | 106 ++++++++++++++-----------------
 tools/testing/selftests/bpf/xdpxceiver.h |  16 ++---
 2 files changed, 53 insertions(+), 69 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 3eef29cacf94..a75af0ea19a3 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -1324,60 +1324,48 @@ static void testapp_headroom(struct test_spec *test)
 	testapp_validate_traffic(test);
 }
 
-static void testapp_stats(struct test_spec *test)
+static void testapp_stats_rx_dropped(struct test_spec *test)
 {
-	int i;
+	test_spec_set_name(test, "STAT_RX_DROPPED");
+	test->ifobj_tx->pacing_on = false;
+	test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size -
+		XDP_PACKET_HEADROOM - 1;
+	test->ifobj_rx->validation_func = validate_rx_dropped;
+	testapp_validate_traffic(test);
+}
 
-	for (i = 0; i < STAT_TEST_TYPE_MAX; i++) {
-		test_spec_reset(test);
-		stat_test_type = i;
-		/* No or few packets will be received so cannot pace packets */
-		test->ifobj_tx->pacing_on = false;
-
-		switch (stat_test_type) {
-		case STAT_TEST_RX_DROPPED:
-			test_spec_set_name(test, "STAT_RX_DROPPED");
-			test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size -
-				XDP_PACKET_HEADROOM - 1;
-			test->ifobj_rx->validation_func = validate_rx_dropped;
-			testapp_validate_traffic(test);
-			break;
-		case STAT_TEST_RX_FULL:
-			test_spec_set_name(test, "STAT_RX_FULL");
-			test->ifobj_rx->xsk->rxqsize = RX_FULL_RXQSIZE;
-			test->ifobj_rx->validation_func = validate_rx_full;
-			testapp_validate_traffic(test);
-			break;
-		case STAT_TEST_TX_INVALID:
-			test_spec_set_name(test, "STAT_TX_INVALID");
-			pkt_stream_replace(test, DEFAULT_PKT_CNT, XSK_UMEM__INVALID_FRAME_SIZE);
-			test->ifobj_tx->validation_func = validate_tx_invalid_descs;
-			testapp_validate_traffic(test);
+static void testapp_stats_tx_invalid_descs(struct test_spec *test)
+{
+	test_spec_set_name(test, "STAT_TX_INVALID");
+	test->ifobj_tx->pacing_on = false;
+	pkt_stream_replace(test, DEFAULT_PKT_CNT, XSK_UMEM__INVALID_FRAME_SIZE);
+	test->ifobj_tx->validation_func = validate_tx_invalid_descs;
+	testapp_validate_traffic(test);
 
-			pkt_stream_restore_default(test);
-			break;
-		case STAT_TEST_RX_FILL_EMPTY:
-			test_spec_set_name(test, "STAT_RX_FILL_EMPTY");
-			test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, 0,
-									 MIN_PKT_SIZE);
-			if (!test->ifobj_rx->pkt_stream)
-				exit_with_error(ENOMEM);
-			test->ifobj_rx->pkt_stream->use_addr_for_fill = true;
-			test->ifobj_rx->validation_func = validate_fill_empty;
-			testapp_validate_traffic(test);
-
-			pkt_stream_restore_default(test);
-			break;
-		default:
-			break;
-		}
+	pkt_stream_restore_default(test);
+}
 
-		if (test->fail)
-			break;
-	}
+static void testapp_stats_rx_full(struct test_spec *test)
+{
+	test_spec_set_name(test, "STAT_RX_FULL");
+	test->ifobj_tx->pacing_on = false;
+	test->ifobj_rx->xsk->rxqsize = RX_FULL_RXQSIZE;
+	test->ifobj_rx->validation_func = validate_rx_full;
+	testapp_validate_traffic(test);
+}
 
-	/* To only see the whole stat set being completed unless an individual test fails. */
-	test_spec_set_name(test, "STATS");
+static void testapp_stats_fill_empty(struct test_spec *test)
+{
+	test_spec_set_name(test, "STAT_RX_FILL_EMPTY");
+	test->ifobj_tx->pacing_on = false;
+	test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, 0, MIN_PKT_SIZE);
+	if (!test->ifobj_rx->pkt_stream)
+		exit_with_error(ENOMEM);
+	test->ifobj_rx->pkt_stream->use_addr_for_fill = true;
+	test->ifobj_rx->validation_func = validate_fill_empty;
+	testapp_validate_traffic(test);
+
+	pkt_stream_restore_default(test);
 }
 
 /* Simple test */
@@ -1482,14 +1470,18 @@ static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *
 
 static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_type type)
 {
-	test_type = type;
-
-	/* reset defaults after potential previous test */
-	stat_test_type = -1;
-
-	switch (test_type) {
-	case TEST_TYPE_STATS:
-		testapp_stats(test);
+	switch (type) {
+	case TEST_TYPE_STATS_RX_DROPPED:
+		testapp_stats_rx_dropped(test);
+		break;
+	case TEST_TYPE_STATS_TX_INVALID_DESCS:
+		testapp_stats_tx_invalid_descs(test);
+		break;
+	case TEST_TYPE_STATS_RX_FULL:
+		testapp_stats_rx_full(test);
+		break;
+	case TEST_TYPE_STATS_FILL_EMPTY:
+		testapp_stats_fill_empty(test);
 		break;
 	case TEST_TYPE_TEARDOWN:
 		testapp_teardown(test);
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index f271c7b35a2c..bf18a95be48c 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -77,24 +77,16 @@ enum test_type {
 	TEST_TYPE_HEADROOM,
 	TEST_TYPE_TEARDOWN,
 	TEST_TYPE_BIDI,
-	TEST_TYPE_STATS,
+	TEST_TYPE_STATS_RX_DROPPED,
+	TEST_TYPE_STATS_TX_INVALID_DESCS,
+	TEST_TYPE_STATS_RX_FULL,
+	TEST_TYPE_STATS_FILL_EMPTY,
 	TEST_TYPE_BPF_RES,
 	TEST_TYPE_MAX
 };
 
-enum stat_test_type {
-	STAT_TEST_RX_DROPPED,
-	STAT_TEST_TX_INVALID,
-	STAT_TEST_RX_FULL,
-	STAT_TEST_RX_FILL_EMPTY,
-	STAT_TEST_TYPE_MAX
-};
-
 static bool opt_pkt_dump;
-static int test_type;
-
 static bool opt_verbose;
-static int stat_test_type;
 
 struct xsk_umem_info {
 	struct xsk_ring_prod fq;
-- 
cgit 


From 27e934bec35bc733733cd886508376cc8f9bd80f Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 10 May 2022 13:56:04 +0200
Subject: selftests: xsk: make stat tests not spin on getsockopt

Convert the stats tests from spinning on the getsockopt to just check
getsockopt once when the Rx thread has received all the packets. The
actual completion of receiving the last packet forms a natural point
in time when the receiver is ready to call the getsockopt to check the
stats. In the previous version , we just span on the getsockopt until
we received the right answer. This could be forever or just getting
the "correct" answer by shear luck.

The pacing_on variable can now be dropped since all test can now
handle pacing properly.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Link: https://lore.kernel.org/r/20220510115604.8717-10-magnus.karlsson@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/xdpxceiver.c | 170 +++++++++++++++++--------------
 tools/testing/selftests/bpf/xdpxceiver.h |   6 +-
 2 files changed, 99 insertions(+), 77 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index a75af0ea19a3..e5992a6b5e09 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -424,7 +424,8 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
 
 		ifobj->xsk = &ifobj->xsk_arr[0];
 		ifobj->use_poll = false;
-		ifobj->pacing_on = true;
+		ifobj->use_fill_ring = true;
+		ifobj->release_rx = true;
 		ifobj->pkt_stream = test->pkt_stream_default;
 		ifobj->validation_func = NULL;
 
@@ -503,9 +504,10 @@ static struct pkt *pkt_stream_get_pkt(struct pkt_stream *pkt_stream, u32 pkt_nb)
 	return &pkt_stream->pkts[pkt_nb];
 }
 
-static struct pkt *pkt_stream_get_next_rx_pkt(struct pkt_stream *pkt_stream)
+static struct pkt *pkt_stream_get_next_rx_pkt(struct pkt_stream *pkt_stream, u32 *pkts_sent)
 {
 	while (pkt_stream->rx_pkt_nb < pkt_stream->nb_pkts) {
+		(*pkts_sent)++;
 		if (pkt_stream->pkts[pkt_stream->rx_pkt_nb].valid)
 			return &pkt_stream->pkts[pkt_stream->rx_pkt_nb++];
 		pkt_stream->rx_pkt_nb++;
@@ -521,10 +523,16 @@ static void pkt_stream_delete(struct pkt_stream *pkt_stream)
 
 static void pkt_stream_restore_default(struct test_spec *test)
 {
-	if (test->ifobj_tx->pkt_stream != test->pkt_stream_default) {
+	struct pkt_stream *tx_pkt_stream = test->ifobj_tx->pkt_stream;
+
+	if (tx_pkt_stream != test->pkt_stream_default) {
 		pkt_stream_delete(test->ifobj_tx->pkt_stream);
 		test->ifobj_tx->pkt_stream = test->pkt_stream_default;
 	}
+
+	if (test->ifobj_rx->pkt_stream != test->pkt_stream_default &&
+	    test->ifobj_rx->pkt_stream != tx_pkt_stream)
+		pkt_stream_delete(test->ifobj_rx->pkt_stream);
 	test->ifobj_rx->pkt_stream = test->pkt_stream_default;
 }
 
@@ -546,6 +554,16 @@ static struct pkt_stream *__pkt_stream_alloc(u32 nb_pkts)
 	return pkt_stream;
 }
 
+static void pkt_set(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr, u32 len)
+{
+	pkt->addr = addr;
+	pkt->len = len;
+	if (len > umem->frame_size - XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 2 - umem->frame_headroom)
+		pkt->valid = false;
+	else
+		pkt->valid = true;
+}
+
 static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb_pkts, u32 pkt_len)
 {
 	struct pkt_stream *pkt_stream;
@@ -557,14 +575,9 @@ static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb
 
 	pkt_stream->nb_pkts = nb_pkts;
 	for (i = 0; i < nb_pkts; i++) {
-		pkt_stream->pkts[i].addr = (i % umem->num_frames) * umem->frame_size;
-		pkt_stream->pkts[i].len = pkt_len;
+		pkt_set(umem, &pkt_stream->pkts[i], (i % umem->num_frames) * umem->frame_size,
+			pkt_len);
 		pkt_stream->pkts[i].payload = i;
-
-		if (pkt_len > umem->frame_size)
-			pkt_stream->pkts[i].valid = false;
-		else
-			pkt_stream->pkts[i].valid = true;
 	}
 
 	return pkt_stream;
@@ -592,15 +605,27 @@ static void pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int off
 	u32 i;
 
 	pkt_stream = pkt_stream_clone(umem, test->pkt_stream_default);
-	for (i = 1; i < test->pkt_stream_default->nb_pkts; i += 2) {
-		pkt_stream->pkts[i].addr = (i % umem->num_frames) * umem->frame_size + offset;
-		pkt_stream->pkts[i].len = pkt_len;
-	}
+	for (i = 1; i < test->pkt_stream_default->nb_pkts; i += 2)
+		pkt_set(umem, &pkt_stream->pkts[i],
+			(i % umem->num_frames) * umem->frame_size + offset, pkt_len);
 
 	test->ifobj_tx->pkt_stream = pkt_stream;
 	test->ifobj_rx->pkt_stream = pkt_stream;
 }
 
+static void pkt_stream_receive_half(struct test_spec *test)
+{
+	struct xsk_umem_info *umem = test->ifobj_rx->umem;
+	struct pkt_stream *pkt_stream = test->ifobj_tx->pkt_stream;
+	u32 i;
+
+	test->ifobj_rx->pkt_stream = pkt_stream_generate(umem, pkt_stream->nb_pkts,
+							 pkt_stream->pkts[0].len);
+	pkt_stream = test->ifobj_rx->pkt_stream;
+	for (i = 1; i < pkt_stream->nb_pkts; i += 2)
+		pkt_stream->pkts[i].valid = false;
+}
+
 static struct pkt *pkt_generate(struct ifobject *ifobject, u32 pkt_nb)
 {
 	struct pkt *pkt = pkt_stream_get_pkt(ifobject->pkt_stream, pkt_nb);
@@ -795,11 +820,11 @@ static int complete_pkts(struct xsk_socket_info *xsk, int batch_size)
 static int receive_pkts(struct ifobject *ifobj, struct pollfd *fds)
 {
 	struct timeval tv_end, tv_now, tv_timeout = {RECV_TMOUT, 0};
+	u32 idx_rx = 0, idx_fq = 0, rcvd, i, pkts_sent = 0;
 	struct pkt_stream *pkt_stream = ifobj->pkt_stream;
-	struct pkt *pkt = pkt_stream_get_next_rx_pkt(pkt_stream);
 	struct xsk_socket_info *xsk = ifobj->xsk;
 	struct xsk_umem_info *umem = xsk->umem;
-	u32 idx_rx = 0, idx_fq = 0, rcvd, i;
+	struct pkt *pkt;
 	int ret;
 
 	ret = gettimeofday(&tv_now, NULL);
@@ -807,6 +832,7 @@ static int receive_pkts(struct ifobject *ifobj, struct pollfd *fds)
 		exit_with_error(errno);
 	timeradd(&tv_now, &tv_timeout, &tv_end);
 
+	pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent);
 	while (pkt) {
 		ret = gettimeofday(&tv_now, NULL);
 		if (ret)
@@ -828,30 +854,24 @@ static int receive_pkts(struct ifobject *ifobj, struct pollfd *fds)
 			continue;
 		}
 
-		ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
-		while (ret != rcvd) {
-			if (ret < 0)
-				exit_with_error(-ret);
-			if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
-				ret = poll(fds, 1, POLL_TMOUT);
+		if (ifobj->use_fill_ring) {
+			ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
+			while (ret != rcvd) {
 				if (ret < 0)
 					exit_with_error(-ret);
+				if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
+					ret = poll(fds, 1, POLL_TMOUT);
+					if (ret < 0)
+						exit_with_error(-ret);
+				}
+				ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
 			}
-			ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
 		}
 
 		for (i = 0; i < rcvd; i++) {
 			const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++);
 			u64 addr = desc->addr, orig;
 
-			if (!pkt) {
-				ksft_print_msg("[%s] Received too many packets.\n",
-					       __func__);
-				ksft_print_msg("Last packet has addr: %llx len: %u\n",
-					       addr, desc->len);
-				return TEST_FAILURE;
-			}
-
 			orig = xsk_umem__extract_addr(addr);
 			addr = xsk_umem__add_offset_to_addr(addr);
 
@@ -859,18 +879,22 @@ static int receive_pkts(struct ifobject *ifobj, struct pollfd *fds)
 			    !is_offset_correct(umem, pkt_stream, addr, pkt->addr))
 				return TEST_FAILURE;
 
-			*xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig;
-			pkt = pkt_stream_get_next_rx_pkt(pkt_stream);
+			if (ifobj->use_fill_ring)
+				*xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig;
+			pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent);
 		}
 
-		xsk_ring_prod__submit(&umem->fq, rcvd);
-		xsk_ring_cons__release(&xsk->rx, rcvd);
+		if (ifobj->use_fill_ring)
+			xsk_ring_prod__submit(&umem->fq, rcvd);
+		if (ifobj->release_rx)
+			xsk_ring_cons__release(&xsk->rx, rcvd);
 
 		pthread_mutex_lock(&pacing_mutex);
-		pkts_in_flight -= rcvd;
+		pkts_in_flight -= pkts_sent;
 		if (pkts_in_flight < umem->num_frames)
 			pthread_cond_signal(&pacing_cond);
 		pthread_mutex_unlock(&pacing_mutex);
+		pkts_sent = 0;
 	}
 
 	return TEST_PASS;
@@ -900,7 +924,8 @@ static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb)
 
 	pthread_mutex_lock(&pacing_mutex);
 	pkts_in_flight += valid_pkts;
-	if (ifobject->pacing_on && pkts_in_flight >= ifobject->umem->num_frames - BATCH_SIZE) {
+	/* pkts_in_flight might be negative if many invalid packets are sent */
+	if (pkts_in_flight >= (int)(ifobject->umem->num_frames - BATCH_SIZE)) {
 		kick_tx(xsk);
 		pthread_cond_wait(&pacing_cond, &pacing_mutex);
 	}
@@ -987,34 +1012,29 @@ static int validate_rx_dropped(struct ifobject *ifobject)
 	if (err)
 		return TEST_FAILURE;
 
-	if (stats.rx_dropped == ifobject->pkt_stream->nb_pkts)
+	if (stats.rx_dropped == ifobject->pkt_stream->nb_pkts / 2)
 		return TEST_PASS;
 
-	return TEST_CONTINUE;
+	return TEST_FAILURE;
 }
 
 static int validate_rx_full(struct ifobject *ifobject)
 {
 	struct xsk_socket *xsk = ifobject->xsk->xsk;
 	struct xdp_statistics stats;
-	u32 expected_stat;
 	int err;
 
+	usleep(1000);
 	kick_rx(ifobject->xsk);
 
 	err = get_xsk_stats(xsk, &stats);
 	if (err)
 		return TEST_FAILURE;
 
-	if (ifobject->umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS)
-		expected_stat = ifobject->umem->num_frames - RX_FULL_RXQSIZE;
-	else
-		expected_stat = XSK_RING_PROD__DEFAULT_NUM_DESCS - RX_FULL_RXQSIZE;
-
-	if (stats.rx_ring_full == expected_stat)
+	if (stats.rx_ring_full)
 		return TEST_PASS;
 
-	return TEST_CONTINUE;
+	return TEST_FAILURE;
 }
 
 static int validate_fill_empty(struct ifobject *ifobject)
@@ -1023,16 +1043,17 @@ static int validate_fill_empty(struct ifobject *ifobject)
 	struct xdp_statistics stats;
 	int err;
 
+	usleep(1000);
 	kick_rx(ifobject->xsk);
 
 	err = get_xsk_stats(xsk, &stats);
 	if (err)
 		return TEST_FAILURE;
 
-	if (stats.rx_fill_ring_empty_descs == ifobject->pkt_stream->nb_pkts)
+	if (stats.rx_fill_ring_empty_descs)
 		return TEST_PASS;
 
-	return TEST_CONTINUE;
+	return TEST_FAILURE;
 }
 
 static int validate_tx_invalid_descs(struct ifobject *ifobject)
@@ -1051,7 +1072,7 @@ static int validate_tx_invalid_descs(struct ifobject *ifobject)
 		return TEST_FAILURE;
 	}
 
-	if (stats.tx_invalid_descs != ifobject->pkt_stream->nb_pkts) {
+	if (stats.tx_invalid_descs != ifobject->pkt_stream->nb_pkts / 2) {
 		ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%u] expected [%u]\n",
 			       __func__, stats.tx_invalid_descs, ifobject->pkt_stream->nb_pkts);
 		return TEST_FAILURE;
@@ -1138,17 +1159,12 @@ static void *worker_testapp_validate_tx(void *arg)
 	print_verbose("Sending %d packets on interface %s\n", ifobject->pkt_stream->nb_pkts,
 		      ifobject->ifname);
 	err = send_pkts(test, ifobject);
-	if (err) {
-		report_failure(test);
-		goto out;
-	}
 
-	if (ifobject->validation_func) {
+	if (!err && ifobject->validation_func)
 		err = ifobject->validation_func(ifobject);
+	if (err)
 		report_failure(test);
-	}
 
-out:
 	if (test->total_steps == test->current_step || err)
 		testapp_cleanup_xsk_res(ifobject);
 	pthread_exit(NULL);
@@ -1202,14 +1218,10 @@ static void *worker_testapp_validate_rx(void *arg)
 
 	pthread_barrier_wait(&barr);
 
-	if (ifobject->validation_func) {
-		do {
-			err = ifobject->validation_func(ifobject);
-		} while (err == TEST_CONTINUE);
-	} else {
-		err = receive_pkts(ifobject, &fds);
-	}
+	err = receive_pkts(ifobject, &fds);
 
+	if (!err && ifobject->validation_func)
+		err = ifobject->validation_func(ifobject);
 	if (err) {
 		report_failure(test);
 		pthread_mutex_lock(&pacing_mutex);
@@ -1327,9 +1339,10 @@ static void testapp_headroom(struct test_spec *test)
 static void testapp_stats_rx_dropped(struct test_spec *test)
 {
 	test_spec_set_name(test, "STAT_RX_DROPPED");
-	test->ifobj_tx->pacing_on = false;
 	test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size -
-		XDP_PACKET_HEADROOM - 1;
+		XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 3;
+	pkt_stream_replace_half(test, MIN_PKT_SIZE * 4, 0);
+	pkt_stream_receive_half(test);
 	test->ifobj_rx->validation_func = validate_rx_dropped;
 	testapp_validate_traffic(test);
 }
@@ -1337,8 +1350,7 @@ static void testapp_stats_rx_dropped(struct test_spec *test)
 static void testapp_stats_tx_invalid_descs(struct test_spec *test)
 {
 	test_spec_set_name(test, "STAT_TX_INVALID");
-	test->ifobj_tx->pacing_on = false;
-	pkt_stream_replace(test, DEFAULT_PKT_CNT, XSK_UMEM__INVALID_FRAME_SIZE);
+	pkt_stream_replace_half(test, XSK_UMEM__INVALID_FRAME_SIZE, 0);
 	test->ifobj_tx->validation_func = validate_tx_invalid_descs;
 	testapp_validate_traffic(test);
 
@@ -1348,20 +1360,30 @@ static void testapp_stats_tx_invalid_descs(struct test_spec *test)
 static void testapp_stats_rx_full(struct test_spec *test)
 {
 	test_spec_set_name(test, "STAT_RX_FULL");
-	test->ifobj_tx->pacing_on = false;
-	test->ifobj_rx->xsk->rxqsize = RX_FULL_RXQSIZE;
+	pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, PKT_SIZE);
+	test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem,
+							 DEFAULT_UMEM_BUFFERS, PKT_SIZE);
+	if (!test->ifobj_rx->pkt_stream)
+		exit_with_error(ENOMEM);
+
+	test->ifobj_rx->xsk->rxqsize = DEFAULT_UMEM_BUFFERS;
+	test->ifobj_rx->release_rx = false;
 	test->ifobj_rx->validation_func = validate_rx_full;
 	testapp_validate_traffic(test);
+
+	pkt_stream_restore_default(test);
 }
 
 static void testapp_stats_fill_empty(struct test_spec *test)
 {
 	test_spec_set_name(test, "STAT_RX_FILL_EMPTY");
-	test->ifobj_tx->pacing_on = false;
-	test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, 0, MIN_PKT_SIZE);
+	pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, PKT_SIZE);
+	test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem,
+							 DEFAULT_UMEM_BUFFERS, PKT_SIZE);
 	if (!test->ifobj_rx->pkt_stream)
 		exit_with_error(ENOMEM);
-	test->ifobj_rx->pkt_stream->use_addr_for_fill = true;
+
+	test->ifobj_rx->use_fill_ring = false;
 	test->ifobj_rx->validation_func = validate_fill_empty;
 	testapp_validate_traffic(test);
 
@@ -1504,7 +1526,7 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_
 		test_spec_set_name(test, "RUN_TO_COMPLETION_2K_FRAME_SIZE");
 		test->ifobj_tx->umem->frame_size = 2048;
 		test->ifobj_rx->umem->frame_size = 2048;
-		pkt_stream_replace(test, DEFAULT_PKT_CNT, MIN_PKT_SIZE);
+		pkt_stream_replace(test, DEFAULT_PKT_CNT, PKT_SIZE);
 		testapp_validate_traffic(test);
 
 		pkt_stream_restore_default(test);
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index bf18a95be48c..8f672b0fe0e1 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -27,7 +27,6 @@
 
 #define TEST_PASS 0
 #define TEST_FAILURE -1
-#define TEST_CONTINUE -2
 #define MAX_INTERFACES 2
 #define MAX_INTERFACE_NAME_CHARS 7
 #define MAX_INTERFACES_NAMESPACE_CHARS 10
@@ -147,7 +146,8 @@ struct ifobject {
 	bool rx_on;
 	bool use_poll;
 	bool busy_poll;
-	bool pacing_on;
+	bool use_fill_ring;
+	bool release_rx;
 	u8 dst_mac[ETH_ALEN];
 	u8 src_mac[ETH_ALEN];
 };
@@ -167,6 +167,6 @@ pthread_barrier_t barr;
 pthread_mutex_t pacing_mutex = PTHREAD_MUTEX_INITIALIZER;
 pthread_cond_t pacing_cond = PTHREAD_COND_INITIALIZER;
 
-u32 pkts_in_flight;
+int pkts_in_flight;
 
 #endif				/* XDPXCEIVER_H */
-- 
cgit 


From 998e1869de1b10caec6ec940632ff271036926f5 Mon Sep 17 00:00:00 2001
From: Daniel Müller <deso@posteo.net>
Date: Wed, 11 May 2022 17:22:49 +0000
Subject: selftests/bpf: Enable CONFIG_FPROBE for self tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some of the BPF selftests are failing when running with a rather bare
bones configuration based on tools/testing/selftests/bpf/config.
Specifically, we see a bunch of failures due to errno 95:

  > test_attach_api:PASS:fentry_raw_skel_load 0 nsec
  > libbpf: prog 'test_kprobe_manual': failed to attach: Operation not supported
  > test_attach_api:FAIL:bpf_program__attach_kprobe_multi_opts unexpected error: -95
  > 79 /6     kprobe_multi_test/attach_api_syms:FAIL

The cause of these is that CONFIG_FPROBE is missing. With this change we
add this configuration value to the BPF selftests config.

Signed-off-by: Daniel Müller <deso@posteo.net>
Acked-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20220511172249.4082510-1-deso@posteo.net
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/config | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 763db63a3890..08c6e5a66d87 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -53,3 +53,4 @@ CONFIG_NF_DEFRAG_IPV4=y
 CONFIG_NF_DEFRAG_IPV6=y
 CONFIG_NF_CONNTRACK=y
 CONFIG_USERFAULTFD=y
+CONFIG_FPROBE=y
-- 
cgit 


From fd0ad6f1d10c01796904608aacd6e70d6f624305 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Wed, 11 May 2022 11:47:35 -0700
Subject: selftests/bpf: fix a few clang compilation errors

With latest clang, I got the following compilation errors:
  .../prog_tests/test_tunnel.c:291:6: error: variable 'local_ip_map_fd' is used uninitialized
     whenever 'if' condition is true [-Werror,-Wsometimes-uninitialized]
       if (attach_tc_prog(&tc_hook, -1, set_dst_prog_fd))
            ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  .../bpf/prog_tests/test_tunnel.c:312:6: note: uninitialized use occurs here
        if (local_ip_map_fd >= 0)
            ^~~~~~~~~~~~~~~
  ...
  .../prog_tests/kprobe_multi_test.c:346:6: error: variable 'err' is used uninitialized
      whenever 'if' condition is true [-Werror,-Wsometimes-uninitialized]
        if (IS_ERR(map))
            ^~~~~~~~~~~
  .../prog_tests/kprobe_multi_test.c:388:6: note: uninitialized use occurs here
        if (err) {
            ^~~

This patch fixed the above compilation errors.

Signed-off-by: Yonghong Song <yhs@fb.com>
Acked-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20220511184735.3670214-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c | 4 +++-
 tools/testing/selftests/bpf/prog_tests/test_tunnel.c       | 4 ++--
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
index 816eacededd1..586dc52d6fb9 100644
--- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
@@ -343,8 +343,10 @@ static int get_syms(char ***symsp, size_t *cntp)
 		return -EINVAL;
 
 	map = hashmap__new(symbol_hash, symbol_equal, NULL);
-	if (IS_ERR(map))
+	if (IS_ERR(map)) {
+		err = libbpf_get_error(map);
 		goto error;
+	}
 
 	while (fgets(buf, sizeof(buf), f)) {
 		/* skip modules */
diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
index 071c9c91b50f..3bba4a2a0530 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
@@ -246,7 +246,7 @@ static void test_vxlan_tunnel(void)
 {
 	struct test_tunnel_kern *skel = NULL;
 	struct nstoken *nstoken;
-	int local_ip_map_fd;
+	int local_ip_map_fd = -1;
 	int set_src_prog_fd, get_src_prog_fd;
 	int set_dst_prog_fd;
 	int key = 0, ifindex = -1;
@@ -319,7 +319,7 @@ static void test_ip6vxlan_tunnel(void)
 {
 	struct test_tunnel_kern *skel = NULL;
 	struct nstoken *nstoken;
-	int local_ip_map_fd;
+	int local_ip_map_fd = -1;
 	int set_src_prog_fd, get_src_prog_fd;
 	int set_dst_prog_fd;
 	int key = 0, ifindex = -1;
-- 
cgit 


From b57c7e8b76c646cf77ce4353a779a8b781592209 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 11 May 2022 01:09:04 +0300
Subject: selftests: forwarding: tc_actions: allow mirred egress test to run on
 non-offloaded h2

The host interfaces $h1 and $h2 don't have to be switchdev interfaces,
but due to the fact that we pass $tcflags which may have the value of
"skip_sw", we force $h2 to offload a drop rule for dst_ip, something
which it may not be able to do.

The selftest only wants to verify the hit count of this rule as a means
of figuring out whether the packet was received, so remove the $tcflags
for it and let it be done in software.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Tested-by: Ido Schimmel <idosch@nvidia.com>
Link: https://lore.kernel.org/r/20220510220904.284552-1-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/forwarding/tc_actions.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
index de19eb6c38f0..1e0a62f638fe 100755
--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -60,7 +60,7 @@ mirred_egress_test()
 	RET=0
 
 	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
-		$tcflags dst_ip 192.0.2.2 action drop
+		dst_ip 192.0.2.2 action drop
 
 	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
 		-t ip -q
-- 
cgit 


From 5cdccadcac2612f947ebc26ad7023dfb7e8871f9 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Thu, 12 May 2022 01:16:52 +0530
Subject: bpf: Prepare prog_test_struct kfuncs for runtime tests

In an effort to actually test the refcounting logic at runtime, add a
refcount_t member to prog_test_ref_kfunc and use it in selftests to
verify and test the whole logic more exhaustively.

The kfunc calls for prog_test_member do not require runtime refcounting,
as they are only used for verifier selftests, not during runtime
execution. Hence, their implementation now has a WARN_ON_ONCE as it is
not meant to be reachable code at runtime. It is strictly used in tests
triggering failure cases in the verifier. bpf_kfunc_call_memb_release is
called from map free path, since prog_test_member is embedded in map
value for some verifier tests, so we skip WARN_ON_ONCE for it.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220511194654.765705-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 net/bpf/test_run.c                              | 23 +++++++++++++++++------
 tools/testing/selftests/bpf/verifier/map_kptr.c |  4 ++--
 2 files changed, 19 insertions(+), 8 deletions(-)

(limited to 'tools/testing')

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 7a1579c91432..4d08cca771c7 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -564,31 +564,36 @@ struct prog_test_ref_kfunc {
 	int b;
 	struct prog_test_member memb;
 	struct prog_test_ref_kfunc *next;
+	refcount_t cnt;
 };
 
 static struct prog_test_ref_kfunc prog_test_struct = {
 	.a = 42,
 	.b = 108,
 	.next = &prog_test_struct,
+	.cnt = REFCOUNT_INIT(1),
 };
 
 noinline struct prog_test_ref_kfunc *
 bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr)
 {
-	/* randomly return NULL */
-	if (get_jiffies_64() % 2)
-		return NULL;
+	refcount_inc(&prog_test_struct.cnt);
 	return &prog_test_struct;
 }
 
 noinline struct prog_test_member *
 bpf_kfunc_call_memb_acquire(void)
 {
-	return &prog_test_struct.memb;
+	WARN_ON_ONCE(1);
+	return NULL;
 }
 
 noinline void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p)
 {
+	if (!p)
+		return;
+
+	refcount_dec(&p->cnt);
 }
 
 noinline void bpf_kfunc_call_memb_release(struct prog_test_member *p)
@@ -597,12 +602,18 @@ noinline void bpf_kfunc_call_memb_release(struct prog_test_member *p)
 
 noinline void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p)
 {
+	WARN_ON_ONCE(1);
 }
 
 noinline struct prog_test_ref_kfunc *
-bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **p, int a, int b)
+bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **pp, int a, int b)
 {
-	return &prog_test_struct;
+	struct prog_test_ref_kfunc *p = READ_ONCE(*pp);
+
+	if (!p)
+		return NULL;
+	refcount_inc(&p->cnt);
+	return p;
 }
 
 struct prog_test_pass1 {
diff --git a/tools/testing/selftests/bpf/verifier/map_kptr.c b/tools/testing/selftests/bpf/verifier/map_kptr.c
index 9113834640e6..6914904344c0 100644
--- a/tools/testing/selftests/bpf/verifier/map_kptr.c
+++ b/tools/testing/selftests/bpf/verifier/map_kptr.c
@@ -212,13 +212,13 @@
 	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
 	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
 	BPF_EXIT_INSN(),
-	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 24),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 32),
 	BPF_EXIT_INSN(),
 	},
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.fixup_map_kptr = { 1 },
 	.result = REJECT,
-	.errstr = "access beyond struct prog_test_ref_kfunc at off 24 size 8",
+	.errstr = "access beyond struct prog_test_ref_kfunc at off 32 size 8",
 },
 {
 	"map_kptr: unref: inherit PTR_UNTRUSTED on struct walk",
-- 
cgit 


From 04accf794bb2a5a06f23f7d48d195ffa329181a6 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Thu, 12 May 2022 01:16:53 +0530
Subject: selftests/bpf: Add negative C tests for kptrs

This uses the newly added SEC("?foo") naming to disable autoload of
programs, and then loads them one by one for the object and verifies
that loading fails and matches the returned error string from verifier.
This is similar to already existing verifier tests but provides coverage
for BPF C.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220511194654.765705-4-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/map_kptr.c |  87 ++++-
 tools/testing/selftests/bpf/progs/map_kptr_fail.c | 418 ++++++++++++++++++++++
 2 files changed, 504 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/progs/map_kptr_fail.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/map_kptr.c b/tools/testing/selftests/bpf/prog_tests/map_kptr.c
index 9e2fbda64a65..00819277cb17 100644
--- a/tools/testing/selftests/bpf/prog_tests/map_kptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/map_kptr.c
@@ -2,8 +2,86 @@
 #include <test_progs.h>
 
 #include "map_kptr.skel.h"
+#include "map_kptr_fail.skel.h"
 
-void test_map_kptr(void)
+static char log_buf[1024 * 1024];
+
+struct {
+	const char *prog_name;
+	const char *err_msg;
+} map_kptr_fail_tests[] = {
+	{ "size_not_bpf_dw", "kptr access size must be BPF_DW" },
+	{ "non_const_var_off", "kptr access cannot have variable offset" },
+	{ "non_const_var_off_kptr_xchg", "R1 doesn't have constant offset. kptr has to be" },
+	{ "misaligned_access_write", "kptr access misaligned expected=8 off=7" },
+	{ "misaligned_access_read", "kptr access misaligned expected=8 off=1" },
+	{ "reject_var_off_store", "variable untrusted_ptr_ access var_off=(0x0; 0x1e0)" },
+	{ "reject_bad_type_match", "invalid kptr access, R1 type=untrusted_ptr_prog_test_ref_kfunc" },
+	{ "marked_as_untrusted_or_null", "R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_" },
+	{ "correct_btf_id_check_size", "access beyond struct prog_test_ref_kfunc at off 32 size 4" },
+	{ "inherit_untrusted_on_walk", "R1 type=untrusted_ptr_ expected=percpu_ptr_" },
+	{ "reject_kptr_xchg_on_unref", "off=8 kptr isn't referenced kptr" },
+	{ "reject_kptr_get_no_map_val", "arg#0 expected pointer to map value" },
+	{ "reject_kptr_get_no_null_map_val", "arg#0 expected pointer to map value" },
+	{ "reject_kptr_get_no_kptr", "arg#0 no referenced kptr at map value offset=0" },
+	{ "reject_kptr_get_on_unref", "arg#0 no referenced kptr at map value offset=8" },
+	{ "reject_kptr_get_bad_type_match", "kernel function bpf_kfunc_call_test_kptr_get args#0" },
+	{ "mark_ref_as_untrusted_or_null", "R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_" },
+	{ "reject_untrusted_store_to_ref", "store to referenced kptr disallowed" },
+	{ "reject_bad_type_xchg", "invalid kptr access, R2 type=ptr_prog_test_ref_kfunc expected=ptr_prog_test_member" },
+	{ "reject_untrusted_xchg", "R2 type=untrusted_ptr_ expected=ptr_" },
+	{ "reject_member_of_ref_xchg", "invalid kptr access, R2 type=ptr_prog_test_ref_kfunc" },
+	{ "reject_indirect_helper_access", "kptr cannot be accessed indirectly by helper" },
+	{ "reject_indirect_global_func_access", "kptr cannot be accessed indirectly by helper" },
+	{ "kptr_xchg_ref_state", "Unreleased reference id=5 alloc_insn=" },
+	{ "kptr_get_ref_state", "Unreleased reference id=3 alloc_insn=" },
+};
+
+static void test_map_kptr_fail_prog(const char *prog_name, const char *err_msg)
+{
+	LIBBPF_OPTS(bpf_object_open_opts, opts, .kernel_log_buf = log_buf,
+						.kernel_log_size = sizeof(log_buf),
+						.kernel_log_level = 1);
+	struct map_kptr_fail *skel;
+	struct bpf_program *prog;
+	int ret;
+
+	skel = map_kptr_fail__open_opts(&opts);
+	if (!ASSERT_OK_PTR(skel, "map_kptr_fail__open_opts"))
+		return;
+
+	prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+	if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+		goto end;
+
+	bpf_program__set_autoload(prog, true);
+
+	ret = map_kptr_fail__load(skel);
+	if (!ASSERT_ERR(ret, "map_kptr__load must fail"))
+		goto end;
+
+	if (!ASSERT_OK_PTR(strstr(log_buf, err_msg), "expected error message")) {
+		fprintf(stderr, "Expected: %s\n", err_msg);
+		fprintf(stderr, "Verifier: %s\n", log_buf);
+	}
+
+end:
+	map_kptr_fail__destroy(skel);
+}
+
+static void test_map_kptr_fail(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(map_kptr_fail_tests); i++) {
+		if (!test__start_subtest(map_kptr_fail_tests[i].prog_name))
+			continue;
+		test_map_kptr_fail_prog(map_kptr_fail_tests[i].prog_name,
+					map_kptr_fail_tests[i].err_msg);
+	}
+}
+
+static void test_map_kptr_success(void)
 {
 	struct map_kptr *skel;
 	int key = 0, ret;
@@ -35,3 +113,10 @@ void test_map_kptr(void)
 
 	map_kptr__destroy(skel);
 }
+
+void test_map_kptr(void)
+{
+	if (test__start_subtest("success"))
+		test_map_kptr_success();
+	test_map_kptr_fail();
+}
diff --git a/tools/testing/selftests/bpf/progs/map_kptr_fail.c b/tools/testing/selftests/bpf/progs/map_kptr_fail.c
new file mode 100644
index 000000000000..05e209b1b12a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/map_kptr_fail.c
@@ -0,0 +1,418 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+struct map_value {
+	char buf[8];
+	struct prog_test_ref_kfunc __kptr *unref_ptr;
+	struct prog_test_ref_kfunc __kptr_ref *ref_ptr;
+	struct prog_test_member __kptr_ref *ref_memb_ptr;
+};
+
+struct array_map {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__type(key, int);
+	__type(value, struct map_value);
+	__uint(max_entries, 1);
+} array_map SEC(".maps");
+
+extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym;
+extern struct prog_test_ref_kfunc *
+bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **p, int a, int b) __ksym;
+
+SEC("?tc")
+int size_not_bpf_dw(struct __sk_buff *ctx)
+{
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	*(u32 *)&v->unref_ptr = 0;
+	return 0;
+}
+
+SEC("?tc")
+int non_const_var_off(struct __sk_buff *ctx)
+{
+	struct map_value *v;
+	int key = 0, id;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	id = ctx->protocol;
+	if (id < 4 || id > 12)
+		return 0;
+	*(u64 *)((void *)v + id) = 0;
+
+	return 0;
+}
+
+SEC("?tc")
+int non_const_var_off_kptr_xchg(struct __sk_buff *ctx)
+{
+	struct map_value *v;
+	int key = 0, id;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	id = ctx->protocol;
+	if (id < 4 || id > 12)
+		return 0;
+	bpf_kptr_xchg((void *)v + id, NULL);
+
+	return 0;
+}
+
+SEC("?tc")
+int misaligned_access_write(struct __sk_buff *ctx)
+{
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	*(void **)((void *)v + 7) = NULL;
+
+	return 0;
+}
+
+SEC("?tc")
+int misaligned_access_read(struct __sk_buff *ctx)
+{
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	return *(u64 *)((void *)v + 1);
+}
+
+SEC("?tc")
+int reject_var_off_store(struct __sk_buff *ctx)
+{
+	struct prog_test_ref_kfunc *unref_ptr;
+	struct map_value *v;
+	int key = 0, id;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	unref_ptr = v->unref_ptr;
+	if (!unref_ptr)
+		return 0;
+	id = ctx->protocol;
+	if (id < 4 || id > 12)
+		return 0;
+	unref_ptr += id;
+	v->unref_ptr = unref_ptr;
+
+	return 0;
+}
+
+SEC("?tc")
+int reject_bad_type_match(struct __sk_buff *ctx)
+{
+	struct prog_test_ref_kfunc *unref_ptr;
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	unref_ptr = v->unref_ptr;
+	if (!unref_ptr)
+		return 0;
+	unref_ptr = (void *)unref_ptr + 4;
+	v->unref_ptr = unref_ptr;
+
+	return 0;
+}
+
+SEC("?tc")
+int marked_as_untrusted_or_null(struct __sk_buff *ctx)
+{
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	bpf_this_cpu_ptr(v->unref_ptr);
+	return 0;
+}
+
+SEC("?tc")
+int correct_btf_id_check_size(struct __sk_buff *ctx)
+{
+	struct prog_test_ref_kfunc *p;
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	p = v->unref_ptr;
+	if (!p)
+		return 0;
+	return *(int *)((void *)p + bpf_core_type_size(struct prog_test_ref_kfunc));
+}
+
+SEC("?tc")
+int inherit_untrusted_on_walk(struct __sk_buff *ctx)
+{
+	struct prog_test_ref_kfunc *unref_ptr;
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	unref_ptr = v->unref_ptr;
+	if (!unref_ptr)
+		return 0;
+	unref_ptr = unref_ptr->next;
+	bpf_this_cpu_ptr(unref_ptr);
+	return 0;
+}
+
+SEC("?tc")
+int reject_kptr_xchg_on_unref(struct __sk_buff *ctx)
+{
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	bpf_kptr_xchg(&v->unref_ptr, NULL);
+	return 0;
+}
+
+SEC("?tc")
+int reject_kptr_get_no_map_val(struct __sk_buff *ctx)
+{
+	bpf_kfunc_call_test_kptr_get((void *)&ctx, 0, 0);
+	return 0;
+}
+
+SEC("?tc")
+int reject_kptr_get_no_null_map_val(struct __sk_buff *ctx)
+{
+	bpf_kfunc_call_test_kptr_get(bpf_map_lookup_elem(&array_map, &(int){0}), 0, 0);
+	return 0;
+}
+
+SEC("?tc")
+int reject_kptr_get_no_kptr(struct __sk_buff *ctx)
+{
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	bpf_kfunc_call_test_kptr_get((void *)v, 0, 0);
+	return 0;
+}
+
+SEC("?tc")
+int reject_kptr_get_on_unref(struct __sk_buff *ctx)
+{
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	bpf_kfunc_call_test_kptr_get(&v->unref_ptr, 0, 0);
+	return 0;
+}
+
+SEC("?tc")
+int reject_kptr_get_bad_type_match(struct __sk_buff *ctx)
+{
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	bpf_kfunc_call_test_kptr_get((void *)&v->ref_memb_ptr, 0, 0);
+	return 0;
+}
+
+SEC("?tc")
+int mark_ref_as_untrusted_or_null(struct __sk_buff *ctx)
+{
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	bpf_this_cpu_ptr(v->ref_ptr);
+	return 0;
+}
+
+SEC("?tc")
+int reject_untrusted_store_to_ref(struct __sk_buff *ctx)
+{
+	struct prog_test_ref_kfunc *p;
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	p = v->ref_ptr;
+	if (!p)
+		return 0;
+	/* Checkmate, clang */
+	*(struct prog_test_ref_kfunc * volatile *)&v->ref_ptr = p;
+	return 0;
+}
+
+SEC("?tc")
+int reject_untrusted_xchg(struct __sk_buff *ctx)
+{
+	struct prog_test_ref_kfunc *p;
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	p = v->ref_ptr;
+	if (!p)
+		return 0;
+	bpf_kptr_xchg(&v->ref_ptr, p);
+	return 0;
+}
+
+SEC("?tc")
+int reject_bad_type_xchg(struct __sk_buff *ctx)
+{
+	struct prog_test_ref_kfunc *ref_ptr;
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	ref_ptr = bpf_kfunc_call_test_acquire(&(unsigned long){0});
+	if (!ref_ptr)
+		return 0;
+	bpf_kptr_xchg(&v->ref_memb_ptr, ref_ptr);
+	return 0;
+}
+
+SEC("?tc")
+int reject_member_of_ref_xchg(struct __sk_buff *ctx)
+{
+	struct prog_test_ref_kfunc *ref_ptr;
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	ref_ptr = bpf_kfunc_call_test_acquire(&(unsigned long){0});
+	if (!ref_ptr)
+		return 0;
+	bpf_kptr_xchg(&v->ref_memb_ptr, &ref_ptr->memb);
+	return 0;
+}
+
+SEC("?syscall")
+int reject_indirect_helper_access(struct __sk_buff *ctx)
+{
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	bpf_get_current_comm(v, sizeof(v->buf) + 1);
+	return 0;
+}
+
+__noinline
+int write_func(int *p)
+{
+	return p ? *p = 42 : 0;
+}
+
+SEC("?tc")
+int reject_indirect_global_func_access(struct __sk_buff *ctx)
+{
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	return write_func((void *)v + 5);
+}
+
+SEC("?tc")
+int kptr_xchg_ref_state(struct __sk_buff *ctx)
+{
+	struct prog_test_ref_kfunc *p;
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	p = bpf_kfunc_call_test_acquire(&(unsigned long){0});
+	if (!p)
+		return 0;
+	bpf_kptr_xchg(&v->ref_ptr, p);
+	return 0;
+}
+
+SEC("?tc")
+int kptr_get_ref_state(struct __sk_buff *ctx)
+{
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	bpf_kfunc_call_test_kptr_get(&v->ref_ptr, 0, 0);
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit 


From 0ef6740e97777bbe04aeacd32239ccb1732098d7 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Thu, 12 May 2022 01:16:54 +0530
Subject: selftests/bpf: Add tests for kptr_ref refcounting

Check at runtime how various operations for kptr_ref affect its refcount
and verify against the actual count.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220511194654.765705-5-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/map_kptr.c |  27 +++++-
 tools/testing/selftests/bpf/progs/map_kptr.c      | 106 +++++++++++++++++++++-
 2 files changed, 128 insertions(+), 5 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/map_kptr.c b/tools/testing/selftests/bpf/prog_tests/map_kptr.c
index 00819277cb17..8142dd9eff4c 100644
--- a/tools/testing/selftests/bpf/prog_tests/map_kptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/map_kptr.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include <network_helpers.h>
 
 #include "map_kptr.skel.h"
 #include "map_kptr_fail.skel.h"
@@ -81,8 +82,13 @@ static void test_map_kptr_fail(void)
 	}
 }
 
-static void test_map_kptr_success(void)
+static void test_map_kptr_success(bool test_run)
 {
+	LIBBPF_OPTS(bpf_test_run_opts, opts,
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+		.repeat = 1,
+	);
 	struct map_kptr *skel;
 	int key = 0, ret;
 	char buf[24];
@@ -91,6 +97,16 @@ static void test_map_kptr_success(void)
 	if (!ASSERT_OK_PTR(skel, "map_kptr__open_and_load"))
 		return;
 
+	ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref), &opts);
+	ASSERT_OK(ret, "test_map_kptr_ref refcount");
+	ASSERT_OK(opts.retval, "test_map_kptr_ref retval");
+	ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref2), &opts);
+	ASSERT_OK(ret, "test_map_kptr_ref2 refcount");
+	ASSERT_OK(opts.retval, "test_map_kptr_ref2 retval");
+
+	if (test_run)
+		return;
+
 	ret = bpf_map_update_elem(bpf_map__fd(skel->maps.array_map), &key, buf, 0);
 	ASSERT_OK(ret, "array_map update");
 	ret = bpf_map_update_elem(bpf_map__fd(skel->maps.array_map), &key, buf, 0);
@@ -116,7 +132,12 @@ static void test_map_kptr_success(void)
 
 void test_map_kptr(void)
 {
-	if (test__start_subtest("success"))
-		test_map_kptr_success();
+	if (test__start_subtest("success")) {
+		test_map_kptr_success(false);
+		/* Do test_run twice, so that we see refcount going back to 1
+		 * after we leave it in map from first iteration.
+		 */
+		test_map_kptr_success(true);
+	}
 	test_map_kptr_fail();
 }
diff --git a/tools/testing/selftests/bpf/progs/map_kptr.c b/tools/testing/selftests/bpf/progs/map_kptr.c
index 1b0e0409eaa5..eb8217803493 100644
--- a/tools/testing/selftests/bpf/progs/map_kptr.c
+++ b/tools/testing/selftests/bpf/progs/map_kptr.c
@@ -141,7 +141,7 @@ SEC("tc")
 int test_map_kptr(struct __sk_buff *ctx)
 {
 	struct map_value *v;
-	int i, key = 0;
+	int key = 0;
 
 #define TEST(map)					\
 	v = bpf_map_lookup_elem(&map, &key);		\
@@ -162,7 +162,7 @@ SEC("tc")
 int test_map_in_map_kptr(struct __sk_buff *ctx)
 {
 	struct map_value *v;
-	int i, key = 0;
+	int key = 0;
 	void *map;
 
 #define TEST(map_in_map)                                \
@@ -187,4 +187,106 @@ int test_map_in_map_kptr(struct __sk_buff *ctx)
 	return 0;
 }
 
+SEC("tc")
+int test_map_kptr_ref(struct __sk_buff *ctx)
+{
+	struct prog_test_ref_kfunc *p, *p_st;
+	unsigned long arg = 0;
+	struct map_value *v;
+	int key = 0, ret;
+
+	p = bpf_kfunc_call_test_acquire(&arg);
+	if (!p)
+		return 1;
+
+	p_st = p->next;
+	if (p_st->cnt.refs.counter != 2) {
+		ret = 2;
+		goto end;
+	}
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v) {
+		ret = 3;
+		goto end;
+	}
+
+	p = bpf_kptr_xchg(&v->ref_ptr, p);
+	if (p) {
+		ret = 4;
+		goto end;
+	}
+	if (p_st->cnt.refs.counter != 2)
+		return 5;
+
+	p = bpf_kfunc_call_test_kptr_get(&v->ref_ptr, 0, 0);
+	if (!p)
+		return 6;
+	if (p_st->cnt.refs.counter != 3) {
+		ret = 7;
+		goto end;
+	}
+	bpf_kfunc_call_test_release(p);
+	if (p_st->cnt.refs.counter != 2)
+		return 8;
+
+	p = bpf_kptr_xchg(&v->ref_ptr, NULL);
+	if (!p)
+		return 9;
+	bpf_kfunc_call_test_release(p);
+	if (p_st->cnt.refs.counter != 1)
+		return 10;
+
+	p = bpf_kfunc_call_test_acquire(&arg);
+	if (!p)
+		return 11;
+	p = bpf_kptr_xchg(&v->ref_ptr, p);
+	if (p) {
+		ret = 12;
+		goto end;
+	}
+	if (p_st->cnt.refs.counter != 2)
+		return 13;
+	/* Leave in map */
+
+	return 0;
+end:
+	bpf_kfunc_call_test_release(p);
+	return ret;
+}
+
+SEC("tc")
+int test_map_kptr_ref2(struct __sk_buff *ctx)
+{
+	struct prog_test_ref_kfunc *p, *p_st;
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 1;
+
+	p_st = v->ref_ptr;
+	if (!p_st || p_st->cnt.refs.counter != 2)
+		return 2;
+
+	p = bpf_kptr_xchg(&v->ref_ptr, NULL);
+	if (!p)
+		return 3;
+	if (p_st->cnt.refs.counter != 2) {
+		bpf_kfunc_call_test_release(p);
+		return 4;
+	}
+
+	p = bpf_kptr_xchg(&v->ref_ptr, p);
+	if (p) {
+		bpf_kfunc_call_test_release(p);
+		return 5;
+	}
+	if (p_st->cnt.refs.counter != 2)
+		return 6;
+
+	return 0;
+}
+
 char _license[] SEC("license") = "GPL";
-- 
cgit 


From ed7c13776e20c74486b0939a3c1de984c5efb6aa Mon Sep 17 00:00:00 2001
From: Feng Zhou <zhoufeng.zf@bytedance.com>
Date: Wed, 11 May 2022 17:38:54 +0800
Subject: selftests/bpf: add test case for bpf_map_lookup_percpu_elem

test_progs:
Tests new ebpf helpers bpf_map_lookup_percpu_elem.

Signed-off-by: Feng Zhou <zhoufeng.zf@bytedance.com>
Link: https://lore.kernel.org/r/20220511093854.411-3-zhoufeng.zf@bytedance.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../bpf/prog_tests/map_lookup_percpu_elem.c        | 46 ++++++++++++++++++
 .../bpf/progs/test_map_lookup_percpu_elem.c        | 54 ++++++++++++++++++++++
 2 files changed, 100 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/map_lookup_percpu_elem.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_map_lookup_percpu_elem.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/map_lookup_percpu_elem.c b/tools/testing/selftests/bpf/prog_tests/map_lookup_percpu_elem.c
new file mode 100644
index 000000000000..58b24c2112b0
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/map_lookup_percpu_elem.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2022 Bytedance
+
+#include <test_progs.h>
+
+#include "test_map_lookup_percpu_elem.skel.h"
+
+#define TEST_VALUE  1
+
+void test_map_lookup_percpu_elem(void)
+{
+	struct test_map_lookup_percpu_elem *skel;
+	int key = 0, ret;
+	int nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+	int *buf;
+
+	buf = (int *)malloc(nr_cpus*sizeof(int));
+	if (!ASSERT_OK_PTR(buf, "malloc"))
+		return;
+	memset(buf, 0, nr_cpus*sizeof(int));
+	buf[0] = TEST_VALUE;
+
+	skel = test_map_lookup_percpu_elem__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "test_map_lookup_percpu_elem__open_and_load"))
+		return;
+	ret = test_map_lookup_percpu_elem__attach(skel);
+	ASSERT_OK(ret, "test_map_lookup_percpu_elem__attach");
+
+	ret = bpf_map_update_elem(bpf_map__fd(skel->maps.percpu_array_map), &key, buf, 0);
+	ASSERT_OK(ret, "percpu_array_map update");
+
+	ret = bpf_map_update_elem(bpf_map__fd(skel->maps.percpu_hash_map), &key, buf, 0);
+	ASSERT_OK(ret, "percpu_hash_map update");
+
+	ret = bpf_map_update_elem(bpf_map__fd(skel->maps.percpu_lru_hash_map), &key, buf, 0);
+	ASSERT_OK(ret, "percpu_lru_hash_map update");
+
+	syscall(__NR_getuid);
+
+	ret = skel->bss->percpu_array_elem_val == TEST_VALUE &&
+	      skel->bss->percpu_hash_elem_val == TEST_VALUE &&
+	      skel->bss->percpu_lru_hash_elem_val == TEST_VALUE;
+	ASSERT_OK(!ret, "bpf_map_lookup_percpu_elem success");
+
+	test_map_lookup_percpu_elem__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_map_lookup_percpu_elem.c b/tools/testing/selftests/bpf/progs/test_map_lookup_percpu_elem.c
new file mode 100644
index 000000000000..5d4ef86cbf48
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_map_lookup_percpu_elem.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2022 Bytedance
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+int percpu_array_elem_val = 0;
+int percpu_hash_elem_val = 0;
+int percpu_lru_hash_elem_val = 0;
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u32);
+} percpu_array_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u32);
+} percpu_hash_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_LRU_PERCPU_HASH);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u32);
+} percpu_lru_hash_map SEC(".maps");
+
+SEC("tp/syscalls/sys_enter_getuid")
+int sysenter_getuid(const void *ctx)
+{
+	__u32 key = 0;
+	__u32 cpu = 0;
+	__u32 *value;
+
+	value = bpf_map_lookup_percpu_elem(&percpu_array_map, &key, cpu);
+	if (value)
+		percpu_array_elem_val = *value;
+
+	value = bpf_map_lookup_percpu_elem(&percpu_hash_map, &key, cpu);
+	if (value)
+		percpu_hash_elem_val = *value;
+
+	value = bpf_map_lookup_percpu_elem(&percpu_lru_hash_map, &key, cpu);
+	if (value)
+		percpu_lru_hash_elem_val = *value;
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit 


From 5790a2fee02c48e28fde2ce7ea4765eeadcda0ba Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 11 May 2022 16:20:12 -0700
Subject: selftests/bpf: make fexit_stress test run in serial mode

fexit_stress is attaching maximum allowed amount of fexit programs to
bpf_fentry_test1 kernel function, which is used by a bunch of other
parallel tests, thus pretty frequently interfering with their execution.

Given the test assumes nothing else is attaching to bpf_fentry_test1,
mark it serial.

Suggested-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20220511232012.609370-1-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/fexit_stress.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
index fe1f0f26ea14..a7e74297f15f 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
@@ -5,7 +5,7 @@
 /* that's kernel internal BPF_MAX_TRAMP_PROGS define */
 #define CNT 38
 
-void test_fexit_stress(void)
+void serial_test_fexit_stress(void)
 {
 	char test_skb[128] = {};
 	int fexit_fd[CNT] = {};
-- 
cgit 


From d2b8060f165105a68748a6d98ed548ca112ce4d3 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 11 May 2022 22:46:09 -0700
Subject: lkdtm/usercopy: Rename "heap" to "slab"

To more clearly distinguish between the various heap types, rename the
slab tests to "slab".

Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: linux-kselftest@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 drivers/misc/lkdtm/usercopy.c           | 30 +++++++++++++++---------------
 tools/testing/selftests/lkdtm/tests.txt |  8 ++++----
 2 files changed, 19 insertions(+), 19 deletions(-)

(limited to 'tools/testing')

diff --git a/drivers/misc/lkdtm/usercopy.c b/drivers/misc/lkdtm/usercopy.c
index 7852b9fc7c47..945806db2a13 100644
--- a/drivers/misc/lkdtm/usercopy.c
+++ b/drivers/misc/lkdtm/usercopy.c
@@ -130,7 +130,7 @@ free_user:
  * This checks for whole-object size validation with hardened usercopy,
  * with or without usercopy whitelisting.
  */
-static void do_usercopy_heap_size(bool to_user)
+static void do_usercopy_slab_size(bool to_user)
 {
 	unsigned long user_addr;
 	unsigned char *one, *two;
@@ -196,9 +196,9 @@ free_kernel:
 
 /*
  * This checks for the specific whitelist window within an object. If this
- * test passes, then do_usercopy_heap_size() tests will pass too.
+ * test passes, then do_usercopy_slab_size() tests will pass too.
  */
-static void do_usercopy_heap_whitelist(bool to_user)
+static void do_usercopy_slab_whitelist(bool to_user)
 {
 	unsigned long user_alloc;
 	unsigned char *buf = NULL;
@@ -272,24 +272,24 @@ free_alloc:
 }
 
 /* Callable tests. */
-static void lkdtm_USERCOPY_HEAP_SIZE_TO(void)
+static void lkdtm_USERCOPY_SLAB_SIZE_TO(void)
 {
-	do_usercopy_heap_size(true);
+	do_usercopy_slab_size(true);
 }
 
-static void lkdtm_USERCOPY_HEAP_SIZE_FROM(void)
+static void lkdtm_USERCOPY_SLAB_SIZE_FROM(void)
 {
-	do_usercopy_heap_size(false);
+	do_usercopy_slab_size(false);
 }
 
-static void lkdtm_USERCOPY_HEAP_WHITELIST_TO(void)
+static void lkdtm_USERCOPY_SLAB_WHITELIST_TO(void)
 {
-	do_usercopy_heap_whitelist(true);
+	do_usercopy_slab_whitelist(true);
 }
 
-static void lkdtm_USERCOPY_HEAP_WHITELIST_FROM(void)
+static void lkdtm_USERCOPY_SLAB_WHITELIST_FROM(void)
 {
-	do_usercopy_heap_whitelist(false);
+	do_usercopy_slab_whitelist(false);
 }
 
 static void lkdtm_USERCOPY_STACK_FRAME_TO(void)
@@ -358,10 +358,10 @@ void __exit lkdtm_usercopy_exit(void)
 }
 
 static struct crashtype crashtypes[] = {
-	CRASHTYPE(USERCOPY_HEAP_SIZE_TO),
-	CRASHTYPE(USERCOPY_HEAP_SIZE_FROM),
-	CRASHTYPE(USERCOPY_HEAP_WHITELIST_TO),
-	CRASHTYPE(USERCOPY_HEAP_WHITELIST_FROM),
+	CRASHTYPE(USERCOPY_SLAB_SIZE_TO),
+	CRASHTYPE(USERCOPY_SLAB_SIZE_FROM),
+	CRASHTYPE(USERCOPY_SLAB_WHITELIST_TO),
+	CRASHTYPE(USERCOPY_SLAB_WHITELIST_FROM),
 	CRASHTYPE(USERCOPY_STACK_FRAME_TO),
 	CRASHTYPE(USERCOPY_STACK_FRAME_FROM),
 	CRASHTYPE(USERCOPY_STACK_BEYOND),
diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt
index 9dace01dbf15..65e53eb0840b 100644
--- a/tools/testing/selftests/lkdtm/tests.txt
+++ b/tools/testing/selftests/lkdtm/tests.txt
@@ -64,10 +64,10 @@ REFCOUNT_DEC_AND_TEST_SATURATED Saturation detected: still saturated
 REFCOUNT_SUB_AND_TEST_SATURATED Saturation detected: still saturated
 #REFCOUNT_TIMING timing only
 #ATOMIC_TIMING timing only
-USERCOPY_HEAP_SIZE_TO
-USERCOPY_HEAP_SIZE_FROM
-USERCOPY_HEAP_WHITELIST_TO
-USERCOPY_HEAP_WHITELIST_FROM
+USERCOPY_SLAB_SIZE_TO
+USERCOPY_SLAB_SIZE_FROM
+USERCOPY_SLAB_WHITELIST_TO
+USERCOPY_SLAB_WHITELIST_FROM
 USERCOPY_STACK_FRAME_TO
 USERCOPY_STACK_FRAME_FROM
 USERCOPY_STACK_BEYOND
-- 
cgit 


From eab691b1a6841813e84abfc8cef392b67efdaede Mon Sep 17 00:00:00 2001
From: Tiezhu Yang <yangtiezhu@loongson.cn>
Date: Fri, 6 May 2022 19:02:02 +0800
Subject: selftests/ftrace: Save kprobe_events to test log

It may lead to kernel panic when execute the following testcase on mips:

  # cd tools/testing/selftests/ftrace
  # ./ftracetest test.d/kprobe/multiple_kprobes.tc

A preliminary analysis shows that the issue is related with

  echo 1 > events/kprobes/enable

after add the 256 probe points.

In order to find the root cause, I want to verify which probe point has
problem, so it is necessary to save kprobe_events to test log.

With this patch, we can get the 256 probe points in the test log through
the following command:

  # ./ftracetest test.d/kprobe/multiple_kprobes.tc -vvv -k

Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
---
 tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
index 312d23780096..be754f5bcf79 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
@@ -25,6 +25,8 @@ if [ $L -ne 256 ]; then
   exit_fail
 fi
 
+cat kprobe_events >> $testlog
+
 echo 1 > events/kprobes/enable
 echo 0 > events/kprobes/enable
 echo > kprobe_events
-- 
cgit 


From 54de76c0123915e7533ce352de30a1f2d80fe81f Mon Sep 17 00:00:00 2001
From: Phil Auld <pauld@redhat.com>
Date: Thu, 12 May 2022 10:34:39 -0400
Subject: kselftest/cgroup: fix test_stress.sh to use OUTPUT dir

Running cgroup kselftest with O= fails to run the with_stress test due
to hardcoded ./test_core. Find test_core binary using the OUTPUT directory.

Fixes: 1a99fcc035fb ("selftests: cgroup: Run test_core under interfering stress")
Signed-off-by: Phil Auld <pauld@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 tools/testing/selftests/cgroup/test_stress.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/cgroup/test_stress.sh b/tools/testing/selftests/cgroup/test_stress.sh
index 15d9d5896394..109c044f715f 100755
--- a/tools/testing/selftests/cgroup/test_stress.sh
+++ b/tools/testing/selftests/cgroup/test_stress.sh
@@ -1,4 +1,4 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-./with_stress.sh -s subsys -s fork ./test_core
+./with_stress.sh -s subsys -s fork ${OUTPUT}/test_core
-- 
cgit 


From c249764320cba8ab42821b0c7dad75f117c853e4 Mon Sep 17 00:00:00 2001
From: Daniel Latypov <dlatypov@google.com>
Date: Fri, 8 Apr 2022 14:51:05 -0700
Subject: kunit: tool: update test counts summary line format

Before:
> Testing complete. Passed: 137, Failed: 0, Crashed: 0, Skipped: 36, Errors: 0

After:
> Testing complete. Ran 173 tests: passed: 137, skipped: 36

Even with our current set of statuses, the output is a bit verbose.
It could get worse in the future if we add more (e.g. timeout, kasan).
Let's only print the relevant ones.

I had previously been sympathetic to the argument that always
printing out all the statuses would make it easier to parse results.
But now we have commit acd8e8407b8f ("kunit: Print test statistics on
failure"), there are test counts printed out in the raw output.
We don't currently print out an overall total across all suites, but it
would be easy to add, if we see a need for that.

Signed-off-by: Daniel Latypov <dlatypov@google.com>
Co-developed-by: David Gow <davidgow@google.com>
Signed-off-by: David Gow <davidgow@google.com>
Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/kunit/kunit_parser.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py
index 807ed2bd6832..de1c0b7e14ed 100644
--- a/tools/testing/kunit/kunit_parser.py
+++ b/tools/testing/kunit/kunit_parser.py
@@ -94,11 +94,11 @@ class TestCounts:
 	def __str__(self) -> str:
 		"""Returns the string representation of a TestCounts object.
 		"""
-		return ('Passed: ' + str(self.passed) +
-			', Failed: ' + str(self.failed) +
-			', Crashed: ' + str(self.crashed) +
-			', Skipped: ' + str(self.skipped) +
-			', Errors: ' + str(self.errors))
+		statuses = [('passed', self.passed), ('failed', self.failed),
+			('crashed', self.crashed), ('skipped', self.skipped),
+			('errors', self.errors)]
+		return f'Ran {self.total()} tests: ' + \
+			', '.join(f'{s}: {n}' for s, n in statuses if n > 0)
 
 	def total(self) -> int:
 		"""Returns the total number of test cases within a test
-- 
cgit 


From 3f0a50f345f78183f6e9b39c2f45ca5dcaa511ca Mon Sep 17 00:00:00 2001
From: Daniel Latypov <dlatypov@google.com>
Date: Thu, 12 May 2022 07:25:55 -0700
Subject: kunit: tool: stop using a shell to run kernel under QEMU

Note: this potentially breaks custom qemu_configs if people are using
them! But the fix for them is simple, don't specify multiple arguments
in one string and don't add on a redundant ''.

It feels a bit iffy to be using a shell in the first place.

There's the usual shenanigans where people could pass in arbitrary shell
commands via --kernel_arg (since we're just adding '' around the
kernel_cmdline) or via a custom qemu_config.
This isn't too much of a concern given the nature of this script (and
the qemu_config file is in python, you can do w/e you want already).

But it does have some other drawbacks.

One example of a kunit-specific pain point:
If the relevant qemu binary is missing, we get output like this:
> /bin/sh: line 1: qemu-system-aarch64: command not found
This in turn results in our KTAP parser complaining about
missing/invalid KTAP, but we don't directly show the error!
It's even more annoying to debug when you consider --raw_output only
shows KUnit output by default, i.e. you need --raw_output=all to see it.

Whereas directly invoking the binary, Python will raise a
FileNotFoundError for us, which is a noisier but more clear.

Making this change requires
* splitting parameters like ['-m 256'] into ['-m', '256'] in
  kunit/qemu_configs/*.py
* change [''] to [] in kunit/qemu_configs/*.py since otherwise
  QEMU fails w/ 'Device needs media, but drive is empty'
* dropping explicit quoting of the kernel cmdline
* using shlex.quote() when we print what command we're running
  so the user can copy-paste and run it

Signed-off-by: Daniel Latypov <dlatypov@google.com>
Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/kunit/kunit_kernel.py         | 18 ++++++++++--------
 tools/testing/kunit/qemu_configs/alpha.py   |  2 +-
 tools/testing/kunit/qemu_configs/arm.py     |  2 +-
 tools/testing/kunit/qemu_configs/arm64.py   |  2 +-
 tools/testing/kunit/qemu_configs/i386.py    |  2 +-
 tools/testing/kunit/qemu_configs/powerpc.py |  2 +-
 tools/testing/kunit/qemu_configs/riscv.py   |  6 +++---
 tools/testing/kunit/qemu_configs/s390.py    |  4 ++--
 tools/testing/kunit/qemu_configs/sparc.py   |  2 +-
 tools/testing/kunit/qemu_configs/x86_64.py  |  2 +-
 10 files changed, 22 insertions(+), 20 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
index 483f78e15ce9..1b9c4922a675 100644
--- a/tools/testing/kunit/kunit_kernel.py
+++ b/tools/testing/kunit/kunit_kernel.py
@@ -11,6 +11,7 @@ import importlib.util
 import logging
 import subprocess
 import os
+import shlex
 import shutil
 import signal
 import threading
@@ -118,16 +119,17 @@ class LinuxSourceTreeOperationsQemu(LinuxSourceTreeOperations):
 				'-nodefaults',
 				'-m', '1024',
 				'-kernel', kernel_path,
-				'-append', '\'' + ' '.join(params + [self._kernel_command_line]) + '\'',
+				'-append', ' '.join(params + [self._kernel_command_line]),
 				'-no-reboot',
 				'-nographic',
-				'-serial stdio'] + self._extra_qemu_params
-		print('Running tests with:\n$', ' '.join(qemu_command))
-		return subprocess.Popen(' '.join(qemu_command),
-					   stdin=subprocess.PIPE,
-					   stdout=subprocess.PIPE,
-					   stderr=subprocess.STDOUT,
-					   text=True, shell=True, errors='backslashreplace')
+				'-serial', 'stdio'] + self._extra_qemu_params
+		# Note: shlex.join() does what we want, but requires python 3.8+.
+		print('Running tests with:\n$', ' '.join(shlex.quote(arg) for arg in qemu_command))
+		return subprocess.Popen(qemu_command,
+					stdin=subprocess.PIPE,
+					stdout=subprocess.PIPE,
+					stderr=subprocess.STDOUT,
+					text=True, errors='backslashreplace')
 
 class LinuxSourceTreeOperationsUml(LinuxSourceTreeOperations):
 	"""An abstraction over command line operations performed on a source tree."""
diff --git a/tools/testing/kunit/qemu_configs/alpha.py b/tools/testing/kunit/qemu_configs/alpha.py
index 5d0c0cff03bd..3ac846e03a6b 100644
--- a/tools/testing/kunit/qemu_configs/alpha.py
+++ b/tools/testing/kunit/qemu_configs/alpha.py
@@ -7,4 +7,4 @@ CONFIG_SERIAL_8250_CONSOLE=y''',
 			   qemu_arch='alpha',
 			   kernel_path='arch/alpha/boot/vmlinux',
 			   kernel_command_line='console=ttyS0',
-			   extra_qemu_params=[''])
+			   extra_qemu_params=[])
diff --git a/tools/testing/kunit/qemu_configs/arm.py b/tools/testing/kunit/qemu_configs/arm.py
index b9c2a35e0296..db2160200566 100644
--- a/tools/testing/kunit/qemu_configs/arm.py
+++ b/tools/testing/kunit/qemu_configs/arm.py
@@ -10,4 +10,4 @@ CONFIG_SERIAL_AMBA_PL011_CONSOLE=y''',
 			   qemu_arch='arm',
 			   kernel_path='arch/arm/boot/zImage',
 			   kernel_command_line='console=ttyAMA0',
-			   extra_qemu_params=['-machine virt'])
+			   extra_qemu_params=['-machine', 'virt'])
diff --git a/tools/testing/kunit/qemu_configs/arm64.py b/tools/testing/kunit/qemu_configs/arm64.py
index 517c04459f47..67d04064f785 100644
--- a/tools/testing/kunit/qemu_configs/arm64.py
+++ b/tools/testing/kunit/qemu_configs/arm64.py
@@ -9,4 +9,4 @@ CONFIG_SERIAL_AMBA_PL011_CONSOLE=y''',
 			   qemu_arch='aarch64',
 			   kernel_path='arch/arm64/boot/Image.gz',
 			   kernel_command_line='console=ttyAMA0',
-			   extra_qemu_params=['-machine virt', '-cpu cortex-a57'])
+			   extra_qemu_params=['-machine', 'virt', '-cpu', 'cortex-a57'])
diff --git a/tools/testing/kunit/qemu_configs/i386.py b/tools/testing/kunit/qemu_configs/i386.py
index aed3ffd3937d..52b80be40e4b 100644
--- a/tools/testing/kunit/qemu_configs/i386.py
+++ b/tools/testing/kunit/qemu_configs/i386.py
@@ -7,4 +7,4 @@ CONFIG_SERIAL_8250_CONSOLE=y''',
 			   qemu_arch='x86_64',
 			   kernel_path='arch/x86/boot/bzImage',
 			   kernel_command_line='console=ttyS0',
-			   extra_qemu_params=[''])
+			   extra_qemu_params=[])
diff --git a/tools/testing/kunit/qemu_configs/powerpc.py b/tools/testing/kunit/qemu_configs/powerpc.py
index 35e9de24f0db..7ec38d4131f7 100644
--- a/tools/testing/kunit/qemu_configs/powerpc.py
+++ b/tools/testing/kunit/qemu_configs/powerpc.py
@@ -9,4 +9,4 @@ CONFIG_HVC_CONSOLE=y''',
 			   qemu_arch='ppc64',
 			   kernel_path='vmlinux',
 			   kernel_command_line='console=ttyS0',
-			   extra_qemu_params=['-M pseries', '-cpu power8'])
+			   extra_qemu_params=['-M', 'pseries', '-cpu', 'power8'])
diff --git a/tools/testing/kunit/qemu_configs/riscv.py b/tools/testing/kunit/qemu_configs/riscv.py
index 9e528087cd7c..b882fde39435 100644
--- a/tools/testing/kunit/qemu_configs/riscv.py
+++ b/tools/testing/kunit/qemu_configs/riscv.py
@@ -26,6 +26,6 @@ CONFIG_SERIAL_EARLYCON_RISCV_SBI=y''',
 			   kernel_path='arch/riscv/boot/Image',
 			   kernel_command_line='console=ttyS0',
 			   extra_qemu_params=[
-					   '-machine virt',
-					   '-cpu rv64',
-					   '-bios opensbi-riscv64-generic-fw_dynamic.bin'])
+					   '-machine', 'virt',
+					   '-cpu', 'rv64',
+					   '-bios', 'opensbi-riscv64-generic-fw_dynamic.bin'])
diff --git a/tools/testing/kunit/qemu_configs/s390.py b/tools/testing/kunit/qemu_configs/s390.py
index e310bd521113..98fa4fb60c0a 100644
--- a/tools/testing/kunit/qemu_configs/s390.py
+++ b/tools/testing/kunit/qemu_configs/s390.py
@@ -10,5 +10,5 @@ CONFIG_MODULES=y''',
 			   kernel_path='arch/s390/boot/bzImage',
 			   kernel_command_line='console=ttyS0',
 			   extra_qemu_params=[
-					   '-machine s390-ccw-virtio',
-					   '-cpu qemu',])
+					   '-machine', 's390-ccw-virtio',
+					   '-cpu', 'qemu',])
diff --git a/tools/testing/kunit/qemu_configs/sparc.py b/tools/testing/kunit/qemu_configs/sparc.py
index 27f474e7ad6e..e975c4331a7c 100644
--- a/tools/testing/kunit/qemu_configs/sparc.py
+++ b/tools/testing/kunit/qemu_configs/sparc.py
@@ -7,4 +7,4 @@ CONFIG_SERIAL_8250_CONSOLE=y''',
 			   qemu_arch='sparc',
 			   kernel_path='arch/sparc/boot/zImage',
 			   kernel_command_line='console=ttyS0 mem=256M',
-			   extra_qemu_params=['-m 256'])
+			   extra_qemu_params=['-m', '256'])
diff --git a/tools/testing/kunit/qemu_configs/x86_64.py b/tools/testing/kunit/qemu_configs/x86_64.py
index 77ab1aeee8a3..dc7949076863 100644
--- a/tools/testing/kunit/qemu_configs/x86_64.py
+++ b/tools/testing/kunit/qemu_configs/x86_64.py
@@ -7,4 +7,4 @@ CONFIG_SERIAL_8250_CONSOLE=y''',
 			   qemu_arch='x86_64',
 			   kernel_path='arch/x86/boot/bzImage',
 			   kernel_command_line='console=ttyS0',
-			   extra_qemu_params=[''])
+			   extra_qemu_params=[])
-- 
cgit 


From 9660209d9418f2295d31fea0d32e313e9b2c1200 Mon Sep 17 00:00:00 2001
From: Daniel Latypov <dlatypov@google.com>
Date: Tue, 29 Mar 2022 14:42:48 -0700
Subject: kunit: tool: print clearer error message when there's no TAP output

Before:
$ ./tools/testing/kunit/kunit.py parse /dev/null
...
[ERROR] Test : invalid KTAP input!

After:
$ ./tools/testing/kunit/kunit.py parse /dev/null
...
[ERROR] Test <missing>: could not find any KTAP output!

This error message gets printed out when extract_tap_output() yielded no
lines. So while it could be because of malformed KTAP output from KUnit,
it could also be due to not having any KTAP output at all.

Try and make the error message here more clear.

Signed-off-by: Daniel Latypov <dlatypov@google.com>
Reviewed-by: David Gow <davidgow@google.com>
Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/kunit/kunit_parser.py    | 3 ++-
 tools/testing/kunit/kunit_tool_test.py | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py
index de1c0b7e14ed..e16331a5bec4 100644
--- a/tools/testing/kunit/kunit_parser.py
+++ b/tools/testing/kunit/kunit_parser.py
@@ -824,7 +824,8 @@ def parse_run_tests(kernel_output: Iterable[str]) -> Test:
 	lines = extract_tap_lines(kernel_output)
 	test = Test()
 	if not lines:
-		test.add_error('invalid KTAP input!')
+		test.name = '<missing>'
+		test.add_error('could not find any KTAP output!')
 		test.status = TestStatus.FAILURE_TO_PARSE_TESTS
 	else:
 		test = parse_test(lines, 0, [])
diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py
index 210df0f443e6..aebda46bcad8 100755
--- a/tools/testing/kunit/kunit_tool_test.py
+++ b/tools/testing/kunit/kunit_tool_test.py
@@ -226,7 +226,7 @@ class KUnitParserTest(unittest.TestCase):
 		with open(crash_log) as file:
 			result = kunit_parser.parse_run_tests(
 				kunit_parser.extract_tap_lines(file.readlines()))
-		print_mock.assert_any_call(StrContains('invalid KTAP input!'))
+		print_mock.assert_any_call(StrContains('could not find any KTAP output!'))
 		print_mock.stop()
 		self.assertEqual(0, len(result.subtests))
 
@@ -557,7 +557,7 @@ class KUnitMainTest(unittest.TestCase):
 		self.assertEqual(e.exception.code, 1)
 		self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
 		self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1)
-		self.print_mock.assert_any_call(StrContains('invalid KTAP input!'))
+		self.print_mock.assert_any_call(StrContains('could not find any KTAP output!'))
 
 	def test_exec_no_tests(self):
 		self.linux_source_mock.run_kernel = mock.Mock(return_value=['TAP version 14', '1..0'])
-- 
cgit 


From ec8cb4f617a23700d37018d249e3b05149d44a38 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 11 May 2022 17:06:11 -0700
Subject: net: selftests: Stress reuseport listen

This patch adds a test that has 300 VIPs listening on port 443.
Each VIP:443 will have 80 listening socks by using SO_REUSEPORT.
Thus, it will have 24000 listening socks.

Before removing the port only listening_hash, all socks will be in the
same port 443 bucket and inet_reuseport_add_sock() spends much time to
walk through the bucket.  After removing the port only listening_hash
and move all usage to the port+addr lhash2, each bucket in the
ideal case has 80 sk which is much smaller than before.

Here is the test result from a qemu:
Before: listen 24000 socks took 210.210485362 (~210s)
 After: listen 24000 socks took 0.207173      (~210ms)

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/Makefile               |   2 +
 .../selftests/net/stress_reuseport_listen.c        | 105 +++++++++++++++++++++
 .../selftests/net/stress_reuseport_listen.sh       |  25 +++++
 3 files changed, 132 insertions(+)
 create mode 100644 tools/testing/selftests/net/stress_reuseport_listen.c
 create mode 100755 tools/testing/selftests/net/stress_reuseport_listen.sh

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 811e9b712bea..7ea54af55490 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -38,6 +38,7 @@ TEST_PROGS += srv6_end_dt6_l3vpn_test.sh
 TEST_PROGS += vrf_strict_mode_test.sh
 TEST_PROGS += arp_ndisc_evict_nocarrier.sh
 TEST_PROGS += ndisc_unsolicited_na_test.sh
+TEST_PROGS += stress_reuseport_listen.sh
 TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh
 TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh
 TEST_GEN_FILES =  socket nettest
@@ -56,6 +57,7 @@ TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
 TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
 TEST_GEN_FILES += toeplitz
 TEST_GEN_FILES += cmsg_sender
+TEST_GEN_FILES += stress_reuseport_listen
 TEST_PROGS += test_vxlan_vnifiltering.sh
 
 TEST_FILES := settings
diff --git a/tools/testing/selftests/net/stress_reuseport_listen.c b/tools/testing/selftests/net/stress_reuseport_listen.c
new file mode 100644
index 000000000000..ef800bb35a8e
--- /dev/null
+++ b/tools/testing/selftests/net/stress_reuseport_listen.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+/* Test listening on the same port 443 with multiple VIPS.
+ * Each VIP:443 will have multiple sk listening on by using
+ * SO_REUSEPORT.
+ */
+
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <error.h>
+#include <errno.h>
+#include <time.h>
+#include <arpa/inet.h>
+
+#define IP6_LADDR_START "2401:dead::1"
+#define IP6_LPORT 443
+#define NSEC_PER_SEC 1000000000L
+#define NSEC_PER_USEC 1000L
+
+static unsigned int nr_socks_per_vip;
+static unsigned int nr_vips;
+
+static int *bind_reuseport_sock6(void)
+{
+	int *lfds, *cur_fd, err, optvalue = 1;
+	struct sockaddr_in6 sa6 = {};
+	unsigned int i, j;
+
+	sa6.sin6_family = AF_INET6;
+	sa6.sin6_port = htons(IP6_LPORT);
+	err = inet_pton(AF_INET6, IP6_LADDR_START, &sa6.sin6_addr);
+	if (err != 1)
+		error(1, err, "inet_pton(%s)", IP6_LADDR_START);
+
+	lfds = malloc(nr_vips * nr_socks_per_vip * sizeof(lfds[0]));
+	if (!lfds)
+		error(1, errno, "cannot alloc array of lfds");
+
+	cur_fd = lfds;
+	for (i = 0; i < nr_vips; i++) {
+		for (j = 0; j < nr_socks_per_vip; j++) {
+			*cur_fd = socket(AF_INET6, SOCK_STREAM, 0);
+			if (*cur_fd == -1)
+				error(1, errno,
+				      "lfds[%u,%u] = socket(AF_INET6)", i, j);
+
+			err = setsockopt(*cur_fd, SOL_SOCKET, SO_REUSEPORT,
+					 &optvalue, sizeof(optvalue));
+			if (err)
+				error(1, errno,
+				      "setsockopt(lfds[%u,%u], SO_REUSEPORT)",
+				      i, j);
+
+			err = bind(*cur_fd, (struct sockaddr *)&sa6,
+				   sizeof(sa6));
+			if (err)
+				error(1, errno, "bind(lfds[%u,%u])", i, j);
+			cur_fd++;
+		}
+		sa6.sin6_addr.s6_addr32[3]++;
+	}
+
+	return lfds;
+}
+
+int main(int argc, const char *argv[])
+{
+	struct timespec start_ts, end_ts;
+	unsigned long start_ns, end_ns;
+	unsigned int nr_lsocks;
+	int *lfds, i, err;
+
+	if (argc != 3 || atoi(argv[1]) <= 0 || atoi(argv[2]) <= 0)
+		error(1, 0, "Usage: %s <nr_vips> <nr_socks_per_vip>\n",
+		      argv[0]);
+
+	nr_vips = atoi(argv[1]);
+	nr_socks_per_vip = atoi(argv[2]);
+	nr_lsocks = nr_vips * nr_socks_per_vip;
+	lfds = bind_reuseport_sock6();
+
+	clock_gettime(CLOCK_MONOTONIC, &start_ts);
+	for (i = 0; i < nr_lsocks; i++) {
+		err = listen(lfds[i], 0);
+		if (err)
+			error(1, errno, "listen(lfds[%d])", i);
+	}
+	clock_gettime(CLOCK_MONOTONIC, &end_ts);
+
+	start_ns = start_ts.tv_sec * NSEC_PER_SEC + start_ts.tv_nsec;
+	end_ns = end_ts.tv_sec * NSEC_PER_SEC + end_ts.tv_nsec;
+
+	printf("listen %d socks took %lu.%lu\n", nr_lsocks,
+	       (end_ns - start_ns) / NSEC_PER_SEC,
+	       (end_ns - start_ns) / NSEC_PER_USEC);
+
+	for (i = 0; i < nr_lsocks; i++)
+		close(lfds[i]);
+
+	free(lfds);
+	return 0;
+}
diff --git a/tools/testing/selftests/net/stress_reuseport_listen.sh b/tools/testing/selftests/net/stress_reuseport_listen.sh
new file mode 100755
index 000000000000..4de11da4092b
--- /dev/null
+++ b/tools/testing/selftests/net/stress_reuseport_listen.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
+
+NS='stress_reuseport_listen_ns'
+NR_FILES=24100
+SAVED_NR_FILES=$(ulimit -n)
+
+setup() {
+	ip netns add $NS
+	ip netns exec $NS sysctl -q -w net.ipv6.ip_nonlocal_bind=1
+	ulimit -n $NR_FILES
+}
+
+cleanup() {
+	ip netns del $NS
+	ulimit -n $SAVED_NR_FILES
+}
+
+trap cleanup EXIT
+setup
+# 300 different vips listen on port 443
+# Each vip:443 sockaddr has 80 LISTEN sock by using SO_REUSEPORT
+# Total 24000 listening socks
+ip netns exec $NS ./stress_reuseport_listen 300 80
-- 
cgit 


From 349d03ffd5f62c298fd667ffa397c3fdc5c6194b Mon Sep 17 00:00:00 2001
From: Vladis Dronov <vdronov@redhat.com>
Date: Sun, 8 May 2022 15:09:44 +0200
Subject: crypto: s390 - add crypto library interface for ChaCha20

Implement a crypto library interface for the s390-native ChaCha20 cipher
algorithm. This allows us to stop to select CRYPTO_CHACHA20 and instead
select CRYPTO_ARCH_HAVE_LIB_CHACHA. This allows BIG_KEYS=y not to build
a whole ChaCha20 crypto infrastructure as a built-in, but build a smaller
CRYPTO_LIB_CHACHA instead.

Make CRYPTO_CHACHA_S390 config entry to look like similar ones on other
architectures. Remove CRYPTO_ALGAPI select as anyway it is selected by
CRYPTO_SKCIPHER.

Add a new test module and a test script for ChaCha20 cipher and its
interfaces. Here are test results on an idle z15 machine:

Data | Generic crypto TFM |  s390 crypto TFM |    s390 lib
size |      enc      dec  |     enc     dec  |     enc     dec
-----+--------------------+------------------+----------------
512b |   1545ns   1295ns  |   604ns   446ns  |   430ns  407ns
4k   |   9536ns   9463ns  |  2329ns  2174ns  |  2170ns  2154ns
64k  |  149.6us  149.3us  |  34.4us  34.5us  |  33.9us  33.1us
6M   |  23.61ms  23.11ms  |  4223us  4160us  |  3951us  4008us
60M  |  143.9ms  143.9ms  |  33.5ms  33.2ms  |  32.2ms  32.1ms

Signed-off-by: Vladis Dronov <vdronov@redhat.com>
Reviewed-by: Harald Freudenberger <freude@linux.ibm.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/s390/crypto/chacha-glue.c                   |  34 ++-
 drivers/crypto/Kconfig                           |   4 +-
 tools/testing/crypto/chacha20-s390/Makefile      |  12 +
 tools/testing/crypto/chacha20-s390/run-tests.sh  |  34 +++
 tools/testing/crypto/chacha20-s390/test-cipher.c | 372 +++++++++++++++++++++++
 5 files changed, 452 insertions(+), 4 deletions(-)
 create mode 100644 tools/testing/crypto/chacha20-s390/Makefile
 create mode 100644 tools/testing/crypto/chacha20-s390/run-tests.sh
 create mode 100644 tools/testing/crypto/chacha20-s390/test-cipher.c

(limited to 'tools/testing')

diff --git a/arch/s390/crypto/chacha-glue.c b/arch/s390/crypto/chacha-glue.c
index ccfff73e2c93..2ec51f339cec 100644
--- a/arch/s390/crypto/chacha-glue.c
+++ b/arch/s390/crypto/chacha-glue.c
@@ -62,6 +62,34 @@ static int chacha20_s390(struct skcipher_request *req)
 	return rc;
 }
 
+void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
+{
+	/* TODO: implement hchacha_block_arch() in assembly */
+	hchacha_block_generic(state, stream, nrounds);
+}
+EXPORT_SYMBOL(hchacha_block_arch);
+
+void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
+{
+	chacha_init_generic(state, key, iv);
+}
+EXPORT_SYMBOL(chacha_init_arch);
+
+void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src,
+		       unsigned int bytes, int nrounds)
+{
+	/* s390 chacha20 implementation has 20 rounds hard-coded,
+	 * it cannot handle a block of data or less, but otherwise
+	 * it can handle data of arbitrary size
+	 */
+	if (bytes <= CHACHA_BLOCK_SIZE || nrounds != 20)
+		chacha_crypt_generic(state, dst, src, bytes, nrounds);
+	else
+		chacha20_crypt_s390(state, dst, src, bytes,
+				    &state[4], &state[12]);
+}
+EXPORT_SYMBOL(chacha_crypt_arch);
+
 static struct skcipher_alg chacha_algs[] = {
 	{
 		.base.cra_name		= "chacha20",
@@ -83,12 +111,14 @@ static struct skcipher_alg chacha_algs[] = {
 
 static int __init chacha_mod_init(void)
 {
-	return crypto_register_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs));
+	return IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) ?
+		crypto_register_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs)) : 0;
 }
 
 static void __exit chacha_mod_fini(void)
 {
-	crypto_unregister_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs));
+	if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER))
+		crypto_unregister_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs));
 }
 
 module_cpu_feature_match(VXRS, chacha_mod_init);
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 7b2d138bc83e..ee99c02c84e8 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -216,9 +216,9 @@ config CRYPTO_AES_S390
 config CRYPTO_CHACHA_S390
 	tristate "ChaCha20 stream cipher"
 	depends on S390
-	select CRYPTO_ALGAPI
 	select CRYPTO_SKCIPHER
-	select CRYPTO_CHACHA20
+	select CRYPTO_LIB_CHACHA_GENERIC
+	select CRYPTO_ARCH_HAVE_LIB_CHACHA
 	help
 	  This is the s390 SIMD implementation of the ChaCha20 stream
 	  cipher (RFC 7539).
diff --git a/tools/testing/crypto/chacha20-s390/Makefile b/tools/testing/crypto/chacha20-s390/Makefile
new file mode 100644
index 000000000000..db81cd2fb9c5
--- /dev/null
+++ b/tools/testing/crypto/chacha20-s390/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2022 Red Hat, Inc.
+# Author: Vladis Dronov <vdronoff@gmail.com>
+
+obj-m += test_cipher.o
+test_cipher-y := test-cipher.o
+
+all:
+	make -C /lib/modules/$(shell uname -r)/build/ M=$(PWD) modules
+clean:
+	make -C /lib/modules/$(shell uname -r)/build/ M=$(PWD) clean
diff --git a/tools/testing/crypto/chacha20-s390/run-tests.sh b/tools/testing/crypto/chacha20-s390/run-tests.sh
new file mode 100644
index 000000000000..43108794b996
--- /dev/null
+++ b/tools/testing/crypto/chacha20-s390/run-tests.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2022 Red Hat, Inc.
+# Author: Vladis Dronov <vdronoff@gmail.com>
+#
+# This script runs (via instmod) test-cipher.ko module which invokes
+# generic and s390-native ChaCha20 encryprion algorithms with different
+# size of data. Check 'dmesg' for results.
+#
+# The insmod error is expected:
+# insmod: ERROR: could not insert module test_cipher.ko: Operation not permitted
+
+lsmod | grep chacha | cut -f1 -d' ' | xargs rmmod
+modprobe chacha_generic
+modprobe chacha_s390
+
+# run encryption for different data size, including whole block(s) +/- 1
+insmod test_cipher.ko size=63
+insmod test_cipher.ko size=64
+insmod test_cipher.ko size=65
+insmod test_cipher.ko size=127
+insmod test_cipher.ko size=128
+insmod test_cipher.ko size=129
+insmod test_cipher.ko size=511
+insmod test_cipher.ko size=512
+insmod test_cipher.ko size=513
+insmod test_cipher.ko size=4096
+insmod test_cipher.ko size=65611
+insmod test_cipher.ko size=6291456
+insmod test_cipher.ko size=62914560
+
+# print test logs
+dmesg | tail -170
diff --git a/tools/testing/crypto/chacha20-s390/test-cipher.c b/tools/testing/crypto/chacha20-s390/test-cipher.c
new file mode 100644
index 000000000000..34e8b855266f
--- /dev/null
+++ b/tools/testing/crypto/chacha20-s390/test-cipher.c
@@ -0,0 +1,372 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2022 Red Hat, Inc.
+ * Author: Vladis Dronov <vdronoff@gmail.com>
+ */
+
+#include <asm/elf.h>
+#include <asm/uaccess.h>
+#include <asm/smp.h>
+#include <crypto/skcipher.h>
+#include <crypto/akcipher.h>
+#include <crypto/acompress.h>
+#include <crypto/rng.h>
+#include <crypto/drbg.h>
+#include <crypto/kpp.h>
+#include <crypto/internal/simd.h>
+#include <crypto/chacha.h>
+#include <crypto/aead.h>
+#include <crypto/hash.h>
+#include <linux/crypto.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/fips.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/scatterlist.h>
+#include <linux/time.h>
+#include <linux/vmalloc.h>
+#include <linux/zlib.h>
+#include <linux/once.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+static unsigned int data_size __read_mostly = 256;
+static unsigned int debug __read_mostly = 0;
+
+/* tie all skcipher structures together */
+struct skcipher_def {
+	struct scatterlist sginp, sgout;
+	struct crypto_skcipher *tfm;
+	struct skcipher_request *req;
+	struct crypto_wait wait;
+};
+
+/* Perform cipher operations with the chacha lib */
+static int test_lib_chacha(u8 *revert, u8 *cipher, u8 *plain)
+{
+	u32 chacha_state[CHACHA_STATE_WORDS];
+	u8 iv[16], key[32];
+	u64 start, end;
+
+	memset(key, 'X', sizeof(key));
+	memset(iv, 'I', sizeof(iv));
+
+	if (debug) {
+		print_hex_dump(KERN_INFO, "key: ", DUMP_PREFIX_OFFSET,
+			       16, 1, key, 32, 1);
+
+		print_hex_dump(KERN_INFO, "iv:  ", DUMP_PREFIX_OFFSET,
+			       16, 1, iv, 16, 1);
+	}
+
+	/* Encrypt */
+	chacha_init_arch(chacha_state, (u32*)key, iv);
+
+	start = ktime_get_ns();
+	chacha_crypt_arch(chacha_state, cipher, plain, data_size, 20);
+	end = ktime_get_ns();
+
+
+	if (debug)
+		print_hex_dump(KERN_INFO, "encr:", DUMP_PREFIX_OFFSET,
+			       16, 1, cipher,
+			       (data_size > 64 ? 64 : data_size), 1);
+
+	pr_info("lib encryption took: %lld nsec", end - start);
+
+	/* Decrypt */
+	chacha_init_arch(chacha_state, (u32 *)key, iv);
+
+	start = ktime_get_ns();
+	chacha_crypt_arch(chacha_state, revert, cipher, data_size, 20);
+	end = ktime_get_ns();
+
+	if (debug)
+		print_hex_dump(KERN_INFO, "decr:", DUMP_PREFIX_OFFSET,
+			       16, 1, revert,
+			       (data_size > 64 ? 64 : data_size), 1);
+
+	pr_info("lib decryption took: %lld nsec", end - start);
+
+	return 0;
+}
+
+/* Perform cipher operations with skcipher */
+static unsigned int test_skcipher_encdec(struct skcipher_def *sk,
+					 int enc)
+{
+	int rc;
+
+	if (enc) {
+		rc = crypto_wait_req(crypto_skcipher_encrypt(sk->req),
+				     &sk->wait);
+		if (rc)
+			pr_info("skcipher encrypt returned with result"
+				"%d\n", rc);
+	}
+	else
+	{
+		rc = crypto_wait_req(crypto_skcipher_decrypt(sk->req),
+				     &sk->wait);
+		if (rc)
+			pr_info("skcipher decrypt returned with result"
+				"%d\n", rc);
+	}
+
+	return rc;
+}
+
+/* Initialize and trigger cipher operations */
+static int test_skcipher(char *name, u8 *revert, u8 *cipher, u8 *plain)
+{
+	struct skcipher_def sk;
+	struct crypto_skcipher *skcipher = NULL;
+	struct skcipher_request *req = NULL;
+	u8 iv[16], key[32];
+	u64 start, end;
+	int ret = -EFAULT;
+
+	skcipher = crypto_alloc_skcipher(name, 0, 0);
+	if (IS_ERR(skcipher)) {
+		pr_info("could not allocate skcipher %s handle\n", name);
+		return PTR_ERR(skcipher);
+	}
+
+	req = skcipher_request_alloc(skcipher, GFP_KERNEL);
+	if (!req) {
+		pr_info("could not allocate skcipher request\n");
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+					  crypto_req_done,
+					  &sk.wait);
+
+	memset(key, 'X', sizeof(key));
+	memset(iv, 'I', sizeof(iv));
+
+	if (crypto_skcipher_setkey(skcipher, key, 32)) {
+		pr_info("key could not be set\n");
+		ret = -EAGAIN;
+		goto out;
+	}
+
+	if (debug) {
+		print_hex_dump(KERN_INFO, "key: ", DUMP_PREFIX_OFFSET,
+			       16, 1, key, 32, 1);
+
+		print_hex_dump(KERN_INFO, "iv:  ", DUMP_PREFIX_OFFSET,
+			       16, 1, iv, 16, 1);
+	}
+
+	sk.tfm = skcipher;
+	sk.req = req;
+
+	/* Encrypt in one pass */
+	sg_init_one(&sk.sginp, plain, data_size);
+	sg_init_one(&sk.sgout, cipher, data_size);
+	skcipher_request_set_crypt(req, &sk.sginp, &sk.sgout,
+				   data_size, iv);
+	crypto_init_wait(&sk.wait);
+
+	/* Encrypt data */
+	start = ktime_get_ns();
+	ret = test_skcipher_encdec(&sk, 1);
+	end = ktime_get_ns();
+
+	if (ret)
+		goto out;
+
+	pr_info("%s tfm encryption successful, took %lld nsec\n", name, end - start);
+
+	if (debug)
+		print_hex_dump(KERN_INFO, "encr:", DUMP_PREFIX_OFFSET,
+			       16, 1, cipher,
+			       (data_size > 64 ? 64 : data_size), 1);
+
+	/* Prepare for decryption */
+	memset(iv, 'I', sizeof(iv));
+
+	sg_init_one(&sk.sginp, cipher, data_size);
+	sg_init_one(&sk.sgout, revert, data_size);
+	skcipher_request_set_crypt(req, &sk.sginp, &sk.sgout,
+				   data_size, iv);
+	crypto_init_wait(&sk.wait);
+
+	/* Decrypt data */
+	start = ktime_get_ns();
+	ret = test_skcipher_encdec(&sk, 0);
+	end = ktime_get_ns();
+
+	if (ret)
+		goto out;
+
+	pr_info("%s tfm decryption successful, took %lld nsec\n", name, end - start);
+
+	if (debug)
+		print_hex_dump(KERN_INFO, "decr:", DUMP_PREFIX_OFFSET,
+			       16, 1, revert,
+			       (data_size > 64 ? 64 : data_size), 1);
+
+	/* Dump some internal skcipher data */
+	if (debug)
+		pr_info("skcipher %s: cryptlen %d blksize %d stride %d "
+			"ivsize %d alignmask 0x%x\n",
+			name, sk.req->cryptlen,
+			crypto_skcipher_blocksize(sk.tfm),
+			crypto_skcipher_alg(sk.tfm)->walksize,
+			crypto_skcipher_ivsize(sk.tfm),
+			crypto_skcipher_alignmask(sk.tfm));
+
+out:
+	if (skcipher)
+		crypto_free_skcipher(skcipher);
+	if (req)
+		skcipher_request_free(req);
+	return ret;
+}
+
+static int __init chacha_s390_test_init(void)
+{
+	u8 *plain = NULL, *revert = NULL;
+	u8 *cipher_generic = NULL, *cipher_s390 = NULL;
+	int ret = -1;
+
+	pr_info("s390 ChaCha20 test module: size=%d debug=%d\n",
+		data_size, debug);
+
+	/* Allocate and fill buffers */
+	plain = vmalloc(data_size);
+	if (!plain) {
+		pr_info("could not allocate plain buffer\n");
+		ret = -2;
+		goto out;
+	}
+	memset(plain, 'a', data_size);
+	get_random_bytes(plain, (data_size > 256 ? 256 : data_size));
+
+	cipher_generic = vmalloc(data_size);
+	if (!cipher_generic) {
+		pr_info("could not allocate cipher_generic buffer\n");
+		ret = -2;
+		goto out;
+	}
+	memset(cipher_generic, 0, data_size);
+
+	cipher_s390 = vmalloc(data_size);
+	if (!cipher_s390) {
+		pr_info("could not allocate cipher_s390 buffer\n");
+		ret = -2;
+		goto out;
+	}
+	memset(cipher_s390, 0, data_size);
+
+	revert = vmalloc(data_size);
+	if (!revert) {
+		pr_info("could not allocate revert buffer\n");
+		ret = -2;
+		goto out;
+	}
+	memset(revert, 0, data_size);
+
+	if (debug)
+		print_hex_dump(KERN_INFO, "src: ", DUMP_PREFIX_OFFSET,
+			       16, 1, plain,
+			       (data_size > 64 ? 64 : data_size), 1);
+
+	/* Use chacha20 generic */
+	ret = test_skcipher("chacha20-generic", revert, cipher_generic, plain);
+	if (ret)
+		goto out;
+
+	if (memcmp(plain, revert, data_size)) {
+		pr_info("generic en/decryption check FAILED\n");
+		ret = -2;
+		goto out;
+	}
+	else
+		pr_info("generic en/decryption check OK\n");
+
+	memset(revert, 0, data_size);
+
+	/* Use chacha20 s390 */
+	ret = test_skcipher("chacha20-s390", revert, cipher_s390, plain);
+	if (ret)
+		goto out;
+
+	if (memcmp(plain, revert, data_size)) {
+		pr_info("s390 en/decryption check FAILED\n");
+		ret = -2;
+		goto out;
+	}
+	else
+		pr_info("s390 en/decryption check OK\n");
+
+	if (memcmp(cipher_generic, cipher_s390, data_size)) {
+		pr_info("s390 vs generic check FAILED\n");
+		ret = -2;
+		goto out;
+	}
+	else
+		pr_info("s390 vs generic check OK\n");
+
+	memset(cipher_s390, 0, data_size);
+	memset(revert, 0, data_size);
+
+	/* Use chacha20 lib */
+	test_lib_chacha(revert, cipher_s390, plain);
+
+	if (memcmp(plain, revert, data_size)) {
+		pr_info("lib en/decryption check FAILED\n");
+		ret = -2;
+		goto out;
+	}
+	else
+		pr_info("lib en/decryption check OK\n");
+
+	if (memcmp(cipher_generic, cipher_s390, data_size)) {
+		pr_info("lib vs generic check FAILED\n");
+		ret = -2;
+		goto out;
+	}
+	else
+		pr_info("lib vs generic check OK\n");
+
+	pr_info("--- chacha20 s390 test end ---\n");
+
+out:
+	if (plain)
+		vfree(plain);
+	if (cipher_generic)
+		vfree(cipher_generic);
+	if (cipher_s390)
+		vfree(cipher_s390);
+	if (revert)
+		vfree(revert);
+
+	return -1;
+}
+
+static void __exit chacha_s390_test_exit(void)
+{
+	pr_info("s390 ChaCha20 test module exit\n");
+}
+
+module_param_named(size, data_size, uint, 0660);
+module_param(debug, int, 0660);
+MODULE_PARM_DESC(size, "Size of a plaintext");
+MODULE_PARM_DESC(debug, "Debug level (0=off,1=on)");
+
+module_init(chacha_s390_test_init);
+module_exit(chacha_s390_test_exit);
+
+MODULE_DESCRIPTION("s390 ChaCha20 self-test");
+MODULE_AUTHOR("Vladis Dronov <vdronoff@gmail.com>");
+MODULE_LICENSE("GPL v2");
-- 
cgit 


From 49bb39bddad214304bb523258f02f57cd25ed88b Mon Sep 17 00:00:00 2001
From: Amit Cohen <amcohen@nvidia.com>
Date: Thu, 12 May 2022 16:12:07 +0300
Subject: selftests: fib_nexthops: Make the test more robust

Rarely some of the test cases fail. Make the test more robust by increasing
the timeout of ping commands to 5 seconds.

Signed-off-by: Amit Cohen <amcohen@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/fib_nexthops.sh | 48 ++++++++++++++---------------
 1 file changed, 24 insertions(+), 24 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index b3bf5319bb0e..a99ee3fb2e13 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -882,13 +882,13 @@ ipv6_fcnal_runtime()
 	log_test $? 0 "Route delete"
 
 	run_cmd "$IP ro add 2001:db8:101::1/128 nhid 81"
-	run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+	run_cmd "ip netns exec me ping -c1 -w5 2001:db8:101::1"
 	log_test $? 0 "Ping with nexthop"
 
 	run_cmd "$IP nexthop add id 82 via 2001:db8:92::2 dev veth3"
 	run_cmd "$IP nexthop add id 122 group 81/82"
 	run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 122"
-	run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+	run_cmd "ip netns exec me ping -c1 -w5 2001:db8:101::1"
 	log_test $? 0 "Ping - multipath"
 
 	#
@@ -896,26 +896,26 @@ ipv6_fcnal_runtime()
 	#
 	run_cmd "$IP -6 nexthop add id 83 blackhole"
 	run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 83"
-	run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+	run_cmd "ip netns exec me ping -c1 -w5 2001:db8:101::1"
 	log_test $? 2 "Ping - blackhole"
 
 	run_cmd "$IP nexthop replace id 83 via 2001:db8:91::2 dev veth1"
-	run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+	run_cmd "ip netns exec me ping -c1 -w5 2001:db8:101::1"
 	log_test $? 0 "Ping - blackhole replaced with gateway"
 
 	run_cmd "$IP -6 nexthop replace id 83 blackhole"
-	run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+	run_cmd "ip netns exec me ping -c1 -w5 2001:db8:101::1"
 	log_test $? 2 "Ping - gateway replaced by blackhole"
 
 	run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 122"
-	run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+	run_cmd "ip netns exec me ping -c1 -w5 2001:db8:101::1"
 	if [ $? -eq 0 ]; then
 		run_cmd "$IP nexthop replace id 122 group 83"
-		run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+		run_cmd "ip netns exec me ping -c1 -w5 2001:db8:101::1"
 		log_test $? 2 "Ping - group with blackhole"
 
 		run_cmd "$IP nexthop replace id 122 group 81/82"
-		run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+		run_cmd "ip netns exec me ping -c1 -w5 2001:db8:101::1"
 		log_test $? 0 "Ping - group blackhole replaced with gateways"
 	else
 		log_test 2 0 "Ping - multipath failed"
@@ -1003,10 +1003,10 @@ ipv6_fcnal_runtime()
 	run_cmd "$IP nexthop add id 92 via 2001:db8:92::2 dev veth3"
 	run_cmd "$IP nexthop add id 93 group 91/92"
 	run_cmd "$IP -6 ro add default nhid 91"
-	run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+	run_cmd "ip netns exec me ping -c1 -w5 2001:db8:101::1"
 	log_test $? 0 "Nexthop with default route and rpfilter"
 	run_cmd "$IP -6 ro replace default nhid 93"
-	run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+	run_cmd "ip netns exec me ping -c1 -w5 2001:db8:101::1"
 	log_test $? 0 "Nexthop with multipath default route and rpfilter"
 
 	# TO-DO:
@@ -1460,13 +1460,13 @@ ipv4_fcnal_runtime()
 	#
 	run_cmd "$IP nexthop replace id 21 via 172.16.1.2 dev veth1"
 	run_cmd "$IP ro replace 172.16.101.1/32 nhid 21"
-	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	run_cmd "ip netns exec me ping -c1 -w5 172.16.101.1"
 	log_test $? 0 "Basic ping"
 
 	run_cmd "$IP nexthop replace id 22 via 172.16.2.2 dev veth3"
 	run_cmd "$IP nexthop add id 122 group 21/22"
 	run_cmd "$IP ro replace 172.16.101.1/32 nhid 122"
-	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	run_cmd "ip netns exec me ping -c1 -w5 172.16.101.1"
 	log_test $? 0 "Ping - multipath"
 
 	run_cmd "$IP ro delete 172.16.101.1/32 nhid 122"
@@ -1477,7 +1477,7 @@ ipv4_fcnal_runtime()
 	run_cmd "$IP nexthop add id 501 via 172.16.1.2 dev veth1"
 	run_cmd "$IP ro add default nhid 501"
 	run_cmd "$IP ro add default via 172.16.1.3 dev veth1 metric 20"
-	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	run_cmd "ip netns exec me ping -c1 -w5 172.16.101.1"
 	log_test $? 0 "Ping - multiple default routes, nh first"
 
 	# flip the order
@@ -1486,7 +1486,7 @@ ipv4_fcnal_runtime()
 	run_cmd "$IP ro add default via 172.16.1.2 dev veth1 metric 20"
 	run_cmd "$IP nexthop replace id 501 via 172.16.1.3 dev veth1"
 	run_cmd "$IP ro add default nhid 501 metric 20"
-	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	run_cmd "ip netns exec me ping -c1 -w5 172.16.101.1"
 	log_test $? 0 "Ping - multiple default routes, nh second"
 
 	run_cmd "$IP nexthop delete nhid 501"
@@ -1497,26 +1497,26 @@ ipv4_fcnal_runtime()
 	#
 	run_cmd "$IP nexthop add id 23 blackhole"
 	run_cmd "$IP ro replace 172.16.101.1/32 nhid 23"
-	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	run_cmd "ip netns exec me ping -c1 -w5 172.16.101.1"
 	log_test $? 2 "Ping - blackhole"
 
 	run_cmd "$IP nexthop replace id 23 via 172.16.1.2 dev veth1"
-	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	run_cmd "ip netns exec me ping -c1 -w5 172.16.101.1"
 	log_test $? 0 "Ping - blackhole replaced with gateway"
 
 	run_cmd "$IP nexthop replace id 23 blackhole"
-	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	run_cmd "ip netns exec me ping -c1 -w5 172.16.101.1"
 	log_test $? 2 "Ping - gateway replaced by blackhole"
 
 	run_cmd "$IP ro replace 172.16.101.1/32 nhid 122"
-	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	run_cmd "ip netns exec me ping -c1 -w5 172.16.101.1"
 	if [ $? -eq 0 ]; then
 		run_cmd "$IP nexthop replace id 122 group 23"
-		run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+		run_cmd "ip netns exec me ping -c1 -w5 172.16.101.1"
 		log_test $? 2 "Ping - group with blackhole"
 
 		run_cmd "$IP nexthop replace id 122 group 21/22"
-		run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+		run_cmd "ip netns exec me ping -c1 -w5 172.16.101.1"
 		log_test $? 0 "Ping - group blackhole replaced with gateways"
 	else
 		log_test 2 0 "Ping - multipath failed"
@@ -1543,7 +1543,7 @@ ipv4_fcnal_runtime()
 	run_cmd "$IP nexthop add id 24 via ${lladdr} dev veth1"
 	set +e
 	run_cmd "$IP ro replace 172.16.101.1/32 nhid 24"
-	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	run_cmd "ip netns exec me ping -c1 -w5 172.16.101.1"
 	log_test $? 0 "IPv6 nexthop with IPv4 route"
 
 	$IP neigh sh | grep -q "${lladdr} dev veth1"
@@ -1567,11 +1567,11 @@ ipv4_fcnal_runtime()
 
 	check_route "172.16.101.1" "172.16.101.1 nhid 101 nexthop via inet6 ${lladdr} dev veth1 weight 1 nexthop via 172.16.1.2 dev veth1 weight 1"
 
-	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	run_cmd "ip netns exec me ping -c1 -w5 172.16.101.1"
 	log_test $? 0 "IPv6 nexthop with IPv4 route"
 
 	run_cmd "$IP ro replace 172.16.101.1/32 via inet6 ${lladdr} dev veth1"
-	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	run_cmd "ip netns exec me ping -c1 -w5 172.16.101.1"
 	log_test $? 0 "IPv4 route with IPv6 gateway"
 
 	$IP neigh sh | grep -q "${lladdr} dev veth1"
@@ -1588,7 +1588,7 @@ ipv4_fcnal_runtime()
 
 	run_cmd "$IP ro del 172.16.101.1/32 via inet6 ${lladdr} dev veth1"
 	run_cmd "$IP -4 ro add default via inet6 ${lladdr} dev veth1"
-	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	run_cmd "ip netns exec me ping -c1 -w5 172.16.101.1"
 	log_test $? 0 "IPv4 default route with IPv6 gateway"
 
 	#
-- 
cgit 


From 365d519923a279af379a8d0f641ef50e44bb610e Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 12 May 2022 18:10:25 -0700
Subject: selftests/bpf: Check combination of jit blinding and pointers to bpf
 subprogs.

Check that ld_imm64 with src_reg=1 (aka BPF_PSEUDO_FUNC) works
with jit_blinding.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20220513011025.13344-2-alexei.starovoitov@gmail.com
---
 tools/testing/selftests/bpf/progs/test_subprogs.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/test_subprogs.c b/tools/testing/selftests/bpf/progs/test_subprogs.c
index b7c37ca09544..f8e9256cf18d 100644
--- a/tools/testing/selftests/bpf/progs/test_subprogs.c
+++ b/tools/testing/selftests/bpf/progs/test_subprogs.c
@@ -89,6 +89,11 @@ int prog2(void *ctx)
 	return 0;
 }
 
+static int empty_callback(__u32 index, void *data)
+{
+	return 0;
+}
+
 /* prog3 has the same section name as prog1 */
 SEC("raw_tp/sys_enter")
 int prog3(void *ctx)
@@ -98,6 +103,9 @@ int prog3(void *ctx)
 	if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t))
 		return 1;
 
+	/* test that ld_imm64 with BPF_PSEUDO_FUNC doesn't get blinded */
+	bpf_loop(1, empty_callback, NULL, 0);
+
 	res3 = sub3(5) + 6; /* (5 + 3 + (4 + 1)) + 6 = 19 */
 	return 0;
 }
-- 
cgit 


From b2531d4bdce19f28364b45aac9132e153b1f23a4 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 12 May 2022 15:07:13 -0700
Subject: selftests/bpf: Convert some selftests to high-level BPF map APIs

Convert a bunch of selftests to using newly added high-level BPF map
APIs.

This change exposed that map_kptr selftests allocated too big buffer,
which is fixed in this patch as well.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20220512220713.2617964-2-andrii@kernel.org
---
 .../selftests/bpf/prog_tests/core_autosize.c       |  2 +-
 .../testing/selftests/bpf/prog_tests/core_retro.c  | 17 ++++++------
 tools/testing/selftests/bpf/prog_tests/for_each.c  | 30 ++++++++++++----------
 .../selftests/bpf/prog_tests/lookup_and_delete.c   | 15 ++++++-----
 tools/testing/selftests/bpf/prog_tests/map_kptr.c  | 23 ++++++++++-------
 .../selftests/bpf/prog_tests/stacktrace_build_id.c |  8 +++---
 .../bpf/prog_tests/stacktrace_build_id_nmi.c       | 11 ++++----
 tools/testing/selftests/bpf/prog_tests/timer_mim.c |  2 +-
 8 files changed, 61 insertions(+), 47 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/core_autosize.c b/tools/testing/selftests/bpf/prog_tests/core_autosize.c
index 1dfe14ff6aa4..f2ce4fd1cdae 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_autosize.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_autosize.c
@@ -167,7 +167,7 @@ void test_core_autosize(void)
 	if (!ASSERT_OK_PTR(bss_map, "bss_map_find"))
 		goto cleanup;
 
-	err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &zero, (void *)&out);
+	err = bpf_map__lookup_elem(bss_map, &zero, sizeof(zero), &out, sizeof(out), 0);
 	if (!ASSERT_OK(err, "bss_lookup"))
 		goto cleanup;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/core_retro.c b/tools/testing/selftests/bpf/prog_tests/core_retro.c
index 6acb0e94d4d7..4a2c256c8db6 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_retro.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_retro.c
@@ -6,31 +6,32 @@
 
 void test_core_retro(void)
 {
-	int err, zero = 0, res, duration = 0, my_pid = getpid();
+	int err, zero = 0, res, my_pid = getpid();
 	struct test_core_retro *skel;
 
 	/* load program */
 	skel = test_core_retro__open_and_load();
-	if (CHECK(!skel, "skel_load", "skeleton open/load failed\n"))
+	if (!ASSERT_OK_PTR(skel, "skel_load"))
 		goto out_close;
 
-	err = bpf_map_update_elem(bpf_map__fd(skel->maps.exp_tgid_map), &zero, &my_pid, 0);
-	if (CHECK(err, "map_update", "failed to set expected PID: %d\n", errno))
+	err = bpf_map__update_elem(skel->maps.exp_tgid_map, &zero, sizeof(zero),
+				   &my_pid, sizeof(my_pid), 0);
+	if (!ASSERT_OK(err, "map_update"))
 		goto out_close;
 
 	/* attach probe */
 	err = test_core_retro__attach(skel);
-	if (CHECK(err, "attach_kprobe", "err %d\n", err))
+	if (!ASSERT_OK(err, "attach_kprobe"))
 		goto out_close;
 
 	/* trigger */
 	usleep(1);
 
-	err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.results), &zero, &res);
-	if (CHECK(err, "map_lookup", "failed to lookup result: %d\n", errno))
+	err = bpf_map__lookup_elem(skel->maps.results, &zero, sizeof(zero), &res, sizeof(res), 0);
+	if (!ASSERT_OK(err, "map_lookup"))
 		goto out_close;
 
-	CHECK(res != my_pid, "pid_check", "got %d != exp %d\n", res, my_pid);
+	ASSERT_EQ(res, my_pid, "pid_check");
 
 out_close:
 	test_core_retro__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/for_each.c b/tools/testing/selftests/bpf/prog_tests/for_each.c
index 754e80937e5d..8963f8a549f2 100644
--- a/tools/testing/selftests/bpf/prog_tests/for_each.c
+++ b/tools/testing/selftests/bpf/prog_tests/for_each.c
@@ -10,9 +10,10 @@ static unsigned int duration;
 
 static void test_hash_map(void)
 {
-	int i, err, hashmap_fd, max_entries, percpu_map_fd;
+	int i, err, max_entries;
 	struct for_each_hash_map_elem *skel;
 	__u64 *percpu_valbuf = NULL;
+	size_t percpu_val_sz;
 	__u32 key, num_cpus;
 	__u64 val;
 	LIBBPF_OPTS(bpf_test_run_opts, topts,
@@ -25,26 +26,27 @@ static void test_hash_map(void)
 	if (!ASSERT_OK_PTR(skel, "for_each_hash_map_elem__open_and_load"))
 		return;
 
-	hashmap_fd = bpf_map__fd(skel->maps.hashmap);
 	max_entries = bpf_map__max_entries(skel->maps.hashmap);
 	for (i = 0; i < max_entries; i++) {
 		key = i;
 		val = i + 1;
-		err = bpf_map_update_elem(hashmap_fd, &key, &val, BPF_ANY);
+		err = bpf_map__update_elem(skel->maps.hashmap, &key, sizeof(key),
+					   &val, sizeof(val), BPF_ANY);
 		if (!ASSERT_OK(err, "map_update"))
 			goto out;
 	}
 
 	num_cpus = bpf_num_possible_cpus();
-	percpu_map_fd = bpf_map__fd(skel->maps.percpu_map);
-	percpu_valbuf = malloc(sizeof(__u64) * num_cpus);
+	percpu_val_sz = sizeof(__u64) * num_cpus;
+	percpu_valbuf = malloc(percpu_val_sz);
 	if (!ASSERT_OK_PTR(percpu_valbuf, "percpu_valbuf"))
 		goto out;
 
 	key = 1;
 	for (i = 0; i < num_cpus; i++)
 		percpu_valbuf[i] = i + 1;
-	err = bpf_map_update_elem(percpu_map_fd, &key, percpu_valbuf, BPF_ANY);
+	err = bpf_map__update_elem(skel->maps.percpu_map, &key, sizeof(key),
+				   percpu_valbuf, percpu_val_sz, BPF_ANY);
 	if (!ASSERT_OK(err, "percpu_map_update"))
 		goto out;
 
@@ -58,7 +60,7 @@ static void test_hash_map(void)
 	ASSERT_EQ(skel->bss->hashmap_elems, max_entries, "hashmap_elems");
 
 	key = 1;
-	err = bpf_map_lookup_elem(hashmap_fd, &key, &val);
+	err = bpf_map__lookup_elem(skel->maps.hashmap, &key, sizeof(key), &val, sizeof(val), 0);
 	ASSERT_ERR(err, "hashmap_lookup");
 
 	ASSERT_EQ(skel->bss->percpu_called, 1, "percpu_called");
@@ -75,9 +77,10 @@ out:
 static void test_array_map(void)
 {
 	__u32 key, num_cpus, max_entries;
-	int i, arraymap_fd, percpu_map_fd, err;
+	int i, err;
 	struct for_each_array_map_elem *skel;
 	__u64 *percpu_valbuf = NULL;
+	size_t percpu_val_sz;
 	__u64 val, expected_total;
 	LIBBPF_OPTS(bpf_test_run_opts, topts,
 		.data_in = &pkt_v4,
@@ -89,7 +92,6 @@ static void test_array_map(void)
 	if (!ASSERT_OK_PTR(skel, "for_each_array_map_elem__open_and_load"))
 		return;
 
-	arraymap_fd = bpf_map__fd(skel->maps.arraymap);
 	expected_total = 0;
 	max_entries = bpf_map__max_entries(skel->maps.arraymap);
 	for (i = 0; i < max_entries; i++) {
@@ -98,21 +100,23 @@ static void test_array_map(void)
 		/* skip the last iteration for expected total */
 		if (i != max_entries - 1)
 			expected_total += val;
-		err = bpf_map_update_elem(arraymap_fd, &key, &val, BPF_ANY);
+		err = bpf_map__update_elem(skel->maps.arraymap, &key, sizeof(key),
+					   &val, sizeof(val), BPF_ANY);
 		if (!ASSERT_OK(err, "map_update"))
 			goto out;
 	}
 
 	num_cpus = bpf_num_possible_cpus();
-	percpu_map_fd = bpf_map__fd(skel->maps.percpu_map);
-	percpu_valbuf = malloc(sizeof(__u64) * num_cpus);
+	percpu_val_sz = sizeof(__u64) * num_cpus;
+	percpu_valbuf = malloc(percpu_val_sz);
 	if (!ASSERT_OK_PTR(percpu_valbuf, "percpu_valbuf"))
 		goto out;
 
 	key = 0;
 	for (i = 0; i < num_cpus; i++)
 		percpu_valbuf[i] = i + 1;
-	err = bpf_map_update_elem(percpu_map_fd, &key, percpu_valbuf, BPF_ANY);
+	err = bpf_map__update_elem(skel->maps.percpu_map, &key, sizeof(key),
+				   percpu_valbuf, percpu_val_sz, BPF_ANY);
 	if (!ASSERT_OK(err, "percpu_map_update"))
 		goto out;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c b/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c
index beebfa9730e1..a767bb4a271c 100644
--- a/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c
+++ b/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c
@@ -112,7 +112,8 @@ static void test_lookup_and_delete_hash(void)
 
 	/* Lookup and delete element. */
 	key = 1;
-	err = bpf_map_lookup_and_delete_elem(map_fd, &key, &value);
+	err = bpf_map__lookup_and_delete_elem(skel->maps.hash_map,
+					      &key, sizeof(key), &value, sizeof(value), 0);
 	if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem"))
 		goto cleanup;
 
@@ -147,7 +148,8 @@ static void test_lookup_and_delete_percpu_hash(void)
 
 	/* Lookup and delete element. */
 	key = 1;
-	err = bpf_map_lookup_and_delete_elem(map_fd, &key, value);
+	err = bpf_map__lookup_and_delete_elem(skel->maps.hash_map,
+					      &key, sizeof(key), value, sizeof(value), 0);
 	if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem"))
 		goto cleanup;
 
@@ -191,7 +193,8 @@ static void test_lookup_and_delete_lru_hash(void)
 		goto cleanup;
 
 	/* Lookup and delete element 3. */
-	err = bpf_map_lookup_and_delete_elem(map_fd, &key, &value);
+	err = bpf_map__lookup_and_delete_elem(skel->maps.hash_map,
+					      &key, sizeof(key), &value, sizeof(value), 0);
 	if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem"))
 		goto cleanup;
 
@@ -240,10 +243,10 @@ static void test_lookup_and_delete_lru_percpu_hash(void)
 		value[i] = 0;
 
 	/* Lookup and delete element 3. */
-	err = bpf_map_lookup_and_delete_elem(map_fd, &key, value);
-	if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem")) {
+	err = bpf_map__lookup_and_delete_elem(skel->maps.hash_map,
+					      &key, sizeof(key), value, sizeof(value), 0);
+	if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem"))
 		goto cleanup;
-	}
 
 	/* Check if only one CPU has set the value. */
 	for (i = 0; i < nr_cpus; i++) {
diff --git a/tools/testing/selftests/bpf/prog_tests/map_kptr.c b/tools/testing/selftests/bpf/prog_tests/map_kptr.c
index 8142dd9eff4c..fdcea7a61491 100644
--- a/tools/testing/selftests/bpf/prog_tests/map_kptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/map_kptr.c
@@ -91,7 +91,7 @@ static void test_map_kptr_success(bool test_run)
 	);
 	struct map_kptr *skel;
 	int key = 0, ret;
-	char buf[24];
+	char buf[16];
 
 	skel = map_kptr__open_and_load();
 	if (!ASSERT_OK_PTR(skel, "map_kptr__open_and_load"))
@@ -107,24 +107,29 @@ static void test_map_kptr_success(bool test_run)
 	if (test_run)
 		return;
 
-	ret = bpf_map_update_elem(bpf_map__fd(skel->maps.array_map), &key, buf, 0);
+	ret = bpf_map__update_elem(skel->maps.array_map,
+				   &key, sizeof(key), buf, sizeof(buf), 0);
 	ASSERT_OK(ret, "array_map update");
-	ret = bpf_map_update_elem(bpf_map__fd(skel->maps.array_map), &key, buf, 0);
+	ret = bpf_map__update_elem(skel->maps.array_map,
+				   &key, sizeof(key), buf, sizeof(buf), 0);
 	ASSERT_OK(ret, "array_map update2");
 
-	ret = bpf_map_update_elem(bpf_map__fd(skel->maps.hash_map), &key, buf, 0);
+	ret = bpf_map__update_elem(skel->maps.hash_map,
+				   &key, sizeof(key), buf, sizeof(buf), 0);
 	ASSERT_OK(ret, "hash_map update");
-	ret = bpf_map_delete_elem(bpf_map__fd(skel->maps.hash_map), &key);
+	ret = bpf_map__delete_elem(skel->maps.hash_map, &key, sizeof(key), 0);
 	ASSERT_OK(ret, "hash_map delete");
 
-	ret = bpf_map_update_elem(bpf_map__fd(skel->maps.hash_malloc_map), &key, buf, 0);
+	ret = bpf_map__update_elem(skel->maps.hash_malloc_map,
+				   &key, sizeof(key), buf, sizeof(buf), 0);
 	ASSERT_OK(ret, "hash_malloc_map update");
-	ret = bpf_map_delete_elem(bpf_map__fd(skel->maps.hash_malloc_map), &key);
+	ret = bpf_map__delete_elem(skel->maps.hash_malloc_map, &key, sizeof(key), 0);
 	ASSERT_OK(ret, "hash_malloc_map delete");
 
-	ret = bpf_map_update_elem(bpf_map__fd(skel->maps.lru_hash_map), &key, buf, 0);
+	ret = bpf_map__update_elem(skel->maps.lru_hash_map,
+				   &key, sizeof(key), buf, sizeof(buf), 0);
 	ASSERT_OK(ret, "lru_hash_map update");
-	ret = bpf_map_delete_elem(bpf_map__fd(skel->maps.lru_hash_map), &key);
+	ret = bpf_map__delete_elem(skel->maps.lru_hash_map, &key, sizeof(key), 0);
 	ASSERT_OK(ret, "lru_hash_map delete");
 
 	map_kptr__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
index e8399ae50e77..9ad09a6c538a 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
@@ -8,7 +8,7 @@ void test_stacktrace_build_id(void)
 	int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
 	struct test_stacktrace_build_id *skel;
 	int err, stack_trace_len;
-	__u32 key, previous_key, val, duration = 0;
+	__u32 key, prev_key, val, duration = 0;
 	char buf[256];
 	int i, j;
 	struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
@@ -58,7 +58,7 @@ retry:
 		  "err %d errno %d\n", err, errno))
 		goto cleanup;
 
-	err = bpf_map_get_next_key(stackmap_fd, NULL, &key);
+	err = bpf_map__get_next_key(skel->maps.stackmap, NULL, &key, sizeof(key));
 	if (CHECK(err, "get_next_key from stackmap",
 		  "err %d, errno %d\n", err, errno))
 		goto cleanup;
@@ -79,8 +79,8 @@ retry:
 				if (strstr(buf, build_id) != NULL)
 					build_id_matches = 1;
 			}
-		previous_key = key;
-	} while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0);
+		prev_key = key;
+	} while (bpf_map__get_next_key(skel->maps.stackmap, &prev_key, &key, sizeof(key)) == 0);
 
 	/* stack_map_get_build_id_offset() is racy and sometimes can return
 	 * BPF_STACK_BUILD_ID_IP instead of BPF_STACK_BUILD_ID_VALID;
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
index f45a1d7b0a28..f4ea1a215ce4 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
@@ -27,7 +27,7 @@ void test_stacktrace_build_id_nmi(void)
 		.type = PERF_TYPE_HARDWARE,
 		.config = PERF_COUNT_HW_CPU_CYCLES,
 	};
-	__u32 key, previous_key, val, duration = 0;
+	__u32 key, prev_key, val, duration = 0;
 	char buf[256];
 	int i, j;
 	struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
@@ -100,7 +100,7 @@ retry:
 		  "err %d errno %d\n", err, errno))
 		goto cleanup;
 
-	err = bpf_map_get_next_key(stackmap_fd, NULL, &key);
+	err = bpf_map__get_next_key(skel->maps.stackmap, NULL, &key, sizeof(key));
 	if (CHECK(err, "get_next_key from stackmap",
 		  "err %d, errno %d\n", err, errno))
 		goto cleanup;
@@ -108,7 +108,8 @@ retry:
 	do {
 		char build_id[64];
 
-		err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs);
+		err = bpf_map__lookup_elem(skel->maps.stackmap, &key, sizeof(key),
+					   id_offs, sizeof(id_offs), 0);
 		if (CHECK(err, "lookup_elem from stackmap",
 			  "err %d, errno %d\n", err, errno))
 			goto cleanup;
@@ -121,8 +122,8 @@ retry:
 				if (strstr(buf, build_id) != NULL)
 					build_id_matches = 1;
 			}
-		previous_key = key;
-	} while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0);
+		prev_key = key;
+	} while (bpf_map__get_next_key(skel->maps.stackmap, &prev_key, &key, sizeof(key)) == 0);
 
 	/* stack_map_get_build_id_offset() is racy and sometimes can return
 	 * BPF_STACK_BUILD_ID_IP instead of BPF_STACK_BUILD_ID_VALID;
diff --git a/tools/testing/selftests/bpf/prog_tests/timer_mim.c b/tools/testing/selftests/bpf/prog_tests/timer_mim.c
index 2ee5f5ae11d4..9ff7843909e7 100644
--- a/tools/testing/selftests/bpf/prog_tests/timer_mim.c
+++ b/tools/testing/selftests/bpf/prog_tests/timer_mim.c
@@ -35,7 +35,7 @@ static int timer_mim(struct timer_mim *timer_skel)
 	ASSERT_EQ(timer_skel->bss->ok, 1 | 2, "ok");
 
 	close(bpf_map__fd(timer_skel->maps.inner_htab));
-	err = bpf_map_delete_elem(bpf_map__fd(timer_skel->maps.outer_arr), &key1);
+	err = bpf_map__delete_elem(timer_skel->maps.outer_arr, &key1, sizeof(key1), 0);
 	ASSERT_EQ(err, 0, "delete inner map");
 
 	/* check that timer_cb[12] are no longer running */
-- 
cgit 


From f893abbd6997f9a95815acfb84aa865f0c996373 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Mon, 9 May 2022 18:20:51 -0700
Subject: selftets/damon/sysfs: test existence and permission of
 avail_operations

This commit adds a selftest test case for ensuring the existence and the
permission (read-only) of the 'avail_oprations' DAMON sysfs file.

Link: https://lkml.kernel.org/r/20220426203843.45238-4-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/damon/sysfs.sh | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/damon/sysfs.sh b/tools/testing/selftests/damon/sysfs.sh
index 2e3ae77cb6db..89592c64462f 100644
--- a/tools/testing/selftests/damon/sysfs.sh
+++ b/tools/testing/selftests/damon/sysfs.sh
@@ -231,6 +231,7 @@ test_context()
 {
 	context_dir=$1
 	ensure_dir "$context_dir" "exist"
+	ensure_file "$context_dir/avail_operations" "exit" 400
 	ensure_file "$context_dir/operations" "exist" 600
 	test_monitoring_attrs "$context_dir/monitoring_attrs"
 	test_targets "$context_dir/targets"
-- 
cgit 


From 9994715333515e82865e533250e488496b9742f4 Mon Sep 17 00:00:00 2001
From: Niels Dossche <dossche.niels@gmail.com>
Date: Mon, 9 May 2022 18:20:54 -0700
Subject: selftest/vm: test that mremap fails on non-existent vma

Add a regression test that validates that mremap fails for vma's that
don't exist.

Link: https://lkml.kernel.org/r/20220427224439.23828-3-dossche.niels@gmail.com
Signed-off-by: Niels Dossche <dossche.niels@gmail.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/vm/hugepage-mremap.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/hugepage-mremap.c b/tools/testing/selftests/vm/hugepage-mremap.c
index 1d689084a54b..585978f181ed 100644
--- a/tools/testing/selftests/vm/hugepage-mremap.c
+++ b/tools/testing/selftests/vm/hugepage-mremap.c
@@ -178,6 +178,12 @@ int main(int argc, char *argv[])
 
 	munmap(addr, length);
 
+	addr = mremap(addr, length, length, 0);
+	if (addr != MAP_FAILED) {
+		printf("mremap: Expected failure, but call succeeded\n");
+		exit(1);
+	}
+
 	close(fd);
 	unlink(argv[argc-1]);
 
-- 
cgit 


From c0eeeb02d9df878c71a457008900b650d94bd0d9 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Thu, 12 May 2022 20:22:56 -0700
Subject: selftests/uffd: enable uffd-wp for shmem/hugetlbfs

After we added support for shmem and hugetlbfs, we can turn uffd-wp test
on always now.

Link: https://lkml.kernel.org/r/20220405014932.15212-1-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: "Kirill A . Shutemov" <kirill@shutemov.name>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Nadav Amit <nadav.amit@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/vm/userfaultfd.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 92a4516f8f0d..bbc4a6d8cf7b 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -82,7 +82,7 @@ static int test_type;
 static volatile bool test_uffdio_copy_eexist = true;
 static volatile bool test_uffdio_zeropage_eexist = true;
 /* Whether to test uffd write-protection */
-static bool test_uffdio_wp = false;
+static bool test_uffdio_wp = true;
 /* Whether to test uffd minor faults */
 static bool test_uffdio_minor = false;
 
@@ -1594,8 +1594,6 @@ static void set_test_type(const char *type)
 	if (!strcmp(type, "anon")) {
 		test_type = TEST_ANON;
 		uffd_test_ops = &anon_uffd_test_ops;
-		/* Only enable write-protect test for anonymous test */
-		test_uffdio_wp = true;
 	} else if (!strcmp(type, "hugetlb")) {
 		test_type = TEST_HUGETLB;
 		uffd_test_ops = &hugetlb_uffd_test_ops;
-- 
cgit 


From 1bf0831383c6b372ff870d061ee62156635035c2 Mon Sep 17 00:00:00 2001
From: Guo Zhengkui <guozhengkui@vivo.com>
Date: Thu, 12 May 2022 20:22:56 -0700
Subject: userfaultfd/selftests: use swap() instead of open coding it

Address the following coccicheck warning:

tools/testing/selftests/vm/userfaultfd.c:1536:21-22: WARNING opportunity
for swap().
tools/testing/selftests/vm/userfaultfd.c:1540:33-34: WARNING opportunity
for swap().

by using swap() for the swapping of variable values and drop
`tmp_area` that is not needed any more.

`swap()` macro in userfaultfd.c is introduced in commit 681696862bc18
("selftests: vm: remove dependecy from internal kernel macros")

It has been tested with gcc (Debian 8.3.0-6) 8.3.0.

Link: https://lkml.kernel.org/r/20220407123141.4998-1-guozhengkui@vivo.com
Signed-off-by: Guo Zhengkui <guozhengkui@vivo.com>
Reviewed-by: Muchun Song <songmuchun@bytedance.com>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/vm/userfaultfd.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index bbc4a6d8cf7b..0bdfc1955229 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -1422,7 +1422,6 @@ static void userfaultfd_pagemap_test(unsigned int test_pgsize)
 static int userfaultfd_stress(void)
 {
 	void *area;
-	char *tmp_area;
 	unsigned long nr;
 	struct uffdio_register uffdio_register;
 	struct uffd_stats uffd_stats[nr_cpus];
@@ -1533,13 +1532,9 @@ static int userfaultfd_stress(void)
 					    count_verify[nr], nr);
 
 		/* prepare next bounce */
-		tmp_area = area_src;
-		area_src = area_dst;
-		area_dst = tmp_area;
+		swap(area_src, area_dst);
 
-		tmp_area = area_src_alias;
-		area_src_alias = area_dst_alias;
-		area_dst_alias = tmp_area;
+		swap(area_src_alias, area_dst_alias);
 
 		uffd_stats_report(uffd_stats, nr_cpus);
 	}
-- 
cgit 


From f0cdaa5687d3178f3759033a7ff8411720b61647 Mon Sep 17 00:00:00 2001
From: David Vernet <void@manifault.com>
Date: Thu, 12 May 2022 20:22:56 -0700
Subject: cgroups: refactor children cgroups in memcg tests

Patch series "Fix bugs in memcontroller cgroup tests", v2.

tools/testing/selftests/cgroup/test_memcontrol.c contains a set of
testcases which validate expected behavior of the cgroup memory
controller.  Roman Gushchin recently sent out a patchset that fixed a few
issues in the test.  This patchset continues that effort by fixing a few
more issues that were causing non-deterministic failures in the suite.
With this patchset, I'm unable to reproduce any more errors after running
the tests in a continuous loop for many iterations.  Before, I was able to
reproduce at least one of the errors fixed in this patchset with just one
or two runs.


This patch (of 5):

In test_memcg_min() and test_memcg_low(), there is an array of four
sibling cgroups.  All but one of these sibling groups does a 50MB
allocation, and the group that does no allocation is the third of four in
the array.  This is not a problem per se, but makes it a bit tricky to do
some assertions in test_memcg_low(), as we want to make assertions on the
siblings based on whether or not they performed allocations.  Having a
static index before which all groups have performed an allocation makes
this cleaner.

This patch therefore reorders the sibling groups so that the group that
performs no allocations is the last in the array.  A follow-on patch will
leverage this to fix a bug in the test that incorrectly asserts that a
sibling group that had performed an allocation, but only had protection
from its parent, will not observe any memory.events.low events during
reclaim.

Link: https://lkml.kernel.org/r/20220423155619.3669555-1-void@manifault.com
Link: https://lkml.kernel.org/r/20220423155619.3669555-2-void@manifault.com
Signed-off-by: David Vernet <void@manifault.com>
Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Tejun Heo <tj@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Shakeel Butt <shakeelb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/cgroup/test_memcontrol.c | 28 ++++++++++++------------
 1 file changed, 14 insertions(+), 14 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
index ee7defb17280..d240a391f99e 100644
--- a/tools/testing/selftests/cgroup/test_memcontrol.c
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -248,8 +248,8 @@ static int cg_test_proc_killed(const char *cgroup)
  * A/B     memory.min = 50M,  memory.current = 50M
  * A/B/C   memory.min = 75M,  memory.current = 50M
  * A/B/D   memory.min = 25M,  memory.current = 50M
- * A/B/E   memory.min = 500M, memory.current = 0
- * A/B/F   memory.min = 0,    memory.current = 50M
+ * A/B/E   memory.min = 0,    memory.current = 50M
+ * A/B/F   memory.min = 500M, memory.current = 0
  *
  * Usages are pagecache, but the test keeps a running
  * process in every leaf cgroup.
@@ -259,7 +259,7 @@ static int cg_test_proc_killed(const char *cgroup)
  * A/B    memory.current ~= 50M
  * A/B/C  memory.current ~= 33M
  * A/B/D  memory.current ~= 17M
- * A/B/E  memory.current ~= 0
+ * A/B/F  memory.current ~= 0
  *
  * After that it tries to allocate more than there is
  * unprotected memory in A available, and checks
@@ -325,7 +325,7 @@ static int test_memcg_min(const char *root)
 		if (cg_create(children[i]))
 			goto cleanup;
 
-		if (i == 2)
+		if (i > 2)
 			continue;
 
 		cg_run_nowait(children[i], alloc_pagecache_50M_noexit,
@@ -340,9 +340,9 @@ static int test_memcg_min(const char *root)
 		goto cleanup;
 	if (cg_write(children[1], "memory.min", "25M"))
 		goto cleanup;
-	if (cg_write(children[2], "memory.min", "500M"))
+	if (cg_write(children[2], "memory.min", "0"))
 		goto cleanup;
-	if (cg_write(children[3], "memory.min", "0"))
+	if (cg_write(children[3], "memory.min", "500M"))
 		goto cleanup;
 
 	attempts = 0;
@@ -368,7 +368,7 @@ static int test_memcg_min(const char *root)
 	if (!values_close(c[1], MB(17), 10))
 		goto cleanup;
 
-	if (!values_close(c[2], 0, 1))
+	if (c[3] != 0)
 		goto cleanup;
 
 	if (!cg_run(parent[2], alloc_anon, (void *)MB(170)))
@@ -405,8 +405,8 @@ cleanup:
  * A/B     memory.low = 50M,  memory.current = 50M
  * A/B/C   memory.low = 75M,  memory.current = 50M
  * A/B/D   memory.low = 25M,  memory.current = 50M
- * A/B/E   memory.low = 500M, memory.current = 0
- * A/B/F   memory.low = 0,    memory.current = 50M
+ * A/B/E   memory.low = 0,    memory.current = 50M
+ * A/B/F   memory.low = 500M, memory.current = 0
  *
  * Usages are pagecache.
  * Then it creates A/G an creates a significant
@@ -416,7 +416,7 @@ cleanup:
  * A/B    memory.current ~= 50M
  * A/B/   memory.current ~= 33M
  * A/B/D  memory.current ~= 17M
- * A/B/E  memory.current ~= 0
+ * A/B/F  memory.current ~= 0
  *
  * After that it tries to allocate more than there is
  * unprotected memory in A available,
@@ -480,7 +480,7 @@ static int test_memcg_low(const char *root)
 		if (cg_create(children[i]))
 			goto cleanup;
 
-		if (i == 2)
+		if (i > 2)
 			continue;
 
 		if (cg_run(children[i], alloc_pagecache_50M, (void *)(long)fd))
@@ -495,9 +495,9 @@ static int test_memcg_low(const char *root)
 		goto cleanup;
 	if (cg_write(children[1], "memory.low", "25M"))
 		goto cleanup;
-	if (cg_write(children[2], "memory.low", "500M"))
+	if (cg_write(children[2], "memory.low", "0"))
 		goto cleanup;
-	if (cg_write(children[3], "memory.low", "0"))
+	if (cg_write(children[3], "memory.low", "500M"))
 		goto cleanup;
 
 	if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
@@ -515,7 +515,7 @@ static int test_memcg_low(const char *root)
 	if (!values_close(c[1], MB(17), 10))
 		goto cleanup;
 
-	if (!values_close(c[2], 0, 1))
+	if (c[3] != 0)
 		goto cleanup;
 
 	if (cg_run(parent[2], alloc_anon, (void *)MB(166))) {
-- 
cgit 


From cdc69458a5f3d4cf31372efd45fe92cec6b167e4 Mon Sep 17 00:00:00 2001
From: David Vernet <void@manifault.com>
Date: Thu, 12 May 2022 20:22:57 -0700
Subject: cgroup: account for memory_recursiveprot in test_memcg_low()

The test_memcg_low() testcase in test_memcontrol.c verifies the expected
behavior of groups using the memory.low knob.  Part of the testcase
verifies that a group with memory.low that experiences reclaim due to
memory pressure elsewhere in the system, observes memory.events.low events
as a result of that reclaim.

In commit 8a931f801340 ("mm: memcontrol: recursive memory.low
protection"), the memory controller was updated to propagate memory.low
and memory.min protection from a parent group to its children via a
configurable memory_recursiveprot mount option.  This unfortunately broke
the memcg tests, which asserts that a sibling that experienced reclaim but
had a memory.low value of 0, would not observe any memory.low events.
This patch updates test_memcg_low() to account for the new behavior
introduced by memory_recursiveprot.

So as to make the test resilient to multiple configurations, the patch
also adds a new proc_mount_contains() helper that checks for a string in
/proc/mounts, and is used to toggle behavior based on whether the default
memory_recursiveprot was present.

Link: https://lkml.kernel.org/r/20220423155619.3669555-3-void@manifault.com
Signed-off-by: David Vernet <void@manifault.com>
Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/cgroup/cgroup_util.c     | 12 ++++++++++++
 tools/testing/selftests/cgroup/cgroup_util.h     |  1 +
 tools/testing/selftests/cgroup/test_memcontrol.c | 16 +++++++++++++---
 3 files changed, 26 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c
index e6f3679cdcc0..b4d7027a44c3 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.c
+++ b/tools/testing/selftests/cgroup/cgroup_util.c
@@ -528,6 +528,18 @@ int set_oom_adj_score(int pid, int score)
 	return 0;
 }
 
+int proc_mount_contains(const char *option)
+{
+	char buf[4 * PAGE_SIZE];
+	ssize_t read;
+
+	read = read_text("/proc/mounts", buf, sizeof(buf));
+	if (read < 0)
+		return read;
+
+	return strstr(buf, option) != NULL;
+}
+
 ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size)
 {
 	char path[PATH_MAX];
diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h
index 628738532ac9..756f76052b44 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.h
+++ b/tools/testing/selftests/cgroup/cgroup_util.h
@@ -48,6 +48,7 @@ extern int is_swap_enabled(void);
 extern int set_oom_adj_score(int pid, int score);
 extern int cg_wait_for_proc_count(const char *cgroup, int count);
 extern int cg_killall(const char *cgroup);
+int proc_mount_contains(const char *option);
 extern ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size);
 extern int proc_read_strstr(int pid, bool thread, const char *item, const char *needle);
 extern pid_t clone_into_cgroup(int cgroup_fd);
diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
index d240a391f99e..4da138d05acb 100644
--- a/tools/testing/selftests/cgroup/test_memcontrol.c
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -21,6 +21,8 @@
 #include "../kselftest.h"
 #include "cgroup_util.h"
 
+static bool has_recursiveprot;
+
 /*
  * This test creates two nested cgroups with and without enabling
  * the memory controller.
@@ -525,15 +527,18 @@ static int test_memcg_low(const char *root)
 	}
 
 	for (i = 0; i < ARRAY_SIZE(children); i++) {
+		int no_low_events_index = has_recursiveprot ? 2 : 1;
+
 		oom = cg_read_key_long(children[i], "memory.events", "oom ");
 		low = cg_read_key_long(children[i], "memory.events", "low ");
 
 		if (oom)
 			goto cleanup;
-		if (i < 2 && low <= 0)
+		if (i <= no_low_events_index && low <= 0)
 			goto cleanup;
-		if (i >= 2 && low)
+		if (i > no_low_events_index && low)
 			goto cleanup;
+
 	}
 
 	ret = KSFT_PASS;
@@ -1382,7 +1387,7 @@ struct memcg_test {
 int main(int argc, char **argv)
 {
 	char root[PATH_MAX];
-	int i, ret = EXIT_SUCCESS;
+	int i, proc_status, ret = EXIT_SUCCESS;
 
 	if (cg_find_unified_root(root, sizeof(root)))
 		ksft_exit_skip("cgroup v2 isn't mounted\n");
@@ -1398,6 +1403,11 @@ int main(int argc, char **argv)
 		if (cg_write(root, "cgroup.subtree_control", "+memory"))
 			ksft_exit_skip("Failed to set memory controller\n");
 
+	proc_status = proc_mount_contains("memory_recursiveprot");
+	if (proc_status < 0)
+		ksft_exit_skip("Failed to query cgroup mount option\n");
+	has_recursiveprot = proc_status;
+
 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
 		switch (tests[i].fn(root)) {
 		case KSFT_PASS:
-- 
cgit 


From 72b1e03aa7255094d15752952a7e56c5f39b6e37 Mon Sep 17 00:00:00 2001
From: David Vernet <void@manifault.com>
Date: Thu, 12 May 2022 20:22:57 -0700
Subject: cgroup: account for memory_localevents in
 test_memcg_oom_group_leaf_events()

The test_memcg_oom_group_leaf_events() testcase in the cgroup memcg tests
validates that processes in a group that perform allocations exceeding
memory.oom.group are killed.  It also validates that the
memory.events.oom_kill events are properly propagated in this case.

Commit 06e11c907ea4 ("kselftests: memcg: update the oom group leaf events
test") fixed test_memcg_oom_group_leaf_events() to account for the fact
that the memory.events.oom_kill events in a child cgroup is propagated up
to its parent.  This behavior can actually be configured by the
memory_localevents mount option, so this patch updates the testcase to
properly account for the possible presence of this mount option.

Link: https://lkml.kernel.org/r/20220423155619.3669555-4-void@manifault.com
Signed-off-by: David Vernet <void@manifault.com>
Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/cgroup/test_memcontrol.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
index 4da138d05acb..31d5c3f9b2b2 100644
--- a/tools/testing/selftests/cgroup/test_memcontrol.c
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -21,6 +21,7 @@
 #include "../kselftest.h"
 #include "cgroup_util.h"
 
+static bool has_localevents;
 static bool has_recursiveprot;
 
 /*
@@ -1200,6 +1201,7 @@ static int test_memcg_oom_group_leaf_events(const char *root)
 {
 	int ret = KSFT_FAIL;
 	char *parent, *child;
+	long parent_oom_events;
 
 	parent = cg_name(root, "memcg_test_0");
 	child = cg_name(root, "memcg_test_0/memcg_test_1");
@@ -1345,14 +1347,20 @@ static int test_memcg_oom_group_score_events(const char *root)
 	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
 		goto cleanup;
 
-	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3)
-		goto cleanup;
+	parent_oom_events = cg_read_key_long(
+			parent, "memory.events", "oom_kill ");
+	/*
+	 * If memory_localevents is not enabled (the default), the parent should
+	 * count OOM events in its children groups. Otherwise, it should not
+	 * have observed any events.
+	 */
+	if ((has_localevents && parent_oom_events == 0) ||
+	     parent_oom_events > 0)
+		ret = KSFT_PASS;
 
 	if (kill(safe_pid, SIGKILL))
 		goto cleanup;
 
-	ret = KSFT_PASS;
-
 cleanup:
 	if (memcg)
 		cg_destroy(memcg);
@@ -1361,7 +1369,6 @@ cleanup:
 	return ret;
 }
 
-
 #define T(x) { x, #x }
 struct memcg_test {
 	int (*fn)(const char *root);
@@ -1408,6 +1415,11 @@ int main(int argc, char **argv)
 		ksft_exit_skip("Failed to query cgroup mount option\n");
 	has_recursiveprot = proc_status;
 
+	proc_status = proc_mount_contains("memory_localevents");
+	if (proc_status < 0)
+		ksft_exit_skip("Failed to query cgroup mount option\n");
+	has_localevents = proc_status;
+
 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
 		switch (tests[i].fn(root)) {
 		case KSFT_PASS:
-- 
cgit 


From 830316807e0275146cbd5d2ae66fd338d0dfd09e Mon Sep 17 00:00:00 2001
From: David Vernet <void@manifault.com>
Date: Thu, 12 May 2022 20:22:57 -0700
Subject: cgroup: remove racy check in test_memcg_sock()

test_memcg_sock() in the cgroup memcg tests, verifies expected memory
accounting for sockets.  The test forks a process which functions as a TCP
server, and sends large buffers back and forth between itself (as the TCP
client) and the forked TCP server.  While doing so, it verifies that
memory.current and memory.stat.sock look correct.

There is currently a check in tcp_client() which asserts memory.current >=
memory.stat.sock.  This check is racy, as between memory.current and
memory.stat.sock being queried, a packet could come in which causes
mem_cgroup_charge_skmem() to be invoked.  This could cause
memory.stat.sock to exceed memory.current.  Reversing the order of
querying doesn't address the problem either, as memory may be reclaimed
between the two calls.  Instead, this patch just removes that assertion
altogether, and instead relies on the values_close() check that follows to
validate the expected accounting.

Link: https://lkml.kernel.org/r/20220423155619.3669555-5-void@manifault.com
Signed-off-by: David Vernet <void@manifault.com>
Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/cgroup/test_memcontrol.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
index 31d5c3f9b2b2..6d3aace6be53 100644
--- a/tools/testing/selftests/cgroup/test_memcontrol.c
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -1102,9 +1102,6 @@ static int tcp_client(const char *cgroup, unsigned short port)
 		if (current < 0 || sock < 0)
 			goto close_sk;
 
-		if (current < sock)
-			goto close_sk;
-
 		if (values_close(current, sock, 10)) {
 			ret = KSFT_PASS;
 			break;
-- 
cgit 


From c1a31a2f7a9c08665f95a16c40b3551af43cb95c Mon Sep 17 00:00:00 2001
From: David Vernet <void@manifault.com>
Date: Thu, 12 May 2022 20:22:57 -0700
Subject: cgroup: fix racy check in alloc_pagecache_max_30M() helper function

alloc_pagecache_max_30M() in the cgroup memcg tests performs a 50MB
pagecache allocation, which it expects to be capped at 30MB due to the
calling process having a memory.high setting of 30MB.  After the
allocation, the function contains a check that verifies that MB(29) <
memory.current <= MB(30).  This check can actually fail
non-deterministically.

The testcases that use this function are test_memcg_high() and
test_memcg_max(), which set memory.min and memory.max to 30MB respectively
for the cgroup under test.  The allocation can slightly exceed this number
in both cases, and for memory.max, the process performing the allocation
will not have the OOM killer invoked as it's performing a pagecache
allocation.  This patchset therefore updates the above check to instead
use the verify_close() helper function.

Link: https://lkml.kernel.org/r/20220423155619.3669555-6-void@manifault.com
Signed-off-by: David Vernet <void@manifault.com>
Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/cgroup/test_memcontrol.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
index 6d3aace6be53..6ab94317c87b 100644
--- a/tools/testing/selftests/cgroup/test_memcontrol.c
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -568,9 +568,14 @@ static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
 {
 	size_t size = MB(50);
 	int ret = -1;
-	long current;
+	long current, high, max;
 	int fd;
 
+	high = cg_read_long(cgroup, "memory.high");
+	max = cg_read_long(cgroup, "memory.max");
+	if (high != MB(30) && max != MB(30))
+		goto cleanup;
+
 	fd = get_temp_fd();
 	if (fd < 0)
 		return -1;
@@ -579,7 +584,7 @@ static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
 		goto cleanup;
 
 	current = cg_read_long(cgroup, "memory.current");
-	if (current <= MB(29) || current > MB(30))
+	if (!values_close(current, MB(30), 5))
 		goto cleanup;
 
 	ret = 0;
-- 
cgit 


From 213adc63dfbcdff9a0c19ec1f2681fda9c05cf6d Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Fri, 13 May 2022 15:09:28 -0400
Subject: kseltest/cgroup: Make test_stress.sh work if run interactively

Commit 54de76c01239 ("kselftest/cgroup: fix test_stress.sh to use OUTPUT
dir") changes the test_core command path from . to $OUTPUT. However,
variable OUTPUT may not be defined if the command is run interactively.
Fix that by using ${OUTPUT:-.} to cover both cases.

Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 tools/testing/selftests/cgroup/test_stress.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/cgroup/test_stress.sh b/tools/testing/selftests/cgroup/test_stress.sh
index 109c044f715f..3c9c4554d5f6 100755
--- a/tools/testing/selftests/cgroup/test_stress.sh
+++ b/tools/testing/selftests/cgroup/test_stress.sh
@@ -1,4 +1,4 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-./with_stress.sh -s subsys -s fork ${OUTPUT}/test_core
+./with_stress.sh -s subsys -s fork ${OUTPUT:-.}/test_core
-- 
cgit 


From 0d2d2648931bdb1a629bf0df4e339e6a326a6136 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 13 May 2022 10:37:03 -0700
Subject: selftests/bpf: Fix usdt_400 test case

usdt_400 test case relies on compiler using the same arg spec for
usdt_400 USDT. This assumption breaks with Clang (Clang generates
different arg specs with varying offsets relative to %rbp), so simplify
this further and hard-code the constant which will guarantee that arg
spec is the same across all 400 inlinings.

Fixes: 630301b0d59d ("selftests/bpf: Add basic USDT selftests")
Reported-by: Mykola Lysenko <mykolal@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20220513173703.89271-1-andrii@kernel.org
---
 tools/testing/selftests/bpf/prog_tests/usdt.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c
index a71f51bdc08d..5f733d50b0d7 100644
--- a/tools/testing/selftests/bpf/prog_tests/usdt.c
+++ b/tools/testing/selftests/bpf/prog_tests/usdt.c
@@ -190,9 +190,7 @@ __weak void trigger_300_usdts(void)
 
 static void __always_inline f400(int x __attribute__((unused)))
 {
-	static int y;
-
-	STAP_PROBE1(test, usdt_400, y++);
+	STAP_PROBE1(test, usdt_400, 400);
 }
 
 /* this time we have 400 different USDT call sites, but they have uniform
@@ -299,7 +297,7 @@ static void subtest_multispec_usdt(void)
 	trigger_400_usdts();
 
 	ASSERT_EQ(bss->usdt_100_called, 400, "usdt_400_called");
-	ASSERT_EQ(bss->usdt_100_sum, 399 * 400 / 2, "usdt_400_sum");
+	ASSERT_EQ(bss->usdt_100_sum, 400 * 400, "usdt_400_sum");
 
 cleanup:
 	test_usdt__destroy(skel);
-- 
cgit 


From e274f71540082b015568eaf0b08ec70ac08dc4ad Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 12 May 2022 16:26:42 -0700
Subject: selftests: mptcp: add subflow limits test-cases

Add and delete a bunch of endpoints and verify the
respect of configured limits.

This covers the codepath introduced by the previous patch.

Fixes: 69c6ce7b6eca ("selftests: mptcp: add implicit endpoint test case")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 48 +++++++++++++++++++++++--
 1 file changed, 46 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 7314257d248a..48ef112f42c2 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -1444,6 +1444,33 @@ chk_prio_nr()
 	[ "${dump_stats}" = 1 ] && dump_stats
 }
 
+chk_subflow_nr()
+{
+	local need_title="$1"
+	local msg="$2"
+	local subflow_nr=$3
+	local cnt1
+	local cnt2
+
+	if [ -n "${need_title}" ]; then
+		printf "%03u %-36s %s" "${TEST_COUNT}" "${TEST_NAME}" "${msg}"
+	else
+		printf "%-${nr_blank}s %s" " " "${msg}"
+	fi
+
+	cnt1=$(ss -N $ns1 -tOni | grep -c token)
+	cnt2=$(ss -N $ns2 -tOni | grep -c token)
+	if [ "$cnt1" != "$subflow_nr" -o "$cnt2" != "$subflow_nr" ]; then
+		echo "[fail] got $cnt1:$cnt2 subflows expected $subflow_nr"
+		fail_test
+		dump_stats=1
+	else
+		echo "[ ok ]"
+	fi
+
+	[ "${dump_stats}" = 1 ] && ( ss -N $ns1 -tOni ; ss -N $ns1 -tOni | grep token; ip -n $ns1 mptcp endpoint )
+}
+
 chk_link_usage()
 {
 	local ns=$1
@@ -2556,7 +2583,7 @@ fastclose_tests()
 	fi
 }
 
-implicit_tests()
+endpoint_tests()
 {
 	# userspace pm type prevents add_addr
 	if reset "implicit EP"; then
@@ -2578,6 +2605,23 @@ implicit_tests()
 			$ns2 10.0.2.2 id 1 flags signal
 		wait
 	fi
+
+	if reset "delete and re-add"; then
+		pm_nl_set_limits $ns1 1 1
+		pm_nl_set_limits $ns2 1 1
+		pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1 4 0 0 slow &
+
+		wait_mpj $ns2
+		pm_nl_del_endpoint $ns2 2 10.0.2.2
+		sleep 0.5
+		chk_subflow_nr needtitle "after delete" 1
+
+		pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow
+		wait_mpj $ns2
+		chk_subflow_nr "" "after re-add" 2
+		wait
+	fi
 }
 
 # [$1: error message]
@@ -2624,7 +2668,7 @@ all_tests_sorted=(
 	d@deny_join_id0_tests
 	m@fullmesh_tests
 	z@fastclose_tests
-	I@implicit_tests
+	I@endpoint_tests
 )
 
 all_tests_args=""
-- 
cgit 


From 9a5681710740e496ee8b08004ddf2c212b76b36a Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 10 May 2022 17:45:16 +0100
Subject: selftests/arm64: Log errors in verify_mte_pointer_validity()

When we detect a problem in verify_mte_pointer_validity() while checking
tags we don't log what the problem was which makes debugging harder. Add
some diagnostics.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lore.kernel.org/r/20220510164520.768783-2-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 tools/testing/selftests/arm64/mte/check_tags_inclusion.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
index deaef1f61076..b906914997ce 100644
--- a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
+++ b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
@@ -25,8 +25,11 @@ static int verify_mte_pointer_validity(char *ptr, int mode)
 	/* Check the validity of the tagged pointer */
 	memset((void *)ptr, '1', BUFFER_SIZE);
 	mte_wait_after_trig();
-	if (cur_mte_cxt.fault_valid)
+	if (cur_mte_cxt.fault_valid) {
+		ksft_print_msg("Unexpected fault recorded for %p-%p in mode %x\n",
+			       ptr, ptr + BUFFER_SIZE, mode);
 		return KSFT_FAIL;
+	}
 	/* Proceed further for nonzero tags */
 	if (!MT_FETCH_TAG((uintptr_t)ptr))
 		return KSFT_PASS;
@@ -34,10 +37,13 @@ static int verify_mte_pointer_validity(char *ptr, int mode)
 	/* Check the validity outside the range */
 	ptr[BUFFER_SIZE] = '2';
 	mte_wait_after_trig();
-	if (!cur_mte_cxt.fault_valid)
+	if (!cur_mte_cxt.fault_valid) {
+		ksft_print_msg("No valid fault recorded for %p in mode %x\n",
+			       ptr, mode);
 		return KSFT_FAIL;
-	else
+	} else {
 		return KSFT_PASS;
+	}
 }
 
 static int check_single_included_tags(int mem_type, int mode)
-- 
cgit 


From ffc8274c21938b30b10fcd6d4fc0feb29c222955 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 10 May 2022 17:45:17 +0100
Subject: selftests/arm64: Allow zero tags in mte_switch_mode()

mte_switch_mode() currently rejects attempts to set a zero tag however
there are tests such as check_tags_inclusion which attempt to cover cases
with zero tags using mte_switch_mode(). Since it is not clear why we are
rejecting zero tags change the test to accept them.

The issue has not previously been as apparent as it should be since the
return value of mte_switch_mode() was not always checked in the callers
and the tests weren't otherwise failing.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lore.kernel.org/r/20220510164520.768783-3-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 tools/testing/selftests/arm64/mte/mte_common_util.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c
index 260206f4dce0..6ff4c4bcbff1 100644
--- a/tools/testing/selftests/arm64/mte/mte_common_util.c
+++ b/tools/testing/selftests/arm64/mte/mte_common_util.c
@@ -283,7 +283,7 @@ int mte_switch_mode(int mte_option, unsigned long incl_mask)
 		return -EINVAL;
 	}
 
-	if (!(incl_mask <= MTE_ALLOW_NON_ZERO_TAG)) {
+	if (incl_mask & ~MT_INCLUDE_TAG_MASK) {
 		ksft_print_msg("FAIL: Invalid incl_mask %lx\n", incl_mask);
 		return -EINVAL;
 	}
-- 
cgit 


From 72d6771cb1734a2f32308c34c61043595e4bcb41 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 10 May 2022 17:45:18 +0100
Subject: selftests/arm64: Check failures to set tags in check_tags_inclusion

The MTE check_tags_inclusion test uses the mte_switch_mode() helper but
ignores the return values it generates meaning we might not be testing
the things we're trying to test, fail the test if it reports an error.
The helper will log any errors it returns.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lore.kernel.org/r/20220510164520.768783-4-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 .../testing/selftests/arm64/mte/check_tags_inclusion.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
index b906914997ce..d180ba3df990 100644
--- a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
+++ b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
@@ -49,7 +49,7 @@ static int verify_mte_pointer_validity(char *ptr, int mode)
 static int check_single_included_tags(int mem_type, int mode)
 {
 	char *ptr;
-	int tag, run, result = KSFT_PASS;
+	int tag, run, ret, result = KSFT_PASS;
 
 	ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
 	if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE,
@@ -57,7 +57,9 @@ static int check_single_included_tags(int mem_type, int mode)
 		return KSFT_FAIL;
 
 	for (tag = 0; (tag < MT_TAG_COUNT) && (result == KSFT_PASS); tag++) {
-		mte_switch_mode(mode, MT_INCLUDE_VALID_TAG(tag));
+		ret = mte_switch_mode(mode, MT_INCLUDE_VALID_TAG(tag));
+		if (ret != 0)
+			result = KSFT_FAIL;
 		/* Try to catch a excluded tag by a number of tries. */
 		for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) {
 			ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
@@ -111,14 +113,16 @@ static int check_multiple_included_tags(int mem_type, int mode)
 static int check_all_included_tags(int mem_type, int mode)
 {
 	char *ptr;
-	int run, result = KSFT_PASS;
+	int run, ret, result = KSFT_PASS;
 
 	ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
 	if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE,
 				   mem_type, false) != KSFT_PASS)
 		return KSFT_FAIL;
 
-	mte_switch_mode(mode, MT_INCLUDE_TAG_MASK);
+	ret = mte_switch_mode(mode, MT_INCLUDE_TAG_MASK);
+	if (ret != 0)
+		return KSFT_FAIL;
 	/* Try to catch a excluded tag by a number of tries. */
 	for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) {
 		ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
@@ -135,13 +139,15 @@ static int check_all_included_tags(int mem_type, int mode)
 static int check_none_included_tags(int mem_type, int mode)
 {
 	char *ptr;
-	int run;
+	int run, ret;
 
 	ptr = (char *)mte_allocate_memory(BUFFER_SIZE, mem_type, 0, false);
 	if (check_allocated_memory(ptr, BUFFER_SIZE, mem_type, false) != KSFT_PASS)
 		return KSFT_FAIL;
 
-	mte_switch_mode(mode, MT_EXCLUDE_TAG_MASK);
+	ret = mte_switch_mode(mode, MT_EXCLUDE_TAG_MASK);
+	if (ret != 0)
+		return KSFT_FAIL;
 	/* Try to catch a excluded tag by a number of tries. */
 	for (run = 0; run < RUNS; run++) {
 		ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
-- 
cgit 


From 541235dee01140c4ae9e71e8dfbdb4c2f9eac9d5 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 10 May 2022 17:45:19 +0100
Subject: selftests/arm64: Remove casts to/from void in check_tags_inclusion

Void pointers may be freely used with other pointer types in C, any casts
between void * and other pointer types serve no purpose other than to
mask potential warnings. Drop such casts from check_tags_inclusion to
help with future review of the code.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lore.kernel.org/r/20220510164520.768783-5-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 .../selftests/arm64/mte/check_tags_inclusion.c     | 24 +++++++++++-----------
 1 file changed, 12 insertions(+), 12 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
index d180ba3df990..2b1425b92b69 100644
--- a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
+++ b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
@@ -23,7 +23,7 @@ static int verify_mte_pointer_validity(char *ptr, int mode)
 {
 	mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE);
 	/* Check the validity of the tagged pointer */
-	memset((void *)ptr, '1', BUFFER_SIZE);
+	memset(ptr, '1', BUFFER_SIZE);
 	mte_wait_after_trig();
 	if (cur_mte_cxt.fault_valid) {
 		ksft_print_msg("Unexpected fault recorded for %p-%p in mode %x\n",
@@ -51,7 +51,7 @@ static int check_single_included_tags(int mem_type, int mode)
 	char *ptr;
 	int tag, run, ret, result = KSFT_PASS;
 
-	ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
+	ptr = mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
 	if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE,
 				   mem_type, false) != KSFT_PASS)
 		return KSFT_FAIL;
@@ -62,7 +62,7 @@ static int check_single_included_tags(int mem_type, int mode)
 			result = KSFT_FAIL;
 		/* Try to catch a excluded tag by a number of tries. */
 		for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) {
-			ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
+			ptr = mte_insert_tags(ptr, BUFFER_SIZE);
 			/* Check tag value */
 			if (MT_FETCH_TAG((uintptr_t)ptr) == tag) {
 				ksft_print_msg("FAIL: wrong tag = 0x%x with include mask=0x%x\n",
@@ -74,7 +74,7 @@ static int check_single_included_tags(int mem_type, int mode)
 			result = verify_mte_pointer_validity(ptr, mode);
 		}
 	}
-	mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
+	mte_free_memory_tag_range(ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
 	return result;
 }
 
@@ -84,7 +84,7 @@ static int check_multiple_included_tags(int mem_type, int mode)
 	int tag, run, result = KSFT_PASS;
 	unsigned long excl_mask = 0;
 
-	ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
+	ptr = mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
 	if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE,
 				   mem_type, false) != KSFT_PASS)
 		return KSFT_FAIL;
@@ -94,7 +94,7 @@ static int check_multiple_included_tags(int mem_type, int mode)
 		mte_switch_mode(mode, MT_INCLUDE_VALID_TAGS(excl_mask));
 		/* Try to catch a excluded tag by a number of tries. */
 		for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) {
-			ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
+			ptr = mte_insert_tags(ptr, BUFFER_SIZE);
 			/* Check tag value */
 			if (MT_FETCH_TAG((uintptr_t)ptr) < tag) {
 				ksft_print_msg("FAIL: wrong tag = 0x%x with include mask=0x%x\n",
@@ -106,7 +106,7 @@ static int check_multiple_included_tags(int mem_type, int mode)
 			result = verify_mte_pointer_validity(ptr, mode);
 		}
 	}
-	mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
+	mte_free_memory_tag_range(ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
 	return result;
 }
 
@@ -115,7 +115,7 @@ static int check_all_included_tags(int mem_type, int mode)
 	char *ptr;
 	int run, ret, result = KSFT_PASS;
 
-	ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
+	ptr = mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
 	if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE,
 				   mem_type, false) != KSFT_PASS)
 		return KSFT_FAIL;
@@ -132,7 +132,7 @@ static int check_all_included_tags(int mem_type, int mode)
 		 */
 		result = verify_mte_pointer_validity(ptr, mode);
 	}
-	mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
+	mte_free_memory_tag_range(ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
 	return result;
 }
 
@@ -141,7 +141,7 @@ static int check_none_included_tags(int mem_type, int mode)
 	char *ptr;
 	int run, ret;
 
-	ptr = (char *)mte_allocate_memory(BUFFER_SIZE, mem_type, 0, false);
+	ptr = mte_allocate_memory(BUFFER_SIZE, mem_type, 0, false);
 	if (check_allocated_memory(ptr, BUFFER_SIZE, mem_type, false) != KSFT_PASS)
 		return KSFT_FAIL;
 
@@ -159,12 +159,12 @@ static int check_none_included_tags(int mem_type, int mode)
 		}
 		mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE);
 		/* Check the write validity of the untagged pointer */
-		memset((void *)ptr, '1', BUFFER_SIZE);
+		memset(ptr, '1', BUFFER_SIZE);
 		mte_wait_after_trig();
 		if (cur_mte_cxt.fault_valid)
 			break;
 	}
-	mte_free_memory((void *)ptr, BUFFER_SIZE, mem_type, false);
+	mte_free_memory(ptr, BUFFER_SIZE, mem_type, false);
 	if (cur_mte_cxt.fault_valid)
 		return KSFT_FAIL;
 	else
-- 
cgit 


From 0639e02254e6863fc9c96666be45919437a6dc2e Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 10 May 2022 17:45:20 +0100
Subject: selftests/arm64: Use switch statements in mte_common_util.c

In the MTE tests there are several places where we use chains of if
statements to open code what could be written as switch statements, move
over to switch statements to make the idiom clearer.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20220510164520.768783-6-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 .../testing/selftests/arm64/mte/mte_common_util.c  | 23 +++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c
index 6ff4c4bcbff1..00ffd34c66d3 100644
--- a/tools/testing/selftests/arm64/mte/mte_common_util.c
+++ b/tools/testing/selftests/arm64/mte/mte_common_util.c
@@ -128,13 +128,16 @@ static void *__mte_allocate_memory_range(size_t size, int mem_type, int mapping,
 	int prot_flag, map_flag;
 	size_t entire_size = size + range_before + range_after;
 
-	if (mem_type != USE_MALLOC && mem_type != USE_MMAP &&
-	    mem_type != USE_MPROTECT) {
+	switch (mem_type) {
+	case USE_MALLOC:
+		return malloc(entire_size) + range_before;
+	case USE_MMAP:
+	case USE_MPROTECT:
+		break;
+	default:
 		ksft_print_msg("FAIL: Invalid allocate request\n");
 		return NULL;
 	}
-	if (mem_type == USE_MALLOC)
-		return malloc(entire_size) + range_before;
 
 	prot_flag = PROT_READ | PROT_WRITE;
 	if (mem_type == USE_MMAP)
@@ -287,13 +290,19 @@ int mte_switch_mode(int mte_option, unsigned long incl_mask)
 		ksft_print_msg("FAIL: Invalid incl_mask %lx\n", incl_mask);
 		return -EINVAL;
 	}
+
 	en = PR_TAGGED_ADDR_ENABLE;
-	if (mte_option == MTE_SYNC_ERR)
+	switch (mte_option) {
+	case MTE_SYNC_ERR:
 		en |= PR_MTE_TCF_SYNC;
-	else if (mte_option == MTE_ASYNC_ERR)
+		break;
+	case MTE_ASYNC_ERR:
 		en |= PR_MTE_TCF_ASYNC;
-	else if (mte_option == MTE_NONE_ERR)
+		break;
+	case MTE_NONE_ERR:
 		en |= PR_MTE_TCF_NONE;
+		break;
+	}
 
 	en |= (incl_mask << PR_MTE_TAG_SHIFT);
 	/* Enable address tagging ABI, mte error reporting mode and tag inclusion mask. */
-- 
cgit 


From d7a49291d786b4400996afe3afcc3ef5eeb6f0ef Mon Sep 17 00:00:00 2001
From: Andre Przywara <andre.przywara@arm.com>
Date: Wed, 11 May 2022 18:21:29 +0100
Subject: kselftest/arm64: bti: force static linking

The "bti" selftests are built with -nostdlib, which apparently
automatically creates a statically linked binary, which is what we want
and need for BTI (to avoid interactions with the dynamic linker).

However this is not true when building a PIE binary, which some
toolchains (Ubuntu) configure as the default.
When compiling btitest with such a toolchain, it will create a
dynamically linked binary, which will probably fail some tests, as the
dynamic linker might not support BTI:
===================
TAP version 13
1..18
not ok 1 nohint_func/call_using_br_x0
not ok 2 nohint_func/call_using_br_x16
not ok 3 nohint_func/call_using_blr
....
===================

To make sure we create static binaries, add an explicit -static on the
linker command line. This forces static linking even if the toolchain
defaults to PIE builds, and fixes btitest runs on BTI enabled machines.

Signed-off-by: Andre Przywara <andre.przywara@arm.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Fixes: 314bcbf09f14 ("kselftest: arm64: Add BTI tests")
Link: https://lore.kernel.org/r/20220511172129.2078337-1-andre.przywara@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 tools/testing/selftests/arm64/bti/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/bti/Makefile b/tools/testing/selftests/arm64/bti/Makefile
index 73e013c082a6..dafa1c2aa5c4 100644
--- a/tools/testing/selftests/arm64/bti/Makefile
+++ b/tools/testing/selftests/arm64/bti/Makefile
@@ -39,7 +39,7 @@ BTI_OBJS =                                      \
 	teststubs-bti.o                         \
 	trampoline-bti.o
 gen/btitest: $(BTI_OBJS)
-	$(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -o $@ $^
+	$(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -static -o $@ $^
 
 NOBTI_OBJS =                                    \
 	test-nobti.o                         \
@@ -50,7 +50,7 @@ NOBTI_OBJS =                                    \
 	teststubs-nobti.o                       \
 	trampoline-nobti.o
 gen/nobtitest: $(NOBTI_OBJS)
-	$(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -o $@ $^
+	$(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -static -o $@ $^
 
 # Including KSFT lib.mk here will also mangle the TEST_GEN_PROGS list
 # to account for any OUTPUT target-dirs optionally provided by
-- 
cgit 


From 9f93c2e0cda49a558c981a57fc4a7f8d143ced93 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Mon, 16 May 2022 19:22:13 +0100
Subject: kselftest/arm64: Explicitly build no BTI tests with BTI disabled

In case a distribution enables branch protection by default do as we do for
the main kernel and explicitly disable branch protection when building the
test case for having BTI disabled to ensure it doesn't get turned on by the
toolchain defaults.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20220516182213.727589-1-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 tools/testing/selftests/arm64/bti/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/bti/Makefile b/tools/testing/selftests/arm64/bti/Makefile
index dafa1c2aa5c4..ccdac414ad94 100644
--- a/tools/testing/selftests/arm64/bti/Makefile
+++ b/tools/testing/selftests/arm64/bti/Makefile
@@ -10,7 +10,7 @@ PROGS := $(patsubst %,gen/%,$(TEST_GEN_PROGS))
 # cases for statically linked and dynamically lined binaries are
 # slightly different.
 
-CFLAGS_NOBTI = -DBTI=0
+CFLAGS_NOBTI = -mbranch-protection=none -DBTI=0
 CFLAGS_BTI = -mbranch-protection=standard -DBTI=1
 
 CFLAGS_COMMON = -ffreestanding -Wall -Wextra $(CFLAGS)
-- 
cgit 


From 33d4a933e9273bb9b33db8dcd0e564881319443c Mon Sep 17 00:00:00 2001
From: Daniel Latypov <dlatypov@google.com>
Date: Thu, 12 May 2022 11:35:36 -0700
Subject: kunit: tool: remove dead parse_crash_in_log() logic

This logic depends on the kernel logging a message containing
'kunit test case crashed', but there is no corresponding logic to do so.

This is likely a relic of the revision process KUnit initially went
through when being upstreamed.

Delete it given
1) it's been missing for years and likely won't get implemented
2) the parser has been moving to be a more general KTAP parser,
   kunit-only magic like this isn't how we'd want to implement it.

Signed-off-by: Daniel Latypov <dlatypov@google.com>
Reviewed-by: David Gow <davidgow@google.com>
Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/kunit/kunit_parser.py                | 21 -------
 tools/testing/kunit/kunit_tool_test.py             | 17 ++----
 .../kunit/test_data/test_is_test_passed-crash.log  | 70 ----------------------
 3 files changed, 4 insertions(+), 104 deletions(-)
 delete mode 100644 tools/testing/kunit/test_data/test_is_test_passed-crash.log

(limited to 'tools/testing')

diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py
index e16331a5bec4..c8c0df56cc51 100644
--- a/tools/testing/kunit/kunit_parser.py
+++ b/tools/testing/kunit/kunit_parser.py
@@ -475,26 +475,6 @@ def parse_diagnostic(lines: LineStream) -> List[str]:
 		log.append(lines.pop())
 	return log
 
-DIAGNOSTIC_CRASH_MESSAGE = re.compile(r'^# .*?: kunit test case crashed!$')
-
-def parse_crash_in_log(test: Test) -> bool:
-	"""
-	Iterate through the lines of the log to parse for crash message.
-	If crash message found, set status to crashed and return True.
-	Otherwise return False.
-
-	Parameters:
-	test - Test object for current test being parsed
-
-	Return:
-	True if crash message found in log
-	"""
-	for line in test.log:
-		if DIAGNOSTIC_CRASH_MESSAGE.match(line):
-			test.status = TestStatus.TEST_CRASHED
-			return True
-	return False
-
 
 # Printing helper methods:
 
@@ -682,7 +662,6 @@ def bubble_up_test_results(test: Test) -> None:
 	Parameters:
 	test - Test object for current test being parsed
 	"""
-	parse_crash_in_log(test)
 	subtests = test.subtests
 	counts = test.counts
 	status = test.status
diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py
index aebda46bcad8..b417eceeda74 100755
--- a/tools/testing/kunit/kunit_tool_test.py
+++ b/tools/testing/kunit/kunit_tool_test.py
@@ -230,15 +230,6 @@ class KUnitParserTest(unittest.TestCase):
 		print_mock.stop()
 		self.assertEqual(0, len(result.subtests))
 
-	def test_crashed_test(self):
-		crashed_log = test_data_path('test_is_test_passed-crash.log')
-		with open(crashed_log) as file:
-			result = kunit_parser.parse_run_tests(
-				file.readlines())
-		self.assertEqual(
-			kunit_parser.TestStatus.TEST_CRASHED,
-			result.status)
-
 	def test_skipped_test(self):
 		skipped_log = test_data_path('test_skip_tests.log')
 		with open(skipped_log) as file:
@@ -478,10 +469,10 @@ class KUnitJsonTest(unittest.TestCase):
 			result["sub_groups"][1]["test_cases"][0])
 
 	def test_crashed_test_json(self):
-		result = self._json_for('test_is_test_passed-crash.log')
+		result = self._json_for('test_kernel_panic_interrupt.log')
 		self.assertEqual(
-			{'name': 'example_simple_test', 'status': 'ERROR'},
-			result["sub_groups"][1]["test_cases"][0])
+			{'name': '', 'status': 'ERROR'},
+			result["sub_groups"][2]["test_cases"][1])
 
 	def test_skipped_test_json(self):
 		result = self._json_for('test_skip_tests.log')
@@ -562,7 +553,7 @@ class KUnitMainTest(unittest.TestCase):
 	def test_exec_no_tests(self):
 		self.linux_source_mock.run_kernel = mock.Mock(return_value=['TAP version 14', '1..0'])
 		with self.assertRaises(SystemExit) as e:
-                  kunit.main(['run'], self.linux_source_mock)
+		  kunit.main(['run'], self.linux_source_mock)
 		self.linux_source_mock.run_kernel.assert_called_once_with(
 			args=None, build_dir='.kunit', filter_glob='', timeout=300)
 		self.print_mock.assert_any_call(StrContains(' 0 tests run!'))
diff --git a/tools/testing/kunit/test_data/test_is_test_passed-crash.log b/tools/testing/kunit/test_data/test_is_test_passed-crash.log
deleted file mode 100644
index 4d97f6708c4a..000000000000
--- a/tools/testing/kunit/test_data/test_is_test_passed-crash.log
+++ /dev/null
@@ -1,70 +0,0 @@
-printk: console [tty0] enabled
-printk: console [mc-1] enabled
-TAP version 14
-1..2
-	# Subtest: sysctl_test
-	1..8
-	# sysctl_test_dointvec_null_tbl_data: sysctl_test_dointvec_null_tbl_data passed
-	ok 1 - sysctl_test_dointvec_null_tbl_data
-	# sysctl_test_dointvec_table_maxlen_unset: sysctl_test_dointvec_table_maxlen_unset passed
-	ok 2 - sysctl_test_dointvec_table_maxlen_unset
-	# sysctl_test_dointvec_table_len_is_zero: sysctl_test_dointvec_table_len_is_zero passed
-	ok 3 - sysctl_test_dointvec_table_len_is_zero
-	# sysctl_test_dointvec_table_read_but_position_set: sysctl_test_dointvec_table_read_but_position_set passed
-	ok 4 - sysctl_test_dointvec_table_read_but_position_set
-	# sysctl_test_dointvec_happy_single_positive: sysctl_test_dointvec_happy_single_positive passed
-	ok 5 - sysctl_test_dointvec_happy_single_positive
-	# sysctl_test_dointvec_happy_single_negative: sysctl_test_dointvec_happy_single_negative passed
-	ok 6 - sysctl_test_dointvec_happy_single_negative
-	# sysctl_test_dointvec_single_less_int_min: sysctl_test_dointvec_single_less_int_min passed
-	ok 7 - sysctl_test_dointvec_single_less_int_min
-	# sysctl_test_dointvec_single_greater_int_max: sysctl_test_dointvec_single_greater_int_max passed
-	ok 8 - sysctl_test_dointvec_single_greater_int_max
-kunit sysctl_test: all tests passed
-ok 1 - sysctl_test
-	# Subtest: example
-	1..2
-init_suite
-	# example_simple_test: initializing
-Stack:
- 6016f7db 6f81bd30 6f81bdd0 60021450
- 6024b0e8 60021440 60018bbe 16f81bdc0
- 00000001 6f81bd30 6f81bd20 6f81bdd0
-Call Trace:
- [<6016f7db>] ? kunit_try_run_case+0xab/0xf0
- [<60021450>] ? set_signals+0x0/0x60
- [<60021440>] ? get_signals+0x0/0x10
- [<60018bbe>] ? kunit_um_run_try_catch+0x5e/0xc0
- [<60021450>] ? set_signals+0x0/0x60
- [<60021440>] ? get_signals+0x0/0x10
- [<60018bb3>] ? kunit_um_run_try_catch+0x53/0xc0
- [<6016f321>] ? kunit_run_case_catch_errors+0x121/0x1a0
- [<60018b60>] ? kunit_um_run_try_catch+0x0/0xc0
- [<600189e0>] ? kunit_um_throw+0x0/0x180
- [<6016f730>] ? kunit_try_run_case+0x0/0xf0
- [<6016f600>] ? kunit_catch_run_case+0x0/0x130
- [<6016edd0>] ? kunit_vprintk+0x0/0x30
- [<6016ece0>] ? kunit_fail+0x0/0x40
- [<6016eca0>] ? kunit_abort+0x0/0x40
- [<6016ed20>] ? kunit_printk_emit+0x0/0xb0
- [<6016f200>] ? kunit_run_case_catch_errors+0x0/0x1a0
- [<6016f46e>] ? kunit_run_tests+0xce/0x260
- [<6005b390>] ? unregister_console+0x0/0x190
- [<60175b70>] ? suite_kunit_initexample_test_suite+0x0/0x20
- [<60001cbb>] ? do_one_initcall+0x0/0x197
- [<60001d47>] ? do_one_initcall+0x8c/0x197
- [<6005cd20>] ? irq_to_desc+0x0/0x30
- [<60002005>] ? kernel_init_freeable+0x1b3/0x272
- [<6005c5ec>] ? printk+0x0/0x9b
- [<601c0086>] ? kernel_init+0x26/0x160
- [<60014442>] ? new_thread_handler+0x82/0xc0
-
-	# example_simple_test: kunit test case crashed!
-	# example_simple_test: example_simple_test failed
-	not ok 1 - example_simple_test
-	# example_mock_test: initializing
-	# example_mock_test: example_mock_test passed
-	ok 2 - example_mock_test
-kunit example: one or more tests failed
-not ok 2 - example
-List of all partitions:
-- 
cgit 


From dbf0b0d53a2b5afa6ef7372dcedf52302669fc2c Mon Sep 17 00:00:00 2001
From: Daniel Latypov <dlatypov@google.com>
Date: Thu, 12 May 2022 11:35:37 -0700
Subject: kunit: tool: make parser stop overwriting status of suites w/
 no_tests

Consider this invocation
$ ./tools/testing/kunit/kunit.py parse <<EOF
  TAP version 14
  1..2
  ok 1 - suite
    # Subtest: no_tests_suite
    # catastrophic error!
  not ok 1 - no_tests_suite
EOF

It will have a 0 exit code even though there's a "not ok".

Consider this one:
$ ./tools/testing/kunit/kunit.py parse <<EOF
  TAP version 14
  1..2
  ok 1 - suite
  not ok 1 - no_tests_suite
EOF

It will a non-zero exit code.

Why?
We have this line in the kunit_parser.py
> parent_test = parse_test_header(lines, test)
where we have special handling when we see "# Subtest" and we ignore the
explicit reported "not ok 1" status!

Also, NO_TESTS at a suite-level only results in a non-zero status code
where then there's only one suite atm.

This change is the minimal one to make sure we don't overwrite it.

Signed-off-by: Daniel Latypov <dlatypov@google.com>
Reviewed-by: David Gow <davidgow@google.com>
Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/kunit/kunit_parser.py                                | 7 +++++--
 .../kunit/test_data/test_is_test_passed-no_tests_no_plan.log       | 2 +-
 2 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py
index c8c0df56cc51..9f5a73f36c2d 100644
--- a/tools/testing/kunit/kunit_parser.py
+++ b/tools/testing/kunit/kunit_parser.py
@@ -775,8 +775,11 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str]) -> Test:
 
 	# Check for there being no tests
 	if parent_test and len(subtests) == 0:
-		test.status = TestStatus.NO_TESTS
-		test.add_error('0 tests run!')
+		# Don't override a bad status if this test had one reported.
+		# Assumption: no subtests means CRASHED is from Test.__init__()
+		if test.status in (TestStatus.TEST_CRASHED, TestStatus.SUCCESS):
+			test.status = TestStatus.NO_TESTS
+			test.add_error('0 tests run!')
 
 	# Add statuses to TestCounts attribute in Test object
 	bubble_up_test_results(test)
diff --git a/tools/testing/kunit/test_data/test_is_test_passed-no_tests_no_plan.log b/tools/testing/kunit/test_data/test_is_test_passed-no_tests_no_plan.log
index dd873c981108..4f81876ee6f1 100644
--- a/tools/testing/kunit/test_data/test_is_test_passed-no_tests_no_plan.log
+++ b/tools/testing/kunit/test_data/test_is_test_passed-no_tests_no_plan.log
@@ -3,5 +3,5 @@ TAP version 14
   # Subtest: suite
   1..1
     # Subtest: case
-  ok 1 - case # SKIP
+  ok 1 - case
 ok 1 - suite
-- 
cgit 


From 94507ee3e9aeafc5fcc429947b84016eabea6e64 Mon Sep 17 00:00:00 2001
From: Daniel Latypov <dlatypov@google.com>
Date: Thu, 12 May 2022 11:35:38 -0700
Subject: kunit: tool: minor cosmetic cleanups in kunit_parser.py

There should be no behavioral changes from this patch.

This patch removes redundant comment text, inlines a function used in
only one place, and other such minor tweaks.

Signed-off-by: Daniel Latypov <dlatypov@google.com>
Reviewed-by: David Gow <davidgow@google.com>
Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/kunit/kunit_parser.py | 71 +++++++++----------------------------
 1 file changed, 17 insertions(+), 54 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py
index 9f5a73f36c2d..98264177b0bd 100644
--- a/tools/testing/kunit/kunit_parser.py
+++ b/tools/testing/kunit/kunit_parser.py
@@ -46,10 +46,8 @@ class Test(object):
 
 	def __str__(self) -> str:
 		"""Returns string representation of a Test class object."""
-		return ('Test(' + str(self.status) + ', ' + self.name +
-			', ' + str(self.expected_count) + ', ' +
-			str(self.subtests) + ', ' + str(self.log) + ', ' +
-			str(self.counts) + ')')
+		return (f'Test({self.status}, {self.name}, {self.expected_count}, '
+			f'{self.subtests}, {self.log}, {self.counts})')
 
 	def __repr__(self) -> str:
 		"""Returns string representation of a Test class object."""
@@ -58,7 +56,7 @@ class Test(object):
 	def add_error(self, error_message: str) -> None:
 		"""Records an error that occurred while parsing this test."""
 		self.counts.errors += 1
-		print_error('Test ' + self.name + ': ' + error_message)
+		print_with_timestamp(red('[ERROR]') + f' Test: {self.name}: {error_message}')
 
 class TestStatus(Enum):
 	"""An enumeration class to represent the status of a test."""
@@ -92,8 +90,7 @@ class TestCounts:
 		self.errors = 0
 
 	def __str__(self) -> str:
-		"""Returns the string representation of a TestCounts object.
-		"""
+		"""Returns the string representation of a TestCounts object."""
 		statuses = [('passed', self.passed), ('failed', self.failed),
 			('crashed', self.crashed), ('skipped', self.skipped),
 			('errors', self.errors)]
@@ -130,30 +127,19 @@ class TestCounts:
 		if self.total() == 0:
 			return TestStatus.NO_TESTS
 		elif self.crashed:
-			# If one of the subtests crash, the expected status
-			# of the Test is crashed.
+			# Crashes should take priority.
 			return TestStatus.TEST_CRASHED
 		elif self.failed:
-			# Otherwise if one of the subtests fail, the
-			# expected status of the Test is failed.
 			return TestStatus.FAILURE
 		elif self.passed:
-			# Otherwise if one of the subtests pass, the
-			# expected status of the Test is passed.
+			# No failures or crashes, looks good!
 			return TestStatus.SUCCESS
 		else:
-			# Finally, if none of the subtests have failed,
-			# crashed, or passed, the expected status of the
-			# Test is skipped.
+			# We have only skipped tests.
 			return TestStatus.SKIPPED
 
 	def add_status(self, status: TestStatus) -> None:
-		"""
-		Increments count of inputted status.
-
-		Parameters:
-		status - status to be added to the TestCounts object
-		"""
+		"""Increments the count for `status`."""
 		if status == TestStatus.SUCCESS:
 			self.passed += 1
 		elif status == TestStatus.FAILURE:
@@ -283,11 +269,9 @@ def check_version(version_num: int, accepted_versions: List[int],
 	test - Test object for current test being parsed
 	"""
 	if version_num < min(accepted_versions):
-		test.add_error(version_type +
-			' version lower than expected!')
+		test.add_error(f'{version_type} version lower than expected!')
 	elif version_num > max(accepted_versions):
-		test.add_error(
-			version_type + ' version higher than expected!')
+		test.add_error(f'{version_type} version higer than expected!')
 
 def parse_ktap_header(lines: LineStream, test: Test) -> bool:
 	"""
@@ -440,8 +424,7 @@ def parse_test_result(lines: LineStream, test: Test,
 	# Check test num
 	num = int(match.group(2))
 	if num != expected_num:
-		test.add_error('Expected test number ' +
-			str(expected_num) + ' but found ' + str(num))
+		test.add_error(f'Expected test number {expected_num} but found {num}')
 
 	# Set status of test object
 	status = match.group(1)
@@ -529,7 +512,7 @@ def format_test_divider(message: str, len_message: int) -> str:
 		# calculate number of dashes for each side of the divider
 		len_1 = int(difference / 2)
 		len_2 = difference - len_1
-	return ('=' * len_1) + ' ' + message + ' ' + ('=' * len_2)
+	return ('=' * len_1) + f' {message} ' + ('=' * len_2)
 
 def print_test_header(test: Test) -> None:
 	"""
@@ -545,20 +528,13 @@ def print_test_header(test: Test) -> None:
 	message = test.name
 	if test.expected_count:
 		if test.expected_count == 1:
-			message += (' (' + str(test.expected_count) +
-				' subtest)')
+			message += ' (1 subtest)'
 		else:
-			message += (' (' + str(test.expected_count) +
-				' subtests)')
+			message += f' ({test.expected_count} subtests)'
 	print_with_timestamp(format_test_divider(message, len(message)))
 
 def print_log(log: Iterable[str]) -> None:
-	"""
-	Prints all strings in saved log for test in yellow.
-
-	Parameters:
-	log - Iterable object with all strings saved in log for test
-	"""
+	"""Prints all strings in saved log for test in yellow."""
 	for m in log:
 		print_with_timestamp(yellow(m))
 
@@ -635,20 +611,7 @@ def print_summary_line(test: Test) -> None:
 		color = yellow
 	else:
 		color = red
-	counts = test.counts
-	print_with_timestamp(color('Testing complete. ' + str(counts)))
-
-def print_error(error_message: str) -> None:
-	"""
-	Prints error message with error format.
-
-	Example:
-	"[ERROR] Test example: missing test plan!"
-
-	Parameters:
-	error_message - message describing error
-	"""
-	print_with_timestamp(red('[ERROR] ') + error_message)
+	print_with_timestamp(color(f'Testing complete. {test.counts}'))
 
 # Other methods:
 
@@ -794,7 +757,7 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str]) -> Test:
 def parse_run_tests(kernel_output: Iterable[str]) -> Test:
 	"""
 	Using kernel output, extract KTAP lines, parse the lines for test
-	results and print condensed test results and summary line .
+	results and print condensed test results and summary line.
 
 	Parameters:
 	kernel_output - Iterable object contains lines of kernel output
-- 
cgit 


From 0453f984a7b9458f0e469afb039f2841308b1bef Mon Sep 17 00:00:00 2001
From: Daniel Latypov <dlatypov@google.com>
Date: Mon, 9 May 2022 13:49:09 -0700
Subject: kunit: tool: misc cleanups

This primarily comes from running pylint over kunit tool code and
ignoring some warnings we don't care about.
If we ever got a fully clean setup, we could add this to run_checks.py,
but we're not there yet.

Fix things like
* Drop unused imports
* check `is None`, not `== None` (see PEP 8)
* remove redundant parens around returns
* remove redundant `else` / convert `elif` to `if` where appropriate
* rename make_arch_qemuconfig() param to base_kunitconfig (this is the
  name used in the subclass, and it's a better one)
* kunit_tool_test: check the exit code for SystemExit (could be 0)

Signed-off-by: Daniel Latypov <dlatypov@google.com>
Reviewed-by: David Gow <davidgow@google.com>
Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/kunit/kunit.py           |  9 ++++-----
 tools/testing/kunit/kunit_config.py    | 12 +++++------
 tools/testing/kunit/kunit_json.py      |  5 +----
 tools/testing/kunit/kunit_kernel.py    | 10 ++++-----
 tools/testing/kunit/kunit_parser.py    | 37 ++++++++++++++++------------------
 tools/testing/kunit/kunit_tool_test.py | 10 +++++----
 tools/testing/kunit/run_checks.py      |  2 +-
 7 files changed, 39 insertions(+), 46 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py
index 6dc710d3996b..13bd72e47da8 100755
--- a/tools/testing/kunit/kunit.py
+++ b/tools/testing/kunit/kunit.py
@@ -124,7 +124,7 @@ def _list_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest)
 	lines.pop()
 
 	# Filter out any extraneous non-test output that might have gotten mixed in.
-	return [l for l in lines if re.match('^[^\s.]+\.[^\s.]+$', l)]
+	return [l for l in lines if re.match(r'^[^\s.]+\.[^\s.]+$', l)]
 
 def _suites_from_test_list(tests: List[str]) -> List[str]:
 	"""Extracts all the suites from an ordered list of tests."""
@@ -188,8 +188,7 @@ def exec_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest) -
 def _map_to_overall_status(test_status: kunit_parser.TestStatus) -> KunitStatus:
 	if test_status in (kunit_parser.TestStatus.SUCCESS, kunit_parser.TestStatus.SKIPPED):
 		return KunitStatus.SUCCESS
-	else:
-		return KunitStatus.TEST_FAILURE
+	return KunitStatus.TEST_FAILURE
 
 def parse_tests(request: KunitParseRequest, metadata: kunit_json.Metadata, input_data: Iterable[str]) -> Tuple[KunitResult, kunit_parser.Test]:
 	parse_start = time.time()
@@ -353,7 +352,7 @@ def add_exec_opts(parser) -> None:
 			    'a non-hermetic test, one that might pass/fail based on '
 			    'what ran before it.',
 			    type=str,
-			    choices=['suite', 'test']),
+			    choices=['suite', 'test'])
 
 def add_parse_opts(parser) -> None:
 	parser.add_argument('--raw_output', help='If set don\'t format output from kernel. '
@@ -497,7 +496,7 @@ def main(argv, linux=None):
 		if result.status != KunitStatus.SUCCESS:
 			sys.exit(1)
 	elif cli_args.subcommand == 'parse':
-		if cli_args.file == None:
+		if cli_args.file is None:
 			sys.stdin.reconfigure(errors='backslashreplace')  # pytype: disable=attribute-error
 			kunit_output = sys.stdin
 		else:
diff --git a/tools/testing/kunit/kunit_config.py b/tools/testing/kunit/kunit_config.py
index ca33e4b7bcc5..75a8dc1683d4 100644
--- a/tools/testing/kunit/kunit_config.py
+++ b/tools/testing/kunit/kunit_config.py
@@ -20,16 +20,15 @@ class KconfigEntry:
 
 	def __str__(self) -> str:
 		if self.value == 'n':
-			return r'# CONFIG_%s is not set' % (self.name)
-		else:
-			return r'CONFIG_%s=%s' % (self.name, self.value)
+			return f'# CONFIG_{self.name} is not set'
+		return f'CONFIG_{self.name}={self.value}'
 
 
 class KconfigParseError(Exception):
 	"""Error parsing Kconfig defconfig or .config."""
 
 
-class Kconfig(object):
+class Kconfig:
 	"""Represents defconfig or .config specified using the Kconfig language."""
 
 	def __init__(self) -> None:
@@ -49,7 +48,7 @@ class Kconfig(object):
 				if a.value == 'n':
 					continue
 				return False
-			elif a.value != b:
+			if a.value != b:
 				return False
 		return True
 
@@ -91,6 +90,5 @@ def parse_from_string(blob: str) -> Kconfig:
 
 		if line[0] == '#':
 			continue
-		else:
-			raise KconfigParseError('Failed to parse: ' + line)
+		raise KconfigParseError('Failed to parse: ' + line)
 	return kconfig
diff --git a/tools/testing/kunit/kunit_json.py b/tools/testing/kunit/kunit_json.py
index 1212423fe6bc..10ff65689dd8 100644
--- a/tools/testing/kunit/kunit_json.py
+++ b/tools/testing/kunit/kunit_json.py
@@ -8,12 +8,9 @@
 
 from dataclasses import dataclass
 import json
-import os
-
-import kunit_parser
+from typing import Any, Dict
 
 from kunit_parser import Test, TestStatus
-from typing import Any, Dict
 
 @dataclass
 class Metadata:
diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
index 1b9c4922a675..3539efaf99ba 100644
--- a/tools/testing/kunit/kunit_kernel.py
+++ b/tools/testing/kunit/kunit_kernel.py
@@ -38,7 +38,7 @@ class BuildError(Exception):
 	"""Represents an error trying to build the Linux kernel."""
 
 
-class LinuxSourceTreeOperations(object):
+class LinuxSourceTreeOperations:
 	"""An abstraction over command line operations performed on a source tree."""
 
 	def __init__(self, linux_arch: str, cross_compile: Optional[str]):
@@ -53,7 +53,7 @@ class LinuxSourceTreeOperations(object):
 		except subprocess.CalledProcessError as e:
 			raise ConfigError(e.output.decode())
 
-	def make_arch_qemuconfig(self, kconfig: kunit_config.Kconfig) -> None:
+	def make_arch_qemuconfig(self, base_kunitconfig: kunit_config.Kconfig) -> None:
 		pass
 
 	def make_allyesconfig(self, build_dir: str, make_options) -> None:
@@ -182,7 +182,7 @@ def get_source_tree_ops(arch: str, cross_compile: Optional[str]) -> LinuxSourceT
 	config_path = os.path.join(QEMU_CONFIGS_DIR, arch + '.py')
 	if arch == 'um':
 		return LinuxSourceTreeOperationsUml(cross_compile=cross_compile)
-	elif os.path.isfile(config_path):
+	if os.path.isfile(config_path):
 		return get_source_tree_ops_from_qemu_config(config_path, cross_compile)[1]
 
 	options = [f[:-3] for f in os.listdir(QEMU_CONFIGS_DIR) if f.endswith('.py')]
@@ -213,7 +213,7 @@ def get_source_tree_ops_from_qemu_config(config_path: str,
 	return params.linux_arch, LinuxSourceTreeOperationsQemu(
 			params, cross_compile=cross_compile)
 
-class LinuxSourceTree(object):
+class LinuxSourceTree:
 	"""Represents a Linux kernel source tree with KUnit tests."""
 
 	def __init__(
@@ -368,6 +368,6 @@ class LinuxSourceTree(object):
 			waiter.join()
 			subprocess.call(['stty', 'sane'])
 
-	def signal_handler(self, sig, frame) -> None:
+	def signal_handler(self, unused_sig, unused_frame) -> None:
 		logging.error('Build interruption occurred. Cleaning console.')
 		subprocess.call(['stty', 'sane'])
diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py
index 98264177b0bd..c5569b367c69 100644
--- a/tools/testing/kunit/kunit_parser.py
+++ b/tools/testing/kunit/kunit_parser.py
@@ -15,10 +15,9 @@ import sys
 
 import datetime
 from enum import Enum, auto
-from functools import reduce
 from typing import Iterable, Iterator, List, Optional, Tuple
 
-class Test(object):
+class Test:
 	"""
 	A class to represent a test parsed from KTAP results. All KTAP
 	results within a test log are stored in a main Test object as
@@ -126,17 +125,16 @@ class TestCounts:
 		"""
 		if self.total() == 0:
 			return TestStatus.NO_TESTS
-		elif self.crashed:
+		if self.crashed:
 			# Crashes should take priority.
 			return TestStatus.TEST_CRASHED
-		elif self.failed:
+		if self.failed:
 			return TestStatus.FAILURE
-		elif self.passed:
+		if self.passed:
 			# No failures or crashes, looks good!
 			return TestStatus.SUCCESS
-		else:
-			# We have only skipped tests.
-			return TestStatus.SKIPPED
+		# We have only skipped tests.
+		return TestStatus.SKIPPED
 
 	def add_status(self, status: TestStatus) -> None:
 		"""Increments the count for `status`."""
@@ -381,7 +379,7 @@ def peek_test_name_match(lines: LineStream, test: Test) -> bool:
 	if not match:
 		return False
 	name = match.group(4)
-	return (name == test.name)
+	return name == test.name
 
 def parse_test_result(lines: LineStream, test: Test,
 			expected_num: int) -> bool:
@@ -553,17 +551,16 @@ def format_test_result(test: Test) -> str:
 	String containing formatted test result
 	"""
 	if test.status == TestStatus.SUCCESS:
-		return (green('[PASSED] ') + test.name)
-	elif test.status == TestStatus.SKIPPED:
-		return (yellow('[SKIPPED] ') + test.name)
-	elif test.status == TestStatus.NO_TESTS:
-		return (yellow('[NO TESTS RUN] ') + test.name)
-	elif test.status == TestStatus.TEST_CRASHED:
-		print_log(test.log)
-		return (red('[CRASHED] ') + test.name)
-	else:
+		return green('[PASSED] ') + test.name
+	if test.status == TestStatus.SKIPPED:
+		return yellow('[SKIPPED] ') + test.name
+	if test.status == TestStatus.NO_TESTS:
+		return yellow('[NO TESTS RUN] ') + test.name
+	if test.status == TestStatus.TEST_CRASHED:
 		print_log(test.log)
-		return (red('[FAILED] ') + test.name)
+		return red('[CRASHED] ') + test.name
+	print_log(test.log)
+	return red('[FAILED] ') + test.name
 
 def print_test_result(test: Test) -> None:
 	"""
@@ -607,7 +604,7 @@ def print_summary_line(test: Test) -> None:
 	"""
 	if test.status == TestStatus.SUCCESS:
 		color = green
-	elif test.status == TestStatus.SKIPPED or test.status == TestStatus.NO_TESTS:
+	elif test.status in (TestStatus.SKIPPED, TestStatus.NO_TESTS):
 		color = yellow
 	else:
 		color = red
diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py
index b417eceeda74..25a2eb3bf114 100755
--- a/tools/testing/kunit/kunit_tool_test.py
+++ b/tools/testing/kunit/kunit_tool_test.py
@@ -251,8 +251,8 @@ class KUnitParserTest(unittest.TestCase):
 
 	def test_ignores_hyphen(self):
 		hyphen_log = test_data_path('test_strip_hyphen.log')
-		file = open(hyphen_log)
-		result = kunit_parser.parse_run_tests(file.readlines())
+		with open(hyphen_log) as file:
+			result = kunit_parser.parse_run_tests(file.readlines())
 
 		# A skipped test does not fail the whole suite.
 		self.assertEqual(
@@ -347,7 +347,7 @@ class LineStreamTest(unittest.TestCase):
 		called_times = 0
 		def generator():
 			nonlocal called_times
-			for i in range(1,5):
+			for _ in range(1,5):
 				called_times += 1
 				yield called_times, str(called_times)
 
@@ -553,7 +553,8 @@ class KUnitMainTest(unittest.TestCase):
 	def test_exec_no_tests(self):
 		self.linux_source_mock.run_kernel = mock.Mock(return_value=['TAP version 14', '1..0'])
 		with self.assertRaises(SystemExit) as e:
-		  kunit.main(['run'], self.linux_source_mock)
+			kunit.main(['run'], self.linux_source_mock)
+		self.assertEqual(e.exception.code, 1)
 		self.linux_source_mock.run_kernel.assert_called_once_with(
 			args=None, build_dir='.kunit', filter_glob='', timeout=300)
 		self.print_mock.assert_any_call(StrContains(' 0 tests run!'))
@@ -588,6 +589,7 @@ class KUnitMainTest(unittest.TestCase):
 		self.linux_source_mock.run_kernel = mock.Mock(return_value=[])
 		with self.assertRaises(SystemExit) as e:
 			kunit.main(['run', '--raw_output=invalid'], self.linux_source_mock)
+		self.assertNotEqual(e.exception.code, 0)
 
 	def test_run_raw_output_does_not_take_positional_args(self):
 		# --raw_output is a string flag, but we don't want it to consume
diff --git a/tools/testing/kunit/run_checks.py b/tools/testing/kunit/run_checks.py
index 13d854afca9d..066e6f938f6d 100755
--- a/tools/testing/kunit/run_checks.py
+++ b/tools/testing/kunit/run_checks.py
@@ -14,7 +14,7 @@ import shutil
 import subprocess
 import sys
 import textwrap
-from typing import Dict, List, Sequence, Tuple
+from typing import Dict, List, Sequence
 
 ABS_TOOL_PATH = os.path.abspath(os.path.dirname(__file__))
 TIMEOUT = datetime.timedelta(minutes=5).total_seconds()
-- 
cgit 


From b18d28475264f5a4f193f047df6618b78127211e Mon Sep 17 00:00:00 2001
From: David Gow <davidgow@google.com>
Date: Sat, 30 Apr 2022 12:56:40 +0800
Subject: kunit: tool: Add list of all valid test configs on UML

It's often desirable (particularly in test automation) to run as many
tests as possible. This config enables all the tests which work as
builtins under UML at present, increasing the total tests run from 156
to 342 (not counting 36 'skipped' tests).

They can be run with:
./tools/testing/kunit/kunit.py run
--kunitconfig=./tools/testing/kunit/configs/all_tests_uml.config

This acts as an in-between point between the KUNIT_ALL_TESTS config
(which enables only tests whose dependencies are already enabled), and
the kunit_tool --alltests option, which tries to use allyesconfig,
taking a very long time to build and breaking very often.

Signed-off-by: David Gow <davidgow@google.com>
Tested-by: Daniel Latypov <dlatypov@google.com>
Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/kunit/configs/all_tests_uml.config | 37 ++++++++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 tools/testing/kunit/configs/all_tests_uml.config

(limited to 'tools/testing')

diff --git a/tools/testing/kunit/configs/all_tests_uml.config b/tools/testing/kunit/configs/all_tests_uml.config
new file mode 100644
index 000000000000..bdee36bef4a3
--- /dev/null
+++ b/tools/testing/kunit/configs/all_tests_uml.config
@@ -0,0 +1,37 @@
+# This config enables as many tests as possible under UML.
+# It is intended for use in continuous integration systems and similar for
+# automated testing of as much as possible.
+# The config is manually maintained, though it uses KUNIT_ALL_TESTS=y to enable
+# any tests whose dependencies are already satisfied. Please feel free to add
+# more options if they any new tests.
+
+CONFIG_KUNIT=y
+CONFIG_KUNIT_EXAMPLE_TEST=y
+CONFIG_KUNIT_ALL_TESTS=y
+
+CONFIG_IIO=y
+
+CONFIG_EXT4_FS=y
+
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+
+CONFIG_VIRTIO_UML=y
+CONFIG_UML_PCI_OVER_VIRTIO=y
+CONFIG_PCI=y
+CONFIG_USB4=y
+
+CONFIG_NET=y
+CONFIG_MCTP=y
+
+CONFIG_INET=y
+CONFIG_MPTCP=y
+
+CONFIG_DAMON=y
+CONFIG_DAMON_VADDR=y
+CONFIG_DAMON_PADDR=y
+CONFIG_DEBUG_FS=y
+CONFIG_DAMON_DBGFS=y
+
+CONFIG_SECURITY=y
+CONFIG_SECURITY_APPARMOR=y
-- 
cgit 


From 8a7ccad38f8b25c8202efd69371a022357286400 Mon Sep 17 00:00:00 2001
From: Brendan Higgins <brendanhiggins@google.com>
Date: Wed, 11 May 2022 17:30:26 -0700
Subject: kunit: tool: update riscv QEMU config with new serial dependency

The config for the serial console for riscv,
CONFIG_SERIAL_EARLYCON_RISCV_SBI, added a dependency,
CONFIG_RISCV_SBI_V01, at some point, so add that in to the base arch
config.

Signed-off-by: Brendan Higgins <brendanhiggins@google.com>
Reviewed-by: David Gow <davidgow@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/kunit/qemu_configs/riscv.py | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/testing')

diff --git a/tools/testing/kunit/qemu_configs/riscv.py b/tools/testing/kunit/qemu_configs/riscv.py
index b882fde39435..6207be146d26 100644
--- a/tools/testing/kunit/qemu_configs/riscv.py
+++ b/tools/testing/kunit/qemu_configs/riscv.py
@@ -21,6 +21,7 @@ CONFIG_SOC_VIRT=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_OF_PLATFORM=y
+CONFIG_RISCV_SBI_V01=y
 CONFIG_SERIAL_EARLYCON_RISCV_SBI=y''',
 			   qemu_arch='riscv64',
 			   kernel_path='arch/riscv/boot/Image',
-- 
cgit 


From 15477b31db104bc795dd1acccb3e9b89465fff01 Mon Sep 17 00:00:00 2001
From: Gautam Menghani <gautammenghani201@gmail.com>
Date: Sat, 14 May 2022 00:37:20 +0530
Subject: kselftests/ir : Improve readability of modprobe error message

Improve the readability of error message which says module not found.
The new behaviour is consistent with the modprobe command.

Signed-off-by: Gautam Menghani <gautammenghani201@gmail.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/ir/ir_loopback.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/ir/ir_loopback.sh b/tools/testing/selftests/ir/ir_loopback.sh
index b90dc9939f45..aff9299c9416 100755
--- a/tools/testing/selftests/ir/ir_loopback.sh
+++ b/tools/testing/selftests/ir/ir_loopback.sh
@@ -10,7 +10,7 @@ if [ $UID != 0 ]; then
 fi
 
 if ! /sbin/modprobe -q -n rc-loopback; then
-        echo "ir_loopback: module rc-loopback is not found [SKIP]"
+        echo "ir_loopback: module rc-loopback is not found in /lib/modules/`uname -r` [SKIP]"
         exit $ksft_skip
 fi
 
-- 
cgit 


From c43ce39870b3cff3cefb1faf78c577153edc2dde Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Fri, 13 May 2022 17:21:13 -0700
Subject: selftests: mptcp: fix a mp_fail test warning

Old tc versions (iproute2 5.3) show actions in multiple lines, not a
single line. Then the following unexpected MP_FAIL selftest output
occurs:

 file received by server has inverted byte at 169
 ./mptcp_join.sh: line 1277: [: [{"total acts":1},{"actions":[{"order":0 pedit ,"control_action":{"type":"pipe"}keys 1
         index 1 ref 1 bind 1,"installed":0,"last_used":0
         key #0  at 148: val ff000000 mask ffffffff
 5: integer expression expected
 001 Infinite map                      syn[ ok ] - synack[ ok ] - ack[ ok ]
                                       sum[ ok ] - csum  [ ok ]
                                       ftx[ ok ] - failrx[ ok ]
                                       rtx[ ok ] - rstrx [ ok ]
                                       itx[ ok ] - infirx[ ok ]
                                       ftx[ ok ] - failrx[ ok ] invert

This patch adds a 'grep' before 'sed' to fix this.

Fixes: b6e074e171bc ("selftests: mptcp: add infinite map testcase")
Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index d1de1e7702fb..7381d1f85209 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -2693,6 +2693,7 @@ fastclose_tests()
 pedit_action_pkts()
 {
 	tc -n $ns2 -j -s action show action pedit index 100 | \
+		grep "packets" | \
 		sed 's/.*"packets":\([0-9]\+\),.*/\1/'
 }
 
-- 
cgit 


From e511c4a3d2a1f64aafc1f5df37a2ffcf7ef91b55 Mon Sep 17 00:00:00 2001
From: Jane Chu <jane.chu@oracle.com>
Date: Fri, 13 May 2022 15:10:58 -0700
Subject: dax: introduce DAX_RECOVERY_WRITE dax access mode

Up till now, dax_direct_access() is used implicitly for normal
access, but for the purpose of recovery write, dax range with
poison is requested.  To make the interface clear, introduce
	enum dax_access_mode {
		DAX_ACCESS,
		DAX_RECOVERY_WRITE,
	}
where DAX_ACCESS is used for normal dax access, and
DAX_RECOVERY_WRITE is used for dax recovery write.

Suggested-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Jane Chu <jane.chu@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Mike Snitzer <snitzer@redhat.com>
Reviewed-by: Vivek Goyal <vgoyal@redhat.com>
Link: https://lore.kernel.org/r/165247982851.52965.11024212198889762949.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dax/super.c             | 5 +++--
 drivers/md/dm-linear.c          | 5 +++--
 drivers/md/dm-log-writes.c      | 5 +++--
 drivers/md/dm-stripe.c          | 5 +++--
 drivers/md/dm-target.c          | 4 +++-
 drivers/md/dm-writecache.c      | 7 ++++---
 drivers/md/dm.c                 | 5 +++--
 drivers/nvdimm/pmem.c           | 8 +++++---
 drivers/nvdimm/pmem.h           | 5 ++++-
 drivers/s390/block/dcssblk.c    | 9 ++++++---
 fs/dax.c                        | 9 +++++----
 fs/fuse/dax.c                   | 4 ++--
 fs/fuse/virtio_fs.c             | 6 ++++--
 include/linux/dax.h             | 9 +++++++--
 include/linux/device-mapper.h   | 4 +++-
 tools/testing/nvdimm/pmem-dax.c | 4 +++-
 16 files changed, 61 insertions(+), 33 deletions(-)

(limited to 'tools/testing')

diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 0211e6f7b47a..5405eb553430 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -117,6 +117,7 @@ enum dax_device_flags {
  * @dax_dev: a dax_device instance representing the logical memory range
  * @pgoff: offset in pages from the start of the device to translate
  * @nr_pages: number of consecutive pages caller can handle relative to @pfn
+ * @mode: indicator on normal access or recovery write
  * @kaddr: output parameter that returns a virtual address mapping of pfn
  * @pfn: output parameter that returns an absolute pfn translation of @pgoff
  *
@@ -124,7 +125,7 @@ enum dax_device_flags {
  * pages accessible at the device relative @pgoff.
  */
 long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
-		void **kaddr, pfn_t *pfn)
+		enum dax_access_mode mode, void **kaddr, pfn_t *pfn)
 {
 	long avail;
 
@@ -138,7 +139,7 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
 		return -EINVAL;
 
 	avail = dax_dev->ops->direct_access(dax_dev, pgoff, nr_pages,
-			kaddr, pfn);
+			mode, kaddr, pfn);
 	if (!avail)
 		return -ERANGE;
 	return min(avail, nr_pages);
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 76b486e4d2be..13e263299c9c 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -172,11 +172,12 @@ static struct dax_device *linear_dax_pgoff(struct dm_target *ti, pgoff_t *pgoff)
 }
 
 static long linear_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
-		long nr_pages, void **kaddr, pfn_t *pfn)
+		long nr_pages, enum dax_access_mode mode, void **kaddr,
+		pfn_t *pfn)
 {
 	struct dax_device *dax_dev = linear_dax_pgoff(ti, &pgoff);
 
-	return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
+	return dax_direct_access(dax_dev, pgoff, nr_pages, mode, kaddr, pfn);
 }
 
 static int linear_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index c9d036d6bb2e..06bdbed65eb1 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -889,11 +889,12 @@ static struct dax_device *log_writes_dax_pgoff(struct dm_target *ti,
 }
 
 static long log_writes_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
-					 long nr_pages, void **kaddr, pfn_t *pfn)
+		long nr_pages, enum dax_access_mode mode, void **kaddr,
+		pfn_t *pfn)
 {
 	struct dax_device *dax_dev = log_writes_dax_pgoff(ti, &pgoff);
 
-	return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
+	return dax_direct_access(dax_dev, pgoff, nr_pages, mode, kaddr, pfn);
 }
 
 static int log_writes_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index c81d331d1afe..77d72900e997 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -315,11 +315,12 @@ static struct dax_device *stripe_dax_pgoff(struct dm_target *ti, pgoff_t *pgoff)
 }
 
 static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
-		long nr_pages, void **kaddr, pfn_t *pfn)
+		long nr_pages, enum dax_access_mode mode, void **kaddr,
+		pfn_t *pfn)
 {
 	struct dax_device *dax_dev = stripe_dax_pgoff(ti, &pgoff);
 
-	return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
+	return dax_direct_access(dax_dev, pgoff, nr_pages, mode, kaddr, pfn);
 }
 
 static int stripe_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index 64dd0b34fcf4..8cd5184e62f0 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -10,6 +10,7 @@
 #include <linux/init.h>
 #include <linux/kmod.h>
 #include <linux/bio.h>
+#include <linux/dax.h>
 
 #define DM_MSG_PREFIX "target"
 
@@ -142,7 +143,8 @@ static void io_err_release_clone_rq(struct request *clone,
 }
 
 static long io_err_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
-		long nr_pages, void **kaddr, pfn_t *pfn)
+		long nr_pages, enum dax_access_mode mode, void **kaddr,
+		pfn_t *pfn)
 {
 	return -EIO;
 }
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 5630b470ba42..d74c5a7a0ab4 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -286,7 +286,8 @@ static int persistent_memory_claim(struct dm_writecache *wc)
 
 	id = dax_read_lock();
 
-	da = dax_direct_access(wc->ssd_dev->dax_dev, offset, p, &wc->memory_map, &pfn);
+	da = dax_direct_access(wc->ssd_dev->dax_dev, offset, p, DAX_ACCESS,
+			&wc->memory_map, &pfn);
 	if (da < 0) {
 		wc->memory_map = NULL;
 		r = da;
@@ -308,8 +309,8 @@ static int persistent_memory_claim(struct dm_writecache *wc)
 		i = 0;
 		do {
 			long daa;
-			daa = dax_direct_access(wc->ssd_dev->dax_dev, offset + i, p - i,
-						NULL, &pfn);
+			daa = dax_direct_access(wc->ssd_dev->dax_dev, offset + i,
+					p - i, DAX_ACCESS, NULL, &pfn);
 			if (daa <= 0) {
 				r = daa ? daa : -EINVAL;
 				goto err3;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 82957bd460e8..9c452641c3d5 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1093,7 +1093,8 @@ static struct dm_target *dm_dax_get_live_target(struct mapped_device *md,
 }
 
 static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
-				 long nr_pages, void **kaddr, pfn_t *pfn)
+		long nr_pages, enum dax_access_mode mode, void **kaddr,
+		pfn_t *pfn)
 {
 	struct mapped_device *md = dax_get_private(dax_dev);
 	sector_t sector = pgoff * PAGE_SECTORS;
@@ -1111,7 +1112,7 @@ static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
 	if (len < 1)
 		goto out;
 	nr_pages = min(len, nr_pages);
-	ret = ti->type->direct_access(ti, pgoff, nr_pages, kaddr, pfn);
+	ret = ti->type->direct_access(ti, pgoff, nr_pages, mode, kaddr, pfn);
 
  out:
 	dm_put_live_table(md, srcu_idx);
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 4aa17132a557..47f34c50f944 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -239,7 +239,8 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
 
 /* see "strong" declaration in tools/testing/nvdimm/pmem-dax.c */
 __weak long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
-		long nr_pages, void **kaddr, pfn_t *pfn)
+		long nr_pages, enum dax_access_mode mode, void **kaddr,
+		pfn_t *pfn)
 {
 	resource_size_t offset = PFN_PHYS(pgoff) + pmem->data_offset;
 
@@ -278,11 +279,12 @@ static int pmem_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
 }
 
 static long pmem_dax_direct_access(struct dax_device *dax_dev,
-		pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn)
+		pgoff_t pgoff, long nr_pages, enum dax_access_mode mode,
+		void **kaddr, pfn_t *pfn)
 {
 	struct pmem_device *pmem = dax_get_private(dax_dev);
 
-	return __pmem_direct_access(pmem, pgoff, nr_pages, kaddr, pfn);
+	return __pmem_direct_access(pmem, pgoff, nr_pages, mode, kaddr, pfn);
 }
 
 static const struct dax_operations pmem_dax_ops = {
diff --git a/drivers/nvdimm/pmem.h b/drivers/nvdimm/pmem.h
index 1f51a2361429..392b0b38acb9 100644
--- a/drivers/nvdimm/pmem.h
+++ b/drivers/nvdimm/pmem.h
@@ -8,6 +8,8 @@
 #include <linux/pfn_t.h>
 #include <linux/fs.h>
 
+enum dax_access_mode;
+
 /* this definition is in it's own header for tools/testing/nvdimm to consume */
 struct pmem_device {
 	/* One contiguous memory region per device */
@@ -28,7 +30,8 @@ struct pmem_device {
 };
 
 long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
-		long nr_pages, void **kaddr, pfn_t *pfn);
+		long nr_pages, enum dax_access_mode mode, void **kaddr,
+		pfn_t *pfn);
 
 #ifdef CONFIG_MEMORY_FAILURE
 static inline bool test_and_clear_pmem_poison(struct page *page)
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index d614843caf6c..8d0d0eaa3059 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -32,7 +32,8 @@ static int dcssblk_open(struct block_device *bdev, fmode_t mode);
 static void dcssblk_release(struct gendisk *disk, fmode_t mode);
 static void dcssblk_submit_bio(struct bio *bio);
 static long dcssblk_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
-		long nr_pages, void **kaddr, pfn_t *pfn);
+		long nr_pages, enum dax_access_mode mode, void **kaddr,
+		pfn_t *pfn);
 
 static char dcssblk_segments[DCSSBLK_PARM_LEN] = "\0";
 
@@ -50,7 +51,8 @@ static int dcssblk_dax_zero_page_range(struct dax_device *dax_dev,
 	long rc;
 	void *kaddr;
 
-	rc = dax_direct_access(dax_dev, pgoff, nr_pages, &kaddr, NULL);
+	rc = dax_direct_access(dax_dev, pgoff, nr_pages, DAX_ACCESS,
+			&kaddr, NULL);
 	if (rc < 0)
 		return rc;
 	memset(kaddr, 0, nr_pages << PAGE_SHIFT);
@@ -927,7 +929,8 @@ __dcssblk_direct_access(struct dcssblk_dev_info *dev_info, pgoff_t pgoff,
 
 static long
 dcssblk_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
-		long nr_pages, void **kaddr, pfn_t *pfn)
+		long nr_pages, enum dax_access_mode mode, void **kaddr,
+		pfn_t *pfn)
 {
 	struct dcssblk_dev_info *dev_info = dax_get_private(dax_dev);
 
diff --git a/fs/dax.c b/fs/dax.c
index 67a08a32fccb..ef3103107104 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -721,7 +721,8 @@ static int copy_cow_page_dax(struct vm_fault *vmf, const struct iomap_iter *iter
 	int id;
 
 	id = dax_read_lock();
-	rc = dax_direct_access(iter->iomap.dax_dev, pgoff, 1, &kaddr, NULL);
+	rc = dax_direct_access(iter->iomap.dax_dev, pgoff, 1, DAX_ACCESS,
+				&kaddr, NULL);
 	if (rc < 0) {
 		dax_read_unlock(id);
 		return rc;
@@ -1013,7 +1014,7 @@ static int dax_iomap_pfn(const struct iomap *iomap, loff_t pos, size_t size,
 
 	id = dax_read_lock();
 	length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size),
-				   NULL, pfnp);
+				   DAX_ACCESS, NULL, pfnp);
 	if (length < 0) {
 		rc = length;
 		goto out;
@@ -1122,7 +1123,7 @@ static int dax_memzero(struct dax_device *dax_dev, pgoff_t pgoff,
 	void *kaddr;
 	long ret;
 
-	ret = dax_direct_access(dax_dev, pgoff, 1, &kaddr, NULL);
+	ret = dax_direct_access(dax_dev, pgoff, 1, DAX_ACCESS, &kaddr, NULL);
 	if (ret > 0) {
 		memset(kaddr + offset, 0, size);
 		dax_flush(dax_dev, kaddr + offset, size);
@@ -1247,7 +1248,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
 		}
 
 		map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size),
-				&kaddr, NULL);
+				DAX_ACCESS, &kaddr, NULL);
 		if (map_len < 0) {
 			ret = map_len;
 			break;
diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c
index d7d3a7f06862..10eb50cbf398 100644
--- a/fs/fuse/dax.c
+++ b/fs/fuse/dax.c
@@ -1241,8 +1241,8 @@ static int fuse_dax_mem_range_init(struct fuse_conn_dax *fcd)
 	INIT_DELAYED_WORK(&fcd->free_work, fuse_dax_free_mem_worker);
 
 	id = dax_read_lock();
-	nr_pages = dax_direct_access(fcd->dev, 0, PHYS_PFN(dax_size), NULL,
-				     NULL);
+	nr_pages = dax_direct_access(fcd->dev, 0, PHYS_PFN(dax_size),
+			DAX_ACCESS, NULL, NULL);
 	dax_read_unlock(id);
 	if (nr_pages < 0) {
 		pr_debug("dax_direct_access() returned %ld\n", nr_pages);
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 86b7dbb6a0d4..8db53fa67359 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -752,7 +752,8 @@ static void virtio_fs_cleanup_vqs(struct virtio_device *vdev,
  * offset.
  */
 static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
-				    long nr_pages, void **kaddr, pfn_t *pfn)
+				    long nr_pages, enum dax_access_mode mode,
+				    void **kaddr, pfn_t *pfn)
 {
 	struct virtio_fs *fs = dax_get_private(dax_dev);
 	phys_addr_t offset = PFN_PHYS(pgoff);
@@ -772,7 +773,8 @@ static int virtio_fs_zero_page_range(struct dax_device *dax_dev,
 	long rc;
 	void *kaddr;
 
-	rc = dax_direct_access(dax_dev, pgoff, nr_pages, &kaddr, NULL);
+	rc = dax_direct_access(dax_dev, pgoff, nr_pages, DAX_ACCESS, &kaddr,
+			       NULL);
 	if (rc < 0)
 		return rc;
 	memset(kaddr, 0, nr_pages << PAGE_SHIFT);
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 9fc5f99a0ae2..3f1339bce3c0 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -14,6 +14,11 @@ struct iomap_ops;
 struct iomap_iter;
 struct iomap;
 
+enum dax_access_mode {
+	DAX_ACCESS,
+	DAX_RECOVERY_WRITE,
+};
+
 struct dax_operations {
 	/*
 	 * direct_access: translate a device-relative
@@ -21,7 +26,7 @@ struct dax_operations {
 	 * number of pages available for DAX at that pfn.
 	 */
 	long (*direct_access)(struct dax_device *, pgoff_t, long,
-			void **, pfn_t *);
+			enum dax_access_mode, void **, pfn_t *);
 	/*
 	 * Validate whether this device is usable as an fsdax backing
 	 * device.
@@ -178,7 +183,7 @@ static inline void dax_read_unlock(int id)
 bool dax_alive(struct dax_device *dax_dev);
 void *dax_get_private(struct dax_device *dax_dev);
 long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
-		void **kaddr, pfn_t *pfn);
+		enum dax_access_mode mode, void **kaddr, pfn_t *pfn);
 size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
 		size_t bytes, struct iov_iter *i);
 size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index c2a3758c4aaa..acdedda0d12b 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -20,6 +20,7 @@ struct dm_table;
 struct dm_report_zones_args;
 struct mapped_device;
 struct bio_vec;
+enum dax_access_mode;
 
 /*
  * Type of table, mapped_device's mempool and request_queue
@@ -146,7 +147,8 @@ typedef int (*dm_busy_fn) (struct dm_target *ti);
  * >= 0 : the number of bytes accessible at the address
  */
 typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff,
-		long nr_pages, void **kaddr, pfn_t *pfn);
+		long nr_pages, enum dax_access_mode node, void **kaddr,
+		pfn_t *pfn);
 typedef int (*dm_dax_zero_page_range_fn)(struct dm_target *ti, pgoff_t pgoff,
 		size_t nr_pages);
 
diff --git a/tools/testing/nvdimm/pmem-dax.c b/tools/testing/nvdimm/pmem-dax.c
index af19c85558e7..c1ec099a3b1d 100644
--- a/tools/testing/nvdimm/pmem-dax.c
+++ b/tools/testing/nvdimm/pmem-dax.c
@@ -4,11 +4,13 @@
  */
 #include "test/nfit_test.h"
 #include <linux/blkdev.h>
+#include <linux/dax.h>
 #include <pmem.h>
 #include <nd.h>
 
 long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
-		long nr_pages, void **kaddr, pfn_t *pfn)
+		long nr_pages, enum dax_access_mode mode, void **kaddr,
+		pfn_t *pfn)
 {
 	resource_size_t offset = PFN_PHYS(pgoff) + pmem->data_offset;
 
-- 
cgit 


From 68084a13642001b73aade05819584f18945f3297 Mon Sep 17 00:00:00 2001
From: Yosry Ahmed <yosryahmed@google.com>
Date: Sat, 14 May 2022 00:21:15 +0000
Subject: selftests/bpf: Fix building bpf selftests statically

bpf selftests can no longer be built with CFLAGS=-static with
liburandom_read.so and its dependent target.

Filter out -static for liburandom_read.so and its dependent target.

When building statically, this leaves urandom_read relying on
system-wide shared libraries.

Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220514002115.1376033-1-yosryahmed@google.com
---
 tools/testing/selftests/bpf/Makefile | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 6bbc03161544..4030dd6cbc34 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -168,13 +168,15 @@ $(OUTPUT)/%:%.c
 	$(call msg,BINARY,,$@)
 	$(Q)$(LINK.c) $^ $(LDLIBS) -o $@
 
+# Filter out -static for liburandom_read.so and its dependent targets so that static builds
+# do not fail. Static builds leave urandom_read relying on system-wide shared libraries.
 $(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c
 	$(call msg,LIB,,$@)
-	$(Q)$(CC) $(CFLAGS) -fPIC $(LDFLAGS) $^ $(LDLIBS) --shared -o $@
+	$(Q)$(CC) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $^ $(LDLIBS) -fPIC -shared -o $@
 
 $(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_read.so
 	$(call msg,BINARY,,$@)
-	$(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.c,$^)			       \
+	$(Q)$(CC) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^)  \
 		  liburandom_read.so $(LDLIBS)	       			       \
 		  -Wl,-rpath=. -Wl,--build-id=sha1 -o $@
 
-- 
cgit 


From 38c84c997d40f56273077af5f8ff3e6317d772b7 Mon Sep 17 00:00:00 2001
From: Muhammad Usama Anjum <usama.anjum@collabora.com>
Date: Tue, 17 May 2022 18:29:31 +0500
Subject: selftests/lkdtm: Add configs for stackleak and "after free" tests

Add config options which are needed for LKDTM sub-tests:
STACKLEAK_ERASING test needs GCC_PLUGIN_STACKLEAK config.
READ_AFTER_FREE and READ_BUDDY_AFTER_FREE tests need
INIT_ON_FREE_DEFAULT_ON config.

Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20220517132932.1484719-1-usama.anjum@collabora.com
---
 tools/testing/selftests/lkdtm/config | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/lkdtm/config b/tools/testing/selftests/lkdtm/config
index 304123688739..5d52f64dfb43 100644
--- a/tools/testing/selftests/lkdtm/config
+++ b/tools/testing/selftests/lkdtm/config
@@ -2,8 +2,10 @@ CONFIG_LKDTM=y
 CONFIG_DEBUG_LIST=y
 CONFIG_SLAB_FREELIST_HARDENED=y
 CONFIG_FORTIFY_SOURCE=y
+CONFIG_GCC_PLUGIN_STACKLEAK=y
 CONFIG_HARDENED_USERCOPY=y
 CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT=y
+CONFIG_INIT_ON_FREE_DEFAULT_ON=y
 CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y
 CONFIG_UBSAN=y
 CONFIG_UBSAN_BOUNDS=y
-- 
cgit 


From 7ba106fcd4b441c5d6e3d1219104e022ca470d00 Mon Sep 17 00:00:00 2001
From: Danielle Ratson <danieller@nvidia.com>
Date: Wed, 18 May 2022 10:27:26 +0300
Subject: selftests: netdevsim: Increase sleep time in hw_stats_l3.sh test

hw_stats_l3.sh test is failing often for l3 stats shows less than 20
packets after 2 seconds sleep.

This is happening since there is a race between the 2 seconds sleep and
the netdevsim actually delivering the packets.

Increase the sleep time so the packets will be delivered successfully on
time.

Signed-off-by: Danielle Ratson <danieller@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/drivers/net/netdevsim/hw_stats_l3.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/drivers/net/netdevsim/hw_stats_l3.sh b/tools/testing/selftests/drivers/net/netdevsim/hw_stats_l3.sh
index fe1898402987..cba5ac08426b 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/hw_stats_l3.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/hw_stats_l3.sh
@@ -319,11 +319,11 @@ counter_test()
 	((pkts < 10))
 	check_err $? "$type stats show >= 10 packets after first enablement"
 
-	sleep 2
+	sleep 2.5
 
 	local pkts=$(get_hwstat dummy1 l3 rx.packets)
 	((pkts >= 20))
-	check_err $? "$type stats show < 20 packets after 2s passed"
+	check_err $? "$type stats show < 20 packets after 2.5s passed"
 
 	$IP stats set dev dummy1 ${type}_stats off
 
-- 
cgit 


From e7eaffce47b7db72b077630dbe836f0c4132496d Mon Sep 17 00:00:00 2001
From: David Gow <davidgow@google.com>
Date: Fri, 13 May 2022 16:51:08 +0800
Subject: kunit: tool: Use qemu-system-i386 for i386 runs

We're currently using the x86_64 qemu for i386 builds. While this is not
incorrect, it's probably more sensible to use the i386 one, which will
at least fail properly if we accidentally were to build a 64-bit kernel.

Signed-off-by: David Gow <davidgow@google.com>
Tested-by: Daniel Latypov <dlatypov@google.com>
Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/kunit/qemu_configs/i386.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/kunit/qemu_configs/i386.py b/tools/testing/kunit/qemu_configs/i386.py
index 52b80be40e4b..4463ebefd567 100644
--- a/tools/testing/kunit/qemu_configs/i386.py
+++ b/tools/testing/kunit/qemu_configs/i386.py
@@ -4,7 +4,7 @@ QEMU_ARCH = QemuArchParams(linux_arch='i386',
 			   kconfig='''
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y''',
-			   qemu_arch='x86_64',
+			   qemu_arch='i386',
 			   kernel_path='arch/x86/boot/bzImage',
 			   kernel_command_line='console=ttyS0',
 			   extra_qemu_params=[])
-- 
cgit 


From 70a1b25326dd77e145157ccf1a31c1948032eec4 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Mon, 16 May 2022 12:00:20 +0800
Subject: selftests/bpf: Add missed ima_setup.sh in Makefile

When build bpf test and install it to another folder, e.g.

  make -j10 install -C tools/testing/selftests/ TARGETS="bpf" \
	SKIP_TARGETS="" INSTALL_PATH=/tmp/kselftests

The ima_setup.sh is missed in target folder, which makes test_ima failed.

Fix it by adding ima_setup.sh to TEST_PROGS_EXTENDED.

Fixes: 34b82d3ac105 ("bpf: Add a selftest for bpf_ima_inode_hash")
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20220516040020.653291-1-liuhangbin@gmail.com
---
 tools/testing/selftests/bpf/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 4030dd6cbc34..2d3c8c8f558a 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -75,7 +75,7 @@ TEST_PROGS := test_kmod.sh \
 	test_xsk.sh
 
 TEST_PROGS_EXTENDED := with_addr.sh \
-	with_tunnels.sh \
+	with_tunnels.sh ima_setup.sh \
 	test_xdp_vlan.sh test_bpftool.py
 
 # Compile but not part of 'make run_tests'
-- 
cgit 


From 090f9dd092c6c2e9e4b9d0472b8d8628e4b851cb Mon Sep 17 00:00:00 2001
From: Joachim Wiberg <troglobit@gmail.com>
Date: Wed, 18 May 2022 17:16:30 +0200
Subject: selftests: forwarding: fix missing backslash

Fix missing backslash, introduced in f62c5acc800ee.  Causes all tests to
not be installed.

Fixes: f62c5acc800e ("selftests/net/forwarding: add missing tests to Makefile")
Signed-off-by: Joachim Wiberg <troglobit@gmail.com>
Acked-by: Hangbin Liu <liuhangbin@gmail.com>
Link: https://lore.kernel.org/r/20220518151630.2747773-1-troglobit@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/forwarding/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index c87e674b61b1..e811090f7748 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -86,7 +86,7 @@ TEST_PROGS = bridge_igmp.sh \
 	vxlan_bridge_1d_port_8472.sh \
 	vxlan_bridge_1d.sh \
 	vxlan_bridge_1q_ipv6.sh \
-	vxlan_bridge_1q_port_8472_ipv6.sh
+	vxlan_bridge_1q_port_8472_ipv6.sh \
 	vxlan_bridge_1q_port_8472.sh \
 	vxlan_bridge_1q.sh \
 	vxlan_symmetric_ipv6.sh \
-- 
cgit 


From 2e4ba0ec978335b4b550bbed95cb198ac3a00745 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 18 May 2022 16:34:43 -0700
Subject: cxl/pci: Move cxl_await_media_ready() to the core

Allow cxl_await_media_ready() to be mocked for testing purposes rather
than carrying the maintenance burden of an indirect function call in the
mainline driver.

With the move cxl_await_media_ready() can no longer reuse the mailbox
timeout override, so add a media_ready_timeout module parameter to the
core to backfill.

Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Link: https://lore.kernel.org/r/165291688340.1426646.4755627801983775011.stgit@dwillia2-xfh
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/cxl/core/pci.c        | 48 +++++++++++++++++++++++++++++++++++++++++++
 drivers/cxl/cxlmem.h          |  3 +--
 drivers/cxl/mem.c             |  2 +-
 drivers/cxl/pci.c             | 45 +---------------------------------------
 tools/testing/cxl/Kbuild      |  1 +
 tools/testing/cxl/test/mem.c  |  7 -------
 tools/testing/cxl/test/mock.c | 15 ++++++++++++++
 7 files changed, 67 insertions(+), 54 deletions(-)

(limited to 'tools/testing')

diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index c9a494d6976a..603945f49174 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -1,8 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /* Copyright(c) 2021 Intel Corporation. All rights reserved. */
+#include <linux/io-64-nonatomic-lo-hi.h>
 #include <linux/device.h>
+#include <linux/delay.h>
 #include <linux/pci.h>
 #include <cxlpci.h>
+#include <cxlmem.h>
 #include <cxl.h>
 #include "core.h"
 
@@ -13,6 +16,10 @@
  * a set of helpers for CXL interactions which occur via PCIe.
  */
 
+static unsigned short media_ready_timeout = 60;
+module_param(media_ready_timeout, ushort, 0644);
+MODULE_PARM_DESC(media_ready_timeout, "seconds to wait for media ready");
+
 struct cxl_walk_context {
 	struct pci_bus *bus;
 	struct cxl_port *port;
@@ -94,3 +101,44 @@ int devm_cxl_port_enumerate_dports(struct cxl_port *port)
 	return ctx.count;
 }
 EXPORT_SYMBOL_NS_GPL(devm_cxl_port_enumerate_dports, CXL);
+
+/*
+ * Wait up to @media_ready_timeout for the device to report memory
+ * active.
+ */
+int cxl_await_media_ready(struct cxl_dev_state *cxlds)
+{
+	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
+	int d = cxlds->cxl_dvsec;
+	bool active = false;
+	u64 md_status;
+	int rc, i;
+
+	for (i = media_ready_timeout; i; i--) {
+		u32 temp;
+
+		rc = pci_read_config_dword(
+			pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &temp);
+		if (rc)
+			return rc;
+
+		active = FIELD_GET(CXL_DVSEC_MEM_ACTIVE, temp);
+		if (active)
+			break;
+		msleep(1000);
+	}
+
+	if (!active) {
+		dev_err(&pdev->dev,
+			"timeout awaiting memory active after %d seconds\n",
+			media_ready_timeout);
+		return -ETIMEDOUT;
+	}
+
+	md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
+	if (!CXLMDEV_READY(md_status))
+		return -EIO;
+
+	return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_await_media_ready, CXL);
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 7235d2f976e5..843916c1dab6 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -192,7 +192,6 @@ struct cxl_endpoint_dvsec_info {
  * @info: Cached DVSEC information about the device.
  * @serial: PCIe Device Serial Number
  * @mbox_send: @dev specific transport for transmitting mailbox commands
- * @wait_media_ready: @dev specific method to await media ready
  *
  * See section 8.2.9.5.2 Capacity Configuration and Label Storage for
  * details on capacity parameters.
@@ -227,7 +226,6 @@ struct cxl_dev_state {
 	u64 serial;
 
 	int (*mbox_send)(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd);
-	int (*wait_media_ready)(struct cxl_dev_state *cxlds);
 };
 
 enum cxl_opcode {
@@ -348,6 +346,7 @@ struct cxl_mem_command {
 int cxl_mbox_send_cmd(struct cxl_dev_state *cxlds, u16 opcode, void *in,
 		      size_t in_size, void *out, size_t out_size);
 int cxl_dev_state_identify(struct cxl_dev_state *cxlds);
+int cxl_await_media_ready(struct cxl_dev_state *cxlds);
 int cxl_enumerate_cmds(struct cxl_dev_state *cxlds);
 int cxl_mem_create_range_info(struct cxl_dev_state *cxlds);
 struct cxl_dev_state *cxl_dev_state_create(struct device *dev);
diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
index 80e75a410499..8c3a1c85a7ae 100644
--- a/drivers/cxl/mem.c
+++ b/drivers/cxl/mem.c
@@ -165,7 +165,7 @@ unlock:
 	if (rc)
 		return rc;
 
-	rc = cxlds->wait_media_ready(cxlds);
+	rc = cxl_await_media_ready(cxlds);
 	if (rc) {
 		dev_err(dev, "Media not active (%d)\n", rc);
 		return rc;
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 91b266911e52..1bf880fa1fb8 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -48,8 +48,7 @@
  */
 static unsigned short mbox_ready_timeout = 60;
 module_param(mbox_ready_timeout, ushort, 0644);
-MODULE_PARM_DESC(mbox_ready_timeout,
-		 "seconds to wait for mailbox ready / memory active status");
+MODULE_PARM_DESC(mbox_ready_timeout, "seconds to wait for mailbox ready");
 
 static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds)
 {
@@ -419,46 +418,6 @@ static int wait_for_valid(struct cxl_dev_state *cxlds)
 	return -ETIMEDOUT;
 }
 
-/*
- * Wait up to @mbox_ready_timeout for the device to report memory
- * active.
- */
-static int cxl_await_media_ready(struct cxl_dev_state *cxlds)
-{
-	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
-	int d = cxlds->cxl_dvsec;
-	bool active = false;
-	u64 md_status;
-	int rc, i;
-
-	for (i = mbox_ready_timeout; i; i--) {
-		u32 temp;
-
-		rc = pci_read_config_dword(
-			pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &temp);
-		if (rc)
-			return rc;
-
-		active = FIELD_GET(CXL_DVSEC_MEM_ACTIVE, temp);
-		if (active)
-			break;
-		msleep(1000);
-	}
-
-	if (!active) {
-		dev_err(&pdev->dev,
-			"timeout awaiting memory active after %d seconds\n",
-			mbox_ready_timeout);
-		return -ETIMEDOUT;
-	}
-
-	md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
-	if (!CXLMDEV_READY(md_status))
-		return -EIO;
-
-	return 0;
-}
-
 /*
  * Return positive number of non-zero ranges on success and a negative
  * error code on failure. The cxl_mem driver depends on ranges == 0 to
@@ -589,8 +548,6 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		dev_warn(&pdev->dev,
 			 "Device DVSEC not present, skip CXL.mem init\n");
 
-	cxlds->wait_media_ready = cxl_await_media_ready;
-
 	rc = cxl_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map);
 	if (rc)
 		return rc;
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
index 82e49ab0937d..6007fe770122 100644
--- a/tools/testing/cxl/Kbuild
+++ b/tools/testing/cxl/Kbuild
@@ -8,6 +8,7 @@ ldflags-y += --wrap=devm_cxl_port_enumerate_dports
 ldflags-y += --wrap=devm_cxl_setup_hdm
 ldflags-y += --wrap=devm_cxl_add_passthrough_decoder
 ldflags-y += --wrap=devm_cxl_enumerate_decoders
+ldflags-y += --wrap=cxl_await_media_ready
 
 DRIVERS := ../../../drivers
 CXL_SRC := $(DRIVERS)/cxl
diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
index b6b726eff3e2..c519ace17b41 100644
--- a/tools/testing/cxl/test/mem.c
+++ b/tools/testing/cxl/test/mem.c
@@ -237,12 +237,6 @@ static int cxl_mock_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *
 	return rc;
 }
 
-static int cxl_mock_wait_media_ready(struct cxl_dev_state *cxlds)
-{
-	msleep(100);
-	return 0;
-}
-
 static void label_area_release(void *lsa)
 {
 	vfree(lsa);
@@ -278,7 +272,6 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
 
 	cxlds->serial = pdev->id;
 	cxlds->mbox_send = cxl_mock_mbox_send;
-	cxlds->wait_media_ready = cxl_mock_wait_media_ready;
 	cxlds->payload_size = SZ_4K;
 
 	rc = cxl_enumerate_cmds(cxlds);
diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c
index 6e8c9d63c92d..2c01d81ab014 100644
--- a/tools/testing/cxl/test/mock.c
+++ b/tools/testing/cxl/test/mock.c
@@ -193,6 +193,21 @@ int __wrap_devm_cxl_port_enumerate_dports(struct cxl_port *port)
 }
 EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_port_enumerate_dports, CXL);
 
+int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds)
+{
+	int rc, index;
+	struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
+
+	if (ops && ops->is_mock_dev(cxlds->dev))
+		rc = 0;
+	else
+		rc = cxl_await_media_ready(cxlds);
+	put_cxl_mock_ops(index);
+
+	return rc;
+}
+EXPORT_SYMBOL_NS_GPL(__wrap_cxl_await_media_ready, CXL);
+
 MODULE_LICENSE("GPL v2");
 MODULE_IMPORT_NS(ACPI);
 MODULE_IMPORT_NS(CXL);
-- 
cgit 


From 14d78874077442d1d0f08129f5a0ea5070984b4b Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 18 May 2022 16:34:48 -0700
Subject: cxl/mem: Consolidate CXL DVSEC Range enumeration in the core

In preparation for fixing the setting of the 'mem_enabled' bit in CXL
DVSEC Control register, move all CXL DVSEC range enumeration into the
same source file.

Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Link: https://lore.kernel.org/r/165291688886.1426646.15046138604010482084.stgit@dwillia2-xfh
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/cxl/core/pci.c        | 129 ++++++++++++++++++++++++++++++++++++++++
 drivers/cxl/cxlmem.h          |   1 -
 drivers/cxl/cxlpci.h          |   4 ++
 drivers/cxl/mem.c             |  14 +++--
 drivers/cxl/pci.c             | 135 ------------------------------------------
 tools/testing/cxl/Kbuild      |   1 +
 tools/testing/cxl/test/mem.c  |  10 ----
 tools/testing/cxl/test/mock.c |  16 +++++
 8 files changed, 158 insertions(+), 152 deletions(-)

(limited to 'tools/testing')

diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 603945f49174..ea6711721901 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -142,3 +142,132 @@ int cxl_await_media_ready(struct cxl_dev_state *cxlds)
 	return 0;
 }
 EXPORT_SYMBOL_NS_GPL(cxl_await_media_ready, CXL);
+
+static int wait_for_valid(struct cxl_dev_state *cxlds)
+{
+	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
+	int d = cxlds->cxl_dvsec, rc;
+	u32 val;
+
+	/*
+	 * Memory_Info_Valid: When set, indicates that the CXL Range 1 Size high
+	 * and Size Low registers are valid. Must be set within 1 second of
+	 * deassertion of reset to CXL device. Likely it is already set by the
+	 * time this runs, but otherwise give a 1.5 second timeout in case of
+	 * clock skew.
+	 */
+	rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val);
+	if (rc)
+		return rc;
+
+	if (val & CXL_DVSEC_MEM_INFO_VALID)
+		return 0;
+
+	msleep(1500);
+
+	rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val);
+	if (rc)
+		return rc;
+
+	if (val & CXL_DVSEC_MEM_INFO_VALID)
+		return 0;
+
+	return -ETIMEDOUT;
+}
+
+/*
+ * Return positive number of non-zero ranges on success and a negative
+ * error code on failure. The cxl_mem driver depends on ranges == 0 to
+ * init HDM operation.
+ */
+int cxl_dvsec_ranges(struct cxl_dev_state *cxlds,
+		     struct cxl_endpoint_dvsec_info *info)
+{
+	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
+	int hdm_count, rc, i, ranges = 0;
+	struct device *dev = &pdev->dev;
+	int d = cxlds->cxl_dvsec;
+	u16 cap, ctrl;
+
+	if (!d) {
+		dev_dbg(dev, "No DVSEC Capability\n");
+		return -ENXIO;
+	}
+
+	rc = pci_read_config_word(pdev, d + CXL_DVSEC_CAP_OFFSET, &cap);
+	if (rc)
+		return rc;
+
+	rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl);
+	if (rc)
+		return rc;
+
+	if (!(cap & CXL_DVSEC_MEM_CAPABLE)) {
+		dev_dbg(dev, "Not MEM Capable\n");
+		return -ENXIO;
+	}
+
+	/*
+	 * It is not allowed by spec for MEM.capable to be set and have 0 legacy
+	 * HDM decoders (values > 2 are also undefined as of CXL 2.0). As this
+	 * driver is for a spec defined class code which must be CXL.mem
+	 * capable, there is no point in continuing to enable CXL.mem.
+	 */
+	hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap);
+	if (!hdm_count || hdm_count > 2)
+		return -EINVAL;
+
+	rc = wait_for_valid(cxlds);
+	if (rc) {
+		dev_dbg(dev, "Failure awaiting MEM_INFO_VALID (%d)\n", rc);
+		return rc;
+	}
+
+	info->mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl);
+
+	for (i = 0; i < hdm_count; i++) {
+		u64 base, size;
+		u32 temp;
+
+		rc = pci_read_config_dword(
+			pdev, d + CXL_DVSEC_RANGE_SIZE_HIGH(i), &temp);
+		if (rc)
+			return rc;
+
+		size = (u64)temp << 32;
+
+		rc = pci_read_config_dword(
+			pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(i), &temp);
+		if (rc)
+			return rc;
+
+		size |= temp & CXL_DVSEC_MEM_SIZE_LOW_MASK;
+
+		rc = pci_read_config_dword(
+			pdev, d + CXL_DVSEC_RANGE_BASE_HIGH(i), &temp);
+		if (rc)
+			return rc;
+
+		base = (u64)temp << 32;
+
+		rc = pci_read_config_dword(
+			pdev, d + CXL_DVSEC_RANGE_BASE_LOW(i), &temp);
+		if (rc)
+			return rc;
+
+		base |= temp & CXL_DVSEC_MEM_BASE_LOW_MASK;
+
+		info->dvsec_range[i] = (struct range) {
+			.start = base,
+			.end = base + size - 1
+		};
+
+		if (size)
+			ranges++;
+	}
+
+	info->ranges = ranges;
+
+	return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_dvsec_ranges, CXL);
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 843916c1dab6..60d10ee1e7fc 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -222,7 +222,6 @@ struct cxl_dev_state {
 	u64 next_persistent_bytes;
 
 	resource_size_t component_reg_phys;
-	struct cxl_endpoint_dvsec_info info;
 	u64 serial;
 
 	int (*mbox_send)(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd);
diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
index 329e7ea3f36a..ad1b62843195 100644
--- a/drivers/cxl/cxlpci.h
+++ b/drivers/cxl/cxlpci.h
@@ -72,4 +72,8 @@ static inline resource_size_t cxl_regmap_to_base(struct pci_dev *pdev,
 }
 
 int devm_cxl_port_enumerate_dports(struct cxl_port *port);
+struct cxl_dev_state;
+struct cxl_endpoint_dvsec_info;
+int cxl_dvsec_ranges(struct cxl_dev_state *cxlds,
+		     struct cxl_endpoint_dvsec_info *info);
 #endif /* __CXL_PCI_H__ */
diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
index 8c3a1c85a7ae..0cfbde134fc7 100644
--- a/drivers/cxl/mem.c
+++ b/drivers/cxl/mem.c
@@ -58,18 +58,15 @@ static int create_endpoint(struct cxl_memdev *cxlmd,
  * decoders, or if it can not be determined if DVSEC Ranges are in use.
  * Otherwise, returns true.
  */
-__mock bool cxl_hdm_decode_init(struct cxl_dev_state *cxlds)
+__mock bool cxl_hdm_decode_init(struct cxl_dev_state *cxlds,
+				struct cxl_endpoint_dvsec_info *info)
 {
-	struct cxl_endpoint_dvsec_info *info = &cxlds->info;
 	struct cxl_register_map map;
 	struct cxl_component_reg_map *cmap = &map.component_map;
 	bool global_enable, retval = false;
 	void __iomem *crb;
 	u32 global_ctrl;
 
-	if (info->ranges < 0)
-		return false;
-
 	/* map hdm decoder */
 	crb = ioremap(cxlds->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE);
 	if (!crb) {
@@ -125,6 +122,7 @@ static void enable_suspend(void *data)
 static int cxl_mem_probe(struct device *dev)
 {
 	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+	struct cxl_endpoint_dvsec_info info = { 0 };
 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
 	struct cxl_port *parent_port;
 	int rc;
@@ -165,6 +163,10 @@ unlock:
 	if (rc)
 		return rc;
 
+	rc = cxl_dvsec_ranges(cxlds, &info);
+	if (rc)
+		return rc;
+
 	rc = cxl_await_media_ready(cxlds);
 	if (rc) {
 		dev_err(dev, "Media not active (%d)\n", rc);
@@ -175,7 +177,7 @@ unlock:
 	 * If DVSEC ranges are being used instead of HDM decoder registers there
 	 * is no use in trying to manage those.
 	 */
-	if (!cxl_hdm_decode_init(cxlds)) {
+	if (!cxl_hdm_decode_init(cxlds, &info)) {
 		dev_err(dev,
 			"Legacy range registers configuration prevents HDM operation.\n");
 		return -EBUSY;
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 1bf880fa1fb8..5a0ae46d4989 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -386,139 +386,6 @@ static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type,
 	return rc;
 }
 
-static int wait_for_valid(struct cxl_dev_state *cxlds)
-{
-	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
-	int d = cxlds->cxl_dvsec, rc;
-	u32 val;
-
-	/*
-	 * Memory_Info_Valid: When set, indicates that the CXL Range 1 Size high
-	 * and Size Low registers are valid. Must be set within 1 second of
-	 * deassertion of reset to CXL device. Likely it is already set by the
-	 * time this runs, but otherwise give a 1.5 second timeout in case of
-	 * clock skew.
-	 */
-	rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val);
-	if (rc)
-		return rc;
-
-	if (val & CXL_DVSEC_MEM_INFO_VALID)
-		return 0;
-
-	msleep(1500);
-
-	rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val);
-	if (rc)
-		return rc;
-
-	if (val & CXL_DVSEC_MEM_INFO_VALID)
-		return 0;
-
-	return -ETIMEDOUT;
-}
-
-/*
- * Return positive number of non-zero ranges on success and a negative
- * error code on failure. The cxl_mem driver depends on ranges == 0 to
- * init HDM operation.
- */
-static int __cxl_dvsec_ranges(struct cxl_dev_state *cxlds,
-			      struct cxl_endpoint_dvsec_info *info)
-{
-	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
-	int hdm_count, rc, i, ranges = 0;
-	struct device *dev = &pdev->dev;
-	int d = cxlds->cxl_dvsec;
-	u16 cap, ctrl;
-
-	if (!d) {
-		dev_dbg(dev, "No DVSEC Capability\n");
-		return -ENXIO;
-	}
-
-	rc = pci_read_config_word(pdev, d + CXL_DVSEC_CAP_OFFSET, &cap);
-	if (rc)
-		return rc;
-
-	rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl);
-	if (rc)
-		return rc;
-
-	if (!(cap & CXL_DVSEC_MEM_CAPABLE)) {
-		dev_dbg(dev, "Not MEM Capable\n");
-		return -ENXIO;
-	}
-
-	/*
-	 * It is not allowed by spec for MEM.capable to be set and have 0 legacy
-	 * HDM decoders (values > 2 are also undefined as of CXL 2.0). As this
-	 * driver is for a spec defined class code which must be CXL.mem
-	 * capable, there is no point in continuing to enable CXL.mem.
-	 */
-	hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap);
-	if (!hdm_count || hdm_count > 2)
-		return -EINVAL;
-
-	rc = wait_for_valid(cxlds);
-	if (rc) {
-		dev_dbg(dev, "Failure awaiting MEM_INFO_VALID (%d)\n", rc);
-		return rc;
-	}
-
-	info->mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl);
-
-	for (i = 0; i < hdm_count; i++) {
-		u64 base, size;
-		u32 temp;
-
-		rc = pci_read_config_dword(
-			pdev, d + CXL_DVSEC_RANGE_SIZE_HIGH(i), &temp);
-		if (rc)
-			return rc;
-
-		size = (u64)temp << 32;
-
-		rc = pci_read_config_dword(
-			pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(i), &temp);
-		if (rc)
-			return rc;
-
-		size |= temp & CXL_DVSEC_MEM_SIZE_LOW_MASK;
-
-		rc = pci_read_config_dword(
-			pdev, d + CXL_DVSEC_RANGE_BASE_HIGH(i), &temp);
-		if (rc)
-			return rc;
-
-		base = (u64)temp << 32;
-
-		rc = pci_read_config_dword(
-			pdev, d + CXL_DVSEC_RANGE_BASE_LOW(i), &temp);
-		if (rc)
-			return rc;
-
-		base |= temp & CXL_DVSEC_MEM_BASE_LOW_MASK;
-
-		info->dvsec_range[i] = (struct range) {
-			.start = base,
-			.end = base + size - 1
-		};
-
-		if (size)
-			ranges++;
-	}
-
-	return ranges;
-}
-
-static void cxl_dvsec_ranges(struct cxl_dev_state *cxlds)
-{
-	struct cxl_endpoint_dvsec_info *info = &cxlds->info;
-
-	info->ranges = __cxl_dvsec_ranges(cxlds, info);
-}
-
 static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	struct cxl_register_map map;
@@ -583,8 +450,6 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (rc)
 		return rc;
 
-	cxl_dvsec_ranges(cxlds);
-
 	cxlmd = devm_cxl_add_memdev(cxlds);
 	if (IS_ERR(cxlmd))
 		return PTR_ERR(cxlmd);
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
index 6007fe770122..2ea6fcb8baa5 100644
--- a/tools/testing/cxl/Kbuild
+++ b/tools/testing/cxl/Kbuild
@@ -9,6 +9,7 @@ ldflags-y += --wrap=devm_cxl_setup_hdm
 ldflags-y += --wrap=devm_cxl_add_passthrough_decoder
 ldflags-y += --wrap=devm_cxl_enumerate_decoders
 ldflags-y += --wrap=cxl_await_media_ready
+ldflags-y += --wrap=cxl_dvsec_ranges
 
 DRIVERS := ../../../drivers
 CXL_SRC := $(DRIVERS)/cxl
diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
index c519ace17b41..6b9239b2afd4 100644
--- a/tools/testing/cxl/test/mem.c
+++ b/tools/testing/cxl/test/mem.c
@@ -242,14 +242,6 @@ static void label_area_release(void *lsa)
 	vfree(lsa);
 }
 
-static void mock_validate_dvsec_ranges(struct cxl_dev_state *cxlds)
-{
-	struct cxl_endpoint_dvsec_info *info;
-
-	info = &cxlds->info;
-	info->mem_enabled = true;
-}
-
 static int cxl_mock_mem_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -286,8 +278,6 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
 	if (rc)
 		return rc;
 
-	mock_validate_dvsec_ranges(cxlds);
-
 	cxlmd = devm_cxl_add_memdev(cxlds);
 	if (IS_ERR(cxlmd))
 		return PTR_ERR(cxlmd);
diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c
index 2c01d81ab014..d6aa644822db 100644
--- a/tools/testing/cxl/test/mock.c
+++ b/tools/testing/cxl/test/mock.c
@@ -208,6 +208,22 @@ int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds)
 }
 EXPORT_SYMBOL_NS_GPL(__wrap_cxl_await_media_ready, CXL);
 
+int __wrap_cxl_dvsec_ranges(struct cxl_dev_state *cxlds,
+			    struct cxl_endpoint_dvsec_info *info)
+{
+	int rc = 0, index;
+	struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
+
+	if (ops && ops->is_mock_dev(cxlds->dev))
+		info->mem_enabled = 1;
+	else
+		rc = cxl_dvsec_ranges(cxlds, info);
+	put_cxl_mock_ops(index);
+
+	return rc;
+}
+EXPORT_SYMBOL_NS_GPL(__wrap_cxl_dvsec_ranges, CXL);
+
 MODULE_LICENSE("GPL v2");
 MODULE_IMPORT_NS(ACPI);
 MODULE_IMPORT_NS(CXL);
-- 
cgit 


From a12562bb70776093b270f79a4b6ef18f4bcead2b Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 18 May 2022 16:35:00 -0700
Subject: cxl/mem: Merge cxl_dvsec_ranges() and cxl_hdm_decode_init()

In preparation for changing how the driver handles 'mem_enable' in the CXL
DVSEC control register. Merge the contents of cxl_hdm_decode_init() into
cxl_dvsec_ranges() and rename the combined function cxl_hdm_decode_init().
The possible cleanups and fixes that result from this merge are saved for a
follow-on change.

Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Link: https://lore.kernel.org/r/165291690027.1426646.10249756632415633752.stgit@dwillia2-xfh
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/cxl/core/pci.c        | 82 +++++++++++++++++++++++++++++++++++++++----
 drivers/cxl/cxlpci.h          |  4 +--
 drivers/cxl/mem.c             | 80 +----------------------------------------
 tools/testing/cxl/Kbuild      |  3 +-
 tools/testing/cxl/mock_mem.c  | 10 ------
 tools/testing/cxl/test/mock.c |  8 ++---
 6 files changed, 83 insertions(+), 104 deletions(-)
 delete mode 100644 tools/testing/cxl/mock_mem.c

(limited to 'tools/testing')

diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index f3e59f8b6621..0fbda1a1ca1b 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -175,13 +175,71 @@ static int wait_for_valid(struct cxl_dev_state *cxlds)
 	return -ETIMEDOUT;
 }
 
-/*
- * Return positive number of non-zero ranges on success and a negative
- * error code on failure. The cxl_mem driver depends on ranges == 0 to
- * init HDM operation.
+static bool __cxl_hdm_decode_init(struct cxl_dev_state *cxlds,
+				  struct cxl_endpoint_dvsec_info *info)
+{
+	struct cxl_register_map map;
+	struct cxl_component_reg_map *cmap = &map.component_map;
+	bool global_enable, retval = false;
+	void __iomem *crb;
+	u32 global_ctrl;
+
+	/* map hdm decoder */
+	crb = ioremap(cxlds->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE);
+	if (!crb) {
+		dev_dbg(cxlds->dev, "Failed to map component registers\n");
+		return false;
+	}
+
+	cxl_probe_component_regs(cxlds->dev, crb, cmap);
+	if (!cmap->hdm_decoder.valid) {
+		dev_dbg(cxlds->dev, "Invalid HDM decoder registers\n");
+		goto out;
+	}
+
+	global_ctrl = readl(crb + cmap->hdm_decoder.offset +
+			    CXL_HDM_DECODER_CTRL_OFFSET);
+	global_enable = global_ctrl & CXL_HDM_DECODER_ENABLE;
+
+	/*
+	 * Per CXL 2.0 Section 8.1.3.8.3 and 8.1.3.8.4 DVSEC CXL Range 1 Base
+	 * [High,Low] when HDM operation is enabled the range register values
+	 * are ignored by the device, but the spec also recommends matching the
+	 * DVSEC Range 1,2 to HDM Decoder Range 0,1. So, non-zero info->ranges
+	 * are expected even though Linux does not require or maintain that
+	 * match.
+	 */
+	if (!global_enable && info->mem_enabled && info->ranges)
+		goto out;
+
+	retval = true;
+
+	/*
+	 * Permanently (for this boot at least) opt the device into HDM
+	 * operation. Individual HDM decoders still need to be enabled after
+	 * this point.
+	 */
+	if (!global_enable) {
+		dev_dbg(cxlds->dev, "Enabling HDM decode\n");
+		writel(global_ctrl | CXL_HDM_DECODER_ENABLE,
+		       crb + cmap->hdm_decoder.offset +
+			       CXL_HDM_DECODER_CTRL_OFFSET);
+	}
+
+out:
+	iounmap(crb);
+	return retval;
+}
+
+/**
+ * cxl_hdm_decode_init() - Setup HDM decoding for the endpoint
+ * @cxlds: Device state
+ * @info: DVSEC Range cached enumeration
+ *
+ * Try to enable the endpoint's HDM Decoder Capability
  */
-int cxl_dvsec_ranges(struct cxl_dev_state *cxlds,
-		     struct cxl_endpoint_dvsec_info *info)
+int cxl_hdm_decode_init(struct cxl_dev_state *cxlds,
+			struct cxl_endpoint_dvsec_info *info)
 {
 	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
 	int hdm_count, rc, i, ranges = 0;
@@ -270,6 +328,16 @@ int cxl_dvsec_ranges(struct cxl_dev_state *cxlds,
 
 	info->ranges = ranges;
 
+	/*
+	 * If DVSEC ranges are being used instead of HDM decoder registers there
+	 * is no use in trying to manage those.
+	 */
+	if (!__cxl_hdm_decode_init(cxlds, info)) {
+		dev_err(dev,
+			"Legacy range registers configuration prevents HDM operation.\n");
+		return -EBUSY;
+	}
+
 	return 0;
 }
-EXPORT_SYMBOL_NS_GPL(cxl_dvsec_ranges, CXL);
+EXPORT_SYMBOL_NS_GPL(cxl_hdm_decode_init, CXL);
diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
index ad1b62843195..202fdaa8d293 100644
--- a/drivers/cxl/cxlpci.h
+++ b/drivers/cxl/cxlpci.h
@@ -74,6 +74,6 @@ static inline resource_size_t cxl_regmap_to_base(struct pci_dev *pdev,
 int devm_cxl_port_enumerate_dports(struct cxl_port *port);
 struct cxl_dev_state;
 struct cxl_endpoint_dvsec_info;
-int cxl_dvsec_ranges(struct cxl_dev_state *cxlds,
-		     struct cxl_endpoint_dvsec_info *info);
+int cxl_hdm_decode_init(struct cxl_dev_state *cxlds,
+			struct cxl_endpoint_dvsec_info *info);
 #endif /* __CXL_PCI_H__ */
diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
index 902d1f6e189e..2a5dc92d566f 100644
--- a/drivers/cxl/mem.c
+++ b/drivers/cxl/mem.c
@@ -46,74 +46,6 @@ static int create_endpoint(struct cxl_memdev *cxlmd,
 	return cxl_endpoint_autoremove(cxlmd, endpoint);
 }
 
-/**
- * cxl_hdm_decode_init() - Setup HDM decoding for the endpoint
- * @cxlds: Device state
- *
- * Additionally, enables global HDM decoding. Warning: don't call this outside
- * of probe. Once probe is complete, the port driver owns all access to the HDM
- * decoder registers.
- *
- * Returns: false if DVSEC Ranges are being used instead of HDM
- * decoders, or if it can not be determined if DVSEC Ranges are in use.
- * Otherwise, returns true.
- */
-__mock bool cxl_hdm_decode_init(struct cxl_dev_state *cxlds,
-				struct cxl_endpoint_dvsec_info *info)
-{
-	struct cxl_register_map map;
-	struct cxl_component_reg_map *cmap = &map.component_map;
-	bool global_enable, retval = false;
-	void __iomem *crb;
-	u32 global_ctrl;
-
-	/* map hdm decoder */
-	crb = ioremap(cxlds->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE);
-	if (!crb) {
-		dev_dbg(cxlds->dev, "Failed to map component registers\n");
-		return false;
-	}
-
-	cxl_probe_component_regs(cxlds->dev, crb, cmap);
-	if (!cmap->hdm_decoder.valid) {
-		dev_dbg(cxlds->dev, "Invalid HDM decoder registers\n");
-		goto out;
-	}
-
-	global_ctrl = readl(crb + cmap->hdm_decoder.offset +
-			    CXL_HDM_DECODER_CTRL_OFFSET);
-	global_enable = global_ctrl & CXL_HDM_DECODER_ENABLE;
-
-	/*
-	 * Per CXL 2.0 Section 8.1.3.8.3 and 8.1.3.8.4 DVSEC CXL Range 1 Base
-	 * [High,Low] when HDM operation is enabled the range register values
-	 * are ignored by the device, but the spec also recommends matching the
-	 * DVSEC Range 1,2 to HDM Decoder Range 0,1. So, non-zero info->ranges
-	 * are expected even though Linux does not require or maintain that
-	 * match.
-	 */
-	if (!global_enable && info->mem_enabled && info->ranges)
-		goto out;
-
-	retval = true;
-
-	/*
-	 * Permanently (for this boot at least) opt the device into HDM
-	 * operation. Individual HDM decoders still need to be enabled after
-	 * this point.
-	 */
-	if (!global_enable) {
-		dev_dbg(cxlds->dev, "Enabling HDM decode\n");
-		writel(global_ctrl | CXL_HDM_DECODER_ENABLE,
-		       crb + cmap->hdm_decoder.offset +
-			       CXL_HDM_DECODER_CTRL_OFFSET);
-	}
-
-out:
-	iounmap(crb);
-	return retval;
-}
-
 static void enable_suspend(void *data)
 {
 	cxl_mem_active_dec();
@@ -163,7 +95,7 @@ unlock:
 	if (rc)
 		return rc;
 
-	rc = cxl_dvsec_ranges(cxlds, &info);
+	rc = cxl_hdm_decode_init(cxlds, &info);
 	if (rc)
 		return rc;
 
@@ -173,16 +105,6 @@ unlock:
 		return rc;
 	}
 
-	/*
-	 * If DVSEC ranges are being used instead of HDM decoder registers there
-	 * is no use in trying to manage those.
-	 */
-	if (!cxl_hdm_decode_init(cxlds, &info)) {
-		dev_err(dev,
-			"Legacy range registers configuration prevents HDM operation.\n");
-		return -EBUSY;
-	}
-
 	/*
 	 * The kernel may be operating out of CXL memory on this device,
 	 * there is no spec defined way to determine whether this device
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
index 2ea6fcb8baa5..33543231d453 100644
--- a/tools/testing/cxl/Kbuild
+++ b/tools/testing/cxl/Kbuild
@@ -9,7 +9,7 @@ ldflags-y += --wrap=devm_cxl_setup_hdm
 ldflags-y += --wrap=devm_cxl_add_passthrough_decoder
 ldflags-y += --wrap=devm_cxl_enumerate_decoders
 ldflags-y += --wrap=cxl_await_media_ready
-ldflags-y += --wrap=cxl_dvsec_ranges
+ldflags-y += --wrap=cxl_hdm_decode_init
 
 DRIVERS := ../../../drivers
 CXL_SRC := $(DRIVERS)/cxl
@@ -36,7 +36,6 @@ cxl_port-y += config_check.o
 obj-m += cxl_mem.o
 
 cxl_mem-y := $(CXL_SRC)/mem.o
-cxl_mem-y += mock_mem.o
 cxl_mem-y += config_check.o
 
 obj-m += cxl_core.o
diff --git a/tools/testing/cxl/mock_mem.c b/tools/testing/cxl/mock_mem.c
deleted file mode 100644
index 69946f678cfa..000000000000
--- a/tools/testing/cxl/mock_mem.c
+++ /dev/null
@@ -1,10 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
-
-#include <linux/types.h>
-
-struct cxl_dev_state;
-bool cxl_hdm_decode_init(struct cxl_dev_state *cxlds)
-{
-	return true;
-}
diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c
index d6aa644822db..ddf0e7dd9249 100644
--- a/tools/testing/cxl/test/mock.c
+++ b/tools/testing/cxl/test/mock.c
@@ -208,8 +208,8 @@ int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds)
 }
 EXPORT_SYMBOL_NS_GPL(__wrap_cxl_await_media_ready, CXL);
 
-int __wrap_cxl_dvsec_ranges(struct cxl_dev_state *cxlds,
-			    struct cxl_endpoint_dvsec_info *info)
+int __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds,
+				struct cxl_endpoint_dvsec_info *info)
 {
 	int rc = 0, index;
 	struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
@@ -217,12 +217,12 @@ int __wrap_cxl_dvsec_ranges(struct cxl_dev_state *cxlds,
 	if (ops && ops->is_mock_dev(cxlds->dev))
 		info->mem_enabled = 1;
 	else
-		rc = cxl_dvsec_ranges(cxlds, info);
+		rc = cxl_hdm_decode_init(cxlds, info);
 	put_cxl_mock_ops(index);
 
 	return rc;
 }
-EXPORT_SYMBOL_NS_GPL(__wrap_cxl_dvsec_ranges, CXL);
+EXPORT_SYMBOL_NS_GPL(__wrap_cxl_hdm_decode_init, CXL);
 
 MODULE_LICENSE("GPL v2");
 MODULE_IMPORT_NS(ACPI);
-- 
cgit 


From 92804edb11f065aadb3a4f398bed8a846a035cd3 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 18 May 2022 16:35:06 -0700
Subject: cxl/pci: Drop @info argument to cxl_hdm_decode_init()

Now that nothing external to cxl_hdm_decode_init() considers
'struct cxl_endpoint_dvec_info' move it internal to
cxl_hdm_decode_init().

Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Link: https://lore.kernel.org/r/165291690612.1426646.7866084245521113414.stgit@dwillia2-xfh
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/cxl/core/pci.c        | 15 +++++++--------
 drivers/cxl/cxlpci.h          |  4 +---
 drivers/cxl/mem.c             |  3 +--
 tools/testing/cxl/test/mock.c |  9 +++------
 4 files changed, 12 insertions(+), 19 deletions(-)

(limited to 'tools/testing')

diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 0fbda1a1ca1b..7d2238edc379 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -234,14 +234,13 @@ out:
 /**
  * cxl_hdm_decode_init() - Setup HDM decoding for the endpoint
  * @cxlds: Device state
- * @info: DVSEC Range cached enumeration
  *
  * Try to enable the endpoint's HDM Decoder Capability
  */
-int cxl_hdm_decode_init(struct cxl_dev_state *cxlds,
-			struct cxl_endpoint_dvsec_info *info)
+int cxl_hdm_decode_init(struct cxl_dev_state *cxlds)
 {
 	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
+	struct cxl_endpoint_dvsec_info info = { 0 };
 	int hdm_count, rc, i, ranges = 0;
 	struct device *dev = &pdev->dev;
 	int d = cxlds->cxl_dvsec;
@@ -281,8 +280,8 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds,
 		return rc;
 	}
 
-	info->mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl);
-	if (!info->mem_enabled)
+	info.mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl);
+	if (!info.mem_enabled)
 		return 0;
 
 	for (i = 0; i < hdm_count; i++) {
@@ -317,7 +316,7 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds,
 
 		base |= temp & CXL_DVSEC_MEM_BASE_LOW_MASK;
 
-		info->dvsec_range[i] = (struct range) {
+		info.dvsec_range[i] = (struct range) {
 			.start = base,
 			.end = base + size - 1
 		};
@@ -326,13 +325,13 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds,
 			ranges++;
 	}
 
-	info->ranges = ranges;
+	info.ranges = ranges;
 
 	/*
 	 * If DVSEC ranges are being used instead of HDM decoder registers there
 	 * is no use in trying to manage those.
 	 */
-	if (!__cxl_hdm_decode_init(cxlds, info)) {
+	if (!__cxl_hdm_decode_init(cxlds, &info)) {
 		dev_err(dev,
 			"Legacy range registers configuration prevents HDM operation.\n");
 		return -EBUSY;
diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
index 202fdaa8d293..53cd34f8813c 100644
--- a/drivers/cxl/cxlpci.h
+++ b/drivers/cxl/cxlpci.h
@@ -73,7 +73,5 @@ static inline resource_size_t cxl_regmap_to_base(struct pci_dev *pdev,
 
 int devm_cxl_port_enumerate_dports(struct cxl_port *port);
 struct cxl_dev_state;
-struct cxl_endpoint_dvsec_info;
-int cxl_hdm_decode_init(struct cxl_dev_state *cxlds,
-			struct cxl_endpoint_dvsec_info *info);
+int cxl_hdm_decode_init(struct cxl_dev_state *cxlds);
 #endif /* __CXL_PCI_H__ */
diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
index 2a5dc92d566f..8ce89d128e36 100644
--- a/drivers/cxl/mem.c
+++ b/drivers/cxl/mem.c
@@ -54,7 +54,6 @@ static void enable_suspend(void *data)
 static int cxl_mem_probe(struct device *dev)
 {
 	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
-	struct cxl_endpoint_dvsec_info info = { 0 };
 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
 	struct cxl_port *parent_port;
 	int rc;
@@ -95,7 +94,7 @@ unlock:
 	if (rc)
 		return rc;
 
-	rc = cxl_hdm_decode_init(cxlds, &info);
+	rc = cxl_hdm_decode_init(cxlds);
 	if (rc)
 		return rc;
 
diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c
index ddf0e7dd9249..45ffbb8f519a 100644
--- a/tools/testing/cxl/test/mock.c
+++ b/tools/testing/cxl/test/mock.c
@@ -208,16 +208,13 @@ int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds)
 }
 EXPORT_SYMBOL_NS_GPL(__wrap_cxl_await_media_ready, CXL);
 
-int __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds,
-				struct cxl_endpoint_dvsec_info *info)
+bool __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds)
 {
 	int rc = 0, index;
 	struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
 
-	if (ops && ops->is_mock_dev(cxlds->dev))
-		info->mem_enabled = 1;
-	else
-		rc = cxl_hdm_decode_init(cxlds, info);
+	if (!ops || !ops->is_mock_dev(cxlds->dev))
+		rc = cxl_hdm_decode_init(cxlds);
 	put_cxl_mock_ops(index);
 
 	return rc;
-- 
cgit 


From fcfbc93cc33ec601f00f113eca6fc484b930532d Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 18 May 2022 16:35:17 -0700
Subject: cxl/port: Reuse 'struct cxl_hdm' context for hdm init

The port driver maps component registers for port operations. Reuse that
mapping for HDM Decoder Capability setup / enable. Move
devm_cxl_setup_hdm() before cxl_hdm_decode_init() and plumb @cxlhdm
through the hdm init helpers.

Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Link: https://lore.kernel.org/r/165291691712.1426646.14336397551571515480.stgit@dwillia2-xfh
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/cxl/core/pci.c        | 39 ++++++++++-----------------------------
 drivers/cxl/cxlpci.h          |  2 +-
 drivers/cxl/port.c            | 25 ++++++++++++++-----------
 tools/testing/cxl/test/mock.c |  5 +++--
 4 files changed, 28 insertions(+), 43 deletions(-)

(limited to 'tools/testing')

diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 7d2238edc379..3305e9b750af 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -176,29 +176,14 @@ static int wait_for_valid(struct cxl_dev_state *cxlds)
 }
 
 static bool __cxl_hdm_decode_init(struct cxl_dev_state *cxlds,
+				  struct cxl_hdm *cxlhdm,
 				  struct cxl_endpoint_dvsec_info *info)
 {
-	struct cxl_register_map map;
-	struct cxl_component_reg_map *cmap = &map.component_map;
-	bool global_enable, retval = false;
-	void __iomem *crb;
+	void __iomem *hdm = cxlhdm->regs.hdm_decoder;
+	bool global_enable;
 	u32 global_ctrl;
 
-	/* map hdm decoder */
-	crb = ioremap(cxlds->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE);
-	if (!crb) {
-		dev_dbg(cxlds->dev, "Failed to map component registers\n");
-		return false;
-	}
-
-	cxl_probe_component_regs(cxlds->dev, crb, cmap);
-	if (!cmap->hdm_decoder.valid) {
-		dev_dbg(cxlds->dev, "Invalid HDM decoder registers\n");
-		goto out;
-	}
-
-	global_ctrl = readl(crb + cmap->hdm_decoder.offset +
-			    CXL_HDM_DECODER_CTRL_OFFSET);
+	global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET);
 	global_enable = global_ctrl & CXL_HDM_DECODER_ENABLE;
 
 	/*
@@ -210,9 +195,7 @@ static bool __cxl_hdm_decode_init(struct cxl_dev_state *cxlds,
 	 * match.
 	 */
 	if (!global_enable && info->mem_enabled && info->ranges)
-		goto out;
-
-	retval = true;
+		return false;
 
 	/*
 	 * Permanently (for this boot at least) opt the device into HDM
@@ -222,22 +205,20 @@ static bool __cxl_hdm_decode_init(struct cxl_dev_state *cxlds,
 	if (!global_enable) {
 		dev_dbg(cxlds->dev, "Enabling HDM decode\n");
 		writel(global_ctrl | CXL_HDM_DECODER_ENABLE,
-		       crb + cmap->hdm_decoder.offset +
-			       CXL_HDM_DECODER_CTRL_OFFSET);
+		       hdm + CXL_HDM_DECODER_CTRL_OFFSET);
 	}
 
-out:
-	iounmap(crb);
-	return retval;
+	return true;
 }
 
 /**
  * cxl_hdm_decode_init() - Setup HDM decoding for the endpoint
  * @cxlds: Device state
+ * @cxlhdm: Mapped HDM decoder Capability
  *
  * Try to enable the endpoint's HDM Decoder Capability
  */
-int cxl_hdm_decode_init(struct cxl_dev_state *cxlds)
+int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm)
 {
 	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
 	struct cxl_endpoint_dvsec_info info = { 0 };
@@ -331,7 +312,7 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds)
 	 * If DVSEC ranges are being used instead of HDM decoder registers there
 	 * is no use in trying to manage those.
 	 */
-	if (!__cxl_hdm_decode_init(cxlds, &info)) {
+	if (!__cxl_hdm_decode_init(cxlds, cxlhdm, &info)) {
 		dev_err(dev,
 			"Legacy range registers configuration prevents HDM operation.\n");
 		return -EBUSY;
diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
index 53cd34f8813c..fce1c11729c2 100644
--- a/drivers/cxl/cxlpci.h
+++ b/drivers/cxl/cxlpci.h
@@ -73,5 +73,5 @@ static inline resource_size_t cxl_regmap_to_base(struct pci_dev *pdev,
 
 int devm_cxl_port_enumerate_dports(struct cxl_port *port);
 struct cxl_dev_state;
-int cxl_hdm_decode_init(struct cxl_dev_state *cxlds);
+int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm);
 #endif /* __CXL_PCI_H__ */
diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
index a7deaeaf0276..3cf308f114c4 100644
--- a/drivers/cxl/port.c
+++ b/drivers/cxl/port.c
@@ -36,6 +36,19 @@ static int cxl_port_probe(struct device *dev)
 	struct cxl_hdm *cxlhdm;
 	int rc;
 
+
+	if (!is_cxl_endpoint(port)) {
+		rc = devm_cxl_port_enumerate_dports(port);
+		if (rc < 0)
+			return rc;
+		if (rc == 1)
+			return devm_cxl_add_passthrough_decoder(port);
+	}
+
+	cxlhdm = devm_cxl_setup_hdm(port);
+	if (IS_ERR(cxlhdm))
+		return PTR_ERR(cxlhdm);
+
 	if (is_cxl_endpoint(port)) {
 		struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport);
 		struct cxl_dev_state *cxlds = cxlmd->cxlds;
@@ -45,7 +58,7 @@ static int cxl_port_probe(struct device *dev)
 		if (rc)
 			return rc;
 
-		rc = cxl_hdm_decode_init(cxlds);
+		rc = cxl_hdm_decode_init(cxlds, cxlhdm);
 		if (rc)
 			return rc;
 
@@ -54,18 +67,8 @@ static int cxl_port_probe(struct device *dev)
 			dev_err(dev, "Media not active (%d)\n", rc);
 			return rc;
 		}
-	} else {
-		rc = devm_cxl_port_enumerate_dports(port);
-		if (rc < 0)
-			return rc;
-		if (rc == 1)
-			return devm_cxl_add_passthrough_decoder(port);
 	}
 
-	cxlhdm = devm_cxl_setup_hdm(port);
-	if (IS_ERR(cxlhdm))
-		return PTR_ERR(cxlhdm);
-
 	rc = devm_cxl_enumerate_decoders(cxlhdm);
 	if (rc) {
 		dev_err(dev, "Couldn't enumerate decoders (%d)\n", rc);
diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c
index 45ffbb8f519a..f1f8c40948c5 100644
--- a/tools/testing/cxl/test/mock.c
+++ b/tools/testing/cxl/test/mock.c
@@ -208,13 +208,14 @@ int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds)
 }
 EXPORT_SYMBOL_NS_GPL(__wrap_cxl_await_media_ready, CXL);
 
-bool __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds)
+bool __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds,
+				struct cxl_hdm *cxlhdm)
 {
 	int rc = 0, index;
 	struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
 
 	if (!ops || !ops->is_mock_dev(cxlds->dev))
-		rc = cxl_hdm_decode_init(cxlds);
+		rc = cxl_hdm_decode_init(cxlds, cxlhdm);
 	put_cxl_mock_ops(index);
 
 	return rc;
-- 
cgit 


From 2ba18161d407c596f256f7c4a6447b8588b9eb55 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Wed, 18 May 2022 15:04:46 -0700
Subject: selftests: mptcp: add MP_FAIL reset testcase

Add the multiple subflows test case for MP_FAIL, to test the MP_FAIL
reset case. Use the test_linkfail value to make 1024KB test files.

Invoke reset_with_fail() to use 'iptables' and 'tc action pedit' rules
to produce the bit flips to trigger the checksum failures on ns2eth2.
Add delays on ns2eth1 to make sure more data can translate on ns2eth2.

The check_invert flag is enabled in reset_with_fail(), so this test
prints out the inverted bytes, instead of the file mismatch errors.

Invoke pedit_action_pkts() to get the numbers of the packets edited
by the tc pedit actions, and print this numbers to the output.

Co-developed-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 1a5f211c5902..a4406b7a8064 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -2732,6 +2732,16 @@ fail_tests()
 		chk_join_nr 0 0 0 +1 +0 1 0 1 "$(pedit_action_pkts)"
 		chk_fail_nr 1 -1 invert
 	fi
+
+	# multiple subflows
+	if reset_with_fail "MP_FAIL MP_RST" 2; then
+		tc -n $ns2 qdisc add dev ns2eth1 root netem rate 1mbit delay 5
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1 1024
+		chk_join_nr 1 1 1 1 0 1 1 0 "$(pedit_action_pkts)"
+	fi
 }
 
 userspace_tests()
-- 
cgit 


From ac6c85e962d4c009c499d93657f25f46fd8212b9 Mon Sep 17 00:00:00 2001
From: Anup Patel <apatel@ventanamicro.com>
Date: Sat, 9 Apr 2022 10:02:36 +0530
Subject: KVM: selftests: riscv: Improve unexpected guest trap handling

Currently, we simply hang using "while (1) ;" upon any unexpected
guest traps because the default guest trap handler is guest_hang().

The above approach is not useful to anyone because KVM selftests
users will only see a hung application upon any unexpected guest
trap.

This patch improves unexpected guest trap handling for KVM RISC-V
selftests by doing the following:
1) Return to host user-space
2) Dump VCPU registers
3) Die using TEST_ASSERT(0, ...)

Signed-off-by: Anup Patel <apatel@ventanamicro.com>
Tested-by: Mayuresh Chitale <mchitale@ventanamicro.com>
Signed-off-by: Anup Patel <anup@brainfault.org>
---
 .../selftests/kvm/include/riscv/processor.h        |  8 +++---
 tools/testing/selftests/kvm/lib/riscv/processor.c  |  9 ++++---
 tools/testing/selftests/kvm/lib/riscv/ucall.c      | 31 +++++++++++++++-------
 3 files changed, 31 insertions(+), 17 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h
index eca5c622efd2..4fcfd1c0389d 100644
--- a/tools/testing/selftests/kvm/include/riscv/processor.h
+++ b/tools/testing/selftests/kvm/include/riscv/processor.h
@@ -119,10 +119,12 @@ static inline void set_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id,
 #define SATP_ASID_SHIFT				44
 #define SATP_ASID_MASK				_AC(0xFFFF, UL)
 
-#define SBI_EXT_EXPERIMENTAL_START	0x08000000
-#define SBI_EXT_EXPERIMENTAL_END	0x08FFFFFF
+#define SBI_EXT_EXPERIMENTAL_START		0x08000000
+#define SBI_EXT_EXPERIMENTAL_END		0x08FFFFFF
 
-#define KVM_RISCV_SELFTESTS_SBI_EXT	SBI_EXT_EXPERIMENTAL_END
+#define KVM_RISCV_SELFTESTS_SBI_EXT		SBI_EXT_EXPERIMENTAL_END
+#define KVM_RISCV_SELFTESTS_SBI_UCALL		0
+#define KVM_RISCV_SELFTESTS_SBI_UNEXP		1
 
 struct sbiret {
 	long error;
diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c
index 3961487a4870..56e4705f7744 100644
--- a/tools/testing/selftests/kvm/lib/riscv/processor.c
+++ b/tools/testing/selftests/kvm/lib/riscv/processor.c
@@ -268,10 +268,11 @@ void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
 		core.regs.t3, core.regs.t4, core.regs.t5, core.regs.t6);
 }
 
-static void __aligned(16) guest_hang(void)
+static void __aligned(16) guest_unexp_trap(void)
 {
-	while (1)
-		;
+	sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT,
+		  KVM_RISCV_SELFTESTS_SBI_UNEXP,
+		  0, 0, 0, 0, 0, 0);
 }
 
 void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
@@ -310,7 +311,7 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
 
 	/* Setup default exception vector of guest */
 	set_reg(vm, vcpuid, RISCV_CSR_REG(stvec),
-		(unsigned long)guest_hang);
+		(unsigned long)guest_unexp_trap);
 }
 
 void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
diff --git a/tools/testing/selftests/kvm/lib/riscv/ucall.c b/tools/testing/selftests/kvm/lib/riscv/ucall.c
index 9e42d8248fa6..8550f424d093 100644
--- a/tools/testing/selftests/kvm/lib/riscv/ucall.c
+++ b/tools/testing/selftests/kvm/lib/riscv/ucall.c
@@ -60,8 +60,9 @@ void ucall(uint64_t cmd, int nargs, ...)
 		uc.args[i] = va_arg(va, uint64_t);
 	va_end(va);
 
-	sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT, 0, (vm_vaddr_t)&uc,
-		  0, 0, 0, 0, 0);
+	sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT,
+		  KVM_RISCV_SELFTESTS_SBI_UCALL,
+		  (vm_vaddr_t)&uc, 0, 0, 0, 0, 0);
 }
 
 uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
@@ -73,14 +74,24 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
 		memset(uc, 0, sizeof(*uc));
 
 	if (run->exit_reason == KVM_EXIT_RISCV_SBI &&
-	    run->riscv_sbi.extension_id == KVM_RISCV_SELFTESTS_SBI_EXT &&
-	    run->riscv_sbi.function_id == 0) {
-		memcpy(&ucall, addr_gva2hva(vm, run->riscv_sbi.args[0]),
-			sizeof(ucall));
-
-		vcpu_run_complete_io(vm, vcpu_id);
-		if (uc)
-			memcpy(uc, &ucall, sizeof(ucall));
+	    run->riscv_sbi.extension_id == KVM_RISCV_SELFTESTS_SBI_EXT) {
+		switch (run->riscv_sbi.function_id) {
+		case KVM_RISCV_SELFTESTS_SBI_UCALL:
+			memcpy(&ucall, addr_gva2hva(vm,
+			       run->riscv_sbi.args[0]), sizeof(ucall));
+
+			vcpu_run_complete_io(vm, vcpu_id);
+			if (uc)
+				memcpy(uc, &ucall, sizeof(ucall));
+
+			break;
+		case KVM_RISCV_SELFTESTS_SBI_UNEXP:
+			vcpu_dump(stderr, vm, vcpu_id, 2);
+			TEST_ASSERT(0, "Unexpected trap taken by guest");
+			break;
+		default:
+			break;
+		}
 	}
 
 	return ucall.cmd;
-- 
cgit 


From dba90d6fb8b0657d45516bfe1eb8fe83d9e425f8 Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Fri, 6 May 2022 17:45:12 +0800
Subject: KVM: selftests: riscv: Remove unneeded semicolon

Fix the following coccicheck warnings:

./tools/testing/selftests/kvm/lib/riscv/processor.c:353:3-4: Unneeded
semicolon.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: Anup Patel <anup@brainfault.org>
---
 tools/testing/selftests/kvm/lib/riscv/processor.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c
index 56e4705f7744..abc0ae5a4fe1 100644
--- a/tools/testing/selftests/kvm/lib/riscv/processor.c
+++ b/tools/testing/selftests/kvm/lib/riscv/processor.c
@@ -351,7 +351,7 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
 		case 7:
 			id = RISCV_CORE_REG(regs.a7);
 			break;
-		};
+		}
 		set_reg(vm, vcpuid, id, va_arg(ap, uint64_t));
 	}
 
-- 
cgit 


From 04baa2233d55e85e0f0f5dfe0401ecb027da9a0e Mon Sep 17 00:00:00 2001
From: Aaron Lewis <aaronlewis@google.com>
Date: Tue, 17 May 2022 05:12:37 +0000
Subject: selftests: kvm/x86: Add the helper function create_pmu_event_filter

Add a helper function that creates a pmu event filter given an event
list.  Currently, a pmu event filter can only be created with the same
hard coded event list.  Add a way to create one given a different event
list.

Also, rename make_pmu_event_filter to alloc_pmu_event_filter to clarify
it's purpose given the introduction of create_pmu_event_filter.

No functional changes intended.

Signed-off-by: Aaron Lewis <aaronlewis@google.com>
Message-Id: <20220517051238.2566934-2-aaronlewis@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../selftests/kvm/x86_64/pmu_event_filter_test.c       | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
index 0d06ffa95d9d..30c1a5804210 100644
--- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
+++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
@@ -208,7 +208,7 @@ static bool sanity_check_pmu(struct kvm_vm *vm)
 	return success;
 }
 
-static struct kvm_pmu_event_filter *make_pmu_event_filter(uint32_t nevents)
+static struct kvm_pmu_event_filter *alloc_pmu_event_filter(uint32_t nevents)
 {
 	struct kvm_pmu_event_filter *f;
 	int size = sizeof(*f) + nevents * sizeof(f->events[0]);
@@ -220,19 +220,29 @@ static struct kvm_pmu_event_filter *make_pmu_event_filter(uint32_t nevents)
 	return f;
 }
 
-static struct kvm_pmu_event_filter *event_filter(uint32_t action)
+
+static struct kvm_pmu_event_filter *
+create_pmu_event_filter(const uint64_t event_list[],
+			int nevents, uint32_t action)
 {
 	struct kvm_pmu_event_filter *f;
 	int i;
 
-	f = make_pmu_event_filter(ARRAY_SIZE(event_list));
+	f = alloc_pmu_event_filter(nevents);
 	f->action = action;
-	for (i = 0; i < ARRAY_SIZE(event_list); i++)
+	for (i = 0; i < nevents; i++)
 		f->events[i] = event_list[i];
 
 	return f;
 }
 
+static struct kvm_pmu_event_filter *event_filter(uint32_t action)
+{
+	return create_pmu_event_filter(event_list,
+				       ARRAY_SIZE(event_list),
+				       action);
+}
+
 /*
  * Remove the first occurrence of 'event' (if any) from the filter's
  * event list.
-- 
cgit 


From c41ef29cc1d4fa4ac5f1bb8e6ab57bf6f02cf878 Mon Sep 17 00:00:00 2001
From: Aaron Lewis <aaronlewis@google.com>
Date: Tue, 17 May 2022 05:12:38 +0000
Subject: selftests: kvm/x86: Verify the pmu event filter matches the correct
 event

Add a test to demonstrate that when the guest programs an event select
it is matched correctly in the pmu event filter and not inadvertently
filtered.  This could happen on AMD if the high nybble[1] in the event
select gets truncated away only leaving the bottom byte[2] left for
matching.

This is a contrived example used for the convenience of demonstrating
this issue, however, this can be applied to event selects 0x28A (OC
Mode Switch) and 0x08A (L1 BTB Correction), where 0x08A could end up
being denied when the event select was only set up to deny 0x28A.

[1] bits 35:32 in the event select register and bits 11:8 in the event
    select.
[2] bits 7:0 in the event select register and bits 7:0 in the event
    select.

Signed-off-by: Aaron Lewis <aaronlewis@google.com>
Message-Id: <20220517051238.2566934-3-aaronlewis@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../selftests/kvm/x86_64/pmu_event_filter_test.c      | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
index 30c1a5804210..93d77574b255 100644
--- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
+++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
@@ -281,6 +281,22 @@ static uint64_t test_with_filter(struct kvm_vm *vm,
 	return run_vm_to_sync(vm);
 }
 
+static void test_amd_deny_list(struct kvm_vm *vm)
+{
+	uint64_t event = EVENT(0x1C2, 0);
+	struct kvm_pmu_event_filter *f;
+	uint64_t count;
+
+	f = create_pmu_event_filter(&event, 1, KVM_PMU_EVENT_DENY);
+	count = test_with_filter(vm, f);
+
+	free(f);
+	if (count != NUM_BRANCHES)
+		pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
+			__func__, count, NUM_BRANCHES);
+	TEST_ASSERT(count, "Allowed PMU event is not counting");
+}
+
 static void test_member_deny_list(struct kvm_vm *vm)
 {
 	struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY);
@@ -463,6 +479,9 @@ int main(int argc, char *argv[])
 		exit(KSFT_SKIP);
 	}
 
+	if (use_amd_pmu())
+		test_amd_deny_list(vm);
+
 	test_without_filter(vm);
 	test_member_deny_list(vm);
 	test_member_allow_list(vm);
-- 
cgit 


From cbac924200b838cfb8d8b1415113d788089dc50b Mon Sep 17 00:00:00 2001
From: Steffen Eiden <seiden@linux.ibm.com>
Date: Tue, 10 May 2022 14:47:24 +0000
Subject: selftests: drivers/s390x: Add uvdevice tests

Adds some selftests to test ioctl error paths of the uv-uapi.
The Kconfig S390_UV_UAPI must be selected and the Ultravisor facility
must be available. The test can be executed by non-root, however, the
uvdevice special file /dev/uv must be accessible for reading and
writing which may imply root privileges.

  ./test-uv-device
  TAP version 13
  1..6
  # Starting 6 tests from 3 test cases.
  #  RUN           uvio_fixture.att.fault_ioctl_arg ...
  #            OK  uvio_fixture.att.fault_ioctl_arg
  ok 1 uvio_fixture.att.fault_ioctl_arg
  #  RUN           uvio_fixture.att.fault_uvio_arg ...
  #            OK  uvio_fixture.att.fault_uvio_arg
  ok 2 uvio_fixture.att.fault_uvio_arg
  #  RUN           uvio_fixture.att.inval_ioctl_cb ...
  #            OK  uvio_fixture.att.inval_ioctl_cb
  ok 3 uvio_fixture.att.inval_ioctl_cb
  #  RUN           uvio_fixture.att.inval_ioctl_cmd ...
  #            OK  uvio_fixture.att.inval_ioctl_cmd
  ok 4 uvio_fixture.att.inval_ioctl_cmd
  #  RUN           attest_fixture.att_inval_request ...
  #            OK  attest_fixture.att_inval_request
  ok 5 attest_fixture.att_inval_request
  #  RUN           attest_fixture.att_inval_addr ...
  #            OK  attest_fixture.att_inval_addr
  ok 6 attest_fixture.att_inval_addr
  # PASSED: 6 / 6 tests passed.
  # Totals: pass:6 fail:0 xfail:0 xpass:0 skip:0 error:0

Signed-off-by: Steffen Eiden <seiden@linux.ibm.com>
Acked-by: Janosch Frank <frankja@linux.ibm.com>
Message-Id: <20220510144724.3321985-3-seiden@linux.ibm.com>
Link: https://lore.kernel.org/kvm/20220510144724.3321985-3-seiden@linux.ibm.com/
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
---
 MAINTAINERS                                        |   1 +
 tools/testing/selftests/Makefile                   |   1 +
 tools/testing/selftests/drivers/.gitignore         |   1 +
 .../selftests/drivers/s390x/uvdevice/Makefile      |  22 ++
 .../selftests/drivers/s390x/uvdevice/config        |   1 +
 .../drivers/s390x/uvdevice/test_uvdevice.c         | 276 +++++++++++++++++++++
 6 files changed, 302 insertions(+)
 create mode 100644 tools/testing/selftests/drivers/s390x/uvdevice/Makefile
 create mode 100644 tools/testing/selftests/drivers/s390x/uvdevice/config
 create mode 100644 tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c

(limited to 'tools/testing')

diff --git a/MAINTAINERS b/MAINTAINERS
index a74488b7bb7c..bc6fae05ee4b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10784,6 +10784,7 @@ F:	arch/s390/kernel/uv.c
 F:	arch/s390/kvm/
 F:	arch/s390/mm/gmap.c
 F:	drivers/s390/char/uvdevice.c
+F:	tools/testing/selftests/drivers/s390x/uvdevice/
 F:	tools/testing/selftests/kvm/*/s390x/
 F:	tools/testing/selftests/kvm/s390x/
 
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 2319ec87f53d..d6b307371ef7 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -10,6 +10,7 @@ TARGETS += core
 TARGETS += cpufreq
 TARGETS += cpu-hotplug
 TARGETS += drivers/dma-buf
+TARGETS += drivers/s390x/uvdevice
 TARGETS += efivarfs
 TARGETS += exec
 TARGETS += filesystems
diff --git a/tools/testing/selftests/drivers/.gitignore b/tools/testing/selftests/drivers/.gitignore
index ca74f2e1c719..09e23b5afa96 100644
--- a/tools/testing/selftests/drivers/.gitignore
+++ b/tools/testing/selftests/drivers/.gitignore
@@ -1,2 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0-only
 /dma-buf/udmabuf
+/s390x/uvdevice/test_uvdevice
diff --git a/tools/testing/selftests/drivers/s390x/uvdevice/Makefile b/tools/testing/selftests/drivers/s390x/uvdevice/Makefile
new file mode 100644
index 000000000000..5e701d2708d4
--- /dev/null
+++ b/tools/testing/selftests/drivers/s390x/uvdevice/Makefile
@@ -0,0 +1,22 @@
+include ../../../../../build/Build.include
+
+UNAME_M := $(shell uname -m)
+
+ifneq ($(UNAME_M),s390x)
+nothing:
+.PHONY: all clean run_tests install
+.SILENT:
+else
+
+TEST_GEN_PROGS := test_uvdevice
+
+top_srcdir ?= ../../../../../..
+KSFT_KHDR_INSTALL := 1
+khdr_dir = $(top_srcdir)/usr/include
+LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include
+
+CFLAGS += -Wall -Werror -static -I$(khdr_dir) -I$(LINUX_TOOL_ARCH_INCLUDE)
+
+include ../../../lib.mk
+
+endif
diff --git a/tools/testing/selftests/drivers/s390x/uvdevice/config b/tools/testing/selftests/drivers/s390x/uvdevice/config
new file mode 100644
index 000000000000..f28a04b99eff
--- /dev/null
+++ b/tools/testing/selftests/drivers/s390x/uvdevice/config
@@ -0,0 +1 @@
+CONFIG_S390_UV_UAPI=y
diff --git a/tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c b/tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c
new file mode 100644
index 000000000000..ea0cdc37b44f
--- /dev/null
+++ b/tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  selftest for the Ultravisor UAPI device
+ *
+ *  Copyright IBM Corp. 2022
+ *  Author(s): Steffen Eiden <seiden@linux.ibm.com>
+ */
+
+#include <stdint.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include <asm/uvdevice.h>
+
+#include "../../../kselftest_harness.h"
+
+#define UV_PATH  "/dev/uv"
+#define BUFFER_SIZE 0x200
+FIXTURE(uvio_fixture) {
+	int uv_fd;
+	struct uvio_ioctl_cb uvio_ioctl;
+	uint8_t buffer[BUFFER_SIZE];
+	__u64 fault_page;
+};
+
+FIXTURE_VARIANT(uvio_fixture) {
+	unsigned long ioctl_cmd;
+	uint32_t arg_size;
+};
+
+FIXTURE_VARIANT_ADD(uvio_fixture, att) {
+	.ioctl_cmd = UVIO_IOCTL_ATT,
+	.arg_size = sizeof(struct uvio_attest),
+};
+
+FIXTURE_SETUP(uvio_fixture)
+{
+	self->uv_fd = open(UV_PATH, O_ACCMODE);
+
+	self->uvio_ioctl.argument_addr = (__u64)self->buffer;
+	self->uvio_ioctl.argument_len = variant->arg_size;
+	self->fault_page =
+		(__u64)mmap(NULL, (size_t)getpagesize(), PROT_NONE, MAP_ANONYMOUS, -1, 0);
+}
+
+FIXTURE_TEARDOWN(uvio_fixture)
+{
+	if (self->uv_fd)
+		close(self->uv_fd);
+	munmap((void *)self->fault_page, (size_t)getpagesize());
+}
+
+TEST_F(uvio_fixture, fault_ioctl_arg)
+{
+	int rc, errno_cache;
+
+	rc = ioctl(self->uv_fd, variant->ioctl_cmd, NULL);
+	errno_cache = errno;
+	ASSERT_EQ(rc, -1);
+	ASSERT_EQ(errno_cache, EFAULT);
+
+	rc = ioctl(self->uv_fd, variant->ioctl_cmd, self->fault_page);
+	errno_cache = errno;
+	ASSERT_EQ(rc, -1);
+	ASSERT_EQ(errno_cache, EFAULT);
+}
+
+TEST_F(uvio_fixture, fault_uvio_arg)
+{
+	int rc, errno_cache;
+
+	self->uvio_ioctl.argument_addr = 0;
+	rc = ioctl(self->uv_fd, variant->ioctl_cmd, &self->uvio_ioctl);
+	errno_cache = errno;
+	ASSERT_EQ(rc, -1);
+	ASSERT_EQ(errno_cache, EFAULT);
+
+	self->uvio_ioctl.argument_addr = self->fault_page;
+	rc = ioctl(self->uv_fd, variant->ioctl_cmd, &self->uvio_ioctl);
+	errno_cache = errno;
+	ASSERT_EQ(rc, -1);
+	ASSERT_EQ(errno_cache, EFAULT);
+}
+
+/*
+ * Test to verify that IOCTLs with invalid values in the ioctl_control block
+ * are rejected.
+ */
+TEST_F(uvio_fixture, inval_ioctl_cb)
+{
+	int rc, errno_cache;
+
+	self->uvio_ioctl.argument_len = 0;
+	rc = ioctl(self->uv_fd, variant->ioctl_cmd, &self->uvio_ioctl);
+	errno_cache = errno;
+	ASSERT_EQ(rc, -1);
+	ASSERT_EQ(errno_cache, EINVAL);
+
+	self->uvio_ioctl.argument_len = (uint32_t)-1;
+	rc = ioctl(self->uv_fd, variant->ioctl_cmd, &self->uvio_ioctl);
+	errno_cache = errno;
+	ASSERT_EQ(rc, -1);
+	ASSERT_EQ(errno_cache, EINVAL);
+	self->uvio_ioctl.argument_len = variant->arg_size;
+
+	self->uvio_ioctl.flags = (uint32_t)-1;
+	rc = ioctl(self->uv_fd, variant->ioctl_cmd, &self->uvio_ioctl);
+	errno_cache = errno;
+	ASSERT_EQ(rc, -1);
+	ASSERT_EQ(errno_cache, EINVAL);
+	self->uvio_ioctl.flags = 0;
+
+	memset(self->uvio_ioctl.reserved14, 0xff, sizeof(self->uvio_ioctl.reserved14));
+	rc = ioctl(self->uv_fd, variant->ioctl_cmd, &self->uvio_ioctl);
+	errno_cache = errno;
+	ASSERT_EQ(rc, -1);
+	ASSERT_EQ(errno_cache, EINVAL);
+
+	memset(&self->uvio_ioctl, 0x11, sizeof(self->uvio_ioctl));
+	rc = ioctl(self->uv_fd, variant->ioctl_cmd, &self->uvio_ioctl);
+	ASSERT_EQ(rc, -1);
+}
+
+TEST_F(uvio_fixture, inval_ioctl_cmd)
+{
+	int rc, errno_cache;
+	uint8_t nr = _IOC_NR(variant->ioctl_cmd);
+	unsigned long cmds[] = {
+		_IOWR('a', nr, struct uvio_ioctl_cb),
+		_IOWR(UVIO_TYPE_UVC, nr, int),
+		_IO(UVIO_TYPE_UVC, nr),
+		_IOR(UVIO_TYPE_UVC, nr, struct uvio_ioctl_cb),
+		_IOW(UVIO_TYPE_UVC, nr, struct uvio_ioctl_cb),
+	};
+
+	for (size_t i = 0; i < ARRAY_SIZE(cmds); i++) {
+		rc = ioctl(self->uv_fd, cmds[i], &self->uvio_ioctl);
+		errno_cache = errno;
+		ASSERT_EQ(rc, -1);
+		ASSERT_EQ(errno_cache, ENOTTY);
+	}
+}
+
+struct test_attest_buffer {
+	uint8_t arcb[0x180];
+	uint8_t meas[64];
+	uint8_t add[32];
+};
+
+FIXTURE(attest_fixture) {
+	int uv_fd;
+	struct uvio_ioctl_cb uvio_ioctl;
+	struct uvio_attest uvio_attest;
+	struct test_attest_buffer attest_buffer;
+	__u64 fault_page;
+};
+
+FIXTURE_SETUP(attest_fixture)
+{
+	self->uv_fd = open(UV_PATH, O_ACCMODE);
+
+	self->uvio_ioctl.argument_addr = (__u64)&self->uvio_attest;
+	self->uvio_ioctl.argument_len = sizeof(self->uvio_attest);
+
+	self->uvio_attest.arcb_addr = (__u64)&self->attest_buffer.arcb;
+	self->uvio_attest.arcb_len = sizeof(self->attest_buffer.arcb);
+
+	self->uvio_attest.meas_addr = (__u64)&self->attest_buffer.meas;
+	self->uvio_attest.meas_len = sizeof(self->attest_buffer.meas);
+
+	self->uvio_attest.add_data_addr = (__u64)&self->attest_buffer.add;
+	self->uvio_attest.add_data_len = sizeof(self->attest_buffer.add);
+	self->fault_page =
+		(__u64)mmap(NULL, (size_t)getpagesize(), PROT_NONE, MAP_ANONYMOUS, -1, 0);
+}
+
+FIXTURE_TEARDOWN(attest_fixture)
+{
+	if (self->uv_fd)
+		close(self->uv_fd);
+	munmap((void *)self->fault_page, (size_t)getpagesize());
+}
+
+static void att_inval_sizes_test(uint32_t *size, uint32_t max_size, bool test_zero,
+				 struct __test_metadata *_metadata,
+				 FIXTURE_DATA(attest_fixture) *self)
+{
+	int rc, errno_cache;
+	uint32_t tmp = *size;
+
+	if (test_zero) {
+		*size = 0;
+		rc = ioctl(self->uv_fd, UVIO_IOCTL_ATT, &self->uvio_ioctl);
+		errno_cache = errno;
+		ASSERT_EQ(rc, -1);
+		ASSERT_EQ(errno_cache, EINVAL);
+	}
+	*size = max_size + 1;
+	rc = ioctl(self->uv_fd, UVIO_IOCTL_ATT, &self->uvio_ioctl);
+	errno_cache = errno;
+	ASSERT_EQ(rc, -1);
+	ASSERT_EQ(errno_cache, EINVAL);
+	*size = tmp;
+}
+
+/*
+ * Test to verify that attestation IOCTLs with invalid values in the UVIO
+ * attestation control block are rejected.
+ */
+TEST_F(attest_fixture, att_inval_request)
+{
+	int rc, errno_cache;
+
+	att_inval_sizes_test(&self->uvio_attest.add_data_len, UVIO_ATT_ADDITIONAL_MAX_LEN,
+			     false, _metadata, self);
+	att_inval_sizes_test(&self->uvio_attest.meas_len, UVIO_ATT_MEASUREMENT_MAX_LEN,
+			     true, _metadata, self);
+	att_inval_sizes_test(&self->uvio_attest.arcb_len, UVIO_ATT_ARCB_MAX_LEN,
+			     true, _metadata, self);
+
+	self->uvio_attest.reserved136 = (uint16_t)-1;
+	rc = ioctl(self->uv_fd, UVIO_IOCTL_ATT, &self->uvio_ioctl);
+	errno_cache = errno;
+	ASSERT_EQ(rc, -1);
+	ASSERT_EQ(errno_cache, EINVAL);
+
+	memset(&self->uvio_attest, 0x11, sizeof(self->uvio_attest));
+	rc = ioctl(self->uv_fd, UVIO_IOCTL_ATT, &self->uvio_ioctl);
+	ASSERT_EQ(rc, -1);
+}
+
+static void att_inval_addr_test(__u64 *addr, struct __test_metadata *_metadata,
+				FIXTURE_DATA(attest_fixture) *self)
+{
+	int rc, errno_cache;
+	__u64 tmp = *addr;
+
+	*addr = 0;
+	rc = ioctl(self->uv_fd, UVIO_IOCTL_ATT, &self->uvio_ioctl);
+	errno_cache = errno;
+	ASSERT_EQ(rc, -1);
+	ASSERT_EQ(errno_cache, EFAULT);
+	*addr = self->fault_page;
+	rc = ioctl(self->uv_fd, UVIO_IOCTL_ATT, &self->uvio_ioctl);
+	errno_cache = errno;
+	ASSERT_EQ(rc, -1);
+	ASSERT_EQ(errno_cache, EFAULT);
+	*addr = tmp;
+}
+
+TEST_F(attest_fixture, att_inval_addr)
+{
+	att_inval_addr_test(&self->uvio_attest.arcb_addr, _metadata, self);
+	att_inval_addr_test(&self->uvio_attest.add_data_addr, _metadata, self);
+	att_inval_addr_test(&self->uvio_attest.meas_addr, _metadata, self);
+}
+
+static void __attribute__((constructor)) __constructor_order_last(void)
+{
+	if (!__constructor_order)
+		__constructor_order = _CONSTRUCTOR_ORDER_BACKWARD;
+}
+
+int main(int argc, char **argv)
+{
+	int fd = open(UV_PATH, O_ACCMODE);
+
+	if (fd < 0)
+		ksft_exit_skip("No uv-device or cannot access " UV_PATH  "\n"
+			       "Enable CONFIG_S390_UV_UAPI and check the access rights on "
+			       UV_PATH ".\n");
+	close(fd);
+	return test_harness_run(argc, argv);
+}
-- 
cgit 


From c71159648c3cf0f7127ddc0bdf3eb4d7885210df Mon Sep 17 00:00:00 2001
From: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
Date: Thu, 12 May 2022 15:10:18 +0200
Subject: KVM: s390: selftest: Test suppression indication on key prot
 exception

Check that suppression is not indicated on injection of a key checked
protection exception caused by a memop after it already modified guest
memory, as that violates the definition of suppression.

Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@linux.ibm.com>
Link: https://lore.kernel.org/r/20220512131019.2594948-3-scgl@linux.ibm.com
Signed-off-by: Christian Borntraeger <borntraeger@linux.ibm.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
---
 tools/testing/selftests/kvm/s390x/memop.c | 46 ++++++++++++++++++++++++++++++-
 1 file changed, 45 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c
index b04c2c1b3c30..49f26f544127 100644
--- a/tools/testing/selftests/kvm/s390x/memop.c
+++ b/tools/testing/selftests/kvm/s390x/memop.c
@@ -10,6 +10,8 @@
 #include <string.h>
 #include <sys/ioctl.h>
 
+#include <linux/bits.h>
+
 #include "test_util.h"
 #include "kvm_util.h"
 
@@ -194,6 +196,7 @@ static int err_memop_ioctl(struct test_vcpu vcpu, struct kvm_s390_mem_op *ksmo)
 #define SIDA_OFFSET(o) ._sida_offset = 1, .sida_offset = (o)
 #define AR(a) ._ar = 1, .ar = (a)
 #define KEY(a) .f_key = 1, .key = (a)
+#define INJECT .f_inject = 1
 
 #define CHECK_N_DO(f, ...) ({ f(__VA_ARGS__, CHECK_ONLY); f(__VA_ARGS__); })
 
@@ -430,9 +433,18 @@ static void test_copy_key_fetch_prot(void)
 	TEST_ASSERT(rv == 4, "Should result in protection exception");		\
 })
 
+static void guest_error_key(void)
+{
+	GUEST_SYNC(STAGE_INITED);
+	set_storage_key_range(mem1, PAGE_SIZE, 0x18);
+	set_storage_key_range(mem1 + PAGE_SIZE, sizeof(mem1) - PAGE_SIZE, 0x98);
+	GUEST_SYNC(STAGE_SKEYS_SET);
+	GUEST_SYNC(STAGE_IDLED);
+}
+
 static void test_errors_key(void)
 {
-	struct test_default t = test_default_init(guest_copy_key_fetch_prot);
+	struct test_default t = test_default_init(guest_error_key);
 
 	HOST_SYNC(t.vcpu, STAGE_INITED);
 	HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
@@ -446,6 +458,37 @@ static void test_errors_key(void)
 	kvm_vm_free(t.kvm_vm);
 }
 
+static void test_termination(void)
+{
+	struct test_default t = test_default_init(guest_error_key);
+	uint64_t prefix;
+	uint64_t teid;
+	uint64_t teid_mask = BIT(63 - 56) | BIT(63 - 60) | BIT(63 - 61);
+	uint64_t psw[2];
+
+	HOST_SYNC(t.vcpu, STAGE_INITED);
+	HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+	/* vcpu, mismatching keys after first page */
+	ERR_PROT_MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), KEY(1), INJECT);
+	/*
+	 * The memop injected a program exception and the test needs to check the
+	 * Translation-Exception Identification (TEID). It is necessary to run
+	 * the guest in order to be able to read the TEID from guest memory.
+	 * Set the guest program new PSW, so the guest state is not clobbered.
+	 */
+	prefix = t.run->s.regs.prefix;
+	psw[0] = t.run->psw_mask;
+	psw[1] = t.run->psw_addr;
+	MOP(t.vm, ABSOLUTE, WRITE, psw, sizeof(psw), GADDR(prefix + 464));
+	HOST_SYNC(t.vcpu, STAGE_IDLED);
+	MOP(t.vm, ABSOLUTE, READ, &teid, sizeof(teid), GADDR(prefix + 168));
+	/* Bits 56, 60, 61 form a code, 0 being the only one allowing for termination */
+	ASSERT_EQ(teid & teid_mask, 0);
+
+	kvm_vm_free(t.kvm_vm);
+}
+
 static void test_errors_key_storage_prot_override(void)
 {
 	struct test_default t = test_default_init(guest_copy_key_fetch_prot);
@@ -668,6 +711,7 @@ int main(int argc, char *argv[])
 		test_copy_key_fetch_prot();
 		test_copy_key_fetch_prot_override();
 		test_errors_key();
+		test_termination();
 		test_errors_key_storage_prot_override();
 		test_errors_key_fetch_prot_override_not_enabled();
 		test_errors_key_fetch_prot_override_enabled();
-- 
cgit 


From 7aa424e02a04bba5ecc84afe9b58b16e9e0b34f8 Mon Sep 17 00:00:00 2001
From: Feng Zhou <zhoufeng.zf@bytedance.com>
Date: Wed, 18 May 2022 10:50:53 +0800
Subject: selftests/bpf: Fix some bugs in map_lookup_percpu_elem testcase

comments from Andrii Nakryiko, details in here:
https://lore.kernel.org/lkml/20220511093854.411-1-zhoufeng.zf@bytedance.com/T/

use /* */ instead of //
use libbpf_num_possible_cpus() instead of sysconf(_SC_NPROCESSORS_ONLN)
use 8 bytes for value size
fix memory leak
use ASSERT_EQ instead of ASSERT_OK
add bpf_loop to fetch values on each possible CPU

Fixes: ed7c13776e20c74486b0939a3c1de984c5efb6aa ("selftests/bpf: add test case for bpf_map_lookup_percpu_elem")
Signed-off-by: Feng Zhou <zhoufeng.zf@bytedance.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20220518025053.20492-1-zhoufeng.zf@bytedance.com
---
 .../bpf/prog_tests/map_lookup_percpu_elem.c        | 48 ++++++++++------
 .../bpf/progs/test_map_lookup_percpu_elem.c        | 64 +++++++++++++++-------
 2 files changed, 73 insertions(+), 39 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/map_lookup_percpu_elem.c b/tools/testing/selftests/bpf/prog_tests/map_lookup_percpu_elem.c
index 58b24c2112b0..bfb1bf3fd427 100644
--- a/tools/testing/selftests/bpf/prog_tests/map_lookup_percpu_elem.c
+++ b/tools/testing/selftests/bpf/prog_tests/map_lookup_percpu_elem.c
@@ -1,30 +1,38 @@
 // SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2022 Bytedance
+/* Copyright (c) 2022 Bytedance */
 
 #include <test_progs.h>
-
 #include "test_map_lookup_percpu_elem.skel.h"
 
-#define TEST_VALUE  1
-
 void test_map_lookup_percpu_elem(void)
 {
 	struct test_map_lookup_percpu_elem *skel;
-	int key = 0, ret;
-	int nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
-	int *buf;
+	__u64 key = 0, sum;
+	int ret, i, nr_cpus = libbpf_num_possible_cpus();
+	__u64 *buf;
 
-	buf = (int *)malloc(nr_cpus*sizeof(int));
+	buf = malloc(nr_cpus*sizeof(__u64));
 	if (!ASSERT_OK_PTR(buf, "malloc"))
 		return;
-	memset(buf, 0, nr_cpus*sizeof(int));
-	buf[0] = TEST_VALUE;
 
-	skel = test_map_lookup_percpu_elem__open_and_load();
-	if (!ASSERT_OK_PTR(skel, "test_map_lookup_percpu_elem__open_and_load"))
-		return;
+	for (i = 0; i < nr_cpus; i++)
+		buf[i] = i;
+	sum = (nr_cpus - 1) * nr_cpus / 2;
+
+	skel = test_map_lookup_percpu_elem__open();
+	if (!ASSERT_OK_PTR(skel, "test_map_lookup_percpu_elem__open"))
+		goto exit;
+
+	skel->rodata->my_pid = getpid();
+	skel->rodata->nr_cpus = nr_cpus;
+
+	ret = test_map_lookup_percpu_elem__load(skel);
+	if (!ASSERT_OK(ret, "test_map_lookup_percpu_elem__load"))
+		goto cleanup;
+
 	ret = test_map_lookup_percpu_elem__attach(skel);
-	ASSERT_OK(ret, "test_map_lookup_percpu_elem__attach");
+	if (!ASSERT_OK(ret, "test_map_lookup_percpu_elem__attach"))
+		goto cleanup;
 
 	ret = bpf_map_update_elem(bpf_map__fd(skel->maps.percpu_array_map), &key, buf, 0);
 	ASSERT_OK(ret, "percpu_array_map update");
@@ -37,10 +45,14 @@ void test_map_lookup_percpu_elem(void)
 
 	syscall(__NR_getuid);
 
-	ret = skel->bss->percpu_array_elem_val == TEST_VALUE &&
-	      skel->bss->percpu_hash_elem_val == TEST_VALUE &&
-	      skel->bss->percpu_lru_hash_elem_val == TEST_VALUE;
-	ASSERT_OK(!ret, "bpf_map_lookup_percpu_elem success");
+	test_map_lookup_percpu_elem__detach(skel);
+
+	ASSERT_EQ(skel->bss->percpu_array_elem_sum, sum, "percpu_array lookup percpu elem");
+	ASSERT_EQ(skel->bss->percpu_hash_elem_sum, sum, "percpu_hash lookup percpu elem");
+	ASSERT_EQ(skel->bss->percpu_lru_hash_elem_sum, sum, "percpu_lru_hash lookup percpu elem");
 
+cleanup:
 	test_map_lookup_percpu_elem__destroy(skel);
+exit:
+	free(buf);
 }
diff --git a/tools/testing/selftests/bpf/progs/test_map_lookup_percpu_elem.c b/tools/testing/selftests/bpf/progs/test_map_lookup_percpu_elem.c
index 5d4ef86cbf48..ca827b1092da 100644
--- a/tools/testing/selftests/bpf/progs/test_map_lookup_percpu_elem.c
+++ b/tools/testing/selftests/bpf/progs/test_map_lookup_percpu_elem.c
@@ -1,52 +1,74 @@
 // SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2022 Bytedance
+/* Copyright (c) 2022 Bytedance */
 
 #include "vmlinux.h"
 #include <bpf/bpf_helpers.h>
 
-int percpu_array_elem_val = 0;
-int percpu_hash_elem_val = 0;
-int percpu_lru_hash_elem_val = 0;
+__u64 percpu_array_elem_sum = 0;
+__u64 percpu_hash_elem_sum = 0;
+__u64 percpu_lru_hash_elem_sum = 0;
+const volatile int nr_cpus;
+const volatile int my_pid;
 
 struct {
 	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
 	__uint(max_entries, 1);
 	__type(key, __u32);
-	__type(value, __u32);
+	__type(value, __u64);
 } percpu_array_map SEC(".maps");
 
 struct {
 	__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
 	__uint(max_entries, 1);
-	__type(key, __u32);
-	__type(value, __u32);
+	__type(key, __u64);
+	__type(value, __u64);
 } percpu_hash_map SEC(".maps");
 
 struct {
 	__uint(type, BPF_MAP_TYPE_LRU_PERCPU_HASH);
 	__uint(max_entries, 1);
-	__type(key, __u32);
-	__type(value, __u32);
+	__type(key, __u64);
+	__type(value, __u64);
 } percpu_lru_hash_map SEC(".maps");
 
+struct read_percpu_elem_ctx {
+	void *map;
+	__u64 sum;
+};
+
+static int read_percpu_elem_callback(__u32 index, struct read_percpu_elem_ctx *ctx)
+{
+	__u64 key = 0;
+	__u64 *value;
+
+	value = bpf_map_lookup_percpu_elem(ctx->map, &key, index);
+	if (value)
+		ctx->sum += *value;
+	return 0;
+}
+
 SEC("tp/syscalls/sys_enter_getuid")
 int sysenter_getuid(const void *ctx)
 {
-	__u32 key = 0;
-	__u32 cpu = 0;
-	__u32 *value;
+	struct read_percpu_elem_ctx map_ctx;
 
-	value = bpf_map_lookup_percpu_elem(&percpu_array_map, &key, cpu);
-	if (value)
-		percpu_array_elem_val = *value;
+	if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+		return 0;
 
-	value = bpf_map_lookup_percpu_elem(&percpu_hash_map, &key, cpu);
-	if (value)
-		percpu_hash_elem_val = *value;
+	map_ctx.map = &percpu_array_map;
+	map_ctx.sum = 0;
+	bpf_loop(nr_cpus, read_percpu_elem_callback, &map_ctx, 0);
+	percpu_array_elem_sum = map_ctx.sum;
 
-	value = bpf_map_lookup_percpu_elem(&percpu_lru_hash_map, &key, cpu);
-	if (value)
-		percpu_lru_hash_elem_val = *value;
+	map_ctx.map = &percpu_hash_map;
+	map_ctx.sum = 0;
+	bpf_loop(nr_cpus, read_percpu_elem_callback, &map_ctx, 0);
+	percpu_hash_elem_sum = map_ctx.sum;
+
+	map_ctx.map = &percpu_lru_hash_map;
+	map_ctx.sum = 0;
+	bpf_loop(nr_cpus, read_percpu_elem_callback, &map_ctx, 0);
+	percpu_lru_hash_elem_sum = map_ctx.sum;
 
 	return 0;
 }
-- 
cgit 


From d3294cb1e06d70a689924792c2acb897eac7d781 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Thu, 19 May 2022 16:30:11 -0700
Subject: selftests/bpf: Enable CONFIG_IKCONFIG_PROC in config

CONFIG_IKCONFIG_PROC is required by BPF selftests, otherwise we get
errors like this:

 libbpf: failed to open system Kconfig
 libbpf: failed to load object 'kprobe_multi'
 libbpf: failed to load BPF skeleton 'kprobe_multi': -22

It's because /proc/config.gz is opened in bpf_object__read_kconfig_file()
in tools/lib/bpf/libbpf.c:

        file = gzopen("/proc/config.gz", "r");

So this patch enables CONFIG_IKCONFIG and CONFIG_IKCONFIG_PROC in
tools/testing/selftests/bpf/config.

Suggested-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220519233016.105670-3-mathew.j.martineau@linux.intel.com
---
 tools/testing/selftests/bpf/config | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 08c6e5a66d87..6840d4625e01 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -54,3 +54,5 @@ CONFIG_NF_DEFRAG_IPV6=y
 CONFIG_NF_CONNTRACK=y
 CONFIG_USERFAULTFD=y
 CONFIG_FPROBE=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
-- 
cgit 


From 8039d353217c1d9dae921f131cfe4153bc23e960 Mon Sep 17 00:00:00 2001
From: Nicolas Rybowski <nicolas.rybowski@tessares.net>
Date: Thu, 19 May 2022 16:30:12 -0700
Subject: selftests/bpf: Add MPTCP test base

This patch adds a base for MPTCP specific tests.

It is currently limited to the is_mptcp field in case of plain TCP
connection because there is no easy way to get the subflow sk from a msk
in userspace. This implies that we cannot lookup the sk_storage attached
to the subflow sk in the sockops program.

v4:
 - add copyright 2022 (Andrii)
 - use ASSERT_* instead of CHECK_FAIL (Andrii)
 - drop SEC("version") (Andrii)
 - use is_mptcp in tcp_sock, instead of bpf_tcp_sock (Martin & Andrii)

v5:
 - Drop connect_to_mptcp_fd (Martin)
 - Use BPF test skeleton (Andrii)
 - Use ASSERT_EQ (Andrii)
 - Drop the 'msg' parameter of verify_sk

Co-developed-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Nicolas Rybowski <nicolas.rybowski@tessares.net>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Link: https://lore.kernel.org/bpf/20220519233016.105670-4-mathew.j.martineau@linux.intel.com
---
 MAINTAINERS                                    |   1 +
 tools/testing/selftests/bpf/bpf_tcp_helpers.h  |   1 +
 tools/testing/selftests/bpf/config             |   1 +
 tools/testing/selftests/bpf/network_helpers.c  |  40 +++++++--
 tools/testing/selftests/bpf/network_helpers.h  |   2 +
 tools/testing/selftests/bpf/prog_tests/mptcp.c | 112 +++++++++++++++++++++++++
 tools/testing/selftests/bpf/progs/mptcp_sock.c |  53 ++++++++++++
 7 files changed, 201 insertions(+), 9 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/mptcp.c
 create mode 100644 tools/testing/selftests/bpf/progs/mptcp_sock.c

(limited to 'tools/testing')

diff --git a/MAINTAINERS b/MAINTAINERS
index cc5559a7fb5c..359afc617b92 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13780,6 +13780,7 @@ F:	include/net/mptcp.h
 F:	include/trace/events/mptcp.h
 F:	include/uapi/linux/mptcp.h
 F:	net/mptcp/
+F:	tools/testing/selftests/bpf/*/*mptcp*.c
 F:	tools/testing/selftests/net/mptcp/
 
 NETWORKING [TCP]
diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
index b1ede6f0b821..22e0c8849a17 100644
--- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
@@ -81,6 +81,7 @@ struct tcp_sock {
 	__u32	lsndtime;
 	__u32	prior_cwnd;
 	__u64	tcp_mstamp;	/* most recent packet received/sent */
+	bool	is_mptcp;
 } __attribute__((preserve_access_index));
 
 static __always_inline struct inet_connection_sock *inet_csk(const struct sock *sk)
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 6840d4625e01..3b3edc0fc8a6 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -56,3 +56,4 @@ CONFIG_USERFAULTFD=y
 CONFIG_FPROBE=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
+CONFIG_MPTCP=y
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index 2bb1f9b3841d..59cf81ec55af 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -21,6 +21,10 @@
 #include "network_helpers.h"
 #include "test_progs.h"
 
+#ifndef IPPROTO_MPTCP
+#define IPPROTO_MPTCP 262
+#endif
+
 #define clean_errno() (errno == 0 ? "None" : strerror(errno))
 #define log_err(MSG, ...) ({						\
 			int __save = errno;				\
@@ -73,13 +77,13 @@ int settimeo(int fd, int timeout_ms)
 
 #define save_errno_close(fd) ({ int __save = errno; close(fd); errno = __save; })
 
-static int __start_server(int type, const struct sockaddr *addr,
+static int __start_server(int type, int protocol, const struct sockaddr *addr,
 			  socklen_t addrlen, int timeout_ms, bool reuseport)
 {
 	int on = 1;
 	int fd;
 
-	fd = socket(addr->sa_family, type, 0);
+	fd = socket(addr->sa_family, type, protocol);
 	if (fd < 0) {
 		log_err("Failed to create server socket");
 		return -1;
@@ -113,8 +117,8 @@ error_close:
 	return -1;
 }
 
-int start_server(int family, int type, const char *addr_str, __u16 port,
-		 int timeout_ms)
+static int start_server_proto(int family, int type, int protocol,
+			      const char *addr_str, __u16 port, int timeout_ms)
 {
 	struct sockaddr_storage addr;
 	socklen_t addrlen;
@@ -122,10 +126,23 @@ int start_server(int family, int type, const char *addr_str, __u16 port,
 	if (make_sockaddr(family, addr_str, port, &addr, &addrlen))
 		return -1;
 
-	return __start_server(type, (struct sockaddr *)&addr,
+	return __start_server(type, protocol, (struct sockaddr *)&addr,
 			      addrlen, timeout_ms, false);
 }
 
+int start_server(int family, int type, const char *addr_str, __u16 port,
+		 int timeout_ms)
+{
+	return start_server_proto(family, type, 0, addr_str, port, timeout_ms);
+}
+
+int start_mptcp_server(int family, const char *addr_str, __u16 port,
+		       int timeout_ms)
+{
+	return start_server_proto(family, SOCK_STREAM, IPPROTO_MPTCP, addr_str,
+				  port, timeout_ms);
+}
+
 int *start_reuseport_server(int family, int type, const char *addr_str,
 			    __u16 port, int timeout_ms, unsigned int nr_listens)
 {
@@ -144,7 +161,7 @@ int *start_reuseport_server(int family, int type, const char *addr_str,
 	if (!fds)
 		return NULL;
 
-	fds[0] = __start_server(type, (struct sockaddr *)&addr, addrlen,
+	fds[0] = __start_server(type, 0, (struct sockaddr *)&addr, addrlen,
 				timeout_ms, true);
 	if (fds[0] == -1)
 		goto close_fds;
@@ -154,7 +171,7 @@ int *start_reuseport_server(int family, int type, const char *addr_str,
 		goto close_fds;
 
 	for (; nr_fds < nr_listens; nr_fds++) {
-		fds[nr_fds] = __start_server(type, (struct sockaddr *)&addr,
+		fds[nr_fds] = __start_server(type, 0, (struct sockaddr *)&addr,
 					     addrlen, timeout_ms, true);
 		if (fds[nr_fds] == -1)
 			goto close_fds;
@@ -247,7 +264,7 @@ int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts)
 	struct sockaddr_storage addr;
 	struct sockaddr_in *addr_in;
 	socklen_t addrlen, optlen;
-	int fd, type;
+	int fd, type, protocol;
 
 	if (!opts)
 		opts = &default_opts;
@@ -258,6 +275,11 @@ int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts)
 		return -1;
 	}
 
+	if (getsockopt(server_fd, SOL_SOCKET, SO_PROTOCOL, &protocol, &optlen)) {
+		log_err("getsockopt(SOL_PROTOCOL)");
+		return -1;
+	}
+
 	addrlen = sizeof(addr);
 	if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) {
 		log_err("Failed to get server addr");
@@ -265,7 +287,7 @@ int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts)
 	}
 
 	addr_in = (struct sockaddr_in *)&addr;
-	fd = socket(addr_in->sin_family, type, 0);
+	fd = socket(addr_in->sin_family, type, protocol);
 	if (fd < 0) {
 		log_err("Failed to create client socket");
 		return -1;
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index a4b3b2f9877b..f882c691b790 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -42,6 +42,8 @@ extern struct ipv6_packet pkt_v6;
 int settimeo(int fd, int timeout_ms);
 int start_server(int family, int type, const char *addr, __u16 port,
 		 int timeout_ms);
+int start_mptcp_server(int family, const char *addr, __u16 port,
+		       int timeout_ms);
 int *start_reuseport_server(int family, int type, const char *addr_str,
 			    __u16 port, int timeout_ms,
 			    unsigned int nr_listens);
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
new file mode 100644
index 000000000000..6b346e303b90
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020, Tessares SA. */
+/* Copyright (c) 2022, SUSE. */
+
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+#include "mptcp_sock.skel.h"
+
+struct mptcp_storage {
+	__u32 invoked;
+	__u32 is_mptcp;
+};
+
+static int verify_sk(int map_fd, int client_fd, __u32 is_mptcp)
+{
+	int err, cfd = client_fd;
+	struct mptcp_storage val;
+
+	if (is_mptcp == 1)
+		return 0;
+
+	err = bpf_map_lookup_elem(map_fd, &cfd, &val);
+	if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
+		return err;
+
+	if (!ASSERT_EQ(val.invoked, 1, "unexpected invoked count"))
+		err++;
+
+	if (!ASSERT_EQ(val.is_mptcp, 0, "unexpected is_mptcp"))
+		err++;
+
+	return err;
+}
+
+static int run_test(int cgroup_fd, int server_fd, bool is_mptcp)
+{
+	int client_fd, prog_fd, map_fd, err;
+	struct mptcp_sock *sock_skel;
+
+	sock_skel = mptcp_sock__open_and_load();
+	if (!ASSERT_OK_PTR(sock_skel, "skel_open_load"))
+		return -EIO;
+
+	prog_fd = bpf_program__fd(sock_skel->progs._sockops);
+	if (!ASSERT_GE(prog_fd, 0, "bpf_program__fd")) {
+		err = -EIO;
+		goto out;
+	}
+
+	map_fd = bpf_map__fd(sock_skel->maps.socket_storage_map);
+	if (!ASSERT_GE(map_fd, 0, "bpf_map__fd")) {
+		err = -EIO;
+		goto out;
+	}
+
+	err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_SOCK_OPS, 0);
+	if (!ASSERT_OK(err, "bpf_prog_attach"))
+		goto out;
+
+	client_fd = connect_to_fd(server_fd, 0);
+	if (!ASSERT_GE(client_fd, 0, "connect to fd")) {
+		err = -EIO;
+		goto out;
+	}
+
+	err += is_mptcp ? verify_sk(map_fd, client_fd, 1) :
+			  verify_sk(map_fd, client_fd, 0);
+
+	close(client_fd);
+
+out:
+	mptcp_sock__destroy(sock_skel);
+	return err;
+}
+
+static void test_base(void)
+{
+	int server_fd, cgroup_fd;
+
+	cgroup_fd = test__join_cgroup("/mptcp");
+	if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup"))
+		return;
+
+	/* without MPTCP */
+	server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0);
+	if (!ASSERT_GE(server_fd, 0, "start_server"))
+		goto with_mptcp;
+
+	ASSERT_OK(run_test(cgroup_fd, server_fd, false), "run_test tcp");
+
+	close(server_fd);
+
+with_mptcp:
+	/* with MPTCP */
+	server_fd = start_mptcp_server(AF_INET, NULL, 0, 0);
+	if (!ASSERT_GE(server_fd, 0, "start_mptcp_server"))
+		goto close_cgroup_fd;
+
+	ASSERT_OK(run_test(cgroup_fd, server_fd, true), "run_test mptcp");
+
+	close(server_fd);
+
+close_cgroup_fd:
+	close(cgroup_fd);
+}
+
+void test_mptcp(void)
+{
+	if (test__start_subtest("base"))
+		test_base();
+}
diff --git a/tools/testing/selftests/bpf/progs/mptcp_sock.c b/tools/testing/selftests/bpf/progs/mptcp_sock.c
new file mode 100644
index 000000000000..bc09dba0b078
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/mptcp_sock.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020, Tessares SA. */
+/* Copyright (c) 2022, SUSE. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct mptcp_storage {
+	__u32 invoked;
+	__u32 is_mptcp;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, struct mptcp_storage);
+} socket_storage_map SEC(".maps");
+
+SEC("sockops")
+int _sockops(struct bpf_sock_ops *ctx)
+{
+	struct mptcp_storage *storage;
+	int op = (int)ctx->op;
+	struct tcp_sock *tsk;
+	struct bpf_sock *sk;
+	bool is_mptcp;
+
+	if (op != BPF_SOCK_OPS_TCP_CONNECT_CB)
+		return 1;
+
+	sk = ctx->sk;
+	if (!sk)
+		return 1;
+
+	tsk = bpf_skc_to_tcp_sock(sk);
+	if (!tsk)
+		return 1;
+
+	is_mptcp = bpf_core_field_exists(tsk->is_mptcp) ? tsk->is_mptcp : 0;
+	storage = bpf_sk_storage_get(&socket_storage_map, sk, 0,
+				     BPF_SK_STORAGE_GET_F_CREATE);
+	if (!storage)
+		return 1;
+
+	storage->invoked++;
+	storage->is_mptcp = is_mptcp;
+
+	return 1;
+}
-- 
cgit 


From 3bc48b56e345e2ed83841dd08a00c6a9f112be6c Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Thu, 19 May 2022 16:30:13 -0700
Subject: selftests/bpf: Test bpf_skc_to_mptcp_sock

This patch extends the MPTCP test base, to test the new helper
bpf_skc_to_mptcp_sock().

Define struct mptcp_sock in bpf_tcp_helpers.h, use bpf_skc_to_mptcp_sock
to get the msk socket in progs/mptcp_sock.c and store the infos in
socket_storage_map.

Get the infos from socket_storage_map in prog_tests/mptcp.c. Add a new
function verify_msk() to verify the infos of MPTCP socket, and rename
verify_sk() to verify_tsk() to verify TCP socket only.

v2: Add CONFIG_MPTCP check for clearer error messages

v4:
 - use ASSERT_* instead of CHECK_FAIL (Andrii)
 - drop bpf_mptcp_helpers.h (Andrii)

v5:
 - some 'ASSERT_*' were replaced in the next commit by mistake.
 - Drop CONFIG_MPTCP (Martin)
 - Use ASSERT_EQ (Andrii)

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Link: https://lore.kernel.org/bpf/20220519233016.105670-5-mathew.j.martineau@linux.intel.com
---
 tools/testing/selftests/bpf/bpf_tcp_helpers.h  |  4 ++++
 tools/testing/selftests/bpf/prog_tests/mptcp.c | 27 ++++++++++++++++++++------
 tools/testing/selftests/bpf/progs/mptcp_sock.c | 19 ++++++++++++++----
 3 files changed, 40 insertions(+), 10 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
index 22e0c8849a17..1a3f6ece429e 100644
--- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
@@ -226,4 +226,8 @@ static __always_inline bool tcp_cc_eq(const char *a, const char *b)
 extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym;
 extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym;
 
+struct mptcp_sock {
+	struct inet_connection_sock	sk;
+} __attribute__((preserve_access_index));
+
 #endif
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
index 6b346e303b90..227682ae8e09 100644
--- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
+++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
@@ -12,14 +12,11 @@ struct mptcp_storage {
 	__u32 is_mptcp;
 };
 
-static int verify_sk(int map_fd, int client_fd, __u32 is_mptcp)
+static int verify_tsk(int map_fd, int client_fd)
 {
 	int err, cfd = client_fd;
 	struct mptcp_storage val;
 
-	if (is_mptcp == 1)
-		return 0;
-
 	err = bpf_map_lookup_elem(map_fd, &cfd, &val);
 	if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
 		return err;
@@ -33,6 +30,24 @@ static int verify_sk(int map_fd, int client_fd, __u32 is_mptcp)
 	return err;
 }
 
+static int verify_msk(int map_fd, int client_fd)
+{
+	int err, cfd = client_fd;
+	struct mptcp_storage val;
+
+	err = bpf_map_lookup_elem(map_fd, &cfd, &val);
+	if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
+		return err;
+
+	if (!ASSERT_EQ(val.invoked, 1, "unexpected invoked count"))
+		err++;
+
+	if (!ASSERT_EQ(val.is_mptcp, 1, "unexpected is_mptcp"))
+		err++;
+
+	return err;
+}
+
 static int run_test(int cgroup_fd, int server_fd, bool is_mptcp)
 {
 	int client_fd, prog_fd, map_fd, err;
@@ -64,8 +79,8 @@ static int run_test(int cgroup_fd, int server_fd, bool is_mptcp)
 		goto out;
 	}
 
-	err += is_mptcp ? verify_sk(map_fd, client_fd, 1) :
-			  verify_sk(map_fd, client_fd, 0);
+	err += is_mptcp ? verify_msk(map_fd, client_fd) :
+			  verify_tsk(map_fd, client_fd);
 
 	close(client_fd);
 
diff --git a/tools/testing/selftests/bpf/progs/mptcp_sock.c b/tools/testing/selftests/bpf/progs/mptcp_sock.c
index bc09dba0b078..dc73b3fbb50b 100644
--- a/tools/testing/selftests/bpf/progs/mptcp_sock.c
+++ b/tools/testing/selftests/bpf/progs/mptcp_sock.c
@@ -24,6 +24,7 @@ SEC("sockops")
 int _sockops(struct bpf_sock_ops *ctx)
 {
 	struct mptcp_storage *storage;
+	struct mptcp_sock *msk;
 	int op = (int)ctx->op;
 	struct tcp_sock *tsk;
 	struct bpf_sock *sk;
@@ -41,11 +42,21 @@ int _sockops(struct bpf_sock_ops *ctx)
 		return 1;
 
 	is_mptcp = bpf_core_field_exists(tsk->is_mptcp) ? tsk->is_mptcp : 0;
-	storage = bpf_sk_storage_get(&socket_storage_map, sk, 0,
-				     BPF_SK_STORAGE_GET_F_CREATE);
-	if (!storage)
-		return 1;
+	if (!is_mptcp) {
+		storage = bpf_sk_storage_get(&socket_storage_map, sk, 0,
+					     BPF_SK_STORAGE_GET_F_CREATE);
+		if (!storage)
+			return 1;
+	} else {
+		msk = bpf_skc_to_mptcp_sock(sk);
+		if (!msk)
+			return 1;
 
+		storage = bpf_sk_storage_get(&socket_storage_map, msk, 0,
+					     BPF_SK_STORAGE_GET_F_CREATE);
+		if (!storage)
+			return 1;
+	}
 	storage->invoked++;
 	storage->is_mptcp = is_mptcp;
 
-- 
cgit 


From 0266223467728d553b99adea769d9ff3b6e41372 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Thu, 19 May 2022 16:30:14 -0700
Subject: selftests/bpf: Verify token of struct mptcp_sock

This patch verifies the struct member token of struct mptcp_sock. Add a
new member token in struct mptcp_storage to store the token value of the
msk socket got by bpf_skc_to_mptcp_sock(). Trace the kernel function
mptcp_pm_new_connection() by using bpf fentry prog to obtain the msk token
and save it in a global bpf variable. Pass the variable to verify_msk() to
verify it with the token saved in socket_storage_map.

v4:
 - use ASSERT_* instead of CHECK_FAIL (Andrii)
 - skip the test if 'ip mptcp monitor' is not supported (Mat)

v5:
 - Drop 'ip mptcp monitor', trace mptcp_pm_new_connection instead (Martin)
 - Use ASSERT_EQ (Andrii)

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Link: https://lore.kernel.org/bpf/20220519233016.105670-6-mathew.j.martineau@linux.intel.com
---
 tools/testing/selftests/bpf/bpf_tcp_helpers.h  |  2 ++
 tools/testing/selftests/bpf/prog_tests/mptcp.c | 15 +++++++++++++--
 tools/testing/selftests/bpf/progs/mptcp_sock.c | 16 ++++++++++++++++
 3 files changed, 31 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
index 1a3f6ece429e..422491872619 100644
--- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
@@ -228,6 +228,8 @@ extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym;
 
 struct mptcp_sock {
 	struct inet_connection_sock	sk;
+
+	__u32		token;
 } __attribute__((preserve_access_index));
 
 #endif
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
index 227682ae8e09..c84d7c593f9f 100644
--- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
+++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
@@ -10,6 +10,7 @@
 struct mptcp_storage {
 	__u32 invoked;
 	__u32 is_mptcp;
+	__u32 token;
 };
 
 static int verify_tsk(int map_fd, int client_fd)
@@ -30,11 +31,14 @@ static int verify_tsk(int map_fd, int client_fd)
 	return err;
 }
 
-static int verify_msk(int map_fd, int client_fd)
+static int verify_msk(int map_fd, int client_fd, __u32 token)
 {
 	int err, cfd = client_fd;
 	struct mptcp_storage val;
 
+	if (!ASSERT_GT(token, 0, "invalid token"))
+		return -1;
+
 	err = bpf_map_lookup_elem(map_fd, &cfd, &val);
 	if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
 		return err;
@@ -45,6 +49,9 @@ static int verify_msk(int map_fd, int client_fd)
 	if (!ASSERT_EQ(val.is_mptcp, 1, "unexpected is_mptcp"))
 		err++;
 
+	if (!ASSERT_EQ(val.token, token, "unexpected token"))
+		err++;
+
 	return err;
 }
 
@@ -57,6 +64,10 @@ static int run_test(int cgroup_fd, int server_fd, bool is_mptcp)
 	if (!ASSERT_OK_PTR(sock_skel, "skel_open_load"))
 		return -EIO;
 
+	err = mptcp_sock__attach(sock_skel);
+	if (!ASSERT_OK(err, "skel_attach"))
+		goto out;
+
 	prog_fd = bpf_program__fd(sock_skel->progs._sockops);
 	if (!ASSERT_GE(prog_fd, 0, "bpf_program__fd")) {
 		err = -EIO;
@@ -79,7 +90,7 @@ static int run_test(int cgroup_fd, int server_fd, bool is_mptcp)
 		goto out;
 	}
 
-	err += is_mptcp ? verify_msk(map_fd, client_fd) :
+	err += is_mptcp ? verify_msk(map_fd, client_fd, sock_skel->bss->token) :
 			  verify_tsk(map_fd, client_fd);
 
 	close(client_fd);
diff --git a/tools/testing/selftests/bpf/progs/mptcp_sock.c b/tools/testing/selftests/bpf/progs/mptcp_sock.c
index dc73b3fbb50b..f038b0e699a2 100644
--- a/tools/testing/selftests/bpf/progs/mptcp_sock.c
+++ b/tools/testing/selftests/bpf/progs/mptcp_sock.c
@@ -7,10 +7,12 @@
 #include "bpf_tcp_helpers.h"
 
 char _license[] SEC("license") = "GPL";
+__u32 token = 0;
 
 struct mptcp_storage {
 	__u32 invoked;
 	__u32 is_mptcp;
+	__u32 token;
 };
 
 struct {
@@ -47,6 +49,8 @@ int _sockops(struct bpf_sock_ops *ctx)
 					     BPF_SK_STORAGE_GET_F_CREATE);
 		if (!storage)
 			return 1;
+
+		storage->token = 0;
 	} else {
 		msk = bpf_skc_to_mptcp_sock(sk);
 		if (!msk)
@@ -56,9 +60,21 @@ int _sockops(struct bpf_sock_ops *ctx)
 					     BPF_SK_STORAGE_GET_F_CREATE);
 		if (!storage)
 			return 1;
+
+		storage->token = msk->token;
 	}
 	storage->invoked++;
 	storage->is_mptcp = is_mptcp;
 
 	return 1;
 }
+
+SEC("fentry/mptcp_pm_new_connection")
+int BPF_PROG(trace_mptcp_pm_new_connection, struct mptcp_sock *msk,
+	     const struct sock *ssk, int server_side)
+{
+	if (!server_side)
+		token = msk->token;
+
+	return 0;
+}
-- 
cgit 


From ccc090f469000fd757049028eeb0dff43013f7c1 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Thu, 19 May 2022 16:30:15 -0700
Subject: selftests/bpf: Verify ca_name of struct mptcp_sock

This patch verifies another member of struct mptcp_sock, ca_name. Add a
new function get_msk_ca_name() to read the sysctl tcp_congestion_control
and verify it in verify_msk().

v3: Access the sysctl through the filesystem to avoid compatibility
    issues with the busybox sysctl command.

v4: use ASSERT_* instead of CHECK_FAIL (Andrii)

v5: use ASSERT_STRNEQ() instead of strncmp() (Andrii)

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Link: https://lore.kernel.org/bpf/20220519233016.105670-7-mathew.j.martineau@linux.intel.com
---
 tools/testing/selftests/bpf/bpf_tcp_helpers.h  |  5 +++++
 tools/testing/selftests/bpf/prog_tests/mptcp.c | 31 ++++++++++++++++++++++++++
 tools/testing/selftests/bpf/progs/mptcp_sock.c |  3 +++
 3 files changed, 39 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
index 422491872619..c38c66d5c1e6 100644
--- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
@@ -16,6 +16,10 @@ BPF_PROG(name, args)
 #define SOL_TCP 6
 #endif
 
+#ifndef TCP_CA_NAME_MAX
+#define TCP_CA_NAME_MAX	16
+#endif
+
 #define tcp_jiffies32 ((__u32)bpf_jiffies64())
 
 struct sock_common {
@@ -230,6 +234,7 @@ struct mptcp_sock {
 	struct inet_connection_sock	sk;
 
 	__u32		token;
+	char		ca_name[TCP_CA_NAME_MAX];
 } __attribute__((preserve_access_index));
 
 #endif
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
index c84d7c593f9f..33cafc619913 100644
--- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
+++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
@@ -7,10 +7,15 @@
 #include "network_helpers.h"
 #include "mptcp_sock.skel.h"
 
+#ifndef TCP_CA_NAME_MAX
+#define TCP_CA_NAME_MAX	16
+#endif
+
 struct mptcp_storage {
 	__u32 invoked;
 	__u32 is_mptcp;
 	__u32 token;
+	char ca_name[TCP_CA_NAME_MAX];
 };
 
 static int verify_tsk(int map_fd, int client_fd)
@@ -31,14 +36,37 @@ static int verify_tsk(int map_fd, int client_fd)
 	return err;
 }
 
+static void get_msk_ca_name(char ca_name[])
+{
+	size_t len;
+	int fd;
+
+	fd = open("/proc/sys/net/ipv4/tcp_congestion_control", O_RDONLY);
+	if (!ASSERT_GE(fd, 0, "failed to open tcp_congestion_control"))
+		return;
+
+	len = read(fd, ca_name, TCP_CA_NAME_MAX);
+	if (!ASSERT_GT(len, 0, "failed to read ca_name"))
+		goto err;
+
+	if (len > 0 && ca_name[len - 1] == '\n')
+		ca_name[len - 1] = '\0';
+
+err:
+	close(fd);
+}
+
 static int verify_msk(int map_fd, int client_fd, __u32 token)
 {
+	char ca_name[TCP_CA_NAME_MAX];
 	int err, cfd = client_fd;
 	struct mptcp_storage val;
 
 	if (!ASSERT_GT(token, 0, "invalid token"))
 		return -1;
 
+	get_msk_ca_name(ca_name);
+
 	err = bpf_map_lookup_elem(map_fd, &cfd, &val);
 	if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
 		return err;
@@ -52,6 +80,9 @@ static int verify_msk(int map_fd, int client_fd, __u32 token)
 	if (!ASSERT_EQ(val.token, token, "unexpected token"))
 		err++;
 
+	if (!ASSERT_STRNEQ(val.ca_name, ca_name, TCP_CA_NAME_MAX, "unexpected ca_name"))
+		err++;
+
 	return err;
 }
 
diff --git a/tools/testing/selftests/bpf/progs/mptcp_sock.c b/tools/testing/selftests/bpf/progs/mptcp_sock.c
index f038b0e699a2..65e2b5543fc7 100644
--- a/tools/testing/selftests/bpf/progs/mptcp_sock.c
+++ b/tools/testing/selftests/bpf/progs/mptcp_sock.c
@@ -13,6 +13,7 @@ struct mptcp_storage {
 	__u32 invoked;
 	__u32 is_mptcp;
 	__u32 token;
+	char ca_name[TCP_CA_NAME_MAX];
 };
 
 struct {
@@ -51,6 +52,7 @@ int _sockops(struct bpf_sock_ops *ctx)
 			return 1;
 
 		storage->token = 0;
+		__builtin_memset(storage->ca_name, 0, TCP_CA_NAME_MAX);
 	} else {
 		msk = bpf_skc_to_mptcp_sock(sk);
 		if (!msk)
@@ -62,6 +64,7 @@ int _sockops(struct bpf_sock_ops *ctx)
 			return 1;
 
 		storage->token = msk->token;
+		__builtin_memcpy(storage->ca_name, msk->ca_name, TCP_CA_NAME_MAX);
 	}
 	storage->invoked++;
 	storage->is_mptcp = is_mptcp;
-- 
cgit 


From 4f90d034bba9bdcb06a3cb53c43012351c1b39ff Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Thu, 19 May 2022 16:30:16 -0700
Subject: selftests/bpf: Verify first of struct mptcp_sock

This patch verifies the 'first' struct member of struct mptcp_sock, which
points to the first subflow of msk. Save 'sk' in mptcp_storage, and verify
it with 'first' in verify_msk().

v5:
 - Use ASSERT_EQ() instead of a manual comparison + log (Andrii).

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Link: https://lore.kernel.org/bpf/20220519233016.105670-8-mathew.j.martineau@linux.intel.com
---
 tools/testing/selftests/bpf/bpf_tcp_helpers.h  | 1 +
 tools/testing/selftests/bpf/prog_tests/mptcp.c | 5 +++++
 tools/testing/selftests/bpf/progs/mptcp_sock.c | 5 +++++
 3 files changed, 11 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
index c38c66d5c1e6..82a7c9de95f9 100644
--- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
@@ -234,6 +234,7 @@ struct mptcp_sock {
 	struct inet_connection_sock	sk;
 
 	__u32		token;
+	struct sock	*first;
 	char		ca_name[TCP_CA_NAME_MAX];
 } __attribute__((preserve_access_index));
 
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
index 33cafc619913..59f08d6d1d53 100644
--- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
+++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
@@ -14,7 +14,9 @@
 struct mptcp_storage {
 	__u32 invoked;
 	__u32 is_mptcp;
+	struct sock *sk;
 	__u32 token;
+	struct sock *first;
 	char ca_name[TCP_CA_NAME_MAX];
 };
 
@@ -80,6 +82,9 @@ static int verify_msk(int map_fd, int client_fd, __u32 token)
 	if (!ASSERT_EQ(val.token, token, "unexpected token"))
 		err++;
 
+	if (!ASSERT_EQ(val.first, val.sk, "unexpected first"))
+		err++;
+
 	if (!ASSERT_STRNEQ(val.ca_name, ca_name, TCP_CA_NAME_MAX, "unexpected ca_name"))
 		err++;
 
diff --git a/tools/testing/selftests/bpf/progs/mptcp_sock.c b/tools/testing/selftests/bpf/progs/mptcp_sock.c
index 65e2b5543fc7..91a0d7eff2ac 100644
--- a/tools/testing/selftests/bpf/progs/mptcp_sock.c
+++ b/tools/testing/selftests/bpf/progs/mptcp_sock.c
@@ -12,7 +12,9 @@ __u32 token = 0;
 struct mptcp_storage {
 	__u32 invoked;
 	__u32 is_mptcp;
+	struct sock *sk;
 	__u32 token;
+	struct sock *first;
 	char ca_name[TCP_CA_NAME_MAX];
 };
 
@@ -53,6 +55,7 @@ int _sockops(struct bpf_sock_ops *ctx)
 
 		storage->token = 0;
 		__builtin_memset(storage->ca_name, 0, TCP_CA_NAME_MAX);
+		storage->first = NULL;
 	} else {
 		msk = bpf_skc_to_mptcp_sock(sk);
 		if (!msk)
@@ -65,9 +68,11 @@ int _sockops(struct bpf_sock_ops *ctx)
 
 		storage->token = msk->token;
 		__builtin_memcpy(storage->ca_name, msk->ca_name, TCP_CA_NAME_MAX);
+		storage->first = msk->first;
 	}
 	storage->invoked++;
 	storage->is_mptcp = is_mptcp;
+	storage->sk = (struct sock *)sk;
 
 	return 1;
 }
-- 
cgit 


From b23316aabffa835ecc516cb81daeef5b9155e8a5 Mon Sep 17 00:00:00 2001
From: Yuntao Wang <ytcoode@gmail.com>
Date: Thu, 19 May 2022 23:06:10 +0800
Subject: selftests/bpf: Add missing trampoline program type to
 trampoline_count test

Currently the trampoline_count test doesn't include any fmod_ret bpf
programs, fix it to make the test cover all possible trampoline program
types.

Since fmod_ret bpf programs can't be attached to __set_task_comm function,
as it's neither whitelisted for error injection nor a security hook, change
it to bpf_modify_return_test.

This patch also does some other cleanups such as removing duplicate code,
dropping inconsistent comments, etc.

Signed-off-by: Yuntao Wang <ytcoode@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20220519150610.601313-1-ytcoode@gmail.com
---
 include/linux/bpf.h                                |   2 +-
 .../selftests/bpf/prog_tests/trampoline_count.c    | 134 ++++++++-------------
 .../selftests/bpf/progs/test_trampoline_count.c    |  16 +--
 3 files changed, 61 insertions(+), 91 deletions(-)

(limited to 'tools/testing')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index a3ef078401cf..cc4d5e394031 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -724,7 +724,7 @@ struct btf_func_model {
 #define BPF_TRAMP_F_RET_FENTRY_RET	BIT(4)
 
 /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
- * bytes on x86.  Pick a number to fit into BPF_IMAGE_SIZE / 2
+ * bytes on x86.
  */
 #define BPF_MAX_TRAMP_LINKS 38
 
diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
index 9c795ee52b7b..b0acbda6dbf5 100644
--- a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
+++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
@@ -1,126 +1,94 @@
 // SPDX-License-Identifier: GPL-2.0-only
 #define _GNU_SOURCE
-#include <sched.h>
-#include <sys/prctl.h>
 #include <test_progs.h>
 
 #define MAX_TRAMP_PROGS 38
 
 struct inst {
 	struct bpf_object *obj;
-	struct bpf_link   *link_fentry;
-	struct bpf_link   *link_fexit;
+	struct bpf_link   *link;
 };
 
-static int test_task_rename(void)
-{
-	int fd, duration = 0, err;
-	char buf[] = "test_overhead";
-
-	fd = open("/proc/self/comm", O_WRONLY|O_TRUNC);
-	if (CHECK(fd < 0, "open /proc", "err %d", errno))
-		return -1;
-	err = write(fd, buf, sizeof(buf));
-	if (err < 0) {
-		CHECK(err < 0, "task rename", "err %d", errno);
-		close(fd);
-		return -1;
-	}
-	close(fd);
-	return 0;
-}
-
-static struct bpf_link *load(struct bpf_object *obj, const char *name)
+static struct bpf_program *load_prog(char *file, char *name, struct inst *inst)
 {
+	struct bpf_object *obj;
 	struct bpf_program *prog;
-	int duration = 0;
+	int err;
+
+	obj = bpf_object__open_file(file, NULL);
+	if (!ASSERT_OK_PTR(obj, "obj_open_file"))
+		return NULL;
+
+	inst->obj = obj;
+
+	err = bpf_object__load(obj);
+	if (!ASSERT_OK(err, "obj_load"))
+		return NULL;
 
 	prog = bpf_object__find_program_by_name(obj, name);
-	if (CHECK(!prog, "find_probe", "prog '%s' not found\n", name))
-		return ERR_PTR(-EINVAL);
-	return bpf_program__attach_trace(prog);
+	if (!ASSERT_OK_PTR(prog, "obj_find_prog"))
+		return NULL;
+
+	return prog;
 }
 
 /* TODO: use different target function to run in concurrent mode */
 void serial_test_trampoline_count(void)
 {
-	const char *fentry_name = "prog1";
-	const char *fexit_name = "prog2";
-	const char *object = "test_trampoline_count.o";
-	struct inst inst[MAX_TRAMP_PROGS] = {};
-	int err, i = 0, duration = 0;
-	struct bpf_object *obj;
+	char *file = "test_trampoline_count.o";
+	char *const progs[] = { "fentry_test", "fmod_ret_test", "fexit_test" };
+	struct inst inst[MAX_TRAMP_PROGS + 1] = {};
+	struct bpf_program *prog;
 	struct bpf_link *link;
-	char comm[16] = {};
+	int prog_fd, err, i;
+	LIBBPF_OPTS(bpf_test_run_opts, opts);
 
 	/* attach 'allowed' trampoline programs */
 	for (i = 0; i < MAX_TRAMP_PROGS; i++) {
-		obj = bpf_object__open_file(object, NULL);
-		if (!ASSERT_OK_PTR(obj, "obj_open_file")) {
-			obj = NULL;
+		prog = load_prog(file, progs[i % ARRAY_SIZE(progs)], &inst[i]);
+		if (!prog)
 			goto cleanup;
-		}
 
-		err = bpf_object__load(obj);
-		if (CHECK(err, "obj_load", "err %d\n", err))
+		link = bpf_program__attach(prog);
+		if (!ASSERT_OK_PTR(link, "attach_prog"))
 			goto cleanup;
-		inst[i].obj = obj;
-		obj = NULL;
-
-		if (rand() % 2) {
-			link = load(inst[i].obj, fentry_name);
-			if (!ASSERT_OK_PTR(link, "attach_prog")) {
-				link = NULL;
-				goto cleanup;
-			}
-			inst[i].link_fentry = link;
-		} else {
-			link = load(inst[i].obj, fexit_name);
-			if (!ASSERT_OK_PTR(link, "attach_prog")) {
-				link = NULL;
-				goto cleanup;
-			}
-			inst[i].link_fexit = link;
-		}
+
+		inst[i].link = link;
 	}
 
 	/* and try 1 extra.. */
-	obj = bpf_object__open_file(object, NULL);
-	if (!ASSERT_OK_PTR(obj, "obj_open_file")) {
-		obj = NULL;
+	prog = load_prog(file, "fmod_ret_test", &inst[i]);
+	if (!prog)
 		goto cleanup;
-	}
-
-	err = bpf_object__load(obj);
-	if (CHECK(err, "obj_load", "err %d\n", err))
-		goto cleanup_extra;
 
 	/* ..that needs to fail */
-	link = load(obj, fentry_name);
-	err = libbpf_get_error(link);
-	if (!ASSERT_ERR_PTR(link, "cannot attach over the limit")) {
-		bpf_link__destroy(link);
-		goto cleanup_extra;
+	link = bpf_program__attach(prog);
+	if (!ASSERT_ERR_PTR(link, "attach_prog")) {
+		inst[i].link = link;
+		goto cleanup;
 	}
 
 	/* with E2BIG error */
-	ASSERT_EQ(err, -E2BIG, "proper error check");
-	ASSERT_EQ(link, NULL, "ptr_is_null");
+	if (!ASSERT_EQ(libbpf_get_error(link), -E2BIG, "E2BIG"))
+		goto cleanup;
+	if (!ASSERT_EQ(link, NULL, "ptr_is_null"))
+		goto cleanup;
 
 	/* and finaly execute the probe */
-	if (CHECK_FAIL(prctl(PR_GET_NAME, comm, 0L, 0L, 0L)))
-		goto cleanup_extra;
-	CHECK_FAIL(test_task_rename());
-	CHECK_FAIL(prctl(PR_SET_NAME, comm, 0L, 0L, 0L));
+	prog_fd = bpf_program__fd(prog);
+	if (!ASSERT_GE(prog_fd, 0, "bpf_program__fd"))
+		goto cleanup;
+
+	err = bpf_prog_test_run_opts(prog_fd, &opts);
+	if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
+		goto cleanup;
+
+	ASSERT_EQ(opts.retval & 0xffff, 4, "bpf_modify_return_test.result");
+	ASSERT_EQ(opts.retval >> 16, 1, "bpf_modify_return_test.side_effect");
 
-cleanup_extra:
-	bpf_object__close(obj);
 cleanup:
-	if (i >= MAX_TRAMP_PROGS)
-		i = MAX_TRAMP_PROGS - 1;
 	for (; i >= 0; i--) {
-		bpf_link__destroy(inst[i].link_fentry);
-		bpf_link__destroy(inst[i].link_fexit);
+		bpf_link__destroy(inst[i].link);
 		bpf_object__close(inst[i].obj);
 	}
 }
diff --git a/tools/testing/selftests/bpf/progs/test_trampoline_count.c b/tools/testing/selftests/bpf/progs/test_trampoline_count.c
index f030e469d05b..7765720da7d5 100644
--- a/tools/testing/selftests/bpf/progs/test_trampoline_count.c
+++ b/tools/testing/selftests/bpf/progs/test_trampoline_count.c
@@ -1,20 +1,22 @@
 // SPDX-License-Identifier: GPL-2.0
-#include <stdbool.h>
-#include <stddef.h>
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
-struct task_struct;
+SEC("fentry/bpf_modify_return_test")
+int BPF_PROG(fentry_test, int a, int *b)
+{
+	return 0;
+}
 
-SEC("fentry/__set_task_comm")
-int BPF_PROG(prog1, struct task_struct *tsk, const char *buf, bool exec)
+SEC("fmod_ret/bpf_modify_return_test")
+int BPF_PROG(fmod_ret_test, int a, int *b, int ret)
 {
 	return 0;
 }
 
-SEC("fexit/__set_task_comm")
-int BPF_PROG(prog2, struct task_struct *tsk, const char *buf, bool exec)
+SEC("fexit/bpf_modify_return_test")
+int BPF_PROG(fexit_test, int a, int *b, int ret)
 {
 	return 0;
 }
-- 
cgit 


From fa376860658252a4559026496528c5d3a36b52e3 Mon Sep 17 00:00:00 2001
From: Mykola Lysenko <mykolal@fb.com>
Date: Fri, 20 May 2022 00:01:44 -0700
Subject: selftests/bpf: Fix subtest number formatting in test_progs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove weird spaces around / while preserving proper
indentation

Signed-off-by: Mykola Lysenko <mykolal@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Daniel Müller <deso@posteo.net>
Link: https://lore.kernel.org/bpf/20220520070144.10312-1-mykolal@fb.com
---
 tools/testing/selftests/bpf/test_progs.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index a07da648af3b..307f8f41437d 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -230,9 +230,11 @@ static void print_test_log(char *log_buf, size_t log_cnt)
 		fprintf(env.stdout, "\n");
 }
 
+#define TEST_NUM_WIDTH 7
+
 static void print_test_name(int test_num, const char *test_name, char *result)
 {
-	fprintf(env.stdout, "#%-9d %s", test_num, test_name);
+	fprintf(env.stdout, "#%-*d %s", TEST_NUM_WIDTH, test_num, test_name);
 
 	if (result)
 		fprintf(env.stdout, ":%s", result);
@@ -244,8 +246,12 @@ static void print_subtest_name(int test_num, int subtest_num,
 			       const char *test_name, char *subtest_name,
 			       char *result)
 {
-	fprintf(env.stdout, "#%-3d/%-5d %s/%s",
-		test_num, subtest_num,
+	char test_num_str[TEST_NUM_WIDTH + 1];
+
+	snprintf(test_num_str, sizeof(test_num_str), "%d/%d", test_num, subtest_num);
+
+	fprintf(env.stdout, "#%-*s %s/%s",
+		TEST_NUM_WIDTH, test_num_str,
 		test_name, subtest_name);
 
 	if (result)
-- 
cgit 


From 2dc323b1c4cb8ab7db9f8286a9c3267ce66419ab Mon Sep 17 00:00:00 2001
From: Mykola Lysenko <mykolal@fb.com>
Date: Thu, 19 May 2022 23:13:03 -0700
Subject: selftests/bpf: Remove filtered subtests from output

Currently filtered subtests show up in the output as skipped.

Before:
$ sudo ./test_progs -t log_fixup/missing_map
 #94 /1     log_fixup/bad_core_relo_trunc_none:SKIP
 #94 /2     log_fixup/bad_core_relo_trunc_partial:SKIP
 #94 /3     log_fixup/bad_core_relo_trunc_full:SKIP
 #94 /4     log_fixup/bad_core_relo_subprog:SKIP
 #94 /5     log_fixup/missing_map:OK
 #94        log_fixup:OK
Summary: 1/1 PASSED, 0 SKIPPED, 0 FAILED

After:
$ sudo ./test_progs -t log_fixup/missing_map
 #94 /5     log_fixup/missing_map:OK
 #94        log_fixup:OK
Summary: 1/1 PASSED, 0 SKIPPED, 0 FAILED

Signed-off-by: Mykola Lysenko <mykolal@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20220520061303.4004808-1-mykolal@fb.com
---
 tools/testing/selftests/bpf/test_progs.c | 8 ++++++--
 tools/testing/selftests/bpf/test_progs.h | 2 ++
 2 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 307f8f41437d..c639f2e56fc5 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -271,6 +271,7 @@ static void dump_test_log(const struct prog_test_def *test,
 	int i;
 	struct subtest_state *subtest_state;
 	bool subtest_failed;
+	bool subtest_filtered;
 	bool print_subtest;
 
 	/* we do not print anything in the worker thread */
@@ -289,9 +290,10 @@ static void dump_test_log(const struct prog_test_def *test,
 	for (i = 0; i < test_state->subtest_num; i++) {
 		subtest_state = &test_state->subtest_states[i];
 		subtest_failed = subtest_state->error_cnt;
+		subtest_filtered = subtest_state->filtered;
 		print_subtest = verbose() || force_log || subtest_failed;
 
-		if (skip_ok_subtests && !subtest_failed)
+		if ((skip_ok_subtests && !subtest_failed) || subtest_filtered)
 			continue;
 
 		if (subtest_state->log_cnt && print_subtest) {
@@ -423,7 +425,7 @@ bool test__start_subtest(const char *subtest_name)
 				state->subtest_num,
 				test->test_name,
 				subtest_name)) {
-		subtest_state->skipped = true;
+		subtest_state->filtered = true;
 		return false;
 	}
 
@@ -1129,6 +1131,7 @@ static int dispatch_thread_send_subtests(int sock_fd, struct test_state *state)
 		subtest_state->name = strdup(msg.subtest_done.name);
 		subtest_state->error_cnt = msg.subtest_done.error_cnt;
 		subtest_state->skipped = msg.subtest_done.skipped;
+		subtest_state->filtered = msg.subtest_done.filtered;
 
 		/* collect all logs */
 		if (msg.subtest_done.have_log)
@@ -1424,6 +1427,7 @@ static int worker_main_send_subtests(int sock, struct test_state *state)
 
 		msg.subtest_done.error_cnt = subtest_state->error_cnt;
 		msg.subtest_done.skipped = subtest_state->skipped;
+		msg.subtest_done.filtered = subtest_state->filtered;
 		msg.subtest_done.have_log = false;
 
 		if (verbose() || state->force_log || subtest_state->error_cnt) {
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index dd1b91d7985a..5fe1365c2bb1 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -70,6 +70,7 @@ struct subtest_state {
 	char *log_buf;
 	int error_cnt;
 	bool skipped;
+	bool filtered;
 
 	FILE *stdout;
 };
@@ -156,6 +157,7 @@ struct msg {
 			char name[MAX_SUBTEST_NAME + 1];
 			int error_cnt;
 			bool skipped;
+			bool filtered;
 			bool have_log;
 		} subtest_done;
 	};
-- 
cgit 


From 5feba47273952918e6563d692d9c5ce35a2165b3 Mon Sep 17 00:00:00 2001
From: Amit Cohen <amcohen@nvidia.com>
Date: Thu, 19 May 2022 10:09:21 +0300
Subject: selftests: fib_nexthops: Make ping timeout configurable

Commit 49bb39bddad2 ("selftests: fib_nexthops: Make the test more robust")
increased the timeout of ping commands to 5 seconds, to make the test
more robust. Make the timeout configurable using '-w' argument to allow
user to change it depending on the system that runs the test. Some systems
suffer from slow forwarding performance, so they may need to change the
timeout.

Signed-off-by: Amit Cohen <amcohen@nvidia.com>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lore.kernel.org/r/20220519070921.3559701-1-amcohen@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/fib_nexthops.sh | 53 +++++++++++++++--------------
 1 file changed, 28 insertions(+), 25 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index a99ee3fb2e13..d5a0dd548989 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -56,6 +56,7 @@ TESTS="${ALL_TESTS}"
 VERBOSE=0
 PAUSE_ON_FAIL=no
 PAUSE=no
+PING_TIMEOUT=5
 
 nsid=100
 
@@ -882,13 +883,13 @@ ipv6_fcnal_runtime()
 	log_test $? 0 "Route delete"
 
 	run_cmd "$IP ro add 2001:db8:101::1/128 nhid 81"
-	run_cmd "ip netns exec me ping -c1 -w5 2001:db8:101::1"
+	run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
 	log_test $? 0 "Ping with nexthop"
 
 	run_cmd "$IP nexthop add id 82 via 2001:db8:92::2 dev veth3"
 	run_cmd "$IP nexthop add id 122 group 81/82"
 	run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 122"
-	run_cmd "ip netns exec me ping -c1 -w5 2001:db8:101::1"
+	run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
 	log_test $? 0 "Ping - multipath"
 
 	#
@@ -896,26 +897,26 @@ ipv6_fcnal_runtime()
 	#
 	run_cmd "$IP -6 nexthop add id 83 blackhole"
 	run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 83"
-	run_cmd "ip netns exec me ping -c1 -w5 2001:db8:101::1"
+	run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
 	log_test $? 2 "Ping - blackhole"
 
 	run_cmd "$IP nexthop replace id 83 via 2001:db8:91::2 dev veth1"
-	run_cmd "ip netns exec me ping -c1 -w5 2001:db8:101::1"
+	run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
 	log_test $? 0 "Ping - blackhole replaced with gateway"
 
 	run_cmd "$IP -6 nexthop replace id 83 blackhole"
-	run_cmd "ip netns exec me ping -c1 -w5 2001:db8:101::1"
+	run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
 	log_test $? 2 "Ping - gateway replaced by blackhole"
 
 	run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 122"
-	run_cmd "ip netns exec me ping -c1 -w5 2001:db8:101::1"
+	run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
 	if [ $? -eq 0 ]; then
 		run_cmd "$IP nexthop replace id 122 group 83"
-		run_cmd "ip netns exec me ping -c1 -w5 2001:db8:101::1"
+		run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
 		log_test $? 2 "Ping - group with blackhole"
 
 		run_cmd "$IP nexthop replace id 122 group 81/82"
-		run_cmd "ip netns exec me ping -c1 -w5 2001:db8:101::1"
+		run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
 		log_test $? 0 "Ping - group blackhole replaced with gateways"
 	else
 		log_test 2 0 "Ping - multipath failed"
@@ -1003,10 +1004,10 @@ ipv6_fcnal_runtime()
 	run_cmd "$IP nexthop add id 92 via 2001:db8:92::2 dev veth3"
 	run_cmd "$IP nexthop add id 93 group 91/92"
 	run_cmd "$IP -6 ro add default nhid 91"
-	run_cmd "ip netns exec me ping -c1 -w5 2001:db8:101::1"
+	run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
 	log_test $? 0 "Nexthop with default route and rpfilter"
 	run_cmd "$IP -6 ro replace default nhid 93"
-	run_cmd "ip netns exec me ping -c1 -w5 2001:db8:101::1"
+	run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
 	log_test $? 0 "Nexthop with multipath default route and rpfilter"
 
 	# TO-DO:
@@ -1460,13 +1461,13 @@ ipv4_fcnal_runtime()
 	#
 	run_cmd "$IP nexthop replace id 21 via 172.16.1.2 dev veth1"
 	run_cmd "$IP ro replace 172.16.101.1/32 nhid 21"
-	run_cmd "ip netns exec me ping -c1 -w5 172.16.101.1"
+	run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 	log_test $? 0 "Basic ping"
 
 	run_cmd "$IP nexthop replace id 22 via 172.16.2.2 dev veth3"
 	run_cmd "$IP nexthop add id 122 group 21/22"
 	run_cmd "$IP ro replace 172.16.101.1/32 nhid 122"
-	run_cmd "ip netns exec me ping -c1 -w5 172.16.101.1"
+	run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 	log_test $? 0 "Ping - multipath"
 
 	run_cmd "$IP ro delete 172.16.101.1/32 nhid 122"
@@ -1477,7 +1478,7 @@ ipv4_fcnal_runtime()
 	run_cmd "$IP nexthop add id 501 via 172.16.1.2 dev veth1"
 	run_cmd "$IP ro add default nhid 501"
 	run_cmd "$IP ro add default via 172.16.1.3 dev veth1 metric 20"
-	run_cmd "ip netns exec me ping -c1 -w5 172.16.101.1"
+	run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 	log_test $? 0 "Ping - multiple default routes, nh first"
 
 	# flip the order
@@ -1486,7 +1487,7 @@ ipv4_fcnal_runtime()
 	run_cmd "$IP ro add default via 172.16.1.2 dev veth1 metric 20"
 	run_cmd "$IP nexthop replace id 501 via 172.16.1.3 dev veth1"
 	run_cmd "$IP ro add default nhid 501 metric 20"
-	run_cmd "ip netns exec me ping -c1 -w5 172.16.101.1"
+	run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 	log_test $? 0 "Ping - multiple default routes, nh second"
 
 	run_cmd "$IP nexthop delete nhid 501"
@@ -1497,26 +1498,26 @@ ipv4_fcnal_runtime()
 	#
 	run_cmd "$IP nexthop add id 23 blackhole"
 	run_cmd "$IP ro replace 172.16.101.1/32 nhid 23"
-	run_cmd "ip netns exec me ping -c1 -w5 172.16.101.1"
+	run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 	log_test $? 2 "Ping - blackhole"
 
 	run_cmd "$IP nexthop replace id 23 via 172.16.1.2 dev veth1"
-	run_cmd "ip netns exec me ping -c1 -w5 172.16.101.1"
+	run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 	log_test $? 0 "Ping - blackhole replaced with gateway"
 
 	run_cmd "$IP nexthop replace id 23 blackhole"
-	run_cmd "ip netns exec me ping -c1 -w5 172.16.101.1"
+	run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 	log_test $? 2 "Ping - gateway replaced by blackhole"
 
 	run_cmd "$IP ro replace 172.16.101.1/32 nhid 122"
-	run_cmd "ip netns exec me ping -c1 -w5 172.16.101.1"
+	run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 	if [ $? -eq 0 ]; then
 		run_cmd "$IP nexthop replace id 122 group 23"
-		run_cmd "ip netns exec me ping -c1 -w5 172.16.101.1"
+		run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 		log_test $? 2 "Ping - group with blackhole"
 
 		run_cmd "$IP nexthop replace id 122 group 21/22"
-		run_cmd "ip netns exec me ping -c1 -w5 172.16.101.1"
+		run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 		log_test $? 0 "Ping - group blackhole replaced with gateways"
 	else
 		log_test 2 0 "Ping - multipath failed"
@@ -1543,7 +1544,7 @@ ipv4_fcnal_runtime()
 	run_cmd "$IP nexthop add id 24 via ${lladdr} dev veth1"
 	set +e
 	run_cmd "$IP ro replace 172.16.101.1/32 nhid 24"
-	run_cmd "ip netns exec me ping -c1 -w5 172.16.101.1"
+	run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 	log_test $? 0 "IPv6 nexthop with IPv4 route"
 
 	$IP neigh sh | grep -q "${lladdr} dev veth1"
@@ -1567,11 +1568,11 @@ ipv4_fcnal_runtime()
 
 	check_route "172.16.101.1" "172.16.101.1 nhid 101 nexthop via inet6 ${lladdr} dev veth1 weight 1 nexthop via 172.16.1.2 dev veth1 weight 1"
 
-	run_cmd "ip netns exec me ping -c1 -w5 172.16.101.1"
+	run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 	log_test $? 0 "IPv6 nexthop with IPv4 route"
 
 	run_cmd "$IP ro replace 172.16.101.1/32 via inet6 ${lladdr} dev veth1"
-	run_cmd "ip netns exec me ping -c1 -w5 172.16.101.1"
+	run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 	log_test $? 0 "IPv4 route with IPv6 gateway"
 
 	$IP neigh sh | grep -q "${lladdr} dev veth1"
@@ -1588,7 +1589,7 @@ ipv4_fcnal_runtime()
 
 	run_cmd "$IP ro del 172.16.101.1/32 via inet6 ${lladdr} dev veth1"
 	run_cmd "$IP -4 ro add default via inet6 ${lladdr} dev veth1"
-	run_cmd "ip netns exec me ping -c1 -w5 172.16.101.1"
+	run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 	log_test $? 0 "IPv4 default route with IPv6 gateway"
 
 	#
@@ -2253,6 +2254,7 @@ usage: ${0##*/} OPTS
         -p          Pause on fail
         -P          Pause after each test before cleanup
         -v          verbose mode (show commands and output)
+	-w	    Timeout for ping
 
     Runtime test
 	-n num	    Number of nexthops to target
@@ -2265,7 +2267,7 @@ EOF
 ################################################################################
 # main
 
-while getopts :t:pP46hv o
+while getopts :t:pP46hv:w: o
 do
 	case $o in
 		t) TESTS=$OPTARG;;
@@ -2274,6 +2276,7 @@ do
 		p) PAUSE_ON_FAIL=yes;;
 		P) PAUSE=yes;;
 		v) VERBOSE=$(($VERBOSE + 1));;
+		w) PING_TIMEOUT=$OPTARG;;
 		h) usage; exit 0;;
 		*) usage; exit 1;;
 	esac
-- 
cgit 


From 538aaf9b2383701094a47797b4554c6a21c83eed Mon Sep 17 00:00:00 2001
From: Joanne Koong <joannelkoong@gmail.com>
Date: Thu, 19 May 2022 17:18:34 -0700
Subject: selftests: Add test for timing a bind request to a port with a
 populated bhash entry

This test populates the bhash table for a given port with
MAX_THREADS * MAX_CONNECTIONS sockets, and then times how long
a bind request on the port takes.

When populating the bhash table, we create the sockets and then bind
the sockets to the same address and port (SO_REUSEADDR and SO_REUSEPORT
are set). When timing how long a bind on the port takes, we bind on a
different address without SO_REUSEPORT set. We do not set SO_REUSEPORT
because we are interested in the case where the bind request does not
go through the tb->fastreuseport path, which is fragile (eg
tb->fastreuseport path does not work if binding with a different uid).

To run the test locally, I did:
* ulimit -n 65535000
* ip addr add 2001:0db8:0:f101::1 dev eth0
* ./bind_bhash_test 443

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/.gitignore        |   1 +
 tools/testing/selftests/net/Makefile          |   2 +
 tools/testing/selftests/net/bind_bhash_test.c | 119 ++++++++++++++++++++++++++
 3 files changed, 122 insertions(+)
 create mode 100644 tools/testing/selftests/net/bind_bhash_test.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 21a411b04890..735423136bc4 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -36,3 +36,4 @@ gro
 ioam6_parser
 toeplitz
 cmsg_sender
+bind_bhash_test
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 7ea54af55490..464df13831f2 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -59,6 +59,7 @@ TEST_GEN_FILES += toeplitz
 TEST_GEN_FILES += cmsg_sender
 TEST_GEN_FILES += stress_reuseport_listen
 TEST_PROGS += test_vxlan_vnifiltering.sh
+TEST_GEN_FILES += bind_bhash_test
 
 TEST_FILES := settings
 
@@ -69,4 +70,5 @@ include bpf/Makefile
 
 $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma
 $(OUTPUT)/tcp_mmap: LDLIBS += -lpthread
+$(OUTPUT)/bind_bhash_test: LDLIBS += -lpthread
 $(OUTPUT)/tcp_inq: LDLIBS += -lpthread
diff --git a/tools/testing/selftests/net/bind_bhash_test.c b/tools/testing/selftests/net/bind_bhash_test.c
new file mode 100644
index 000000000000..252e73754e76
--- /dev/null
+++ b/tools/testing/selftests/net/bind_bhash_test.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This times how long it takes to bind to a port when the port already
+ * has multiple sockets in its bhash table.
+ *
+ * In the setup(), we populate the port's bhash table with
+ * MAX_THREADS * MAX_CONNECTIONS number of entries.
+ */
+
+#include <unistd.h>
+#include <stdio.h>
+#include <netdb.h>
+#include <pthread.h>
+
+#define MAX_THREADS 600
+#define MAX_CONNECTIONS 40
+
+static const char *bind_addr = "::1";
+static const char *port;
+
+static int fd_array[MAX_THREADS][MAX_CONNECTIONS];
+
+static int bind_socket(int opt, const char *addr)
+{
+	struct addrinfo *res, hint = {};
+	int sock_fd, reuse = 1, err;
+
+	sock_fd = socket(AF_INET6, SOCK_STREAM, 0);
+	if (sock_fd < 0) {
+		perror("socket fd err");
+		return -1;
+	}
+
+	hint.ai_family = AF_INET6;
+	hint.ai_socktype = SOCK_STREAM;
+
+	err = getaddrinfo(addr, port, &hint, &res);
+	if (err) {
+		perror("getaddrinfo failed");
+		return -1;
+	}
+
+	if (opt) {
+		err = setsockopt(sock_fd, SOL_SOCKET, opt, &reuse, sizeof(reuse));
+		if (err) {
+			perror("setsockopt failed");
+			return -1;
+		}
+	}
+
+	err = bind(sock_fd, res->ai_addr, res->ai_addrlen);
+	if (err) {
+		perror("failed to bind to port");
+		return -1;
+	}
+
+	return sock_fd;
+}
+
+static void *setup(void *arg)
+{
+	int sock_fd, i;
+	int *array = (int *)arg;
+
+	for (i = 0; i < MAX_CONNECTIONS; i++) {
+		sock_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, bind_addr);
+		if (sock_fd < 0)
+			return NULL;
+		array[i] = sock_fd;
+	}
+
+	return NULL;
+}
+
+int main(int argc, const char *argv[])
+{
+	int listener_fd, sock_fd, i, j;
+	pthread_t tid[MAX_THREADS];
+	clock_t begin, end;
+
+	if (argc != 2) {
+		printf("Usage: listener <port>\n");
+		return -1;
+	}
+
+	port = argv[1];
+
+	listener_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, bind_addr);
+	if (listen(listener_fd, 100) < 0) {
+		perror("listen failed");
+		return -1;
+	}
+
+	/* Set up threads to populate the bhash table entry for the port */
+	for (i = 0; i < MAX_THREADS; i++)
+		pthread_create(&tid[i], NULL, setup, fd_array[i]);
+
+	for (i = 0; i < MAX_THREADS; i++)
+		pthread_join(tid[i], NULL);
+
+	begin = clock();
+
+	/* Bind to the same port on a different address */
+	sock_fd  = bind_socket(0, "2001:0db8:0:f101::1");
+
+	end = clock();
+
+	printf("time spent = %f\n", (double)(end - begin) / CLOCKS_PER_SEC);
+
+	/* clean up */
+	close(sock_fd);
+	close(listener_fd);
+	for (i = 0; i < MAX_THREADS; i++) {
+		for (j = 0; i < MAX_THREADS; i++)
+			close(fd_array[i][j]);
+	}
+
+	return 0;
+}
-- 
cgit 


From 90a039fd19fc35d03a74ce2973992c878546cb20 Mon Sep 17 00:00:00 2001
From: Alan Maguire <alan.maguire@oracle.com>
Date: Thu, 19 May 2022 15:25:34 +0100
Subject: selftests/bpf: add tests verifying unprivileged bpf behaviour

tests load/attach bpf prog with maps, perfbuf and ringbuf, pinning
them.  Then effective caps are dropped and we verify we can

- pick up the pin
- create ringbuf/perfbuf
- get ringbuf/perfbuf events, carry out map update, lookup and delete
- create a link

Negative testing also ensures

- BPF prog load fails
- BPF map create fails
- get fd by id fails
- get next id fails
- query fails
- BTF load fails

Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/1652970334-30510-3-git-send-email-alan.maguire@oracle.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../selftests/bpf/prog_tests/unpriv_bpf_disabled.c | 312 +++++++++++++++++++++
 .../selftests/bpf/progs/test_unpriv_bpf_disabled.c |  83 ++++++
 2 files changed, 395 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_unpriv_bpf_disabled.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c b/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c
new file mode 100644
index 000000000000..2800185179cf
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c
@@ -0,0 +1,312 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022, Oracle and/or its affiliates. */
+
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+#include "test_unpriv_bpf_disabled.skel.h"
+
+#include "cap_helpers.h"
+
+/* Using CAP_LAST_CAP is risky here, since it can get pulled in from
+ * an old /usr/include/linux/capability.h and be < CAP_BPF; as a result
+ * CAP_BPF would not be included in ALL_CAPS.  Instead use CAP_BPF as
+ * we know its value is correct since it is explicitly defined in
+ * cap_helpers.h.
+ */
+#define ALL_CAPS	((2ULL << CAP_BPF) - 1)
+
+#define PINPATH		"/sys/fs/bpf/unpriv_bpf_disabled_"
+#define NUM_MAPS	7
+
+static __u32 got_perfbuf_val;
+static __u32 got_ringbuf_val;
+
+static int process_ringbuf(void *ctx, void *data, size_t len)
+{
+	if (ASSERT_EQ(len, sizeof(__u32), "ringbuf_size_valid"))
+		got_ringbuf_val = *(__u32 *)data;
+	return 0;
+}
+
+static void process_perfbuf(void *ctx, int cpu, void *data, __u32 len)
+{
+	if (ASSERT_EQ(len, sizeof(__u32), "perfbuf_size_valid"))
+		got_perfbuf_val = *(__u32 *)data;
+}
+
+static int sysctl_set(const char *sysctl_path, char *old_val, const char *new_val)
+{
+	int ret = 0;
+	FILE *fp;
+
+	fp = fopen(sysctl_path, "r+");
+	if (!fp)
+		return -errno;
+	if (old_val && fscanf(fp, "%s", old_val) <= 0) {
+		ret = -ENOENT;
+	} else if (!old_val || strcmp(old_val, new_val) != 0) {
+		fseek(fp, 0, SEEK_SET);
+		if (fprintf(fp, "%s", new_val) < 0)
+			ret = -errno;
+	}
+	fclose(fp);
+
+	return ret;
+}
+
+static void test_unpriv_bpf_disabled_positive(struct test_unpriv_bpf_disabled *skel,
+					      __u32 prog_id, int prog_fd, int perf_fd,
+					      char **map_paths, int *map_fds)
+{
+	struct perf_buffer *perfbuf = NULL;
+	struct ring_buffer *ringbuf = NULL;
+	int i, nr_cpus, link_fd = -1;
+
+	nr_cpus = bpf_num_possible_cpus();
+
+	skel->bss->perfbuf_val = 1;
+	skel->bss->ringbuf_val = 2;
+
+	/* Positive tests for unprivileged BPF disabled. Verify we can
+	 * - retrieve and interact with pinned maps;
+	 * - set up and interact with perf buffer;
+	 * - set up and interact with ring buffer;
+	 * - create a link
+	 */
+	perfbuf = perf_buffer__new(bpf_map__fd(skel->maps.perfbuf), 8, process_perfbuf, NULL, NULL,
+				   NULL);
+	if (!ASSERT_OK_PTR(perfbuf, "perf_buffer__new"))
+		goto cleanup;
+
+	ringbuf = ring_buffer__new(bpf_map__fd(skel->maps.ringbuf), process_ringbuf, NULL, NULL);
+	if (!ASSERT_OK_PTR(ringbuf, "ring_buffer__new"))
+		goto cleanup;
+
+	/* trigger & validate perf event, ringbuf output */
+	usleep(1);
+
+	ASSERT_GT(perf_buffer__poll(perfbuf, 100), -1, "perf_buffer__poll");
+	ASSERT_EQ(got_perfbuf_val, skel->bss->perfbuf_val, "check_perfbuf_val");
+	ASSERT_EQ(ring_buffer__consume(ringbuf), 1, "ring_buffer__consume");
+	ASSERT_EQ(got_ringbuf_val, skel->bss->ringbuf_val, "check_ringbuf_val");
+
+	for (i = 0; i < NUM_MAPS; i++) {
+		map_fds[i] = bpf_obj_get(map_paths[i]);
+		if (!ASSERT_GT(map_fds[i], -1, "obj_get"))
+			goto cleanup;
+	}
+
+	for (i = 0; i < NUM_MAPS; i++) {
+		bool prog_array = strstr(map_paths[i], "prog_array") != NULL;
+		bool array = strstr(map_paths[i], "array") != NULL;
+		bool buf = strstr(map_paths[i], "buf") != NULL;
+		__u32 key = 0, vals[nr_cpus], lookup_vals[nr_cpus];
+		__u32 expected_val = 1;
+		int j;
+
+		/* skip ringbuf, perfbuf */
+		if (buf)
+			continue;
+
+		for (j = 0; j < nr_cpus; j++)
+			vals[j] = expected_val;
+
+		if (prog_array) {
+			/* need valid prog array value */
+			vals[0] = prog_fd;
+			/* prog array lookup returns prog id, not fd */
+			expected_val = prog_id;
+		}
+		ASSERT_OK(bpf_map_update_elem(map_fds[i], &key, vals, 0), "map_update_elem");
+		ASSERT_OK(bpf_map_lookup_elem(map_fds[i], &key, &lookup_vals), "map_lookup_elem");
+		ASSERT_EQ(lookup_vals[0], expected_val, "map_lookup_elem_values");
+		if (!array)
+			ASSERT_OK(bpf_map_delete_elem(map_fds[i], &key), "map_delete_elem");
+	}
+
+	link_fd = bpf_link_create(bpf_program__fd(skel->progs.handle_perf_event), perf_fd,
+				  BPF_PERF_EVENT, NULL);
+	ASSERT_GT(link_fd, 0, "link_create");
+
+cleanup:
+	if (link_fd)
+		close(link_fd);
+	if (perfbuf)
+		perf_buffer__free(perfbuf);
+	if (ringbuf)
+		ring_buffer__free(ringbuf);
+}
+
+static void test_unpriv_bpf_disabled_negative(struct test_unpriv_bpf_disabled *skel,
+					      __u32 prog_id, int prog_fd, int perf_fd,
+					      char **map_paths, int *map_fds)
+{
+	const struct bpf_insn prog_insns[] = {
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	};
+	const size_t prog_insn_cnt = sizeof(prog_insns) / sizeof(struct bpf_insn);
+	LIBBPF_OPTS(bpf_prog_load_opts, load_opts);
+	struct bpf_map_info map_info = {};
+	__u32 map_info_len = sizeof(map_info);
+	struct bpf_link_info link_info = {};
+	__u32 link_info_len = sizeof(link_info);
+	struct btf *btf = NULL;
+	__u32 attach_flags = 0;
+	__u32 prog_ids[3] = {};
+	__u32 prog_cnt = 3;
+	__u32 next;
+	int i;
+
+	/* Negative tests for unprivileged BPF disabled.  Verify we cannot
+	 * - load BPF programs;
+	 * - create BPF maps;
+	 * - get a prog/map/link fd by id;
+	 * - get next prog/map/link id
+	 * - query prog
+	 * - BTF load
+	 */
+	ASSERT_EQ(bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "simple_prog", "GPL",
+				prog_insns, prog_insn_cnt, &load_opts),
+		  -EPERM, "prog_load_fails");
+
+	for (i = BPF_MAP_TYPE_HASH; i <= BPF_MAP_TYPE_BLOOM_FILTER; i++)
+		ASSERT_EQ(bpf_map_create(i, NULL, sizeof(int), sizeof(int), 1, NULL),
+			  -EPERM, "map_create_fails");
+
+	ASSERT_EQ(bpf_prog_get_fd_by_id(prog_id), -EPERM, "prog_get_fd_by_id_fails");
+	ASSERT_EQ(bpf_prog_get_next_id(prog_id, &next), -EPERM, "prog_get_next_id_fails");
+	ASSERT_EQ(bpf_prog_get_next_id(0, &next), -EPERM, "prog_get_next_id_fails");
+
+	if (ASSERT_OK(bpf_obj_get_info_by_fd(map_fds[0], &map_info, &map_info_len),
+		      "obj_get_info_by_fd")) {
+		ASSERT_EQ(bpf_map_get_fd_by_id(map_info.id), -EPERM, "map_get_fd_by_id_fails");
+		ASSERT_EQ(bpf_map_get_next_id(map_info.id, &next), -EPERM,
+			  "map_get_next_id_fails");
+	}
+	ASSERT_EQ(bpf_map_get_next_id(0, &next), -EPERM, "map_get_next_id_fails");
+
+	if (ASSERT_OK(bpf_obj_get_info_by_fd(bpf_link__fd(skel->links.sys_nanosleep_enter),
+					     &link_info, &link_info_len),
+		      "obj_get_info_by_fd")) {
+		ASSERT_EQ(bpf_link_get_fd_by_id(link_info.id), -EPERM, "link_get_fd_by_id_fails");
+		ASSERT_EQ(bpf_link_get_next_id(link_info.id, &next), -EPERM,
+			  "link_get_next_id_fails");
+	}
+	ASSERT_EQ(bpf_link_get_next_id(0, &next), -EPERM, "link_get_next_id_fails");
+
+	ASSERT_EQ(bpf_prog_query(prog_fd, BPF_TRACE_FENTRY, 0, &attach_flags, prog_ids,
+				 &prog_cnt), -EPERM, "prog_query_fails");
+
+	btf = btf__new_empty();
+	if (ASSERT_OK_PTR(btf, "empty_btf") &&
+	    ASSERT_GT(btf__add_int(btf, "int", 4, 0), 0, "unpriv_int_type")) {
+		const void *raw_btf_data;
+		__u32 raw_btf_size;
+
+		raw_btf_data = btf__raw_data(btf, &raw_btf_size);
+		if (ASSERT_OK_PTR(raw_btf_data, "raw_btf_data_good"))
+			ASSERT_EQ(bpf_btf_load(raw_btf_data, raw_btf_size, NULL), -EPERM,
+				  "bpf_btf_load_fails");
+	}
+	btf__free(btf);
+}
+
+void test_unpriv_bpf_disabled(void)
+{
+	char *map_paths[NUM_MAPS] = {	PINPATH	"array",
+					PINPATH "percpu_array",
+					PINPATH "hash",
+					PINPATH "percpu_hash",
+					PINPATH "perfbuf",
+					PINPATH "ringbuf",
+					PINPATH "prog_array" };
+	int map_fds[NUM_MAPS];
+	struct test_unpriv_bpf_disabled *skel;
+	char unprivileged_bpf_disabled_orig[32] = {};
+	char perf_event_paranoid_orig[32] = {};
+	struct bpf_prog_info prog_info = {};
+	__u32 prog_info_len = sizeof(prog_info);
+	struct perf_event_attr attr = {};
+	int prog_fd, perf_fd = -1, i, ret;
+	__u64 save_caps = 0;
+	__u32 prog_id;
+
+	skel = test_unpriv_bpf_disabled__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	skel->bss->test_pid = getpid();
+
+	map_fds[0] = bpf_map__fd(skel->maps.array);
+	map_fds[1] = bpf_map__fd(skel->maps.percpu_array);
+	map_fds[2] = bpf_map__fd(skel->maps.hash);
+	map_fds[3] = bpf_map__fd(skel->maps.percpu_hash);
+	map_fds[4] = bpf_map__fd(skel->maps.perfbuf);
+	map_fds[5] = bpf_map__fd(skel->maps.ringbuf);
+	map_fds[6] = bpf_map__fd(skel->maps.prog_array);
+
+	for (i = 0; i < NUM_MAPS; i++)
+		ASSERT_OK(bpf_obj_pin(map_fds[i], map_paths[i]), "pin map_fd");
+
+	/* allow user without caps to use perf events */
+	if (!ASSERT_OK(sysctl_set("/proc/sys/kernel/perf_event_paranoid", perf_event_paranoid_orig,
+				  "-1"),
+		       "set_perf_event_paranoid"))
+		goto cleanup;
+	/* ensure unprivileged bpf disabled is set */
+	ret = sysctl_set("/proc/sys/kernel/unprivileged_bpf_disabled",
+			 unprivileged_bpf_disabled_orig, "2");
+	if (ret == -EPERM) {
+		/* if unprivileged_bpf_disabled=1, we get -EPERM back; that's okay. */
+		if (!ASSERT_OK(strcmp(unprivileged_bpf_disabled_orig, "1"),
+			       "unpriviliged_bpf_disabled_on"))
+			goto cleanup;
+	} else {
+		if (!ASSERT_OK(ret, "set unpriviliged_bpf_disabled"))
+			goto cleanup;
+	}
+
+	prog_fd = bpf_program__fd(skel->progs.sys_nanosleep_enter);
+	ASSERT_OK(bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_info_len),
+		  "obj_get_info_by_fd");
+	prog_id = prog_info.id;
+	ASSERT_GT(prog_id, 0, "valid_prog_id");
+
+	attr.size = sizeof(attr);
+	attr.type = PERF_TYPE_SOFTWARE;
+	attr.config = PERF_COUNT_SW_CPU_CLOCK;
+	attr.freq = 1;
+	attr.sample_freq = 1000;
+	perf_fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
+	if (!ASSERT_GE(perf_fd, 0, "perf_fd"))
+		goto cleanup;
+
+	if (!ASSERT_OK(test_unpriv_bpf_disabled__attach(skel), "skel_attach"))
+		goto cleanup;
+
+	if (!ASSERT_OK(cap_disable_effective(ALL_CAPS, &save_caps), "disable caps"))
+		goto cleanup;
+
+	if (test__start_subtest("unpriv_bpf_disabled_positive"))
+		test_unpriv_bpf_disabled_positive(skel, prog_id, prog_fd, perf_fd, map_paths,
+						  map_fds);
+
+	if (test__start_subtest("unpriv_bpf_disabled_negative"))
+		test_unpriv_bpf_disabled_negative(skel, prog_id, prog_fd, perf_fd, map_paths,
+						  map_fds);
+
+cleanup:
+	close(perf_fd);
+	if (save_caps)
+		cap_enable_effective(save_caps, NULL);
+	if (strlen(perf_event_paranoid_orig) > 0)
+		sysctl_set("/proc/sys/kernel/perf_event_paranoid", NULL, perf_event_paranoid_orig);
+	if (strlen(unprivileged_bpf_disabled_orig) > 0)
+		sysctl_set("/proc/sys/kernel/unprivileged_bpf_disabled", NULL,
+			   unprivileged_bpf_disabled_orig);
+	for (i = 0; i < NUM_MAPS; i++)
+		unlink(map_paths[i]);
+	test_unpriv_bpf_disabled__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_unpriv_bpf_disabled.c b/tools/testing/selftests/bpf/progs/test_unpriv_bpf_disabled.c
new file mode 100644
index 000000000000..fc423e43a3cd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_unpriv_bpf_disabled.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022, Oracle and/or its affiliates. */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+__u32 perfbuf_val = 0;
+__u32 ringbuf_val = 0;
+
+int test_pid;
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u32);
+} array SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u32);
+} percpu_array SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u32);
+} hash SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u32);
+} percpu_hash SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+	__type(key, __u32);
+	__type(value, __u32);
+} perfbuf SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_RINGBUF);
+	__uint(max_entries, 1 << 12);
+} ringbuf SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+	__uint(max_entries, 1);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(__u32));
+} prog_array SEC(".maps");
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int sys_nanosleep_enter(void *ctx)
+{
+	int cur_pid;
+
+	cur_pid = bpf_get_current_pid_tgid() >> 32;
+
+	if (cur_pid != test_pid)
+		return 0;
+
+	bpf_perf_event_output(ctx, &perfbuf, BPF_F_CURRENT_CPU, &perfbuf_val, sizeof(perfbuf_val));
+	bpf_ringbuf_output(&ringbuf, &ringbuf_val, sizeof(ringbuf_val), 0);
+
+	return 0;
+}
+
+SEC("perf_event")
+int handle_perf_event(void *ctx)
+{
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit 


From 48482f4dd3432e5e62873bf0f2e254cfb8ce2ac2 Mon Sep 17 00:00:00 2001
From: Russell Currey <ruscur@russell.cc>
Date: Tue, 8 Jun 2021 16:48:09 +1000
Subject: selftests/powerpc: Better reporting in spectre_v2

In commit f3054ffd71b5 ("selftests/powerpc: Return skip code for
spectre_v2"), the spectre_v2 selftest is updated to be aware of cases
where the vulnerability status reported in sysfs is incorrect, skipping
the test instead.

This happens because qemu can misrepresent the mitigation status of the
host to the guest. If the count cache is disabled in the host, and this
is correctly reported to the guest, then the guest won't apply
mitigations. If the guest is then migrated to a new host where
mitigations are necessary, it is now vulnerable because it has not
applied mitigations.

Update the selftest to report when we see excessive misses, indicative of
the count cache being disabled. If software flushing is enabled, also
warn that these flushes are just wasting performance.

Signed-off-by: Russell Currey <ruscur@russell.cc>
[mpe: Rebase and update change log appropriately]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210608064809.199116-1-ruscur@russell.cc
---
 .../selftests/powerpc/security/spectre_v2.c        | 24 ++++++++++++++--------
 1 file changed, 15 insertions(+), 9 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/powerpc/security/spectre_v2.c b/tools/testing/selftests/powerpc/security/spectre_v2.c
index 80dc97e3ec57..0e231e89bfa4 100644
--- a/tools/testing/selftests/powerpc/security/spectre_v2.c
+++ b/tools/testing/selftests/powerpc/security/spectre_v2.c
@@ -182,17 +182,23 @@ int spectre_v2_test(void)
 	case COUNT_CACHE_FLUSH_HW:
 		// These should all not affect userspace branch prediction
 		if (miss_percent > 15) {
+			if (miss_percent > 95) {
+				/*
+				 * Such a mismatch may be caused by a system being unaware
+				 * the count cache is disabled. This may be to enable
+				 * guest migration between hosts with different settings.
+				 * Return skip code to avoid detecting this as an error.
+				 * We are not vulnerable and reporting otherwise, so
+				 * missing such a mismatch is safe.
+				 */
+				printf("Branch misses > 95%% unexpected in this configuration.\n");
+				printf("Count cache likely disabled without Linux knowing.\n");
+				if (state == COUNT_CACHE_FLUSH_SW)
+					printf("WARNING: Kernel performing unnecessary flushes.\n");
+				return 4;
+			}
 			printf("Branch misses > 15%% unexpected in this configuration!\n");
 			printf("Possible mis-match between reported & actual mitigation\n");
-			/*
-			 * Such a mismatch may be caused by a guest system
-			 * reporting as vulnerable when the host is mitigated.
-			 * Return skip code to avoid detecting this as an error.
-			 * We are not vulnerable and reporting otherwise, so
-			 * missing such a mismatch is safe.
-			 */
-			if (miss_percent > 95)
-				return 4;
 
 			return 1;
 		}
-- 
cgit 


From 079e5fd3a1e41c186c1bc4166d409d22e70729bf Mon Sep 17 00:00:00 2001
From: Madhavan Srinivasan <maddy@linux.ibm.com>
Date: Tue, 22 Mar 2022 10:26:38 +0530
Subject: selftests/powerpc/pmu/ebb: remove fixed_instruction.S

Commit 3752e453f6ba ("selftests/powerpc: Add tests of PMU EBBs") added
selftest testcases to verify EBB interface. instruction_count_test.c
testcase needs a fixed loop function to count overhead. Instead of using
the thirty_two_instruction_loop() in fixed_instruction_loop.S in ebb
folder, file is linked with thirty_two_instruction_loop() in loop.S from
top folder. Since fixed_instruction_loop.S not used, patch removes the
file.

Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220322045638.10443-1-maddy@linux.ibm.com
---
 .../powerpc/pmu/ebb/fixed_instruction_loop.S       | 43 ----------------------
 1 file changed, 43 deletions(-)
 delete mode 100644 tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S b/tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S
deleted file mode 100644
index 08a7b5f133b9..000000000000
--- a/tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S
+++ /dev/null
@@ -1,43 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright 2014, Michael Ellerman, IBM Corp.
- */
-
-#include <ppc-asm.h>
-
-	.text
-
-FUNC_START(thirty_two_instruction_loop)
-	cmpwi	r3,0
-	beqlr
-	addi	r4,r3,1
-	addi	r4,r4,1
-	addi	r4,r4,1
-	addi	r4,r4,1
-	addi	r4,r4,1
-	addi	r4,r4,1
-	addi	r4,r4,1
-	addi	r4,r4,1
-	addi	r4,r4,1
-	addi	r4,r4,1
-	addi	r4,r4,1
-	addi	r4,r4,1
-	addi	r4,r4,1
-	addi	r4,r4,1
-	addi	r4,r4,1
-	addi	r4,r4,1
-	addi	r4,r4,1
-	addi	r4,r4,1
-	addi	r4,r4,1
-	addi	r4,r4,1
-	addi	r4,r4,1
-	addi	r4,r4,1
-	addi	r4,r4,1
-	addi	r4,r4,1
-	addi	r4,r4,1
-	addi	r4,r4,1
-	addi	r4,r4,1
-	addi	r4,r4,1	# 28 addi's
-	subi	r3,r3,1
-	b	FUNC_NAME(thirty_two_instruction_loop)
-FUNC_END(thirty_two_instruction_loop)
-- 
cgit 


From 7801cb1dc60f7348687ca1c3aa03a944687563f0 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Sat, 19 Mar 2022 23:20:25 +0000
Subject: selftests/powerpc/pmu: fix spelling mistake "mis-match" -> "mismatch"

There are a few spelling mistakes in error messages. Fix them.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220319232025.22067-1-colin.i.king@gmail.com
---
 tools/testing/selftests/powerpc/security/spectre_v2.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/powerpc/security/spectre_v2.c b/tools/testing/selftests/powerpc/security/spectre_v2.c
index 0e231e89bfa4..5b2abb719ef2 100644
--- a/tools/testing/selftests/powerpc/security/spectre_v2.c
+++ b/tools/testing/selftests/powerpc/security/spectre_v2.c
@@ -198,7 +198,7 @@ int spectre_v2_test(void)
 				return 4;
 			}
 			printf("Branch misses > 15%% unexpected in this configuration!\n");
-			printf("Possible mis-match between reported & actual mitigation\n");
+			printf("Possible mismatch between reported & actual mitigation\n");
 
 			return 1;
 		}
@@ -207,14 +207,14 @@ int spectre_v2_test(void)
 		// This seems to affect userspace branch prediction a bit?
 		if (miss_percent > 25) {
 			printf("Branch misses > 25%% unexpected in this configuration!\n");
-			printf("Possible mis-match between reported & actual mitigation\n");
+			printf("Possible mismatch between reported & actual mitigation\n");
 			return 1;
 		}
 		break;
 	case COUNT_CACHE_DISABLED:
 		if (miss_percent < 95) {
 			printf("Branch misses < 95%% unexpected in this configuration!\n");
-			printf("Possible mis-match between reported & actual mitigation\n");
+			printf("Possible mismatch between reported & actual mitigation\n");
 			return 1;
 		}
 		break;
-- 
cgit 


From a3f7404c0befe336108094b3141169abd1fb1561 Mon Sep 17 00:00:00 2001
From: Muhammad Usama Anjum <usama.anjum@collabora.com>
Date: Sat, 21 May 2022 14:37:06 +0500
Subject: net: selftests: Add stress_reuseport_listen to .gitignore

Add newly added stress_reuseport_listen object to .gitignore file.

Fixes: ec8cb4f617a2 ("net: selftests: Stress reuseport listen")
Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/.gitignore | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 735423136bc4..b984f8c8d523 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -5,6 +5,7 @@ socket
 psock_fanout
 psock_snd
 psock_tpacket
+stress_reuseport_listen
 reuseport_addr_any
 reuseport_bpf
 reuseport_bpf_cpu
-- 
cgit 


From 980e74cac800881b30d8984a43ead421487eb3a3 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sun, 22 May 2022 12:50:38 +0300
Subject: selftests: ocelot: tc_flower_chains: streamline test output

Bring this driver-specific selftest output in line with the other
selftests.

Before:

Testing VLAN pop..                      OK
Testing VLAN push..                     OK
Testing ingress VLAN modification..             OK
Testing egress VLAN modification..              OK
Testing frame prioritization..          OK

After:

TEST: VLAN pop                                                      [ OK ]
TEST: VLAN push                                                     [ OK ]
TEST: Ingress VLAN modification                                     [ OK ]
TEST: Egress VLAN modification                                      [ OK ]
TEST: Frame prioritization                                          [ OK ]

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../drivers/net/ocelot/tc_flower_chains.sh         | 52 ++++++++++------------
 1 file changed, 24 insertions(+), 28 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
index 4401a654c2c0..a27f24a6aa07 100755
--- a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
+++ b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
@@ -204,7 +204,7 @@ cleanup()
 
 test_vlan_pop()
 {
-	printf "Testing VLAN pop..			"
+	RET=0
 
 	tcpdump_start $eth2
 
@@ -217,18 +217,17 @@ test_vlan_pop()
 
 	tcpdump_stop $eth2
 
-	if tcpdump_show $eth2 | grep -q "$eth3_mac > $eth2_mac, ethertype IPv4"; then
-		echo "OK"
-	else
-		echo "FAIL"
-	fi
+	tcpdump_show $eth2 | grep -q "$eth3_mac > $eth2_mac, ethertype IPv4"
+	check_err "$?" "untagged reception"
 
 	tcpdump_cleanup $eth2
+
+	log_test "VLAN pop"
 }
 
 test_vlan_push()
 {
-	printf "Testing VLAN push..			"
+	RET=0
 
 	tcpdump_start $eth3.100
 
@@ -238,18 +237,17 @@ test_vlan_push()
 
 	tcpdump_stop $eth3.100
 
-	if tcpdump_show $eth3.100 | grep -q "$eth2_mac > $eth3_mac"; then
-		echo "OK"
-	else
-		echo "FAIL"
-	fi
+	tcpdump_show $eth3.100 | grep -q "$eth2_mac > $eth3_mac"
+	check_err "$?" "tagged reception"
 
 	tcpdump_cleanup $eth3.100
+
+	log_test "VLAN push"
 }
 
 test_vlan_ingress_modify()
 {
-	printf "Testing ingress VLAN modification..		"
+	RET=0
 
 	ip link set br0 type bridge vlan_filtering 1
 	bridge vlan add dev $eth0 vid 200
@@ -269,11 +267,8 @@ test_vlan_ingress_modify()
 
 	tcpdump_stop $eth2
 
-	if tcpdump_show $eth2 | grep -q "$eth3_mac > $eth2_mac, .* vlan 300"; then
-		echo "OK"
-	else
-		echo "FAIL"
-	fi
+	tcpdump_show $eth2 | grep -q "$eth3_mac > $eth2_mac, .* vlan 300"
+	check_err "$?" "tagged reception"
 
 	tcpdump_cleanup $eth2
 
@@ -283,11 +278,13 @@ test_vlan_ingress_modify()
 	bridge vlan del dev $eth0 vid 300
 	bridge vlan del dev $eth1 vid 300
 	ip link set br0 type bridge vlan_filtering 0
+
+	log_test "Ingress VLAN modification"
 }
 
 test_vlan_egress_modify()
 {
-	printf "Testing egress VLAN modification..		"
+	RET=0
 
 	tc qdisc add dev $eth1 clsact
 
@@ -307,11 +304,8 @@ test_vlan_egress_modify()
 
 	tcpdump_stop $eth2
 
-	if tcpdump_show $eth2 | grep -q "$eth3_mac > $eth2_mac, .* vlan 300"; then
-		echo "OK"
-	else
-		echo "FAIL"
-	fi
+	tcpdump_show $eth2 | grep -q "$eth3_mac > $eth2_mac, .* vlan 300"
+	check_err "$?" "tagged reception"
 
 	tcpdump_cleanup $eth2
 
@@ -321,14 +315,14 @@ test_vlan_egress_modify()
 	bridge vlan del dev $eth0 vid 200
 	bridge vlan del dev $eth1 vid 200
 	ip link set br0 type bridge vlan_filtering 0
+
+	log_test "Egress VLAN modification"
 }
 
 test_skbedit_priority()
 {
 	local num_pkts=100
 
-	printf "Testing frame prioritization..		"
-
 	before=$(ethtool_stats_get $eth0 'rx_green_prio_7')
 
 	$MZ $eth3 -q -c $num_pkts -p 64 -a $eth3_mac -b $eth2_mac -t ip -A 10.1.1.2
@@ -336,10 +330,12 @@ test_skbedit_priority()
 	after=$(ethtool_stats_get $eth0 'rx_green_prio_7')
 
 	if [ $((after - before)) = $num_pkts ]; then
-		echo "OK"
+		RET=0
 	else
-		echo "FAIL"
+		RET=1
 	fi
+
+	log_test "Frame prioritization"
 }
 
 trap cleanup EXIT
-- 
cgit 


From 93196ef911bac57f43034317c50e9ca42acf55c4 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sun, 22 May 2022 12:50:39 +0300
Subject: selftests: ocelot: tc_flower_chains: use conventional interface names

This is a robotic rename as follows:

eth0 -> swp1
eth1 -> swp2
eth2 -> h2
eth3 -> h1

This brings the selftest more in line with the other forwarding
selftests, where h1 is connected to swp1, and h2 to swp2.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../drivers/net/ocelot/tc_flower_chains.sh         | 134 ++++++++++-----------
 1 file changed, 67 insertions(+), 67 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
index a27f24a6aa07..ecaeae7197b8 100755
--- a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
+++ b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
@@ -15,7 +15,7 @@ require_command tcpdump
 #   |       DUT ports         Generator ports     |
 #   | +--------+ +--------+ +--------+ +--------+ |
 #   | |        | |        | |        | |        | |
-#   | |  eth0  | |  eth1  | |  eth2  | |  eth3  | |
+#   | |  swp1  | |  swp2  | |   h2   | |    h1  | |
 #   | |        | |        | |        | |        | |
 #   +-+--------+-+--------+-+--------+-+--------+-+
 #          |         |           |          |
@@ -24,15 +24,15 @@ require_command tcpdump
 #          |                                |
 #          +--------------------------------+
 
-eth0=${NETIFS[p1]}
-eth1=${NETIFS[p2]}
-eth2=${NETIFS[p3]}
-eth3=${NETIFS[p4]}
+swp1=${NETIFS[p1]}
+swp2=${NETIFS[p2]}
+h2=${NETIFS[p3]}
+h1=${NETIFS[p4]}
 
-eth0_mac="de:ad:be:ef:00:00"
-eth1_mac="de:ad:be:ef:00:01"
-eth2_mac="de:ad:be:ef:00:02"
-eth3_mac="de:ad:be:ef:00:03"
+swp1_mac="de:ad:be:ef:00:00"
+swp2_mac="de:ad:be:ef:00:01"
+h2_mac="de:ad:be:ef:00:02"
+h1_mac="de:ad:be:ef:00:03"
 
 # Helpers to map a VCAP IS1 and VCAP IS2 lookup and policy to a chain number
 # used by the kernel driver. The numbers are:
@@ -156,39 +156,39 @@ create_tcam_skeleton()
 
 setup_prepare()
 {
-	ip link set $eth0 up
-	ip link set $eth1 up
-	ip link set $eth2 up
-	ip link set $eth3 up
+	ip link set $swp1 up
+	ip link set $swp2 up
+	ip link set $h2 up
+	ip link set $h1 up
 
-	create_tcam_skeleton $eth0
+	create_tcam_skeleton $swp1
 
 	ip link add br0 type bridge
-	ip link set $eth0 master br0
-	ip link set $eth1 master br0
+	ip link set $swp1 master br0
+	ip link set $swp2 master br0
 	ip link set br0 up
 
-	ip link add link $eth3 name $eth3.100 type vlan id 100
-	ip link set $eth3.100 up
+	ip link add link $h1 name $h1.100 type vlan id 100
+	ip link set $h1.100 up
 
-	ip link add link $eth3 name $eth3.200 type vlan id 200
-	ip link set $eth3.200 up
+	ip link add link $h1 name $h1.200 type vlan id 200
+	ip link set $h1.200 up
 
-	tc filter add dev $eth0 ingress chain $(IS1 1) pref 1 \
+	tc filter add dev $swp1 ingress chain $(IS1 1) pref 1 \
 		protocol 802.1Q flower skip_sw vlan_id 100 \
 		action vlan pop \
 		action goto chain $(IS1 2)
 
-	tc filter add dev $eth0 egress chain $(ES0) pref 1 \
-		flower skip_sw indev $eth1 \
+	tc filter add dev $swp1 egress chain $(ES0) pref 1 \
+		flower skip_sw indev $swp2 \
 		action vlan push protocol 802.1Q id 100
 
-	tc filter add dev $eth0 ingress chain $(IS1 0) pref 2 \
+	tc filter add dev $swp1 ingress chain $(IS1 0) pref 2 \
 		protocol ipv4 flower skip_sw src_ip 10.1.1.2 \
 		action skbedit priority 7 \
 		action goto chain $(IS1 1)
 
-	tc filter add dev $eth0 ingress chain $(IS2 0 0) pref 1 \
+	tc filter add dev $swp1 ingress chain $(IS2 0 0) pref 1 \
 		protocol ipv4 flower skip_sw ip_proto udp dst_port 5201 \
 		action police rate 50mbit burst 64k conform-exceed drop/pipe \
 		action goto chain $(IS2 1 0)
@@ -196,9 +196,9 @@ setup_prepare()
 
 cleanup()
 {
-	ip link del $eth3.200
-	ip link del $eth3.100
-	tc qdisc del dev $eth0 clsact
+	ip link del $h1.200
+	ip link del $h1.100
+	tc qdisc del dev $swp1 clsact
 	ip link del br0
 }
 
@@ -206,21 +206,21 @@ test_vlan_pop()
 {
 	RET=0
 
-	tcpdump_start $eth2
+	tcpdump_start $h2
 
 	# Work around Mausezahn VLAN builder bug
 	# (https://github.com/netsniff-ng/netsniff-ng/issues/225) by using
 	# an 8021q upper
-	$MZ $eth3.100 -q -c 1 -p 64 -a $eth3_mac -b $eth2_mac -t ip
+	$MZ $h1.100 -q -c 1 -p 64 -a $h1_mac -b $h2_mac -t ip
 
 	sleep 1
 
-	tcpdump_stop $eth2
+	tcpdump_stop $h2
 
-	tcpdump_show $eth2 | grep -q "$eth3_mac > $eth2_mac, ethertype IPv4"
+	tcpdump_show $h2 | grep -q "$h1_mac > $h2_mac, ethertype IPv4"
 	check_err "$?" "untagged reception"
 
-	tcpdump_cleanup $eth2
+	tcpdump_cleanup $h2
 
 	log_test "VLAN pop"
 }
@@ -229,18 +229,18 @@ test_vlan_push()
 {
 	RET=0
 
-	tcpdump_start $eth3.100
+	tcpdump_start $h1.100
 
-	$MZ $eth2 -q -c 1 -p 64 -a $eth2_mac -b $eth3_mac -t ip
+	$MZ $h2 -q -c 1 -p 64 -a $h2_mac -b $h1_mac -t ip
 
 	sleep 1
 
-	tcpdump_stop $eth3.100
+	tcpdump_stop $h1.100
 
-	tcpdump_show $eth3.100 | grep -q "$eth2_mac > $eth3_mac"
+	tcpdump_show $h1.100 | grep -q "$h2_mac > $h1_mac"
 	check_err "$?" "tagged reception"
 
-	tcpdump_cleanup $eth3.100
+	tcpdump_cleanup $h1.100
 
 	log_test "VLAN push"
 }
@@ -250,33 +250,33 @@ test_vlan_ingress_modify()
 	RET=0
 
 	ip link set br0 type bridge vlan_filtering 1
-	bridge vlan add dev $eth0 vid 200
-	bridge vlan add dev $eth0 vid 300
-	bridge vlan add dev $eth1 vid 300
+	bridge vlan add dev $swp1 vid 200
+	bridge vlan add dev $swp1 vid 300
+	bridge vlan add dev $swp2 vid 300
 
-	tc filter add dev $eth0 ingress chain $(IS1 2) pref 3 \
+	tc filter add dev $swp1 ingress chain $(IS1 2) pref 3 \
 		protocol 802.1Q flower skip_sw vlan_id 200 \
 		action vlan modify id 300 \
 		action goto chain $(IS2 0 0)
 
-	tcpdump_start $eth2
+	tcpdump_start $h2
 
-	$MZ $eth3.200 -q -c 1 -p 64 -a $eth3_mac -b $eth2_mac -t ip
+	$MZ $h1.200 -q -c 1 -p 64 -a $h1_mac -b $h2_mac -t ip
 
 	sleep 1
 
-	tcpdump_stop $eth2
+	tcpdump_stop $h2
 
-	tcpdump_show $eth2 | grep -q "$eth3_mac > $eth2_mac, .* vlan 300"
+	tcpdump_show $h2 | grep -q "$h1_mac > $h2_mac, .* vlan 300"
 	check_err "$?" "tagged reception"
 
-	tcpdump_cleanup $eth2
+	tcpdump_cleanup $h2
 
-	tc filter del dev $eth0 ingress chain $(IS1 2) pref 3
+	tc filter del dev $swp1 ingress chain $(IS1 2) pref 3
 
-	bridge vlan del dev $eth0 vid 200
-	bridge vlan del dev $eth0 vid 300
-	bridge vlan del dev $eth1 vid 300
+	bridge vlan del dev $swp1 vid 200
+	bridge vlan del dev $swp1 vid 300
+	bridge vlan del dev $swp2 vid 300
 	ip link set br0 type bridge vlan_filtering 0
 
 	log_test "Ingress VLAN modification"
@@ -286,34 +286,34 @@ test_vlan_egress_modify()
 {
 	RET=0
 
-	tc qdisc add dev $eth1 clsact
+	tc qdisc add dev $swp2 clsact
 
 	ip link set br0 type bridge vlan_filtering 1
-	bridge vlan add dev $eth0 vid 200
-	bridge vlan add dev $eth1 vid 200
+	bridge vlan add dev $swp1 vid 200
+	bridge vlan add dev $swp2 vid 200
 
-	tc filter add dev $eth1 egress chain $(ES0) pref 3 \
+	tc filter add dev $swp2 egress chain $(ES0) pref 3 \
 		protocol 802.1Q flower skip_sw vlan_id 200 vlan_prio 0 \
 		action vlan modify id 300 priority 7
 
-	tcpdump_start $eth2
+	tcpdump_start $h2
 
-	$MZ $eth3.200 -q -c 1 -p 64 -a $eth3_mac -b $eth2_mac -t ip
+	$MZ $h1.200 -q -c 1 -p 64 -a $h1_mac -b $h2_mac -t ip
 
 	sleep 1
 
-	tcpdump_stop $eth2
+	tcpdump_stop $h2
 
-	tcpdump_show $eth2 | grep -q "$eth3_mac > $eth2_mac, .* vlan 300"
+	tcpdump_show $h2 | grep -q "$h1_mac > $h2_mac, .* vlan 300"
 	check_err "$?" "tagged reception"
 
-	tcpdump_cleanup $eth2
+	tcpdump_cleanup $h2
 
-	tc filter del dev $eth1 egress chain $(ES0) pref 3
-	tc qdisc del dev $eth1 clsact
+	tc filter del dev $swp2 egress chain $(ES0) pref 3
+	tc qdisc del dev $swp2 clsact
 
-	bridge vlan del dev $eth0 vid 200
-	bridge vlan del dev $eth1 vid 200
+	bridge vlan del dev $swp1 vid 200
+	bridge vlan del dev $swp2 vid 200
 	ip link set br0 type bridge vlan_filtering 0
 
 	log_test "Egress VLAN modification"
@@ -323,11 +323,11 @@ test_skbedit_priority()
 {
 	local num_pkts=100
 
-	before=$(ethtool_stats_get $eth0 'rx_green_prio_7')
+	before=$(ethtool_stats_get $swp1 'rx_green_prio_7')
 
-	$MZ $eth3 -q -c $num_pkts -p 64 -a $eth3_mac -b $eth2_mac -t ip -A 10.1.1.2
+	$MZ $h1 -q -c $num_pkts -p 64 -a $h1_mac -b $h2_mac -t ip -A 10.1.1.2
 
-	after=$(ethtool_stats_get $eth0 'rx_green_prio_7')
+	after=$(ethtool_stats_get $swp1 'rx_green_prio_7')
 
 	if [ $((after - before)) = $num_pkts ]; then
 		RET=0
-- 
cgit 


From 4ea1396a8bd5b7af4df15c52396c640a92b05b30 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sun, 22 May 2022 12:50:40 +0300
Subject: selftests: ocelot: tc_flower_chains: reorder interfaces

Use the standard interface order h1, swp1, swp2, h2 that is used by the
forwarding selftest framework. The previous order was confusing even
with the ASCII drawing. That isn't needed anymore.

This also drops the fixed MAC addresses and uses STABLE_MAC_ADDRS, which
ensures the MAC addresses are unique.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../drivers/net/ocelot/tc_flower_chains.sh         | 42 ++++++++++------------
 1 file changed, 19 insertions(+), 23 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
index ecaeae7197b8..9c79bbcce5a8 100755
--- a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
+++ b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
@@ -4,35 +4,17 @@
 
 WAIT_TIME=1
 NUM_NETIFS=4
+STABLE_MAC_ADDRS=yes
 lib_dir=$(dirname $0)/../../../net/forwarding
 source $lib_dir/tc_common.sh
 source $lib_dir/lib.sh
 
 require_command tcpdump
 
-#
-#   +---------------------------------------------+
-#   |       DUT ports         Generator ports     |
-#   | +--------+ +--------+ +--------+ +--------+ |
-#   | |        | |        | |        | |        | |
-#   | |  swp1  | |  swp2  | |   h2   | |    h1  | |
-#   | |        | |        | |        | |        | |
-#   +-+--------+-+--------+-+--------+-+--------+-+
-#          |         |           |          |
-#          |         |           |          |
-#          |         +-----------+          |
-#          |                                |
-#          +--------------------------------+
-
-swp1=${NETIFS[p1]}
-swp2=${NETIFS[p2]}
-h2=${NETIFS[p3]}
-h1=${NETIFS[p4]}
-
-swp1_mac="de:ad:be:ef:00:00"
-swp2_mac="de:ad:be:ef:00:01"
-h2_mac="de:ad:be:ef:00:02"
-h1_mac="de:ad:be:ef:00:03"
+h1=${NETIFS[p1]}
+swp1=${NETIFS[p2]}
+swp2=${NETIFS[p3]}
+h2=${NETIFS[p4]}
 
 # Helpers to map a VCAP IS1 and VCAP IS2 lookup and policy to a chain number
 # used by the kernel driver. The numbers are:
@@ -204,6 +186,9 @@ cleanup()
 
 test_vlan_pop()
 {
+	local h1_mac=$(mac_get $h1)
+	local h2_mac=$(mac_get $h2)
+
 	RET=0
 
 	tcpdump_start $h2
@@ -227,6 +212,9 @@ test_vlan_pop()
 
 test_vlan_push()
 {
+	local h1_mac=$(mac_get $h1)
+	local h2_mac=$(mac_get $h2)
+
 	RET=0
 
 	tcpdump_start $h1.100
@@ -247,6 +235,9 @@ test_vlan_push()
 
 test_vlan_ingress_modify()
 {
+	local h1_mac=$(mac_get $h1)
+	local h2_mac=$(mac_get $h2)
+
 	RET=0
 
 	ip link set br0 type bridge vlan_filtering 1
@@ -284,6 +275,9 @@ test_vlan_ingress_modify()
 
 test_vlan_egress_modify()
 {
+	local h1_mac=$(mac_get $h1)
+	local h2_mac=$(mac_get $h2)
+
 	RET=0
 
 	tc qdisc add dev $swp2 clsact
@@ -321,6 +315,8 @@ test_vlan_egress_modify()
 
 test_skbedit_priority()
 {
+	local h1_mac=$(mac_get $h1)
+	local h2_mac=$(mac_get $h2)
 	local num_pkts=100
 
 	before=$(ethtool_stats_get $swp1 'rx_green_prio_7')
-- 
cgit 


From 371183fa578a4cf56b3ae12e54b7f01a4249add1 Mon Sep 17 00:00:00 2001
From: Mickaël Salaün <mic@digikod.net>
Date: Fri, 6 May 2022 18:05:11 +0200
Subject: selftests/landlock: Format with clang-format
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Let's follow a consistent and documented coding style.  Everything may
not be to our liking but it is better than tacit knowledge.  Moreover,
this will help maintain style consistency between different developers.

This contains only whitespace changes.

Automatically formatted with:
clang-format-14 -i tools/testing/selftests/landlock/*.[ch]

Link: https://lore.kernel.org/r/20220506160513.523257-6-mic@digikod.net
Cc: stable@vger.kernel.org
[mic: Update style according to
https://lore.kernel.org/r/02494cb8-2aa5-1769-f28d-d7206f284e5a@digikod.net]
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---
 tools/testing/selftests/landlock/base_test.c   |  80 ++---
 tools/testing/selftests/landlock/common.h      |  64 ++--
 tools/testing/selftests/landlock/fs_test.c     | 399 ++++++++++++++-----------
 tools/testing/selftests/landlock/ptrace_test.c |  20 +-
 4 files changed, 312 insertions(+), 251 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c
index ca40abe9daa8..3faeae4233a4 100644
--- a/tools/testing/selftests/landlock/base_test.c
+++ b/tools/testing/selftests/landlock/base_test.c
@@ -18,10 +18,11 @@
 #include "common.h"
 
 #ifndef O_PATH
-#define O_PATH		010000000
+#define O_PATH 010000000
 #endif
 
-TEST(inconsistent_attr) {
+TEST(inconsistent_attr)
+{
 	const long page_size = sysconf(_SC_PAGESIZE);
 	char *const buf = malloc(page_size + 1);
 	struct landlock_ruleset_attr *const ruleset_attr = (void *)buf;
@@ -39,15 +40,16 @@ TEST(inconsistent_attr) {
 	/* The size if less than sizeof(struct landlock_attr_enforce). */
 	ASSERT_EQ(EFAULT, errno);
 
-	ASSERT_EQ(-1, landlock_create_ruleset(NULL,
-				sizeof(struct landlock_ruleset_attr), 0));
+	ASSERT_EQ(-1, landlock_create_ruleset(
+			      NULL, sizeof(struct landlock_ruleset_attr), 0));
 	ASSERT_EQ(EFAULT, errno);
 
 	ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, page_size + 1, 0));
 	ASSERT_EQ(E2BIG, errno);
 
-	ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr,
-				sizeof(struct landlock_ruleset_attr), 0));
+	ASSERT_EQ(-1, landlock_create_ruleset(
+			      ruleset_attr,
+			      sizeof(struct landlock_ruleset_attr), 0));
 	ASSERT_EQ(ENOMSG, errno);
 	ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, page_size, 0));
 	ASSERT_EQ(ENOMSG, errno);
@@ -63,32 +65,35 @@ TEST(inconsistent_attr) {
 	free(buf);
 }
 
-TEST(abi_version) {
+TEST(abi_version)
+{
 	const struct landlock_ruleset_attr ruleset_attr = {
 		.handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE,
 	};
 	ASSERT_EQ(1, landlock_create_ruleset(NULL, 0,
-				LANDLOCK_CREATE_RULESET_VERSION));
+					     LANDLOCK_CREATE_RULESET_VERSION));
 
 	ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0,
-				LANDLOCK_CREATE_RULESET_VERSION));
+					      LANDLOCK_CREATE_RULESET_VERSION));
 	ASSERT_EQ(EINVAL, errno);
 
 	ASSERT_EQ(-1, landlock_create_ruleset(NULL, sizeof(ruleset_attr),
-				LANDLOCK_CREATE_RULESET_VERSION));
+					      LANDLOCK_CREATE_RULESET_VERSION));
 	ASSERT_EQ(EINVAL, errno);
 
-	ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr,
-				sizeof(ruleset_attr),
-				LANDLOCK_CREATE_RULESET_VERSION));
+	ASSERT_EQ(-1,
+		  landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr),
+					  LANDLOCK_CREATE_RULESET_VERSION));
 	ASSERT_EQ(EINVAL, errno);
 
 	ASSERT_EQ(-1, landlock_create_ruleset(NULL, 0,
-				LANDLOCK_CREATE_RULESET_VERSION | 1 << 31));
+					      LANDLOCK_CREATE_RULESET_VERSION |
+						      1 << 31));
 	ASSERT_EQ(EINVAL, errno);
 }
 
-TEST(inval_create_ruleset_flags) {
+TEST(inval_create_ruleset_flags)
+{
 	const int last_flag = LANDLOCK_CREATE_RULESET_VERSION;
 	const int invalid_flag = last_flag << 1;
 	const struct landlock_ruleset_attr ruleset_attr = {
@@ -102,38 +107,42 @@ TEST(inval_create_ruleset_flags) {
 	ASSERT_EQ(EINVAL, errno);
 
 	ASSERT_EQ(-1, landlock_create_ruleset(NULL, sizeof(ruleset_attr),
-				invalid_flag));
+					      invalid_flag));
 	ASSERT_EQ(EINVAL, errno);
 
-	ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr,
-				sizeof(ruleset_attr), invalid_flag));
+	ASSERT_EQ(-1,
+		  landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr),
+					  invalid_flag));
 	ASSERT_EQ(EINVAL, errno);
 }
 
-TEST(empty_path_beneath_attr) {
+TEST(empty_path_beneath_attr)
+{
 	const struct landlock_ruleset_attr ruleset_attr = {
 		.handled_access_fs = LANDLOCK_ACCESS_FS_EXECUTE,
 	};
-	const int ruleset_fd = landlock_create_ruleset(&ruleset_attr,
-			sizeof(ruleset_attr), 0);
+	const int ruleset_fd =
+		landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
 
 	ASSERT_LE(0, ruleset_fd);
 
 	/* Similar to struct landlock_path_beneath_attr.parent_fd = 0 */
 	ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
-				NULL, 0));
+					NULL, 0));
 	ASSERT_EQ(EFAULT, errno);
 	ASSERT_EQ(0, close(ruleset_fd));
 }
 
-TEST(inval_fd_enforce) {
+TEST(inval_fd_enforce)
+{
 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
 
 	ASSERT_EQ(-1, landlock_restrict_self(-1, 0));
 	ASSERT_EQ(EBADF, errno);
 }
 
-TEST(unpriv_enforce_without_no_new_privs) {
+TEST(unpriv_enforce_without_no_new_privs)
+{
 	int err;
 
 	drop_caps(_metadata);
@@ -151,8 +160,8 @@ TEST(ruleset_fd_io)
 	char buf;
 
 	drop_caps(_metadata);
-	ruleset_fd = landlock_create_ruleset(&ruleset_attr,
-			sizeof(ruleset_attr), 0);
+	ruleset_fd =
+		landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
 	ASSERT_LE(0, ruleset_fd);
 
 	ASSERT_EQ(-1, write(ruleset_fd, ".", 1));
@@ -197,14 +206,15 @@ TEST(ruleset_fd_transfer)
 	drop_caps(_metadata);
 
 	/* Creates a test ruleset with a simple rule. */
-	ruleset_fd_tx = landlock_create_ruleset(&ruleset_attr,
-			sizeof(ruleset_attr), 0);
+	ruleset_fd_tx =
+		landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
 	ASSERT_LE(0, ruleset_fd_tx);
-	path_beneath_attr.parent_fd = open("/tmp", O_PATH | O_NOFOLLOW |
-			O_DIRECTORY | O_CLOEXEC);
+	path_beneath_attr.parent_fd =
+		open("/tmp", O_PATH | O_NOFOLLOW | O_DIRECTORY | O_CLOEXEC);
 	ASSERT_LE(0, path_beneath_attr.parent_fd);
-	ASSERT_EQ(0, landlock_add_rule(ruleset_fd_tx, LANDLOCK_RULE_PATH_BENEATH,
-				&path_beneath_attr, 0));
+	ASSERT_EQ(0,
+		  landlock_add_rule(ruleset_fd_tx, LANDLOCK_RULE_PATH_BENEATH,
+				    &path_beneath_attr, 0));
 	ASSERT_EQ(0, close(path_beneath_attr.parent_fd));
 
 	cmsg = CMSG_FIRSTHDR(&msg);
@@ -215,7 +225,8 @@ TEST(ruleset_fd_transfer)
 	memcpy(CMSG_DATA(cmsg), &ruleset_fd_tx, sizeof(ruleset_fd_tx));
 
 	/* Sends the ruleset FD over a socketpair and then close it. */
-	ASSERT_EQ(0, socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, socket_fds));
+	ASSERT_EQ(0, socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0,
+				socket_fds));
 	ASSERT_EQ(sizeof(data_tx), sendmsg(socket_fds[0], &msg, 0));
 	ASSERT_EQ(0, close(socket_fds[0]));
 	ASSERT_EQ(0, close(ruleset_fd_tx));
@@ -226,7 +237,8 @@ TEST(ruleset_fd_transfer)
 		int ruleset_fd_rx;
 
 		*(char *)msg.msg_iov->iov_base = '\0';
-		ASSERT_EQ(sizeof(data_tx), recvmsg(socket_fds[1], &msg, MSG_CMSG_CLOEXEC));
+		ASSERT_EQ(sizeof(data_tx),
+			  recvmsg(socket_fds[1], &msg, MSG_CMSG_CLOEXEC));
 		ASSERT_EQ('.', *(char *)msg.msg_iov->iov_base);
 		ASSERT_EQ(0, close(socket_fds[1]));
 		cmsg = CMSG_FIRSTHDR(&msg);
diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h
index 435ba6963756..7ba18eb23783 100644
--- a/tools/testing/selftests/landlock/common.h
+++ b/tools/testing/selftests/landlock/common.h
@@ -75,9 +75,9 @@
 /* clang-format on */
 
 #ifndef landlock_create_ruleset
-static inline int landlock_create_ruleset(
-		const struct landlock_ruleset_attr *const attr,
-		const size_t size, const __u32 flags)
+static inline int
+landlock_create_ruleset(const struct landlock_ruleset_attr *const attr,
+			const size_t size, const __u32 flags)
 {
 	return syscall(__NR_landlock_create_ruleset, attr, size, flags);
 }
@@ -85,17 +85,18 @@ static inline int landlock_create_ruleset(
 
 #ifndef landlock_add_rule
 static inline int landlock_add_rule(const int ruleset_fd,
-		const enum landlock_rule_type rule_type,
-		const void *const rule_attr, const __u32 flags)
+				    const enum landlock_rule_type rule_type,
+				    const void *const rule_attr,
+				    const __u32 flags)
 {
-	return syscall(__NR_landlock_add_rule, ruleset_fd, rule_type,
-			rule_attr, flags);
+	return syscall(__NR_landlock_add_rule, ruleset_fd, rule_type, rule_attr,
+		       flags);
 }
 #endif
 
 #ifndef landlock_restrict_self
 static inline int landlock_restrict_self(const int ruleset_fd,
-		const __u32 flags)
+					 const __u32 flags)
 {
 	return syscall(__NR_landlock_restrict_self, ruleset_fd, flags);
 }
@@ -113,69 +114,76 @@ static void _init_caps(struct __test_metadata *const _metadata, bool drop_all)
 	};
 
 	cap_p = cap_get_proc();
-	EXPECT_NE(NULL, cap_p) {
+	EXPECT_NE(NULL, cap_p)
+	{
 		TH_LOG("Failed to cap_get_proc: %s", strerror(errno));
 	}
-	EXPECT_NE(-1, cap_clear(cap_p)) {
+	EXPECT_NE(-1, cap_clear(cap_p))
+	{
 		TH_LOG("Failed to cap_clear: %s", strerror(errno));
 	}
 	if (!drop_all) {
 		EXPECT_NE(-1, cap_set_flag(cap_p, CAP_PERMITTED,
-					ARRAY_SIZE(caps), caps, CAP_SET)) {
+					   ARRAY_SIZE(caps), caps, CAP_SET))
+		{
 			TH_LOG("Failed to cap_set_flag: %s", strerror(errno));
 		}
 	}
-	EXPECT_NE(-1, cap_set_proc(cap_p)) {
+	EXPECT_NE(-1, cap_set_proc(cap_p))
+	{
 		TH_LOG("Failed to cap_set_proc: %s", strerror(errno));
 	}
-	EXPECT_NE(-1, cap_free(cap_p)) {
+	EXPECT_NE(-1, cap_free(cap_p))
+	{
 		TH_LOG("Failed to cap_free: %s", strerror(errno));
 	}
 }
 
 /* We cannot put such helpers in a library because of kselftest_harness.h . */
-__attribute__((__unused__))
-static void disable_caps(struct __test_metadata *const _metadata)
+__attribute__((__unused__)) static void
+disable_caps(struct __test_metadata *const _metadata)
 {
 	_init_caps(_metadata, false);
 }
 
-__attribute__((__unused__))
-static void drop_caps(struct __test_metadata *const _metadata)
+__attribute__((__unused__)) static void
+drop_caps(struct __test_metadata *const _metadata)
 {
 	_init_caps(_metadata, true);
 }
 
 static void _effective_cap(struct __test_metadata *const _metadata,
-		const cap_value_t caps, const cap_flag_value_t value)
+			   const cap_value_t caps, const cap_flag_value_t value)
 {
 	cap_t cap_p;
 
 	cap_p = cap_get_proc();
-	EXPECT_NE(NULL, cap_p) {
+	EXPECT_NE(NULL, cap_p)
+	{
 		TH_LOG("Failed to cap_get_proc: %s", strerror(errno));
 	}
-	EXPECT_NE(-1, cap_set_flag(cap_p, CAP_EFFECTIVE, 1, &caps, value)) {
+	EXPECT_NE(-1, cap_set_flag(cap_p, CAP_EFFECTIVE, 1, &caps, value))
+	{
 		TH_LOG("Failed to cap_set_flag: %s", strerror(errno));
 	}
-	EXPECT_NE(-1, cap_set_proc(cap_p)) {
+	EXPECT_NE(-1, cap_set_proc(cap_p))
+	{
 		TH_LOG("Failed to cap_set_proc: %s", strerror(errno));
 	}
-	EXPECT_NE(-1, cap_free(cap_p)) {
+	EXPECT_NE(-1, cap_free(cap_p))
+	{
 		TH_LOG("Failed to cap_free: %s", strerror(errno));
 	}
 }
 
-__attribute__((__unused__))
-static void set_cap(struct __test_metadata *const _metadata,
-		const cap_value_t caps)
+__attribute__((__unused__)) static void
+set_cap(struct __test_metadata *const _metadata, const cap_value_t caps)
 {
 	_effective_cap(_metadata, caps, CAP_SET);
 }
 
-__attribute__((__unused__))
-static void clear_cap(struct __test_metadata *const _metadata,
-		const cap_value_t caps)
+__attribute__((__unused__)) static void
+clear_cap(struct __test_metadata *const _metadata, const cap_value_t caps)
 {
 	_effective_cap(_metadata, caps, CAP_CLEAR);
 }
diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
index 198184ca0396..28b01cb30c78 100644
--- a/tools/testing/selftests/landlock/fs_test.c
+++ b/tools/testing/selftests/landlock/fs_test.c
@@ -22,8 +22,8 @@
 
 #include "common.h"
 
-#define TMP_DIR		"tmp"
-#define BINARY_PATH	"./true"
+#define TMP_DIR "tmp"
+#define BINARY_PATH "./true"
 
 /* Paths (sibling number and depth) */
 static const char dir_s1d1[] = TMP_DIR "/s1d1";
@@ -75,7 +75,7 @@ static const char dir_s3d3[] = TMP_DIR "/s3d1/s3d2/s3d3";
  */
 
 static void mkdir_parents(struct __test_metadata *const _metadata,
-		const char *const path)
+			  const char *const path)
 {
 	char *walker;
 	const char *parent;
@@ -90,9 +90,10 @@ static void mkdir_parents(struct __test_metadata *const _metadata,
 			continue;
 		walker[i] = '\0';
 		err = mkdir(parent, 0700);
-		ASSERT_FALSE(err && errno != EEXIST) {
-			TH_LOG("Failed to create directory \"%s\": %s",
-					parent, strerror(errno));
+		ASSERT_FALSE(err && errno != EEXIST)
+		{
+			TH_LOG("Failed to create directory \"%s\": %s", parent,
+			       strerror(errno));
 		}
 		walker[i] = '/';
 	}
@@ -100,22 +101,24 @@ static void mkdir_parents(struct __test_metadata *const _metadata,
 }
 
 static void create_directory(struct __test_metadata *const _metadata,
-		const char *const path)
+			     const char *const path)
 {
 	mkdir_parents(_metadata, path);
-	ASSERT_EQ(0, mkdir(path, 0700)) {
+	ASSERT_EQ(0, mkdir(path, 0700))
+	{
 		TH_LOG("Failed to create directory \"%s\": %s", path,
-				strerror(errno));
+		       strerror(errno));
 	}
 }
 
 static void create_file(struct __test_metadata *const _metadata,
-		const char *const path)
+			const char *const path)
 {
 	mkdir_parents(_metadata, path);
-	ASSERT_EQ(0, mknod(path, S_IFREG | 0700, 0)) {
+	ASSERT_EQ(0, mknod(path, S_IFREG | 0700, 0))
+	{
 		TH_LOG("Failed to create file \"%s\": %s", path,
-				strerror(errno));
+		       strerror(errno));
 	}
 }
 
@@ -243,7 +246,8 @@ FIXTURE_TEARDOWN(layout1)
  * This helper enables to use the ASSERT_* macros and print the line number
  * pointing to the test caller.
  */
-static int test_open_rel(const int dirfd, const char *const path, const int flags)
+static int test_open_rel(const int dirfd, const char *const path,
+			 const int flags)
 {
 	int fd;
 
@@ -292,23 +296,23 @@ TEST_F_FORK(layout1, inval)
 {
 	struct landlock_path_beneath_attr path_beneath = {
 		.allowed_access = LANDLOCK_ACCESS_FS_READ_FILE |
-			LANDLOCK_ACCESS_FS_WRITE_FILE,
+				  LANDLOCK_ACCESS_FS_WRITE_FILE,
 		.parent_fd = -1,
 	};
 	struct landlock_ruleset_attr ruleset_attr = {
 		.handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE |
-			LANDLOCK_ACCESS_FS_WRITE_FILE,
+				     LANDLOCK_ACCESS_FS_WRITE_FILE,
 	};
 	int ruleset_fd;
 
-	path_beneath.parent_fd = open(dir_s1d2, O_PATH | O_DIRECTORY |
-			O_CLOEXEC);
+	path_beneath.parent_fd =
+		open(dir_s1d2, O_PATH | O_DIRECTORY | O_CLOEXEC);
 	ASSERT_LE(0, path_beneath.parent_fd);
 
 	ruleset_fd = open(dir_s1d1, O_PATH | O_DIRECTORY | O_CLOEXEC);
 	ASSERT_LE(0, ruleset_fd);
 	ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
-				&path_beneath, 0));
+					&path_beneath, 0));
 	/* Returns EBADF because ruleset_fd is not a landlock-ruleset FD. */
 	ASSERT_EQ(EBADF, errno);
 	ASSERT_EQ(0, close(ruleset_fd));
@@ -316,55 +320,55 @@ TEST_F_FORK(layout1, inval)
 	ruleset_fd = open(dir_s1d1, O_DIRECTORY | O_CLOEXEC);
 	ASSERT_LE(0, ruleset_fd);
 	ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
-				&path_beneath, 0));
+					&path_beneath, 0));
 	/* Returns EBADFD because ruleset_fd is not a valid ruleset. */
 	ASSERT_EQ(EBADFD, errno);
 	ASSERT_EQ(0, close(ruleset_fd));
 
 	/* Gets a real ruleset. */
-	ruleset_fd = landlock_create_ruleset(&ruleset_attr,
-			sizeof(ruleset_attr), 0);
+	ruleset_fd =
+		landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
 	ASSERT_LE(0, ruleset_fd);
 	ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
-				&path_beneath, 0));
+				       &path_beneath, 0));
 	ASSERT_EQ(0, close(path_beneath.parent_fd));
 
 	/* Tests without O_PATH. */
 	path_beneath.parent_fd = open(dir_s1d2, O_DIRECTORY | O_CLOEXEC);
 	ASSERT_LE(0, path_beneath.parent_fd);
 	ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
-				&path_beneath, 0));
+				       &path_beneath, 0));
 	ASSERT_EQ(0, close(path_beneath.parent_fd));
 
 	/* Tests with a ruleset FD. */
 	path_beneath.parent_fd = ruleset_fd;
 	ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
-				&path_beneath, 0));
+					&path_beneath, 0));
 	ASSERT_EQ(EBADFD, errno);
 
 	/* Checks unhandled allowed_access. */
-	path_beneath.parent_fd = open(dir_s1d2, O_PATH | O_DIRECTORY |
-			O_CLOEXEC);
+	path_beneath.parent_fd =
+		open(dir_s1d2, O_PATH | O_DIRECTORY | O_CLOEXEC);
 	ASSERT_LE(0, path_beneath.parent_fd);
 
 	/* Test with legitimate values. */
 	path_beneath.allowed_access |= LANDLOCK_ACCESS_FS_EXECUTE;
 	ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
-				&path_beneath, 0));
+					&path_beneath, 0));
 	ASSERT_EQ(EINVAL, errno);
 	path_beneath.allowed_access &= ~LANDLOCK_ACCESS_FS_EXECUTE;
 
 	/* Test with unknown (64-bits) value. */
 	path_beneath.allowed_access |= (1ULL << 60);
 	ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
-				&path_beneath, 0));
+					&path_beneath, 0));
 	ASSERT_EQ(EINVAL, errno);
 	path_beneath.allowed_access &= ~(1ULL << 60);
 
 	/* Test with no access. */
 	path_beneath.allowed_access = 0;
 	ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
-				&path_beneath, 0));
+					&path_beneath, 0));
 	ASSERT_EQ(ENOMSG, errno);
 	path_beneath.allowed_access &= ~(1ULL << 60);
 
@@ -409,8 +413,8 @@ TEST_F_FORK(layout1, file_access_rights)
 	struct landlock_ruleset_attr ruleset_attr = {
 		.handled_access_fs = ACCESS_ALL,
 	};
-	const int ruleset_fd = landlock_create_ruleset(&ruleset_attr,
-			sizeof(ruleset_attr), 0);
+	const int ruleset_fd =
+		landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
 
 	ASSERT_LE(0, ruleset_fd);
 
@@ -420,7 +424,7 @@ TEST_F_FORK(layout1, file_access_rights)
 	for (access = 1; access <= ACCESS_LAST; access <<= 1) {
 		path_beneath.allowed_access = access;
 		err = landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
-				&path_beneath, 0);
+					&path_beneath, 0);
 		if ((access | ACCESS_FILE) == ACCESS_FILE) {
 			ASSERT_EQ(0, err);
 		} else {
@@ -432,22 +436,24 @@ TEST_F_FORK(layout1, file_access_rights)
 }
 
 static void add_path_beneath(struct __test_metadata *const _metadata,
-		const int ruleset_fd, const __u64 allowed_access,
-		const char *const path)
+			     const int ruleset_fd, const __u64 allowed_access,
+			     const char *const path)
 {
 	struct landlock_path_beneath_attr path_beneath = {
 		.allowed_access = allowed_access,
 	};
 
 	path_beneath.parent_fd = open(path, O_PATH | O_CLOEXEC);
-	ASSERT_LE(0, path_beneath.parent_fd) {
+	ASSERT_LE(0, path_beneath.parent_fd)
+	{
 		TH_LOG("Failed to open directory \"%s\": %s", path,
-				strerror(errno));
+		       strerror(errno));
 	}
 	ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
-				&path_beneath, 0)) {
+				       &path_beneath, 0))
+	{
 		TH_LOG("Failed to update the ruleset with \"%s\": %s", path,
-				strerror(errno));
+		       strerror(errno));
 	}
 	ASSERT_EQ(0, close(path_beneath.parent_fd));
 }
@@ -470,38 +476,43 @@ struct rule {
 /* clang-format on */
 
 static int create_ruleset(struct __test_metadata *const _metadata,
-		const __u64 handled_access_fs, const struct rule rules[])
+			  const __u64 handled_access_fs,
+			  const struct rule rules[])
 {
 	int ruleset_fd, i;
 	struct landlock_ruleset_attr ruleset_attr = {
 		.handled_access_fs = handled_access_fs,
 	};
 
-	ASSERT_NE(NULL, rules) {
+	ASSERT_NE(NULL, rules)
+	{
 		TH_LOG("No rule list");
 	}
-	ASSERT_NE(NULL, rules[0].path) {
+	ASSERT_NE(NULL, rules[0].path)
+	{
 		TH_LOG("Empty rule list");
 	}
 
-	ruleset_fd = landlock_create_ruleset(&ruleset_attr,
-			sizeof(ruleset_attr), 0);
-	ASSERT_LE(0, ruleset_fd) {
+	ruleset_fd =
+		landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+	ASSERT_LE(0, ruleset_fd)
+	{
 		TH_LOG("Failed to create a ruleset: %s", strerror(errno));
 	}
 
 	for (i = 0; rules[i].path; i++) {
 		add_path_beneath(_metadata, ruleset_fd, rules[i].access,
-				rules[i].path);
+				 rules[i].path);
 	}
 	return ruleset_fd;
 }
 
 static void enforce_ruleset(struct __test_metadata *const _metadata,
-		const int ruleset_fd)
+			    const int ruleset_fd)
 {
 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
-	ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0)) {
+	ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0))
+	{
 		TH_LOG("Failed to enforce ruleset: %s", strerror(errno));
 	}
 }
@@ -512,13 +523,14 @@ TEST_F_FORK(layout1, proc_nsfs)
 		{
 			.path = "/dev/null",
 			.access = LANDLOCK_ACCESS_FS_READ_FILE |
-				LANDLOCK_ACCESS_FS_WRITE_FILE,
+				  LANDLOCK_ACCESS_FS_WRITE_FILE,
 		},
 		{},
 	};
 	struct landlock_path_beneath_attr path_beneath;
-	const int ruleset_fd = create_ruleset(_metadata, rules[0].access |
-			LANDLOCK_ACCESS_FS_READ_DIR, rules);
+	const int ruleset_fd = create_ruleset(
+		_metadata, rules[0].access | LANDLOCK_ACCESS_FS_READ_DIR,
+		rules);
 
 	ASSERT_LE(0, ruleset_fd);
 	ASSERT_EQ(0, test_open("/proc/self/ns/mnt", O_RDONLY));
@@ -545,16 +557,17 @@ TEST_F_FORK(layout1, proc_nsfs)
 	 * references to a ruleset.
 	 */
 	path_beneath.allowed_access = LANDLOCK_ACCESS_FS_READ_FILE |
-		LANDLOCK_ACCESS_FS_WRITE_FILE,
+				      LANDLOCK_ACCESS_FS_WRITE_FILE,
 	path_beneath.parent_fd = open("/proc/self/ns/mnt", O_PATH | O_CLOEXEC);
 	ASSERT_LE(0, path_beneath.parent_fd);
 	ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
-				&path_beneath, 0));
+					&path_beneath, 0));
 	ASSERT_EQ(EBADFD, errno);
 	ASSERT_EQ(0, close(path_beneath.parent_fd));
 }
 
-TEST_F_FORK(layout1, unpriv) {
+TEST_F_FORK(layout1, unpriv)
+{
 	const struct rule rules[] = {
 		{
 			.path = dir_s1d2,
@@ -586,7 +599,7 @@ TEST_F_FORK(layout1, effective_access)
 		{
 			.path = file1_s2d2,
 			.access = LANDLOCK_ACCESS_FS_READ_FILE |
-				LANDLOCK_ACCESS_FS_WRITE_FILE,
+				  LANDLOCK_ACCESS_FS_WRITE_FILE,
 		},
 		{},
 	};
@@ -662,12 +675,12 @@ TEST_F_FORK(layout1, ruleset_overlap)
 		{
 			.path = dir_s1d2,
 			.access = LANDLOCK_ACCESS_FS_READ_FILE |
-				LANDLOCK_ACCESS_FS_WRITE_FILE,
+				  LANDLOCK_ACCESS_FS_WRITE_FILE,
 		},
 		{
 			.path = dir_s1d2,
 			.access = LANDLOCK_ACCESS_FS_READ_FILE |
-				LANDLOCK_ACCESS_FS_READ_DIR,
+				  LANDLOCK_ACCESS_FS_READ_DIR,
 		},
 		{},
 	};
@@ -717,8 +730,8 @@ TEST_F_FORK(layout1, non_overlapping_accesses)
 	ASSERT_EQ(0, unlink(file1_s1d1));
 	ASSERT_EQ(0, unlink(file1_s1d2));
 
-	ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_MAKE_REG,
-			layer1);
+	ruleset_fd =
+		create_ruleset(_metadata, LANDLOCK_ACCESS_FS_MAKE_REG, layer1);
 	ASSERT_LE(0, ruleset_fd);
 	enforce_ruleset(_metadata, ruleset_fd);
 	ASSERT_EQ(0, close(ruleset_fd));
@@ -729,7 +742,7 @@ TEST_F_FORK(layout1, non_overlapping_accesses)
 	ASSERT_EQ(0, unlink(file1_s1d2));
 
 	ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_REMOVE_FILE,
-			layer2);
+				    layer2);
 	ASSERT_LE(0, ruleset_fd);
 	enforce_ruleset(_metadata, ruleset_fd);
 	ASSERT_EQ(0, close(ruleset_fd));
@@ -775,7 +788,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
 		{
 			.path = dir_s1d3,
 			.access = LANDLOCK_ACCESS_FS_READ_FILE |
-				LANDLOCK_ACCESS_FS_WRITE_FILE,
+				  LANDLOCK_ACCESS_FS_WRITE_FILE,
 		},
 		/* ...but also denies read access via its grandparent directory. */
 		{
@@ -839,7 +852,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
 	int ruleset_fd;
 
 	ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE,
-			layer1_read);
+				    layer1_read);
 	ASSERT_LE(0, ruleset_fd);
 	enforce_ruleset(_metadata, ruleset_fd);
 	ASSERT_EQ(0, close(ruleset_fd));
@@ -849,8 +862,10 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
 	ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
 	ASSERT_EQ(0, test_open(file2_s1d3, O_WRONLY));
 
-	ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE |
-			LANDLOCK_ACCESS_FS_WRITE_FILE, layer2_read_write);
+	ruleset_fd = create_ruleset(_metadata,
+				    LANDLOCK_ACCESS_FS_READ_FILE |
+					    LANDLOCK_ACCESS_FS_WRITE_FILE,
+				    layer2_read_write);
 	ASSERT_LE(0, ruleset_fd);
 	enforce_ruleset(_metadata, ruleset_fd);
 	ASSERT_EQ(0, close(ruleset_fd));
@@ -861,7 +876,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
 	ASSERT_EQ(0, test_open(file2_s1d3, O_WRONLY));
 
 	ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE,
-			layer3_read);
+				    layer3_read);
 	ASSERT_LE(0, ruleset_fd);
 	enforce_ruleset(_metadata, ruleset_fd);
 	ASSERT_EQ(0, close(ruleset_fd));
@@ -872,8 +887,10 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
 	ASSERT_EQ(0, test_open(file2_s1d3, O_WRONLY));
 
 	/* This time, denies write access for the file hierarchy. */
-	ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE |
-			LANDLOCK_ACCESS_FS_WRITE_FILE, layer4_read_write);
+	ruleset_fd = create_ruleset(_metadata,
+				    LANDLOCK_ACCESS_FS_READ_FILE |
+					    LANDLOCK_ACCESS_FS_WRITE_FILE,
+				    layer4_read_write);
 	ASSERT_LE(0, ruleset_fd);
 	enforce_ruleset(_metadata, ruleset_fd);
 	ASSERT_EQ(0, close(ruleset_fd));
@@ -888,7 +905,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
 	ASSERT_EQ(EACCES, test_open(file2_s1d3, O_WRONLY));
 
 	ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE,
-			layer5_read);
+				    layer5_read);
 	ASSERT_LE(0, ruleset_fd);
 	enforce_ruleset(_metadata, ruleset_fd);
 	ASSERT_EQ(0, close(ruleset_fd));
@@ -900,7 +917,7 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
 	ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
 
 	ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_EXECUTE,
-			layer6_execute);
+				    layer6_execute);
 	ASSERT_LE(0, ruleset_fd);
 	enforce_ruleset(_metadata, ruleset_fd);
 	ASSERT_EQ(0, close(ruleset_fd));
@@ -911,8 +928,10 @@ TEST_F_FORK(layout1, interleaved_masked_accesses)
 	ASSERT_EQ(EACCES, test_open(file2_s1d3, O_WRONLY));
 	ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
 
-	ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE |
-			LANDLOCK_ACCESS_FS_WRITE_FILE, layer7_read_write);
+	ruleset_fd = create_ruleset(_metadata,
+				    LANDLOCK_ACCESS_FS_READ_FILE |
+					    LANDLOCK_ACCESS_FS_WRITE_FILE,
+				    layer7_read_write);
 	ASSERT_LE(0, ruleset_fd);
 	enforce_ruleset(_metadata, ruleset_fd);
 	ASSERT_EQ(0, close(ruleset_fd));
@@ -930,7 +949,7 @@ TEST_F_FORK(layout1, inherit_subset)
 		{
 			.path = dir_s1d2,
 			.access = LANDLOCK_ACCESS_FS_READ_FILE |
-				LANDLOCK_ACCESS_FS_READ_DIR,
+				  LANDLOCK_ACCESS_FS_READ_DIR,
 		},
 		{},
 	};
@@ -958,7 +977,7 @@ TEST_F_FORK(layout1, inherit_subset)
 	 * ANDed with the previous ones.
 	 */
 	add_path_beneath(_metadata, ruleset_fd, LANDLOCK_ACCESS_FS_WRITE_FILE,
-			dir_s1d2);
+			 dir_s1d2);
 	/*
 	 * According to ruleset_fd, dir_s1d2 should now have the
 	 * LANDLOCK_ACCESS_FS_READ_FILE and LANDLOCK_ACCESS_FS_WRITE_FILE
@@ -1013,7 +1032,7 @@ TEST_F_FORK(layout1, inherit_subset)
 	 * that there was no rule tied to it before.
 	 */
 	add_path_beneath(_metadata, ruleset_fd, LANDLOCK_ACCESS_FS_WRITE_FILE,
-			dir_s1d3);
+			 dir_s1d3);
 	enforce_ruleset(_metadata, ruleset_fd);
 	ASSERT_EQ(0, close(ruleset_fd));
 
@@ -1063,8 +1082,10 @@ TEST_F_FORK(layout1, inherit_superset)
 	ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
 
 	/* Now dir_s1d2, parent of dir_s1d3, gets a new rule tied to it. */
-	add_path_beneath(_metadata, ruleset_fd, LANDLOCK_ACCESS_FS_READ_FILE |
-			LANDLOCK_ACCESS_FS_READ_DIR, dir_s1d2);
+	add_path_beneath(_metadata, ruleset_fd,
+			 LANDLOCK_ACCESS_FS_READ_FILE |
+				 LANDLOCK_ACCESS_FS_READ_DIR,
+			 dir_s1d2);
 	enforce_ruleset(_metadata, ruleset_fd);
 	ASSERT_EQ(0, close(ruleset_fd));
 
@@ -1106,15 +1127,15 @@ TEST_F_FORK(layout1, empty_or_same_ruleset)
 	int ruleset_fd;
 
 	/* Tests empty handled_access_fs. */
-	ruleset_fd = landlock_create_ruleset(&ruleset_attr,
-			sizeof(ruleset_attr), 0);
+	ruleset_fd =
+		landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
 	ASSERT_LE(-1, ruleset_fd);
 	ASSERT_EQ(ENOMSG, errno);
 
 	/* Enforces policy which deny read access to all files. */
 	ruleset_attr.handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE;
-	ruleset_fd = landlock_create_ruleset(&ruleset_attr,
-			sizeof(ruleset_attr), 0);
+	ruleset_fd =
+		landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
 	ASSERT_LE(0, ruleset_fd);
 	enforce_ruleset(_metadata, ruleset_fd);
 	ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
@@ -1122,8 +1143,8 @@ TEST_F_FORK(layout1, empty_or_same_ruleset)
 
 	/* Nests a policy which deny read access to all directories. */
 	ruleset_attr.handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR;
-	ruleset_fd = landlock_create_ruleset(&ruleset_attr,
-			sizeof(ruleset_attr), 0);
+	ruleset_fd =
+		landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
 	ASSERT_LE(0, ruleset_fd);
 	enforce_ruleset(_metadata, ruleset_fd);
 	ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
@@ -1258,7 +1279,8 @@ TEST_F_FORK(layout1, rule_inside_mount_ns)
 	int ruleset_fd;
 
 	set_cap(_metadata, CAP_SYS_ADMIN);
-	ASSERT_EQ(0, syscall(SYS_pivot_root, dir_s3d2, dir_s3d3)) {
+	ASSERT_EQ(0, syscall(SYS_pivot_root, dir_s3d2, dir_s3d3))
+	{
 		TH_LOG("Failed to pivot root: %s", strerror(errno));
 	};
 	ASSERT_EQ(0, chdir("/"));
@@ -1311,12 +1333,13 @@ TEST_F_FORK(layout1, move_mount)
 
 	set_cap(_metadata, CAP_SYS_ADMIN);
 	ASSERT_EQ(0, syscall(SYS_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD,
-				dir_s1d2, 0)) {
+			     dir_s1d2, 0))
+	{
 		TH_LOG("Failed to move mount: %s", strerror(errno));
 	}
 
 	ASSERT_EQ(0, syscall(SYS_move_mount, AT_FDCWD, dir_s1d2, AT_FDCWD,
-				dir_s3d2, 0));
+			     dir_s3d2, 0));
 	clear_cap(_metadata, CAP_SYS_ADMIN);
 
 	enforce_ruleset(_metadata, ruleset_fd);
@@ -1324,7 +1347,7 @@ TEST_F_FORK(layout1, move_mount)
 
 	set_cap(_metadata, CAP_SYS_ADMIN);
 	ASSERT_EQ(-1, syscall(SYS_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD,
-				dir_s1d2, 0));
+			      dir_s1d2, 0));
 	ASSERT_EQ(EPERM, errno);
 	clear_cap(_metadata, CAP_SYS_ADMIN);
 }
@@ -1371,7 +1394,7 @@ enum relative_access {
 };
 
 static void test_relative_path(struct __test_metadata *const _metadata,
-		const enum relative_access rel)
+			       const enum relative_access rel)
 {
 	/*
 	 * Common layer to check that chroot doesn't ignore it (i.e. a chroot
@@ -1434,14 +1457,16 @@ static void test_relative_path(struct __test_metadata *const _metadata,
 		break;
 	case REL_CHROOT_ONLY:
 		/* Do chroot into dir_s1d2 (relative to dir_s2d2). */
-		ASSERT_EQ(0, chroot("../../s1d1/s1d2")) {
+		ASSERT_EQ(0, chroot("../../s1d1/s1d2"))
+		{
 			TH_LOG("Failed to chroot: %s", strerror(errno));
 		}
 		dirfd = AT_FDCWD;
 		break;
 	case REL_CHROOT_CHDIR:
 		/* Do chroot into dir_s1d2. */
-		ASSERT_EQ(0, chroot(".")) {
+		ASSERT_EQ(0, chroot("."))
+		{
 			TH_LOG("Failed to chroot: %s", strerror(errno));
 		}
 		dirfd = AT_FDCWD;
@@ -1449,7 +1474,7 @@ static void test_relative_path(struct __test_metadata *const _metadata,
 	}
 
 	ASSERT_EQ((rel == REL_CHROOT_CHDIR) ? 0 : EACCES,
-			test_open_rel(dirfd, "..", O_RDONLY));
+		  test_open_rel(dirfd, "..", O_RDONLY));
 	ASSERT_EQ(0, test_open_rel(dirfd, ".", O_RDONLY));
 
 	if (rel == REL_CHROOT_ONLY) {
@@ -1471,11 +1496,13 @@ static void test_relative_path(struct __test_metadata *const _metadata,
 	if (rel != REL_CHROOT_CHDIR) {
 		ASSERT_EQ(EACCES, test_open_rel(dirfd, "../../s1d1", O_RDONLY));
 		ASSERT_EQ(0, test_open_rel(dirfd, "../../s1d1/s1d2", O_RDONLY));
-		ASSERT_EQ(0, test_open_rel(dirfd, "../../s1d1/s1d2/s1d3", O_RDONLY));
+		ASSERT_EQ(0, test_open_rel(dirfd, "../../s1d1/s1d2/s1d3",
+					   O_RDONLY));
 
 		ASSERT_EQ(EACCES, test_open_rel(dirfd, "../../s2d1", O_RDONLY));
 		ASSERT_EQ(0, test_open_rel(dirfd, "../../s2d1/s2d2", O_RDONLY));
-		ASSERT_EQ(0, test_open_rel(dirfd, "../../s2d1/s2d2/s2d3", O_RDONLY));
+		ASSERT_EQ(0, test_open_rel(dirfd, "../../s2d1/s2d2/s2d3",
+					   O_RDONLY));
 	}
 
 	if (rel == REL_OPEN)
@@ -1504,40 +1531,42 @@ TEST_F_FORK(layout1, relative_chroot_chdir)
 }
 
 static void copy_binary(struct __test_metadata *const _metadata,
-		const char *const dst_path)
+			const char *const dst_path)
 {
 	int dst_fd, src_fd;
 	struct stat statbuf;
 
 	dst_fd = open(dst_path, O_WRONLY | O_TRUNC | O_CLOEXEC);
-	ASSERT_LE(0, dst_fd) {
-		TH_LOG("Failed to open \"%s\": %s", dst_path,
-				strerror(errno));
+	ASSERT_LE(0, dst_fd)
+	{
+		TH_LOG("Failed to open \"%s\": %s", dst_path, strerror(errno));
 	}
 	src_fd = open(BINARY_PATH, O_RDONLY | O_CLOEXEC);
-	ASSERT_LE(0, src_fd) {
+	ASSERT_LE(0, src_fd)
+	{
 		TH_LOG("Failed to open \"" BINARY_PATH "\": %s",
-				strerror(errno));
+		       strerror(errno));
 	}
 	ASSERT_EQ(0, fstat(src_fd, &statbuf));
-	ASSERT_EQ(statbuf.st_size, sendfile(dst_fd, src_fd, 0,
-				statbuf.st_size));
+	ASSERT_EQ(statbuf.st_size,
+		  sendfile(dst_fd, src_fd, 0, statbuf.st_size));
 	ASSERT_EQ(0, close(src_fd));
 	ASSERT_EQ(0, close(dst_fd));
 }
 
-static void test_execute(struct __test_metadata *const _metadata,
-		const int err, const char *const path)
+static void test_execute(struct __test_metadata *const _metadata, const int err,
+			 const char *const path)
 {
 	int status;
-	char *const argv[] = {(char *)path, NULL};
+	char *const argv[] = { (char *)path, NULL };
 	const pid_t child = fork();
 
 	ASSERT_LE(0, child);
 	if (child == 0) {
-		ASSERT_EQ(err ? -1 : 0, execve(path, argv, NULL)) {
+		ASSERT_EQ(err ? -1 : 0, execve(path, argv, NULL))
+		{
 			TH_LOG("Failed to execute \"%s\": %s", path,
-					strerror(errno));
+			       strerror(errno));
 		};
 		ASSERT_EQ(err, errno);
 		_exit(_metadata->passed ? 2 : 1);
@@ -1545,9 +1574,10 @@ static void test_execute(struct __test_metadata *const _metadata,
 	}
 	ASSERT_EQ(child, waitpid(child, &status, 0));
 	ASSERT_EQ(1, WIFEXITED(status));
-	ASSERT_EQ(err ? 2 : 0, WEXITSTATUS(status)) {
+	ASSERT_EQ(err ? 2 : 0, WEXITSTATUS(status))
+	{
 		TH_LOG("Unexpected return code for \"%s\": %s", path,
-				strerror(errno));
+		       strerror(errno));
 	};
 }
 
@@ -1560,8 +1590,8 @@ TEST_F_FORK(layout1, execute)
 		},
 		{},
 	};
-	const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
-			rules);
+	const int ruleset_fd =
+		create_ruleset(_metadata, rules[0].access, rules);
 
 	ASSERT_LE(0, ruleset_fd);
 	copy_binary(_metadata, file1_s1d1);
@@ -1593,8 +1623,8 @@ TEST_F_FORK(layout1, link)
 		},
 		{},
 	};
-	const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
-			rules);
+	const int ruleset_fd =
+		create_ruleset(_metadata, rules[0].access, rules);
 
 	ASSERT_LE(0, ruleset_fd);
 
@@ -1630,8 +1660,8 @@ TEST_F_FORK(layout1, rename_file)
 		},
 		{},
 	};
-	const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
-			rules);
+	const int ruleset_fd =
+		create_ruleset(_metadata, rules[0].access, rules);
 
 	ASSERT_LE(0, ruleset_fd);
 
@@ -1684,14 +1714,14 @@ TEST_F_FORK(layout1, rename_file)
 
 	/* Exchanges and renames files with same parent. */
 	ASSERT_EQ(0, renameat2(AT_FDCWD, file2_s2d3, AT_FDCWD, file1_s2d3,
-				RENAME_EXCHANGE));
+			       RENAME_EXCHANGE));
 	ASSERT_EQ(0, rename(file2_s2d3, file1_s2d3));
 
 	/* Exchanges files and directories with same parent, twice. */
 	ASSERT_EQ(0, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s2d3,
-				RENAME_EXCHANGE));
+			       RENAME_EXCHANGE));
 	ASSERT_EQ(0, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s2d3,
-				RENAME_EXCHANGE));
+			       RENAME_EXCHANGE));
 }
 
 TEST_F_FORK(layout1, rename_dir)
@@ -1707,8 +1737,8 @@ TEST_F_FORK(layout1, rename_dir)
 		},
 		{},
 	};
-	const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
-			rules);
+	const int ruleset_fd =
+		create_ruleset(_metadata, rules[0].access, rules);
 
 	ASSERT_LE(0, ruleset_fd);
 
@@ -1745,7 +1775,7 @@ TEST_F_FORK(layout1, rename_dir)
 	 * directory removal.
 	 */
 	ASSERT_EQ(0, renameat2(AT_FDCWD, dir_s1d3, AT_FDCWD, file1_s1d2,
-				RENAME_EXCHANGE));
+			       RENAME_EXCHANGE));
 	ASSERT_EQ(0, unlink(dir_s1d3));
 	ASSERT_EQ(0, mkdir(dir_s1d3, 0700));
 	ASSERT_EQ(0, rename(file1_s1d2, dir_s1d3));
@@ -1761,8 +1791,8 @@ TEST_F_FORK(layout1, remove_dir)
 		},
 		{},
 	};
-	const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
-			rules);
+	const int ruleset_fd =
+		create_ruleset(_metadata, rules[0].access, rules);
 
 	ASSERT_LE(0, ruleset_fd);
 
@@ -1798,8 +1828,8 @@ TEST_F_FORK(layout1, remove_file)
 		},
 		{},
 	};
-	const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
-			rules);
+	const int ruleset_fd =
+		create_ruleset(_metadata, rules[0].access, rules);
 
 	ASSERT_LE(0, ruleset_fd);
 	enforce_ruleset(_metadata, ruleset_fd);
@@ -1814,7 +1844,8 @@ TEST_F_FORK(layout1, remove_file)
 }
 
 static void test_make_file(struct __test_metadata *const _metadata,
-		const __u64 access, const mode_t mode, const dev_t dev)
+			   const __u64 access, const mode_t mode,
+			   const dev_t dev)
 {
 	const struct rule rules[] = {
 		{
@@ -1829,9 +1860,10 @@ static void test_make_file(struct __test_metadata *const _metadata,
 
 	ASSERT_EQ(0, unlink(file1_s1d1));
 	ASSERT_EQ(0, unlink(file2_s1d1));
-	ASSERT_EQ(0, mknod(file2_s1d1, mode | 0400, dev)) {
-		TH_LOG("Failed to make file \"%s\": %s",
-				file2_s1d1, strerror(errno));
+	ASSERT_EQ(0, mknod(file2_s1d1, mode | 0400, dev))
+	{
+		TH_LOG("Failed to make file \"%s\": %s", file2_s1d1,
+		       strerror(errno));
 	};
 
 	ASSERT_EQ(0, unlink(file1_s1d2));
@@ -1850,9 +1882,10 @@ static void test_make_file(struct __test_metadata *const _metadata,
 	ASSERT_EQ(-1, rename(file2_s1d1, file1_s1d1));
 	ASSERT_EQ(EACCES, errno);
 
-	ASSERT_EQ(0, mknod(file1_s1d2, mode | 0400, dev)) {
-		TH_LOG("Failed to make file \"%s\": %s",
-				file1_s1d2, strerror(errno));
+	ASSERT_EQ(0, mknod(file1_s1d2, mode | 0400, dev))
+	{
+		TH_LOG("Failed to make file \"%s\": %s", file1_s1d2,
+		       strerror(errno));
 	};
 	ASSERT_EQ(0, link(file1_s1d2, file2_s1d2));
 	ASSERT_EQ(0, unlink(file2_s1d2));
@@ -1869,7 +1902,7 @@ TEST_F_FORK(layout1, make_char)
 	/* Creates a /dev/null device. */
 	set_cap(_metadata, CAP_MKNOD);
 	test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_CHAR, S_IFCHR,
-			makedev(1, 3));
+		       makedev(1, 3));
 }
 
 TEST_F_FORK(layout1, make_block)
@@ -1877,7 +1910,7 @@ TEST_F_FORK(layout1, make_block)
 	/* Creates a /dev/loop0 device. */
 	set_cap(_metadata, CAP_MKNOD);
 	test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_BLOCK, S_IFBLK,
-			makedev(7, 0));
+		       makedev(7, 0));
 }
 
 TEST_F_FORK(layout1, make_reg_1)
@@ -1909,8 +1942,8 @@ TEST_F_FORK(layout1, make_sym)
 		},
 		{},
 	};
-	const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
-			rules);
+	const int ruleset_fd =
+		create_ruleset(_metadata, rules[0].access, rules);
 
 	ASSERT_LE(0, ruleset_fd);
 
@@ -1954,8 +1987,8 @@ TEST_F_FORK(layout1, make_dir)
 		},
 		{},
 	};
-	const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
-			rules);
+	const int ruleset_fd =
+		create_ruleset(_metadata, rules[0].access, rules);
 
 	ASSERT_LE(0, ruleset_fd);
 
@@ -1974,12 +2007,12 @@ TEST_F_FORK(layout1, make_dir)
 }
 
 static int open_proc_fd(struct __test_metadata *const _metadata, const int fd,
-		const int open_flags)
+			const int open_flags)
 {
 	static const char path_template[] = "/proc/self/fd/%d";
 	char procfd_path[sizeof(path_template) + 10];
-	const int procfd_path_size = snprintf(procfd_path, sizeof(procfd_path),
-			path_template, fd);
+	const int procfd_path_size =
+		snprintf(procfd_path, sizeof(procfd_path), path_template, fd);
 
 	ASSERT_LT(procfd_path_size, sizeof(procfd_path));
 	return open(procfd_path, open_flags);
@@ -1995,9 +2028,10 @@ TEST_F_FORK(layout1, proc_unlinked_file)
 		{},
 	};
 	int reg_fd, proc_fd;
-	const int ruleset_fd = create_ruleset(_metadata,
-			LANDLOCK_ACCESS_FS_READ_FILE |
-			LANDLOCK_ACCESS_FS_WRITE_FILE, rules);
+	const int ruleset_fd = create_ruleset(
+		_metadata,
+		LANDLOCK_ACCESS_FS_READ_FILE | LANDLOCK_ACCESS_FS_WRITE_FILE,
+		rules);
 
 	ASSERT_LE(0, ruleset_fd);
 	enforce_ruleset(_metadata, ruleset_fd);
@@ -2014,9 +2048,10 @@ TEST_F_FORK(layout1, proc_unlinked_file)
 	ASSERT_EQ(0, close(proc_fd));
 
 	proc_fd = open_proc_fd(_metadata, reg_fd, O_RDWR | O_CLOEXEC);
-	ASSERT_EQ(-1, proc_fd) {
-		TH_LOG("Successfully opened /proc/self/fd/%d: %s",
-				reg_fd, strerror(errno));
+	ASSERT_EQ(-1, proc_fd)
+	{
+		TH_LOG("Successfully opened /proc/self/fd/%d: %s", reg_fd,
+		       strerror(errno));
 	}
 	ASSERT_EQ(EACCES, errno);
 
@@ -2032,13 +2067,13 @@ TEST_F_FORK(layout1, proc_pipe)
 		{
 			.path = dir_s1d2,
 			.access = LANDLOCK_ACCESS_FS_READ_FILE |
-				LANDLOCK_ACCESS_FS_WRITE_FILE,
+				  LANDLOCK_ACCESS_FS_WRITE_FILE,
 		},
 		{},
 	};
 	/* Limits read and write access to files tied to the filesystem. */
-	const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
-			rules);
+	const int ruleset_fd =
+		create_ruleset(_metadata, rules[0].access, rules);
 
 	ASSERT_LE(0, ruleset_fd);
 	enforce_ruleset(_metadata, ruleset_fd);
@@ -2050,7 +2085,8 @@ TEST_F_FORK(layout1, proc_pipe)
 
 	/* Checks access to pipes through FD. */
 	ASSERT_EQ(0, pipe2(pipe_fds, O_CLOEXEC));
-	ASSERT_EQ(1, write(pipe_fds[1], ".", 1)) {
+	ASSERT_EQ(1, write(pipe_fds[1], ".", 1))
+	{
 		TH_LOG("Failed to write in pipe: %s", strerror(errno));
 	}
 	ASSERT_EQ(1, read(pipe_fds[0], &buf, 1));
@@ -2059,9 +2095,10 @@ TEST_F_FORK(layout1, proc_pipe)
 	/* Checks write access to pipe through /proc/self/fd . */
 	proc_fd = open_proc_fd(_metadata, pipe_fds[1], O_WRONLY | O_CLOEXEC);
 	ASSERT_LE(0, proc_fd);
-	ASSERT_EQ(1, write(proc_fd, ".", 1)) {
+	ASSERT_EQ(1, write(proc_fd, ".", 1))
+	{
 		TH_LOG("Failed to write through /proc/self/fd/%d: %s",
-				pipe_fds[1], strerror(errno));
+		       pipe_fds[1], strerror(errno));
 	}
 	ASSERT_EQ(0, close(proc_fd));
 
@@ -2069,9 +2106,10 @@ TEST_F_FORK(layout1, proc_pipe)
 	proc_fd = open_proc_fd(_metadata, pipe_fds[0], O_RDONLY | O_CLOEXEC);
 	ASSERT_LE(0, proc_fd);
 	buf = '\0';
-	ASSERT_EQ(1, read(proc_fd, &buf, 1)) {
+	ASSERT_EQ(1, read(proc_fd, &buf, 1))
+	{
 		TH_LOG("Failed to read through /proc/self/fd/%d: %s",
-				pipe_fds[1], strerror(errno));
+		       pipe_fds[1], strerror(errno));
 	}
 	ASSERT_EQ(0, close(proc_fd));
 
@@ -2292,8 +2330,8 @@ TEST_F_FORK(layout1_bind, same_content_same_file)
 	ASSERT_EQ(EACCES, test_open(bind_file1_s1d3, O_WRONLY));
 }
 
-#define LOWER_BASE	TMP_DIR "/lower"
-#define LOWER_DATA	LOWER_BASE "/data"
+#define LOWER_BASE TMP_DIR "/lower"
+#define LOWER_DATA LOWER_BASE "/data"
 static const char lower_fl1[] = LOWER_DATA "/fl1";
 static const char lower_dl1[] = LOWER_DATA "/dl1";
 static const char lower_dl1_fl2[] = LOWER_DATA "/dl1/fl2";
@@ -2319,9 +2357,9 @@ static const char (*lower_sub_files[])[] = {
 	NULL,
 };
 
-#define UPPER_BASE	TMP_DIR "/upper"
-#define UPPER_DATA	UPPER_BASE "/data"
-#define UPPER_WORK	UPPER_BASE "/work"
+#define UPPER_BASE TMP_DIR "/upper"
+#define UPPER_DATA UPPER_BASE "/data"
+#define UPPER_WORK UPPER_BASE "/work"
 static const char upper_fu1[] = UPPER_DATA "/fu1";
 static const char upper_du1[] = UPPER_DATA "/du1";
 static const char upper_du1_fu2[] = UPPER_DATA "/du1/fu2";
@@ -2347,8 +2385,8 @@ static const char (*upper_sub_files[])[] = {
 	NULL,
 };
 
-#define MERGE_BASE	TMP_DIR "/merge"
-#define MERGE_DATA	MERGE_BASE "/data"
+#define MERGE_BASE TMP_DIR "/merge"
+#define MERGE_DATA MERGE_BASE "/data"
 static const char merge_fl1[] = MERGE_DATA "/fl1";
 static const char merge_dl1[] = MERGE_DATA "/dl1";
 static const char merge_dl1_fl2[] = MERGE_DATA "/dl1/fl2";
@@ -2374,12 +2412,8 @@ static const char (*merge_base_directories[])[] = {
 	NULL,
 };
 static const char (*merge_sub_files[])[] = {
-	&merge_dl1_fl2,
-	&merge_du1_fu2,
-	&merge_do1_fo2,
-	&merge_do1_fl3,
-	&merge_do1_fu3,
-	NULL,
+	&merge_dl1_fl2, &merge_du1_fu2, &merge_do1_fo2,
+	&merge_do1_fl3, &merge_do1_fu3, NULL,
 };
 
 /*
@@ -2455,9 +2489,8 @@ FIXTURE_SETUP(layout2_overlay)
 	set_cap(_metadata, CAP_SYS_ADMIN);
 	set_cap(_metadata, CAP_DAC_OVERRIDE);
 	ASSERT_EQ(0, mount("overlay", MERGE_DATA, "overlay", 0,
-				"lowerdir=" LOWER_DATA
-				",upperdir=" UPPER_DATA
-				",workdir=" UPPER_WORK));
+			   "lowerdir=" LOWER_DATA ",upperdir=" UPPER_DATA
+			   ",workdir=" UPPER_WORK));
 	clear_cap(_metadata, CAP_DAC_OVERRIDE);
 	clear_cap(_metadata, CAP_SYS_ADMIN);
 }
@@ -2524,9 +2557,9 @@ TEST_F_FORK(layout2_overlay, no_restriction)
 	ASSERT_EQ(0, test_open(merge_do1_fu3, O_RDONLY));
 }
 
-#define for_each_path(path_list, path_entry, i)			\
-	for (i = 0, path_entry = *path_list[i]; path_list[i];	\
-			path_entry = *path_list[++i])
+#define for_each_path(path_list, path_entry, i)               \
+	for (i = 0, path_entry = *path_list[i]; path_list[i]; \
+	     path_entry = *path_list[++i])
 
 TEST_F_FORK(layout2_overlay, same_content_different_file)
 {
@@ -2622,27 +2655,27 @@ TEST_F_FORK(layout2_overlay, same_content_different_file)
 		{
 			.path = merge_dl1_fl2,
 			.access = LANDLOCK_ACCESS_FS_READ_FILE |
-				LANDLOCK_ACCESS_FS_WRITE_FILE,
+				  LANDLOCK_ACCESS_FS_WRITE_FILE,
 		},
 		{
 			.path = merge_du1_fu2,
 			.access = LANDLOCK_ACCESS_FS_READ_FILE |
-				LANDLOCK_ACCESS_FS_WRITE_FILE,
+				  LANDLOCK_ACCESS_FS_WRITE_FILE,
 		},
 		{
 			.path = merge_do1_fo2,
 			.access = LANDLOCK_ACCESS_FS_READ_FILE |
-				LANDLOCK_ACCESS_FS_WRITE_FILE,
+				  LANDLOCK_ACCESS_FS_WRITE_FILE,
 		},
 		{
 			.path = merge_do1_fl3,
 			.access = LANDLOCK_ACCESS_FS_READ_FILE |
-				LANDLOCK_ACCESS_FS_WRITE_FILE,
+				  LANDLOCK_ACCESS_FS_WRITE_FILE,
 		},
 		{
 			.path = merge_do1_fu3,
 			.access = LANDLOCK_ACCESS_FS_READ_FILE |
-				LANDLOCK_ACCESS_FS_WRITE_FILE,
+				  LANDLOCK_ACCESS_FS_WRITE_FILE,
 		},
 		{},
 	};
@@ -2650,7 +2683,7 @@ TEST_F_FORK(layout2_overlay, same_content_different_file)
 		{
 			.path = MERGE_DATA,
 			.access = LANDLOCK_ACCESS_FS_READ_FILE |
-				LANDLOCK_ACCESS_FS_WRITE_FILE,
+				  LANDLOCK_ACCESS_FS_WRITE_FILE,
 		},
 		{},
 	};
@@ -2670,7 +2703,8 @@ TEST_F_FORK(layout2_overlay, same_content_different_file)
 		ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY));
 	}
 	for_each_path(lower_base_directories, path_entry, i) {
-		ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY | O_DIRECTORY));
+		ASSERT_EQ(EACCES,
+			  test_open(path_entry, O_RDONLY | O_DIRECTORY));
 	}
 	for_each_path(lower_sub_files, path_entry, i) {
 		ASSERT_EQ(0, test_open(path_entry, O_RDONLY));
@@ -2682,7 +2716,8 @@ TEST_F_FORK(layout2_overlay, same_content_different_file)
 		ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY));
 	}
 	for_each_path(upper_base_directories, path_entry, i) {
-		ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY | O_DIRECTORY));
+		ASSERT_EQ(EACCES,
+			  test_open(path_entry, O_RDONLY | O_DIRECTORY));
 	}
 	for_each_path(upper_sub_files, path_entry, i) {
 		ASSERT_EQ(0, test_open(path_entry, O_RDONLY));
@@ -2767,7 +2802,8 @@ TEST_F_FORK(layout2_overlay, same_content_different_file)
 		ASSERT_EQ(EACCES, test_open(path_entry, O_RDWR));
 	}
 	for_each_path(merge_base_directories, path_entry, i) {
-		ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY | O_DIRECTORY));
+		ASSERT_EQ(EACCES,
+			  test_open(path_entry, O_RDONLY | O_DIRECTORY));
 	}
 	for_each_path(merge_sub_files, path_entry, i) {
 		ASSERT_EQ(0, test_open(path_entry, O_RDWR));
@@ -2792,7 +2828,8 @@ TEST_F_FORK(layout2_overlay, same_content_different_file)
 		ASSERT_EQ(EACCES, test_open(path_entry, O_RDWR));
 	}
 	for_each_path(merge_base_directories, path_entry, i) {
-		ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY | O_DIRECTORY));
+		ASSERT_EQ(EACCES,
+			  test_open(path_entry, O_RDONLY | O_DIRECTORY));
 	}
 	for_each_path(merge_sub_files, path_entry, i) {
 		ASSERT_EQ(0, test_open(path_entry, O_RDWR));
diff --git a/tools/testing/selftests/landlock/ptrace_test.c b/tools/testing/selftests/landlock/ptrace_test.c
index 090adadfe2dc..c28ef98ff3ac 100644
--- a/tools/testing/selftests/landlock/ptrace_test.c
+++ b/tools/testing/selftests/landlock/ptrace_test.c
@@ -26,9 +26,10 @@ static void create_domain(struct __test_metadata *const _metadata)
 		.handled_access_fs = LANDLOCK_ACCESS_FS_MAKE_BLOCK,
 	};
 
-	ruleset_fd = landlock_create_ruleset(&ruleset_attr,
-			sizeof(ruleset_attr), 0);
-	EXPECT_LE(0, ruleset_fd) {
+	ruleset_fd =
+		landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+	EXPECT_LE(0, ruleset_fd)
+	{
 		TH_LOG("Failed to create a ruleset: %s", strerror(errno));
 	}
 	EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
@@ -43,7 +44,7 @@ static int test_ptrace_read(const pid_t pid)
 	int procenv_path_size, fd;
 
 	procenv_path_size = snprintf(procenv_path, sizeof(procenv_path),
-			path_template, pid);
+				     path_template, pid);
 	if (procenv_path_size >= sizeof(procenv_path))
 		return E2BIG;
 
@@ -63,7 +64,8 @@ static int test_ptrace_read(const pid_t pid)
 FIXTURE(hierarchy) {};
 /* clang-format on */
 
-FIXTURE_VARIANT(hierarchy) {
+FIXTURE_VARIANT(hierarchy)
+{
 	const bool domain_both;
 	const bool domain_parent;
 	const bool domain_child;
@@ -217,10 +219,12 @@ FIXTURE_VARIANT_ADD(hierarchy, deny_with_forked_domain) {
 };
 
 FIXTURE_SETUP(hierarchy)
-{ }
+{
+}
 
 FIXTURE_TEARDOWN(hierarchy)
-{ }
+{
+}
 
 /* Test PTRACE_TRACEME and PTRACE_ATTACH for parent and child. */
 TEST_F(hierarchy, trace)
@@ -348,7 +352,7 @@ TEST_F(hierarchy, trace)
 	ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
 	ASSERT_EQ(child, waitpid(child, &status, 0));
 	if (WIFSIGNALED(status) || !WIFEXITED(status) ||
-			WEXITSTATUS(status) != EXIT_SUCCESS)
+	    WEXITSTATUS(status) != EXIT_SUCCESS)
 		_metadata->passed = 0;
 }
 
-- 
cgit 


From 87129ef13603ae46c82bcd09eed948acf0506dbb Mon Sep 17 00:00:00 2001
From: Mickaël Salaün <mic@digikod.net>
Date: Fri, 6 May 2022 18:08:12 +0200
Subject: selftests/landlock: Make tests build with old libc
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace SYS_<syscall> with __NR_<syscall>.  Using the __NR_<syscall>
notation, provided by UAPI, is useful to build tests on systems without
the SYS_<syscall> definitions.

Replace SYS_pivot_root with __NR_pivot_root, and SYS_move_mount with
__NR_move_mount.

Define renameat2() and RENAME_EXCHANGE if they are unknown to old build
systems.

Cc: Shuah Khan <shuah@kernel.org>
Link: https://lore.kernel.org/r/20220506160820.524344-3-mic@digikod.net
Cc: stable@vger.kernel.org
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---
 tools/testing/selftests/landlock/fs_test.c | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
index 28b01cb30c78..cc7fa7b17578 100644
--- a/tools/testing/selftests/landlock/fs_test.c
+++ b/tools/testing/selftests/landlock/fs_test.c
@@ -22,6 +22,19 @@
 
 #include "common.h"
 
+#ifndef renameat2
+int renameat2(int olddirfd, const char *oldpath, int newdirfd,
+	      const char *newpath, unsigned int flags)
+{
+	return syscall(__NR_renameat2, olddirfd, oldpath, newdirfd, newpath,
+		       flags);
+}
+#endif
+
+#ifndef RENAME_EXCHANGE
+#define RENAME_EXCHANGE (1 << 1)
+#endif
+
 #define TMP_DIR "tmp"
 #define BINARY_PATH "./true"
 
@@ -1279,7 +1292,7 @@ TEST_F_FORK(layout1, rule_inside_mount_ns)
 	int ruleset_fd;
 
 	set_cap(_metadata, CAP_SYS_ADMIN);
-	ASSERT_EQ(0, syscall(SYS_pivot_root, dir_s3d2, dir_s3d3))
+	ASSERT_EQ(0, syscall(__NR_pivot_root, dir_s3d2, dir_s3d3))
 	{
 		TH_LOG("Failed to pivot root: %s", strerror(errno));
 	};
@@ -1313,7 +1326,7 @@ TEST_F_FORK(layout1, mount_and_pivot)
 	set_cap(_metadata, CAP_SYS_ADMIN);
 	ASSERT_EQ(-1, mount(NULL, dir_s3d2, NULL, MS_RDONLY, NULL));
 	ASSERT_EQ(EPERM, errno);
-	ASSERT_EQ(-1, syscall(SYS_pivot_root, dir_s3d2, dir_s3d3));
+	ASSERT_EQ(-1, syscall(__NR_pivot_root, dir_s3d2, dir_s3d3));
 	ASSERT_EQ(EPERM, errno);
 	clear_cap(_metadata, CAP_SYS_ADMIN);
 }
@@ -1332,13 +1345,13 @@ TEST_F_FORK(layout1, move_mount)
 	ASSERT_LE(0, ruleset_fd);
 
 	set_cap(_metadata, CAP_SYS_ADMIN);
-	ASSERT_EQ(0, syscall(SYS_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD,
+	ASSERT_EQ(0, syscall(__NR_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD,
 			     dir_s1d2, 0))
 	{
 		TH_LOG("Failed to move mount: %s", strerror(errno));
 	}
 
-	ASSERT_EQ(0, syscall(SYS_move_mount, AT_FDCWD, dir_s1d2, AT_FDCWD,
+	ASSERT_EQ(0, syscall(__NR_move_mount, AT_FDCWD, dir_s1d2, AT_FDCWD,
 			     dir_s3d2, 0));
 	clear_cap(_metadata, CAP_SYS_ADMIN);
 
@@ -1346,7 +1359,7 @@ TEST_F_FORK(layout1, move_mount)
 	ASSERT_EQ(0, close(ruleset_fd));
 
 	set_cap(_metadata, CAP_SYS_ADMIN);
-	ASSERT_EQ(-1, syscall(SYS_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD,
+	ASSERT_EQ(-1, syscall(__NR_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD,
 			      dir_s1d2, 0));
 	ASSERT_EQ(EPERM, errno);
 	clear_cap(_metadata, CAP_SYS_ADMIN);
-- 
cgit 


From 291865bd7e8bb4b4033d341fa02dafa728e6378c Mon Sep 17 00:00:00 2001
From: Mickaël Salaün <mic@digikod.net>
Date: Fri, 6 May 2022 18:08:13 +0200
Subject: selftests/landlock: Extend tests for minimal valid attribute size
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This might be useful when the struct landlock_ruleset_attr will get more
fields.

Cc: Shuah Khan <shuah@kernel.org>
Link: https://lore.kernel.org/r/20220506160820.524344-4-mic@digikod.net
Cc: stable@vger.kernel.org
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---
 tools/testing/selftests/landlock/base_test.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c
index 3faeae4233a4..be9b937256ac 100644
--- a/tools/testing/selftests/landlock/base_test.c
+++ b/tools/testing/selftests/landlock/base_test.c
@@ -35,6 +35,8 @@ TEST(inconsistent_attr)
 	ASSERT_EQ(EINVAL, errno);
 	ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, 1, 0));
 	ASSERT_EQ(EINVAL, errno);
+	ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, 7, 0));
+	ASSERT_EQ(EINVAL, errno);
 
 	ASSERT_EQ(-1, landlock_create_ruleset(NULL, 1, 0));
 	/* The size if less than sizeof(struct landlock_attr_enforce). */
@@ -47,6 +49,9 @@ TEST(inconsistent_attr)
 	ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, page_size + 1, 0));
 	ASSERT_EQ(E2BIG, errno);
 
+	/* Checks minimal valid attribute size. */
+	ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, 8, 0));
+	ASSERT_EQ(ENOMSG, errno);
 	ASSERT_EQ(-1, landlock_create_ruleset(
 			      ruleset_attr,
 			      sizeof(struct landlock_ruleset_attr), 0));
-- 
cgit 


From c56b3bf566da5a0dd3b58ad97a614b0928b06ebf Mon Sep 17 00:00:00 2001
From: Mickaël Salaün <mic@digikod.net>
Date: Fri, 6 May 2022 18:08:14 +0200
Subject: selftests/landlock: Add tests for unknown access rights
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Make sure that trying to use unknown access rights returns an error.

Cc: Shuah Khan <shuah@kernel.org>
Link: https://lore.kernel.org/r/20220506160820.524344-5-mic@digikod.net
Cc: stable@vger.kernel.org
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---
 tools/testing/selftests/landlock/fs_test.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
index cc7fa7b17578..f293b7e2a1a7 100644
--- a/tools/testing/selftests/landlock/fs_test.c
+++ b/tools/testing/selftests/landlock/fs_test.c
@@ -448,6 +448,22 @@ TEST_F_FORK(layout1, file_access_rights)
 	ASSERT_EQ(0, close(path_beneath.parent_fd));
 }
 
+TEST_F_FORK(layout1, unknown_access_rights)
+{
+	__u64 access_mask;
+
+	for (access_mask = 1ULL << 63; access_mask != ACCESS_LAST;
+	     access_mask >>= 1) {
+		struct landlock_ruleset_attr ruleset_attr = {
+			.handled_access_fs = access_mask,
+		};
+
+		ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr,
+						      sizeof(ruleset_attr), 0));
+		ASSERT_EQ(EINVAL, errno);
+	}
+}
+
 static void add_path_beneath(struct __test_metadata *const _metadata,
 			     const int ruleset_fd, const __u64 allowed_access,
 			     const char *const path)
-- 
cgit 


From d18955d094d09a220cf8f533f5e896a2fe31575a Mon Sep 17 00:00:00 2001
From: Mickaël Salaün <mic@digikod.net>
Date: Fri, 6 May 2022 18:08:15 +0200
Subject: selftests/landlock: Extend access right tests to directories
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Make sure that all filesystem access rights can be tied to directories.

Rename layout1.file_access_rights to layout1.file_and_dir_access_rights
to reflect this change.

Cc: Shuah Khan <shuah@kernel.org>
Link: https://lore.kernel.org/r/20220506160820.524344-6-mic@digikod.net
Cc: stable@vger.kernel.org
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---
 tools/testing/selftests/landlock/fs_test.c | 30 ++++++++++++++++++++++--------
 1 file changed, 22 insertions(+), 8 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
index f293b7e2a1a7..75f9358512df 100644
--- a/tools/testing/selftests/landlock/fs_test.c
+++ b/tools/testing/selftests/landlock/fs_test.c
@@ -418,11 +418,12 @@ TEST_F_FORK(layout1, inval)
 
 /* clang-format on */
 
-TEST_F_FORK(layout1, file_access_rights)
+TEST_F_FORK(layout1, file_and_dir_access_rights)
 {
 	__u64 access;
 	int err;
-	struct landlock_path_beneath_attr path_beneath = {};
+	struct landlock_path_beneath_attr path_beneath_file = {},
+					  path_beneath_dir = {};
 	struct landlock_ruleset_attr ruleset_attr = {
 		.handled_access_fs = ACCESS_ALL,
 	};
@@ -432,20 +433,33 @@ TEST_F_FORK(layout1, file_access_rights)
 	ASSERT_LE(0, ruleset_fd);
 
 	/* Tests access rights for files. */
-	path_beneath.parent_fd = open(file1_s1d2, O_PATH | O_CLOEXEC);
-	ASSERT_LE(0, path_beneath.parent_fd);
+	path_beneath_file.parent_fd = open(file1_s1d2, O_PATH | O_CLOEXEC);
+	ASSERT_LE(0, path_beneath_file.parent_fd);
+
+	/* Tests access rights for directories. */
+	path_beneath_dir.parent_fd =
+		open(dir_s1d2, O_PATH | O_DIRECTORY | O_CLOEXEC);
+	ASSERT_LE(0, path_beneath_dir.parent_fd);
+
 	for (access = 1; access <= ACCESS_LAST; access <<= 1) {
-		path_beneath.allowed_access = access;
+		path_beneath_dir.allowed_access = access;
+		ASSERT_EQ(0, landlock_add_rule(ruleset_fd,
+					       LANDLOCK_RULE_PATH_BENEATH,
+					       &path_beneath_dir, 0));
+
+		path_beneath_file.allowed_access = access;
 		err = landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
-					&path_beneath, 0);
-		if ((access | ACCESS_FILE) == ACCESS_FILE) {
+					&path_beneath_file, 0);
+		if (access & ACCESS_FILE) {
 			ASSERT_EQ(0, err);
 		} else {
 			ASSERT_EQ(-1, err);
 			ASSERT_EQ(EINVAL, errno);
 		}
 	}
-	ASSERT_EQ(0, close(path_beneath.parent_fd));
+	ASSERT_EQ(0, close(path_beneath_file.parent_fd));
+	ASSERT_EQ(0, close(path_beneath_dir.parent_fd));
+	ASSERT_EQ(0, close(ruleset_fd));
 }
 
 TEST_F_FORK(layout1, unknown_access_rights)
-- 
cgit 


From 6a1bdd4a0bfc30fa4fa2b3a979e6525f28996db9 Mon Sep 17 00:00:00 2001
From: Mickaël Salaün <mic@digikod.net>
Date: Fri, 6 May 2022 18:08:16 +0200
Subject: selftests/landlock: Fully test file rename with "remove" access
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These tests were missing to check the check_access_path() call with all
combinations of maybe_remove(old_dentry) and maybe_remove(new_dentry).

Extend layout1.link with a new complementary test and check that
REMOVE_FILE is not required to link a file.

Cc: Shuah Khan <shuah@kernel.org>
Link: https://lore.kernel.org/r/20220506160820.524344-7-mic@digikod.net
Cc: stable@vger.kernel.org
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---
 tools/testing/selftests/landlock/fs_test.c | 41 +++++++++++++++++++++++++++---
 1 file changed, 37 insertions(+), 4 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
index 75f9358512df..9165f6adf7b9 100644
--- a/tools/testing/selftests/landlock/fs_test.c
+++ b/tools/testing/selftests/landlock/fs_test.c
@@ -1659,15 +1659,21 @@ TEST_F_FORK(layout1, execute)
 
 TEST_F_FORK(layout1, link)
 {
-	const struct rule rules[] = {
+	const struct rule layer1[] = {
 		{
 			.path = dir_s1d2,
 			.access = LANDLOCK_ACCESS_FS_MAKE_REG,
 		},
 		{},
 	};
-	const int ruleset_fd =
-		create_ruleset(_metadata, rules[0].access, rules);
+	const struct rule layer2[] = {
+		{
+			.path = dir_s1d3,
+			.access = LANDLOCK_ACCESS_FS_REMOVE_FILE,
+		},
+		{},
+	};
+	int ruleset_fd = create_ruleset(_metadata, layer1[0].access, layer1);
 
 	ASSERT_LE(0, ruleset_fd);
 
@@ -1680,14 +1686,30 @@ TEST_F_FORK(layout1, link)
 
 	ASSERT_EQ(-1, link(file2_s1d1, file1_s1d1));
 	ASSERT_EQ(EACCES, errno);
+
 	/* Denies linking because of reparenting. */
 	ASSERT_EQ(-1, link(file1_s2d1, file1_s1d2));
 	ASSERT_EQ(EXDEV, errno);
 	ASSERT_EQ(-1, link(file2_s1d2, file1_s1d3));
 	ASSERT_EQ(EXDEV, errno);
+	ASSERT_EQ(-1, link(file2_s1d3, file1_s1d2));
+	ASSERT_EQ(EXDEV, errno);
 
 	ASSERT_EQ(0, link(file2_s1d2, file1_s1d2));
 	ASSERT_EQ(0, link(file2_s1d3, file1_s1d3));
+
+	/* Prepares for next unlinks. */
+	ASSERT_EQ(0, unlink(file2_s1d2));
+	ASSERT_EQ(0, unlink(file2_s1d3));
+
+	ruleset_fd = create_ruleset(_metadata, layer2[0].access, layer2);
+	ASSERT_LE(0, ruleset_fd);
+	enforce_ruleset(_metadata, ruleset_fd);
+	ASSERT_EQ(0, close(ruleset_fd));
+
+	/* Checks that linkind doesn't require the ability to delete a file. */
+	ASSERT_EQ(0, link(file1_s1d2, file2_s1d2));
+	ASSERT_EQ(0, link(file1_s1d3, file2_s1d3));
 }
 
 TEST_F_FORK(layout1, rename_file)
@@ -1708,7 +1730,6 @@ TEST_F_FORK(layout1, rename_file)
 
 	ASSERT_LE(0, ruleset_fd);
 
-	ASSERT_EQ(0, unlink(file1_s1d1));
 	ASSERT_EQ(0, unlink(file1_s1d2));
 
 	enforce_ruleset(_metadata, ruleset_fd);
@@ -1744,9 +1765,15 @@ TEST_F_FORK(layout1, rename_file)
 	ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s2d2, AT_FDCWD, file1_s2d1,
 				RENAME_EXCHANGE));
 	ASSERT_EQ(EACCES, errno);
+	/* Checks that file1_s2d1 cannot be removed (instead of ENOTDIR). */
+	ASSERT_EQ(-1, rename(dir_s2d2, file1_s2d1));
+	ASSERT_EQ(EACCES, errno);
 	ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d1, AT_FDCWD, dir_s2d2,
 				RENAME_EXCHANGE));
 	ASSERT_EQ(EACCES, errno);
+	/* Checks that file1_s1d1 cannot be removed (instead of EISDIR). */
+	ASSERT_EQ(-1, rename(file1_s1d1, dir_s1d2));
+	ASSERT_EQ(EACCES, errno);
 
 	/* Renames files with different parents. */
 	ASSERT_EQ(-1, rename(file1_s2d2, file1_s1d2));
@@ -1809,9 +1836,15 @@ TEST_F_FORK(layout1, rename_dir)
 	ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s1d1, AT_FDCWD, dir_s2d1,
 				RENAME_EXCHANGE));
 	ASSERT_EQ(EACCES, errno);
+	/* Checks that dir_s1d2 cannot be removed (instead of ENOTDIR). */
+	ASSERT_EQ(-1, rename(dir_s1d2, file1_s1d1));
+	ASSERT_EQ(EACCES, errno);
 	ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, dir_s1d2,
 				RENAME_EXCHANGE));
 	ASSERT_EQ(EACCES, errno);
+	/* Checks that dir_s1d2 cannot be removed (instead of EISDIR). */
+	ASSERT_EQ(-1, rename(file1_s1d1, dir_s1d2));
+	ASSERT_EQ(EACCES, errno);
 
 	/*
 	 * Exchanges and renames directory to the same parent, which allows
-- 
cgit 


From d1788ad990874734341b05ab8ccb6448c09c6422 Mon Sep 17 00:00:00 2001
From: Mickaël Salaün <mic@digikod.net>
Date: Fri, 6 May 2022 18:08:17 +0200
Subject: selftests/landlock: Add tests for O_PATH
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The O_PATH flag is currently not handled by Landlock.  Let's make sure
this behavior will remain consistent with the same ruleset over time.

Cc: Shuah Khan <shuah@kernel.org>
Link: https://lore.kernel.org/r/20220506160820.524344-8-mic@digikod.net
Cc: stable@vger.kernel.org
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---
 tools/testing/selftests/landlock/fs_test.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
index 9165f6adf7b9..a8f54c4462eb 100644
--- a/tools/testing/selftests/landlock/fs_test.c
+++ b/tools/testing/selftests/landlock/fs_test.c
@@ -654,17 +654,23 @@ TEST_F_FORK(layout1, effective_access)
 	enforce_ruleset(_metadata, ruleset_fd);
 	ASSERT_EQ(0, close(ruleset_fd));
 
-	/* Tests on a directory. */
+	/* Tests on a directory (with or without O_PATH). */
 	ASSERT_EQ(EACCES, test_open("/", O_RDONLY));
+	ASSERT_EQ(0, test_open("/", O_RDONLY | O_PATH));
 	ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY));
+	ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY | O_PATH));
 	ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
+	ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY | O_PATH));
+
 	ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY));
 	ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
 	ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY));
 	ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
 
-	/* Tests on a file. */
+	/* Tests on a file (with or without O_PATH). */
 	ASSERT_EQ(EACCES, test_open(dir_s2d2, O_RDONLY));
+	ASSERT_EQ(0, test_open(dir_s2d2, O_RDONLY | O_PATH));
+
 	ASSERT_EQ(0, test_open(file1_s2d2, O_RDONLY));
 
 	/* Checks effective read and write actions. */
-- 
cgit 


From 589172e5636c4d16c40b90e87543d43defe2d968 Mon Sep 17 00:00:00 2001
From: Mickaël Salaün <mic@digikod.net>
Date: Fri, 6 May 2022 18:08:18 +0200
Subject: landlock: Change landlock_add_rule(2) argument check ordering
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This makes more sense to first check the ruleset FD and then the rule
attribute.  It will be useful to factor out code for other rule types.

Add inval_add_rule_arguments tests, extension of empty_path_beneath_attr
tests, to also check error ordering for landlock_add_rule(2).

Link: https://lore.kernel.org/r/20220506160820.524344-9-mic@digikod.net
Cc: stable@vger.kernel.org
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---
 security/landlock/syscalls.c                 | 22 ++++++++++--------
 tools/testing/selftests/landlock/base_test.c | 34 ++++++++++++++++++++++++++--
 2 files changed, 45 insertions(+), 11 deletions(-)

(limited to 'tools/testing')

diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c
index 7edc1d50e2bf..a7396220c9d4 100644
--- a/security/landlock/syscalls.c
+++ b/security/landlock/syscalls.c
@@ -318,20 +318,24 @@ SYSCALL_DEFINE4(landlock_add_rule, const int, ruleset_fd,
 	if (flags)
 		return -EINVAL;
 
-	if (rule_type != LANDLOCK_RULE_PATH_BENEATH)
-		return -EINVAL;
-
-	/* Copies raw user space buffer, only one type for now. */
-	res = copy_from_user(&path_beneath_attr, rule_attr,
-			     sizeof(path_beneath_attr));
-	if (res)
-		return -EFAULT;
-
 	/* Gets and checks the ruleset. */
 	ruleset = get_ruleset_from_fd(ruleset_fd, FMODE_CAN_WRITE);
 	if (IS_ERR(ruleset))
 		return PTR_ERR(ruleset);
 
+	if (rule_type != LANDLOCK_RULE_PATH_BENEATH) {
+		err = -EINVAL;
+		goto out_put_ruleset;
+	}
+
+	/* Copies raw user space buffer, only one type for now. */
+	res = copy_from_user(&path_beneath_attr, rule_attr,
+			     sizeof(path_beneath_attr));
+	if (res) {
+		err = -EFAULT;
+		goto out_put_ruleset;
+	}
+
 	/*
 	 * Informs about useless rule: empty allowed_access (i.e. deny rules)
 	 * are ignored in path walks.
diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c
index be9b937256ac..18b779471dcb 100644
--- a/tools/testing/selftests/landlock/base_test.c
+++ b/tools/testing/selftests/landlock/base_test.c
@@ -121,20 +121,50 @@ TEST(inval_create_ruleset_flags)
 	ASSERT_EQ(EINVAL, errno);
 }
 
-TEST(empty_path_beneath_attr)
+/* Tests ordering of syscall argument checks. */
+TEST(add_rule_checks_ordering)
 {
 	const struct landlock_ruleset_attr ruleset_attr = {
 		.handled_access_fs = LANDLOCK_ACCESS_FS_EXECUTE,
 	};
+	struct landlock_path_beneath_attr path_beneath_attr = {
+		.allowed_access = LANDLOCK_ACCESS_FS_EXECUTE,
+		.parent_fd = -1,
+	};
 	const int ruleset_fd =
 		landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
 
 	ASSERT_LE(0, ruleset_fd);
 
-	/* Similar to struct landlock_path_beneath_attr.parent_fd = 0 */
+	/* Checks invalid flags. */
+	ASSERT_EQ(-1, landlock_add_rule(-1, 0, NULL, 1));
+	ASSERT_EQ(EINVAL, errno);
+
+	/* Checks invalid ruleset FD. */
+	ASSERT_EQ(-1, landlock_add_rule(-1, 0, NULL, 0));
+	ASSERT_EQ(EBADF, errno);
+
+	/* Checks invalid rule type. */
+	ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, 0, NULL, 0));
+	ASSERT_EQ(EINVAL, errno);
+
+	/* Checks invalid rule attr. */
 	ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
 					NULL, 0));
 	ASSERT_EQ(EFAULT, errno);
+
+	/* Checks invalid path_beneath.parent_fd. */
+	ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+					&path_beneath_attr, 0));
+	ASSERT_EQ(EBADF, errno);
+
+	/* Checks valid call. */
+	path_beneath_attr.parent_fd =
+		open("/tmp", O_PATH | O_NOFOLLOW | O_DIRECTORY | O_CLOEXEC);
+	ASSERT_LE(0, path_beneath_attr.parent_fd);
+	ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+				       &path_beneath_attr, 0));
+	ASSERT_EQ(0, close(path_beneath_attr.parent_fd));
 	ASSERT_EQ(0, close(ruleset_fd));
 }
 
-- 
cgit 


From eba39ca4b155c54adf471a69e91799cc1727873f Mon Sep 17 00:00:00 2001
From: Mickaël Salaün <mic@digikod.net>
Date: Fri, 6 May 2022 18:08:19 +0200
Subject: landlock: Change landlock_restrict_self(2) check ordering
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

According to the Landlock goal to be a security feature available to
unprivileges processes, it makes more sense to first check for
no_new_privs before checking anything else (i.e. syscall arguments).

Merge inval_fd_enforce and unpriv_enforce_without_no_new_privs tests
into the new restrict_self_checks_ordering.  This is similar to the
previous commit checking other syscalls.

Link: https://lore.kernel.org/r/20220506160820.524344-10-mic@digikod.net
Cc: stable@vger.kernel.org
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---
 security/landlock/syscalls.c                 |  8 ++---
 tools/testing/selftests/landlock/base_test.c | 47 ++++++++++++++++++++++------
 2 files changed, 41 insertions(+), 14 deletions(-)

(limited to 'tools/testing')

diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c
index a7396220c9d4..507d43827afe 100644
--- a/security/landlock/syscalls.c
+++ b/security/landlock/syscalls.c
@@ -405,10 +405,6 @@ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32,
 	if (!landlock_initialized)
 		return -EOPNOTSUPP;
 
-	/* No flag for now. */
-	if (flags)
-		return -EINVAL;
-
 	/*
 	 * Similar checks as for seccomp(2), except that an -EPERM may be
 	 * returned.
@@ -417,6 +413,10 @@ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32,
 	    !ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN))
 		return -EPERM;
 
+	/* No flag for now. */
+	if (flags)
+		return -EINVAL;
+
 	/* Gets and checks the ruleset. */
 	ruleset = get_ruleset_from_fd(ruleset_fd, FMODE_CAN_READ);
 	if (IS_ERR(ruleset))
diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c
index 18b779471dcb..21fb33581419 100644
--- a/tools/testing/selftests/landlock/base_test.c
+++ b/tools/testing/selftests/landlock/base_test.c
@@ -168,22 +168,49 @@ TEST(add_rule_checks_ordering)
 	ASSERT_EQ(0, close(ruleset_fd));
 }
 
-TEST(inval_fd_enforce)
+/* Tests ordering of syscall argument and permission checks. */
+TEST(restrict_self_checks_ordering)
 {
+	const struct landlock_ruleset_attr ruleset_attr = {
+		.handled_access_fs = LANDLOCK_ACCESS_FS_EXECUTE,
+	};
+	struct landlock_path_beneath_attr path_beneath_attr = {
+		.allowed_access = LANDLOCK_ACCESS_FS_EXECUTE,
+		.parent_fd = -1,
+	};
+	const int ruleset_fd =
+		landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+
+	ASSERT_LE(0, ruleset_fd);
+	path_beneath_attr.parent_fd =
+		open("/tmp", O_PATH | O_NOFOLLOW | O_DIRECTORY | O_CLOEXEC);
+	ASSERT_LE(0, path_beneath_attr.parent_fd);
+	ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+				       &path_beneath_attr, 0));
+	ASSERT_EQ(0, close(path_beneath_attr.parent_fd));
+
+	/* Checks unprivileged enforcement without no_new_privs. */
+	drop_caps(_metadata);
+	ASSERT_EQ(-1, landlock_restrict_self(-1, -1));
+	ASSERT_EQ(EPERM, errno);
+	ASSERT_EQ(-1, landlock_restrict_self(-1, 0));
+	ASSERT_EQ(EPERM, errno);
+	ASSERT_EQ(-1, landlock_restrict_self(ruleset_fd, 0));
+	ASSERT_EQ(EPERM, errno);
+
 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
 
+	/* Checks invalid flags. */
+	ASSERT_EQ(-1, landlock_restrict_self(-1, -1));
+	ASSERT_EQ(EINVAL, errno);
+
+	/* Checks invalid ruleset FD. */
 	ASSERT_EQ(-1, landlock_restrict_self(-1, 0));
 	ASSERT_EQ(EBADF, errno);
-}
-
-TEST(unpriv_enforce_without_no_new_privs)
-{
-	int err;
 
-	drop_caps(_metadata);
-	err = landlock_restrict_self(-1, 0);
-	ASSERT_EQ(EPERM, errno);
-	ASSERT_EQ(err, -1);
+	/* Checks valid call. */
+	ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0));
+	ASSERT_EQ(0, close(ruleset_fd));
 }
 
 TEST(ruleset_fd_io)
-- 
cgit 


From 6533d0c3a86ee1cc74ff37ac92ca597deb87015c Mon Sep 17 00:00:00 2001
From: Mickaël Salaün <mic@digikod.net>
Date: Fri, 6 May 2022 18:08:20 +0200
Subject: selftests/landlock: Test landlock_create_ruleset(2) argument check
 ordering
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add inval_create_ruleset_arguments, extension of
inval_create_ruleset_flags, to also check error ordering for
landlock_create_ruleset(2).

This is similar to the previous commit checking landlock_add_rule(2).

Test coverage for security/landlock is 94.4% of 504 lines accorging to
gcc/gcov-11.

Link: https://lore.kernel.org/r/20220506160820.524344-11-mic@digikod.net
Cc: stable@vger.kernel.org
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---
 tools/testing/selftests/landlock/base_test.c | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c
index 21fb33581419..35f64832b869 100644
--- a/tools/testing/selftests/landlock/base_test.c
+++ b/tools/testing/selftests/landlock/base_test.c
@@ -97,14 +97,17 @@ TEST(abi_version)
 	ASSERT_EQ(EINVAL, errno);
 }
 
-TEST(inval_create_ruleset_flags)
+/* Tests ordering of syscall argument checks. */
+TEST(create_ruleset_checks_ordering)
 {
 	const int last_flag = LANDLOCK_CREATE_RULESET_VERSION;
 	const int invalid_flag = last_flag << 1;
+	int ruleset_fd;
 	const struct landlock_ruleset_attr ruleset_attr = {
 		.handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE,
 	};
 
+	/* Checks priority for invalid flags. */
 	ASSERT_EQ(-1, landlock_create_ruleset(NULL, 0, invalid_flag));
 	ASSERT_EQ(EINVAL, errno);
 
@@ -119,6 +122,22 @@ TEST(inval_create_ruleset_flags)
 		  landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr),
 					  invalid_flag));
 	ASSERT_EQ(EINVAL, errno);
+
+	/* Checks too big ruleset_attr size. */
+	ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, -1, 0));
+	ASSERT_EQ(E2BIG, errno);
+
+	/* Checks too small ruleset_attr size. */
+	ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0, 0));
+	ASSERT_EQ(EINVAL, errno);
+	ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 1, 0));
+	ASSERT_EQ(EINVAL, errno);
+
+	/* Checks valid call. */
+	ruleset_fd =
+		landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+	ASSERT_LE(0, ruleset_fd);
+	ASSERT_EQ(0, close(ruleset_fd));
 }
 
 /* Tests ordering of syscall argument checks. */
-- 
cgit 


From 75c542d6c6cc48720376862d5496d51509160dfd Mon Sep 17 00:00:00 2001
From: Mickaël Salaün <mic@digikod.net>
Date: Fri, 6 May 2022 18:10:52 +0200
Subject: landlock: Reduce the maximum number of layers to 16
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The maximum number of nested Landlock domains is currently 64.  Because
of the following fix and to help reduce the stack size, let's reduce it
to 16.  This seems large enough for a lot of use cases (e.g. sandboxed
init service, spawning a sandboxed SSH service, in nested sandboxed
containers).  Reducing the number of nested domains may also help to
discover misuse of Landlock (e.g. creating a domain per rule).

Add and use a dedicated layer_mask_t typedef to fit with the number of
layers.  This might be useful when changing it and to keep it consistent
with the maximum number of layers.

Reviewed-by: Paul Moore <paul@paul-moore.com>
Link: https://lore.kernel.org/r/20220506161102.525323-3-mic@digikod.net
Cc: stable@vger.kernel.org
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---
 Documentation/userspace-api/landlock.rst   |  4 ++--
 security/landlock/fs.c                     | 17 +++++++----------
 security/landlock/limits.h                 |  2 +-
 security/landlock/ruleset.h                |  4 ++++
 tools/testing/selftests/landlock/fs_test.c |  2 +-
 5 files changed, 15 insertions(+), 14 deletions(-)

(limited to 'tools/testing')

diff --git a/Documentation/userspace-api/landlock.rst b/Documentation/userspace-api/landlock.rst
index f35552ff19ba..b68e7a51009f 100644
--- a/Documentation/userspace-api/landlock.rst
+++ b/Documentation/userspace-api/landlock.rst
@@ -267,8 +267,8 @@ restrict such paths with dedicated ruleset flags.
 Ruleset layers
 --------------
 
-There is a limit of 64 layers of stacked rulesets.  This can be an issue for a
-task willing to enforce a new ruleset in complement to its 64 inherited
+There is a limit of 16 layers of stacked rulesets.  This can be an issue for a
+task willing to enforce a new ruleset in complement to its 16 inherited
 rulesets.  Once this limit is reached, sys_landlock_restrict_self() returns
 E2BIG.  It is then strongly suggested to carefully build rulesets once in the
 life of a thread, especially for applications able to launch other applications
diff --git a/security/landlock/fs.c b/security/landlock/fs.c
index d4006add8bdf..f48c0a3b1e75 100644
--- a/security/landlock/fs.c
+++ b/security/landlock/fs.c
@@ -183,10 +183,10 @@ int landlock_append_fs_rule(struct landlock_ruleset *const ruleset,
 
 /* Access-control management */
 
-static inline u64 unmask_layers(const struct landlock_ruleset *const domain,
-				const struct path *const path,
-				const access_mask_t access_request,
-				u64 layer_mask)
+static inline layer_mask_t
+unmask_layers(const struct landlock_ruleset *const domain,
+	      const struct path *const path, const access_mask_t access_request,
+	      layer_mask_t layer_mask)
 {
 	const struct landlock_rule *rule;
 	const struct inode *inode;
@@ -212,11 +212,11 @@ static inline u64 unmask_layers(const struct landlock_ruleset *const domain,
 	 */
 	for (i = 0; i < rule->num_layers; i++) {
 		const struct landlock_layer *const layer = &rule->layers[i];
-		const u64 layer_level = BIT_ULL(layer->level - 1);
+		const layer_mask_t layer_bit = BIT_ULL(layer->level - 1);
 
 		/* Checks that the layer grants access to the full request. */
 		if ((layer->access & access_request) == access_request) {
-			layer_mask &= ~layer_level;
+			layer_mask &= ~layer_bit;
 
 			if (layer_mask == 0)
 				return layer_mask;
@@ -231,12 +231,9 @@ static int check_access_path(const struct landlock_ruleset *const domain,
 {
 	bool allowed = false;
 	struct path walker_path;
-	u64 layer_mask;
+	layer_mask_t layer_mask;
 	size_t i;
 
-	/* Make sure all layers can be checked. */
-	BUILD_BUG_ON(BITS_PER_TYPE(layer_mask) < LANDLOCK_MAX_NUM_LAYERS);
-
 	if (!access_request)
 		return 0;
 	if (WARN_ON_ONCE(!domain || !path))
diff --git a/security/landlock/limits.h b/security/landlock/limits.h
index 41372f22837f..17c2a2e7fe1e 100644
--- a/security/landlock/limits.h
+++ b/security/landlock/limits.h
@@ -15,7 +15,7 @@
 
 /* clang-format off */
 
-#define LANDLOCK_MAX_NUM_LAYERS		64
+#define LANDLOCK_MAX_NUM_LAYERS		16
 #define LANDLOCK_MAX_NUM_RULES		U32_MAX
 
 #define LANDLOCK_LAST_ACCESS_FS		LANDLOCK_ACCESS_FS_MAKE_SYM
diff --git a/security/landlock/ruleset.h b/security/landlock/ruleset.h
index 8d5717594931..521af2848951 100644
--- a/security/landlock/ruleset.h
+++ b/security/landlock/ruleset.h
@@ -23,6 +23,10 @@ typedef u16 access_mask_t;
 /* Makes sure all filesystem access rights can be stored. */
 static_assert(BITS_PER_TYPE(access_mask_t) >= LANDLOCK_NUM_ACCESS_FS);
 
+typedef u16 layer_mask_t;
+/* Makes sure all layers can be checked. */
+static_assert(BITS_PER_TYPE(layer_mask_t) >= LANDLOCK_MAX_NUM_LAYERS);
+
 /**
  * struct landlock_layer - Access rights for a given layer
  */
diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
index a8f54c4462eb..e13f046a172a 100644
--- a/tools/testing/selftests/landlock/fs_test.c
+++ b/tools/testing/selftests/landlock/fs_test.c
@@ -1159,7 +1159,7 @@ TEST_F_FORK(layout1, max_layers)
 	const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
 
 	ASSERT_LE(0, ruleset_fd);
-	for (i = 0; i < 64; i++)
+	for (i = 0; i < 16; i++)
 		enforce_ruleset(_metadata, ruleset_fd);
 
 	for (i = 0; i < 2; i++) {
-- 
cgit 


From 8ba0005ff418ec356e176b26eaa04a6ac755d05b Mon Sep 17 00:00:00 2001
From: Mickaël Salaün <mic@digikod.net>
Date: Fri, 6 May 2022 18:10:54 +0200
Subject: landlock: Fix same-layer rule unions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The original behavior was to check if the full set of requested accesses
was allowed by at least a rule of every relevant layer.  This didn't
take into account requests for multiple accesses and same-layer rules
allowing the union of these accesses in a complementary way.  As a
result, multiple accesses requested on a file hierarchy matching rules
that, together, allowed these accesses, but without a unique rule
allowing all of them, was illegitimately denied.  This case should be
rare in practice and it can only be triggered by the path_rename or
file_open hook implementations.

For instance, if, for the same layer, a rule allows execution
beneath /a/b and another rule allows read beneath /a, requesting access
to read and execute at the same time for /a/b should be allowed for this
layer.

This was an inconsistency because the union of same-layer rule accesses
was already allowed if requested once at a time anyway.

This fix changes the way allowed accesses are gathered over a path walk.
To take into account all these rule accesses, we store in a matrix all
layer granting the set of requested accesses, according to the handled
accesses.  To avoid heap allocation, we use an array on the stack which
is 2*13 bytes.  A following commit bringing the LANDLOCK_ACCESS_FS_REFER
access right will increase this size to reach 112 bytes (2*14*4) in case
of link or rename actions.

Add a new layout1.layer_rule_unions test to check that accesses from
different rules pertaining to the same layer are ORed in a file
hierarchy.  Also test that it is not the case for rules from different
layers.

Reviewed-by: Paul Moore <paul@paul-moore.com>
Link: https://lore.kernel.org/r/20220506161102.525323-5-mic@digikod.net
Cc: stable@vger.kernel.org
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---
 security/landlock/fs.c                     |  78 ++++++++++++++-------
 security/landlock/ruleset.h                |   2 +
 tools/testing/selftests/landlock/fs_test.c | 107 +++++++++++++++++++++++++++++
 3 files changed, 161 insertions(+), 26 deletions(-)

(limited to 'tools/testing')

diff --git a/security/landlock/fs.c b/security/landlock/fs.c
index 20953bff8fd5..c5749301b37d 100644
--- a/security/landlock/fs.c
+++ b/security/landlock/fs.c
@@ -207,45 +207,67 @@ find_rule(const struct landlock_ruleset *const domain,
 	return rule;
 }
 
-static inline layer_mask_t unmask_layers(const struct landlock_rule *const rule,
-					 const access_mask_t access_request,
-					 layer_mask_t layer_mask)
+/*
+ * @layer_masks is read and may be updated according to the access request and
+ * the matching rule.
+ *
+ * Returns true if the request is allowed (i.e. relevant layer masks for the
+ * request are empty).
+ */
+static inline bool
+unmask_layers(const struct landlock_rule *const rule,
+	      const access_mask_t access_request,
+	      layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS])
 {
 	size_t layer_level;
 
+	if (!access_request || !layer_masks)
+		return true;
 	if (!rule)
-		return layer_mask;
+		return false;
 
 	/*
 	 * An access is granted if, for each policy layer, at least one rule
-	 * encountered on the pathwalk grants the requested accesses,
-	 * regardless of their position in the layer stack.  We must then check
+	 * encountered on the pathwalk grants the requested access,
+	 * regardless of its position in the layer stack.  We must then check
 	 * the remaining layers for each inode, from the first added layer to
-	 * the last one.
+	 * the last one.  When there is multiple requested accesses, for each
+	 * policy layer, the full set of requested accesses may not be granted
+	 * by only one rule, but by the union (binary OR) of multiple rules.
+	 * E.g. /a/b <execute> + /a <read> => /a/b <execute + read>
 	 */
 	for (layer_level = 0; layer_level < rule->num_layers; layer_level++) {
 		const struct landlock_layer *const layer =
 			&rule->layers[layer_level];
 		const layer_mask_t layer_bit = BIT_ULL(layer->level - 1);
+		const unsigned long access_req = access_request;
+		unsigned long access_bit;
+		bool is_empty;
 
-		/* Checks that the layer grants access to the full request. */
-		if ((layer->access & access_request) == access_request) {
-			layer_mask &= ~layer_bit;
-
-			if (layer_mask == 0)
-				return layer_mask;
+		/*
+		 * Records in @layer_masks which layer grants access to each
+		 * requested access.
+		 */
+		is_empty = true;
+		for_each_set_bit(access_bit, &access_req,
+				 ARRAY_SIZE(*layer_masks)) {
+			if (layer->access & BIT_ULL(access_bit))
+				(*layer_masks)[access_bit] &= ~layer_bit;
+			is_empty = is_empty && !(*layer_masks)[access_bit];
 		}
+		if (is_empty)
+			return true;
 	}
-	return layer_mask;
+	return false;
 }
 
 static int check_access_path(const struct landlock_ruleset *const domain,
 			     const struct path *const path,
 			     const access_mask_t access_request)
 {
-	bool allowed = false;
+	layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {};
+	bool allowed = false, has_access = false;
 	struct path walker_path;
-	layer_mask_t layer_mask;
 	size_t i;
 
 	if (!access_request)
@@ -265,13 +287,20 @@ static int check_access_path(const struct landlock_ruleset *const domain,
 		return -EACCES;
 
 	/* Saves all layers handling a subset of requested accesses. */
-	layer_mask = 0;
 	for (i = 0; i < domain->num_layers; i++) {
-		if (domain->fs_access_masks[i] & access_request)
-			layer_mask |= BIT_ULL(i);
+		const unsigned long access_req = access_request;
+		unsigned long access_bit;
+
+		for_each_set_bit(access_bit, &access_req,
+				 ARRAY_SIZE(layer_masks)) {
+			if (domain->fs_access_masks[i] & BIT_ULL(access_bit)) {
+				layer_masks[access_bit] |= BIT_ULL(i);
+				has_access = true;
+			}
+		}
 	}
 	/* An access request not handled by the domain is allowed. */
-	if (layer_mask == 0)
+	if (!has_access)
 		return 0;
 
 	walker_path = *path;
@@ -283,14 +312,11 @@ static int check_access_path(const struct landlock_ruleset *const domain,
 	while (true) {
 		struct dentry *parent_dentry;
 
-		layer_mask =
-			unmask_layers(find_rule(domain, walker_path.dentry),
-				      access_request, layer_mask);
-		if (layer_mask == 0) {
+		allowed = unmask_layers(find_rule(domain, walker_path.dentry),
+					access_request, &layer_masks);
+		if (allowed)
 			/* Stops when a rule from each layer grants access. */
-			allowed = true;
 			break;
-		}
 
 jump_up:
 		if (walker_path.dentry == walker_path.mnt->mnt_root) {
diff --git a/security/landlock/ruleset.h b/security/landlock/ruleset.h
index 521af2848951..d43231b783e4 100644
--- a/security/landlock/ruleset.h
+++ b/security/landlock/ruleset.h
@@ -22,6 +22,8 @@
 typedef u16 access_mask_t;
 /* Makes sure all filesystem access rights can be stored. */
 static_assert(BITS_PER_TYPE(access_mask_t) >= LANDLOCK_NUM_ACCESS_FS);
+/* Makes sure for_each_set_bit() and for_each_clear_bit() calls are OK. */
+static_assert(sizeof(unsigned long) >= sizeof(access_mask_t));
 
 typedef u16 layer_mask_t;
 /* Makes sure all layers can be checked. */
diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
index e13f046a172a..a4fdcda62bde 100644
--- a/tools/testing/selftests/landlock/fs_test.c
+++ b/tools/testing/selftests/landlock/fs_test.c
@@ -758,6 +758,113 @@ TEST_F_FORK(layout1, ruleset_overlap)
 	ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
 }
 
+TEST_F_FORK(layout1, layer_rule_unions)
+{
+	const struct rule layer1[] = {
+		{
+			.path = dir_s1d2,
+			.access = LANDLOCK_ACCESS_FS_READ_FILE,
+		},
+		/* dir_s1d3 should allow READ_FILE and WRITE_FILE (O_RDWR). */
+		{
+			.path = dir_s1d3,
+			.access = LANDLOCK_ACCESS_FS_WRITE_FILE,
+		},
+		{},
+	};
+	const struct rule layer2[] = {
+		/* Doesn't change anything from layer1. */
+		{
+			.path = dir_s1d2,
+			.access = LANDLOCK_ACCESS_FS_READ_FILE |
+				  LANDLOCK_ACCESS_FS_WRITE_FILE,
+		},
+		{},
+	};
+	const struct rule layer3[] = {
+		/* Only allows write (but not read) to dir_s1d3. */
+		{
+			.path = dir_s1d2,
+			.access = LANDLOCK_ACCESS_FS_WRITE_FILE,
+		},
+		{},
+	};
+	int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer1);
+
+	ASSERT_LE(0, ruleset_fd);
+	enforce_ruleset(_metadata, ruleset_fd);
+	ASSERT_EQ(0, close(ruleset_fd));
+
+	/* Checks s1d1 hierarchy with layer1. */
+	ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
+	ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
+	ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR));
+	ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+	/* Checks s1d2 hierarchy with layer1. */
+	ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
+	ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
+	ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDWR));
+	ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+	/* Checks s1d3 hierarchy with layer1. */
+	ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+	ASSERT_EQ(0, test_open(file1_s1d3, O_WRONLY));
+	/* dir_s1d3 should allow READ_FILE and WRITE_FILE (O_RDWR). */
+	ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR));
+	ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+	/* Doesn't change anything from layer1. */
+	ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer2);
+	ASSERT_LE(0, ruleset_fd);
+	enforce_ruleset(_metadata, ruleset_fd);
+	ASSERT_EQ(0, close(ruleset_fd));
+
+	/* Checks s1d1 hierarchy with layer2. */
+	ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
+	ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
+	ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR));
+	ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+	/* Checks s1d2 hierarchy with layer2. */
+	ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
+	ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
+	ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDWR));
+	ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+	/* Checks s1d3 hierarchy with layer2. */
+	ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+	ASSERT_EQ(0, test_open(file1_s1d3, O_WRONLY));
+	/* dir_s1d3 should allow READ_FILE and WRITE_FILE (O_RDWR). */
+	ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR));
+	ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+	/* Only allows write (but not read) to dir_s1d3. */
+	ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer3);
+	ASSERT_LE(0, ruleset_fd);
+	enforce_ruleset(_metadata, ruleset_fd);
+	ASSERT_EQ(0, close(ruleset_fd));
+
+	/* Checks s1d1 hierarchy with layer3. */
+	ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
+	ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
+	ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR));
+	ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+	/* Checks s1d2 hierarchy with layer3. */
+	ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDONLY));
+	ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
+	ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDWR));
+	ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+	/* Checks s1d3 hierarchy with layer3. */
+	ASSERT_EQ(EACCES, test_open(file1_s1d3, O_RDONLY));
+	ASSERT_EQ(0, test_open(file1_s1d3, O_WRONLY));
+	/* dir_s1d3 should now deny READ_FILE and WRITE_FILE (O_RDWR). */
+	ASSERT_EQ(EACCES, test_open(file1_s1d3, O_RDWR));
+	ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+}
+
 TEST_F_FORK(layout1, non_overlapping_accesses)
 {
 	const struct rule layer1[] = {
-- 
cgit 


From b91c3e4ea756b12b7d992529226edce1cfd854d7 Mon Sep 17 00:00:00 2001
From: Mickaël Salaün <mic@digikod.net>
Date: Fri, 6 May 2022 18:10:57 +0200
Subject: landlock: Add support for file reparenting with
 LANDLOCK_ACCESS_FS_REFER
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a new LANDLOCK_ACCESS_FS_REFER access right to enable policy writers
to allow sandboxed processes to link and rename files from and to a
specific set of file hierarchies.  This access right should be composed
with LANDLOCK_ACCESS_FS_MAKE_* for the destination of a link or rename,
and with LANDLOCK_ACCESS_FS_REMOVE_* for a source of a rename.  This
lift a Landlock limitation that always denied changing the parent of an
inode.

Renaming or linking to the same directory is still always allowed,
whatever LANDLOCK_ACCESS_FS_REFER is used or not, because it is not
considered a threat to user data.

However, creating multiple links or renaming to a different parent
directory may lead to privilege escalations if not handled properly.
Indeed, we must be sure that the source doesn't gain more privileges by
being accessible from the destination.  This is handled by making sure
that the source hierarchy (including the referenced file or directory
itself) restricts at least as much the destination hierarchy.  If it is
not the case, an EXDEV error is returned, making it potentially possible
for user space to copy the file hierarchy instead of moving or linking
it.

Instead of creating different access rights for the source and the
destination, we choose to make it simple and consistent for users.
Indeed, considering the previous constraint, it would be weird to
require such destination access right to be also granted to the source
(to make it a superset).  Moreover, RENAME_EXCHANGE would also add to
the confusion because of paths being both a source and a destination.

See the provided documentation for additional details.

New tests are provided with a following commit.

Reviewed-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: Mickaël Salaün <mic@digikod.net>
Link: https://lore.kernel.org/r/20220506161102.525323-8-mic@digikod.net
---
 include/uapi/linux/landlock.h                |  27 +-
 security/landlock/fs.c                       | 600 +++++++++++++++++++++++----
 security/landlock/limits.h                   |   2 +-
 security/landlock/syscalls.c                 |   2 +-
 tools/testing/selftests/landlock/base_test.c |   2 +-
 tools/testing/selftests/landlock/fs_test.c   |   3 +-
 6 files changed, 556 insertions(+), 80 deletions(-)

(limited to 'tools/testing')

diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h
index 21c8d58283c9..23df4e0e8ace 100644
--- a/include/uapi/linux/landlock.h
+++ b/include/uapi/linux/landlock.h
@@ -21,8 +21,14 @@ struct landlock_ruleset_attr {
 	/**
 	 * @handled_access_fs: Bitmask of actions (cf. `Filesystem flags`_)
 	 * that is handled by this ruleset and should then be forbidden if no
-	 * rule explicitly allow them.  This is needed for backward
-	 * compatibility reasons.
+	 * rule explicitly allow them: it is a deny-by-default list that should
+	 * contain as much Landlock access rights as possible. Indeed, all
+	 * Landlock filesystem access rights that are not part of
+	 * handled_access_fs are allowed.  This is needed for backward
+	 * compatibility reasons.  One exception is the
+	 * LANDLOCK_ACCESS_FS_REFER access right, which is always implicitly
+	 * handled, but must still be explicitly handled to add new rules with
+	 * this access right.
 	 */
 	__u64 handled_access_fs;
 };
@@ -112,6 +118,22 @@ struct landlock_path_beneath_attr {
  * - %LANDLOCK_ACCESS_FS_MAKE_FIFO: Create (or rename or link) a named pipe.
  * - %LANDLOCK_ACCESS_FS_MAKE_BLOCK: Create (or rename or link) a block device.
  * - %LANDLOCK_ACCESS_FS_MAKE_SYM: Create (or rename or link) a symbolic link.
+ * - %LANDLOCK_ACCESS_FS_REFER: Link or rename a file from or to a different
+ *   directory (i.e. reparent a file hierarchy).  This access right is
+ *   available since the second version of the Landlock ABI.  This is also the
+ *   only access right which is always considered handled by any ruleset in
+ *   such a way that reparenting a file hierarchy is always denied by default.
+ *   To avoid privilege escalation, it is not enough to add a rule with this
+ *   access right.  When linking or renaming a file, the destination directory
+ *   hierarchy must also always have the same or a superset of restrictions of
+ *   the source hierarchy.  If it is not the case, or if the domain doesn't
+ *   handle this access right, such actions are denied by default with errno
+ *   set to EXDEV.  Linking also requires a LANDLOCK_ACCESS_FS_MAKE_* access
+ *   right on the destination directory, and renaming also requires a
+ *   LANDLOCK_ACCESS_FS_REMOVE_* access right on the source's (file or
+ *   directory) parent.  Otherwise, such actions are denied with errno set to
+ *   EACCES.  The EACCES errno prevails over EXDEV to let user space
+ *   efficiently deal with an unrecoverable error.
  *
  * .. warning::
  *
@@ -137,6 +159,7 @@ struct landlock_path_beneath_attr {
 #define LANDLOCK_ACCESS_FS_MAKE_FIFO			(1ULL << 10)
 #define LANDLOCK_ACCESS_FS_MAKE_BLOCK			(1ULL << 11)
 #define LANDLOCK_ACCESS_FS_MAKE_SYM			(1ULL << 12)
+#define LANDLOCK_ACCESS_FS_REFER			(1ULL << 13)
 /* clang-format on */
 
 #endif /* _UAPI_LINUX_LANDLOCK_H */
diff --git a/security/landlock/fs.c b/security/landlock/fs.c
index 30b42cdee52e..ec5a6247cd3e 100644
--- a/security/landlock/fs.c
+++ b/security/landlock/fs.c
@@ -4,6 +4,7 @@
  *
  * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
  * Copyright © 2018-2020 ANSSI
+ * Copyright © 2021-2022 Microsoft Corporation
  */
 
 #include <linux/atomic.h>
@@ -273,40 +274,262 @@ static inline bool is_nouser_or_private(const struct dentry *dentry)
 		unlikely(IS_PRIVATE(d_backing_inode(dentry))));
 }
 
-static int check_access_path(const struct landlock_ruleset *const domain,
-			     const struct path *const path,
-			     const access_mask_t access_request)
+static inline access_mask_t
+get_handled_accesses(const struct landlock_ruleset *const domain)
 {
-	layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {};
-	bool allowed = false, has_access = false;
-	struct path walker_path;
-	size_t i;
+	access_mask_t access_dom = 0;
+	unsigned long access_bit;
+
+	for (access_bit = 0; access_bit < LANDLOCK_NUM_ACCESS_FS;
+	     access_bit++) {
+		size_t layer_level;
+
+		for (layer_level = 0; layer_level < domain->num_layers;
+		     layer_level++) {
+			if (domain->fs_access_masks[layer_level] &
+			    BIT_ULL(access_bit)) {
+				access_dom |= BIT_ULL(access_bit);
+				break;
+			}
+		}
+	}
+	return access_dom;
+}
+
+static inline access_mask_t
+init_layer_masks(const struct landlock_ruleset *const domain,
+		 const access_mask_t access_request,
+		 layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS])
+{
+	access_mask_t handled_accesses = 0;
+	size_t layer_level;
 
+	memset(layer_masks, 0, sizeof(*layer_masks));
+	/* An empty access request can happen because of O_WRONLY | O_RDWR. */
 	if (!access_request)
 		return 0;
-	if (WARN_ON_ONCE(!domain || !path))
-		return 0;
-	if (is_nouser_or_private(path->dentry))
-		return 0;
-	if (WARN_ON_ONCE(domain->num_layers < 1))
-		return -EACCES;
 
-	/* Saves all layers handling a subset of requested accesses. */
-	for (i = 0; i < domain->num_layers; i++) {
+	/* Saves all handled accesses per layer. */
+	for (layer_level = 0; layer_level < domain->num_layers; layer_level++) {
 		const unsigned long access_req = access_request;
 		unsigned long access_bit;
 
 		for_each_set_bit(access_bit, &access_req,
-				 ARRAY_SIZE(layer_masks)) {
-			if (domain->fs_access_masks[i] & BIT_ULL(access_bit)) {
-				layer_masks[access_bit] |= BIT_ULL(i);
-				has_access = true;
+				 ARRAY_SIZE(*layer_masks)) {
+			if (domain->fs_access_masks[layer_level] &
+			    BIT_ULL(access_bit)) {
+				(*layer_masks)[access_bit] |=
+					BIT_ULL(layer_level);
+				handled_accesses |= BIT_ULL(access_bit);
 			}
 		}
 	}
-	/* An access request not handled by the domain is allowed. */
-	if (!has_access)
+	return handled_accesses;
+}
+
+/*
+ * Check that a destination file hierarchy has more restrictions than a source
+ * file hierarchy.  This is only used for link and rename actions.
+ *
+ * @layer_masks_child2: Optional child masks.
+ */
+static inline bool no_more_access(
+	const layer_mask_t (*const layer_masks_parent1)[LANDLOCK_NUM_ACCESS_FS],
+	const layer_mask_t (*const layer_masks_child1)[LANDLOCK_NUM_ACCESS_FS],
+	const bool child1_is_directory,
+	const layer_mask_t (*const layer_masks_parent2)[LANDLOCK_NUM_ACCESS_FS],
+	const layer_mask_t (*const layer_masks_child2)[LANDLOCK_NUM_ACCESS_FS],
+	const bool child2_is_directory)
+{
+	unsigned long access_bit;
+
+	for (access_bit = 0; access_bit < ARRAY_SIZE(*layer_masks_parent2);
+	     access_bit++) {
+		/* Ignores accesses that only make sense for directories. */
+		const bool is_file_access =
+			!!(BIT_ULL(access_bit) & ACCESS_FILE);
+
+		if (child1_is_directory || is_file_access) {
+			/*
+			 * Checks if the destination restrictions are a
+			 * superset of the source ones (i.e. inherited access
+			 * rights without child exceptions):
+			 * restrictions(parent2) >= restrictions(child1)
+			 */
+			if ((((*layer_masks_parent1)[access_bit] &
+			      (*layer_masks_child1)[access_bit]) |
+			     (*layer_masks_parent2)[access_bit]) !=
+			    (*layer_masks_parent2)[access_bit])
+				return false;
+		}
+
+		if (!layer_masks_child2)
+			continue;
+		if (child2_is_directory || is_file_access) {
+			/*
+			 * Checks inverted restrictions for RENAME_EXCHANGE:
+			 * restrictions(parent1) >= restrictions(child2)
+			 */
+			if ((((*layer_masks_parent2)[access_bit] &
+			      (*layer_masks_child2)[access_bit]) |
+			     (*layer_masks_parent1)[access_bit]) !=
+			    (*layer_masks_parent1)[access_bit])
+				return false;
+		}
+	}
+	return true;
+}
+
+/*
+ * Removes @layer_masks accesses that are not requested.
+ *
+ * Returns true if the request is allowed, false otherwise.
+ */
+static inline bool
+scope_to_request(const access_mask_t access_request,
+		 layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS])
+{
+	const unsigned long access_req = access_request;
+	unsigned long access_bit;
+
+	if (WARN_ON_ONCE(!layer_masks))
+		return true;
+
+	for_each_clear_bit(access_bit, &access_req, ARRAY_SIZE(*layer_masks))
+		(*layer_masks)[access_bit] = 0;
+	return !memchr_inv(layer_masks, 0, sizeof(*layer_masks));
+}
+
+/*
+ * Returns true if there is at least one access right different than
+ * LANDLOCK_ACCESS_FS_REFER.
+ */
+static inline bool
+is_eacces(const layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS],
+	  const access_mask_t access_request)
+{
+	unsigned long access_bit;
+	/* LANDLOCK_ACCESS_FS_REFER alone must return -EXDEV. */
+	const unsigned long access_check = access_request &
+					   ~LANDLOCK_ACCESS_FS_REFER;
+
+	if (!layer_masks)
+		return false;
+
+	for_each_set_bit(access_bit, &access_check, ARRAY_SIZE(*layer_masks)) {
+		if ((*layer_masks)[access_bit])
+			return true;
+	}
+	return false;
+}
+
+/**
+ * check_access_path_dual - Check accesses for requests with a common path
+ *
+ * @domain: Domain to check against.
+ * @path: File hierarchy to walk through.
+ * @access_request_parent1: Accesses to check, once @layer_masks_parent1 is
+ *     equal to @layer_masks_parent2 (if any).  This is tied to the unique
+ *     requested path for most actions, or the source in case of a refer action
+ *     (i.e. rename or link), or the source and destination in case of
+ *     RENAME_EXCHANGE.
+ * @layer_masks_parent1: Pointer to a matrix of layer masks per access
+ *     masks, identifying the layers that forbid a specific access.  Bits from
+ *     this matrix can be unset according to the @path walk.  An empty matrix
+ *     means that @domain allows all possible Landlock accesses (i.e. not only
+ *     those identified by @access_request_parent1).  This matrix can
+ *     initially refer to domain layer masks and, when the accesses for the
+ *     destination and source are the same, to requested layer masks.
+ * @dentry_child1: Dentry to the initial child of the parent1 path.  This
+ *     pointer must be NULL for non-refer actions (i.e. not link nor rename).
+ * @access_request_parent2: Similar to @access_request_parent1 but for a
+ *     request involving a source and a destination.  This refers to the
+ *     destination, except in case of RENAME_EXCHANGE where it also refers to
+ *     the source.  Must be set to 0 when using a simple path request.
+ * @layer_masks_parent2: Similar to @layer_masks_parent1 but for a refer
+ *     action.  This must be NULL otherwise.
+ * @dentry_child2: Dentry to the initial child of the parent2 path.  This
+ *     pointer is only set for RENAME_EXCHANGE actions and must be NULL
+ *     otherwise.
+ *
+ * This helper first checks that the destination has a superset of restrictions
+ * compared to the source (if any) for a common path.  Because of
+ * RENAME_EXCHANGE actions, source and destinations may be swapped.  It then
+ * checks that the collected accesses and the remaining ones are enough to
+ * allow the request.
+ *
+ * Returns:
+ * - 0 if the access request is granted;
+ * - -EACCES if it is denied because of access right other than
+ *   LANDLOCK_ACCESS_FS_REFER;
+ * - -EXDEV if the renaming or linking would be a privileged escalation
+ *   (according to each layered policies), or if LANDLOCK_ACCESS_FS_REFER is
+ *   not allowed by the source or the destination.
+ */
+static int check_access_path_dual(
+	const struct landlock_ruleset *const domain,
+	const struct path *const path,
+	const access_mask_t access_request_parent1,
+	layer_mask_t (*const layer_masks_parent1)[LANDLOCK_NUM_ACCESS_FS],
+	const struct dentry *const dentry_child1,
+	const access_mask_t access_request_parent2,
+	layer_mask_t (*const layer_masks_parent2)[LANDLOCK_NUM_ACCESS_FS],
+	const struct dentry *const dentry_child2)
+{
+	bool allowed_parent1 = false, allowed_parent2 = false, is_dom_check,
+	     child1_is_directory = true, child2_is_directory = true;
+	struct path walker_path;
+	access_mask_t access_masked_parent1, access_masked_parent2;
+	layer_mask_t _layer_masks_child1[LANDLOCK_NUM_ACCESS_FS],
+		_layer_masks_child2[LANDLOCK_NUM_ACCESS_FS];
+	layer_mask_t(*layer_masks_child1)[LANDLOCK_NUM_ACCESS_FS] = NULL,
+	(*layer_masks_child2)[LANDLOCK_NUM_ACCESS_FS] = NULL;
+
+	if (!access_request_parent1 && !access_request_parent2)
 		return 0;
+	if (WARN_ON_ONCE(!domain || !path))
+		return 0;
+	if (is_nouser_or_private(path->dentry))
+		return 0;
+	if (WARN_ON_ONCE(domain->num_layers < 1 || !layer_masks_parent1))
+		return -EACCES;
+
+	if (unlikely(layer_masks_parent2)) {
+		if (WARN_ON_ONCE(!dentry_child1))
+			return -EACCES;
+		/*
+		 * For a double request, first check for potential privilege
+		 * escalation by looking at domain handled accesses (which are
+		 * a superset of the meaningful requested accesses).
+		 */
+		access_masked_parent1 = access_masked_parent2 =
+			get_handled_accesses(domain);
+		is_dom_check = true;
+	} else {
+		if (WARN_ON_ONCE(dentry_child1 || dentry_child2))
+			return -EACCES;
+		/* For a simple request, only check for requested accesses. */
+		access_masked_parent1 = access_request_parent1;
+		access_masked_parent2 = access_request_parent2;
+		is_dom_check = false;
+	}
+
+	if (unlikely(dentry_child1)) {
+		unmask_layers(find_rule(domain, dentry_child1),
+			      init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS,
+					       &_layer_masks_child1),
+			      &_layer_masks_child1);
+		layer_masks_child1 = &_layer_masks_child1;
+		child1_is_directory = d_is_dir(dentry_child1);
+	}
+	if (unlikely(dentry_child2)) {
+		unmask_layers(find_rule(domain, dentry_child2),
+			      init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS,
+					       &_layer_masks_child2),
+			      &_layer_masks_child2);
+		layer_masks_child2 = &_layer_masks_child2;
+		child2_is_directory = d_is_dir(dentry_child2);
+	}
 
 	walker_path = *path;
 	path_get(&walker_path);
@@ -316,11 +539,52 @@ static int check_access_path(const struct landlock_ruleset *const domain,
 	 */
 	while (true) {
 		struct dentry *parent_dentry;
+		const struct landlock_rule *rule;
+
+		/*
+		 * If at least all accesses allowed on the destination are
+		 * already allowed on the source, respectively if there is at
+		 * least as much as restrictions on the destination than on the
+		 * source, then we can safely refer files from the source to
+		 * the destination without risking a privilege escalation.
+		 * This also applies in the case of RENAME_EXCHANGE, which
+		 * implies checks on both direction.  This is crucial for
+		 * standalone multilayered security policies.  Furthermore,
+		 * this helps avoid policy writers to shoot themselves in the
+		 * foot.
+		 */
+		if (unlikely(is_dom_check &&
+			     no_more_access(
+				     layer_masks_parent1, layer_masks_child1,
+				     child1_is_directory, layer_masks_parent2,
+				     layer_masks_child2,
+				     child2_is_directory))) {
+			allowed_parent1 = scope_to_request(
+				access_request_parent1, layer_masks_parent1);
+			allowed_parent2 = scope_to_request(
+				access_request_parent2, layer_masks_parent2);
+
+			/* Stops when all accesses are granted. */
+			if (allowed_parent1 && allowed_parent2)
+				break;
 
-		allowed = unmask_layers(find_rule(domain, walker_path.dentry),
-					access_request, &layer_masks);
-		if (allowed)
-			/* Stops when a rule from each layer grants access. */
+			/*
+			 * Now, downgrades the remaining checks from domain
+			 * handled accesses to requested accesses.
+			 */
+			is_dom_check = false;
+			access_masked_parent1 = access_request_parent1;
+			access_masked_parent2 = access_request_parent2;
+		}
+
+		rule = find_rule(domain, walker_path.dentry);
+		allowed_parent1 = unmask_layers(rule, access_masked_parent1,
+						layer_masks_parent1);
+		allowed_parent2 = unmask_layers(rule, access_masked_parent2,
+						layer_masks_parent2);
+
+		/* Stops when a rule from each layer grants access. */
+		if (allowed_parent1 && allowed_parent2)
 			break;
 
 jump_up:
@@ -333,7 +597,6 @@ jump_up:
 				 * Stops at the real root.  Denies access
 				 * because not all layers have granted access.
 				 */
-				allowed = false;
 				break;
 			}
 		}
@@ -343,7 +606,8 @@ jump_up:
 			 * access to internal filesystems (e.g. nsfs, which is
 			 * reachable through /proc/<pid>/ns/<namespace>).
 			 */
-			allowed = !!(walker_path.mnt->mnt_flags & MNT_INTERNAL);
+			allowed_parent1 = allowed_parent2 =
+				!!(walker_path.mnt->mnt_flags & MNT_INTERNAL);
 			break;
 		}
 		parent_dentry = dget_parent(walker_path.dentry);
@@ -351,7 +615,36 @@ jump_up:
 		walker_path.dentry = parent_dentry;
 	}
 	path_put(&walker_path);
-	return allowed ? 0 : -EACCES;
+
+	if (allowed_parent1 && allowed_parent2)
+		return 0;
+
+	/*
+	 * This prioritizes EACCES over EXDEV for all actions, including
+	 * renames with RENAME_EXCHANGE.
+	 */
+	if (likely(is_eacces(layer_masks_parent1, access_request_parent1) ||
+		   is_eacces(layer_masks_parent2, access_request_parent2)))
+		return -EACCES;
+
+	/*
+	 * Gracefully forbids reparenting if the destination directory
+	 * hierarchy is not a superset of restrictions of the source directory
+	 * hierarchy, or if LANDLOCK_ACCESS_FS_REFER is not allowed by the
+	 * source or the destination.
+	 */
+	return -EXDEV;
+}
+
+static inline int check_access_path(const struct landlock_ruleset *const domain,
+				    const struct path *const path,
+				    access_mask_t access_request)
+{
+	layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {};
+
+	access_request = init_layer_masks(domain, access_request, &layer_masks);
+	return check_access_path_dual(domain, path, access_request,
+				      &layer_masks, NULL, 0, NULL, NULL);
 }
 
 static inline int current_check_access_path(const struct path *const path,
@@ -398,6 +691,206 @@ static inline access_mask_t maybe_remove(const struct dentry *const dentry)
 				  LANDLOCK_ACCESS_FS_REMOVE_FILE;
 }
 
+/**
+ * collect_domain_accesses - Walk through a file path and collect accesses
+ *
+ * @domain: Domain to check against.
+ * @mnt_root: Last directory to check.
+ * @dir: Directory to start the walk from.
+ * @layer_masks_dom: Where to store the collected accesses.
+ *
+ * This helper is useful to begin a path walk from the @dir directory to a
+ * @mnt_root directory used as a mount point.  This mount point is the common
+ * ancestor between the source and the destination of a renamed and linked
+ * file.  While walking from @dir to @mnt_root, we record all the domain's
+ * allowed accesses in @layer_masks_dom.
+ *
+ * This is similar to check_access_path_dual() but much simpler because it only
+ * handles walking on the same mount point and only check one set of accesses.
+ *
+ * Returns:
+ * - true if all the domain access rights are allowed for @dir;
+ * - false if the walk reached @mnt_root.
+ */
+static bool collect_domain_accesses(
+	const struct landlock_ruleset *const domain,
+	const struct dentry *const mnt_root, struct dentry *dir,
+	layer_mask_t (*const layer_masks_dom)[LANDLOCK_NUM_ACCESS_FS])
+{
+	unsigned long access_dom;
+	bool ret = false;
+
+	if (WARN_ON_ONCE(!domain || !mnt_root || !dir || !layer_masks_dom))
+		return true;
+	if (is_nouser_or_private(dir))
+		return true;
+
+	access_dom = init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS,
+				      layer_masks_dom);
+
+	dget(dir);
+	while (true) {
+		struct dentry *parent_dentry;
+
+		/* Gets all layers allowing all domain accesses. */
+		if (unmask_layers(find_rule(domain, dir), access_dom,
+				  layer_masks_dom)) {
+			/*
+			 * Stops when all handled accesses are allowed by at
+			 * least one rule in each layer.
+			 */
+			ret = true;
+			break;
+		}
+
+		/* We should not reach a root other than @mnt_root. */
+		if (dir == mnt_root || WARN_ON_ONCE(IS_ROOT(dir)))
+			break;
+
+		parent_dentry = dget_parent(dir);
+		dput(dir);
+		dir = parent_dentry;
+	}
+	dput(dir);
+	return ret;
+}
+
+/**
+ * current_check_refer_path - Check if a rename or link action is allowed
+ *
+ * @old_dentry: File or directory requested to be moved or linked.
+ * @new_dir: Destination parent directory.
+ * @new_dentry: Destination file or directory.
+ * @removable: Sets to true if it is a rename operation.
+ * @exchange: Sets to true if it is a rename operation with RENAME_EXCHANGE.
+ *
+ * Because of its unprivileged constraints, Landlock relies on file hierarchies
+ * (and not only inodes) to tie access rights to files.  Being able to link or
+ * rename a file hierarchy brings some challenges.  Indeed, moving or linking a
+ * file (i.e. creating a new reference to an inode) can have an impact on the
+ * actions allowed for a set of files if it would change its parent directory
+ * (i.e. reparenting).
+ *
+ * To avoid trivial access right bypasses, Landlock first checks if the file or
+ * directory requested to be moved would gain new access rights inherited from
+ * its new hierarchy.  Before returning any error, Landlock then checks that
+ * the parent source hierarchy and the destination hierarchy would allow the
+ * link or rename action.  If it is not the case, an error with EACCES is
+ * returned to inform user space that there is no way to remove or create the
+ * requested source file type.  If it should be allowed but the new inherited
+ * access rights would be greater than the source access rights, then the
+ * kernel returns an error with EXDEV.  Prioritizing EACCES over EXDEV enables
+ * user space to abort the whole operation if there is no way to do it, or to
+ * manually copy the source to the destination if this remains allowed, e.g.
+ * because file creation is allowed on the destination directory but not direct
+ * linking.
+ *
+ * To achieve this goal, the kernel needs to compare two file hierarchies: the
+ * one identifying the source file or directory (including itself), and the
+ * destination one.  This can be seen as a multilayer partial ordering problem.
+ * The kernel walks through these paths and collects in a matrix the access
+ * rights that are denied per layer.  These matrices are then compared to see
+ * if the destination one has more (or the same) restrictions as the source
+ * one.  If this is the case, the requested action will not return EXDEV, which
+ * doesn't mean the action is allowed.  The parent hierarchy of the source
+ * (i.e. parent directory), and the destination hierarchy must also be checked
+ * to verify that they explicitly allow such action (i.e.  referencing,
+ * creation and potentially removal rights).  The kernel implementation is then
+ * required to rely on potentially four matrices of access rights: one for the
+ * source file or directory (i.e. the child), a potentially other one for the
+ * other source/destination (in case of RENAME_EXCHANGE), one for the source
+ * parent hierarchy and a last one for the destination hierarchy.  These
+ * ephemeral matrices take some space on the stack, which limits the number of
+ * layers to a deemed reasonable number: 16.
+ *
+ * Returns:
+ * - 0 if access is allowed;
+ * - -EXDEV if @old_dentry would inherit new access rights from @new_dir;
+ * - -EACCES if file removal or creation is denied.
+ */
+static int current_check_refer_path(struct dentry *const old_dentry,
+				    const struct path *const new_dir,
+				    struct dentry *const new_dentry,
+				    const bool removable, const bool exchange)
+{
+	const struct landlock_ruleset *const dom =
+		landlock_get_current_domain();
+	bool allow_parent1, allow_parent2;
+	access_mask_t access_request_parent1, access_request_parent2;
+	struct path mnt_dir;
+	layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS],
+		layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS];
+
+	if (!dom)
+		return 0;
+	if (WARN_ON_ONCE(dom->num_layers < 1))
+		return -EACCES;
+	if (unlikely(d_is_negative(old_dentry)))
+		return -ENOENT;
+	if (exchange) {
+		if (unlikely(d_is_negative(new_dentry)))
+			return -ENOENT;
+		access_request_parent1 =
+			get_mode_access(d_backing_inode(new_dentry)->i_mode);
+	} else {
+		access_request_parent1 = 0;
+	}
+	access_request_parent2 =
+		get_mode_access(d_backing_inode(old_dentry)->i_mode);
+	if (removable) {
+		access_request_parent1 |= maybe_remove(old_dentry);
+		access_request_parent2 |= maybe_remove(new_dentry);
+	}
+
+	/* The mount points are the same for old and new paths, cf. EXDEV. */
+	if (old_dentry->d_parent == new_dir->dentry) {
+		/*
+		 * The LANDLOCK_ACCESS_FS_REFER access right is not required
+		 * for same-directory referer (i.e. no reparenting).
+		 */
+		access_request_parent1 = init_layer_masks(
+			dom, access_request_parent1 | access_request_parent2,
+			&layer_masks_parent1);
+		return check_access_path_dual(dom, new_dir,
+					      access_request_parent1,
+					      &layer_masks_parent1, NULL, 0,
+					      NULL, NULL);
+	}
+
+	/* Backward compatibility: no reparenting support. */
+	if (!(get_handled_accesses(dom) & LANDLOCK_ACCESS_FS_REFER))
+		return -EXDEV;
+
+	access_request_parent1 |= LANDLOCK_ACCESS_FS_REFER;
+	access_request_parent2 |= LANDLOCK_ACCESS_FS_REFER;
+
+	/* Saves the common mount point. */
+	mnt_dir.mnt = new_dir->mnt;
+	mnt_dir.dentry = new_dir->mnt->mnt_root;
+
+	/* new_dir->dentry is equal to new_dentry->d_parent */
+	allow_parent1 = collect_domain_accesses(dom, mnt_dir.dentry,
+						old_dentry->d_parent,
+						&layer_masks_parent1);
+	allow_parent2 = collect_domain_accesses(
+		dom, mnt_dir.dentry, new_dir->dentry, &layer_masks_parent2);
+
+	if (allow_parent1 && allow_parent2)
+		return 0;
+
+	/*
+	 * To be able to compare source and destination domain access rights,
+	 * take into account the @old_dentry access rights aggregated with its
+	 * parent access rights.  This will be useful to compare with the
+	 * destination parent access rights.
+	 */
+	return check_access_path_dual(dom, &mnt_dir, access_request_parent1,
+				      &layer_masks_parent1, old_dentry,
+				      access_request_parent2,
+				      &layer_masks_parent2,
+				      exchange ? new_dentry : NULL);
+}
+
 /* Inode hooks */
 
 static void hook_inode_free_security(struct inode *const inode)
@@ -591,32 +1084,12 @@ static int hook_sb_pivotroot(const struct path *const old_path,
 
 /* Path hooks */
 
-/*
- * Creating multiple links or renaming may lead to privilege escalations if not
- * handled properly.  Indeed, we must be sure that the source doesn't gain more
- * privileges by being accessible from the destination.  This is getting more
- * complex when dealing with multiple layers.  The whole picture can be seen as
- * a multilayer partial ordering problem.  A future version of Landlock will
- * deal with that.
- */
 static int hook_path_link(struct dentry *const old_dentry,
 			  const struct path *const new_dir,
 			  struct dentry *const new_dentry)
 {
-	const struct landlock_ruleset *const dom =
-		landlock_get_current_domain();
-
-	if (!dom)
-		return 0;
-	/* The mount points are the same for old and new paths, cf. EXDEV. */
-	if (old_dentry->d_parent != new_dir->dentry)
-		/* Gracefully forbids reparenting. */
-		return -EXDEV;
-	if (unlikely(d_is_negative(old_dentry)))
-		return -ENOENT;
-	return check_access_path(
-		dom, new_dir,
-		get_mode_access(d_backing_inode(old_dentry)->i_mode));
+	return current_check_refer_path(old_dentry, new_dir, new_dentry, false,
+					false);
 }
 
 static int hook_path_rename(const struct path *const old_dir,
@@ -625,30 +1098,9 @@ static int hook_path_rename(const struct path *const old_dir,
 			    struct dentry *const new_dentry,
 			    const unsigned int flags)
 {
-	const struct landlock_ruleset *const dom =
-		landlock_get_current_domain();
-	u32 exchange_access = 0;
-
-	if (!dom)
-		return 0;
-	/* The mount points are the same for old and new paths, cf. EXDEV. */
-	if (old_dir->dentry != new_dir->dentry)
-		/* Gracefully forbids reparenting. */
-		return -EXDEV;
-	if (flags & RENAME_EXCHANGE) {
-		if (unlikely(d_is_negative(new_dentry)))
-			return -ENOENT;
-		exchange_access =
-			get_mode_access(d_backing_inode(new_dentry)->i_mode);
-	}
-	if (unlikely(d_is_negative(old_dentry)))
-		return -ENOENT;
-	/* RENAME_EXCHANGE is handled because directories are the same. */
-	return check_access_path(
-		dom, old_dir,
-		maybe_remove(old_dentry) | maybe_remove(new_dentry) |
-			exchange_access |
-			get_mode_access(d_backing_inode(old_dentry)->i_mode));
+	/* old_dir refers to old_dentry->d_parent and new_dir->mnt */
+	return current_check_refer_path(old_dentry, new_dir, new_dentry, true,
+					!!(flags & RENAME_EXCHANGE));
 }
 
 static int hook_path_mkdir(const struct path *const dir,
diff --git a/security/landlock/limits.h b/security/landlock/limits.h
index 17c2a2e7fe1e..b54184ab9439 100644
--- a/security/landlock/limits.h
+++ b/security/landlock/limits.h
@@ -18,7 +18,7 @@
 #define LANDLOCK_MAX_NUM_LAYERS		16
 #define LANDLOCK_MAX_NUM_RULES		U32_MAX
 
-#define LANDLOCK_LAST_ACCESS_FS		LANDLOCK_ACCESS_FS_MAKE_SYM
+#define LANDLOCK_LAST_ACCESS_FS		LANDLOCK_ACCESS_FS_REFER
 #define LANDLOCK_MASK_ACCESS_FS		((LANDLOCK_LAST_ACCESS_FS << 1) - 1)
 #define LANDLOCK_NUM_ACCESS_FS		__const_hweight64(LANDLOCK_MASK_ACCESS_FS)
 
diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c
index 507d43827afe..735a0865ea11 100644
--- a/security/landlock/syscalls.c
+++ b/security/landlock/syscalls.c
@@ -129,7 +129,7 @@ static const struct file_operations ruleset_fops = {
 	.write = fop_dummy_write,
 };
 
-#define LANDLOCK_ABI_VERSION 1
+#define LANDLOCK_ABI_VERSION 2
 
 /**
  * sys_landlock_create_ruleset - Create a new ruleset
diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c
index 35f64832b869..da9290817866 100644
--- a/tools/testing/selftests/landlock/base_test.c
+++ b/tools/testing/selftests/landlock/base_test.c
@@ -75,7 +75,7 @@ TEST(abi_version)
 	const struct landlock_ruleset_attr ruleset_attr = {
 		.handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE,
 	};
-	ASSERT_EQ(1, landlock_create_ruleset(NULL, 0,
+	ASSERT_EQ(2, landlock_create_ruleset(NULL, 0,
 					     LANDLOCK_CREATE_RULESET_VERSION));
 
 	ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0,
diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
index a4fdcda62bde..69f9c7409198 100644
--- a/tools/testing/selftests/landlock/fs_test.c
+++ b/tools/testing/selftests/landlock/fs_test.c
@@ -401,7 +401,7 @@ TEST_F_FORK(layout1, inval)
 	LANDLOCK_ACCESS_FS_WRITE_FILE | \
 	LANDLOCK_ACCESS_FS_READ_FILE)
 
-#define ACCESS_LAST LANDLOCK_ACCESS_FS_MAKE_SYM
+#define ACCESS_LAST LANDLOCK_ACCESS_FS_REFER
 
 #define ACCESS_ALL ( \
 	ACCESS_FILE | \
@@ -414,6 +414,7 @@ TEST_F_FORK(layout1, inval)
 	LANDLOCK_ACCESS_FS_MAKE_SOCK | \
 	LANDLOCK_ACCESS_FS_MAKE_FIFO | \
 	LANDLOCK_ACCESS_FS_MAKE_BLOCK | \
+	LANDLOCK_ACCESS_FS_MAKE_SYM | \
 	ACCESS_LAST)
 
 /* clang-format on */
-- 
cgit 


From f4056b9266b571c63f30cda70c2d89f7b7e8bb7b Mon Sep 17 00:00:00 2001
From: Mickaël Salaün <mic@digikod.net>
Date: Fri, 6 May 2022 18:10:58 +0200
Subject: selftests/landlock: Add 11 new test suites dedicated to file
 reparenting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These test suites try to check all edge cases for directory and file
renaming or linking involving a new parent directory, with and without
LANDLOCK_ACCESS_FS_REFER and other access rights.

layout1:
* reparent_refer: Tests simple FS_REFER usage.
* reparent_link: Tests a mix of FS_MAKE_REG and FS_REFER with links.
* reparent_rename: Tests a mix of FS_MAKE_REG and FS_REFER with renames
  and RENAME_EXCHANGE.
* reparent_exdev_layers_rename1/2: Tests renames with two layers.
* reparent_exdev_layers_exchange1/2/3: Tests exchanges with two layers.
* reparent_remove: Tests file and directory removal with rename.
* reparent_dom_superset: Tests access partial ordering.

layout1_bind:
* reparent_cross_mount: Tests FS_REFER propagation across mount points.

Test coverage for security/landlock is 95.4% of 604 lines according to
gcc/gcov-11.

Cc: Paul Moore <paul@paul-moore.com>
Signed-off-by: Mickaël Salaün <mic@digikod.net>
Link: https://lore.kernel.org/r/20220506161102.525323-9-mic@digikod.net
---
 tools/testing/selftests/landlock/fs_test.c | 755 ++++++++++++++++++++++++++++-
 1 file changed, 754 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
index 69f9c7409198..21a2ce8fa739 100644
--- a/tools/testing/selftests/landlock/fs_test.c
+++ b/tools/testing/selftests/landlock/fs_test.c
@@ -146,7 +146,7 @@ static int remove_path(const char *const path)
 		goto out;
 	}
 	if (unlink(path) && rmdir(path)) {
-		if (errno != ENOENT)
+		if (errno != ENOENT && errno != ENOTDIR)
 			err = errno;
 		goto out;
 	}
@@ -1972,6 +1972,721 @@ TEST_F_FORK(layout1, rename_dir)
 	ASSERT_EQ(0, rmdir(dir_s1d3));
 }
 
+TEST_F_FORK(layout1, reparent_refer)
+{
+	const struct rule layer1[] = {
+		{
+			.path = dir_s1d2,
+			.access = LANDLOCK_ACCESS_FS_REFER,
+		},
+		{
+			.path = dir_s2d2,
+			.access = LANDLOCK_ACCESS_FS_REFER,
+		},
+		{},
+	};
+	int ruleset_fd =
+		create_ruleset(_metadata, LANDLOCK_ACCESS_FS_REFER, layer1);
+
+	ASSERT_LE(0, ruleset_fd);
+	enforce_ruleset(_metadata, ruleset_fd);
+	ASSERT_EQ(0, close(ruleset_fd));
+
+	ASSERT_EQ(-1, rename(dir_s1d2, dir_s2d1));
+	ASSERT_EQ(EXDEV, errno);
+	ASSERT_EQ(-1, rename(dir_s1d2, dir_s2d2));
+	ASSERT_EQ(EXDEV, errno);
+	ASSERT_EQ(-1, rename(dir_s1d2, dir_s2d3));
+	ASSERT_EQ(EXDEV, errno);
+
+	ASSERT_EQ(-1, rename(dir_s1d3, dir_s2d1));
+	ASSERT_EQ(EXDEV, errno);
+	ASSERT_EQ(-1, rename(dir_s1d3, dir_s2d2));
+	ASSERT_EQ(EXDEV, errno);
+	/*
+	 * Moving should only be allowed when the source and the destination
+	 * parent directory have REFER.
+	 */
+	ASSERT_EQ(-1, rename(dir_s1d3, dir_s2d3));
+	ASSERT_EQ(ENOTEMPTY, errno);
+	ASSERT_EQ(0, unlink(file1_s2d3));
+	ASSERT_EQ(0, unlink(file2_s2d3));
+	ASSERT_EQ(0, rename(dir_s1d3, dir_s2d3));
+}
+
+TEST_F_FORK(layout1, reparent_link)
+{
+	const struct rule layer1[] = {
+		{
+			.path = dir_s1d2,
+			.access = LANDLOCK_ACCESS_FS_MAKE_REG,
+		},
+		{
+			.path = dir_s1d3,
+			.access = LANDLOCK_ACCESS_FS_REFER,
+		},
+		{
+			.path = dir_s2d2,
+			.access = LANDLOCK_ACCESS_FS_REFER,
+		},
+		{
+			.path = dir_s2d3,
+			.access = LANDLOCK_ACCESS_FS_MAKE_REG,
+		},
+		{},
+	};
+	const int ruleset_fd = create_ruleset(
+		_metadata,
+		LANDLOCK_ACCESS_FS_MAKE_REG | LANDLOCK_ACCESS_FS_REFER, layer1);
+
+	ASSERT_LE(0, ruleset_fd);
+	enforce_ruleset(_metadata, ruleset_fd);
+	ASSERT_EQ(0, close(ruleset_fd));
+
+	ASSERT_EQ(0, unlink(file1_s1d1));
+	ASSERT_EQ(0, unlink(file1_s1d2));
+	ASSERT_EQ(0, unlink(file1_s1d3));
+
+	/* Denies linking because of missing MAKE_REG. */
+	ASSERT_EQ(-1, link(file2_s1d1, file1_s1d1));
+	ASSERT_EQ(EACCES, errno);
+	/* Denies linking because of missing source and destination REFER. */
+	ASSERT_EQ(-1, link(file1_s2d1, file1_s1d2));
+	ASSERT_EQ(EXDEV, errno);
+	/* Denies linking because of missing source REFER. */
+	ASSERT_EQ(-1, link(file1_s2d1, file1_s1d3));
+	ASSERT_EQ(EXDEV, errno);
+
+	/* Denies linking because of missing MAKE_REG. */
+	ASSERT_EQ(-1, link(file1_s2d2, file1_s1d1));
+	ASSERT_EQ(EACCES, errno);
+	/* Denies linking because of missing destination REFER. */
+	ASSERT_EQ(-1, link(file1_s2d2, file1_s1d2));
+	ASSERT_EQ(EXDEV, errno);
+
+	/* Allows linking because of REFER and MAKE_REG. */
+	ASSERT_EQ(0, link(file1_s2d2, file1_s1d3));
+	ASSERT_EQ(0, unlink(file1_s2d2));
+	/* Reverse linking denied because of missing MAKE_REG. */
+	ASSERT_EQ(-1, link(file1_s1d3, file1_s2d2));
+	ASSERT_EQ(EACCES, errno);
+	ASSERT_EQ(0, unlink(file1_s2d3));
+	/* Checks reverse linking. */
+	ASSERT_EQ(0, link(file1_s1d3, file1_s2d3));
+	ASSERT_EQ(0, unlink(file1_s1d3));
+
+	/*
+	 * This is OK for a file link, but it should not be allowed for a
+	 * directory rename (because of the superset of access rights.
+	 */
+	ASSERT_EQ(0, link(file1_s2d3, file1_s1d3));
+	ASSERT_EQ(0, unlink(file1_s1d3));
+
+	ASSERT_EQ(-1, link(file2_s1d2, file1_s1d3));
+	ASSERT_EQ(EXDEV, errno);
+	ASSERT_EQ(-1, link(file2_s1d3, file1_s1d2));
+	ASSERT_EQ(EXDEV, errno);
+
+	ASSERT_EQ(0, link(file2_s1d2, file1_s1d2));
+	ASSERT_EQ(0, link(file2_s1d3, file1_s1d3));
+}
+
+TEST_F_FORK(layout1, reparent_rename)
+{
+	/* Same rules as for reparent_link. */
+	const struct rule layer1[] = {
+		{
+			.path = dir_s1d2,
+			.access = LANDLOCK_ACCESS_FS_MAKE_REG,
+		},
+		{
+			.path = dir_s1d3,
+			.access = LANDLOCK_ACCESS_FS_REFER,
+		},
+		{
+			.path = dir_s2d2,
+			.access = LANDLOCK_ACCESS_FS_REFER,
+		},
+		{
+			.path = dir_s2d3,
+			.access = LANDLOCK_ACCESS_FS_MAKE_REG,
+		},
+		{},
+	};
+	const int ruleset_fd = create_ruleset(
+		_metadata,
+		LANDLOCK_ACCESS_FS_MAKE_REG | LANDLOCK_ACCESS_FS_REFER, layer1);
+
+	ASSERT_LE(0, ruleset_fd);
+	enforce_ruleset(_metadata, ruleset_fd);
+	ASSERT_EQ(0, close(ruleset_fd));
+
+	ASSERT_EQ(0, unlink(file1_s1d2));
+	ASSERT_EQ(0, unlink(file1_s1d3));
+
+	/* Denies renaming because of missing MAKE_REG. */
+	ASSERT_EQ(-1, renameat2(AT_FDCWD, file2_s1d1, AT_FDCWD, file1_s1d1,
+				RENAME_EXCHANGE));
+	ASSERT_EQ(EACCES, errno);
+	ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, file2_s1d1,
+				RENAME_EXCHANGE));
+	ASSERT_EQ(EACCES, errno);
+	ASSERT_EQ(0, unlink(file1_s1d1));
+	ASSERT_EQ(-1, rename(file2_s1d1, file1_s1d1));
+	ASSERT_EQ(EACCES, errno);
+	/* Even denies same file exchange. */
+	ASSERT_EQ(-1, renameat2(AT_FDCWD, file2_s1d1, AT_FDCWD, file2_s1d1,
+				RENAME_EXCHANGE));
+	ASSERT_EQ(EACCES, errno);
+
+	/* Denies renaming because of missing source and destination REFER. */
+	ASSERT_EQ(-1, rename(file1_s2d1, file1_s1d2));
+	ASSERT_EQ(EXDEV, errno);
+	/*
+	 * Denies renaming because of missing MAKE_REG, source and destination
+	 * REFER.
+	 */
+	ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d1, AT_FDCWD, file2_s1d1,
+				RENAME_EXCHANGE));
+	ASSERT_EQ(EACCES, errno);
+	ASSERT_EQ(-1, renameat2(AT_FDCWD, file2_s1d1, AT_FDCWD, file1_s2d1,
+				RENAME_EXCHANGE));
+	ASSERT_EQ(EACCES, errno);
+
+	/* Denies renaming because of missing source REFER. */
+	ASSERT_EQ(-1, rename(file1_s2d1, file1_s1d3));
+	ASSERT_EQ(EXDEV, errno);
+	/* Denies renaming because of missing MAKE_REG. */
+	ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d1, AT_FDCWD, file2_s1d3,
+				RENAME_EXCHANGE));
+	ASSERT_EQ(EACCES, errno);
+
+	/* Denies renaming because of missing MAKE_REG. */
+	ASSERT_EQ(-1, rename(file1_s2d2, file1_s1d1));
+	ASSERT_EQ(EACCES, errno);
+	/* Denies renaming because of missing destination REFER*/
+	ASSERT_EQ(-1, rename(file1_s2d2, file1_s1d2));
+	ASSERT_EQ(EXDEV, errno);
+
+	/* Denies exchange because of one missing MAKE_REG. */
+	ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, file2_s1d3,
+				RENAME_EXCHANGE));
+	ASSERT_EQ(EACCES, errno);
+	/* Allows renaming because of REFER and MAKE_REG. */
+	ASSERT_EQ(0, rename(file1_s2d2, file1_s1d3));
+
+	/* Reverse renaming denied because of missing MAKE_REG. */
+	ASSERT_EQ(-1, rename(file1_s1d3, file1_s2d2));
+	ASSERT_EQ(EACCES, errno);
+	ASSERT_EQ(0, unlink(file1_s2d3));
+	ASSERT_EQ(0, rename(file1_s1d3, file1_s2d3));
+
+	/* Tests reverse renaming. */
+	ASSERT_EQ(0, rename(file1_s2d3, file1_s1d3));
+	ASSERT_EQ(0, renameat2(AT_FDCWD, file2_s2d3, AT_FDCWD, file1_s1d3,
+			       RENAME_EXCHANGE));
+	ASSERT_EQ(0, rename(file1_s1d3, file1_s2d3));
+
+	/*
+	 * This is OK for a file rename, but it should not be allowed for a
+	 * directory rename (because of the superset of access rights).
+	 */
+	ASSERT_EQ(0, rename(file1_s2d3, file1_s1d3));
+	ASSERT_EQ(0, rename(file1_s1d3, file1_s2d3));
+
+	/*
+	 * Tests superset restrictions applied to directories.  Not only the
+	 * dir_s2d3's parent (dir_s2d2) should be taken into account but also
+	 * access rights tied to dir_s2d3. dir_s2d2 is missing one access right
+	 * compared to dir_s1d3/file1_s1d3 (MAKE_REG) but it is provided
+	 * directly by the moved dir_s2d3.
+	 */
+	ASSERT_EQ(0, rename(dir_s2d3, file1_s1d3));
+	ASSERT_EQ(0, rename(file1_s1d3, dir_s2d3));
+	/*
+	 * The first rename is allowed but not the exchange because dir_s1d3's
+	 * parent (dir_s1d2) doesn't have REFER.
+	 */
+	ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d3, AT_FDCWD, dir_s1d3,
+				RENAME_EXCHANGE));
+	ASSERT_EQ(EXDEV, errno);
+	ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s1d3, AT_FDCWD, file1_s2d3,
+				RENAME_EXCHANGE));
+	ASSERT_EQ(EXDEV, errno);
+	ASSERT_EQ(-1, rename(file1_s2d3, dir_s1d3));
+	ASSERT_EQ(EXDEV, errno);
+
+	ASSERT_EQ(-1, rename(file2_s1d2, file1_s1d3));
+	ASSERT_EQ(EXDEV, errno);
+	ASSERT_EQ(-1, rename(file2_s1d3, file1_s1d2));
+	ASSERT_EQ(EXDEV, errno);
+
+	/* Renaming in the same directory is always allowed. */
+	ASSERT_EQ(0, rename(file2_s1d2, file1_s1d2));
+	ASSERT_EQ(0, rename(file2_s1d3, file1_s1d3));
+
+	ASSERT_EQ(0, unlink(file1_s1d2));
+	/* Denies because of missing source MAKE_REG and destination REFER. */
+	ASSERT_EQ(-1, rename(dir_s2d3, file1_s1d2));
+	ASSERT_EQ(EXDEV, errno);
+
+	ASSERT_EQ(0, unlink(file1_s1d3));
+	/* Denies because of missing source MAKE_REG and REFER. */
+	ASSERT_EQ(-1, rename(dir_s2d2, file1_s1d3));
+	ASSERT_EQ(EXDEV, errno);
+}
+
+static void
+reparent_exdev_layers_enforce1(struct __test_metadata *const _metadata)
+{
+	const struct rule layer1[] = {
+		{
+			.path = dir_s1d2,
+			.access = LANDLOCK_ACCESS_FS_REFER,
+		},
+		{
+			/* Interesting for the layer2 tests. */
+			.path = dir_s1d3,
+			.access = LANDLOCK_ACCESS_FS_MAKE_REG,
+		},
+		{
+			.path = dir_s2d2,
+			.access = LANDLOCK_ACCESS_FS_REFER,
+		},
+		{
+			.path = dir_s2d3,
+			.access = LANDLOCK_ACCESS_FS_MAKE_REG,
+		},
+		{},
+	};
+	const int ruleset_fd = create_ruleset(
+		_metadata,
+		LANDLOCK_ACCESS_FS_MAKE_REG | LANDLOCK_ACCESS_FS_REFER, layer1);
+
+	ASSERT_LE(0, ruleset_fd);
+	enforce_ruleset(_metadata, ruleset_fd);
+	ASSERT_EQ(0, close(ruleset_fd));
+}
+
+static void
+reparent_exdev_layers_enforce2(struct __test_metadata *const _metadata)
+{
+	const struct rule layer2[] = {
+		{
+			.path = dir_s2d3,
+			.access = LANDLOCK_ACCESS_FS_MAKE_DIR,
+		},
+		{},
+	};
+	/*
+	 * Same checks as before but with a second layer and a new MAKE_DIR
+	 * rule (and no explicit handling of REFER).
+	 */
+	const int ruleset_fd =
+		create_ruleset(_metadata, LANDLOCK_ACCESS_FS_MAKE_DIR, layer2);
+
+	ASSERT_LE(0, ruleset_fd);
+	enforce_ruleset(_metadata, ruleset_fd);
+	ASSERT_EQ(0, close(ruleset_fd));
+}
+
+TEST_F_FORK(layout1, reparent_exdev_layers_rename1)
+{
+	ASSERT_EQ(0, unlink(file1_s2d2));
+	ASSERT_EQ(0, unlink(file1_s2d3));
+
+	reparent_exdev_layers_enforce1(_metadata);
+
+	/*
+	 * Moving the dir_s1d3 directory below dir_s2d2 is allowed by Landlock
+	 * because it doesn't inherit new access rights.
+	 */
+	ASSERT_EQ(0, rename(dir_s1d3, file1_s2d2));
+	ASSERT_EQ(0, rename(file1_s2d2, dir_s1d3));
+
+	/*
+	 * Moving the dir_s1d3 directory below dir_s2d3 is allowed, even if it
+	 * gets a new inherited access rights (MAKE_REG), because MAKE_REG is
+	 * already allowed for dir_s1d3.
+	 */
+	ASSERT_EQ(0, rename(dir_s1d3, file1_s2d3));
+	ASSERT_EQ(0, rename(file1_s2d3, dir_s1d3));
+
+	/*
+	 * However, moving the file1_s1d3 file below dir_s2d3 is allowed
+	 * because it cannot inherit MAKE_REG right (which is dedicated to
+	 * directories).
+	 */
+	ASSERT_EQ(0, rename(file1_s1d3, file1_s2d3));
+
+	reparent_exdev_layers_enforce2(_metadata);
+
+	/*
+	 * Moving the dir_s1d3 directory below dir_s2d2 is now denied because
+	 * MAKE_DIR is not tied to dir_s2d2.
+	 */
+	ASSERT_EQ(-1, rename(dir_s1d3, file1_s2d2));
+	ASSERT_EQ(EACCES, errno);
+
+	/*
+	 * Moving the dir_s1d3 directory below dir_s2d3 is forbidden because it
+	 * would grants MAKE_REG and MAKE_DIR rights to it.
+	 */
+	ASSERT_EQ(-1, rename(dir_s1d3, file1_s2d3));
+	ASSERT_EQ(EXDEV, errno);
+
+	/*
+	 * However, moving the file2_s1d3 file below dir_s2d3 is allowed
+	 * because it cannot inherit MAKE_REG nor MAKE_DIR rights (which are
+	 * dedicated to directories).
+	 */
+	ASSERT_EQ(0, rename(file2_s1d3, file1_s2d3));
+}
+
+TEST_F_FORK(layout1, reparent_exdev_layers_rename2)
+{
+	reparent_exdev_layers_enforce1(_metadata);
+
+	/* Checks EACCES predominance over EXDEV. */
+	ASSERT_EQ(-1, rename(file1_s1d1, file1_s2d2));
+	ASSERT_EQ(EACCES, errno);
+	ASSERT_EQ(-1, rename(file1_s1d2, file1_s2d2));
+	ASSERT_EQ(EACCES, errno);
+	ASSERT_EQ(-1, rename(file1_s1d1, file1_s2d3));
+	ASSERT_EQ(EXDEV, errno);
+	/* Modify layout! */
+	ASSERT_EQ(0, rename(file1_s1d2, file1_s2d3));
+
+	/* Without REFER source. */
+	ASSERT_EQ(-1, rename(dir_s1d1, file1_s2d2));
+	ASSERT_EQ(EXDEV, errno);
+	ASSERT_EQ(-1, rename(dir_s1d2, file1_s2d2));
+	ASSERT_EQ(EXDEV, errno);
+
+	reparent_exdev_layers_enforce2(_metadata);
+
+	/* Checks EACCES predominance over EXDEV. */
+	ASSERT_EQ(-1, rename(file1_s1d1, file1_s2d2));
+	ASSERT_EQ(EACCES, errno);
+	/* Checks with actual file2_s1d2. */
+	ASSERT_EQ(-1, rename(file2_s1d2, file1_s2d2));
+	ASSERT_EQ(EACCES, errno);
+	ASSERT_EQ(-1, rename(file1_s1d1, file1_s2d3));
+	ASSERT_EQ(EXDEV, errno);
+	/* Modify layout! */
+	ASSERT_EQ(0, rename(file2_s1d2, file1_s2d3));
+
+	/* Without REFER source, EACCES wins over EXDEV. */
+	ASSERT_EQ(-1, rename(dir_s1d1, file1_s2d2));
+	ASSERT_EQ(EACCES, errno);
+	ASSERT_EQ(-1, rename(dir_s1d2, file1_s2d2));
+	ASSERT_EQ(EACCES, errno);
+}
+
+TEST_F_FORK(layout1, reparent_exdev_layers_exchange1)
+{
+	const char *const dir_file1_s1d2 = file1_s1d2, *const dir_file2_s2d3 =
+							       file2_s2d3;
+
+	ASSERT_EQ(0, unlink(file1_s1d2));
+	ASSERT_EQ(0, mkdir(file1_s1d2, 0700));
+	ASSERT_EQ(0, unlink(file2_s2d3));
+	ASSERT_EQ(0, mkdir(file2_s2d3, 0700));
+
+	reparent_exdev_layers_enforce1(_metadata);
+
+	/* Error predominance with file exchange: returns EXDEV and EACCES. */
+	ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, file1_s2d3,
+				RENAME_EXCHANGE));
+	ASSERT_EQ(EACCES, errno);
+	ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d3, AT_FDCWD, file1_s1d1,
+				RENAME_EXCHANGE));
+	ASSERT_EQ(EACCES, errno);
+
+	/*
+	 * Checks with directories which creation could be allowed, but denied
+	 * because of access rights that would be inherited.
+	 */
+	ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file1_s1d2, AT_FDCWD,
+				dir_file2_s2d3, RENAME_EXCHANGE));
+	ASSERT_EQ(EXDEV, errno);
+	ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file2_s2d3, AT_FDCWD,
+				dir_file1_s1d2, RENAME_EXCHANGE));
+	ASSERT_EQ(EXDEV, errno);
+
+	/* Checks with same access rights. */
+	ASSERT_EQ(0, renameat2(AT_FDCWD, dir_s1d3, AT_FDCWD, dir_s2d3,
+			       RENAME_EXCHANGE));
+	ASSERT_EQ(0, renameat2(AT_FDCWD, dir_s2d3, AT_FDCWD, dir_s1d3,
+			       RENAME_EXCHANGE));
+
+	/* Checks with different (child-only) access rights. */
+	ASSERT_EQ(0, renameat2(AT_FDCWD, dir_s2d3, AT_FDCWD, dir_file1_s1d2,
+			       RENAME_EXCHANGE));
+	ASSERT_EQ(0, renameat2(AT_FDCWD, dir_file1_s1d2, AT_FDCWD, dir_s2d3,
+			       RENAME_EXCHANGE));
+
+	/*
+	 * Checks that exchange between file and directory are consistent.
+	 *
+	 * Moving a file (file1_s2d2) to a directory which only grants more
+	 * directory-related access rights is allowed, and at the same time
+	 * moving a directory (dir_file2_s2d3) to another directory which
+	 * grants less access rights is allowed too.
+	 *
+	 * See layout1.reparent_exdev_layers_exchange3 for inverted arguments.
+	 */
+	ASSERT_EQ(0, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_file2_s2d3,
+			       RENAME_EXCHANGE));
+	/*
+	 * However, moving back the directory is denied because it would get
+	 * more access rights than the current state and because file creation
+	 * is forbidden (in dir_s2d2).
+	 */
+	ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file2_s2d3, AT_FDCWD, file1_s2d2,
+				RENAME_EXCHANGE));
+	ASSERT_EQ(EACCES, errno);
+	ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_file2_s2d3,
+				RENAME_EXCHANGE));
+	ASSERT_EQ(EACCES, errno);
+
+	reparent_exdev_layers_enforce2(_metadata);
+
+	/* Error predominance with file exchange: returns EXDEV and EACCES. */
+	ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, file1_s2d3,
+				RENAME_EXCHANGE));
+	ASSERT_EQ(EACCES, errno);
+	ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d3, AT_FDCWD, file1_s1d1,
+				RENAME_EXCHANGE));
+	ASSERT_EQ(EACCES, errno);
+
+	/* Checks with directories which creation is now denied. */
+	ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file1_s1d2, AT_FDCWD,
+				dir_file2_s2d3, RENAME_EXCHANGE));
+	ASSERT_EQ(EACCES, errno);
+	ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file2_s2d3, AT_FDCWD,
+				dir_file1_s1d2, RENAME_EXCHANGE));
+	ASSERT_EQ(EACCES, errno);
+
+	/* Checks with different (child-only) access rights. */
+	ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s1d3, AT_FDCWD, dir_s2d3,
+				RENAME_EXCHANGE));
+	/* Denied because of MAKE_DIR. */
+	ASSERT_EQ(EACCES, errno);
+	ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s2d3, AT_FDCWD, dir_s1d3,
+				RENAME_EXCHANGE));
+	ASSERT_EQ(EACCES, errno);
+
+	/* Checks with different (child-only) access rights. */
+	ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s2d3, AT_FDCWD, dir_file1_s1d2,
+				RENAME_EXCHANGE));
+	/* Denied because of MAKE_DIR. */
+	ASSERT_EQ(EACCES, errno);
+	ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file1_s1d2, AT_FDCWD, dir_s2d3,
+				RENAME_EXCHANGE));
+	ASSERT_EQ(EACCES, errno);
+
+	/* See layout1.reparent_exdev_layers_exchange2 for complement. */
+}
+
+TEST_F_FORK(layout1, reparent_exdev_layers_exchange2)
+{
+	const char *const dir_file2_s2d3 = file2_s2d3;
+
+	ASSERT_EQ(0, unlink(file2_s2d3));
+	ASSERT_EQ(0, mkdir(file2_s2d3, 0700));
+
+	reparent_exdev_layers_enforce1(_metadata);
+	reparent_exdev_layers_enforce2(_metadata);
+
+	/* Checks that exchange between file and directory are consistent. */
+	ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_file2_s2d3,
+				RENAME_EXCHANGE));
+	ASSERT_EQ(EACCES, errno);
+	ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file2_s2d3, AT_FDCWD, file1_s2d2,
+				RENAME_EXCHANGE));
+	ASSERT_EQ(EACCES, errno);
+}
+
+TEST_F_FORK(layout1, reparent_exdev_layers_exchange3)
+{
+	const char *const dir_file2_s2d3 = file2_s2d3;
+
+	ASSERT_EQ(0, unlink(file2_s2d3));
+	ASSERT_EQ(0, mkdir(file2_s2d3, 0700));
+
+	reparent_exdev_layers_enforce1(_metadata);
+
+	/*
+	 * Checks that exchange between file and directory are consistent,
+	 * including with inverted arguments (see
+	 * layout1.reparent_exdev_layers_exchange1).
+	 */
+	ASSERT_EQ(0, renameat2(AT_FDCWD, dir_file2_s2d3, AT_FDCWD, file1_s2d2,
+			       RENAME_EXCHANGE));
+	ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_file2_s2d3,
+				RENAME_EXCHANGE));
+	ASSERT_EQ(EACCES, errno);
+	ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file2_s2d3, AT_FDCWD, file1_s2d2,
+				RENAME_EXCHANGE));
+	ASSERT_EQ(EACCES, errno);
+}
+
+TEST_F_FORK(layout1, reparent_remove)
+{
+	const struct rule layer1[] = {
+		{
+			.path = dir_s1d1,
+			.access = LANDLOCK_ACCESS_FS_REFER |
+				  LANDLOCK_ACCESS_FS_REMOVE_DIR,
+		},
+		{
+			.path = dir_s1d2,
+			.access = LANDLOCK_ACCESS_FS_REMOVE_FILE,
+		},
+		{
+			.path = dir_s2d1,
+			.access = LANDLOCK_ACCESS_FS_REFER |
+				  LANDLOCK_ACCESS_FS_REMOVE_FILE,
+		},
+		{},
+	};
+	const int ruleset_fd = create_ruleset(
+		_metadata,
+		LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_REMOVE_DIR |
+			LANDLOCK_ACCESS_FS_REMOVE_FILE,
+		layer1);
+
+	ASSERT_LE(0, ruleset_fd);
+	enforce_ruleset(_metadata, ruleset_fd);
+	ASSERT_EQ(0, close(ruleset_fd));
+
+	/* Access denied because of wrong/swapped remove file/dir. */
+	ASSERT_EQ(-1, rename(file1_s1d1, dir_s2d2));
+	ASSERT_EQ(EACCES, errno);
+	ASSERT_EQ(-1, rename(dir_s2d2, file1_s1d1));
+	ASSERT_EQ(EACCES, errno);
+	ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, dir_s2d2,
+				RENAME_EXCHANGE));
+	ASSERT_EQ(EACCES, errno);
+	ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, dir_s2d3,
+				RENAME_EXCHANGE));
+	ASSERT_EQ(EACCES, errno);
+
+	/* Access allowed thanks to the matching rights. */
+	ASSERT_EQ(-1, rename(file1_s2d1, dir_s1d2));
+	ASSERT_EQ(EISDIR, errno);
+	ASSERT_EQ(-1, rename(dir_s1d2, file1_s2d1));
+	ASSERT_EQ(ENOTDIR, errno);
+	ASSERT_EQ(-1, rename(dir_s1d3, file1_s2d1));
+	ASSERT_EQ(ENOTDIR, errno);
+	ASSERT_EQ(0, unlink(file1_s2d1));
+	ASSERT_EQ(0, unlink(file1_s1d3));
+	ASSERT_EQ(0, unlink(file2_s1d3));
+	ASSERT_EQ(0, rename(dir_s1d3, file1_s2d1));
+
+	/* Effectively removes a file and a directory by exchanging them. */
+	ASSERT_EQ(0, mkdir(dir_s1d3, 0700));
+	ASSERT_EQ(0, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s1d3,
+			       RENAME_EXCHANGE));
+	ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s1d3,
+				RENAME_EXCHANGE));
+	ASSERT_EQ(EACCES, errno);
+}
+
+TEST_F_FORK(layout1, reparent_dom_superset)
+{
+	const struct rule layer1[] = {
+		{
+			.path = dir_s1d2,
+			.access = LANDLOCK_ACCESS_FS_REFER,
+		},
+		{
+			.path = file1_s1d2,
+			.access = LANDLOCK_ACCESS_FS_EXECUTE,
+		},
+		{
+			.path = dir_s1d3,
+			.access = LANDLOCK_ACCESS_FS_MAKE_SOCK |
+				  LANDLOCK_ACCESS_FS_EXECUTE,
+		},
+		{
+			.path = dir_s2d2,
+			.access = LANDLOCK_ACCESS_FS_REFER |
+				  LANDLOCK_ACCESS_FS_EXECUTE |
+				  LANDLOCK_ACCESS_FS_MAKE_SOCK,
+		},
+		{
+			.path = dir_s2d3,
+			.access = LANDLOCK_ACCESS_FS_READ_FILE |
+				  LANDLOCK_ACCESS_FS_MAKE_FIFO,
+		},
+		{},
+	};
+	int ruleset_fd = create_ruleset(_metadata,
+					LANDLOCK_ACCESS_FS_REFER |
+						LANDLOCK_ACCESS_FS_EXECUTE |
+						LANDLOCK_ACCESS_FS_MAKE_SOCK |
+						LANDLOCK_ACCESS_FS_READ_FILE |
+						LANDLOCK_ACCESS_FS_MAKE_FIFO,
+					layer1);
+
+	ASSERT_LE(0, ruleset_fd);
+	enforce_ruleset(_metadata, ruleset_fd);
+	ASSERT_EQ(0, close(ruleset_fd));
+
+	ASSERT_EQ(-1, rename(file1_s1d2, file1_s2d1));
+	ASSERT_EQ(EXDEV, errno);
+	/*
+	 * Moving file1_s1d2 beneath dir_s2d3 would grant it the READ_FILE
+	 * access right.
+	 */
+	ASSERT_EQ(-1, rename(file1_s1d2, file1_s2d3));
+	ASSERT_EQ(EXDEV, errno);
+	/*
+	 * Moving file1_s1d2 should be allowed even if dir_s2d2 grants a
+	 * superset of access rights compared to dir_s1d2, because file1_s1d2
+	 * already has these access rights anyway.
+	 */
+	ASSERT_EQ(0, rename(file1_s1d2, file1_s2d2));
+	ASSERT_EQ(0, rename(file1_s2d2, file1_s1d2));
+
+	ASSERT_EQ(-1, rename(dir_s1d3, file1_s2d1));
+	ASSERT_EQ(EXDEV, errno);
+	/*
+	 * Moving dir_s1d3 beneath dir_s2d3 would grant it the MAKE_FIFO access
+	 * right.
+	 */
+	ASSERT_EQ(-1, rename(dir_s1d3, file1_s2d3));
+	ASSERT_EQ(EXDEV, errno);
+	/*
+	 * Moving dir_s1d3 should be allowed even if dir_s2d2 grants a superset
+	 * of access rights compared to dir_s1d2, because dir_s1d3 already has
+	 * these access rights anyway.
+	 */
+	ASSERT_EQ(0, rename(dir_s1d3, file1_s2d2));
+	ASSERT_EQ(0, rename(file1_s2d2, dir_s1d3));
+
+	/*
+	 * Moving file1_s2d3 beneath dir_s1d2 is allowed, but moving it back
+	 * will be denied because the new inherited access rights from dir_s1d2
+	 * will be less than the destination (original) dir_s2d3.  This is a
+	 * sinkhole scenario where we cannot move back files or directories.
+	 */
+	ASSERT_EQ(0, rename(file1_s2d3, file2_s1d2));
+	ASSERT_EQ(-1, rename(file2_s1d2, file1_s2d3));
+	ASSERT_EQ(EXDEV, errno);
+	ASSERT_EQ(0, unlink(file2_s1d2));
+	ASSERT_EQ(0, unlink(file2_s2d3));
+	/*
+	 * Checks similar directory one-way move: dir_s2d3 loses EXECUTE and
+	 * MAKE_SOCK which were inherited from dir_s1d3.
+	 */
+	ASSERT_EQ(0, rename(dir_s2d3, file2_s1d2));
+	ASSERT_EQ(-1, rename(file2_s1d2, dir_s2d3));
+	ASSERT_EQ(EXDEV, errno);
+}
+
 TEST_F_FORK(layout1, remove_dir)
 {
 	const struct rule rules[] = {
@@ -2520,6 +3235,44 @@ TEST_F_FORK(layout1_bind, same_content_same_file)
 	ASSERT_EQ(EACCES, test_open(bind_file1_s1d3, O_WRONLY));
 }
 
+TEST_F_FORK(layout1_bind, reparent_cross_mount)
+{
+	const struct rule layer1[] = {
+		{
+			/* dir_s2d1 is beneath the dir_s2d2 mount point. */
+			.path = dir_s2d1,
+			.access = LANDLOCK_ACCESS_FS_REFER,
+		},
+		{
+			.path = bind_dir_s1d3,
+			.access = LANDLOCK_ACCESS_FS_EXECUTE,
+		},
+		{},
+	};
+	int ruleset_fd = create_ruleset(
+		_metadata,
+		LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_EXECUTE, layer1);
+
+	ASSERT_LE(0, ruleset_fd);
+	enforce_ruleset(_metadata, ruleset_fd);
+	ASSERT_EQ(0, close(ruleset_fd));
+
+	/* Checks basic denied move. */
+	ASSERT_EQ(-1, rename(file1_s1d1, file1_s1d2));
+	ASSERT_EQ(EXDEV, errno);
+
+	/* Checks real cross-mount move (Landlock is not involved). */
+	ASSERT_EQ(-1, rename(file1_s2d1, file1_s2d2));
+	ASSERT_EQ(EXDEV, errno);
+
+	/* Checks move that will give more accesses. */
+	ASSERT_EQ(-1, rename(file1_s2d2, bind_file1_s1d3));
+	ASSERT_EQ(EXDEV, errno);
+
+	/* Checks legitimate downgrade move. */
+	ASSERT_EQ(0, rename(bind_file1_s1d3, file1_s2d2));
+}
+
 #define LOWER_BASE TMP_DIR "/lower"
 #define LOWER_DATA LOWER_BASE "/data"
 static const char lower_fl1[] = LOWER_DATA "/fl1";
-- 
cgit 


From 4050764cbaa25760aab40857f723393c07898474 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Mon, 23 May 2022 08:20:44 -0700
Subject: selftests/bpf: fix btf_dump/btf_dump due to recent clang change

Latest llvm-project upstream had a change of behavior
related to qualifiers on function return type ([1]).
This caused selftests btf_dump/btf_dump failure.
The following example shows what changed.

  $ cat t.c
  typedef const char * const (* const (* const fn_ptr_arr2_t[5])())(char * (*)(int));
  struct t {
    int a;
    fn_ptr_arr2_t l;
  };
  int foo(struct t *arg) {
    return arg->a;
  }

Compiled with latest upstream llvm15,
  $ clang -O2 -g -target bpf -S -emit-llvm t.c
The related generated debuginfo IR looks like:
  !16 = !DIDerivedType(tag: DW_TAG_typedef, name: "fn_ptr_arr2_t", file: !1, line: 1, baseType: !17)
  !17 = !DICompositeType(tag: DW_TAG_array_type, baseType: !18, size: 320, elements: !32)
  !18 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !19)
  !19 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !20, size: 64)
  !20 = !DISubroutineType(types: !21)
  !21 = !{!22, null}
  !22 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !23, size: 64)
  !23 = !DISubroutineType(types: !24)
  !24 = !{!25, !28}
  !25 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !26, size: 64)
  !26 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !27)
  !27 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char)
You can see two intermediate const qualifier to pointer are dropped in debuginfo IR.

With llvm14, we have following debuginfo IR:
  !16 = !DIDerivedType(tag: DW_TAG_typedef, name: "fn_ptr_arr2_t", file: !1, line: 1, baseType: !17)
  !17 = !DICompositeType(tag: DW_TAG_array_type, baseType: !18, size: 320, elements: !34)
  !18 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !19)
  !19 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !20, size: 64)
  !20 = !DISubroutineType(types: !21)
  !21 = !{!22, null}
  !22 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !23)
  !23 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !24, size: 64)
  !24 = !DISubroutineType(types: !25)
  !25 = !{!26, !30}
  !26 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !27)
  !27 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !28, size: 64)
  !28 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !29)
  !29 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char)
All const qualifiers are preserved.

To adapt the selftest to both old and new llvm, this patch removed
the intermediate const qualifier in const-to-ptr types, to make the
test succeed again.

  [1] https://reviews.llvm.org/D125919

Reported-by: Mykola Lysenko <mykolal@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220523152044.3905809-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
index 1c7105fcae3c..4ee4748133fe 100644
--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
@@ -94,7 +94,7 @@ typedef void (* (*signal_t)(int, void (*)(int)))(int);
 
 typedef char * (*fn_ptr_arr1_t[10])(int **);
 
-typedef char * (* const (* const fn_ptr_arr2_t[5])())(char * (*)(int));
+typedef char * (* (* const fn_ptr_arr2_t[5])())(char * (*)(int));
 
 struct struct_w_typedefs {
 	int_t a;
-- 
cgit 


From f9a3eca4bc0452a7079282d1ad87d65cb3463f0a Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Mon, 23 May 2022 12:56:04 +0100
Subject: selftests/bpf: Fix spelling mistake: "unpriviliged" -> "unprivileged"

There are spelling mistakes in ASSERT messages. Fix these.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20220523115604.49942-1-colin.i.king@gmail.com
---
 tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c b/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c
index 2800185179cf..1ed3cc2092db 100644
--- a/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c
+++ b/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c
@@ -261,10 +261,10 @@ void test_unpriv_bpf_disabled(void)
 	if (ret == -EPERM) {
 		/* if unprivileged_bpf_disabled=1, we get -EPERM back; that's okay. */
 		if (!ASSERT_OK(strcmp(unprivileged_bpf_disabled_orig, "1"),
-			       "unpriviliged_bpf_disabled_on"))
+			       "unprivileged_bpf_disabled_on"))
 			goto cleanup;
 	} else {
-		if (!ASSERT_OK(ret, "set unpriviliged_bpf_disabled"))
+		if (!ASSERT_OK(ret, "set unprivileged_bpf_disabled"))
 			goto cleanup;
 	}
 
-- 
cgit 


From 0cf7052a55128f7fd7905f6ae6eb995d6db76b52 Mon Sep 17 00:00:00 2001
From: Joanne Koong <joannelkoong@gmail.com>
Date: Mon, 23 May 2022 14:07:12 -0700
Subject: selftests/bpf: Dynptr tests

This patch adds tests for dynptrs, which include cases that the
verifier needs to reject (for example, a bpf_ringbuf_reserve_dynptr
without a corresponding bpf_ringbuf_submit/discard_dynptr) as well
as cases that should successfully pass.

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220523210712.3641569-7-joannelkoong@gmail.com
---
 tools/testing/selftests/bpf/prog_tests/dynptr.c    | 137 +++++
 tools/testing/selftests/bpf/progs/dynptr_fail.c    | 588 +++++++++++++++++++++
 tools/testing/selftests/bpf/progs/dynptr_success.c | 164 ++++++
 3 files changed, 889 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/dynptr.c
 create mode 100644 tools/testing/selftests/bpf/progs/dynptr_fail.c
 create mode 100644 tools/testing/selftests/bpf/progs/dynptr_success.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c
new file mode 100644
index 000000000000..3c7aa82b98e2
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Facebook */
+
+#include <test_progs.h>
+#include "dynptr_fail.skel.h"
+#include "dynptr_success.skel.h"
+
+static size_t log_buf_sz = 1048576; /* 1 MB */
+static char obj_log_buf[1048576];
+
+static struct {
+	const char *prog_name;
+	const char *expected_err_msg;
+} dynptr_tests[] = {
+	/* failure cases */
+	{"ringbuf_missing_release1", "Unreleased reference id=1"},
+	{"ringbuf_missing_release2", "Unreleased reference id=2"},
+	{"ringbuf_missing_release_callback", "Unreleased reference id"},
+	{"use_after_invalid", "Expected an initialized dynptr as arg #3"},
+	{"ringbuf_invalid_api", "type=mem expected=alloc_mem"},
+	{"add_dynptr_to_map1", "invalid indirect read from stack"},
+	{"add_dynptr_to_map2", "invalid indirect read from stack"},
+	{"data_slice_out_of_bounds_ringbuf", "value is outside of the allowed memory range"},
+	{"data_slice_out_of_bounds_map_value", "value is outside of the allowed memory range"},
+	{"data_slice_use_after_release", "invalid mem access 'scalar'"},
+	{"data_slice_missing_null_check1", "invalid mem access 'mem_or_null'"},
+	{"data_slice_missing_null_check2", "invalid mem access 'mem_or_null'"},
+	{"invalid_helper1", "invalid indirect read from stack"},
+	{"invalid_helper2", "Expected an initialized dynptr as arg #3"},
+	{"invalid_write1", "Expected an initialized dynptr as arg #1"},
+	{"invalid_write2", "Expected an initialized dynptr as arg #3"},
+	{"invalid_write3", "Expected an initialized ringbuf dynptr as arg #1"},
+	{"invalid_write4", "arg 1 is an unacquired reference"},
+	{"invalid_read1", "invalid read from stack"},
+	{"invalid_read2", "cannot pass in dynptr at an offset"},
+	{"invalid_read3", "invalid read from stack"},
+	{"invalid_read4", "invalid read from stack"},
+	{"invalid_offset", "invalid write to stack"},
+	{"global", "type=map_value expected=fp"},
+	{"release_twice", "arg 1 is an unacquired reference"},
+	{"release_twice_callback", "arg 1 is an unacquired reference"},
+	{"dynptr_from_mem_invalid_api",
+		"Unsupported reg type fp for bpf_dynptr_from_mem data"},
+
+	/* success cases */
+	{"test_read_write", NULL},
+	{"test_data_slice", NULL},
+	{"test_ringbuf", NULL},
+};
+
+static void verify_fail(const char *prog_name, const char *expected_err_msg)
+{
+	LIBBPF_OPTS(bpf_object_open_opts, opts);
+	struct bpf_program *prog;
+	struct dynptr_fail *skel;
+	int err;
+
+	opts.kernel_log_buf = obj_log_buf;
+	opts.kernel_log_size = log_buf_sz;
+	opts.kernel_log_level = 1;
+
+	skel = dynptr_fail__open_opts(&opts);
+	if (!ASSERT_OK_PTR(skel, "dynptr_fail__open_opts"))
+		goto cleanup;
+
+	prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+	if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+		goto cleanup;
+
+	bpf_program__set_autoload(prog, true);
+
+	bpf_map__set_max_entries(skel->maps.ringbuf, getpagesize());
+
+	err = dynptr_fail__load(skel);
+	if (!ASSERT_ERR(err, "unexpected load success"))
+		goto cleanup;
+
+	if (!ASSERT_OK_PTR(strstr(obj_log_buf, expected_err_msg), "expected_err_msg")) {
+		fprintf(stderr, "Expected err_msg: %s\n", expected_err_msg);
+		fprintf(stderr, "Verifier output: %s\n", obj_log_buf);
+	}
+
+cleanup:
+	dynptr_fail__destroy(skel);
+}
+
+static void verify_success(const char *prog_name)
+{
+	struct dynptr_success *skel;
+	struct bpf_program *prog;
+	struct bpf_link *link;
+
+	skel = dynptr_success__open();
+	if (!ASSERT_OK_PTR(skel, "dynptr_success__open"))
+		return;
+
+	skel->bss->pid = getpid();
+
+	bpf_map__set_max_entries(skel->maps.ringbuf, getpagesize());
+
+	dynptr_success__load(skel);
+	if (!ASSERT_OK_PTR(skel, "dynptr_success__load"))
+		goto cleanup;
+
+	prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+	if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+		goto cleanup;
+
+	link = bpf_program__attach(prog);
+	if (!ASSERT_OK_PTR(link, "bpf_program__attach"))
+		goto cleanup;
+
+	usleep(1);
+
+	ASSERT_EQ(skel->bss->err, 0, "err");
+
+	bpf_link__destroy(link);
+
+cleanup:
+	dynptr_success__destroy(skel);
+}
+
+void test_dynptr(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(dynptr_tests); i++) {
+		if (!test__start_subtest(dynptr_tests[i].prog_name))
+			continue;
+
+		if (dynptr_tests[i].expected_err_msg)
+			verify_fail(dynptr_tests[i].prog_name,
+				    dynptr_tests[i].expected_err_msg);
+		else
+			verify_success(dynptr_tests[i].prog_name);
+	}
+}
diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c
new file mode 100644
index 000000000000..d811cff73597
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c
@@ -0,0 +1,588 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Facebook */
+
+#include <errno.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct test_info {
+	int x;
+	struct bpf_dynptr ptr;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, struct bpf_dynptr);
+} array_map1 SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, struct test_info);
+} array_map2 SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u32);
+} array_map3 SEC(".maps");
+
+struct sample {
+	int pid;
+	long value;
+	char comm[16];
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_RINGBUF);
+} ringbuf SEC(".maps");
+
+int err, val;
+
+static int get_map_val_dynptr(struct bpf_dynptr *ptr)
+{
+	__u32 key = 0, *map_val;
+
+	bpf_map_update_elem(&array_map3, &key, &val, 0);
+
+	map_val = bpf_map_lookup_elem(&array_map3, &key);
+	if (!map_val)
+		return -ENOENT;
+
+	bpf_dynptr_from_mem(map_val, sizeof(*map_val), 0, ptr);
+
+	return 0;
+}
+
+/* Every bpf_ringbuf_reserve_dynptr call must have a corresponding
+ * bpf_ringbuf_submit/discard_dynptr call
+ */
+SEC("?raw_tp/sys_nanosleep")
+int ringbuf_missing_release1(void *ctx)
+{
+	struct bpf_dynptr ptr;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+	/* missing a call to bpf_ringbuf_discard/submit_dynptr */
+
+	return 0;
+}
+
+SEC("?raw_tp/sys_nanosleep")
+int ringbuf_missing_release2(void *ctx)
+{
+	struct bpf_dynptr ptr1, ptr2;
+	struct sample *sample;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(*sample), 0, &ptr1);
+	bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(*sample), 0, &ptr2);
+
+	sample = bpf_dynptr_data(&ptr1, 0, sizeof(*sample));
+	if (!sample) {
+		bpf_ringbuf_discard_dynptr(&ptr1, 0);
+		bpf_ringbuf_discard_dynptr(&ptr2, 0);
+		return 0;
+	}
+
+	bpf_ringbuf_submit_dynptr(&ptr1, 0);
+
+	/* missing a call to bpf_ringbuf_discard/submit_dynptr on ptr2 */
+
+	return 0;
+}
+
+static int missing_release_callback_fn(__u32 index, void *data)
+{
+	struct bpf_dynptr ptr;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+	/* missing a call to bpf_ringbuf_discard/submit_dynptr */
+
+	return 0;
+}
+
+/* Any dynptr initialized within a callback must have bpf_dynptr_put called */
+SEC("?raw_tp/sys_nanosleep")
+int ringbuf_missing_release_callback(void *ctx)
+{
+	bpf_loop(10, missing_release_callback_fn, NULL, 0);
+	return 0;
+}
+
+/* Can't call bpf_ringbuf_submit/discard_dynptr on a non-initialized dynptr */
+SEC("?raw_tp/sys_nanosleep")
+int ringbuf_release_uninit_dynptr(void *ctx)
+{
+	struct bpf_dynptr ptr;
+
+	/* this should fail */
+	bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+	return 0;
+}
+
+/* A dynptr can't be used after it has been invalidated */
+SEC("?raw_tp/sys_nanosleep")
+int use_after_invalid(void *ctx)
+{
+	struct bpf_dynptr ptr;
+	char read_data[64];
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(read_data), 0, &ptr);
+
+	bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0);
+
+	bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+	/* this should fail */
+	bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0);
+
+	return 0;
+}
+
+/* Can't call non-dynptr ringbuf APIs on a dynptr ringbuf sample */
+SEC("?raw_tp/sys_nanosleep")
+int ringbuf_invalid_api(void *ctx)
+{
+	struct bpf_dynptr ptr;
+	struct sample *sample;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(*sample), 0, &ptr);
+	sample = bpf_dynptr_data(&ptr, 0, sizeof(*sample));
+	if (!sample)
+		goto done;
+
+	sample->pid = 123;
+
+	/* invalid API use. need to use dynptr API to submit/discard */
+	bpf_ringbuf_submit(sample, 0);
+
+done:
+	bpf_ringbuf_discard_dynptr(&ptr, 0);
+	return 0;
+}
+
+/* Can't add a dynptr to a map */
+SEC("?raw_tp/sys_nanosleep")
+int add_dynptr_to_map1(void *ctx)
+{
+	struct bpf_dynptr ptr;
+	int key = 0;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+	/* this should fail */
+	bpf_map_update_elem(&array_map1, &key, &ptr, 0);
+
+	bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+	return 0;
+}
+
+/* Can't add a struct with an embedded dynptr to a map */
+SEC("?raw_tp/sys_nanosleep")
+int add_dynptr_to_map2(void *ctx)
+{
+	struct test_info x;
+	int key = 0;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &x.ptr);
+
+	/* this should fail */
+	bpf_map_update_elem(&array_map2, &key, &x, 0);
+
+	bpf_ringbuf_submit_dynptr(&x.ptr, 0);
+
+	return 0;
+}
+
+/* A data slice can't be accessed out of bounds */
+SEC("?raw_tp/sys_nanosleep")
+int data_slice_out_of_bounds_ringbuf(void *ctx)
+{
+	struct bpf_dynptr ptr;
+	void *data;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, 8, 0, &ptr);
+
+	data  = bpf_dynptr_data(&ptr, 0, 8);
+	if (!data)
+		goto done;
+
+	/* can't index out of bounds of the data slice */
+	val = *((char *)data + 8);
+
+done:
+	bpf_ringbuf_submit_dynptr(&ptr, 0);
+	return 0;
+}
+
+SEC("?raw_tp/sys_nanosleep")
+int data_slice_out_of_bounds_map_value(void *ctx)
+{
+	__u32 key = 0, map_val;
+	struct bpf_dynptr ptr;
+	void *data;
+
+	get_map_val_dynptr(&ptr);
+
+	data  = bpf_dynptr_data(&ptr, 0, sizeof(map_val));
+	if (!data)
+		return 0;
+
+	/* can't index out of bounds of the data slice */
+	val = *((char *)data + (sizeof(map_val) + 1));
+
+	return 0;
+}
+
+/* A data slice can't be used after it has been released */
+SEC("?raw_tp/sys_nanosleep")
+int data_slice_use_after_release(void *ctx)
+{
+	struct bpf_dynptr ptr;
+	struct sample *sample;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(*sample), 0, &ptr);
+	sample = bpf_dynptr_data(&ptr, 0, sizeof(*sample));
+	if (!sample)
+		goto done;
+
+	sample->pid = 123;
+
+	bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+	/* this should fail */
+	val = sample->pid;
+
+	return 0;
+
+done:
+	bpf_ringbuf_discard_dynptr(&ptr, 0);
+	return 0;
+}
+
+/* A data slice must be first checked for NULL */
+SEC("?raw_tp/sys_nanosleep")
+int data_slice_missing_null_check1(void *ctx)
+{
+	struct bpf_dynptr ptr;
+	void *data;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, 8, 0, &ptr);
+
+	data  = bpf_dynptr_data(&ptr, 0, 8);
+
+	/* missing if (!data) check */
+
+	/* this should fail */
+	*(__u8 *)data = 3;
+
+	bpf_ringbuf_submit_dynptr(&ptr, 0);
+	return 0;
+}
+
+/* A data slice can't be dereferenced if it wasn't checked for null */
+SEC("?raw_tp/sys_nanosleep")
+int data_slice_missing_null_check2(void *ctx)
+{
+	struct bpf_dynptr ptr;
+	__u64 *data1, *data2;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, 16, 0, &ptr);
+
+	data1 = bpf_dynptr_data(&ptr, 0, 8);
+	data2 = bpf_dynptr_data(&ptr, 0, 8);
+	if (data1)
+		/* this should fail */
+		*data2 = 3;
+
+done:
+	bpf_ringbuf_discard_dynptr(&ptr, 0);
+	return 0;
+}
+
+/* Can't pass in a dynptr as an arg to a helper function that doesn't take in a
+ * dynptr argument
+ */
+SEC("?raw_tp/sys_nanosleep")
+int invalid_helper1(void *ctx)
+{
+	struct bpf_dynptr ptr;
+
+	get_map_val_dynptr(&ptr);
+
+	/* this should fail */
+	bpf_strncmp((const char *)&ptr, sizeof(ptr), "hello!");
+
+	return 0;
+}
+
+/* A dynptr can't be passed into a helper function at a non-zero offset */
+SEC("?raw_tp/sys_nanosleep")
+int invalid_helper2(void *ctx)
+{
+	struct bpf_dynptr ptr;
+	char read_data[64];
+
+	get_map_val_dynptr(&ptr);
+
+	/* this should fail */
+	bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 8, 0);
+
+	return 0;
+}
+
+/* A bpf_dynptr is invalidated if it's been written into */
+SEC("?raw_tp/sys_nanosleep")
+int invalid_write1(void *ctx)
+{
+	struct bpf_dynptr ptr;
+	void *data;
+	__u8 x = 0;
+
+	get_map_val_dynptr(&ptr);
+
+	memcpy(&ptr, &x, sizeof(x));
+
+	/* this should fail */
+	data = bpf_dynptr_data(&ptr, 0, 1);
+
+	return 0;
+}
+
+/*
+ * A bpf_dynptr can't be used as a dynptr if it has been written into at a fixed
+ * offset
+ */
+SEC("?raw_tp/sys_nanosleep")
+int invalid_write2(void *ctx)
+{
+	struct bpf_dynptr ptr;
+	char read_data[64];
+	__u8 x = 0;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr);
+
+	memcpy((void *)&ptr + 8, &x, sizeof(x));
+
+	/* this should fail */
+	bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0);
+
+	bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+	return 0;
+}
+
+/*
+ * A bpf_dynptr can't be used as a dynptr if it has been written into at a
+ * non-const offset
+ */
+SEC("?raw_tp/sys_nanosleep")
+int invalid_write3(void *ctx)
+{
+	struct bpf_dynptr ptr;
+	char stack_buf[16];
+	unsigned long len;
+	__u8 x = 0;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, 8, 0, &ptr);
+
+	memcpy(stack_buf, &val, sizeof(val));
+	len = stack_buf[0] & 0xf;
+
+	memcpy((void *)&ptr + len, &x, sizeof(x));
+
+	/* this should fail */
+	bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+	return 0;
+}
+
+static int invalid_write4_callback(__u32 index, void *data)
+{
+	*(__u32 *)data = 123;
+
+	return 0;
+}
+
+/* If the dynptr is written into in a callback function, it should
+ * be invalidated as a dynptr
+ */
+SEC("?raw_tp/sys_nanosleep")
+int invalid_write4(void *ctx)
+{
+	struct bpf_dynptr ptr;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr);
+
+	bpf_loop(10, invalid_write4_callback, &ptr, 0);
+
+	/* this should fail */
+	bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+	return 0;
+}
+
+/* A globally-defined bpf_dynptr can't be used (it must reside as a stack frame) */
+struct bpf_dynptr global_dynptr;
+SEC("?raw_tp/sys_nanosleep")
+int global(void *ctx)
+{
+	/* this should fail */
+	bpf_ringbuf_reserve_dynptr(&ringbuf, 16, 0, &global_dynptr);
+
+	bpf_ringbuf_discard_dynptr(&global_dynptr, 0);
+
+	return 0;
+}
+
+/* A direct read should fail */
+SEC("?raw_tp/sys_nanosleep")
+int invalid_read1(void *ctx)
+{
+	struct bpf_dynptr ptr;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr);
+
+	/* this should fail */
+	val = *(int *)&ptr;
+
+	bpf_ringbuf_discard_dynptr(&ptr, 0);
+
+	return 0;
+}
+
+/* A direct read at an offset should fail */
+SEC("?raw_tp/sys_nanosleep")
+int invalid_read2(void *ctx)
+{
+	struct bpf_dynptr ptr;
+	char read_data[64];
+
+	get_map_val_dynptr(&ptr);
+
+	/* this should fail */
+	bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 1, 0);
+
+	return 0;
+}
+
+/* A direct read at an offset into the lower stack slot should fail */
+SEC("?raw_tp/sys_nanosleep")
+int invalid_read3(void *ctx)
+{
+	struct bpf_dynptr ptr1, ptr2;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, 16, 0, &ptr1);
+	bpf_ringbuf_reserve_dynptr(&ringbuf, 16, 0, &ptr2);
+
+	/* this should fail */
+	memcpy(&val, (void *)&ptr1 + 8, sizeof(val));
+
+	bpf_ringbuf_discard_dynptr(&ptr1, 0);
+	bpf_ringbuf_discard_dynptr(&ptr2, 0);
+
+	return 0;
+}
+
+static int invalid_read4_callback(__u32 index, void *data)
+{
+	/* this should fail */
+	val = *(__u32 *)data;
+
+	return 0;
+}
+
+/* A direct read within a callback function should fail */
+SEC("?raw_tp/sys_nanosleep")
+int invalid_read4(void *ctx)
+{
+	struct bpf_dynptr ptr;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr);
+
+	bpf_loop(10, invalid_read4_callback, &ptr, 0);
+
+	bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+	return 0;
+}
+
+/* Initializing a dynptr on an offset should fail */
+SEC("?raw_tp/sys_nanosleep")
+int invalid_offset(void *ctx)
+{
+	struct bpf_dynptr ptr;
+
+	/* this should fail */
+	bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr + 1);
+
+	bpf_ringbuf_discard_dynptr(&ptr, 0);
+
+	return 0;
+}
+
+/* Can't release a dynptr twice */
+SEC("?raw_tp/sys_nanosleep")
+int release_twice(void *ctx)
+{
+	struct bpf_dynptr ptr;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, 16, 0, &ptr);
+
+	bpf_ringbuf_discard_dynptr(&ptr, 0);
+
+	/* this second release should fail */
+	bpf_ringbuf_discard_dynptr(&ptr, 0);
+
+	return 0;
+}
+
+static int release_twice_callback_fn(__u32 index, void *data)
+{
+	/* this should fail */
+	bpf_ringbuf_discard_dynptr(data, 0);
+
+	return 0;
+}
+
+/* Test that releasing a dynptr twice, where one of the releases happens
+ * within a calback function, fails
+ */
+SEC("?raw_tp/sys_nanosleep")
+int release_twice_callback(void *ctx)
+{
+	struct bpf_dynptr ptr;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, 32, 0, &ptr);
+
+	bpf_ringbuf_discard_dynptr(&ptr, 0);
+
+	bpf_loop(10, release_twice_callback_fn, &ptr, 0);
+
+	return 0;
+}
+
+/* Reject unsupported local mem types for dynptr_from_mem API */
+SEC("?raw_tp/sys_nanosleep")
+int dynptr_from_mem_invalid_api(void *ctx)
+{
+	struct bpf_dynptr ptr;
+	int x = 0;
+
+	/* this should fail */
+	bpf_dynptr_from_mem(&x, sizeof(x), 0, &ptr);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c
new file mode 100644
index 000000000000..d67be48df4b2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/dynptr_success.c
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Facebook */
+
+#include <string.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "errno.h"
+
+char _license[] SEC("license") = "GPL";
+
+int pid, err, val;
+
+struct sample {
+	int pid;
+	int seq;
+	long value;
+	char comm[16];
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_RINGBUF);
+} ringbuf SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u32);
+} array_map SEC(".maps");
+
+SEC("tp/syscalls/sys_enter_nanosleep")
+int test_read_write(void *ctx)
+{
+	char write_data[64] = "hello there, world!!";
+	char read_data[64] = {}, buf[64] = {};
+	struct bpf_dynptr ptr;
+	int i;
+
+	if (bpf_get_current_pid_tgid() >> 32 != pid)
+		return 0;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(write_data), 0, &ptr);
+
+	/* Write data into the dynptr */
+	err = err ?: bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data));
+
+	/* Read the data that was written into the dynptr */
+	err = err ?: bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0);
+
+	/* Ensure the data we read matches the data we wrote */
+	for (i = 0; i < sizeof(read_data); i++) {
+		if (read_data[i] != write_data[i]) {
+			err = 1;
+			break;
+		}
+	}
+
+	bpf_ringbuf_discard_dynptr(&ptr, 0);
+	return 0;
+}
+
+SEC("tp/syscalls/sys_enter_nanosleep")
+int test_data_slice(void *ctx)
+{
+	__u32 key = 0, val = 235, *map_val;
+	struct bpf_dynptr ptr;
+	__u32 map_val_size;
+	void *data;
+
+	map_val_size = sizeof(*map_val);
+
+	if (bpf_get_current_pid_tgid() >> 32 != pid)
+		return 0;
+
+	bpf_map_update_elem(&array_map, &key, &val, 0);
+
+	map_val = bpf_map_lookup_elem(&array_map, &key);
+	if (!map_val) {
+		err = 1;
+		return 0;
+	}
+
+	bpf_dynptr_from_mem(map_val, map_val_size, 0, &ptr);
+
+	/* Try getting a data slice that is out of range */
+	data = bpf_dynptr_data(&ptr, map_val_size + 1, 1);
+	if (data) {
+		err = 2;
+		return 0;
+	}
+
+	/* Try getting more bytes than available */
+	data = bpf_dynptr_data(&ptr, 0, map_val_size + 1);
+	if (data) {
+		err = 3;
+		return 0;
+	}
+
+	data = bpf_dynptr_data(&ptr, 0, sizeof(__u32));
+	if (!data) {
+		err = 4;
+		return 0;
+	}
+
+	*(__u32 *)data = 999;
+
+	err = bpf_probe_read_kernel(&val, sizeof(val), data);
+	if (err)
+		return 0;
+
+	if (val != *(int *)data)
+		err = 5;
+
+	return 0;
+}
+
+static int ringbuf_callback(__u32 index, void *data)
+{
+	struct sample *sample;
+
+	struct bpf_dynptr *ptr = (struct bpf_dynptr *)data;
+
+	sample = bpf_dynptr_data(ptr, 0, sizeof(*sample));
+	if (!sample)
+		err = 2;
+	else
+		sample->pid += index;
+
+	return 0;
+}
+
+SEC("tp/syscalls/sys_enter_nanosleep")
+int test_ringbuf(void *ctx)
+{
+	struct bpf_dynptr ptr;
+	struct sample *sample;
+
+	if (bpf_get_current_pid_tgid() >> 32 != pid)
+		return 0;
+
+	val = 100;
+
+	/* check that you can reserve a dynamic size reservation */
+	err = bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+	sample = err ? NULL : bpf_dynptr_data(&ptr, 0, sizeof(*sample));
+	if (!sample) {
+		err = 1;
+		goto done;
+	}
+
+	sample->pid = 10;
+
+	/* Can pass dynptr to callback functions */
+	bpf_loop(10, ringbuf_callback, &ptr, 0);
+
+	if (sample->pid != 55)
+		err = 2;
+
+done:
+	bpf_ringbuf_discard_dynptr(&ptr, 0);
+	return 0;
+}
-- 
cgit 


From 825be3b5abae1e67db45ff7d4b9a7764a2419bd9 Mon Sep 17 00:00:00 2001
From: Yang Weijiang <weijiang.yang@intel.com>
Date: Thu, 12 May 2022 04:40:46 -0400
Subject: KVM: selftests: x86: Fix test failure on arch lbr capable platforms

On Arch LBR capable platforms, LBR_FMT in perf capability msr is 0x3f,
so the last format test will fail. Use a true invalid format(0x30) for
the test if it's running on these platforms. Opportunistically change
the file name to reflect the tests actually carried out.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
Message-Id: <20220512084046.105479-1-weijiang.yang@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/Makefile               |   2 +-
 .../selftests/kvm/x86_64/vmx_pmu_caps_test.c       | 116 +++++++++++++++++++++
 .../selftests/kvm/x86_64/vmx_pmu_msrs_test.c       | 114 --------------------
 3 files changed, 117 insertions(+), 115 deletions(-)
 create mode 100644 tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
 delete mode 100644 tools/testing/selftests/kvm/x86_64/vmx_pmu_msrs_test.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index af582d168621..d0d09ca0d495 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -83,7 +83,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/xapic_state_test
 TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
 TEST_GEN_PROGS_x86_64 += x86_64/debug_regs
 TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_msrs_test
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_caps_test
 TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test
 TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test
 TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
new file mode 100644
index 000000000000..97b7fd4a9a3d
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for VMX-pmu perf capability msr
+ *
+ * Copyright (C) 2021 Intel Corporation
+ *
+ * Test to check the effect of various CPUID settings on
+ * MSR_IA32_PERF_CAPABILITIES MSR, and check that what
+ * we write with KVM_SET_MSR is _not_ modified by the guest
+ * and check it can be retrieved with KVM_GET_MSR, also test
+ * the invalid LBR formats are rejected.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <sys/ioctl.h>
+
+#include "kvm_util.h"
+#include "vmx.h"
+
+#define VCPU_ID	      0
+
+#define X86_FEATURE_PDCM	(1<<15)
+#define PMU_CAP_FW_WRITES	(1ULL << 13)
+#define PMU_CAP_LBR_FMT		0x3f
+
+union cpuid10_eax {
+	struct {
+		unsigned int version_id:8;
+		unsigned int num_counters:8;
+		unsigned int bit_width:8;
+		unsigned int mask_length:8;
+	} split;
+	unsigned int full;
+};
+
+union perf_capabilities {
+	struct {
+		u64	lbr_format:6;
+		u64	pebs_trap:1;
+		u64	pebs_arch_reg:1;
+		u64	pebs_format:4;
+		u64	smm_freeze:1;
+		u64	full_width_write:1;
+		u64 pebs_baseline:1;
+		u64	perf_metrics:1;
+		u64	pebs_output_pt_available:1;
+		u64	anythread_deprecated:1;
+	};
+	u64	capabilities;
+};
+
+static void guest_code(void)
+{
+	wrmsr(MSR_IA32_PERF_CAPABILITIES, PMU_CAP_LBR_FMT);
+}
+
+int main(int argc, char *argv[])
+{
+	struct kvm_cpuid2 *cpuid;
+	struct kvm_cpuid_entry2 *entry_1_0;
+	struct kvm_cpuid_entry2 *entry_a_0;
+	bool pdcm_supported = false;
+	struct kvm_vm *vm;
+	int ret;
+	union cpuid10_eax eax;
+	union perf_capabilities host_cap;
+
+	host_cap.capabilities = kvm_get_feature_msr(MSR_IA32_PERF_CAPABILITIES);
+	host_cap.capabilities &= (PMU_CAP_FW_WRITES | PMU_CAP_LBR_FMT);
+
+	/* Create VM */
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
+	cpuid = kvm_get_supported_cpuid();
+
+	if (kvm_get_cpuid_max_basic() >= 0xa) {
+		entry_1_0 = kvm_get_supported_cpuid_index(1, 0);
+		entry_a_0 = kvm_get_supported_cpuid_index(0xa, 0);
+		pdcm_supported = entry_1_0 && !!(entry_1_0->ecx & X86_FEATURE_PDCM);
+		eax.full = entry_a_0->eax;
+	}
+	if (!pdcm_supported) {
+		print_skip("MSR_IA32_PERF_CAPABILITIES is not supported by the vCPU");
+		exit(KSFT_SKIP);
+	}
+	if (!eax.split.version_id) {
+		print_skip("PMU is not supported by the vCPU");
+		exit(KSFT_SKIP);
+	}
+
+	/* testcase 1, set capabilities when we have PDCM bit */
+	vcpu_set_cpuid(vm, VCPU_ID, cpuid);
+	vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_FW_WRITES);
+
+	/* check capabilities can be retrieved with KVM_GET_MSR */
+	ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), PMU_CAP_FW_WRITES);
+
+	/* check whatever we write with KVM_SET_MSR is _not_ modified */
+	vcpu_run(vm, VCPU_ID);
+	ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), PMU_CAP_FW_WRITES);
+
+	/* testcase 2, check valid LBR formats are accepted */
+	vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, 0);
+	ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), 0);
+
+	vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, host_cap.lbr_format);
+	ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), (u64)host_cap.lbr_format);
+
+	/* testcase 3, check invalid LBR format is rejected */
+	/* Note, on Arch LBR capable platforms, LBR_FMT in perf capability msr is 0x3f,
+	 * to avoid the failure, use a true invalid format 0x30 for the test. */
+	ret = _vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, 0x30);
+	TEST_ASSERT(ret == 0, "Bad PERF_CAPABILITIES didn't fail.");
+
+	printf("Completed perf capability tests.\n");
+	kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_msrs_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_msrs_test.c
deleted file mode 100644
index 2454a1f2ca0c..000000000000
--- a/tools/testing/selftests/kvm/x86_64/vmx_pmu_msrs_test.c
+++ /dev/null
@@ -1,114 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * VMX-pmu related msrs test
- *
- * Copyright (C) 2021 Intel Corporation
- *
- * Test to check the effect of various CPUID settings
- * on the MSR_IA32_PERF_CAPABILITIES MSR, and check that
- * whatever we write with KVM_SET_MSR is _not_ modified
- * in the guest and test it can be retrieved with KVM_GET_MSR.
- *
- * Test to check that invalid LBR formats are rejected.
- */
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
-#include <sys/ioctl.h>
-
-#include "kvm_util.h"
-#include "vmx.h"
-
-#define VCPU_ID	      0
-
-#define X86_FEATURE_PDCM	(1<<15)
-#define PMU_CAP_FW_WRITES	(1ULL << 13)
-#define PMU_CAP_LBR_FMT		0x3f
-
-union cpuid10_eax {
-	struct {
-		unsigned int version_id:8;
-		unsigned int num_counters:8;
-		unsigned int bit_width:8;
-		unsigned int mask_length:8;
-	} split;
-	unsigned int full;
-};
-
-union perf_capabilities {
-	struct {
-		u64	lbr_format:6;
-		u64	pebs_trap:1;
-		u64	pebs_arch_reg:1;
-		u64	pebs_format:4;
-		u64	smm_freeze:1;
-		u64	full_width_write:1;
-		u64 pebs_baseline:1;
-		u64	perf_metrics:1;
-		u64	pebs_output_pt_available:1;
-		u64	anythread_deprecated:1;
-	};
-	u64	capabilities;
-};
-
-static void guest_code(void)
-{
-	wrmsr(MSR_IA32_PERF_CAPABILITIES, PMU_CAP_LBR_FMT);
-}
-
-int main(int argc, char *argv[])
-{
-	struct kvm_cpuid2 *cpuid;
-	struct kvm_cpuid_entry2 *entry_1_0;
-	struct kvm_cpuid_entry2 *entry_a_0;
-	bool pdcm_supported = false;
-	struct kvm_vm *vm;
-	int ret;
-	union cpuid10_eax eax;
-	union perf_capabilities host_cap;
-
-	host_cap.capabilities = kvm_get_feature_msr(MSR_IA32_PERF_CAPABILITIES);
-	host_cap.capabilities &= (PMU_CAP_FW_WRITES | PMU_CAP_LBR_FMT);
-
-	/* Create VM */
-	vm = vm_create_default(VCPU_ID, 0, guest_code);
-	cpuid = kvm_get_supported_cpuid();
-
-	if (kvm_get_cpuid_max_basic() >= 0xa) {
-		entry_1_0 = kvm_get_supported_cpuid_index(1, 0);
-		entry_a_0 = kvm_get_supported_cpuid_index(0xa, 0);
-		pdcm_supported = entry_1_0 && !!(entry_1_0->ecx & X86_FEATURE_PDCM);
-		eax.full = entry_a_0->eax;
-	}
-	if (!pdcm_supported) {
-		print_skip("MSR_IA32_PERF_CAPABILITIES is not supported by the vCPU");
-		exit(KSFT_SKIP);
-	}
-	if (!eax.split.version_id) {
-		print_skip("PMU is not supported by the vCPU");
-		exit(KSFT_SKIP);
-	}
-
-	/* testcase 1, set capabilities when we have PDCM bit */
-	vcpu_set_cpuid(vm, VCPU_ID, cpuid);
-	vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_FW_WRITES);
-
-	/* check capabilities can be retrieved with KVM_GET_MSR */
-	ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), PMU_CAP_FW_WRITES);
-
-	/* check whatever we write with KVM_SET_MSR is _not_ modified */
-	vcpu_run(vm, VCPU_ID);
-	ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), PMU_CAP_FW_WRITES);
-
-	/* testcase 2, check valid LBR formats are accepted */
-	vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, 0);
-	ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), 0);
-
-	vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, host_cap.lbr_format);
-	ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), (u64)host_cap.lbr_format);
-
-	/* testcase 3, check invalid LBR format is rejected */
-	ret = _vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_LBR_FMT);
-	TEST_ASSERT(ret == 0, "Bad PERF_CAPABILITIES didn't fail.");
-
-	kvm_vm_free(vm);
-}
-- 
cgit 


From 366d4a12cdcf3d83c8162ff6e0046c123567c754 Mon Sep 17 00:00:00 2001
From: Like Xu <likexu@tencent.com>
Date: Thu, 19 May 2022 01:01:17 +0800
Subject: KVM: selftests: x86: Sync the new name of the test case to .gitignore

Fixing side effect of the so-called opportunistic change in the commit.

Fixes: dc8a9febbab0 ("KVM: selftests: x86: Fix test failure on arch lbr capable platforms")
Signed-off-by: Like Xu <likexu@tencent.com>
Message-Id: <20220518170118.66263-2-likexu@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/.gitignore | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 54fc269c1788..4509a3a7eeae 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -55,7 +55,7 @@
 /x86_64/xen_shinfo_test
 /x86_64/xen_vmcall_test
 /x86_64/xss_msr_test
-/x86_64/vmx_pmu_msrs_test
+/x86_64/vmx_pmu_caps_test
 /access_tracking_perf_test
 /demand_paging_test
 /dirty_log_test
-- 
cgit 


From 7fb6378701dc0d8f19c1ac4623b55f5125f0e286 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 22 May 2022 16:18:51 +0200
Subject: cgroup: fix an error handling path in alloc_pagecache_max_30M()

If the first goto is taken, 'fd' is not opened yet (and is un-initialized).
So a direct return is safer.

Link: https://lkml.kernel.org/r/628312312eb40e0e39463a2c06415fde5295c716.1653229120.git.christophe.jaillet@wanadoo.fr
Fixes: c1a31a2f7a9c ("cgroup: fix racy check in alloc_pagecache_max_30M() helper function")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Zefan Li <lizefan.x@bytedance.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: David Vernet <void@manifault.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/cgroup/test_memcontrol.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
index 6ab94317c87b..44a974ec472c 100644
--- a/tools/testing/selftests/cgroup/test_memcontrol.c
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -574,7 +574,7 @@ static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
 	high = cg_read_long(cgroup, "memory.high");
 	max = cg_read_long(cgroup, "memory.max");
 	if (high != MB(30) && max != MB(30))
-		goto cleanup;
+		return -1;
 
 	fd = get_temp_fd();
 	if (fd < 0)
-- 
cgit 


From 33776141b81296e604a39a8065b28b89c61c7f74 Mon Sep 17 00:00:00 2001
From: Suren Baghdasaryan <surenb@google.com>
Date: Wed, 18 May 2022 13:43:16 -0700
Subject: selftests: vm: add process_mrelease tests

Introduce process_mrelease syscall sanity tests which include tests
which expect to fail:

- process_mrelease with invalid pidfd and flags inputs
- process_mrelease on a live process with no pending signals

and valid process_mrelease usage which is expected to succeed.  Because
process_mrelease has to be used against a process with a pending SIGKILL,
it's possible that the process exits before process_mrelease gets called.
In such cases we retry the test with a victim that allocates twice more
memory up to 1GB.  This would require the victim process to spend more
time during exit and process_mrelease has a better chance of catching the
process before it exits and succeeding.

On success the test reports the amount of memory the child had to allocate
for reaping to succeed.  Sample output:

$ mrelease_test
Success reaping a child with 1MB of memory allocations

On failure the test reports the failure. Sample outputs:

$ mrelease_test
All process_mrelease attempts failed!

$ mrelease_test
process_mrelease: Invalid argument

Link: https://lkml.kernel.org/r/20220518204316.13131-1-surenb@google.com
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Acked-by: Christian Brauner (Microsoft) <brauner@kernel.org>
Reviewed-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Roman Gushchin <guro@fb.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: "Kirill A . Shutemov" <kirill@shutemov.name>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Jann Horn <jannh@google.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/vm/.gitignore      |   1 +
 tools/testing/selftests/vm/Makefile        |   1 +
 tools/testing/selftests/vm/mrelease_test.c | 200 +++++++++++++++++++++++++++++
 tools/testing/selftests/vm/run_vmtests.sh  |   2 +
 4 files changed, 204 insertions(+)
 create mode 100644 tools/testing/selftests/vm/mrelease_test.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index 3cb4fa771ec2..6c2ac4208c27 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -10,6 +10,7 @@ map_populate
 thuge-gen
 compaction_test
 mlock2-tests
+mrelease_test
 mremap_dontunmap
 mremap_test
 on-fault-limit
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index f1228370e99b..8111a33e4824 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -44,6 +44,7 @@ TEST_GEN_FILES += memfd_secret
 TEST_GEN_FILES += migration
 TEST_GEN_FILES += mlock-random-test
 TEST_GEN_FILES += mlock2-tests
+TEST_GEN_FILES += mrelease_test
 TEST_GEN_FILES += mremap_dontunmap
 TEST_GEN_FILES += mremap_test
 TEST_GEN_FILES += on-fault-limit
diff --git a/tools/testing/selftests/vm/mrelease_test.c b/tools/testing/selftests/vm/mrelease_test.c
new file mode 100644
index 000000000000..96671c2f7d48
--- /dev/null
+++ b/tools/testing/selftests/vm/mrelease_test.c
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2022 Google LLC
+ */
+#define _GNU_SOURCE
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "util.h"
+
+#include "../kselftest.h"
+
+#ifndef __NR_pidfd_open
+#define __NR_pidfd_open -1
+#endif
+
+#ifndef __NR_process_mrelease
+#define __NR_process_mrelease -1
+#endif
+
+#define MB(x) (x << 20)
+#define MAX_SIZE_MB 1024
+
+static int alloc_noexit(unsigned long nr_pages, int pipefd)
+{
+	int ppid = getppid();
+	int timeout = 10; /* 10sec timeout to get killed */
+	unsigned long i;
+	char *buf;
+
+	buf = (char *)mmap(NULL, nr_pages * PAGE_SIZE, PROT_READ | PROT_WRITE,
+			   MAP_PRIVATE | MAP_ANON, 0, 0);
+	if (buf == MAP_FAILED) {
+		perror("mmap failed, halting the test");
+		return KSFT_FAIL;
+	}
+
+	for (i = 0; i < nr_pages; i++)
+		*((unsigned long *)(buf + (i * PAGE_SIZE))) = i;
+
+	/* Signal the parent that the child is ready */
+	if (write(pipefd, "", 1) < 0) {
+		perror("write");
+		return KSFT_FAIL;
+	}
+
+	/* Wait to be killed (when reparenting happens) */
+	while (getppid() == ppid && timeout > 0) {
+		sleep(1);
+		timeout--;
+	}
+
+	munmap(buf, nr_pages * PAGE_SIZE);
+
+	return (timeout > 0) ? KSFT_PASS : KSFT_FAIL;
+}
+
+/* The process_mrelease calls in this test are expected to fail */
+static void run_negative_tests(int pidfd)
+{
+	/* Test invalid flags. Expect to fail with EINVAL error code. */
+	if (!syscall(__NR_process_mrelease, pidfd, (unsigned int)-1) ||
+			errno != EINVAL) {
+		perror("process_mrelease with wrong flags");
+		exit(errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL);
+	}
+	/*
+	 * Test reaping while process is alive with no pending SIGKILL.
+	 * Expect to fail with EINVAL error code.
+	 */
+	if (!syscall(__NR_process_mrelease, pidfd, 0) || errno != EINVAL) {
+		perror("process_mrelease on a live process");
+		exit(errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL);
+	}
+}
+
+static int child_main(int pipefd[], size_t size)
+{
+	int res;
+
+	/* Allocate and fault-in memory and wait to be killed */
+	close(pipefd[0]);
+	res = alloc_noexit(MB(size) / PAGE_SIZE, pipefd[1]);
+	close(pipefd[1]);
+	return res;
+}
+
+int main(void)
+{
+	int pipefd[2], pidfd;
+	bool success, retry;
+	size_t size;
+	pid_t pid;
+	char byte;
+	int res;
+
+	/* Test a wrong pidfd */
+	if (!syscall(__NR_process_mrelease, -1, 0) || errno != EBADF) {
+		perror("process_mrelease with wrong pidfd");
+		exit(errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL);
+	}
+
+	/* Start the test with 1MB child memory allocation */
+	size = 1;
+retry:
+	/*
+	 * Pipe for the child to signal when it's done allocating
+	 * memory
+	 */
+	if (pipe(pipefd)) {
+		perror("pipe");
+		exit(KSFT_FAIL);
+	}
+	pid = fork();
+	if (pid < 0) {
+		perror("fork");
+		close(pipefd[0]);
+		close(pipefd[1]);
+		exit(KSFT_FAIL);
+	}
+
+	if (pid == 0) {
+		/* Child main routine */
+		res = child_main(pipefd, size);
+		exit(res);
+	}
+
+	/*
+	 * Parent main routine:
+	 * Wait for the child to finish allocations, then kill and reap
+	 */
+	close(pipefd[1]);
+	/* Block until the child is ready */
+	res = read(pipefd[0], &byte, 1);
+	close(pipefd[0]);
+	if (res < 0) {
+		perror("read");
+		if (!kill(pid, SIGKILL))
+			waitpid(pid, NULL, 0);
+		exit(KSFT_FAIL);
+	}
+
+	pidfd = syscall(__NR_pidfd_open, pid, 0);
+	if (pidfd < 0) {
+		perror("pidfd_open");
+		if (!kill(pid, SIGKILL))
+			waitpid(pid, NULL, 0);
+		exit(KSFT_FAIL);
+	}
+
+	/* Run negative tests which require a live child */
+	run_negative_tests(pidfd);
+
+	if (kill(pid, SIGKILL)) {
+		perror("kill");
+		exit(errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL);
+	}
+
+	success = (syscall(__NR_process_mrelease, pidfd, 0) == 0);
+	if (!success) {
+		/*
+		 * If we failed to reap because the child exited too soon,
+		 * before we could call process_mrelease. Double child's memory
+		 * which causes it to spend more time on cleanup and increases
+		 * our chances of reaping its memory before it exits.
+		 * Retry until we succeed or reach MAX_SIZE_MB.
+		 */
+		if (errno == ESRCH) {
+			retry = (size <= MAX_SIZE_MB);
+		} else {
+			perror("process_mrelease");
+			waitpid(pid, NULL, 0);
+			exit(errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL);
+		}
+	}
+
+	/* Cleanup to prevent zombies */
+	if (waitpid(pid, NULL, 0) < 0) {
+		perror("waitpid");
+		exit(KSFT_FAIL);
+	}
+	close(pidfd);
+
+	if (!success) {
+		if (retry) {
+			size *= 2;
+			goto retry;
+		}
+		printf("All process_mrelease attempts failed!\n");
+		exit(KSFT_FAIL);
+	}
+
+	printf("Success reaping a child with %zuMB of memory allocations\n",
+	       size);
+	return KSFT_PASS;
+}
diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh
index a2302b5faaf2..41fce8bea929 100755
--- a/tools/testing/selftests/vm/run_vmtests.sh
+++ b/tools/testing/selftests/vm/run_vmtests.sh
@@ -141,6 +141,8 @@ run_test ./mlock-random-test
 
 run_test ./mlock2-tests
 
+run_test ./mrelease_test
+
 run_test ./mremap_test
 
 run_test ./thuge-gen
-- 
cgit 


From 75c96ccea2e1de1342996722ee505d2cadedc0dd Mon Sep 17 00:00:00 2001
From: Julia Lawall <Julia.Lawall@inria.fr>
Date: Sat, 21 May 2022 13:11:30 +0200
Subject: selftests/vm/pkeys: fix typo in comment

Spelling mistake (triple letters) in comment.  Detected with the help of
Coccinelle.

Link: https://lkml.kernel.org/r/20220521111145.81697-80-Julia.Lawall@inria.fr
Signed-off-by: Julia Lawall <Julia.Lawall@inria.fr>
Reviewed-by: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/vm/protection_keys.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
index 2d0ae88665db..291bc1e07842 100644
--- a/tools/testing/selftests/vm/protection_keys.c
+++ b/tools/testing/selftests/vm/protection_keys.c
@@ -1523,7 +1523,7 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
 	/*
 	 * Reset the shadow, assuming that the above mprotect()
 	 * correctly changed PKRU, but to an unknown value since
-	 * the actual alllocated pkey is unknown.
+	 * the actual allocated pkey is unknown.
 	 */
 	shadow_pkey_reg = __read_pkey_reg();
 
-- 
cgit 


From 3d3921ed271b0e23d60c91fcad089f2f5e71af98 Mon Sep 17 00:00:00 2001
From: Muhammad Usama Anjum <usama.anjum@collabora.com>
Date: Sat, 21 May 2022 14:43:13 +0500
Subject: selftests: vm: add migration to the .gitignore

Add newly added migration test object to .gitignore file.

Link: https://lkml.kernel.org/r/20220521094313.166505-1-usama.anjum@collabora.com
Fixes: 0c2d08728470 ("mm: add selftests for migration entries")
Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Reviewed-by: Alistair Popple <apopple@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/vm/.gitignore | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index 6c2ac4208c27..31e5eea2a9b9 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -9,6 +9,7 @@ map_hugetlb
 map_populate
 thuge-gen
 compaction_test
+migration
 mlock2-tests
 mrelease_test
 mremap_dontunmap
-- 
cgit 


From 9aa1af954db02a3228763015356684a169503c68 Mon Sep 17 00:00:00 2001
From: Patrick Wang <patrick.wang.shcn@gmail.com>
Date: Sat, 21 May 2022 16:38:23 +0800
Subject: selftests: vm: check numa_available() before operating
 "merge_across_nodes" in ksm_tests

Patch series "selftests: vm: a few fixup patches".

This series contains three fixup patches for vm selftests.  They are
independent.  Please see the patches.


This patch (of 3):

Currently, ksm_tests operates "merge_across_nodes" with NUMA either
enabled or disabled.  In a system with NUMA disabled, these operations
will fail and output a misleading report given "merge_across_nodes" does
not exist in sysfs:

  ----------------------------
  running ./ksm_tests -M -p 10
  ----------------------------
  f /sys/kernel/mm/ksm/merge_across_nodes
  fopen: No such file or directory
  Cannot save default tunables
  [FAIL]
  ----------------------

So check numa_available() before those operations to skip them if NUMA is
disabled.

Link: https://lkml.kernel.org/r/20220521083825.319654-1-patrick.wang.shcn@gmail.com
Link: https://lkml.kernel.org/r/20220521083825.319654-2-patrick.wang.shcn@gmail.com
Signed-off-by: Patrick Wang <patrick.wang.shcn@gmail.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/vm/ksm_tests.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/ksm_tests.c b/tools/testing/selftests/vm/ksm_tests.c
index fd85f15869d1..2fcf24312da8 100644
--- a/tools/testing/selftests/vm/ksm_tests.c
+++ b/tools/testing/selftests/vm/ksm_tests.c
@@ -221,7 +221,8 @@ static bool assert_ksm_pages_count(long dupl_page_count)
 static int ksm_save_def(struct ksm_sysfs *ksm_sysfs)
 {
 	if (ksm_read_sysfs(KSM_FP("max_page_sharing"), &ksm_sysfs->max_page_sharing) ||
-	    ksm_read_sysfs(KSM_FP("merge_across_nodes"), &ksm_sysfs->merge_across_nodes) ||
+	    numa_available() ? 0 :
+		ksm_read_sysfs(KSM_FP("merge_across_nodes"), &ksm_sysfs->merge_across_nodes) ||
 	    ksm_read_sysfs(KSM_FP("sleep_millisecs"), &ksm_sysfs->sleep_millisecs) ||
 	    ksm_read_sysfs(KSM_FP("pages_to_scan"), &ksm_sysfs->pages_to_scan) ||
 	    ksm_read_sysfs(KSM_FP("run"), &ksm_sysfs->run) ||
@@ -236,7 +237,8 @@ static int ksm_save_def(struct ksm_sysfs *ksm_sysfs)
 static int ksm_restore(struct ksm_sysfs *ksm_sysfs)
 {
 	if (ksm_write_sysfs(KSM_FP("max_page_sharing"), ksm_sysfs->max_page_sharing) ||
-	    ksm_write_sysfs(KSM_FP("merge_across_nodes"), ksm_sysfs->merge_across_nodes) ||
+	    numa_available() ? 0 :
+		ksm_write_sysfs(KSM_FP("merge_across_nodes"), ksm_sysfs->merge_across_nodes) ||
 	    ksm_write_sysfs(KSM_FP("pages_to_scan"), ksm_sysfs->pages_to_scan) ||
 	    ksm_write_sysfs(KSM_FP("run"), ksm_sysfs->run) ||
 	    ksm_write_sysfs(KSM_FP("sleep_millisecs"), ksm_sysfs->sleep_millisecs) ||
@@ -720,7 +722,8 @@ int main(int argc, char *argv[])
 
 	if (ksm_write_sysfs(KSM_FP("run"), 2) ||
 	    ksm_write_sysfs(KSM_FP("sleep_millisecs"), 0) ||
-	    ksm_write_sysfs(KSM_FP("merge_across_nodes"), 1) ||
+	    numa_available() ? 0 :
+		ksm_write_sysfs(KSM_FP("merge_across_nodes"), 1) ||
 	    ksm_write_sysfs(KSM_FP("pages_to_scan"), page_count))
 		return KSFT_FAIL;
 
-- 
cgit 


From ccd2a1201d267bf6f1950bf31cfd55fb4e17a231 Mon Sep 17 00:00:00 2001
From: Patrick Wang <patrick.wang.shcn@gmail.com>
Date: Sat, 21 May 2022 16:38:24 +0800
Subject: selftests: vm: add "test_hmm.sh" to TEST_FILES

The "test_hmm.sh" file used by run_vmtests.sh dose not be installed into
INSTALL_PATH.  Thus run_vmtests.sh can not call it in INSTALL_PATH:

  ---------------------------
  running ./test_hmm.sh smoke
  ---------------------------
  ./run_vmtests.sh: line 74: ./test_hmm.sh: No such file or directory
  [FAIL]
  -----------------------

Add "test_hmm.sh" to TEST_FILES so that it will be installed.

Link: https://lkml.kernel.org/r/20220521083825.319654-3-patrick.wang.shcn@gmail.com
Signed-off-by: Patrick Wang <patrick.wang.shcn@gmail.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/vm/Makefile | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 8111a33e4824..064bfae6dd0d 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -92,6 +92,7 @@ endif
 TEST_PROGS := run_vmtests.sh
 
 TEST_FILES := test_vmalloc.sh
+TEST_FILES += test_hmm.sh
 
 KSFT_KHDR_INSTALL := 1
 include ../lib.mk
-- 
cgit 


From 0598739900071feff82b89f1515f963a6889b330 Mon Sep 17 00:00:00 2001
From: Patrick Wang <patrick.wang.shcn@gmail.com>
Date: Sat, 21 May 2022 16:38:25 +0800
Subject: selftests: vm: add the "settings" file with timeout variable

The default "timeout" for one kselftest is 45 seconds, while some cases in
run_vmtests.sh require more time.  This will cause testing timeout like:

  not ok 4 selftests: vm: run_vmtests.sh # TIMEOUT 45 seconds

Therefore, add the "settings" file with timeout variable so users can set
the "timeout" value.

Link: https://lkml.kernel.org/r/20220521083825.319654-4-patrick.wang.shcn@gmail.com
Signed-off-by: Patrick Wang <patrick.wang.shcn@gmail.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/vm/settings | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 tools/testing/selftests/vm/settings

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/settings b/tools/testing/selftests/vm/settings
new file mode 100644
index 000000000000..9abfc60e9e6f
--- /dev/null
+++ b/tools/testing/selftests/vm/settings
@@ -0,0 +1 @@
+timeout=45
-- 
cgit 


From 215cd9897afbce9a964d4afbeec58c086d6cb170 Mon Sep 17 00:00:00 2001
From: luyun <luyun@kylinos.cn>
Date: Wed, 25 May 2022 11:18:19 +0800
Subject: selftests/net: enable lo.accept_local in psock_snd test

The psock_snd test sends and receives packets over loopback, and
the test results depend on parameter settings:
Set rp_filter=0,
or set rp_filter=1 and accept_local=1
so that the test will pass. Otherwise, this test will fail with
Resource temporarily unavailable:
sudo ./psock_snd.sh
dgram
tx: 128
rx: 142
./psock_snd: recv: Resource temporarily unavailable

For most distro kernel releases(like Ubuntu or Centos), the parameter
rp_filter is enabled by default, so it's necessary to enable the
parameter lo.accept_local in psock_snd test. And this test runs
inside a netns, changing a sysctl is fine.

Signed-off-by: luyun <luyun@kylinos.cn>
Reviewed-by: Jackie Liu <liuyun01@kylinos.cn>
Tested-by: Hangbin Liu <liuhangbin@gmail.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Link: https://lore.kernel.org/r/20220525031819.866684-1-luyun_611@163.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/psock_snd.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/psock_snd.c b/tools/testing/selftests/net/psock_snd.c
index 7d15e10a9fb6..edf1e6f80d41 100644
--- a/tools/testing/selftests/net/psock_snd.c
+++ b/tools/testing/selftests/net/psock_snd.c
@@ -389,6 +389,8 @@ int main(int argc, char **argv)
 		error(1, errno, "ip link set mtu");
 	if (system("ip addr add dev lo 172.17.0.1/24"))
 		error(1, errno, "ip addr add");
+	if (system("sysctl -w net.ipv4.conf.lo.accept_local=1"))
+		error(1, errno, "sysctl lo.accept_local");
 
 	run_test();
 
-- 
cgit 


From 59ed76fe2f981bccde37bdddb465f260a96a2404 Mon Sep 17 00:00:00 2001
From: Song Liu <song@kernel.org>
Date: Thu, 26 May 2022 12:16:08 -0700
Subject: selftests/bpf: fix stacktrace_build_id with missing
 kprobe/urandom_read

Kernel function urandom_read is replaced with urandom_read_iter.
Therefore, kprobe on urandom_read is not working any more:

[root@eth50-1 bpf]# ./test_progs -n 161
test_stacktrace_build_id:PASS:skel_open_and_load 0 nsec
libbpf: kprobe perf_event_open() failed: No such file or directory
libbpf: prog 'oncpu': failed to create kprobe 'urandom_read+0x0' \
        perf event: No such file or directory
libbpf: prog 'oncpu': failed to auto-attach: -2
test_stacktrace_build_id:FAIL:attach_tp err -2
161     stacktrace_build_id:FAIL

Fix this by replacing urandom_read with urandom_read_iter in the test.

Fixes: 1b388e7765f2 ("random: convert to using fops->read_iter()")
Reported-by: Mykola Lysenko <mykolal@fb.com>
Signed-off-by: Song Liu <song@kernel.org>
Acked-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20220526191608.2364049-1-song@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
index 6c62bfb8bb6f..0c4426592a26 100644
--- a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
+++ b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
@@ -39,7 +39,7 @@ struct {
 	__type(value, stack_trace_t);
 } stack_amap SEC(".maps");
 
-SEC("kprobe/urandom_read")
+SEC("kprobe/urandom_read_iter")
 int oncpu(struct pt_regs *args)
 {
 	__u32 max_len = sizeof(struct bpf_stack_build_id)
-- 
cgit 


From ff3b72a5d614702ec119f107bddd99cdad622b44 Mon Sep 17 00:00:00 2001
From: Michal Koutný <mkoutny@suse.com>
Date: Wed, 18 May 2022 18:18:55 +0200
Subject: selftests: memcg: fix compilation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Patch series "memcontrol selftests fixups", v2.

Flushing the patches to make memcontrol selftests check the events
behavior we had consensus about (test_memcg_low fails).

(test_memcg_reclaim, test_memcg_swap_max fail for me now but it's present
even before the refactoring.)

The two bigger changes are:
- adjustment of the protected values to make tests succeed with the given
  tolerance,
- both test_memcg_low and test_memcg_min check protection of memory in
  populated cgroups (actually as per Documentation/admin-guide/cgroup-v2.rst
  memory.min should not apply to empty cgroups, which is not the case
  currently. Therefore I unified tests with the populated case in order to to
  bring more broken tests).


This patch (of 5):

This fixes mis-applied changes from commit 72b1e03aa725 ("cgroup: account
for memory_localevents in test_memcg_oom_group_leaf_events()").

Link: https://lkml.kernel.org/r/20220518161859.21565-1-mkoutny@suse.com
Link: https://lkml.kernel.org/r/20220518161859.21565-2-mkoutny@suse.com
Signed-off-by: Michal Koutný <mkoutny@suse.com>
Reviewed-by: David Vernet <void@manifault.com>
Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Richard Palethorpe <rpalethorpe@suse.de>
Cc: Shakeel Butt <shakeelb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/cgroup/test_memcontrol.c | 25 +++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
index 44a974ec472c..9b8213479b8b 100644
--- a/tools/testing/selftests/cgroup/test_memcontrol.c
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -1241,7 +1241,16 @@ static int test_memcg_oom_group_leaf_events(const char *root)
 	if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0)
 		goto cleanup;
 
-	if (cg_read_key_long(parent, "memory.events", "oom_kill ") <= 0)
+	parent_oom_events = cg_read_key_long(
+			parent, "memory.events", "oom_kill ");
+	/*
+	 * If memory_localevents is not enabled (the default), the parent should
+	 * count OOM events in its children groups. Otherwise, it should not
+	 * have observed any events.
+	 */
+	if (has_localevents && parent_oom_events != 0)
+		goto cleanup;
+	else if (!has_localevents && parent_oom_events <= 0)
 		goto cleanup;
 
 	ret = KSFT_PASS;
@@ -1349,20 +1358,14 @@ static int test_memcg_oom_group_score_events(const char *root)
 	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
 		goto cleanup;
 
-	parent_oom_events = cg_read_key_long(
-			parent, "memory.events", "oom_kill ");
-	/*
-	 * If memory_localevents is not enabled (the default), the parent should
-	 * count OOM events in its children groups. Otherwise, it should not
-	 * have observed any events.
-	 */
-	if ((has_localevents && parent_oom_events == 0) ||
-	     parent_oom_events > 0)
-		ret = KSFT_PASS;
+	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3)
+		goto cleanup;
 
 	if (kill(safe_pid, SIGKILL))
 		goto cleanup;
 
+	ret = KSFT_PASS;
+
 cleanup:
 	if (memcg)
 		cg_destroy(memcg);
-- 
cgit 


From 1d09069f5313f7c35655dc6d405896f748ddc53f Mon Sep 17 00:00:00 2001
From: Michal Koutný <mkoutny@suse.com>
Date: Wed, 18 May 2022 18:18:56 +0200
Subject: selftests: memcg: expect no low events in unprotected sibling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is effectively a revert of commit cdc69458a5f3 ("cgroup: account for
memory_recursiveprot in test_memcg_low()").  The case test_memcg_low will
fail with memory_recursiveprot until resolved in reclaim code.

However, this patch preserves the existing helpers and variables for later
uses.

Link: https://lkml.kernel.org/r/20220518161859.21565-3-mkoutny@suse.com
Signed-off-by: Michal Koutný <mkoutny@suse.com>
Reviewed-by: David Vernet <void@manifault.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Richard Palethorpe <rpalethorpe@suse.de>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Shakeel Butt <shakeelb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/cgroup/test_memcontrol.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
index 9b8213479b8b..7514bf7c0c3e 100644
--- a/tools/testing/selftests/cgroup/test_memcontrol.c
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -528,7 +528,7 @@ static int test_memcg_low(const char *root)
 	}
 
 	for (i = 0; i < ARRAY_SIZE(children); i++) {
-		int no_low_events_index = has_recursiveprot ? 2 : 1;
+		int no_low_events_index = 1;
 
 		oom = cg_read_key_long(children[i], "memory.events", "oom ");
 		low = cg_read_key_long(children[i], "memory.events", "low ");
-- 
cgit 


From f10b6e9a8e6621f6db2acfbf722a6331f3afaa84 Mon Sep 17 00:00:00 2001
From: Michal Koutný <mkoutny@suse.com>
Date: Wed, 18 May 2022 18:18:57 +0200
Subject: selftests: memcg: adjust expected reclaim values of protected cgroups
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The numbers are not easy to derive in a closed form (certainly mere
protections ratios do not apply), therefore use a simulation to obtain
expected numbers.

Link: https://lkml.kernel.org/r/20220518161859.21565-4-mkoutny@suse.com
Signed-off-by: Michal Koutný <mkoutny@suse.com>
Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
Cc: David Vernet <void@manifault.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Richard Palethorpe <rpalethorpe@suse.de>
Cc: Shakeel Butt <shakeelb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 MAINTAINERS                                       |  1 +
 tools/testing/selftests/cgroup/memcg_protection.m | 89 +++++++++++++++++++++++
 tools/testing/selftests/cgroup/test_memcontrol.c  | 29 +++++---
 3 files changed, 107 insertions(+), 12 deletions(-)
 create mode 100644 tools/testing/selftests/cgroup/memcg_protection.m

(limited to 'tools/testing')

diff --git a/MAINTAINERS b/MAINTAINERS
index d8c18f80bcf3..36efbe46f9eb 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5029,6 +5029,7 @@ L:	linux-mm@kvack.org
 S:	Maintained
 F:	mm/memcontrol.c
 F:	mm/swap_cgroup.c
+F:	tools/testing/selftests/cgroup/memcg_protection.m
 F:	tools/testing/selftests/cgroup/test_kmem.c
 F:	tools/testing/selftests/cgroup/test_memcontrol.c
 
diff --git a/tools/testing/selftests/cgroup/memcg_protection.m b/tools/testing/selftests/cgroup/memcg_protection.m
new file mode 100644
index 000000000000..051daa3477b6
--- /dev/null
+++ b/tools/testing/selftests/cgroup/memcg_protection.m
@@ -0,0 +1,89 @@
+% SPDX-License-Identifier: GPL-2.0
+%
+% run as: octave-cli memcg_protection.m
+%
+% This script simulates reclaim protection behavior on a single level of memcg
+% hierarchy to illustrate how overcommitted protection spreads among siblings
+% (as it depends also on their current consumption).
+%
+% Simulation assumes siblings consumed the initial amount of memory (w/out
+% reclaim) and then the reclaim starts, all memory is reclaimable, i.e. treated
+% same. It simulates only non-low reclaim and assumes all memory.min = 0.
+%
+% Input configurations
+% --------------------
+% E number	parent effective protection
+% n vector	nominal protection of siblings set at the given level (memory.low)
+% c vector	current consumption -,,- (memory.current)
+
+% example from testcase (values in GB)
+E = 50 / 1024;
+n = [75 25 0 500 ] / 1024;
+c = [50 50 50 0] / 1024;
+
+% Reclaim parameters
+% ------------------
+
+% Minimal reclaim amount (GB)
+cluster = 32*4 / 2**20;
+
+% Reclaim coefficient (think as 0.5^sc->priority)
+alpha = .1
+
+% Simulation parameters
+% ---------------------
+epsilon = 1e-7;
+timeout = 1000;
+
+% Simulation loop
+% ---------------
+
+ch = [];
+eh = [];
+rh = [];
+
+for t = 1:timeout
+        % low_usage
+        u = min(c, n);
+        siblings = sum(u);
+
+        % effective_protection()
+        protected = min(n, c);                % start with nominal
+        e = protected * min(1, E / siblings); % normalize overcommit
+
+        % recursive protection
+        unclaimed = max(0, E - siblings);
+        parent_overuse = sum(c) - siblings;
+        if (unclaimed > 0 && parent_overuse > 0)
+                overuse = max(0, c - protected);
+                e += unclaimed * (overuse / parent_overuse);
+        endif
+
+        % get_scan_count()
+        r = alpha * c;             % assume all memory is in a single LRU list
+
+        % commit 1bc63fb1272b ("mm, memcg: make scan aggression always exclude protection")
+        sz = max(e, c);
+        r .*= (1 - (e+epsilon) ./ (sz+epsilon));
+
+        % uncomment to debug prints
+        % e, c, r
+
+        % nothing to reclaim, reached equilibrium
+        if max(r) < epsilon
+                break;
+        endif
+
+        % SWAP_CLUSTER_MAX roundup
+        r = max(r, (r > epsilon) .* cluster);
+        % XXX here I do parallel reclaim of all siblings
+        % in reality reclaim is serialized and each sibling recalculates own residual
+        c = max(c - r, 0);
+
+        ch = [ch ; c];
+        eh = [eh ; e];
+        rh = [rh ; r];
+endfor
+
+t
+c, e
diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
index 7514bf7c0c3e..955eee05d60e 100644
--- a/tools/testing/selftests/cgroup/test_memcontrol.c
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -248,7 +248,7 @@ static int cg_test_proc_killed(const char *cgroup)
 /*
  * First, this test creates the following hierarchy:
  * A       memory.min = 50M,  memory.max = 200M
- * A/B     memory.min = 50M,  memory.current = 50M
+ * A/B     memory.min = 50M
  * A/B/C   memory.min = 75M,  memory.current = 50M
  * A/B/D   memory.min = 25M,  memory.current = 50M
  * A/B/E   memory.min = 0,    memory.current = 50M
@@ -259,10 +259,13 @@ static int cg_test_proc_killed(const char *cgroup)
  * Then it creates A/G and creates a significant
  * memory pressure in it.
  *
+ * Then it checks actual memory usages and expects that:
  * A/B    memory.current ~= 50M
- * A/B/C  memory.current ~= 33M
- * A/B/D  memory.current ~= 17M
- * A/B/F  memory.current ~= 0
+ * A/B/C  memory.current ~= 29M
+ * A/B/D  memory.current ~= 21M
+ * A/B/E  memory.current ~= 0
+ * A/B/F  memory.current  = 0
+ * (for origin of the numbers, see model in memcg_protection.m.)
  *
  * After that it tries to allocate more than there is
  * unprotected memory in A available, and checks
@@ -365,10 +368,10 @@ static int test_memcg_min(const char *root)
 	for (i = 0; i < ARRAY_SIZE(children); i++)
 		c[i] = cg_read_long(children[i], "memory.current");
 
-	if (!values_close(c[0], MB(33), 10))
+	if (!values_close(c[0], MB(29), 10))
 		goto cleanup;
 
-	if (!values_close(c[1], MB(17), 10))
+	if (!values_close(c[1], MB(21), 10))
 		goto cleanup;
 
 	if (c[3] != 0)
@@ -405,7 +408,7 @@ cleanup:
 /*
  * First, this test creates the following hierarchy:
  * A       memory.low = 50M,  memory.max = 200M
- * A/B     memory.low = 50M,  memory.current = 50M
+ * A/B     memory.low = 50M
  * A/B/C   memory.low = 75M,  memory.current = 50M
  * A/B/D   memory.low = 25M,  memory.current = 50M
  * A/B/E   memory.low = 0,    memory.current = 50M
@@ -417,9 +420,11 @@ cleanup:
  *
  * Then it checks actual memory usages and expects that:
  * A/B    memory.current ~= 50M
- * A/B/   memory.current ~= 33M
- * A/B/D  memory.current ~= 17M
- * A/B/F  memory.current ~= 0
+ * A/B/C  memory.current ~= 29M
+ * A/B/D  memory.current ~= 21M
+ * A/B/E  memory.current ~= 0
+ * A/B/F  memory.current  = 0
+ * (for origin of the numbers, see model in memcg_protection.m.)
  *
  * After that it tries to allocate more than there is
  * unprotected memory in A available,
@@ -512,10 +517,10 @@ static int test_memcg_low(const char *root)
 	for (i = 0; i < ARRAY_SIZE(children); i++)
 		c[i] = cg_read_long(children[i], "memory.current");
 
-	if (!values_close(c[0], MB(33), 10))
+	if (!values_close(c[0], MB(29), 10))
 		goto cleanup;
 
-	if (!values_close(c[1], MB(17), 10))
+	if (!values_close(c[1], MB(21), 10))
 		goto cleanup;
 
 	if (c[3] != 0)
-- 
cgit 


From 6a35919005d4146aee14339a6cd52286465e5023 Mon Sep 17 00:00:00 2001
From: Michal Koutný <mkoutny@suse.com>
Date: Wed, 18 May 2022 18:18:58 +0200
Subject: selftests: memcg: remove protection from top level memcg
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The reclaim is triggered by memory limit in a subtree, therefore the
testcase does not need configured protection against external reclaim.

Also, correct respective comments.

Link: https://lkml.kernel.org/r/20220518161859.21565-5-mkoutny@suse.com
Signed-off-by: Michal Koutný <mkoutny@suse.com>
Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
Cc: David Vernet <void@manifault.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Richard Palethorpe <rpalethorpe@suse.de>
Cc: Shakeel Butt <shakeelb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/cgroup/test_memcontrol.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
index 955eee05d60e..aa70999ace80 100644
--- a/tools/testing/selftests/cgroup/test_memcontrol.c
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -247,7 +247,7 @@ static int cg_test_proc_killed(const char *cgroup)
 
 /*
  * First, this test creates the following hierarchy:
- * A       memory.min = 50M,  memory.max = 200M
+ * A       memory.min = 0,    memory.max = 200M
  * A/B     memory.min = 50M
  * A/B/C   memory.min = 75M,  memory.current = 50M
  * A/B/D   memory.min = 25M,  memory.current = 50M
@@ -257,7 +257,7 @@ static int cg_test_proc_killed(const char *cgroup)
  * Usages are pagecache, but the test keeps a running
  * process in every leaf cgroup.
  * Then it creates A/G and creates a significant
- * memory pressure in it.
+ * memory pressure in A.
  *
  * Then it checks actual memory usages and expects that:
  * A/B    memory.current ~= 50M
@@ -338,8 +338,6 @@ static int test_memcg_min(const char *root)
 			      (void *)(long)fd);
 	}
 
-	if (cg_write(parent[0], "memory.min", "50M"))
-		goto cleanup;
 	if (cg_write(parent[1], "memory.min", "50M"))
 		goto cleanup;
 	if (cg_write(children[0], "memory.min", "75M"))
@@ -407,7 +405,7 @@ cleanup:
 
 /*
  * First, this test creates the following hierarchy:
- * A       memory.low = 50M,  memory.max = 200M
+ * A       memory.low = 0,    memory.max = 200M
  * A/B     memory.low = 50M
  * A/B/C   memory.low = 75M,  memory.current = 50M
  * A/B/D   memory.low = 25M,  memory.current = 50M
@@ -495,8 +493,6 @@ static int test_memcg_low(const char *root)
 			goto cleanup;
 	}
 
-	if (cg_write(parent[0], "memory.low", "50M"))
-		goto cleanup;
 	if (cg_write(parent[1], "memory.low", "50M"))
 		goto cleanup;
 	if (cg_write(children[0], "memory.low", "75M"))
-- 
cgit 


From f079a020ba95b568329806aa13c62e6103cade3c Mon Sep 17 00:00:00 2001
From: Michal Koutný <mkoutny@suse.com>
Date: Wed, 18 May 2022 18:18:59 +0200
Subject: selftests: memcg: factor out common parts of memory.{low,min} tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The memory protection test setup and runtime is almost equal for
memory.low and memory.min cases.

It makes modification of the common parts prone to mistakes, since the
protections are similar not only in setup but also in principle, factor
the common part out.

Past exceptions between the tests:
- missing memory.min is fine (kept),
- test_memcg_low protected orphaned pagecache (adapted like
  test_memcg_min and we keep the processes of protected memory running).

The evaluation in two tests is different (OOM of allocator vs low events
of protégés), this is kept different.

Link: https://lkml.kernel.org/r/20220518161859.21565-6-mkoutny@suse.com
Signed-off-by: Michal Koutný <mkoutny@suse.com>
Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
CC: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Richard Palethorpe <rpalethorpe@suse.de>
Cc: David Vernet <void@manifault.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/cgroup/test_memcontrol.c | 199 ++++-------------------
 1 file changed, 36 insertions(+), 163 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
index aa70999ace80..8833359556f3 100644
--- a/tools/testing/selftests/cgroup/test_memcontrol.c
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -190,13 +190,6 @@ cleanup:
 	return ret;
 }
 
-static int alloc_pagecache_50M(const char *cgroup, void *arg)
-{
-	int fd = (long)arg;
-
-	return alloc_pagecache(fd, MB(50));
-}
-
 static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
 {
 	int fd = (long)arg;
@@ -254,7 +247,9 @@ static int cg_test_proc_killed(const char *cgroup)
  * A/B/E   memory.min = 0,    memory.current = 50M
  * A/B/F   memory.min = 500M, memory.current = 0
  *
- * Usages are pagecache, but the test keeps a running
+ * (or memory.low if we test soft protection)
+ *
+ * Usages are pagecache and the test keeps a running
  * process in every leaf cgroup.
  * Then it creates A/G and creates a significant
  * memory pressure in A.
@@ -268,15 +263,16 @@ static int cg_test_proc_killed(const char *cgroup)
  * (for origin of the numbers, see model in memcg_protection.m.)
  *
  * After that it tries to allocate more than there is
- * unprotected memory in A available, and checks
- * checks that memory.min protects pagecache even
- * in this case.
+ * unprotected memory in A available, and checks that:
+ * a) memory.min protects pagecache even in this case,
+ * b) memory.low allows reclaiming page cache with low events.
  */
-static int test_memcg_min(const char *root)
+static int test_memcg_protection(const char *root, bool min)
 {
-	int ret = KSFT_FAIL;
+	int ret = KSFT_FAIL, rc;
 	char *parent[3] = {NULL};
 	char *children[4] = {NULL};
+	const char *attribute = min ? "memory.min" : "memory.low";
 	long c[4];
 	int i, attempts;
 	int fd;
@@ -300,8 +296,10 @@ static int test_memcg_min(const char *root)
 	if (cg_create(parent[0]))
 		goto cleanup;
 
-	if (cg_read_long(parent[0], "memory.min")) {
-		ret = KSFT_SKIP;
+	if (cg_read_long(parent[0], attribute)) {
+		/* No memory.min on older kernels is fine */
+		if (min)
+			ret = KSFT_SKIP;
 		goto cleanup;
 	}
 
@@ -338,15 +336,15 @@ static int test_memcg_min(const char *root)
 			      (void *)(long)fd);
 	}
 
-	if (cg_write(parent[1], "memory.min", "50M"))
+	if (cg_write(parent[1],   attribute, "50M"))
 		goto cleanup;
-	if (cg_write(children[0], "memory.min", "75M"))
+	if (cg_write(children[0], attribute, "75M"))
 		goto cleanup;
-	if (cg_write(children[1], "memory.min", "25M"))
+	if (cg_write(children[1], attribute, "25M"))
 		goto cleanup;
-	if (cg_write(children[2], "memory.min", "0"))
+	if (cg_write(children[2], attribute, "0"))
 		goto cleanup;
-	if (cg_write(children[3], "memory.min", "500M"))
+	if (cg_write(children[3], attribute, "500M"))
 		goto cleanup;
 
 	attempts = 0;
@@ -375,161 +373,26 @@ static int test_memcg_min(const char *root)
 	if (c[3] != 0)
 		goto cleanup;
 
-	if (!cg_run(parent[2], alloc_anon, (void *)MB(170)))
-		goto cleanup;
-
-	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
-		goto cleanup;
-
-	ret = KSFT_PASS;
-
-cleanup:
-	for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
-		if (!children[i])
-			continue;
-
-		cg_destroy(children[i]);
-		free(children[i]);
-	}
-
-	for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
-		if (!parent[i])
-			continue;
-
-		cg_destroy(parent[i]);
-		free(parent[i]);
-	}
-	close(fd);
-	return ret;
-}
-
-/*
- * First, this test creates the following hierarchy:
- * A       memory.low = 0,    memory.max = 200M
- * A/B     memory.low = 50M
- * A/B/C   memory.low = 75M,  memory.current = 50M
- * A/B/D   memory.low = 25M,  memory.current = 50M
- * A/B/E   memory.low = 0,    memory.current = 50M
- * A/B/F   memory.low = 500M, memory.current = 0
- *
- * Usages are pagecache.
- * Then it creates A/G an creates a significant
- * memory pressure in it.
- *
- * Then it checks actual memory usages and expects that:
- * A/B    memory.current ~= 50M
- * A/B/C  memory.current ~= 29M
- * A/B/D  memory.current ~= 21M
- * A/B/E  memory.current ~= 0
- * A/B/F  memory.current  = 0
- * (for origin of the numbers, see model in memcg_protection.m.)
- *
- * After that it tries to allocate more than there is
- * unprotected memory in A available,
- * and checks low and oom events in memory.events.
- */
-static int test_memcg_low(const char *root)
-{
-	int ret = KSFT_FAIL;
-	char *parent[3] = {NULL};
-	char *children[4] = {NULL};
-	long low, oom;
-	long c[4];
-	int i;
-	int fd;
-
-	fd = get_temp_fd();
-	if (fd < 0)
-		goto cleanup;
-
-	parent[0] = cg_name(root, "memcg_test_0");
-	if (!parent[0])
-		goto cleanup;
-
-	parent[1] = cg_name(parent[0], "memcg_test_1");
-	if (!parent[1])
-		goto cleanup;
-
-	parent[2] = cg_name(parent[0], "memcg_test_2");
-	if (!parent[2])
-		goto cleanup;
-
-	if (cg_create(parent[0]))
-		goto cleanup;
-
-	if (cg_read_long(parent[0], "memory.low"))
-		goto cleanup;
-
-	if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
+	rc = cg_run(parent[2], alloc_anon, (void *)MB(170));
+	if (min && !rc)
 		goto cleanup;
-
-	if (cg_write(parent[0], "memory.max", "200M"))
-		goto cleanup;
-
-	if (cg_write(parent[0], "memory.swap.max", "0"))
-		goto cleanup;
-
-	if (cg_create(parent[1]))
-		goto cleanup;
-
-	if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
-		goto cleanup;
-
-	if (cg_create(parent[2]))
+	else if (!min && rc) {
+		fprintf(stderr,
+			"memory.low prevents from allocating anon memory\n");
 		goto cleanup;
-
-	for (i = 0; i < ARRAY_SIZE(children); i++) {
-		children[i] = cg_name_indexed(parent[1], "child_memcg", i);
-		if (!children[i])
-			goto cleanup;
-
-		if (cg_create(children[i]))
-			goto cleanup;
-
-		if (i > 2)
-			continue;
-
-		if (cg_run(children[i], alloc_pagecache_50M, (void *)(long)fd))
-			goto cleanup;
 	}
 
-	if (cg_write(parent[1], "memory.low", "50M"))
-		goto cleanup;
-	if (cg_write(children[0], "memory.low", "75M"))
-		goto cleanup;
-	if (cg_write(children[1], "memory.low", "25M"))
-		goto cleanup;
-	if (cg_write(children[2], "memory.low", "0"))
-		goto cleanup;
-	if (cg_write(children[3], "memory.low", "500M"))
-		goto cleanup;
-
-	if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
-		goto cleanup;
-
 	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
 		goto cleanup;
 
-	for (i = 0; i < ARRAY_SIZE(children); i++)
-		c[i] = cg_read_long(children[i], "memory.current");
-
-	if (!values_close(c[0], MB(29), 10))
-		goto cleanup;
-
-	if (!values_close(c[1], MB(21), 10))
-		goto cleanup;
-
-	if (c[3] != 0)
-		goto cleanup;
-
-	if (cg_run(parent[2], alloc_anon, (void *)MB(166))) {
-		fprintf(stderr,
-			"memory.low prevents from allocating anon memory\n");
+	if (min) {
+		ret = KSFT_PASS;
 		goto cleanup;
 	}
 
 	for (i = 0; i < ARRAY_SIZE(children); i++) {
 		int no_low_events_index = 1;
+		long low, oom;
 
 		oom = cg_read_key_long(children[i], "memory.events", "oom ");
 		low = cg_read_key_long(children[i], "memory.events", "low ");
@@ -565,6 +428,16 @@ cleanup:
 	return ret;
 }
 
+static int test_memcg_min(const char *root)
+{
+	return test_memcg_protection(root, true);
+}
+
+static int test_memcg_low(const char *root)
+{
+	return test_memcg_protection(root, false);
+}
+
 static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
 {
 	size_t size = MB(50);
-- 
cgit 


From 3e0b8f529c10037ae0b369fc892e524eae5a5485 Mon Sep 17 00:00:00 2001
From: Arun Ajith S <aajith@arista.com>
Date: Mon, 30 May 2022 10:14:14 +0000
Subject: net/ipv6: Expand and rename accept_unsolicited_na to
 accept_untracked_na

RFC 9131 changes default behaviour of handling RX of NA messages when the
corresponding entry is absent in the neighbour cache. The current
implementation is limited to accept just unsolicited NAs. However, the
RFC is more generic where it also accepts solicited NAs. Both types
should result in adding a STALE entry for this case.

Expand accept_untracked_na behaviour to also accept solicited NAs to
be compliant with the RFC and rename the sysctl knob to
accept_untracked_na.

Fixes: f9a2fb73318e ("net/ipv6: Introduce accept_unsolicited_na knob to implement router-side changes for RFC9131")
Signed-off-by: Arun Ajith S <aajith@arista.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://lore.kernel.org/r/20220530101414.65439-1-aajith@arista.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 Documentation/networking/ip-sysctl.rst             | 23 +++++-------
 include/linux/ipv6.h                               |  2 +-
 include/uapi/linux/ipv6.h                          |  2 +-
 net/ipv6/addrconf.c                                |  6 ++--
 net/ipv6/ndisc.c                                   | 42 +++++++++++++---------
 .../selftests/net/ndisc_unsolicited_na_test.sh     | 23 ++++++------
 6 files changed, 50 insertions(+), 48 deletions(-)

(limited to 'tools/testing')

diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index b882d4238581..04216564a03c 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -2474,21 +2474,16 @@ drop_unsolicited_na - BOOLEAN
 
 	By default this is turned off.
 
-accept_unsolicited_na - BOOLEAN
-	Add a new neighbour cache entry in STALE state for routers on receiving an
-	unsolicited neighbour advertisement with target link-layer address option
-	specified. This is as per router-side behavior documented in RFC9131.
-	This has lower precedence than drop_unsolicited_na.
+accept_untracked_na - BOOLEAN
+	Add a new neighbour cache entry in STALE state for routers on receiving a
+	neighbour advertisement (either solicited or unsolicited) with target
+	link-layer address option specified if no neighbour entry is already
+	present for the advertised IPv6 address. Without this knob, NAs received
+	for untracked addresses (absent in neighbour cache) are silently ignored.
+
+	This is as per router-side behaviour documented in RFC9131.
 
-	 ====   ======  ======  ==============================================
-	 drop   accept  fwding                   behaviour
-	 ----   ------  ------  ----------------------------------------------
-	    1        X       X  Drop NA packet and don't pass up the stack
-	    0        0       X  Pass NA packet up the stack, don't update NC
-	    0        1       0  Pass NA packet up the stack, don't update NC
-	    0        1       1  Pass NA packet up the stack, and add a STALE
-	                        NC entry
-	 ====   ======  ======  ==============================================
+	This has lower precedence than drop_unsolicited_na.
 
 	This will optimize the return path for the initial off-link communication
 	that is initiated by a directly connected host, by ensuring that
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 38c8203d52cb..37dfdcfcdd54 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -61,7 +61,7 @@ struct ipv6_devconf {
 	__s32		suppress_frag_ndisc;
 	__s32		accept_ra_mtu;
 	__s32		drop_unsolicited_na;
-	__s32		accept_unsolicited_na;
+	__s32		accept_untracked_na;
 	struct ipv6_stable_secret {
 		bool initialized;
 		struct in6_addr secret;
diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h
index 549ddeaf788b..03cdbe798fe3 100644
--- a/include/uapi/linux/ipv6.h
+++ b/include/uapi/linux/ipv6.h
@@ -194,7 +194,7 @@ enum {
 	DEVCONF_IOAM6_ID,
 	DEVCONF_IOAM6_ID_WIDE,
 	DEVCONF_NDISC_EVICT_NOCARRIER,
-	DEVCONF_ACCEPT_UNSOLICITED_NA,
+	DEVCONF_ACCEPT_UNTRACKED_NA,
 	DEVCONF_MAX
 };
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index ca0aa744593e..1b1932502e9e 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -5586,7 +5586,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
 	array[DEVCONF_IOAM6_ID] = cnf->ioam6_id;
 	array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide;
 	array[DEVCONF_NDISC_EVICT_NOCARRIER] = cnf->ndisc_evict_nocarrier;
-	array[DEVCONF_ACCEPT_UNSOLICITED_NA] = cnf->accept_unsolicited_na;
+	array[DEVCONF_ACCEPT_UNTRACKED_NA] = cnf->accept_untracked_na;
 }
 
 static inline size_t inet6_ifla6_size(void)
@@ -7038,8 +7038,8 @@ static const struct ctl_table addrconf_sysctl[] = {
 		.extra2		= (void *)SYSCTL_ONE,
 	},
 	{
-		.procname	= "accept_unsolicited_na",
-		.data		= &ipv6_devconf.accept_unsolicited_na,
+		.procname	= "accept_untracked_na",
+		.data		= &ipv6_devconf.accept_untracked_na,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 254addad0dd3..b0dfe97ea4ee 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -979,7 +979,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
 	struct inet6_dev *idev = __in6_dev_get(dev);
 	struct inet6_ifaddr *ifp;
 	struct neighbour *neigh;
-	bool create_neigh;
+	u8 new_state;
 
 	if (skb->len < sizeof(struct nd_msg)) {
 		ND_PRINTK(2, warn, "NA: packet too short\n");
@@ -1000,7 +1000,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
 	/* For some 802.11 wireless deployments (and possibly other networks),
 	 * there will be a NA proxy and unsolicitd packets are attacks
 	 * and thus should not be accepted.
-	 * drop_unsolicited_na takes precedence over accept_unsolicited_na
+	 * drop_unsolicited_na takes precedence over accept_untracked_na
 	 */
 	if (!msg->icmph.icmp6_solicited && idev &&
 	    idev->cnf.drop_unsolicited_na)
@@ -1041,25 +1041,33 @@ static void ndisc_recv_na(struct sk_buff *skb)
 		in6_ifa_put(ifp);
 		return;
 	}
+
+	neigh = neigh_lookup(&nd_tbl, &msg->target, dev);
+
 	/* RFC 9131 updates original Neighbour Discovery RFC 4861.
-	 * An unsolicited NA can now create a neighbour cache entry
-	 * on routers if it has Target LL Address option.
+	 * NAs with Target LL Address option without a corresponding
+	 * entry in the neighbour cache can now create a STALE neighbour
+	 * cache entry on routers.
+	 *
+	 *   entry accept  fwding  solicited        behaviour
+	 * ------- ------  ------  ---------    ----------------------
+	 * present      X       X         0     Set state to STALE
+	 * present      X       X         1     Set state to REACHABLE
+	 *  absent      0       X         X     Do nothing
+	 *  absent      1       0         X     Do nothing
+	 *  absent      1       1         X     Add a new STALE entry
 	 *
-	 * drop   accept  fwding                   behaviour
-	 * ----   ------  ------  ----------------------------------------------
-	 *    1        X       X  Drop NA packet and don't pass up the stack
-	 *    0        0       X  Pass NA packet up the stack, don't update NC
-	 *    0        1       0  Pass NA packet up the stack, don't update NC
-	 *    0        1       1  Pass NA packet up the stack, and add a STALE
-	 *                          NC entry
 	 * Note that we don't do a (daddr == all-routers-mcast) check.
 	 */
-	create_neigh = !msg->icmph.icmp6_solicited && lladdr &&
-		       idev && idev->cnf.forwarding &&
-		       idev->cnf.accept_unsolicited_na;
-	neigh = __neigh_lookup(&nd_tbl, &msg->target, dev, create_neigh);
+	new_state = msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE;
+	if (!neigh && lladdr &&
+	    idev && idev->cnf.forwarding &&
+	    idev->cnf.accept_untracked_na) {
+		neigh = neigh_create(&nd_tbl, &msg->target, dev);
+		new_state = NUD_STALE;
+	}
 
-	if (neigh) {
+	if (neigh && !IS_ERR(neigh)) {
 		u8 old_flags = neigh->flags;
 		struct net *net = dev_net(dev);
 
@@ -1079,7 +1087,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
 		}
 
 		ndisc_update(dev, neigh, lladdr,
-			     msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE,
+			     new_state,
 			     NEIGH_UPDATE_F_WEAK_OVERRIDE|
 			     (msg->icmph.icmp6_override ? NEIGH_UPDATE_F_OVERRIDE : 0)|
 			     NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
diff --git a/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh b/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh
index f508657ee126..86e621b7b9c7 100755
--- a/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh
+++ b/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh
@@ -1,15 +1,14 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-# This test is for the accept_unsolicited_na feature to
+# This test is for the accept_untracked_na feature to
 # enable RFC9131 behaviour. The following is the test-matrix.
 # drop   accept  fwding                   behaviour
 # ----   ------  ------  ----------------------------------------------
-#    1        X       X  Drop NA packet and don't pass up the stack
-#    0        0       X  Pass NA packet up the stack, don't update NC
-#    0        1       0  Pass NA packet up the stack, don't update NC
-#    0        1       1  Pass NA packet up the stack, and add a STALE
-#                           NC entry
+#    1        X       X  Don't update NC
+#    0        0       X  Don't update NC
+#    0        1       0  Don't update NC
+#    0        1       1  Add a STALE NC entry
 
 ret=0
 # Kselftest framework requirement - SKIP code is 4.
@@ -72,7 +71,7 @@ setup()
 	set -e
 
 	local drop_unsolicited_na=$1
-	local accept_unsolicited_na=$2
+	local accept_untracked_na=$2
 	local forwarding=$3
 
 	# Setup two namespaces and a veth tunnel across them.
@@ -93,7 +92,7 @@ setup()
 	${IP_ROUTER_EXEC} sysctl -qw \
                 ${ROUTER_CONF}.drop_unsolicited_na=${drop_unsolicited_na}
 	${IP_ROUTER_EXEC} sysctl -qw \
-                ${ROUTER_CONF}.accept_unsolicited_na=${accept_unsolicited_na}
+                ${ROUTER_CONF}.accept_untracked_na=${accept_untracked_na}
 	${IP_ROUTER_EXEC} sysctl -qw ${ROUTER_CONF}.disable_ipv6=0
 	${IP_ROUTER} addr add ${ROUTER_ADDR_WITH_MASK} dev ${ROUTER_INTF}
 
@@ -144,13 +143,13 @@ link_up() {
 
 verify_ndisc() {
 	local drop_unsolicited_na=$1
-	local accept_unsolicited_na=$2
+	local accept_untracked_na=$2
 	local forwarding=$3
 
 	neigh_show_output=$(${IP_ROUTER} neigh show \
                 to ${HOST_ADDR} dev ${ROUTER_INTF} nud stale)
 	if [ ${drop_unsolicited_na} -eq 0 ] && \
-			[ ${accept_unsolicited_na} -eq 1 ] && \
+			[ ${accept_untracked_na} -eq 1 ] && \
 			[ ${forwarding} -eq 1 ]; then
 		# Neighbour entry expected to be present for 011 case
 		[[ ${neigh_show_output} ]]
@@ -179,14 +178,14 @@ test_unsolicited_na_combination() {
 	test_unsolicited_na_common $1 $2 $3
 	test_msg=("test_unsolicited_na: "
 		"drop_unsolicited_na=$1 "
-		"accept_unsolicited_na=$2 "
+		"accept_untracked_na=$2 "
 		"forwarding=$3")
 	log_test $? 0 "${test_msg[*]}"
 	cleanup
 }
 
 test_unsolicited_na_combinations() {
-	# Args: drop_unsolicited_na accept_unsolicited_na forwarding
+	# Args: drop_unsolicited_na accept_untracked_na forwarding
 
 	# Expect entry
 	test_unsolicited_na_combination 0 1 1
-- 
cgit 


From 079d93b7dba8373920ad8b50e01616ce8ab3c927 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 31 May 2022 17:13:37 +0200
Subject: selftests: alsa: Handle pkg-config failure more gracefully

Follow the pattern used by other selftests like memfd and fall back on the
standard toolchain options to build with a system installed alsa-lib if
we don't get anything from pkg-config. This reduces our build dependencies
a bit in the common case while still allowing use of pkg-config in case
there is a need for it.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20220531151337.2933810-1-broonie@kernel.org
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 tools/testing/selftests/alsa/Makefile | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/alsa/Makefile b/tools/testing/selftests/alsa/Makefile
index f64d9090426d..fd8ddce2b1a6 100644
--- a/tools/testing/selftests/alsa/Makefile
+++ b/tools/testing/selftests/alsa/Makefile
@@ -3,6 +3,9 @@
 
 CFLAGS += $(shell pkg-config --cflags alsa)
 LDLIBS += $(shell pkg-config --libs alsa)
+ifeq ($(LDLIBS),)
+LDLIBS += -lasound
+endif
 
 TEST_GEN_PROGS := mixer-test
 
-- 
cgit 


From 282e5f8fe907dc3f2fbf9f2103b0e62ffc3a68a5 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 1 Jun 2022 10:47:35 +0200
Subject: netfilter: nat: really support inet nat without l3 address

When no l3 address is given, priv->family is set to NFPROTO_INET and
the evaluation function isn't called.

Call it too so l4-only rewrite can work.
Also add a test case for this.

Fixes: a33f387ecd5aa ("netfilter: nft_nat: allow to specify layer 4 protocol NAT only")
Reported-by: Yi Chen <yiche@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_nat.c                      |  3 +-
 tools/testing/selftests/netfilter/nft_nat.sh | 43 ++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 4394df4bc99b..e5fd6995e4bf 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -335,7 +335,8 @@ static void nft_nat_inet_eval(const struct nft_expr *expr,
 {
 	const struct nft_nat *priv = nft_expr_priv(expr);
 
-	if (priv->family == nft_pf(pkt))
+	if (priv->family == nft_pf(pkt) ||
+	    priv->family == NFPROTO_INET)
 		nft_nat_eval(expr, regs, pkt);
 }
 
diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh
index eb8543b9a5c4..924ecb3f1f73 100755
--- a/tools/testing/selftests/netfilter/nft_nat.sh
+++ b/tools/testing/selftests/netfilter/nft_nat.sh
@@ -374,6 +374,45 @@ EOF
 	return $lret
 }
 
+test_local_dnat_portonly()
+{
+	local family=$1
+	local daddr=$2
+	local lret=0
+	local sr_s
+	local sr_r
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family nat {
+	chain output {
+		type nat hook output priority 0; policy accept;
+		meta l4proto tcp dnat to :2000
+
+	}
+}
+EOF
+	if [ $? -ne 0 ]; then
+		if [ $family = "inet" ];then
+			echo "SKIP: inet port test"
+			test_inet_nat=false
+			return
+		fi
+		echo "SKIP: Could not add $family dnat hook"
+		return
+	fi
+
+	echo SERVER-$family | ip netns exec "$ns1" timeout 5 socat -u STDIN TCP-LISTEN:2000 &
+	sc_s=$!
+
+	result=$(ip netns exec "$ns0" timeout 1 socat TCP:$daddr:2000 STDOUT)
+
+	if [ "$result" = "SERVER-inet" ];then
+		echo "PASS: inet port rewrite without l3 address"
+	else
+		echo "ERROR: inet port rewrite"
+		ret=1
+	fi
+}
 
 test_masquerade6()
 {
@@ -1148,6 +1187,10 @@ fi
 reset_counters
 test_local_dnat ip
 test_local_dnat6 ip6
+
+reset_counters
+test_local_dnat_portonly inet 10.0.1.99
+
 reset_counters
 $test_inet_nat && test_local_dnat inet
 $test_inet_nat && test_local_dnat6 inet
-- 
cgit 


From 78c09c0f4df89fabdcfb3e5e53d3196cf67f64ef Mon Sep 17 00:00:00 2001
From: Cristian Marussi <cristian.marussi@arm.com>
Date: Tue, 24 May 2022 11:31:49 +0100
Subject: kselftest/arm64: signal: Skip SVE signal test if not enough VLs
 supported

On platform where SVE is supported but there are less than 2 VLs available
the signal SVE change test should be skipped instead of failing.

Reported-by: Andre Przywara <andre.przywara@arm.com>
Tested-by: Andre Przywara <andre.przywara@arm.com>
Cc: Mark Brown <broonie@kernel.org>
Signed-off-by: Cristian Marussi <cristian.marussi@arm.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20220524103149.2802-1-cristian.marussi@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 .../selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c     | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c
index bb50b5adbf10..915821375b0a 100644
--- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c
@@ -6,6 +6,7 @@
  * supported and is expected to segfault.
  */
 
+#include <kselftest.h>
 #include <signal.h>
 #include <ucontext.h>
 #include <sys/prctl.h>
@@ -40,6 +41,7 @@ static bool sve_get_vls(struct tdescr *td)
 	/* We need at least two VLs */
 	if (nvls < 2) {
 		fprintf(stderr, "Only %d VL supported\n", nvls);
+		td->result = KSFT_SKIP;
 		return false;
 	}
 
-- 
cgit 


From cf67838c4422eab826679b076dad99f96152b4de Mon Sep 17 00:00:00 2001
From: Lina Wang <lina.wang@mediatek.com>
Date: Mon, 6 Jun 2022 14:45:17 +0800
Subject: selftests net: fix bpf build error

bpf_helpers.h has been moved to tools/lib/bpf since 5.10, so add more
including path.

Fixes: edae34a3ed92 ("selftests net: add UDP GRO fraglist + bpf self-tests")
Reported-by: kernel test robot <oliver.sang@intel.com>
Signed-off-by: Lina Wang <lina.wang@mediatek.com>
Acked-by: Song Liu <songliubraving@fb.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Link: https://lore.kernel.org/r/20220606064517.8175-1-lina.wang@mediatek.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 tools/testing/selftests/net/bpf/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/bpf/Makefile b/tools/testing/selftests/net/bpf/Makefile
index f91bf14bbee7..8a69c91fcca0 100644
--- a/tools/testing/selftests/net/bpf/Makefile
+++ b/tools/testing/selftests/net/bpf/Makefile
@@ -2,6 +2,7 @@
 
 CLANG ?= clang
 CCINCLUDE += -I../../bpf
+CCINCLUDE += -I../../../lib
 CCINCLUDE += -I../../../../../usr/include/
 
 TEST_CUSTOM_PROGS = $(OUTPUT)/bpf/nat6to4.o
@@ -10,5 +11,4 @@ all: $(TEST_CUSTOM_PROGS)
 $(OUTPUT)/%.o: %.c
 	$(CLANG) -O2 -target bpf -c $< $(CCINCLUDE) -o $@
 
-clean:
-	rm -f $(TEST_CUSTOM_PROGS)
+EXTRA_CLEAN := $(TEST_CUSTOM_PROGS)
-- 
cgit 


From eae260be3a0111a28fe95923e117a55dddec0384 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed, 1 Jun 2022 16:43:22 +0200
Subject: KVM: selftests: Make hyperv_clock selftest more stable

hyperv_clock doesn't always give a stable test result, especially with
AMD CPUs. The test compares Hyper-V MSR clocksource (acquired either
with rdmsr() from within the guest or KVM_GET_MSRS from the host)
against rdtsc(). To increase the accuracy, increase the measured delay
(done with nop loop) by two orders of magnitude and take the mean rdtsc()
value before and after rdmsr()/KVM_GET_MSRS.

Reported-by: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Tested-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20220601144322.1968742-1-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/x86_64/hyperv_clock.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
index e0b2bb1339b1..3330fb183c68 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
@@ -44,7 +44,7 @@ static inline void nop_loop(void)
 {
 	int i;
 
-	for (i = 0; i < 1000000; i++)
+	for (i = 0; i < 100000000; i++)
 		asm volatile("nop");
 }
 
@@ -56,12 +56,14 @@ static inline void check_tsc_msr_rdtsc(void)
 	tsc_freq = rdmsr(HV_X64_MSR_TSC_FREQUENCY);
 	GUEST_ASSERT(tsc_freq > 0);
 
-	/* First, check MSR-based clocksource */
+	/* For increased accuracy, take mean rdtsc() before and afrer rdmsr() */
 	r1 = rdtsc();
 	t1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+	r1 = (r1 + rdtsc()) / 2;
 	nop_loop();
 	r2 = rdtsc();
 	t2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+	r2 = (r2 + rdtsc()) / 2;
 
 	GUEST_ASSERT(r2 > r1 && t2 > t1);
 
@@ -181,12 +183,14 @@ static void host_check_tsc_msr_rdtsc(struct kvm_vm *vm)
 	tsc_freq = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TSC_FREQUENCY);
 	TEST_ASSERT(tsc_freq > 0, "TSC frequency must be nonzero");
 
-	/* First, check MSR-based clocksource */
+	/* For increased accuracy, take mean rdtsc() before and afrer ioctl */
 	r1 = rdtsc();
 	t1 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT);
+	r1 = (r1 + rdtsc()) / 2;
 	nop_loop();
 	r2 = rdtsc();
 	t2 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT);
+	r2 = (r2 + rdtsc()) / 2;
 
 	TEST_ASSERT(t2 > t1, "Time reference MSR is not monotonic (%ld <= %ld)", t1, t2);
 
-- 
cgit 


From 2cf7b7ffdae519b284f1406012b52e2282fa36bf Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen <toke@redhat.com>
Date: Mon, 6 Jun 2022 09:52:52 +0200
Subject: selftests/bpf: Add selftest for calling global functions from
 freplace
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a selftest that calls a global function with a context object parameter
from an freplace function to check that the program context type is
correctly converted to the freplace target when fetching the context type
from the kernel BTF.

v2:
- Trim includes
- Get rid of global function
- Use __noinline

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/r/20220606075253.28422-2-toke@redhat.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c | 14 ++++++++++++++
 .../testing/selftests/bpf/progs/freplace_global_func.c | 18 ++++++++++++++++++
 2 files changed, 32 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/freplace_global_func.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
index d9aad15e0d24..02bb8cbf9194 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
@@ -395,6 +395,18 @@ static void test_func_map_prog_compatibility(void)
 				     "./test_attach_probe.o");
 }
 
+static void test_func_replace_global_func(void)
+{
+	const char *prog_name[] = {
+		"freplace/test_pkt_access",
+	};
+
+	test_fexit_bpf2bpf_common("./freplace_global_func.o",
+				  "./test_pkt_access.o",
+				  ARRAY_SIZE(prog_name),
+				  prog_name, false, NULL);
+}
+
 /* NOTE: affect other tests, must run in serial mode */
 void serial_test_fexit_bpf2bpf(void)
 {
@@ -416,4 +428,6 @@ void serial_test_fexit_bpf2bpf(void)
 		test_func_replace_multi();
 	if (test__start_subtest("fmod_ret_freplace"))
 		test_fmod_ret_freplace();
+	if (test__start_subtest("func_replace_global_func"))
+		test_func_replace_global_func();
 }
diff --git a/tools/testing/selftests/bpf/progs/freplace_global_func.c b/tools/testing/selftests/bpf/progs/freplace_global_func.c
new file mode 100644
index 000000000000..96cb61a6ce87
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/freplace_global_func.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__noinline
+int test_ctx_global_func(struct __sk_buff *skb)
+{
+	volatile int retval = 1;
+	return retval;
+}
+
+SEC("freplace/test_pkt_access")
+int new_test_pkt_access(struct __sk_buff *skb)
+{
+	return test_ctx_global_func(skb);
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit 


From 4ee602e78d706e740a48be9b6ddb239df4a113b5 Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Fri, 20 May 2022 23:32:39 +0000
Subject: KVM: selftests: Replace x86_page_size with PG_LEVEL_XX

x86_page_size is an enum used to communicate the desired page size with
which to map a range of memory. Under the hood they just encode the
desired level at which to map the page. This ends up being clunky in a
few ways:

 - The name suggests it encodes the size of the page rather than the
   level.
 - In other places in x86_64/processor.c we just use a raw int to encode
   the level.

Simplify this by adopting the kernel style of PG_LEVEL_XX enums and pass
around raw ints when referring to the level. This makes the code easier
to understand since these macros are very common in KVM MMU code.

Signed-off-by: David Matlack <dmatlack@google.com>
Message-Id: <20220520233249.3776001-2-dmatlack@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../selftests/kvm/include/x86_64/processor.h       | 18 ++++++++-----
 tools/testing/selftests/kvm/lib/x86_64/processor.c | 31 +++++++++++-----------
 .../testing/selftests/kvm/max_guest_memory_test.c  |  2 +-
 tools/testing/selftests/kvm/x86_64/mmu_role_test.c |  2 +-
 4 files changed, 29 insertions(+), 24 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index d0d51adec76e..273c70e91647 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -482,13 +482,19 @@ void vcpu_set_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
 struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
 void vm_xsave_req_perm(int bit);
 
-enum x86_page_size {
-	X86_PAGE_SIZE_4K = 0,
-	X86_PAGE_SIZE_2M,
-	X86_PAGE_SIZE_1G,
+enum pg_level {
+	PG_LEVEL_NONE,
+	PG_LEVEL_4K,
+	PG_LEVEL_2M,
+	PG_LEVEL_1G,
+	PG_LEVEL_512G,
+	PG_LEVEL_NUM
 };
-void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
-		   enum x86_page_size page_size);
+
+#define PG_LEVEL_SHIFT(_level) ((_level - 1) * 9 + 12)
+#define PG_LEVEL_SIZE(_level) (1ull << PG_LEVEL_SHIFT(_level))
+
+void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level);
 
 /*
  * Basic CPU control in CR0
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 33ea5e9955d9..ead7011ee8f6 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -158,7 +158,7 @@ static void *virt_get_pte(struct kvm_vm *vm, uint64_t pt_pfn, uint64_t vaddr,
 			  int level)
 {
 	uint64_t *page_table = addr_gpa2hva(vm, pt_pfn << vm->page_shift);
-	int index = vaddr >> (vm->page_shift + level * 9) & 0x1ffu;
+	int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
 
 	return &page_table[index];
 }
@@ -167,14 +167,14 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
 				       uint64_t pt_pfn,
 				       uint64_t vaddr,
 				       uint64_t paddr,
-				       int level,
-				       enum x86_page_size page_size)
+				       int current_level,
+				       int target_level)
 {
-	uint64_t *pte = virt_get_pte(vm, pt_pfn, vaddr, level);
+	uint64_t *pte = virt_get_pte(vm, pt_pfn, vaddr, current_level);
 
 	if (!(*pte & PTE_PRESENT_MASK)) {
 		*pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK;
-		if (level == page_size)
+		if (current_level == target_level)
 			*pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK);
 		else
 			*pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK;
@@ -184,20 +184,19 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
 		 * a hugepage at this level, and that there isn't a hugepage at
 		 * this level.
 		 */
-		TEST_ASSERT(level != page_size,
+		TEST_ASSERT(current_level != target_level,
 			    "Cannot create hugepage at level: %u, vaddr: 0x%lx\n",
-			    page_size, vaddr);
+			    current_level, vaddr);
 		TEST_ASSERT(!(*pte & PTE_LARGE_MASK),
 			    "Cannot create page table at level: %u, vaddr: 0x%lx\n",
-			    level, vaddr);
+			    current_level, vaddr);
 	}
 	return pte;
 }
 
-void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
-		   enum x86_page_size page_size)
+void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level)
 {
-	const uint64_t pg_size = 1ull << ((page_size * 9) + 12);
+	const uint64_t pg_size = PG_LEVEL_SIZE(level);
 	uint64_t *pml4e, *pdpe, *pde;
 	uint64_t *pte;
 
@@ -222,20 +221,20 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 	 * early if a hugepage was created.
 	 */
 	pml4e = virt_create_upper_pte(vm, vm->pgd >> vm->page_shift,
-				      vaddr, paddr, 3, page_size);
+				      vaddr, paddr, PG_LEVEL_512G, level);
 	if (*pml4e & PTE_LARGE_MASK)
 		return;
 
-	pdpe = virt_create_upper_pte(vm, PTE_GET_PFN(*pml4e), vaddr, paddr, 2, page_size);
+	pdpe = virt_create_upper_pte(vm, PTE_GET_PFN(*pml4e), vaddr, paddr, PG_LEVEL_1G, level);
 	if (*pdpe & PTE_LARGE_MASK)
 		return;
 
-	pde = virt_create_upper_pte(vm, PTE_GET_PFN(*pdpe), vaddr, paddr, 1, page_size);
+	pde = virt_create_upper_pte(vm, PTE_GET_PFN(*pdpe), vaddr, paddr, PG_LEVEL_2M, level);
 	if (*pde & PTE_LARGE_MASK)
 		return;
 
 	/* Fill in page table entry. */
-	pte = virt_get_pte(vm, PTE_GET_PFN(*pde), vaddr, 0);
+	pte = virt_get_pte(vm, PTE_GET_PFN(*pde), vaddr, PG_LEVEL_4K);
 	TEST_ASSERT(!(*pte & PTE_PRESENT_MASK),
 		    "PTE already present for 4k page at vaddr: 0x%lx\n", vaddr);
 	*pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK);
@@ -243,7 +242,7 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 
 void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
 {
-	__virt_pg_map(vm, vaddr, paddr, X86_PAGE_SIZE_4K);
+	__virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K);
 }
 
 static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid,
diff --git a/tools/testing/selftests/kvm/max_guest_memory_test.c b/tools/testing/selftests/kvm/max_guest_memory_test.c
index 3875c4b23a04..15f046e19cb2 100644
--- a/tools/testing/selftests/kvm/max_guest_memory_test.c
+++ b/tools/testing/selftests/kvm/max_guest_memory_test.c
@@ -244,7 +244,7 @@ int main(int argc, char *argv[])
 #ifdef __x86_64__
 		/* Identity map memory in the guest using 1gb pages. */
 		for (i = 0; i < slot_size; i += size_1gb)
-			__virt_pg_map(vm, gpa + i, gpa + i, X86_PAGE_SIZE_1G);
+			__virt_pg_map(vm, gpa + i, gpa + i, PG_LEVEL_1G);
 #else
 		for (i = 0; i < slot_size; i += vm_get_page_size(vm))
 			virt_pg_map(vm, gpa + i, gpa + i);
diff --git a/tools/testing/selftests/kvm/x86_64/mmu_role_test.c b/tools/testing/selftests/kvm/x86_64/mmu_role_test.c
index da2325fcad87..bdecd532f935 100644
--- a/tools/testing/selftests/kvm/x86_64/mmu_role_test.c
+++ b/tools/testing/selftests/kvm/x86_64/mmu_role_test.c
@@ -35,7 +35,7 @@ static void mmu_role_test(u32 *cpuid_reg, u32 evil_cpuid_val)
 	run = vcpu_state(vm, VCPU_ID);
 
 	/* Map 1gb page without a backing memlot. */
-	__virt_pg_map(vm, MMIO_GPA, MMIO_GPA, X86_PAGE_SIZE_1G);
+	__virt_pg_map(vm, MMIO_GPA, MMIO_GPA, PG_LEVEL_1G);
 
 	r = _vcpu_run(vm, VCPU_ID);
 
-- 
cgit 


From c5a0ccec4cb4edde8e5b7e369dbe4d169b111e42 Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Fri, 20 May 2022 23:32:40 +0000
Subject: KVM: selftests: Add option to create 2M and 1G EPT mappings

The current EPT mapping code in the selftests only supports mapping 4K
pages. This commit extends that support with an option to map at 2M or
1G. This will be used in a future commit to create large page mappings
to test eager page splitting.

No functional change intended.

Signed-off-by: David Matlack <dmatlack@google.com>
Message-Id: <20220520233249.3776001-3-dmatlack@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/lib/x86_64/vmx.c | 110 +++++++++++++++------------
 1 file changed, 60 insertions(+), 50 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
index d089d8b850b5..fdc1e6deb922 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
@@ -392,80 +392,90 @@ void nested_vmx_check_supported(void)
 	}
 }
 
-void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
-		   uint64_t nested_paddr, uint64_t paddr)
+static void nested_create_pte(struct kvm_vm *vm,
+			      struct eptPageTableEntry *pte,
+			      uint64_t nested_paddr,
+			      uint64_t paddr,
+			      int current_level,
+			      int target_level)
+{
+	if (!pte->readable) {
+		pte->writable = true;
+		pte->readable = true;
+		pte->executable = true;
+		pte->page_size = (current_level == target_level);
+		if (pte->page_size)
+			pte->address = paddr >> vm->page_shift;
+		else
+			pte->address = vm_alloc_page_table(vm) >> vm->page_shift;
+	} else {
+		/*
+		 * Entry already present.  Assert that the caller doesn't want
+		 * a hugepage at this level, and that there isn't a hugepage at
+		 * this level.
+		 */
+		TEST_ASSERT(current_level != target_level,
+			    "Cannot create hugepage at level: %u, nested_paddr: 0x%lx\n",
+			    current_level, nested_paddr);
+		TEST_ASSERT(!pte->page_size,
+			    "Cannot create page table at level: %u, nested_paddr: 0x%lx\n",
+			    current_level, nested_paddr);
+	}
+}
+
+
+void __nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+		     uint64_t nested_paddr, uint64_t paddr, int target_level)
 {
-	uint16_t index[4];
-	struct eptPageTableEntry *pml4e;
+	const uint64_t page_size = PG_LEVEL_SIZE(target_level);
+	struct eptPageTableEntry *pt = vmx->eptp_hva, *pte;
+	uint16_t index;
 
 	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
 		    "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
 
-	TEST_ASSERT((nested_paddr % vm->page_size) == 0,
+	TEST_ASSERT((nested_paddr % page_size) == 0,
 		    "Nested physical address not on page boundary,\n"
-		    "  nested_paddr: 0x%lx vm->page_size: 0x%x",
-		    nested_paddr, vm->page_size);
+		    "  nested_paddr: 0x%lx page_size: 0x%lx",
+		    nested_paddr, page_size);
 	TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn,
 		    "Physical address beyond beyond maximum supported,\n"
 		    "  nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
 		    paddr, vm->max_gfn, vm->page_size);
-	TEST_ASSERT((paddr % vm->page_size) == 0,
+	TEST_ASSERT((paddr % page_size) == 0,
 		    "Physical address not on page boundary,\n"
-		    "  paddr: 0x%lx vm->page_size: 0x%x",
-		    paddr, vm->page_size);
+		    "  paddr: 0x%lx page_size: 0x%lx",
+		    paddr, page_size);
 	TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
 		    "Physical address beyond beyond maximum supported,\n"
 		    "  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
 		    paddr, vm->max_gfn, vm->page_size);
 
-	index[0] = (nested_paddr >> 12) & 0x1ffu;
-	index[1] = (nested_paddr >> 21) & 0x1ffu;
-	index[2] = (nested_paddr >> 30) & 0x1ffu;
-	index[3] = (nested_paddr >> 39) & 0x1ffu;
-
-	/* Allocate page directory pointer table if not present. */
-	pml4e = vmx->eptp_hva;
-	if (!pml4e[index[3]].readable) {
-		pml4e[index[3]].address = vm_alloc_page_table(vm) >> vm->page_shift;
-		pml4e[index[3]].writable = true;
-		pml4e[index[3]].readable = true;
-		pml4e[index[3]].executable = true;
-	}
+	for (int level = PG_LEVEL_512G; level >= PG_LEVEL_4K; level--) {
+		index = (nested_paddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
+		pte = &pt[index];
 
-	/* Allocate page directory table if not present. */
-	struct eptPageTableEntry *pdpe;
-	pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
-	if (!pdpe[index[2]].readable) {
-		pdpe[index[2]].address = vm_alloc_page_table(vm) >> vm->page_shift;
-		pdpe[index[2]].writable = true;
-		pdpe[index[2]].readable = true;
-		pdpe[index[2]].executable = true;
-	}
+		nested_create_pte(vm, pte, nested_paddr, paddr, level, target_level);
 
-	/* Allocate page table if not present. */
-	struct eptPageTableEntry *pde;
-	pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
-	if (!pde[index[1]].readable) {
-		pde[index[1]].address = vm_alloc_page_table(vm) >> vm->page_shift;
-		pde[index[1]].writable = true;
-		pde[index[1]].readable = true;
-		pde[index[1]].executable = true;
-	}
+		if (pte->page_size)
+			break;
 
-	/* Fill in page table entry. */
-	struct eptPageTableEntry *pte;
-	pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
-	pte[index[0]].address = paddr >> vm->page_shift;
-	pte[index[0]].writable = true;
-	pte[index[0]].readable = true;
-	pte[index[0]].executable = true;
+		pt = addr_gpa2hva(vm, pte->address * vm->page_size);
+	}
 
 	/*
 	 * For now mark these as accessed and dirty because the only
 	 * testcase we have needs that.  Can be reconsidered later.
 	 */
-	pte[index[0]].accessed = true;
-	pte[index[0]].dirty = true;
+	pte->accessed = true;
+	pte->dirty = true;
+
+}
+
+void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+		   uint64_t nested_paddr, uint64_t paddr)
+{
+	__nested_pg_map(vmx, vm, nested_paddr, paddr, PG_LEVEL_4K);
 }
 
 /*
-- 
cgit 


From b8ca01ea19068b54938ebb4ebc06814a89dee8ea Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Fri, 20 May 2022 23:32:41 +0000
Subject: KVM: selftests: Drop stale function parameter comment for
 nested_map()

nested_map() does not take a parameter named eptp_memslot. Drop the
comment referring to it.

Reviewed-by: Peter Xu <peterx@redhat.com>
Signed-off-by: David Matlack <dmatlack@google.com>
Message-Id: <20220520233249.3776001-4-dmatlack@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/lib/x86_64/vmx.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
index fdc1e6deb922..baeaa35de113 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
@@ -486,7 +486,6 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
  *   nested_paddr - Nested guest physical address to map
  *   paddr - VM Physical Address
  *   size - The size of the range to map
- *   eptp_memslot - Memory region slot for new virtual translation tables
  *
  * Output Args: None
  *
-- 
cgit 


From ce690e9c17d27486af879defc506679cbbb14777 Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Fri, 20 May 2022 23:32:42 +0000
Subject: KVM: selftests: Refactor nested_map() to specify target level

Refactor nested_map() to specify that it explicityl wants 4K mappings
(the existing behavior) and push the implementation down into
__nested_map(), which can be used in subsequent commits to create huge
page mappings.

No function change intended.

Reviewed-by: Peter Xu <peterx@redhat.com>
Signed-off-by: David Matlack <dmatlack@google.com>
Message-Id: <20220520233249.3776001-5-dmatlack@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/lib/x86_64/vmx.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
index baeaa35de113..b8cfe4914a3a 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
@@ -486,6 +486,7 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
  *   nested_paddr - Nested guest physical address to map
  *   paddr - VM Physical Address
  *   size - The size of the range to map
+ *   level - The level at which to map the range
  *
  * Output Args: None
  *
@@ -494,22 +495,29 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
  * Within the VM given by vm, creates a nested guest translation for the
  * page range starting at nested_paddr to the page range starting at paddr.
  */
-void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
-		uint64_t nested_paddr, uint64_t paddr, uint64_t size)
+void __nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+		  uint64_t nested_paddr, uint64_t paddr, uint64_t size,
+		  int level)
 {
-	size_t page_size = vm->page_size;
+	size_t page_size = PG_LEVEL_SIZE(level);
 	size_t npages = size / page_size;
 
 	TEST_ASSERT(nested_paddr + size > nested_paddr, "Vaddr overflow");
 	TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
 
 	while (npages--) {
-		nested_pg_map(vmx, vm, nested_paddr, paddr);
+		__nested_pg_map(vmx, vm, nested_paddr, paddr, level);
 		nested_paddr += page_size;
 		paddr += page_size;
 	}
 }
 
+void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+		uint64_t nested_paddr, uint64_t paddr, uint64_t size)
+{
+	__nested_map(vmx, vm, nested_paddr, paddr, size, PG_LEVEL_4K);
+}
+
 /* Prepare an identity extended page table that maps all the
  * physical pages in VM.
  */
-- 
cgit 


From b6c086d04c0a1ba356145cdba5b46bd6cea2b9bd Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Fri, 20 May 2022 23:32:43 +0000
Subject: KVM: selftests: Move VMX_EPT_VPID_CAP_AD_BITS to vmx.h

This is a VMX-related macro so move it to vmx.h. While here, open code
the mask like the rest of the VMX bitmask macros.

No functional change intended.

Reviewed-by: Peter Xu <peterx@redhat.com>
Signed-off-by: David Matlack <dmatlack@google.com>
Message-Id: <20220520233249.3776001-6-dmatlack@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/include/x86_64/processor.h | 3 ---
 tools/testing/selftests/kvm/include/x86_64/vmx.h       | 2 ++
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 273c70e91647..d78f97f502b5 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -511,9 +511,6 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level)
 #define X86_CR0_CD          (1UL<<30) /* Cache Disable */
 #define X86_CR0_PG          (1UL<<31) /* Paging */
 
-/* VMX_EPT_VPID_CAP bits */
-#define VMX_EPT_VPID_CAP_AD_BITS       (1ULL << 21)
-
 #define XSTATE_XTILE_CFG_BIT		17
 #define XSTATE_XTILE_DATA_BIT		18
 
diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h
index 583ceb0d1457..3b1794baa97c 100644
--- a/tools/testing/selftests/kvm/include/x86_64/vmx.h
+++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h
@@ -96,6 +96,8 @@
 #define VMX_MISC_PREEMPTION_TIMER_RATE_MASK	0x0000001f
 #define VMX_MISC_SAVE_EFER_LMA			0x00000020
 
+#define VMX_EPT_VPID_CAP_AD_BITS		0x00200000
+
 #define EXIT_REASON_FAILED_VMENTRY	0x80000000
 #define EXIT_REASON_EXCEPTION_NMI	0
 #define EXIT_REASON_EXTERNAL_INTERRUPT	1
-- 
cgit 


From c363d95986b1b930947305e2372665141721d15f Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Fri, 20 May 2022 23:32:44 +0000
Subject: KVM: selftests: Add a helper to check EPT/VPID capabilities

Create a small helper function to check if a given EPT/VPID capability
is supported. This will be re-used in a follow-up commit to check for 1G
page support.

No functional change intended.

Reviewed-by: Peter Xu <peterx@redhat.com>
Signed-off-by: David Matlack <dmatlack@google.com>
Message-Id: <20220520233249.3776001-7-dmatlack@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/lib/x86_64/vmx.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
index b8cfe4914a3a..5bf169179455 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
@@ -198,6 +198,11 @@ bool load_vmcs(struct vmx_pages *vmx)
 	return true;
 }
 
+static bool ept_vpid_cap_supported(uint64_t mask)
+{
+	return rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & mask;
+}
+
 /*
  * Initialize the control fields to the most basic settings possible.
  */
@@ -215,7 +220,7 @@ static inline void init_vmcs_control_fields(struct vmx_pages *vmx)
 		struct eptPageTablePointer eptp = {
 			.memory_type = VMX_BASIC_MEM_TYPE_WB,
 			.page_walk_length = 3, /* + 1 */
-			.ad_enabled = !!(rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & VMX_EPT_VPID_CAP_AD_BITS),
+			.ad_enabled = ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS),
 			.address = vmx->eptp_gpa >> PAGE_SHIFT_4K,
 		};
 
-- 
cgit 


From acf57736e755ba5c467fc6fa85e4a0750cc36150 Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Fri, 20 May 2022 23:32:45 +0000
Subject: KVM: selftests: Drop unnecessary rule for STATIC_LIBS

Drop the "all: $(STATIC_LIBS)" rule. The KVM selftests already depend
on $(STATIC_LIBS), so there is no reason to have an extra "all" rule.

Suggested-by: Peter Xu <peterx@redhat.com>
Signed-off-by: David Matlack <dmatlack@google.com>
Message-Id: <20220520233249.3776001-8-dmatlack@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/Makefile | 1 -
 1 file changed, 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 81470a99ed1c..e7d65e04b16a 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -192,7 +192,6 @@ $(OUTPUT)/libkvm.a: $(LIBKVM_OBJS)
 	$(AR) crs $@ $^
 
 x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS))))
-all: $(STATIC_LIBS)
 $(TEST_GEN_PROGS): $(STATIC_LIBS)
 
 cscope: include_paths = $(LINUX_TOOL_INCLUDE) $(LINUX_HDR_PATH) include lib ..
-- 
cgit 


From cdc979dae265cc77a035b736f78f58e4c7309bb2 Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Fri, 20 May 2022 23:32:46 +0000
Subject: KVM: selftests: Link selftests directly with lib object files

The linker does obey strong/weak symbols when linking static libraries,
it simply resolves an undefined symbol to the first-encountered symbol.
This means that defining __weak arch-generic functions and then defining
arch-specific strong functions to override them in libkvm will not
always work.

More specifically, if we have:

lib/generic.c:

  void __weak foo(void)
  {
          pr_info("weak\n");
  }

  void bar(void)
  {
          foo();
  }

lib/x86_64/arch.c:

  void foo(void)
  {
          pr_info("strong\n");
  }

And a selftest that calls bar(), it will print "weak". Now if you make
generic.o explicitly depend on arch.o (e.g. add function to arch.c that
is called directly from generic.c) it will print "strong". In other
words, it seems that the linker is free to throw out arch.o when linking
because generic.o does not explicitly depend on it, which causes the
linker to lose the strong symbol.

One solution is to link libkvm.a with --whole-archive so that the linker
doesn't throw away object files it thinks are unnecessary. However that
is a bit difficult to plumb since we are using the common selftests
makefile rules. An easier solution is to drop libkvm.a just link
selftests with all the .o files that were originally in libkvm.a.

Reviewed-by: Peter Xu <peterx@redhat.com>
Signed-off-by: David Matlack <dmatlack@google.com>
Message-Id: <20220520233249.3776001-9-dmatlack@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/Makefile | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index e7d65e04b16a..804bf927618a 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -173,12 +173,13 @@ LDFLAGS += -pthread $(no-pie-option) $(pgste-option)
 # $(TEST_GEN_PROGS) starts with $(OUTPUT)/
 include ../lib.mk
 
-STATIC_LIBS := $(OUTPUT)/libkvm.a
 LIBKVM_C := $(filter %.c,$(LIBKVM))
 LIBKVM_S := $(filter %.S,$(LIBKVM))
 LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C))
 LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S))
-EXTRA_CLEAN += $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(STATIC_LIBS) cscope.*
+LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)
+
+EXTRA_CLEAN += $(LIBKVM_OBJS) cscope.*
 
 x := $(shell mkdir -p $(sort $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ))))
 $(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c
@@ -187,12 +188,8 @@ $(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c
 $(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S
 	$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
 
-LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)
-$(OUTPUT)/libkvm.a: $(LIBKVM_OBJS)
-	$(AR) crs $@ $^
-
 x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS))))
-$(TEST_GEN_PROGS): $(STATIC_LIBS)
+$(TEST_GEN_PROGS): $(LIBKVM_OBJS)
 
 cscope: include_paths = $(LINUX_TOOL_INCLUDE) $(LINUX_HDR_PATH) include lib ..
 cscope:
-- 
cgit 


From cf97d5e99f69f876dc310ea21b5f97c3a493a18a Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Fri, 20 May 2022 23:32:47 +0000
Subject: KVM: selftests: Clean up LIBKVM files in Makefile

Break up the long lines for LIBKVM and alphabetize each architecture.
This makes reading the Makefile easier, and will make reading diffs to
LIBKVM easier.

No functional change intended.

Reviewed-by: Peter Xu <peterx@redhat.com>
Signed-off-by: David Matlack <dmatlack@google.com>
Message-Id: <20220520233249.3776001-10-dmatlack@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/Makefile | 36 +++++++++++++++++++++++++++++++-----
 1 file changed, 31 insertions(+), 5 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 804bf927618a..14566c0a330d 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -37,11 +37,37 @@ ifeq ($(ARCH),riscv)
 	UNAME_M := riscv
 endif
 
-LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/rbtree.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c
-LIBKVM_x86_64 = lib/x86_64/apic.c lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S
-LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c lib/aarch64/handlers.S lib/aarch64/spinlock.c lib/aarch64/gic.c lib/aarch64/gic_v3.c lib/aarch64/vgic.c
-LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c
-LIBKVM_riscv = lib/riscv/processor.c lib/riscv/ucall.c
+LIBKVM += lib/assert.c
+LIBKVM += lib/elf.c
+LIBKVM += lib/guest_modes.c
+LIBKVM += lib/io.c
+LIBKVM += lib/kvm_util.c
+LIBKVM += lib/perf_test_util.c
+LIBKVM += lib/rbtree.c
+LIBKVM += lib/sparsebit.c
+LIBKVM += lib/test_util.c
+
+LIBKVM_x86_64 += lib/x86_64/apic.c
+LIBKVM_x86_64 += lib/x86_64/handlers.S
+LIBKVM_x86_64 += lib/x86_64/processor.c
+LIBKVM_x86_64 += lib/x86_64/svm.c
+LIBKVM_x86_64 += lib/x86_64/ucall.c
+LIBKVM_x86_64 += lib/x86_64/vmx.c
+
+LIBKVM_aarch64 += lib/aarch64/gic.c
+LIBKVM_aarch64 += lib/aarch64/gic_v3.c
+LIBKVM_aarch64 += lib/aarch64/handlers.S
+LIBKVM_aarch64 += lib/aarch64/processor.c
+LIBKVM_aarch64 += lib/aarch64/spinlock.c
+LIBKVM_aarch64 += lib/aarch64/ucall.c
+LIBKVM_aarch64 += lib/aarch64/vgic.c
+
+LIBKVM_s390x += lib/s390x/diag318_test_handler.c
+LIBKVM_s390x += lib/s390x/processor.c
+LIBKVM_s390x += lib/s390x/ucall.c
+
+LIBKVM_riscv += lib/riscv/processor.c
+LIBKVM_riscv += lib/riscv/ucall.c
 
 TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test
 TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test
-- 
cgit 


From 71d489661904fcc3ec31b343acd5c0dac84b5410 Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Fri, 20 May 2022 23:32:48 +0000
Subject: KVM: selftests: Add option to run dirty_log_perf_test vCPUs in L2

Add an option to dirty_log_perf_test that configures the vCPUs to run in
L2 instead of L1. This makes it possible to benchmark the dirty logging
performance of nested virtualization, which is particularly interesting
because KVM must shadow L1's EPT/NPT tables.

For now this support only works on x86_64 CPUs with VMX. Otherwise
passing -n results in the test being skipped.

Signed-off-by: David Matlack <dmatlack@google.com>
Message-Id: <20220520233249.3776001-11-dmatlack@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/Makefile               |   1 +
 tools/testing/selftests/kvm/dirty_log_perf_test.c  |  10 +-
 .../testing/selftests/kvm/include/perf_test_util.h |   9 ++
 .../selftests/kvm/include/x86_64/processor.h       |   4 +
 tools/testing/selftests/kvm/include/x86_64/vmx.h   |   4 +
 tools/testing/selftests/kvm/lib/perf_test_util.c   |  35 ++++++-
 .../selftests/kvm/lib/x86_64/perf_test_util.c      | 112 +++++++++++++++++++++
 tools/testing/selftests/kvm/lib/x86_64/vmx.c       |  15 +++
 8 files changed, 182 insertions(+), 8 deletions(-)
 create mode 100644 tools/testing/selftests/kvm/lib/x86_64/perf_test_util.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 14566c0a330d..22423c871ed6 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -49,6 +49,7 @@ LIBKVM += lib/test_util.c
 
 LIBKVM_x86_64 += lib/x86_64/apic.c
 LIBKVM_x86_64 += lib/x86_64/handlers.S
+LIBKVM_x86_64 += lib/x86_64/perf_test_util.c
 LIBKVM_x86_64 += lib/x86_64/processor.c
 LIBKVM_x86_64 += lib/x86_64/svm.c
 LIBKVM_x86_64 += lib/x86_64/ucall.c
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index 7b47ae4f952e..d60a34cdfaee 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -336,8 +336,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 static void help(char *name)
 {
 	puts("");
-	printf("usage: %s [-h] [-i iterations] [-p offset] [-g]"
-	       "[-m mode] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]"
+	printf("usage: %s [-h] [-i iterations] [-p offset] [-g] "
+	       "[-m mode] [-n] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]"
 	       "[-x memslots]\n", name);
 	puts("");
 	printf(" -i: specify iteration counts (default: %"PRIu64")\n",
@@ -351,6 +351,7 @@ static void help(char *name)
 	printf(" -p: specify guest physical test memory offset\n"
 	       "     Warning: a low offset can conflict with the loaded test code.\n");
 	guest_modes_help();
+	printf(" -n: Run the vCPUs in nested mode (L2)\n");
 	printf(" -b: specify the size of the memory region which should be\n"
 	       "     dirtied by each vCPU. e.g. 10M or 3G.\n"
 	       "     (default: 1G)\n");
@@ -387,7 +388,7 @@ int main(int argc, char *argv[])
 
 	guest_modes_append_default();
 
-	while ((opt = getopt(argc, argv, "ghi:p:m:b:f:v:os:x:")) != -1) {
+	while ((opt = getopt(argc, argv, "ghi:p:m:nb:f:v:os:x:")) != -1) {
 		switch (opt) {
 		case 'g':
 			dirty_log_manual_caps = 0;
@@ -401,6 +402,9 @@ int main(int argc, char *argv[])
 		case 'm':
 			guest_modes_cmdline(optarg);
 			break;
+		case 'n':
+			perf_test_args.nested = true;
+			break;
 		case 'b':
 			guest_percpu_mem_size = parse_size(optarg);
 			break;
diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h
index a86f953d8d36..d822cb670f1c 100644
--- a/tools/testing/selftests/kvm/include/perf_test_util.h
+++ b/tools/testing/selftests/kvm/include/perf_test_util.h
@@ -30,10 +30,15 @@ struct perf_test_vcpu_args {
 
 struct perf_test_args {
 	struct kvm_vm *vm;
+	/* The starting address and size of the guest test region. */
 	uint64_t gpa;
+	uint64_t size;
 	uint64_t guest_page_size;
 	int wr_fract;
 
+	/* Run vCPUs in L2 instead of L1, if the architecture supports it. */
+	bool nested;
+
 	struct perf_test_vcpu_args vcpu_args[KVM_MAX_VCPUS];
 };
 
@@ -49,5 +54,9 @@ void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract);
 
 void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *));
 void perf_test_join_vcpu_threads(int vcpus);
+void perf_test_guest_code(uint32_t vcpu_id);
+
+uint64_t perf_test_nested_pages(int nr_vcpus);
+void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus);
 
 #endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index d78f97f502b5..6ce185449259 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -494,6 +494,10 @@ enum pg_level {
 #define PG_LEVEL_SHIFT(_level) ((_level - 1) * 9 + 12)
 #define PG_LEVEL_SIZE(_level) (1ull << PG_LEVEL_SHIFT(_level))
 
+#define PG_SIZE_4K PG_LEVEL_SIZE(PG_LEVEL_4K)
+#define PG_SIZE_2M PG_LEVEL_SIZE(PG_LEVEL_2M)
+#define PG_SIZE_1G PG_LEVEL_SIZE(PG_LEVEL_1G)
+
 void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level);
 
 /*
diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h
index 3b1794baa97c..cc3604f8f1d3 100644
--- a/tools/testing/selftests/kvm/include/x86_64/vmx.h
+++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h
@@ -96,6 +96,7 @@
 #define VMX_MISC_PREEMPTION_TIMER_RATE_MASK	0x0000001f
 #define VMX_MISC_SAVE_EFER_LMA			0x00000020
 
+#define VMX_EPT_VPID_CAP_1G_PAGES		0x00020000
 #define VMX_EPT_VPID_CAP_AD_BITS		0x00200000
 
 #define EXIT_REASON_FAILED_VMENTRY	0x80000000
@@ -608,6 +609,7 @@ bool load_vmcs(struct vmx_pages *vmx);
 
 bool nested_vmx_supported(void);
 void nested_vmx_check_supported(void);
+bool ept_1g_pages_supported(void);
 
 void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
 		   uint64_t nested_paddr, uint64_t paddr);
@@ -615,6 +617,8 @@ void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
 		 uint64_t nested_paddr, uint64_t paddr, uint64_t size);
 void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
 			uint32_t memslot);
+void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
+			    uint64_t addr, uint64_t size);
 void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
 		  uint32_t eptp_memslot);
 void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm);
diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c
index 722df3a28791..b2ff2cee2e51 100644
--- a/tools/testing/selftests/kvm/lib/perf_test_util.c
+++ b/tools/testing/selftests/kvm/lib/perf_test_util.c
@@ -40,7 +40,7 @@ static bool all_vcpu_threads_running;
  * Continuously write to the first 8 bytes of each page in the
  * specified region.
  */
-static void guest_code(uint32_t vcpu_id)
+void perf_test_guest_code(uint32_t vcpu_id)
 {
 	struct perf_test_args *pta = &perf_test_args;
 	struct perf_test_vcpu_args *vcpu_args = &pta->vcpu_args[vcpu_id];
@@ -108,7 +108,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
 {
 	struct perf_test_args *pta = &perf_test_args;
 	struct kvm_vm *vm;
-	uint64_t guest_num_pages;
+	uint64_t guest_num_pages, slot0_pages = DEFAULT_GUEST_PHY_PAGES;
 	uint64_t backing_src_pagesz = get_backing_src_pagesz(backing_src);
 	int i;
 
@@ -134,13 +134,20 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
 		    "Guest memory cannot be evenly divided into %d slots.",
 		    slots);
 
+	/*
+	 * If using nested, allocate extra pages for the nested page tables and
+	 * in-memory data structures.
+	 */
+	if (pta->nested)
+		slot0_pages += perf_test_nested_pages(vcpus);
+
 	/*
 	 * Pass guest_num_pages to populate the page tables for test memory.
 	 * The memory is also added to memslot 0, but that's a benign side
 	 * effect as KVM allows aliasing HVAs in meslots.
 	 */
-	vm = vm_create_with_vcpus(mode, vcpus, DEFAULT_GUEST_PHY_PAGES,
-				  guest_num_pages, 0, guest_code, NULL);
+	vm = vm_create_with_vcpus(mode, vcpus, slot0_pages, guest_num_pages, 0,
+				  perf_test_guest_code, NULL);
 
 	pta->vm = vm;
 
@@ -161,7 +168,9 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
 	/* Align to 1M (segment size) */
 	pta->gpa = align_down(pta->gpa, 1 << 20);
 #endif
-	pr_info("guest physical test memory offset: 0x%lx\n", pta->gpa);
+	pta->size = guest_num_pages * pta->guest_page_size;
+	pr_info("guest physical test memory: [0x%lx, 0x%lx)\n",
+		pta->gpa, pta->gpa + pta->size);
 
 	/* Add extra memory slots for testing */
 	for (i = 0; i < slots; i++) {
@@ -178,6 +187,11 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
 
 	perf_test_setup_vcpus(vm, vcpus, vcpu_memory_bytes, partition_vcpu_memory_access);
 
+	if (pta->nested) {
+		pr_info("Configuring vCPUs to run in L2 (nested).\n");
+		perf_test_setup_nested(vm, vcpus);
+	}
+
 	ucall_init(vm, NULL);
 
 	/* Export the shared variables to the guest. */
@@ -198,6 +212,17 @@ void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract)
 	sync_global_to_guest(vm, perf_test_args);
 }
 
+uint64_t __weak perf_test_nested_pages(int nr_vcpus)
+{
+	return 0;
+}
+
+void __weak perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus)
+{
+	pr_info("%s() not support on this architecture, skipping.\n", __func__);
+	exit(KSFT_SKIP);
+}
+
 static void *vcpu_thread_main(void *data)
 {
 	struct vcpu_thread *vcpu = data;
diff --git a/tools/testing/selftests/kvm/lib/x86_64/perf_test_util.c b/tools/testing/selftests/kvm/lib/x86_64/perf_test_util.c
new file mode 100644
index 000000000000..e258524435a0
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86_64/perf_test_util.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * x86_64-specific extensions to perf_test_util.c.
+ *
+ * Copyright (C) 2022, Google, Inc.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "perf_test_util.h"
+#include "../kvm_util_internal.h"
+#include "processor.h"
+#include "vmx.h"
+
+void perf_test_l2_guest_code(uint64_t vcpu_id)
+{
+	perf_test_guest_code(vcpu_id);
+	vmcall();
+}
+
+extern char perf_test_l2_guest_entry[];
+__asm__(
+"perf_test_l2_guest_entry:"
+"	mov (%rsp), %rdi;"
+"	call perf_test_l2_guest_code;"
+"	ud2;"
+);
+
+static void perf_test_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id)
+{
+#define L2_GUEST_STACK_SIZE 64
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+	unsigned long *rsp;
+
+	GUEST_ASSERT(vmx->vmcs_gpa);
+	GUEST_ASSERT(prepare_for_vmx_operation(vmx));
+	GUEST_ASSERT(load_vmcs(vmx));
+	GUEST_ASSERT(ept_1g_pages_supported());
+
+	rsp = &l2_guest_stack[L2_GUEST_STACK_SIZE - 1];
+	*rsp = vcpu_id;
+	prepare_vmcs(vmx, perf_test_l2_guest_entry, rsp);
+
+	GUEST_ASSERT(!vmlaunch());
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+	GUEST_DONE();
+}
+
+uint64_t perf_test_nested_pages(int nr_vcpus)
+{
+	/*
+	 * 513 page tables is enough to identity-map 256 TiB of L2 with 1G
+	 * pages and 4-level paging, plus a few pages per-vCPU for data
+	 * structures such as the VMCS.
+	 */
+	return 513 + 10 * nr_vcpus;
+}
+
+void perf_test_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm)
+{
+	uint64_t start, end;
+
+	prepare_eptp(vmx, vm, 0);
+
+	/*
+	 * Identity map the first 4G and the test region with 1G pages so that
+	 * KVM can shadow the EPT12 with the maximum huge page size supported
+	 * by the backing source.
+	 */
+	nested_identity_map_1g(vmx, vm, 0, 0x100000000ULL);
+
+	start = align_down(perf_test_args.gpa, PG_SIZE_1G);
+	end = align_up(perf_test_args.gpa + perf_test_args.size, PG_SIZE_1G);
+	nested_identity_map_1g(vmx, vm, start, end - start);
+}
+
+void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus)
+{
+	struct vmx_pages *vmx, *vmx0 = NULL;
+	struct kvm_regs regs;
+	vm_vaddr_t vmx_gva;
+	int vcpu_id;
+
+	nested_vmx_check_supported();
+
+	for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
+		vmx = vcpu_alloc_vmx(vm, &vmx_gva);
+
+		if (vcpu_id == 0) {
+			perf_test_setup_ept(vmx, vm);
+			vmx0 = vmx;
+		} else {
+			/* Share the same EPT table across all vCPUs. */
+			vmx->eptp = vmx0->eptp;
+			vmx->eptp_hva = vmx0->eptp_hva;
+			vmx->eptp_gpa = vmx0->eptp_gpa;
+		}
+
+		/*
+		 * Override the vCPU to run perf_test_l1_guest_code() which will
+		 * bounce it into L2 before calling perf_test_guest_code().
+		 */
+		vcpu_regs_get(vm, vcpu_id, &regs);
+		regs.rip = (unsigned long) perf_test_l1_guest_code;
+		vcpu_regs_set(vm, vcpu_id, &regs);
+		vcpu_args_set(vm, vcpu_id, 2, vmx_gva, vcpu_id);
+	}
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
index 5bf169179455..b77a01d0a271 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
@@ -203,6 +203,11 @@ static bool ept_vpid_cap_supported(uint64_t mask)
 	return rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & mask;
 }
 
+bool ept_1g_pages_supported(void)
+{
+	return ept_vpid_cap_supported(VMX_EPT_VPID_CAP_1G_PAGES);
+}
+
 /*
  * Initialize the control fields to the most basic settings possible.
  */
@@ -439,6 +444,9 @@ void __nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
 	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
 		    "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
 
+	TEST_ASSERT((nested_paddr >> 48) == 0,
+		    "Nested physical address 0x%lx requires 5-level paging",
+		    nested_paddr);
 	TEST_ASSERT((nested_paddr % page_size) == 0,
 		    "Nested physical address not on page boundary,\n"
 		    "  nested_paddr: 0x%lx page_size: 0x%lx",
@@ -547,6 +555,13 @@ void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
 	}
 }
 
+/* Identity map a region with 1GiB Pages. */
+void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
+			    uint64_t addr, uint64_t size)
+{
+	__nested_map(vmx, vm, addr, addr, size, PG_LEVEL_1G);
+}
+
 void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
 		  uint32_t eptp_memslot)
 {
-- 
cgit 


From e0f3f46e42064a51573914766897b4ab95d943e3 Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Fri, 20 May 2022 23:32:49 +0000
Subject: KVM: selftests: Restrict test region to 48-bit physical addresses
 when using nested

The selftests nested code only supports 4-level paging at the moment.
This means it cannot map nested guest physical addresses with more than
48 bits. Allow perf_test_util nested mode to work on hosts with more
than 48 physical addresses by restricting the guest test region to
48-bits.

While here, opportunistically fix an off-by-one error when dealing with
vm_get_max_gfn(). perf_test_util.c was treating this as the maximum
number of GFNs, rather than the maximum allowed GFN. This didn't result
in any correctness issues, but it did end up shifting the test region
down slightly when using huge pages.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: David Matlack <dmatlack@google.com>
Message-Id: <20220520233249.3776001-12-dmatlack@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/lib/perf_test_util.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c
index b2ff2cee2e51..f989ff91f022 100644
--- a/tools/testing/selftests/kvm/lib/perf_test_util.c
+++ b/tools/testing/selftests/kvm/lib/perf_test_util.c
@@ -110,6 +110,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
 	struct kvm_vm *vm;
 	uint64_t guest_num_pages, slot0_pages = DEFAULT_GUEST_PHY_PAGES;
 	uint64_t backing_src_pagesz = get_backing_src_pagesz(backing_src);
+	uint64_t region_end_gfn;
 	int i;
 
 	pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
@@ -151,18 +152,29 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
 
 	pta->vm = vm;
 
+	/* Put the test region at the top guest physical memory. */
+	region_end_gfn = vm_get_max_gfn(vm) + 1;
+
+#ifdef __x86_64__
+	/*
+	 * When running vCPUs in L2, restrict the test region to 48 bits to
+	 * avoid needing 5-level page tables to identity map L2.
+	 */
+	if (pta->nested)
+		region_end_gfn = min(region_end_gfn, (1UL << 48) / pta->guest_page_size);
+#endif
 	/*
 	 * If there should be more memory in the guest test region than there
 	 * can be pages in the guest, it will definitely cause problems.
 	 */
-	TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm),
+	TEST_ASSERT(guest_num_pages < region_end_gfn,
 		    "Requested more guest memory than address space allows.\n"
 		    "    guest pages: %" PRIx64 " max gfn: %" PRIx64
 		    " vcpus: %d wss: %" PRIx64 "]\n",
-		    guest_num_pages, vm_get_max_gfn(vm), vcpus,
+		    guest_num_pages, region_end_gfn - 1, vcpus,
 		    vcpu_memory_bytes);
 
-	pta->gpa = (vm_get_max_gfn(vm) - guest_num_pages) * pta->guest_page_size;
+	pta->gpa = (region_end_gfn - guest_num_pages) * pta->guest_page_size;
 	pta->gpa = align_down(pta->gpa, backing_src_pagesz);
 #ifdef __s390x__
 	/* Align to 1M (segment size) */
-- 
cgit 


From 17b0128a136d43e5f8f268631f48bc267373ebff Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 10 Jun 2022 16:32:02 +0200
Subject: wireguard: selftests: use maximum cpu features and allow rng seeding

By forcing the maximum CPU that QEMU has available, we expose additional
capabilities, such as the RNDR instruction, which increases test
coverage. This then allows the CI to skip the fake seeding step in some
cases. Also enable STRICT_KERNEL_RWX to catch issues related to early
jump labels when the RNG is initialized at boot.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 tools/testing/selftests/wireguard/qemu/Makefile    | 28 ++++++++++------------
 tools/testing/selftests/wireguard/qemu/init.c      |  3 +++
 .../testing/selftests/wireguard/qemu/kernel.config |  3 +++
 3 files changed, 19 insertions(+), 15 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile
index bca07b93eeb0..7d1b80988d8a 100644
--- a/tools/testing/selftests/wireguard/qemu/Makefile
+++ b/tools/testing/selftests/wireguard/qemu/Makefile
@@ -64,8 +64,8 @@ QEMU_VPORT_RESULT := virtio-serial-device
 ifeq ($(HOST_ARCH),$(ARCH))
 QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
 else
-QEMU_MACHINE := -cpu cortex-a53 -machine virt
-CFLAGS += -march=armv8-a -mtune=cortex-a53
+QEMU_MACHINE := -cpu max -machine virt
+CFLAGS += -march=armv8-a
 endif
 else ifeq ($(ARCH),aarch64_be)
 CHOST := aarch64_be-linux-musl
@@ -76,8 +76,8 @@ QEMU_VPORT_RESULT := virtio-serial-device
 ifeq ($(HOST_ARCH),$(ARCH))
 QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
 else
-QEMU_MACHINE := -cpu cortex-a53 -machine virt
-CFLAGS += -march=armv8-a -mtune=cortex-a53
+QEMU_MACHINE := -cpu max -machine virt
+CFLAGS += -march=armv8-a
 endif
 else ifeq ($(ARCH),arm)
 CHOST := arm-linux-musleabi
@@ -88,8 +88,8 @@ QEMU_VPORT_RESULT := virtio-serial-device
 ifeq ($(HOST_ARCH),$(ARCH))
 QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
 else
-QEMU_MACHINE := -cpu cortex-a15 -machine virt
-CFLAGS += -march=armv7-a -mtune=cortex-a15 -mabi=aapcs-linux
+QEMU_MACHINE := -cpu max -machine virt
+CFLAGS += -march=armv7-a -mabi=aapcs-linux
 endif
 else ifeq ($(ARCH),armeb)
 CHOST := armeb-linux-musleabi
@@ -100,8 +100,8 @@ QEMU_VPORT_RESULT := virtio-serial-device
 ifeq ($(HOST_ARCH),$(ARCH))
 QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
 else
-QEMU_MACHINE := -cpu cortex-a15 -machine virt
-CFLAGS += -march=armv7-a -mabi=aapcs-linux # We don't pass -mtune=cortex-a15 due to a compiler bug on big endian.
+QEMU_MACHINE := -cpu max -machine virt
+CFLAGS += -march=armv7-a -mabi=aapcs-linux
 LDFLAGS += -Wl,--be8
 endif
 else ifeq ($(ARCH),x86_64)
@@ -112,8 +112,7 @@ KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
 ifeq ($(HOST_ARCH),$(ARCH))
 QEMU_MACHINE := -cpu host -machine q35,accel=kvm
 else
-QEMU_MACHINE := -cpu Skylake-Server -machine q35
-CFLAGS += -march=skylake-avx512
+QEMU_MACHINE := -cpu max -machine q35
 endif
 else ifeq ($(ARCH),i686)
 CHOST := i686-linux-musl
@@ -123,8 +122,7 @@ KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
 ifeq ($(subst x86_64,i686,$(HOST_ARCH)),$(ARCH))
 QEMU_MACHINE := -cpu host -machine q35,accel=kvm
 else
-QEMU_MACHINE := -cpu coreduo -machine q35
-CFLAGS += -march=prescott
+QEMU_MACHINE := -cpu max -machine q35
 endif
 else ifeq ($(ARCH),mips64)
 CHOST := mips64-linux-musl
@@ -182,7 +180,7 @@ KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
 ifeq ($(HOST_ARCH),$(ARCH))
 QEMU_MACHINE := -cpu host,accel=kvm -machine pseries
 else
-QEMU_MACHINE := -machine pseries
+QEMU_MACHINE := -machine pseries -device spapr-rng,rng=rng -object rng-random,id=rng
 endif
 else ifeq ($(ARCH),powerpc64le)
 CHOST := powerpc64le-linux-musl
@@ -192,7 +190,7 @@ KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
 ifeq ($(HOST_ARCH),$(ARCH))
 QEMU_MACHINE := -cpu host,accel=kvm -machine pseries
 else
-QEMU_MACHINE := -machine pseries
+QEMU_MACHINE := -machine pseries -device spapr-rng,rng=rng -object rng-random,id=rng
 endif
 else ifeq ($(ARCH),powerpc)
 CHOST := powerpc-linux-musl
@@ -247,7 +245,7 @@ QEMU_VPORT_RESULT := virtio-serial-ccw
 ifeq ($(HOST_ARCH),$(ARCH))
 QEMU_MACHINE := -cpu host,accel=kvm -machine s390-ccw-virtio -append $(KERNEL_CMDLINE)
 else
-QEMU_MACHINE := -machine s390-ccw-virtio -append $(KERNEL_CMDLINE)
+QEMU_MACHINE := -cpu max -machine s390-ccw-virtio -append $(KERNEL_CMDLINE)
 endif
 else
 $(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64, powerpc64le, powerpc, m68k, riscv64, riscv32, s390x)
diff --git a/tools/testing/selftests/wireguard/qemu/init.c b/tools/testing/selftests/wireguard/qemu/init.c
index 2a0f48fac925..c9e128436546 100644
--- a/tools/testing/selftests/wireguard/qemu/init.c
+++ b/tools/testing/selftests/wireguard/qemu/init.c
@@ -21,6 +21,7 @@
 #include <sys/utsname.h>
 #include <sys/sendfile.h>
 #include <sys/sysmacros.h>
+#include <sys/random.h>
 #include <linux/random.h>
 #include <linux/version.h>
 
@@ -58,6 +59,8 @@ static void seed_rng(void)
 {
 	int bits = 256, fd;
 
+	if (!getrandom(NULL, 0, GRND_NONBLOCK))
+		return;
 	pretty_message("[+] Fake seeding RNG...");
 	fd = open("/dev/random", O_WRONLY);
 	if (fd < 0)
diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config
index a9b5a520a1d2..bad88f4b0a03 100644
--- a/tools/testing/selftests/wireguard/qemu/kernel.config
+++ b/tools/testing/selftests/wireguard/qemu/kernel.config
@@ -31,6 +31,7 @@ CONFIG_TTY=y
 CONFIG_BINFMT_ELF=y
 CONFIG_BINFMT_SCRIPT=y
 CONFIG_VDSO=y
+CONFIG_STRICT_KERNEL_RWX=y
 CONFIG_VIRTUALIZATION=y
 CONFIG_HYPERVISOR_GUEST=y
 CONFIG_PARAVIRT=y
@@ -65,6 +66,8 @@ CONFIG_PROC_FS=y
 CONFIG_PROC_SYSCTL=y
 CONFIG_SYSFS=y
 CONFIG_TMPFS=y
+CONFIG_RANDOM_TRUST_CPU=y
+CONFIG_RANDOM_TRUST_BOOTLOADER=y
 CONFIG_CONSOLE_LOGLEVEL_DEFAULT=15
 CONFIG_LOG_BUF_SHIFT=18
 CONFIG_PRINTK_TIME=y
-- 
cgit 


From 795285ef242543bb636556b7225f20adb7d3795c Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 14 Jun 2022 13:10:45 +0100
Subject: selftests: Fix clang cross compilation

Unlike GCC clang uses a single compiler image to support multiple target
architectures meaning that we can't simply rely on CROSS_COMPILE to select
the output architecture. Instead we must pass --target to the compiler to
tell it what to output, kselftest was not doing this so cross compilation
of kselftest using clang resulted in kselftest being built for the host
architecture.

More work is required to fix tests using custom rules but this gets the
bulk of things building.

Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/lib.mk | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 2a2d240cdc1b..1a5cc3cd97ec 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -7,10 +7,31 @@ else ifneq ($(filter -%,$(LLVM)),)
 LLVM_SUFFIX := $(LLVM)
 endif
 
-CC := $(LLVM_PREFIX)clang$(LLVM_SUFFIX)
+CLANG_TARGET_FLAGS_arm          := arm-linux-gnueabi
+CLANG_TARGET_FLAGS_arm64        := aarch64-linux-gnu
+CLANG_TARGET_FLAGS_hexagon      := hexagon-linux-musl
+CLANG_TARGET_FLAGS_m68k         := m68k-linux-gnu
+CLANG_TARGET_FLAGS_mips         := mipsel-linux-gnu
+CLANG_TARGET_FLAGS_powerpc      := powerpc64le-linux-gnu
+CLANG_TARGET_FLAGS_riscv        := riscv64-linux-gnu
+CLANG_TARGET_FLAGS_s390         := s390x-linux-gnu
+CLANG_TARGET_FLAGS_x86          := x86_64-linux-gnu
+CLANG_TARGET_FLAGS              := $(CLANG_TARGET_FLAGS_$(ARCH))
+
+ifeq ($(CROSS_COMPILE),)
+ifeq ($(CLANG_TARGET_FLAGS),)
+$(error Specify CROSS_COMPILE or add '--target=' option to lib.mk
+else
+CLANG_FLAGS     += --target=$(CLANG_TARGET_FLAGS)
+endif # CLANG_TARGET_FLAGS
+else
+CLANG_FLAGS     += --target=$(notdir $(CROSS_COMPILE:%-=%))
+endif # CROSS_COMPILE
+
+CC := $(LLVM_PREFIX)clang$(LLVM_SUFFIX) $(CLANG_FLAGS) -fintegrated-as
 else
 CC := $(CROSS_COMPILE)gcc
-endif
+endif # LLVM
 
 ifeq (0,$(MAKELEVEL))
     ifeq ($(OUTPUT),)
-- 
cgit 


From 593d1ebe00a45af5cb7bda1235c0790987c2a2b2 Mon Sep 17 00:00:00 2001
From: Joanne Koong <joannelkoong@gmail.com>
Date: Wed, 15 Jun 2022 12:32:13 -0700
Subject: Revert "net: Add a second bind table hashed by port and address"

This reverts:

commit d5a42de8bdbe ("net: Add a second bind table hashed by port and address")
commit 538aaf9b2383 ("selftests: Add test for timing a bind request to a port with a populated bhash entry")
Link: https://lore.kernel.org/netdev/20220520001834.2247810-1-kuba@kernel.org/

There are a few things that need to be fixed here:
* Updating bhash2 in cases where the socket's rcv saddr changes
* Adding bhash2 hashbucket locks

Links to syzbot reports:
https://lore.kernel.org/netdev/00000000000022208805e0df247a@google.com/
https://lore.kernel.org/netdev/0000000000003f33bc05dfaf44fe@google.com/

Fixes: d5a42de8bdbe ("net: Add a second bind table hashed by port and address")
Reported-by: syzbot+015d756bbd1f8b5c8f09@syzkaller.appspotmail.com
Reported-by: syzbot+98fd2d1422063b0f8c44@syzkaller.appspotmail.com
Reported-by: syzbot+0a847a982613c6438fba@syzkaller.appspotmail.com
Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Link: https://lore.kernel.org/r/20220615193213.2419568-1-joannelkoong@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/inet_connection_sock.h            |   3 -
 include/net/inet_hashtables.h                 |  68 +------
 include/net/sock.h                            |  14 --
 net/dccp/proto.c                              |  33 +---
 net/ipv4/inet_connection_sock.c               | 247 +++++++-------------------
 net/ipv4/inet_hashtables.c                    | 193 ++------------------
 net/ipv4/tcp.c                                |  14 +-
 tools/testing/selftests/net/.gitignore        |   1 -
 tools/testing/selftests/net/Makefile          |   2 -
 tools/testing/selftests/net/bind_bhash_test.c | 119 -------------
 10 files changed, 83 insertions(+), 611 deletions(-)
 delete mode 100644 tools/testing/selftests/net/bind_bhash_test.c

(limited to 'tools/testing')

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 077cd730ce2f..85cd695e7fd1 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -25,7 +25,6 @@
 #undef INET_CSK_CLEAR_TIMERS
 
 struct inet_bind_bucket;
-struct inet_bind2_bucket;
 struct tcp_congestion_ops;
 
 /*
@@ -58,7 +57,6 @@ struct inet_connection_sock_af_ops {
  *
  * @icsk_accept_queue:	   FIFO of established children
  * @icsk_bind_hash:	   Bind node
- * @icsk_bind2_hash:	   Bind node in the bhash2 table
  * @icsk_timeout:	   Timeout
  * @icsk_retransmit_timer: Resend (no ack)
  * @icsk_rto:		   Retransmit timeout
@@ -85,7 +83,6 @@ struct inet_connection_sock {
 	struct inet_sock	  icsk_inet;
 	struct request_sock_queue icsk_accept_queue;
 	struct inet_bind_bucket	  *icsk_bind_hash;
-	struct inet_bind2_bucket  *icsk_bind2_hash;
 	unsigned long		  icsk_timeout;
  	struct timer_list	  icsk_retransmit_timer;
  	struct timer_list	  icsk_delack_timer;
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index a0887b70967b..ebfa3df6f8dc 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -90,32 +90,11 @@ struct inet_bind_bucket {
 	struct hlist_head	owners;
 };
 
-struct inet_bind2_bucket {
-	possible_net_t		ib_net;
-	int			l3mdev;
-	unsigned short		port;
-	union {
-#if IS_ENABLED(CONFIG_IPV6)
-		struct in6_addr		v6_rcv_saddr;
-#endif
-		__be32			rcv_saddr;
-	};
-	/* Node in the inet2_bind_hashbucket chain */
-	struct hlist_node	node;
-	/* List of sockets hashed to this bucket */
-	struct hlist_head	owners;
-};
-
 static inline struct net *ib_net(struct inet_bind_bucket *ib)
 {
 	return read_pnet(&ib->ib_net);
 }
 
-static inline struct net *ib2_net(struct inet_bind2_bucket *ib)
-{
-	return read_pnet(&ib->ib_net);
-}
-
 #define inet_bind_bucket_for_each(tb, head) \
 	hlist_for_each_entry(tb, head, node)
 
@@ -124,15 +103,6 @@ struct inet_bind_hashbucket {
 	struct hlist_head	chain;
 };
 
-/* This is synchronized using the inet_bind_hashbucket's spinlock.
- * Instead of having separate spinlocks, the inet_bind2_hashbucket can share
- * the inet_bind_hashbucket's given that in every case where the bhash2 table
- * is useful, a lookup in the bhash table also occurs.
- */
-struct inet_bind2_hashbucket {
-	struct hlist_head	chain;
-};
-
 /* Sockets can be hashed in established or listening table.
  * We must use different 'nulls' end-of-chain value for all hash buckets :
  * A socket might transition from ESTABLISH to LISTEN state without
@@ -164,12 +134,6 @@ struct inet_hashinfo {
 	 */
 	struct kmem_cache		*bind_bucket_cachep;
 	struct inet_bind_hashbucket	*bhash;
-	/* The 2nd binding table hashed by port and address.
-	 * This is used primarily for expediting the resolution of bind
-	 * conflicts.
-	 */
-	struct kmem_cache		*bind2_bucket_cachep;
-	struct inet_bind2_hashbucket	*bhash2;
 	unsigned int			bhash_size;
 
 	/* The 2nd listener table hashed by local port and address */
@@ -229,36 +193,6 @@ inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net,
 void inet_bind_bucket_destroy(struct kmem_cache *cachep,
 			      struct inet_bind_bucket *tb);
 
-static inline bool check_bind_bucket_match(struct inet_bind_bucket *tb,
-					   struct net *net,
-					   const unsigned short port,
-					   int l3mdev)
-{
-	return net_eq(ib_net(tb), net) && tb->port == port &&
-		tb->l3mdev == l3mdev;
-}
-
-struct inet_bind2_bucket *
-inet_bind2_bucket_create(struct kmem_cache *cachep, struct net *net,
-			 struct inet_bind2_hashbucket *head,
-			 const unsigned short port, int l3mdev,
-			 const struct sock *sk);
-
-void inet_bind2_bucket_destroy(struct kmem_cache *cachep,
-			       struct inet_bind2_bucket *tb);
-
-struct inet_bind2_bucket *
-inet_bind2_bucket_find(struct inet_hashinfo *hinfo, struct net *net,
-		       const unsigned short port, int l3mdev,
-		       struct sock *sk,
-		       struct inet_bind2_hashbucket **head);
-
-bool check_bind2_bucket_match_nulladdr(struct inet_bind2_bucket *tb,
-				       struct net *net,
-				       const unsigned short port,
-				       int l3mdev,
-				       const struct sock *sk);
-
 static inline u32 inet_bhashfn(const struct net *net, const __u16 lport,
 			       const u32 bhash_size)
 {
@@ -266,7 +200,7 @@ static inline u32 inet_bhashfn(const struct net *net, const __u16 lport,
 }
 
 void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
-		    struct inet_bind2_bucket *tb2, const unsigned short snum);
+		    const unsigned short snum);
 
 /* Caller must disable local BH processing. */
 int __inet_inherit_port(const struct sock *sk, struct sock *child);
diff --git a/include/net/sock.h b/include/net/sock.h
index c585ef6565d9..72ca97ccb460 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -348,7 +348,6 @@ struct sk_filter;
   *	@sk_txtime_report_errors: set report errors mode for SO_TXTIME
   *	@sk_txtime_unused: unused txtime flags
   *	@ns_tracker: tracker for netns reference
-  *	@sk_bind2_node: bind node in the bhash2 table
   */
 struct sock {
 	/*
@@ -538,7 +537,6 @@ struct sock {
 #endif
 	struct rcu_head		sk_rcu;
 	netns_tracker		ns_tracker;
-	struct hlist_node	sk_bind2_node;
 };
 
 enum sk_pacing {
@@ -819,16 +817,6 @@ static inline void sk_add_bind_node(struct sock *sk,
 	hlist_add_head(&sk->sk_bind_node, list);
 }
 
-static inline void __sk_del_bind2_node(struct sock *sk)
-{
-	__hlist_del(&sk->sk_bind2_node);
-}
-
-static inline void sk_add_bind2_node(struct sock *sk, struct hlist_head *list)
-{
-	hlist_add_head(&sk->sk_bind2_node, list);
-}
-
 #define sk_for_each(__sk, list) \
 	hlist_for_each_entry(__sk, list, sk_node)
 #define sk_for_each_rcu(__sk, list) \
@@ -846,8 +834,6 @@ static inline void sk_add_bind2_node(struct sock *sk, struct hlist_head *list)
 	hlist_for_each_entry_safe(__sk, tmp, list, sk_node)
 #define sk_for_each_bound(__sk, list) \
 	hlist_for_each_entry(__sk, list, sk_bind_node)
-#define sk_for_each_bound_bhash2(__sk, list) \
-	hlist_for_each_entry(__sk, list, sk_bind2_node)
 
 /**
  * sk_for_each_entry_offset_rcu - iterate over a list at a given struct offset
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 2e78458900f2..eb8e128e43e8 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -1120,12 +1120,6 @@ static int __init dccp_init(void)
 				  SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
 	if (!dccp_hashinfo.bind_bucket_cachep)
 		goto out_free_hashinfo2;
-	dccp_hashinfo.bind2_bucket_cachep =
-		kmem_cache_create("dccp_bind2_bucket",
-				  sizeof(struct inet_bind2_bucket), 0,
-				  SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
-	if (!dccp_hashinfo.bind2_bucket_cachep)
-		goto out_free_bind_bucket_cachep;
 
 	/*
 	 * Size and allocate the main established and bind bucket
@@ -1156,7 +1150,7 @@ static int __init dccp_init(void)
 
 	if (!dccp_hashinfo.ehash) {
 		DCCP_CRIT("Failed to allocate DCCP established hash table");
-		goto out_free_bind2_bucket_cachep;
+		goto out_free_bind_bucket_cachep;
 	}
 
 	for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)
@@ -1182,23 +1176,14 @@ static int __init dccp_init(void)
 		goto out_free_dccp_locks;
 	}
 
-	dccp_hashinfo.bhash2 = (struct inet_bind2_hashbucket *)
-		__get_free_pages(GFP_ATOMIC | __GFP_NOWARN, bhash_order);
-
-	if (!dccp_hashinfo.bhash2) {
-		DCCP_CRIT("Failed to allocate DCCP bind2 hash table");
-		goto out_free_dccp_bhash;
-	}
-
 	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
 		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
 		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
-		INIT_HLIST_HEAD(&dccp_hashinfo.bhash2[i].chain);
 	}
 
 	rc = dccp_mib_init();
 	if (rc)
-		goto out_free_dccp_bhash2;
+		goto out_free_dccp_bhash;
 
 	rc = dccp_ackvec_init();
 	if (rc)
@@ -1222,38 +1207,30 @@ out_ackvec_exit:
 	dccp_ackvec_exit();
 out_free_dccp_mib:
 	dccp_mib_exit();
-out_free_dccp_bhash2:
-	free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order);
 out_free_dccp_bhash:
 	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
 out_free_dccp_locks:
 	inet_ehash_locks_free(&dccp_hashinfo);
 out_free_dccp_ehash:
 	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
-out_free_bind2_bucket_cachep:
-	kmem_cache_destroy(dccp_hashinfo.bind2_bucket_cachep);
 out_free_bind_bucket_cachep:
 	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
 out_free_hashinfo2:
 	inet_hashinfo2_free_mod(&dccp_hashinfo);
 out_fail:
 	dccp_hashinfo.bhash = NULL;
-	dccp_hashinfo.bhash2 = NULL;
 	dccp_hashinfo.ehash = NULL;
 	dccp_hashinfo.bind_bucket_cachep = NULL;
-	dccp_hashinfo.bind2_bucket_cachep = NULL;
 	return rc;
 }
 
 static void __exit dccp_fini(void)
 {
-	int bhash_order = get_order(dccp_hashinfo.bhash_size *
-				    sizeof(struct inet_bind_hashbucket));
-
 	ccid_cleanup_builtins();
 	dccp_mib_exit();
-	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
-	free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order);
+	free_pages((unsigned long)dccp_hashinfo.bhash,
+		   get_order(dccp_hashinfo.bhash_size *
+			     sizeof(struct inet_bind_hashbucket)));
 	free_pages((unsigned long)dccp_hashinfo.ehash,
 		   get_order((dccp_hashinfo.ehash_mask + 1) *
 			     sizeof(struct inet_ehash_bucket)));
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index c0b7e6c21360..53f5f956d948 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -117,32 +117,6 @@ bool inet_rcv_saddr_any(const struct sock *sk)
 	return !sk->sk_rcv_saddr;
 }
 
-static bool use_bhash2_on_bind(const struct sock *sk)
-{
-#if IS_ENABLED(CONFIG_IPV6)
-	int addr_type;
-
-	if (sk->sk_family == AF_INET6) {
-		addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
-		return addr_type != IPV6_ADDR_ANY &&
-			addr_type != IPV6_ADDR_MAPPED;
-	}
-#endif
-	return sk->sk_rcv_saddr != htonl(INADDR_ANY);
-}
-
-static u32 get_bhash2_nulladdr_hash(const struct sock *sk, struct net *net,
-				    int port)
-{
-#if IS_ENABLED(CONFIG_IPV6)
-	struct in6_addr nulladdr = {};
-
-	if (sk->sk_family == AF_INET6)
-		return ipv6_portaddr_hash(net, &nulladdr, port);
-#endif
-	return ipv4_portaddr_hash(net, 0, port);
-}
-
 void inet_get_local_port_range(struct net *net, int *low, int *high)
 {
 	unsigned int seq;
@@ -156,71 +130,16 @@ void inet_get_local_port_range(struct net *net, int *low, int *high)
 }
 EXPORT_SYMBOL(inet_get_local_port_range);
 
-static bool bind_conflict_exist(const struct sock *sk, struct sock *sk2,
-				kuid_t sk_uid, bool relax,
-				bool reuseport_cb_ok, bool reuseport_ok)
-{
-	int bound_dev_if2;
-
-	if (sk == sk2)
-		return false;
-
-	bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if);
-
-	if (!sk->sk_bound_dev_if || !bound_dev_if2 ||
-	    sk->sk_bound_dev_if == bound_dev_if2) {
-		if (sk->sk_reuse && sk2->sk_reuse &&
-		    sk2->sk_state != TCP_LISTEN) {
-			if (!relax || (!reuseport_ok && sk->sk_reuseport &&
-				       sk2->sk_reuseport && reuseport_cb_ok &&
-				       (sk2->sk_state == TCP_TIME_WAIT ||
-					uid_eq(sk_uid, sock_i_uid(sk2)))))
-				return true;
-		} else if (!reuseport_ok || !sk->sk_reuseport ||
-			   !sk2->sk_reuseport || !reuseport_cb_ok ||
-			   (sk2->sk_state != TCP_TIME_WAIT &&
-			    !uid_eq(sk_uid, sock_i_uid(sk2)))) {
-			return true;
-		}
-	}
-	return false;
-}
-
-static bool check_bhash2_conflict(const struct sock *sk,
-				  struct inet_bind2_bucket *tb2, kuid_t sk_uid,
-				  bool relax, bool reuseport_cb_ok,
-				  bool reuseport_ok)
-{
-	struct sock *sk2;
-
-	sk_for_each_bound_bhash2(sk2, &tb2->owners) {
-		if (sk->sk_family == AF_INET && ipv6_only_sock(sk2))
-			continue;
-
-		if (bind_conflict_exist(sk, sk2, sk_uid, relax,
-					reuseport_cb_ok, reuseport_ok))
-			return true;
-	}
-	return false;
-}
-
-/* This should be called only when the corresponding inet_bind_bucket spinlock
- * is held
- */
-static int inet_csk_bind_conflict(const struct sock *sk, int port,
-				  struct inet_bind_bucket *tb,
-				  struct inet_bind2_bucket *tb2, /* may be null */
+static int inet_csk_bind_conflict(const struct sock *sk,
+				  const struct inet_bind_bucket *tb,
 				  bool relax, bool reuseport_ok)
 {
-	struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
-	kuid_t uid = sock_i_uid((struct sock *)sk);
-	struct sock_reuseport *reuseport_cb;
-	struct inet_bind2_hashbucket *head2;
-	bool reuseport_cb_ok;
 	struct sock *sk2;
-	struct net *net;
-	int l3mdev;
-	u32 hash;
+	bool reuseport_cb_ok;
+	bool reuse = sk->sk_reuse;
+	bool reuseport = !!sk->sk_reuseport;
+	struct sock_reuseport *reuseport_cb;
+	kuid_t uid = sock_i_uid((struct sock *)sk);
 
 	rcu_read_lock();
 	reuseport_cb = rcu_dereference(sk->sk_reuseport_cb);
@@ -231,42 +150,40 @@ static int inet_csk_bind_conflict(const struct sock *sk, int port,
 	/*
 	 * Unlike other sk lookup places we do not check
 	 * for sk_net here, since _all_ the socks listed
-	 * in tb->owners and tb2->owners list belong
-	 * to the same net
+	 * in tb->owners list belong to the same net - the
+	 * one this bucket belongs to.
 	 */
 
-	if (!use_bhash2_on_bind(sk)) {
-		sk_for_each_bound(sk2, &tb->owners)
-			if (bind_conflict_exist(sk, sk2, uid, relax,
-						reuseport_cb_ok, reuseport_ok) &&
-			    inet_rcv_saddr_equal(sk, sk2, true))
-				return true;
+	sk_for_each_bound(sk2, &tb->owners) {
+		int bound_dev_if2;
 
-		return false;
+		if (sk == sk2)
+			continue;
+		bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if);
+		if ((!sk->sk_bound_dev_if ||
+		     !bound_dev_if2 ||
+		     sk->sk_bound_dev_if == bound_dev_if2)) {
+			if (reuse && sk2->sk_reuse &&
+			    sk2->sk_state != TCP_LISTEN) {
+				if ((!relax ||
+				     (!reuseport_ok &&
+				      reuseport && sk2->sk_reuseport &&
+				      reuseport_cb_ok &&
+				      (sk2->sk_state == TCP_TIME_WAIT ||
+				       uid_eq(uid, sock_i_uid(sk2))))) &&
+				    inet_rcv_saddr_equal(sk, sk2, true))
+					break;
+			} else if (!reuseport_ok ||
+				   !reuseport || !sk2->sk_reuseport ||
+				   !reuseport_cb_ok ||
+				   (sk2->sk_state != TCP_TIME_WAIT &&
+				    !uid_eq(uid, sock_i_uid(sk2)))) {
+				if (inet_rcv_saddr_equal(sk, sk2, true))
+					break;
+			}
+		}
 	}
-
-	if (tb2 && check_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok,
-					 reuseport_ok))
-		return true;
-
-	net = sock_net(sk);
-
-	/* check there's no conflict with an existing IPV6_ADDR_ANY (if ipv6) or
-	 * INADDR_ANY (if ipv4) socket.
-	 */
-	hash = get_bhash2_nulladdr_hash(sk, net, port);
-	head2 = &hinfo->bhash2[hash & (hinfo->bhash_size - 1)];
-
-	l3mdev = inet_sk_bound_l3mdev(sk);
-	inet_bind_bucket_for_each(tb2, &head2->chain)
-		if (check_bind2_bucket_match_nulladdr(tb2, net, port, l3mdev, sk))
-			break;
-
-	if (tb2 && check_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok,
-					 reuseport_ok))
-		return true;
-
-	return false;
+	return sk2 != NULL;
 }
 
 /*
@@ -274,20 +191,16 @@ static int inet_csk_bind_conflict(const struct sock *sk, int port,
  * inet_bind_hashbucket lock held.
  */
 static struct inet_bind_hashbucket *
-inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret,
-			struct inet_bind2_bucket **tb2_ret,
-			struct inet_bind2_hashbucket **head2_ret, int *port_ret)
+inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, int *port_ret)
 {
 	struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
-	struct inet_bind2_hashbucket *head2;
+	int port = 0;
 	struct inet_bind_hashbucket *head;
 	struct net *net = sock_net(sk);
+	bool relax = false;
 	int i, low, high, attempt_half;
-	struct inet_bind2_bucket *tb2;
 	struct inet_bind_bucket *tb;
 	u32 remaining, offset;
-	bool relax = false;
-	int port = 0;
 	int l3mdev;
 
 	l3mdev = inet_sk_bound_l3mdev(sk);
@@ -326,12 +239,10 @@ other_parity_scan:
 		head = &hinfo->bhash[inet_bhashfn(net, port,
 						  hinfo->bhash_size)];
 		spin_lock_bh(&head->lock);
-		tb2 = inet_bind2_bucket_find(hinfo, net, port, l3mdev, sk,
-					     &head2);
 		inet_bind_bucket_for_each(tb, &head->chain)
-			if (check_bind_bucket_match(tb, net, port, l3mdev)) {
-				if (!inet_csk_bind_conflict(sk, port, tb, tb2,
-							    relax, false))
+			if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
+			    tb->port == port) {
+				if (!inet_csk_bind_conflict(sk, tb, relax, false))
 					goto success;
 				goto next_port;
 			}
@@ -361,8 +272,6 @@ next_port:
 success:
 	*port_ret = port;
 	*tb_ret = tb;
-	*tb2_ret = tb2;
-	*head2_ret = head2;
 	return head;
 }
 
@@ -458,81 +367,54 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
 {
 	bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
 	struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
-	bool bhash_created = false, bhash2_created = false;
-	struct inet_bind2_bucket *tb2 = NULL;
-	struct inet_bind2_hashbucket *head2;
-	struct inet_bind_bucket *tb = NULL;
+	int ret = 1, port = snum;
 	struct inet_bind_hashbucket *head;
 	struct net *net = sock_net(sk);
-	int ret = 1, port = snum;
-	bool found_port = false;
+	struct inet_bind_bucket *tb = NULL;
 	int l3mdev;
 
 	l3mdev = inet_sk_bound_l3mdev(sk);
 
 	if (!port) {
-		head = inet_csk_find_open_port(sk, &tb, &tb2, &head2, &port);
+		head = inet_csk_find_open_port(sk, &tb, &port);
 		if (!head)
 			return ret;
-		if (tb && tb2)
-			goto success;
-		found_port = true;
-	} else {
-		head = &hinfo->bhash[inet_bhashfn(net, port,
-						  hinfo->bhash_size)];
-		spin_lock_bh(&head->lock);
-		inet_bind_bucket_for_each(tb, &head->chain)
-			if (check_bind_bucket_match(tb, net, port, l3mdev))
-				break;
-
-		tb2 = inet_bind2_bucket_find(hinfo, net, port, l3mdev, sk,
-					     &head2);
-	}
-
-	if (!tb) {
-		tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, net,
-					     head, port, l3mdev);
 		if (!tb)
-			goto fail_unlock;
-		bhash_created = true;
-	}
-
-	if (!tb2) {
-		tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep,
-					       net, head2, port, l3mdev, sk);
-		if (!tb2)
-			goto fail_unlock;
-		bhash2_created = true;
+			goto tb_not_found;
+		goto success;
 	}
-
-	/* If we had to find an open port, we already checked for conflicts */
-	if (!found_port && !hlist_empty(&tb->owners)) {
+	head = &hinfo->bhash[inet_bhashfn(net, port,
+					  hinfo->bhash_size)];
+	spin_lock_bh(&head->lock);
+	inet_bind_bucket_for_each(tb, &head->chain)
+		if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
+		    tb->port == port)
+			goto tb_found;
+tb_not_found:
+	tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
+				     net, head, port, l3mdev);
+	if (!tb)
+		goto fail_unlock;
+tb_found:
+	if (!hlist_empty(&tb->owners)) {
 		if (sk->sk_reuse == SK_FORCE_REUSE)
 			goto success;
 
 		if ((tb->fastreuse > 0 && reuse) ||
 		    sk_reuseport_match(tb, sk))
 			goto success;
-		if (inet_csk_bind_conflict(sk, port, tb, tb2, true, true))
+		if (inet_csk_bind_conflict(sk, tb, true, true))
 			goto fail_unlock;
 	}
 success:
 	inet_csk_update_fastreuse(tb, sk);
 
 	if (!inet_csk(sk)->icsk_bind_hash)
-		inet_bind_hash(sk, tb, tb2, port);
+		inet_bind_hash(sk, tb, port);
 	WARN_ON(inet_csk(sk)->icsk_bind_hash != tb);
-	WARN_ON(inet_csk(sk)->icsk_bind2_hash != tb2);
 	ret = 0;
 
 fail_unlock:
-	if (ret) {
-		if (bhash_created)
-			inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb);
-		if (bhash2_created)
-			inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep,
-						  tb2);
-	}
 	spin_unlock_bh(&head->lock);
 	return ret;
 }
@@ -1079,7 +961,6 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
 
 		inet_sk_set_state(newsk, TCP_SYN_RECV);
 		newicsk->icsk_bind_hash = NULL;
-		newicsk->icsk_bind2_hash = NULL;
 
 		inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
 		inet_sk(newsk)->inet_num = inet_rsk(req)->ir_num;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 545f91b6cb5e..b9d995b5ce24 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -81,41 +81,6 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
 	return tb;
 }
 
-struct inet_bind2_bucket *inet_bind2_bucket_create(struct kmem_cache *cachep,
-						   struct net *net,
-						   struct inet_bind2_hashbucket *head,
-						   const unsigned short port,
-						   int l3mdev,
-						   const struct sock *sk)
-{
-	struct inet_bind2_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);
-
-	if (tb) {
-		write_pnet(&tb->ib_net, net);
-		tb->l3mdev    = l3mdev;
-		tb->port      = port;
-#if IS_ENABLED(CONFIG_IPV6)
-		if (sk->sk_family == AF_INET6)
-			tb->v6_rcv_saddr = sk->sk_v6_rcv_saddr;
-		else
-#endif
-			tb->rcv_saddr = sk->sk_rcv_saddr;
-		INIT_HLIST_HEAD(&tb->owners);
-		hlist_add_head(&tb->node, &head->chain);
-	}
-	return tb;
-}
-
-static bool bind2_bucket_addr_match(struct inet_bind2_bucket *tb2, struct sock *sk)
-{
-#if IS_ENABLED(CONFIG_IPV6)
-	if (sk->sk_family == AF_INET6)
-		return ipv6_addr_equal(&tb2->v6_rcv_saddr,
-				       &sk->sk_v6_rcv_saddr);
-#endif
-	return tb2->rcv_saddr == sk->sk_rcv_saddr;
-}
-
 /*
  * Caller must hold hashbucket lock for this tb with local BH disabled
  */
@@ -127,25 +92,12 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket
 	}
 }
 
-/* Caller must hold the lock for the corresponding hashbucket in the bhash table
- * with local BH disabled
- */
-void inet_bind2_bucket_destroy(struct kmem_cache *cachep, struct inet_bind2_bucket *tb)
-{
-	if (hlist_empty(&tb->owners)) {
-		__hlist_del(&tb->node);
-		kmem_cache_free(cachep, tb);
-	}
-}
-
 void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
-		    struct inet_bind2_bucket *tb2, const unsigned short snum)
+		    const unsigned short snum)
 {
 	inet_sk(sk)->inet_num = snum;
 	sk_add_bind_node(sk, &tb->owners);
 	inet_csk(sk)->icsk_bind_hash = tb;
-	sk_add_bind2_node(sk, &tb2->owners);
-	inet_csk(sk)->icsk_bind2_hash = tb2;
 }
 
 /*
@@ -157,7 +109,6 @@ static void __inet_put_port(struct sock *sk)
 	const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->inet_num,
 			hashinfo->bhash_size);
 	struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
-	struct inet_bind2_bucket *tb2;
 	struct inet_bind_bucket *tb;
 
 	spin_lock(&head->lock);
@@ -166,13 +117,6 @@ static void __inet_put_port(struct sock *sk)
 	inet_csk(sk)->icsk_bind_hash = NULL;
 	inet_sk(sk)->inet_num = 0;
 	inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
-
-	if (inet_csk(sk)->icsk_bind2_hash) {
-		tb2 = inet_csk(sk)->icsk_bind2_hash;
-		__sk_del_bind2_node(sk);
-		inet_csk(sk)->icsk_bind2_hash = NULL;
-		inet_bind2_bucket_destroy(hashinfo->bind2_bucket_cachep, tb2);
-	}
 	spin_unlock(&head->lock);
 }
 
@@ -189,19 +133,14 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child)
 	struct inet_hashinfo *table = sk->sk_prot->h.hashinfo;
 	unsigned short port = inet_sk(child)->inet_num;
 	const int bhash = inet_bhashfn(sock_net(sk), port,
-				       table->bhash_size);
+			table->bhash_size);
 	struct inet_bind_hashbucket *head = &table->bhash[bhash];
-	struct inet_bind2_hashbucket *head_bhash2;
-	bool created_inet_bind_bucket = false;
-	struct net *net = sock_net(sk);
-	struct inet_bind2_bucket *tb2;
 	struct inet_bind_bucket *tb;
 	int l3mdev;
 
 	spin_lock(&head->lock);
 	tb = inet_csk(sk)->icsk_bind_hash;
-	tb2 = inet_csk(sk)->icsk_bind2_hash;
-	if (unlikely(!tb || !tb2)) {
+	if (unlikely(!tb)) {
 		spin_unlock(&head->lock);
 		return -ENOENT;
 	}
@@ -214,45 +153,25 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child)
 		 * as that of the child socket. We have to look up or
 		 * create a new bind bucket for the child here. */
 		inet_bind_bucket_for_each(tb, &head->chain) {
-			if (check_bind_bucket_match(tb, net, port, l3mdev))
+			if (net_eq(ib_net(tb), sock_net(sk)) &&
+			    tb->l3mdev == l3mdev && tb->port == port)
 				break;
 		}
 		if (!tb) {
 			tb = inet_bind_bucket_create(table->bind_bucket_cachep,
-						     net, head, port, l3mdev);
+						     sock_net(sk), head, port,
+						     l3mdev);
 			if (!tb) {
 				spin_unlock(&head->lock);
 				return -ENOMEM;
 			}
-			created_inet_bind_bucket = true;
 		}
 		inet_csk_update_fastreuse(tb, child);
-
-		goto bhash2_find;
-	} else if (!bind2_bucket_addr_match(tb2, child)) {
-		l3mdev = inet_sk_bound_l3mdev(sk);
-
-bhash2_find:
-		tb2 = inet_bind2_bucket_find(table, net, port, l3mdev, child,
-					     &head_bhash2);
-		if (!tb2) {
-			tb2 = inet_bind2_bucket_create(table->bind2_bucket_cachep,
-						       net, head_bhash2, port,
-						       l3mdev, child);
-			if (!tb2)
-				goto error;
-		}
 	}
-	inet_bind_hash(child, tb, tb2, port);
+	inet_bind_hash(child, tb, port);
 	spin_unlock(&head->lock);
 
 	return 0;
-
-error:
-	if (created_inet_bind_bucket)
-		inet_bind_bucket_destroy(table->bind_bucket_cachep, tb);
-	spin_unlock(&head->lock);
-	return -ENOMEM;
 }
 EXPORT_SYMBOL_GPL(__inet_inherit_port);
 
@@ -756,76 +675,6 @@ void inet_unhash(struct sock *sk)
 }
 EXPORT_SYMBOL_GPL(inet_unhash);
 
-static bool check_bind2_bucket_match(struct inet_bind2_bucket *tb,
-				     struct net *net, unsigned short port,
-				     int l3mdev, struct sock *sk)
-{
-#if IS_ENABLED(CONFIG_IPV6)
-	if (sk->sk_family == AF_INET6)
-		return net_eq(ib2_net(tb), net) && tb->port == port &&
-			tb->l3mdev == l3mdev &&
-			ipv6_addr_equal(&tb->v6_rcv_saddr, &sk->sk_v6_rcv_saddr);
-	else
-#endif
-		return net_eq(ib2_net(tb), net) && tb->port == port &&
-			tb->l3mdev == l3mdev && tb->rcv_saddr == sk->sk_rcv_saddr;
-}
-
-bool check_bind2_bucket_match_nulladdr(struct inet_bind2_bucket *tb,
-				       struct net *net, const unsigned short port,
-				       int l3mdev, const struct sock *sk)
-{
-#if IS_ENABLED(CONFIG_IPV6)
-	struct in6_addr nulladdr = {};
-
-	if (sk->sk_family == AF_INET6)
-		return net_eq(ib2_net(tb), net) && tb->port == port &&
-			tb->l3mdev == l3mdev &&
-			ipv6_addr_equal(&tb->v6_rcv_saddr, &nulladdr);
-	else
-#endif
-		return net_eq(ib2_net(tb), net) && tb->port == port &&
-			tb->l3mdev == l3mdev && tb->rcv_saddr == 0;
-}
-
-static struct inet_bind2_hashbucket *
-inet_bhashfn_portaddr(struct inet_hashinfo *hinfo, const struct sock *sk,
-		      const struct net *net, unsigned short port)
-{
-	u32 hash;
-
-#if IS_ENABLED(CONFIG_IPV6)
-	if (sk->sk_family == AF_INET6)
-		hash = ipv6_portaddr_hash(net, &sk->sk_v6_rcv_saddr, port);
-	else
-#endif
-		hash = ipv4_portaddr_hash(net, sk->sk_rcv_saddr, port);
-	return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)];
-}
-
-/* This should only be called when the spinlock for the socket's corresponding
- * bind_hashbucket is held
- */
-struct inet_bind2_bucket *
-inet_bind2_bucket_find(struct inet_hashinfo *hinfo, struct net *net,
-		       const unsigned short port, int l3mdev, struct sock *sk,
-		       struct inet_bind2_hashbucket **head)
-{
-	struct inet_bind2_bucket *bhash2 = NULL;
-	struct inet_bind2_hashbucket *h;
-
-	h = inet_bhashfn_portaddr(hinfo, sk, net, port);
-	inet_bind_bucket_for_each(bhash2, &h->chain) {
-		if (check_bind2_bucket_match(bhash2, net, port, l3mdev, sk))
-			break;
-	}
-
-	if (head)
-		*head = h;
-
-	return bhash2;
-}
-
 /* RFC 6056 3.3.4.  Algorithm 4: Double-Hash Port Selection Algorithm
  * Note that we use 32bit integers (vs RFC 'short integers')
  * because 2^16 is not a multiple of num_ephemeral and this
@@ -846,13 +695,10 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 {
 	struct inet_hashinfo *hinfo = death_row->hashinfo;
 	struct inet_timewait_sock *tw = NULL;
-	struct inet_bind2_hashbucket *head2;
 	struct inet_bind_hashbucket *head;
 	int port = inet_sk(sk)->inet_num;
 	struct net *net = sock_net(sk);
-	struct inet_bind2_bucket *tb2;
 	struct inet_bind_bucket *tb;
-	bool tb_created = false;
 	u32 remaining, offset;
 	int ret, i, low, high;
 	int l3mdev;
@@ -909,7 +755,8 @@ other_parity_scan:
 		 * the established check is already unique enough.
 		 */
 		inet_bind_bucket_for_each(tb, &head->chain) {
-			if (check_bind_bucket_match(tb, net, port, l3mdev)) {
+			if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
+			    tb->port == port) {
 				if (tb->fastreuse >= 0 ||
 				    tb->fastreuseport >= 0)
 					goto next_port;
@@ -927,7 +774,6 @@ other_parity_scan:
 			spin_unlock_bh(&head->lock);
 			return -ENOMEM;
 		}
-		tb_created = true;
 		tb->fastreuse = -1;
 		tb->fastreuseport = -1;
 		goto ok;
@@ -943,17 +789,6 @@ next_port:
 	return -EADDRNOTAVAIL;
 
 ok:
-	/* Find the corresponding tb2 bucket since we need to
-	 * add the socket to the bhash2 table as well
-	 */
-	tb2 = inet_bind2_bucket_find(hinfo, net, port, l3mdev, sk, &head2);
-	if (!tb2) {
-		tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep, net,
-					       head2, port, l3mdev, sk);
-		if (!tb2)
-			goto error;
-	}
-
 	/* Here we want to add a little bit of randomness to the next source
 	 * port that will be chosen. We use a max() with a random here so that
 	 * on low contention the randomness is maximal and on high contention
@@ -963,7 +798,7 @@ ok:
 	WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2);
 
 	/* Head lock still held and bh's disabled */
-	inet_bind_hash(sk, tb, tb2, port);
+	inet_bind_hash(sk, tb, port);
 	if (sk_unhashed(sk)) {
 		inet_sk(sk)->inet_sport = htons(port);
 		inet_ehash_nolisten(sk, (struct sock *)tw, NULL);
@@ -975,12 +810,6 @@ ok:
 		inet_twsk_deschedule_put(tw);
 	local_bh_enable();
 	return 0;
-
-error:
-	if (tb_created)
-		inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb);
-	spin_unlock_bh(&head->lock);
-	return -ENOMEM;
 }
 
 /*
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 9984d23a7f3e..028513d3e2a2 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -4604,12 +4604,6 @@ void __init tcp_init(void)
 				  SLAB_HWCACHE_ALIGN | SLAB_PANIC |
 				  SLAB_ACCOUNT,
 				  NULL);
-	tcp_hashinfo.bind2_bucket_cachep =
-		kmem_cache_create("tcp_bind2_bucket",
-				  sizeof(struct inet_bind2_bucket), 0,
-				  SLAB_HWCACHE_ALIGN | SLAB_PANIC |
-				  SLAB_ACCOUNT,
-				  NULL);
 
 	/* Size and allocate the main established and bind bucket
 	 * hash tables.
@@ -4632,9 +4626,8 @@ void __init tcp_init(void)
 	if (inet_ehash_locks_alloc(&tcp_hashinfo))
 		panic("TCP: failed to alloc ehash_locks");
 	tcp_hashinfo.bhash =
-		alloc_large_system_hash("TCP bind bhash tables",
-					sizeof(struct inet_bind_hashbucket) +
-					sizeof(struct inet_bind2_hashbucket),
+		alloc_large_system_hash("TCP bind",
+					sizeof(struct inet_bind_hashbucket),
 					tcp_hashinfo.ehash_mask + 1,
 					17, /* one slot per 128 KB of memory */
 					0,
@@ -4643,12 +4636,9 @@ void __init tcp_init(void)
 					0,
 					64 * 1024);
 	tcp_hashinfo.bhash_size = 1U << tcp_hashinfo.bhash_size;
-	tcp_hashinfo.bhash2 =
-		(struct inet_bind2_hashbucket *)(tcp_hashinfo.bhash + tcp_hashinfo.bhash_size);
 	for (i = 0; i < tcp_hashinfo.bhash_size; i++) {
 		spin_lock_init(&tcp_hashinfo.bhash[i].lock);
 		INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain);
-		INIT_HLIST_HEAD(&tcp_hashinfo.bhash2[i].chain);
 	}
 
 
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index b984f8c8d523..a29f79618934 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -37,4 +37,3 @@ gro
 ioam6_parser
 toeplitz
 cmsg_sender
-bind_bhash_test
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 464df13831f2..7ea54af55490 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -59,7 +59,6 @@ TEST_GEN_FILES += toeplitz
 TEST_GEN_FILES += cmsg_sender
 TEST_GEN_FILES += stress_reuseport_listen
 TEST_PROGS += test_vxlan_vnifiltering.sh
-TEST_GEN_FILES += bind_bhash_test
 
 TEST_FILES := settings
 
@@ -70,5 +69,4 @@ include bpf/Makefile
 
 $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma
 $(OUTPUT)/tcp_mmap: LDLIBS += -lpthread
-$(OUTPUT)/bind_bhash_test: LDLIBS += -lpthread
 $(OUTPUT)/tcp_inq: LDLIBS += -lpthread
diff --git a/tools/testing/selftests/net/bind_bhash_test.c b/tools/testing/selftests/net/bind_bhash_test.c
deleted file mode 100644
index 252e73754e76..000000000000
--- a/tools/testing/selftests/net/bind_bhash_test.c
+++ /dev/null
@@ -1,119 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * This times how long it takes to bind to a port when the port already
- * has multiple sockets in its bhash table.
- *
- * In the setup(), we populate the port's bhash table with
- * MAX_THREADS * MAX_CONNECTIONS number of entries.
- */
-
-#include <unistd.h>
-#include <stdio.h>
-#include <netdb.h>
-#include <pthread.h>
-
-#define MAX_THREADS 600
-#define MAX_CONNECTIONS 40
-
-static const char *bind_addr = "::1";
-static const char *port;
-
-static int fd_array[MAX_THREADS][MAX_CONNECTIONS];
-
-static int bind_socket(int opt, const char *addr)
-{
-	struct addrinfo *res, hint = {};
-	int sock_fd, reuse = 1, err;
-
-	sock_fd = socket(AF_INET6, SOCK_STREAM, 0);
-	if (sock_fd < 0) {
-		perror("socket fd err");
-		return -1;
-	}
-
-	hint.ai_family = AF_INET6;
-	hint.ai_socktype = SOCK_STREAM;
-
-	err = getaddrinfo(addr, port, &hint, &res);
-	if (err) {
-		perror("getaddrinfo failed");
-		return -1;
-	}
-
-	if (opt) {
-		err = setsockopt(sock_fd, SOL_SOCKET, opt, &reuse, sizeof(reuse));
-		if (err) {
-			perror("setsockopt failed");
-			return -1;
-		}
-	}
-
-	err = bind(sock_fd, res->ai_addr, res->ai_addrlen);
-	if (err) {
-		perror("failed to bind to port");
-		return -1;
-	}
-
-	return sock_fd;
-}
-
-static void *setup(void *arg)
-{
-	int sock_fd, i;
-	int *array = (int *)arg;
-
-	for (i = 0; i < MAX_CONNECTIONS; i++) {
-		sock_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, bind_addr);
-		if (sock_fd < 0)
-			return NULL;
-		array[i] = sock_fd;
-	}
-
-	return NULL;
-}
-
-int main(int argc, const char *argv[])
-{
-	int listener_fd, sock_fd, i, j;
-	pthread_t tid[MAX_THREADS];
-	clock_t begin, end;
-
-	if (argc != 2) {
-		printf("Usage: listener <port>\n");
-		return -1;
-	}
-
-	port = argv[1];
-
-	listener_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, bind_addr);
-	if (listen(listener_fd, 100) < 0) {
-		perror("listen failed");
-		return -1;
-	}
-
-	/* Set up threads to populate the bhash table entry for the port */
-	for (i = 0; i < MAX_THREADS; i++)
-		pthread_create(&tid[i], NULL, setup, fd_array[i]);
-
-	for (i = 0; i < MAX_THREADS; i++)
-		pthread_join(tid[i], NULL);
-
-	begin = clock();
-
-	/* Bind to the same port on a different address */
-	sock_fd  = bind_socket(0, "2001:0db8:0:f101::1");
-
-	end = clock();
-
-	printf("time spent = %f\n", (double)(end - begin) / CLOCKS_PER_SEC);
-
-	/* clean up */
-	close(sock_fd);
-	close(listener_fd);
-	for (i = 0; i < MAX_THREADS; i++) {
-		for (j = 0; i < MAX_THREADS; i++)
-			close(fd_array[i][j]);
-	}
-
-	return 0;
-}
-- 
cgit 


From 5e0b0a4c52d30bb09659446f40b77a692361600d Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Thu, 16 Jun 2022 18:20:37 +0200
Subject: selftests/bpf: Test tail call counting with bpf2bpf and data on stack

Cover the case when tail call count needs to be passed from BPF function to
BPF function, and the caller has data on stack. Specifically when the size
of data allocated on BPF stack is not a multiple on 8.

Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20220616162037.535469-3-jakub@cloudflare.com
---
 tools/testing/selftests/bpf/prog_tests/tailcalls.c | 55 ++++++++++++++++++++++
 .../selftests/bpf/progs/tailcall_bpf2bpf6.c        | 42 +++++++++++++++++
 2 files changed, 97 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/tailcall_bpf2bpf6.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
index c4da87ec3ba4..19c70880cfb3 100644
--- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c
+++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
@@ -831,6 +831,59 @@ out:
 	bpf_object__close(obj);
 }
 
+#include "tailcall_bpf2bpf6.skel.h"
+
+/* Tail call counting works even when there is data on stack which is
+ * not aligned to 8 bytes.
+ */
+static void test_tailcall_bpf2bpf_6(void)
+{
+	struct tailcall_bpf2bpf6 *obj;
+	int err, map_fd, prog_fd, main_fd, data_fd, i, val;
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+		.repeat = 1,
+	);
+
+	obj = tailcall_bpf2bpf6__open_and_load();
+	if (!ASSERT_OK_PTR(obj, "open and load"))
+		return;
+
+	main_fd = bpf_program__fd(obj->progs.entry);
+	if (!ASSERT_GE(main_fd, 0, "entry prog fd"))
+		goto out;
+
+	map_fd = bpf_map__fd(obj->maps.jmp_table);
+	if (!ASSERT_GE(map_fd, 0, "jmp_table map fd"))
+		goto out;
+
+	prog_fd = bpf_program__fd(obj->progs.classifier_0);
+	if (!ASSERT_GE(prog_fd, 0, "classifier_0 prog fd"))
+		goto out;
+
+	i = 0;
+	err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+	if (!ASSERT_OK(err, "jmp_table map update"))
+		goto out;
+
+	err = bpf_prog_test_run_opts(main_fd, &topts);
+	ASSERT_OK(err, "entry prog test run");
+	ASSERT_EQ(topts.retval, 0, "tailcall retval");
+
+	data_fd = bpf_map__fd(obj->maps.bss);
+	if (!ASSERT_GE(map_fd, 0, "bss map fd"))
+		goto out;
+
+	i = 0;
+	err = bpf_map_lookup_elem(data_fd, &i, &val);
+	ASSERT_OK(err, "bss map lookup");
+	ASSERT_EQ(val, 1, "done flag is set");
+
+out:
+	tailcall_bpf2bpf6__destroy(obj);
+}
+
 void test_tailcalls(void)
 {
 	if (test__start_subtest("tailcall_1"))
@@ -855,4 +908,6 @@ void test_tailcalls(void)
 		test_tailcall_bpf2bpf_4(false);
 	if (test__start_subtest("tailcall_bpf2bpf_5"))
 		test_tailcall_bpf2bpf_4(true);
+	if (test__start_subtest("tailcall_bpf2bpf_6"))
+		test_tailcall_bpf2bpf_6();
 }
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf6.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf6.c
new file mode 100644
index 000000000000..41ce83da78e8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf6.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+#define __unused __attribute__((unused))
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+	__uint(max_entries, 1);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+int done = 0;
+
+SEC("tc")
+int classifier_0(struct __sk_buff *skb __unused)
+{
+	done = 1;
+	return 0;
+}
+
+static __noinline
+int subprog_tail(struct __sk_buff *skb)
+{
+	/* Don't propagate the constant to the caller */
+	volatile int ret = 1;
+
+	bpf_tail_call_static(skb, &jmp_table, 0);
+	return ret;
+}
+
+SEC("tc")
+int entry(struct __sk_buff *skb)
+{
+	/* Have data on stack which size is not a multiple of 8 */
+	volatile char arr[1] = {};
+
+	return subprog_tail(skb);
+}
+
+char __license[] SEC("license") = "GPL";
-- 
cgit 


From 12a29115be72dfc72372af9ded4bc4ae7113a729 Mon Sep 17 00:00:00 2001
From: Yu Liao <liaoyu15@huawei.com>
Date: Tue, 14 Jun 2022 20:02:35 +0800
Subject: selftests dma: fix compile error for dma_map_benchmark

When building selftests/dma:
$ make -C tools/testing/selftests TARGETS=dma
I hit the following compilation error:

dma_map_benchmark.c:13:10: fatal error: linux/map_benchmark.h: No such file or directory
 #include <linux/map_benchmark.h>
          ^~~~~~~~~~~~~~~~~~~~~~~

dma/Makefile does not include the map_benchmark.h path, so add
more including path, and fix include order in dma_map_benchmark.c

Fixes: 8ddde07a3d28 ("dma-mapping: benchmark: extract a common header file for map_benchmark definition")
Signed-off-by: Yu Liao <liaoyu15@huawei.com>
Tested-by: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/dma/Makefile            | 1 +
 tools/testing/selftests/dma/dma_map_benchmark.c | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/dma/Makefile b/tools/testing/selftests/dma/Makefile
index aa8e8b5b3864..cd8c5ece1cba 100644
--- a/tools/testing/selftests/dma/Makefile
+++ b/tools/testing/selftests/dma/Makefile
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 CFLAGS += -I../../../../usr/include/
+CFLAGS += -I../../../../include/
 
 TEST_GEN_PROGS := dma_map_benchmark
 
diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/testing/selftests/dma/dma_map_benchmark.c
index c3b3c09e995e..5c997f17fcbd 100644
--- a/tools/testing/selftests/dma/dma_map_benchmark.c
+++ b/tools/testing/selftests/dma/dma_map_benchmark.c
@@ -10,8 +10,8 @@
 #include <unistd.h>
 #include <sys/ioctl.h>
 #include <sys/mman.h>
-#include <linux/map_benchmark.h>
 #include <linux/types.h>
+#include <linux/map_benchmark.h>
 
 #define NSEC_PER_MSEC	1000000L
 
-- 
cgit 


From 3084a4ec7f9bb1ec90036cfd01b1abadc5dd4fb2 Mon Sep 17 00:00:00 2001
From: Ding Xiang <dingxiang@cmss.chinamobile.com>
Date: Wed, 15 Jun 2022 17:36:29 +0800
Subject: selftests: vm: Fix resource leak when return error

When return on an error path, file handle need to be closed
to prevent resource leak

Signed-off-by: Ding Xiang <dingxiang@cmss.chinamobile.com>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/vm/ksm_tests.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/ksm_tests.c b/tools/testing/selftests/vm/ksm_tests.c
index 2fcf24312da8..f5e4e0bbd081 100644
--- a/tools/testing/selftests/vm/ksm_tests.c
+++ b/tools/testing/selftests/vm/ksm_tests.c
@@ -54,6 +54,7 @@ static int ksm_write_sysfs(const char *file_path, unsigned long val)
 	}
 	if (fprintf(f, "%lu", val) < 0) {
 		perror("fprintf");
+		fclose(f);
 		return 1;
 	}
 	fclose(f);
@@ -72,6 +73,7 @@ static int ksm_read_sysfs(const char *file_path, unsigned long *val)
 	}
 	if (fscanf(f, "%lu", val) != 1) {
 		perror("fscanf");
+		fclose(f);
 		return 1;
 	}
 	fclose(f);
-- 
cgit 


From 9b4d5c01eb234f66a15a746b1c73e10209edb199 Mon Sep 17 00:00:00 2001
From: Joel Savitz <jsavitz@redhat.com>
Date: Thu, 9 Jun 2022 16:32:17 -0400
Subject: selftests: make use of GUP_TEST_FILE macro

Commit 17de1e559cf1 ("selftests: clarify common error when running
gup_test") had most of its hunks dropped due to a conflict with another
patch accepted into Linux around the same time that implemented the same
behavior as a subset of other changes.

However, the remaining hunk defines the GUP_TEST_FILE macro without
making use of it. This patch makes use of the macro in the two relevant
places.

Furthermore, the above mentioned commit's log message erroneously describes
the changes that were dropped from the patch.

This patch corrects the record.

Fixes: 17de1e559cf1 ("selftests: clarify common error when running gup_test")

Signed-off-by: Joel Savitz <jsavitz@redhat.com>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Acked-by: Nico Pache <npache@redhat.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/vm/gup_test.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/gup_test.c b/tools/testing/selftests/vm/gup_test.c
index 6bb36ca71cb5..a309876d832f 100644
--- a/tools/testing/selftests/vm/gup_test.c
+++ b/tools/testing/selftests/vm/gup_test.c
@@ -209,7 +209,7 @@ int main(int argc, char **argv)
 	if (write)
 		gup.gup_flags |= FOLL_WRITE;
 
-	gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
+	gup_fd = open(GUP_TEST_FILE, O_RDWR);
 	if (gup_fd == -1) {
 		switch (errno) {
 		case EACCES:
@@ -224,7 +224,7 @@ int main(int argc, char **argv)
 			printf("check if CONFIG_GUP_TEST is enabled in kernel config\n");
 			break;
 		default:
-			perror("failed to open /sys/kernel/debug/gup_test");
+			perror("failed to open " GUP_TEST_FILE);
 			break;
 		}
 		exit(KSFT_SKIP);
-- 
cgit 


From ad8848535e97f4a5374fc68f7a5d16e2565940cc Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 15 Jun 2022 13:21:15 +0200
Subject: selftests/bpf: Shuffle cookies symbols in kprobe multi test

There's a kernel bug that causes cookies to be misplaced and
the reason we did not catch this with this test is that we
provide bpf_fentry_test* functions already sorted by name.

Shuffling function bpf_fentry_test2 deeper in the list and
keeping the current cookie values as before will trigger
the bug.

The kernel fix is coming in following changes.

Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/r/20220615112118.497303-2-jolsa@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../testing/selftests/bpf/prog_tests/bpf_cookie.c  | 78 +++++++++++-----------
 tools/testing/selftests/bpf/progs/kprobe_multi.c   | 24 +++----
 2 files changed, 51 insertions(+), 51 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
index 83ef55e3caa4..2974b44f80fa 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
@@ -121,24 +121,24 @@ static void kprobe_multi_link_api_subtest(void)
 })
 
 	GET_ADDR("bpf_fentry_test1", addrs[0]);
-	GET_ADDR("bpf_fentry_test2", addrs[1]);
-	GET_ADDR("bpf_fentry_test3", addrs[2]);
-	GET_ADDR("bpf_fentry_test4", addrs[3]);
-	GET_ADDR("bpf_fentry_test5", addrs[4]);
-	GET_ADDR("bpf_fentry_test6", addrs[5]);
-	GET_ADDR("bpf_fentry_test7", addrs[6]);
+	GET_ADDR("bpf_fentry_test3", addrs[1]);
+	GET_ADDR("bpf_fentry_test4", addrs[2]);
+	GET_ADDR("bpf_fentry_test5", addrs[3]);
+	GET_ADDR("bpf_fentry_test6", addrs[4]);
+	GET_ADDR("bpf_fentry_test7", addrs[5]);
+	GET_ADDR("bpf_fentry_test2", addrs[6]);
 	GET_ADDR("bpf_fentry_test8", addrs[7]);
 
 #undef GET_ADDR
 
-	cookies[0] = 1;
-	cookies[1] = 2;
-	cookies[2] = 3;
-	cookies[3] = 4;
-	cookies[4] = 5;
-	cookies[5] = 6;
-	cookies[6] = 7;
-	cookies[7] = 8;
+	cookies[0] = 1; /* bpf_fentry_test1 */
+	cookies[1] = 2; /* bpf_fentry_test3 */
+	cookies[2] = 3; /* bpf_fentry_test4 */
+	cookies[3] = 4; /* bpf_fentry_test5 */
+	cookies[4] = 5; /* bpf_fentry_test6 */
+	cookies[5] = 6; /* bpf_fentry_test7 */
+	cookies[6] = 7; /* bpf_fentry_test2 */
+	cookies[7] = 8; /* bpf_fentry_test8 */
 
 	opts.kprobe_multi.addrs = (const unsigned long *) &addrs;
 	opts.kprobe_multi.cnt = ARRAY_SIZE(addrs);
@@ -149,14 +149,14 @@ static void kprobe_multi_link_api_subtest(void)
 	if (!ASSERT_GE(link1_fd, 0, "link1_fd"))
 		goto cleanup;
 
-	cookies[0] = 8;
-	cookies[1] = 7;
-	cookies[2] = 6;
-	cookies[3] = 5;
-	cookies[4] = 4;
-	cookies[5] = 3;
-	cookies[6] = 2;
-	cookies[7] = 1;
+	cookies[0] = 8; /* bpf_fentry_test1 */
+	cookies[1] = 7; /* bpf_fentry_test3 */
+	cookies[2] = 6; /* bpf_fentry_test4 */
+	cookies[3] = 5; /* bpf_fentry_test5 */
+	cookies[4] = 4; /* bpf_fentry_test6 */
+	cookies[5] = 3; /* bpf_fentry_test7 */
+	cookies[6] = 2; /* bpf_fentry_test2 */
+	cookies[7] = 1; /* bpf_fentry_test8 */
 
 	opts.kprobe_multi.flags = BPF_F_KPROBE_MULTI_RETURN;
 	prog_fd = bpf_program__fd(skel->progs.test_kretprobe);
@@ -181,12 +181,12 @@ static void kprobe_multi_attach_api_subtest(void)
 	struct kprobe_multi *skel = NULL;
 	const char *syms[8] = {
 		"bpf_fentry_test1",
-		"bpf_fentry_test2",
 		"bpf_fentry_test3",
 		"bpf_fentry_test4",
 		"bpf_fentry_test5",
 		"bpf_fentry_test6",
 		"bpf_fentry_test7",
+		"bpf_fentry_test2",
 		"bpf_fentry_test8",
 	};
 	__u64 cookies[8];
@@ -198,14 +198,14 @@ static void kprobe_multi_attach_api_subtest(void)
 	skel->bss->pid = getpid();
 	skel->bss->test_cookie = true;
 
-	cookies[0] = 1;
-	cookies[1] = 2;
-	cookies[2] = 3;
-	cookies[3] = 4;
-	cookies[4] = 5;
-	cookies[5] = 6;
-	cookies[6] = 7;
-	cookies[7] = 8;
+	cookies[0] = 1; /* bpf_fentry_test1 */
+	cookies[1] = 2; /* bpf_fentry_test3 */
+	cookies[2] = 3; /* bpf_fentry_test4 */
+	cookies[3] = 4; /* bpf_fentry_test5 */
+	cookies[4] = 5; /* bpf_fentry_test6 */
+	cookies[5] = 6; /* bpf_fentry_test7 */
+	cookies[6] = 7; /* bpf_fentry_test2 */
+	cookies[7] = 8; /* bpf_fentry_test8 */
 
 	opts.syms = syms;
 	opts.cnt = ARRAY_SIZE(syms);
@@ -216,14 +216,14 @@ static void kprobe_multi_attach_api_subtest(void)
 	if (!ASSERT_OK_PTR(link1, "bpf_program__attach_kprobe_multi_opts"))
 		goto cleanup;
 
-	cookies[0] = 8;
-	cookies[1] = 7;
-	cookies[2] = 6;
-	cookies[3] = 5;
-	cookies[4] = 4;
-	cookies[5] = 3;
-	cookies[6] = 2;
-	cookies[7] = 1;
+	cookies[0] = 8; /* bpf_fentry_test1 */
+	cookies[1] = 7; /* bpf_fentry_test3 */
+	cookies[2] = 6; /* bpf_fentry_test4 */
+	cookies[3] = 5; /* bpf_fentry_test5 */
+	cookies[4] = 4; /* bpf_fentry_test6 */
+	cookies[5] = 3; /* bpf_fentry_test7 */
+	cookies[6] = 2; /* bpf_fentry_test2 */
+	cookies[7] = 1; /* bpf_fentry_test8 */
 
 	opts.retprobe = true;
 
diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi.c b/tools/testing/selftests/bpf/progs/kprobe_multi.c
index 93510f4f0f3a..08f95a8155d1 100644
--- a/tools/testing/selftests/bpf/progs/kprobe_multi.c
+++ b/tools/testing/selftests/bpf/progs/kprobe_multi.c
@@ -54,21 +54,21 @@ static void kprobe_multi_check(void *ctx, bool is_return)
 
 	if (is_return) {
 		SET(kretprobe_test1_result, &bpf_fentry_test1, 8);
-		SET(kretprobe_test2_result, &bpf_fentry_test2, 7);
-		SET(kretprobe_test3_result, &bpf_fentry_test3, 6);
-		SET(kretprobe_test4_result, &bpf_fentry_test4, 5);
-		SET(kretprobe_test5_result, &bpf_fentry_test5, 4);
-		SET(kretprobe_test6_result, &bpf_fentry_test6, 3);
-		SET(kretprobe_test7_result, &bpf_fentry_test7, 2);
+		SET(kretprobe_test2_result, &bpf_fentry_test2, 2);
+		SET(kretprobe_test3_result, &bpf_fentry_test3, 7);
+		SET(kretprobe_test4_result, &bpf_fentry_test4, 6);
+		SET(kretprobe_test5_result, &bpf_fentry_test5, 5);
+		SET(kretprobe_test6_result, &bpf_fentry_test6, 4);
+		SET(kretprobe_test7_result, &bpf_fentry_test7, 3);
 		SET(kretprobe_test8_result, &bpf_fentry_test8, 1);
 	} else {
 		SET(kprobe_test1_result, &bpf_fentry_test1, 1);
-		SET(kprobe_test2_result, &bpf_fentry_test2, 2);
-		SET(kprobe_test3_result, &bpf_fentry_test3, 3);
-		SET(kprobe_test4_result, &bpf_fentry_test4, 4);
-		SET(kprobe_test5_result, &bpf_fentry_test5, 5);
-		SET(kprobe_test6_result, &bpf_fentry_test6, 6);
-		SET(kprobe_test7_result, &bpf_fentry_test7, 7);
+		SET(kprobe_test2_result, &bpf_fentry_test2, 7);
+		SET(kprobe_test3_result, &bpf_fentry_test3, 2);
+		SET(kprobe_test4_result, &bpf_fentry_test4, 3);
+		SET(kprobe_test5_result, &bpf_fentry_test5, 4);
+		SET(kprobe_test6_result, &bpf_fentry_test6, 5);
+		SET(kprobe_test7_result, &bpf_fentry_test7, 6);
 		SET(kprobe_test8_result, &bpf_fentry_test8, 8);
 	}
 
-- 
cgit 


From 730067022c0137691b27726377c2d088f7f8e33c Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 15 Jun 2022 13:21:18 +0200
Subject: selftest/bpf: Fix kprobe_multi bench test

With [1] the available_filter_functions file contains records
starting with __ftrace_invalid_address___ and marking disabled
entries.

We need to filter them out for the bench test to pass only
resolvable symbols to kernel.

[1] commit b39181f7c690 ("ftrace: Add FTRACE_MCOUNT_MAX_OFFSET to avoid adding weak function")

Fixes: b39181f7c690 ("ftrace: Add FTRACE_MCOUNT_MAX_OFFSET to avoid adding weak function")
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/r/20220615112118.497303-5-jolsa@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
index 586dc52d6fb9..5b93d5d0bd93 100644
--- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
@@ -364,6 +364,9 @@ static int get_syms(char ***symsp, size_t *cntp)
 			continue;
 		if (!strncmp(name, "rcu_", 4))
 			continue;
+		if (!strncmp(name, "__ftrace_invalid_address__",
+			     sizeof("__ftrace_invalid_address__") - 1))
+			continue;
 		err = hashmap__add(map, name, NULL);
 		if (err) {
 			free(name);
-- 
cgit 


From b4a028c4d031c27704ad73b1195ca69a1206941e Mon Sep 17 00:00:00 2001
From: Riccardo Paolo Bestetti <pbl@bestov.io>
Date: Fri, 17 Jun 2022 10:54:35 +0200
Subject: ipv4: ping: fix bind address validity check
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 8ff978b8b222 ("ipv4/raw: support binding to nonlocal addresses")
introduced a helper function to fold duplicated validity checks of bind
addresses into inet_addr_valid_or_nonlocal(). However, this caused an
unintended regression in ping_check_bind_addr(), which previously would
reject binding to multicast and broadcast addresses, but now these are
both incorrectly allowed as reported in [1].

This patch restores the original check. A simple reordering is done to
improve readability and make it evident that multicast and broadcast
addresses should not be allowed. Also, add an early exit for INADDR_ANY
which replaces lost behavior added by commit 0ce779a9f501 ("net: Avoid
unnecessary inet_addr_type() call when addr is INADDR_ANY").

Furthermore, this patch introduces regression selftests to catch these
specific cases.

[1] https://lore.kernel.org/netdev/CANP3RGdkAcDyAZoT1h8Gtuu0saq+eOrrTiWbxnOs+5zn+cpyKg@mail.gmail.com/

Fixes: 8ff978b8b222 ("ipv4/raw: support binding to nonlocal addresses")
Cc: Miaohe Lin <linmiaohe@huawei.com>
Reported-by: Maciej Żenczykowski <maze@google.com>
Signed-off-by: Carlos Llamas <cmllamas@google.com>
Signed-off-by: Riccardo Paolo Bestetti <pbl@bestov.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ping.c                           | 10 +++++++---
 tools/testing/selftests/net/fcnal-test.sh | 33 +++++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 1a43ca73f94d..3c6101def7d6 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -319,12 +319,16 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
 		pr_debug("ping_check_bind_addr(sk=%p,addr=%pI4,port=%d)\n",
 			 sk, &addr->sin_addr.s_addr, ntohs(addr->sin_port));
 
+		if (addr->sin_addr.s_addr == htonl(INADDR_ANY))
+			return 0;
+
 		tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ? : tb_id;
 		chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id);
 
-		if (!inet_addr_valid_or_nonlocal(net, inet_sk(sk),
-					         addr->sin_addr.s_addr,
-	                                         chk_addr_ret))
+		if (chk_addr_ret == RTN_MULTICAST ||
+		    chk_addr_ret == RTN_BROADCAST ||
+		    (chk_addr_ret != RTN_LOCAL &&
+		     !inet_can_nonlocal_bind(net, isk)))
 			return -EADDRNOTAVAIL;
 
 #if IS_ENABLED(CONFIG_IPV6)
diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
index 54701c8b0cd7..75223b63e3c8 100755
--- a/tools/testing/selftests/net/fcnal-test.sh
+++ b/tools/testing/selftests/net/fcnal-test.sh
@@ -70,6 +70,10 @@ NSB_LO_IP6=2001:db8:2::2
 NL_IP=172.17.1.1
 NL_IP6=2001:db8:4::1
 
+# multicast and broadcast addresses
+MCAST_IP=224.0.0.1
+BCAST_IP=255.255.255.255
+
 MD5_PW=abc123
 MD5_WRONG_PW=abc1234
 
@@ -308,6 +312,9 @@ addr2str()
 	127.0.0.1) echo "loopback";;
 	::1) echo "IPv6 loopback";;
 
+	${BCAST_IP}) echo "broadcast";;
+	${MCAST_IP}) echo "multicast";;
+
 	${NSA_IP})	echo "ns-A IP";;
 	${NSA_IP6})	echo "ns-A IPv6";;
 	${NSA_LO_IP})	echo "ns-A loopback IP";;
@@ -1800,6 +1807,19 @@ ipv4_addr_bind_novrf()
 	run_cmd nettest -s -R -P icmp -f -l ${a} -I ${NSA_DEV} -b
 	log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address after device bind"
 
+	#
+	# check that ICMP sockets cannot bind to broadcast and multicast addresses
+	#
+	a=${BCAST_IP}
+	log_start
+	run_cmd nettest -s -R -P icmp -l ${a} -b
+	log_test_addr ${a} $? 1 "ICMP socket bind to broadcast address"
+
+	a=${MCAST_IP}
+	log_start
+	run_cmd nettest -s -R -P icmp -f -l ${a} -b
+	log_test_addr ${a} $? 1 "ICMP socket bind to multicast address"
+
 	#
 	# tcp sockets
 	#
@@ -1857,6 +1877,19 @@ ipv4_addr_bind_vrf()
 	run_cmd nettest -s -R -P icmp -f -l ${a} -I ${VRF} -b
 	log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address after VRF bind"
 
+	#
+	# check that ICMP sockets cannot bind to broadcast and multicast addresses
+	#
+	a=${BCAST_IP}
+	log_start
+	run_cmd nettest -s -R -P icmp -l ${a} -I ${VRF} -b
+	log_test_addr ${a} $? 1 "ICMP socket bind to broadcast address after VRF bind"
+
+	a=${MCAST_IP}
+	log_start
+	run_cmd nettest -s -R -P icmp -f -l ${a} -I ${VRF} -b
+	log_test_addr ${a} $? 1 "ICMP socket bind to multicast address after VRF bind"
+
 	#
 	# tcp sockets
 	#
-- 
cgit 


From 313c502fa3b3494159cb8f18d4a6444d06c5c9a5 Mon Sep 17 00:00:00 2001
From: Riccardo Paolo Bestetti <pbl@bestov.io>
Date: Sun, 19 Jun 2022 18:27:35 +0200
Subject: ipv4: fix bind address validity regression tests

Commit 8ff978b8b222 ("ipv4/raw: support binding to nonlocal addresses")
introduces support for binding to nonlocal addresses, as well as some
basic test coverage for some of the related cases.

Commit b4a028c4d031 ("ipv4: ping: fix bind address validity check")
fixes a regression which incorrectly removed some checks for bind
address validation. In addition, it introduces regression tests for
those specific checks. However, those regression tests are defective, in
that they perform the tests using an incorrect combination of bind
flags. As a result, those tests fail when they should succeed.

This commit introduces additional regression tests for nonlocal binding
and fixes the defective regression tests. It also introduces new
set_sysctl calls for the ipv4_bind test group, as to perform the ICMP
binding tests it is necessary to allow ICMP socket creation by setting
the net.ipv4.ping_group_range knob.

Fixes: b4a028c4d031 ("ipv4: ping: fix bind address validity check")
Reported-by: Riccardo Paolo Bestetti <pbl@bestov.io>
Signed-off-by: Riccardo Paolo Bestetti <pbl@bestov.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/fcnal-test.sh | 36 +++++++++++++++++++++++--------
 1 file changed, 27 insertions(+), 9 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
index 75223b63e3c8..03b586760164 100755
--- a/tools/testing/selftests/net/fcnal-test.sh
+++ b/tools/testing/selftests/net/fcnal-test.sh
@@ -1800,24 +1800,32 @@ ipv4_addr_bind_novrf()
 	done
 
 	#
-	# raw socket with nonlocal bind
+	# tests for nonlocal bind
 	#
 	a=${NL_IP}
 	log_start
-	run_cmd nettest -s -R -P icmp -f -l ${a} -I ${NSA_DEV} -b
-	log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address after device bind"
+	run_cmd nettest -s -R -f -l ${a} -b
+	log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address"
+
+	log_start
+	run_cmd nettest -s -f -l ${a} -b
+	log_test_addr ${a} $? 0 "TCP socket bind to nonlocal address"
+
+	log_start
+	run_cmd nettest -s -D -P icmp -f -l ${a} -b
+	log_test_addr ${a} $? 0 "ICMP socket bind to nonlocal address"
 
 	#
 	# check that ICMP sockets cannot bind to broadcast and multicast addresses
 	#
 	a=${BCAST_IP}
 	log_start
-	run_cmd nettest -s -R -P icmp -l ${a} -b
+	run_cmd nettest -s -D -P icmp -l ${a} -b
 	log_test_addr ${a} $? 1 "ICMP socket bind to broadcast address"
 
 	a=${MCAST_IP}
 	log_start
-	run_cmd nettest -s -R -P icmp -f -l ${a} -b
+	run_cmd nettest -s -D -P icmp -l ${a} -b
 	log_test_addr ${a} $? 1 "ICMP socket bind to multicast address"
 
 	#
@@ -1870,24 +1878,32 @@ ipv4_addr_bind_vrf()
 	log_test_addr ${a} $? 1 "Raw socket bind to out of scope address after VRF bind"
 
 	#
-	# raw socket with nonlocal bind
+	# tests for nonlocal bind
 	#
 	a=${NL_IP}
 	log_start
-	run_cmd nettest -s -R -P icmp -f -l ${a} -I ${VRF} -b
+	run_cmd nettest -s -R -f -l ${a} -I ${VRF} -b
 	log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address after VRF bind"
 
+	log_start
+	run_cmd nettest -s -f -l ${a} -I ${VRF} -b
+	log_test_addr ${a} $? 0 "TCP socket bind to nonlocal address after VRF bind"
+
+	log_start
+	run_cmd nettest -s -D -P icmp -f -l ${a} -I ${VRF} -b
+	log_test_addr ${a} $? 0 "ICMP socket bind to nonlocal address after VRF bind"
+
 	#
 	# check that ICMP sockets cannot bind to broadcast and multicast addresses
 	#
 	a=${BCAST_IP}
 	log_start
-	run_cmd nettest -s -R -P icmp -l ${a} -I ${VRF} -b
+	run_cmd nettest -s -D -P icmp -l ${a} -I ${VRF} -b
 	log_test_addr ${a} $? 1 "ICMP socket bind to broadcast address after VRF bind"
 
 	a=${MCAST_IP}
 	log_start
-	run_cmd nettest -s -R -P icmp -f -l ${a} -I ${VRF} -b
+	run_cmd nettest -s -D -P icmp -l ${a} -I ${VRF} -b
 	log_test_addr ${a} $? 1 "ICMP socket bind to multicast address after VRF bind"
 
 	#
@@ -1922,10 +1938,12 @@ ipv4_addr_bind()
 
 	log_subsection "No VRF"
 	setup
+	set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
 	ipv4_addr_bind_novrf
 
 	log_subsection "With VRF"
 	setup "yes"
+	set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
 	ipv4_addr_bind_vrf
 }
 
-- 
cgit 


From 5d79d8af8dec58bf709b3124d09d9572edd9c617 Mon Sep 17 00:00:00 2001
From: Jie2x Zhou <jie2x.zhou@intel.com>
Date: Thu, 16 Jun 2022 15:40:46 +0800
Subject: selftests: netfilter: correct PKTGEN_SCRIPT_PATHS in
 nft_concat_range.sh

Before change:
make -C netfilter
 TEST: performance
   net,port                                                      [SKIP]
   perf not supported
   port,net                                                      [SKIP]
   perf not supported
   net6,port                                                     [SKIP]
   perf not supported
   port,proto                                                    [SKIP]
   perf not supported
   net6,port,mac                                                 [SKIP]
   perf not supported
   net6,port,mac,proto                                           [SKIP]
   perf not supported
   net,mac                                                       [SKIP]
   perf not supported

After change:
   net,mac                                                       [ OK ]
     baseline (drop from netdev hook):               2061098pps
     baseline hash (non-ranged entries):             1606741pps
     baseline rbtree (match on first field only):    1191607pps
     set with  1000 full, ranged entries:            1639119pps
ok 8 selftests: netfilter: nft_concat_range.sh

Fixes: 611973c1e06f ("selftests: netfilter: Introduce tests for sets with range concatenation")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Jie2x Zhou <jie2x.zhou@intel.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 tools/testing/selftests/netfilter/nft_concat_range.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/netfilter/nft_concat_range.sh b/tools/testing/selftests/netfilter/nft_concat_range.sh
index b35010cc7f6a..a6991877e50c 100755
--- a/tools/testing/selftests/netfilter/nft_concat_range.sh
+++ b/tools/testing/selftests/netfilter/nft_concat_range.sh
@@ -31,7 +31,7 @@ BUGS="flush_remove_add reload"
 
 # List of possible paths to pktgen script from kernel tree for performance tests
 PKTGEN_SCRIPT_PATHS="
-	../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
+	../../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
 	pktgen/pktgen_bench_xmit_mode_netif_receive.sh"
 
 # Definition of set types:
-- 
cgit 


From 9e2f6498efbbc880d7caa7935839e682b64fe5a6 Mon Sep 17 00:00:00 2001
From: Raghavendra Rao Ananta <rananta@google.com>
Date: Wed, 15 Jun 2022 18:57:06 +0000
Subject: selftests: KVM: Handle compiler optimizations in ucall

The selftests, when built with newer versions of clang, is found
to have over optimized guests' ucall() function, and eliminating
the stores for uc.cmd (perhaps due to no immediate readers). This
resulted in the userspace side always reading a value of '0', and
causing multiple test failures.

As a result, prevent the compiler from optimizing the stores in
ucall() with WRITE_ONCE().

Suggested-by: Ricardo Koller <ricarkol@google.com>
Suggested-by: Reiji Watanabe <reijiw@google.com>
Signed-off-by: Raghavendra Rao Ananta <rananta@google.com>
Message-Id: <20220615185706.1099208-1-rananta@google.com>
Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/lib/aarch64/ucall.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
index e0b0164e9af8..be1d9728c4ce 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
@@ -73,20 +73,19 @@ void ucall_uninit(struct kvm_vm *vm)
 
 void ucall(uint64_t cmd, int nargs, ...)
 {
-	struct ucall uc = {
-		.cmd = cmd,
-	};
+	struct ucall uc = {};
 	va_list va;
 	int i;
 
+	WRITE_ONCE(uc.cmd, cmd);
 	nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
 
 	va_start(va, nargs);
 	for (i = 0; i < nargs; ++i)
-		uc.args[i] = va_arg(va, uint64_t);
+		WRITE_ONCE(uc.args[i], va_arg(va, uint64_t));
 	va_end(va);
 
-	*ucall_exit_mmio_addr = (vm_vaddr_t)&uc;
+	WRITE_ONCE(*ucall_exit_mmio_addr, (vm_vaddr_t)&uc);
 }
 
 uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
-- 
cgit 


From b968080808f7f28b89aa495b7402ba48eb17ee93 Mon Sep 17 00:00:00 2001
From: Dimitris Michailidis <d.michailidis@fungible.com>
Date: Wed, 22 Jun 2022 17:02:34 -0700
Subject: selftests/net: pass ipv6_args to udpgso_bench's IPv6 TCP test

udpgso_bench.sh has been running its IPv6 TCP test with IPv4 arguments
since its initial conmit. Looks like a typo.

Fixes: 3a687bef148d ("selftests: udp gso benchmark")
Cc: willemb@google.com
Signed-off-by: Dimitris Michailidis <dmichail@fungible.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Link: https://lore.kernel.org/r/20220623000234.61774-1-dmichail@fungible.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/udpgso_bench.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh
index 80b5d352702e..dc932fd65363 100755
--- a/tools/testing/selftests/net/udpgso_bench.sh
+++ b/tools/testing/selftests/net/udpgso_bench.sh
@@ -120,7 +120,7 @@ run_all() {
 	run_udp "${ipv4_args}"
 
 	echo "ipv6"
-	run_tcp "${ipv4_args}"
+	run_tcp "${ipv6_args}"
 	run_udp "${ipv6_args}"
 }
 
-- 
cgit 


From 935336c191040809bf5739654ea337c13fe8f9af Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Thu, 23 Jun 2022 11:12:31 +0200
Subject: selftests/bpf: Test sockmap update when socket has ULP

Cover the scenario when we cannot insert a socket into the sockmap, because
it has it is using ULP. Failed insert should not have any effect on the ULP
state. This is a regression test.

Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/r/20220623091231.417138-1-jakub@cloudflare.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/bpf/prog_tests/sockmap_ktls.c        | 84 +++++++++++++++++++---
 1 file changed, 75 insertions(+), 9 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
index af293ea1542c..e172d89e92e1 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
@@ -4,6 +4,7 @@
  * Tests for sockmap/sockhash holding kTLS sockets.
  */
 
+#include <netinet/tcp.h>
 #include "test_progs.h"
 
 #define MAX_TEST_NAME 80
@@ -92,9 +93,78 @@ close_srv:
 	close(srv);
 }
 
+static void test_sockmap_ktls_update_fails_when_sock_has_ulp(int family, int map)
+{
+	struct sockaddr_storage addr = {};
+	socklen_t len = sizeof(addr);
+	struct sockaddr_in6 *v6;
+	struct sockaddr_in *v4;
+	int err, s, zero = 0;
+
+	switch (family) {
+	case AF_INET:
+		v4 = (struct sockaddr_in *)&addr;
+		v4->sin_family = AF_INET;
+		break;
+	case AF_INET6:
+		v6 = (struct sockaddr_in6 *)&addr;
+		v6->sin6_family = AF_INET6;
+		break;
+	default:
+		PRINT_FAIL("unsupported socket family %d", family);
+		return;
+	}
+
+	s = socket(family, SOCK_STREAM, 0);
+	if (!ASSERT_GE(s, 0, "socket"))
+		return;
+
+	err = bind(s, (struct sockaddr *)&addr, len);
+	if (!ASSERT_OK(err, "bind"))
+		goto close;
+
+	err = getsockname(s, (struct sockaddr *)&addr, &len);
+	if (!ASSERT_OK(err, "getsockname"))
+		goto close;
+
+	err = connect(s, (struct sockaddr *)&addr, len);
+	if (!ASSERT_OK(err, "connect"))
+		goto close;
+
+	/* save sk->sk_prot and set it to tls_prots */
+	err = setsockopt(s, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls"));
+	if (!ASSERT_OK(err, "setsockopt(TCP_ULP)"))
+		goto close;
+
+	/* sockmap update should not affect saved sk_prot */
+	err = bpf_map_update_elem(map, &zero, &s, BPF_ANY);
+	if (!ASSERT_ERR(err, "sockmap update elem"))
+		goto close;
+
+	/* call sk->sk_prot->setsockopt to dispatch to saved sk_prot */
+	err = setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &zero, sizeof(zero));
+	ASSERT_OK(err, "setsockopt(TCP_NODELAY)");
+
+close:
+	close(s);
+}
+
+static const char *fmt_test_name(const char *subtest_name, int family,
+				 enum bpf_map_type map_type)
+{
+	const char *map_type_str = BPF_MAP_TYPE_SOCKMAP ? "SOCKMAP" : "SOCKHASH";
+	const char *family_str = AF_INET ? "IPv4" : "IPv6";
+	static char test_name[MAX_TEST_NAME];
+
+	snprintf(test_name, MAX_TEST_NAME,
+		 "sockmap_ktls %s %s %s",
+		 subtest_name, family_str, map_type_str);
+
+	return test_name;
+}
+
 static void run_tests(int family, enum bpf_map_type map_type)
 {
-	char test_name[MAX_TEST_NAME];
 	int map;
 
 	map = bpf_map_create(map_type, NULL, sizeof(int), sizeof(int), 1, NULL);
@@ -103,14 +173,10 @@ static void run_tests(int family, enum bpf_map_type map_type)
 		return;
 	}
 
-	snprintf(test_name, MAX_TEST_NAME,
-		 "sockmap_ktls disconnect_after_delete %s %s",
-		 family == AF_INET ? "IPv4" : "IPv6",
-		 map_type == BPF_MAP_TYPE_SOCKMAP ? "SOCKMAP" : "SOCKHASH");
-	if (!test__start_subtest(test_name))
-		return;
-
-	test_sockmap_ktls_disconnect_after_delete(family, map);
+	if (test__start_subtest(fmt_test_name("disconnect_after_delete", family, map_type)))
+		test_sockmap_ktls_disconnect_after_delete(family, map);
+	if (test__start_subtest(fmt_test_name("update_fails_when_sock_has_ulp", family, map_type)))
+		test_sockmap_ktls_update_fails_when_sock_has_ulp(family, map);
 
 	close(map);
 }
-- 
cgit 


From 88153e29c1e0f3ace8c831b06f6cea9503f16cec Mon Sep 17 00:00:00 2001
From: Victor Nogueira <victor@mojatatu.com>
Date: Thu, 23 Jun 2022 11:07:42 -0300
Subject: selftests: tc-testing: Add testcases to test new flush behaviour

Add tdc test cases to verify new flush behaviour is correct, which do
the following:

- Try to flush only one action which is being referenced by a filter
- Try to flush three actions where the last one (index 3) is being
  referenced by a filter

Signed-off-by: Victor Nogueira <victor@mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../tc-testing/tc-tests/actions/gact.json          | 77 ++++++++++++++++++++++
 1 file changed, 77 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
index b24494c6f546..c652e8c1157d 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
@@ -609,5 +609,82 @@
         "teardown": [
             "$TC actions flush action gact"
         ]
+    },
+    {
+        "id": "7f52",
+        "name": "Try to flush action which is referenced by filter",
+        "category": [
+            "actions",
+            "gact"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            [
+                "$TC actions flush action gact",
+                0,
+                1,
+                255
+            ],
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC actions add action pass index 1",
+            "$TC filter add dev $DEV1 protocol all ingress prio 1 handle 0x1234 matchall action gact index 1"
+        ],
+        "cmdUnderTest": "$TC actions flush action gact",
+        "expExitCode": "1",
+        "verifyCmd": "$TC actions ls action gact",
+        "matchPattern": "total acts 1.*action order [0-9]*: gact action pass.*index 1 ref 2 bind 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress",
+            [
+                "sleep 1; $TC actions flush action gact",
+                0,
+                1
+            ]
+        ]
+    },
+    {
+        "id": "ae1e",
+        "name": "Try to flush actions when last one is referenced by filter",
+        "category": [
+            "actions",
+            "gact"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            [
+                "$TC actions flush action gact",
+                0,
+                1,
+                255
+            ],
+            "$TC qdisc add dev $DEV1 ingress",
+	    [
+                "$TC actions add action pass index 1",
+		0,
+		1,
+		255
+	    ],
+            "$TC actions add action reclassify index 2",
+            "$TC actions add action drop index 3",
+            "$TC filter add dev $DEV1 protocol all ingress prio 1 handle 0x1234 matchall action gact index 3"
+        ],
+        "cmdUnderTest": "$TC actions flush action gact",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action gact",
+        "matchPattern": "total acts 1.*action order [0-9]*: gact action drop.*index 3 ref 2 bind 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress",
+            [
+                "sleep 1; $TC actions flush action gact",
+                0,
+                1
+            ]
+        ]
     }
 ]
-- 
cgit 


From 42fb6cddec3b306c9f6ef136b6438e0de1836431 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Mon, 27 Jun 2022 18:02:41 -0700
Subject: selftests: mptcp: more stable diag tests

The mentioned test-case still use an hard-coded-len sleep to
wait for a relative large number of connection to be established.

On very slow VM and with debug build such timeout could be exceeded,
causing failures in our CI.

Address the issue polling for the expected condition several times,
up to an unreasonable high amount of time. On reasonably fast system
the self-tests will be faster then before, on very slow one we will
still catch the correct condition.

Fixes: df62f2ec3df6 ("selftests/mptcp: add diag interface tests")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/diag.sh | 48 +++++++++++++++++++++++++------
 1 file changed, 40 insertions(+), 8 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
index 9dd43d7d957b..515859a5168b 100755
--- a/tools/testing/selftests/net/mptcp/diag.sh
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -61,6 +61,39 @@ chk_msk_nr()
 	__chk_nr "grep -c token:" $*
 }
 
+wait_msk_nr()
+{
+	local condition="grep -c token:"
+	local expected=$1
+	local timeout=20
+	local msg nr
+	local max=0
+	local i=0
+
+	shift 1
+	msg=$*
+
+	while [ $i -lt $timeout ]; do
+		nr=$(ss -inmHMN $ns | $condition)
+		[ $nr == $expected ] && break;
+		[ $nr -gt $max ] && max=$nr
+		i=$((i + 1))
+		sleep 1
+	done
+
+	printf "%-50s" "$msg"
+	if [ $i -ge $timeout ]; then
+		echo "[ fail ] timeout while expecting $expected max $max last $nr"
+		ret=$test_cnt
+	elif [ $nr != $expected ]; then
+		echo "[ fail ] expected $expected found $nr"
+		ret=$test_cnt
+	else
+		echo "[  ok  ]"
+	fi
+	test_cnt=$((test_cnt+1))
+}
+
 chk_msk_fallback_nr()
 {
 		__chk_nr "grep -c fallback" $*
@@ -146,7 +179,7 @@ ip -n $ns link set dev lo up
 echo "a" | \
 	timeout ${timeout_test} \
 		ip netns exec $ns \
-			./mptcp_connect -p 10000 -l -t ${timeout_poll} \
+			./mptcp_connect -p 10000 -l -t ${timeout_poll} -w 20 \
 				0.0.0.0 >/dev/null &
 wait_local_port_listen $ns 10000
 chk_msk_nr 0 "no msk on netns creation"
@@ -155,7 +188,7 @@ chk_msk_listen 10000
 echo "b" | \
 	timeout ${timeout_test} \
 		ip netns exec $ns \
-			./mptcp_connect -p 10000 -r 0 -t ${timeout_poll} \
+			./mptcp_connect -p 10000 -r 0 -t ${timeout_poll} -w 20 \
 				127.0.0.1 >/dev/null &
 wait_connected $ns 10000
 chk_msk_nr 2 "after MPC handshake "
@@ -167,13 +200,13 @@ flush_pids
 echo "a" | \
 	timeout ${timeout_test} \
 		ip netns exec $ns \
-			./mptcp_connect -p 10001 -l -s TCP -t ${timeout_poll} \
+			./mptcp_connect -p 10001 -l -s TCP -t ${timeout_poll} -w 20 \
 				0.0.0.0 >/dev/null &
 wait_local_port_listen $ns 10001
 echo "b" | \
 	timeout ${timeout_test} \
 		ip netns exec $ns \
-			./mptcp_connect -p 10001 -r 0 -t ${timeout_poll} \
+			./mptcp_connect -p 10001 -r 0 -t ${timeout_poll} -w 20 \
 				127.0.0.1 >/dev/null &
 wait_connected $ns 10001
 chk_msk_fallback_nr 1 "check fallback"
@@ -184,7 +217,7 @@ for I in `seq 1 $NR_CLIENTS`; do
 	echo "a" | \
 		timeout ${timeout_test} \
 			ip netns exec $ns \
-				./mptcp_connect -p $((I+10001)) -l -w 10 \
+				./mptcp_connect -p $((I+10001)) -l -w 20 \
 					-t ${timeout_poll} 0.0.0.0 >/dev/null &
 done
 wait_local_port_listen $ns $((NR_CLIENTS + 10001))
@@ -193,12 +226,11 @@ for I in `seq 1 $NR_CLIENTS`; do
 	echo "b" | \
 		timeout ${timeout_test} \
 			ip netns exec $ns \
-				./mptcp_connect -p $((I+10001)) -w 10 \
+				./mptcp_connect -p $((I+10001)) -w 20 \
 					-t ${timeout_poll} 127.0.0.1 >/dev/null &
 done
-sleep 1.5
 
-chk_msk_nr $((NR_CLIENTS*2)) "many msk socket present"
+wait_msk_nr $((NR_CLIENTS*2)) "many msk socket present"
 flush_pids
 
 exit $ret
-- 
cgit 


From fd37c2ecb21f7aee04ccca5f561469f07d00063c Mon Sep 17 00:00:00 2001
From: Mat Martineau <mathew.j.martineau@linux.intel.com>
Date: Mon, 27 Jun 2022 18:02:43 -0700
Subject: selftests: mptcp: Initialize variables to quiet gcc 12 warnings

In a few MPTCP selftest tools, gcc 12 complains that the 'sock' variable
might be used uninitialized. This is a false positive because the only
code path that could lead to uninitialized access is where getaddrinfo()
fails, but the local xgetaddrinfo() wrapper exits if such a failure
occurs.

Initialize the 'sock' variable anyway to allow the tools to build with
gcc 12.

Fixes: 048d19d444be ("mptcp: add basic kselftest for mptcp")
Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_connect.c | 2 +-
 tools/testing/selftests/net/mptcp/mptcp_inq.c     | 2 +-
 tools/testing/selftests/net/mptcp/mptcp_sockopt.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index 8628aa61b763..e2ea6c126c99 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -265,7 +265,7 @@ static void sock_test_tcpulp(int sock, int proto, unsigned int line)
 static int sock_listen_mptcp(const char * const listenaddr,
 			     const char * const port)
 {
-	int sock;
+	int sock = -1;
 	struct addrinfo hints = {
 		.ai_protocol = IPPROTO_TCP,
 		.ai_socktype = SOCK_STREAM,
diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c
index 29f75e2a1116..8672d898f8cd 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_inq.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c
@@ -88,7 +88,7 @@ static void xgetaddrinfo(const char *node, const char *service,
 static int sock_listen_mptcp(const char * const listenaddr,
 			     const char * const port)
 {
-	int sock;
+	int sock = -1;
 	struct addrinfo hints = {
 		.ai_protocol = IPPROTO_TCP,
 		.ai_socktype = SOCK_STREAM,
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
index ac9a4d9c1764..ae61f39556ca 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
@@ -136,7 +136,7 @@ static void xgetaddrinfo(const char *node, const char *service,
 static int sock_listen_mptcp(const char * const listenaddr,
 			     const char * const port)
 {
-	int sock;
+	int sock = -1;
 	struct addrinfo hints = {
 		.ai_protocol = IPPROTO_TCP,
 		.ai_socktype = SOCK_STREAM,
-- 
cgit 


From 7b92aa9e613508cbaa29dd35bf27db4c35628b10 Mon Sep 17 00:00:00 2001
From: Coleman Dietsch <dietschc@csp.edu>
Date: Tue, 28 Jun 2022 12:47:44 -0500
Subject: selftests net: fix kselftest net fatal error

The incorrect path is causing the following error when trying to run net
kselftests:

In file included from bpf/nat6to4.c:43:
../../../lib/bpf/bpf_helpers.h:11:10: fatal error: 'bpf_helper_defs.h' file not found
         ^~~~~~~~~~~~~~~~~~~
1 error generated.

Fixes: cf67838c4422 ("selftests net: fix bpf build error")
Signed-off-by: Coleman Dietsch <dietschc@csp.edu>
Link: https://lore.kernel.org/r/20220628174744.7908-1-dietschc@csp.edu
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/bpf/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/bpf/Makefile b/tools/testing/selftests/net/bpf/Makefile
index 8a69c91fcca0..8ccaf8732eb2 100644
--- a/tools/testing/selftests/net/bpf/Makefile
+++ b/tools/testing/selftests/net/bpf/Makefile
@@ -2,7 +2,7 @@
 
 CLANG ?= clang
 CCINCLUDE += -I../../bpf
-CCINCLUDE += -I../../../lib
+CCINCLUDE += -I../../../../lib
 CCINCLUDE += -I../../../../../usr/include/
 
 TEST_CUSTOM_PROGS = $(OUTPUT)/bpf/nat6to4.o
-- 
cgit 


From 839b92fede7ba308f1a475aa00fea55f63b7fccf Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 29 Jun 2022 11:19:11 -0700
Subject: selftest: tun: add test for NAPI dismantle

Being lazy does not pay, add the test for various
ordering of tun queue close / detach / destroy.

Link: https://lore.kernel.org/r/20220629181911.372047-2-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/Makefile |   2 +-
 tools/testing/selftests/net/tun.c    | 162 +++++++++++++++++++++++++++++++++++
 2 files changed, 163 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/net/tun.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 7ea54af55490..ddad703ace34 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -54,7 +54,7 @@ TEST_GEN_FILES += ipsec
 TEST_GEN_FILES += ioam6_parser
 TEST_GEN_FILES += gro
 TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
-TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
+TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls tun
 TEST_GEN_FILES += toeplitz
 TEST_GEN_FILES += cmsg_sender
 TEST_GEN_FILES += stress_reuseport_listen
diff --git a/tools/testing/selftests/net/tun.c b/tools/testing/selftests/net/tun.c
new file mode 100644
index 000000000000..fa83918b62d1
--- /dev/null
+++ b/tools/testing/selftests/net/tun.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <linux/if.h>
+#include <linux/if_tun.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+
+#include "../kselftest_harness.h"
+
+static int tun_attach(int fd, char *dev)
+{
+	struct ifreq ifr;
+
+	memset(&ifr, 0, sizeof(ifr));
+	strcpy(ifr.ifr_name, dev);
+	ifr.ifr_flags = IFF_ATTACH_QUEUE;
+
+	return ioctl(fd, TUNSETQUEUE, (void *) &ifr);
+}
+
+static int tun_detach(int fd, char *dev)
+{
+	struct ifreq ifr;
+
+	memset(&ifr, 0, sizeof(ifr));
+	strcpy(ifr.ifr_name, dev);
+	ifr.ifr_flags = IFF_DETACH_QUEUE;
+
+	return ioctl(fd, TUNSETQUEUE, (void *) &ifr);
+}
+
+static int tun_alloc(char *dev)
+{
+	struct ifreq ifr;
+	int fd, err;
+
+	fd = open("/dev/net/tun", O_RDWR);
+	if (fd < 0) {
+		fprintf(stderr, "can't open tun: %s\n", strerror(errno));
+		return fd;
+	}
+
+	memset(&ifr, 0, sizeof(ifr));
+	strcpy(ifr.ifr_name, dev);
+	ifr.ifr_flags = IFF_TAP | IFF_NAPI | IFF_MULTI_QUEUE;
+
+	err = ioctl(fd, TUNSETIFF, (void *) &ifr);
+	if (err < 0) {
+		fprintf(stderr, "can't TUNSETIFF: %s\n", strerror(errno));
+		close(fd);
+		return err;
+	}
+	strcpy(dev, ifr.ifr_name);
+	return fd;
+}
+
+static int tun_delete(char *dev)
+{
+	struct {
+		struct nlmsghdr  nh;
+		struct ifinfomsg ifm;
+		unsigned char    data[64];
+	} req;
+	struct rtattr *rta;
+	int ret, rtnl;
+
+	rtnl = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE);
+	if (rtnl < 0) {
+		fprintf(stderr, "can't open rtnl: %s\n", strerror(errno));
+		return 1;
+	}
+
+	memset(&req, 0, sizeof(req));
+	req.nh.nlmsg_len = NLMSG_ALIGN(NLMSG_LENGTH(sizeof(req.ifm)));
+	req.nh.nlmsg_flags = NLM_F_REQUEST;
+	req.nh.nlmsg_type = RTM_DELLINK;
+
+	req.ifm.ifi_family = AF_UNSPEC;
+
+	rta = (struct rtattr *)(((char *)&req) + NLMSG_ALIGN(req.nh.nlmsg_len));
+	rta->rta_type = IFLA_IFNAME;
+	rta->rta_len = RTA_LENGTH(IFNAMSIZ);
+	req.nh.nlmsg_len += rta->rta_len;
+	memcpy(RTA_DATA(rta), dev, IFNAMSIZ);
+
+	ret = send(rtnl, &req, req.nh.nlmsg_len, 0);
+	if (ret < 0)
+		fprintf(stderr, "can't send: %s\n", strerror(errno));
+	ret = (unsigned int)ret != req.nh.nlmsg_len;
+
+	close(rtnl);
+	return ret;
+}
+
+FIXTURE(tun)
+{
+	char ifname[IFNAMSIZ];
+	int fd, fd2;
+};
+
+FIXTURE_SETUP(tun)
+{
+	memset(self->ifname, 0, sizeof(self->ifname));
+
+	self->fd = tun_alloc(self->ifname);
+	ASSERT_GE(self->fd, 0);
+
+	self->fd2 = tun_alloc(self->ifname);
+	ASSERT_GE(self->fd2, 0);
+}
+
+FIXTURE_TEARDOWN(tun)
+{
+	if (self->fd >= 0)
+		close(self->fd);
+	if (self->fd2 >= 0)
+		close(self->fd2);
+}
+
+TEST_F(tun, delete_detach_close) {
+	EXPECT_EQ(tun_delete(self->ifname), 0);
+	EXPECT_EQ(tun_detach(self->fd, self->ifname), -1);
+	EXPECT_EQ(errno, 22);
+}
+
+TEST_F(tun, detach_delete_close) {
+	EXPECT_EQ(tun_detach(self->fd, self->ifname), 0);
+	EXPECT_EQ(tun_delete(self->ifname), 0);
+}
+
+TEST_F(tun, detach_close_delete) {
+	EXPECT_EQ(tun_detach(self->fd, self->ifname), 0);
+	close(self->fd);
+	self->fd = -1;
+	EXPECT_EQ(tun_delete(self->ifname), 0);
+}
+
+TEST_F(tun, reattach_delete_close) {
+	EXPECT_EQ(tun_detach(self->fd, self->ifname), 0);
+	EXPECT_EQ(tun_attach(self->fd, self->ifname), 0);
+	EXPECT_EQ(tun_delete(self->ifname), 0);
+}
+
+TEST_F(tun, reattach_close_delete) {
+	EXPECT_EQ(tun_detach(self->fd, self->ifname), 0);
+	EXPECT_EQ(tun_attach(self->fd, self->ifname), 0);
+	close(self->fd);
+	self->fd = -1;
+	EXPECT_EQ(tun_delete(self->ifname), 0);
+}
+
+TEST_HARNESS_MAIN
-- 
cgit