From 94133cf24bb33889aac267a7f0e3e6a08b8a8e5a Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Tue, 14 May 2024 14:12:21 +0100 Subject: bpftool: Introduce btf c dump sorting Sort bpftool C dump output, aiming to simplify vmlinux.h diffing and to force a more natural ordering of type definitions. Definitions are sorted first by their BTF kind ranks, then by their base type name, and finally by their own name. Type ranks: Assign ranks to BTF kinds (defined in function btf_type_rank) to establish the following order: 1. Anonymous enums/enums64 2. Named enums/enums64 3. Trivial types typedefs (ints, then floats) 4. Structs/Unions 5. Function prototypes 6. Forward declarations Type rank is set to the maximum for unnamed reference types, structs and unions to avoid emitting those types early. They will be emitted as part of the type chain starting with a named type. Lexicographical ordering: Each type is assigned a sort_name and an own_name. sort_name is the resolved name of the final base type for reference types (typedef, pointer, array, etc.). Sorting by sort_name groups typedefs of the same base type together. sort_name for a non-reference type is the same as own_name. own_name is the direct name of a particular type and is used as the final sorting step. 
Signed-off-by: Mykyta Yatsenko Signed-off-by: Andrii Nakryiko Tested-by: Andrii Nakryiko Reviewed-by: Quentin Monnet Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240514131221.20585-1-yatsenko@meta.com Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/Documentation/bpftool-btf.rst | 6 +- tools/bpf/bpftool/bash-completion/bpftool | 3 + tools/bpf/bpftool/btf.c | 138 +++++++++++++++++++++++- 3 files changed, 140 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst index eaba24320fb2..3f6bca03ad2e 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst @@ -28,7 +28,7 @@ BTF COMMANDS | **bpftool** **btf help** | | *BTF_SRC* := { **id** *BTF_ID* | **prog** *PROG* | **map** *MAP* [{**key** | **value** | **kv** | **all**}] | **file** *FILE* } -| *FORMAT* := { **raw** | **c** } +| *FORMAT* := { **raw** | **c** [**unsorted**] } | *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } | *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* | **name** *PROG_NAME* } @@ -63,7 +63,9 @@ bpftool btf dump *BTF_SRC* pahole. **format** option can be used to override default (raw) output format. Raw - (**raw**) or C-syntax (**c**) output formats are supported. + (**raw**) or C-syntax (**c**) output formats are supported. With C-style + formatting, the output is sorted by default. Use the **unsorted** option + to avoid sorting the output. bpftool btf help Print short help message. 
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 04afe2ac2228..be99d49b8714 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -930,6 +930,9 @@ _bpftool() format) COMPREPLY=( $( compgen -W "c raw" -- "$cur" ) ) ;; + c) + COMPREPLY=( $( compgen -W "unsorted" -- "$cur" ) ) + ;; *) # emit extra options case ${words[3]} in diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 91fcb75babe3..af047dedde38 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -43,6 +43,13 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = { [BTF_KIND_ENUM64] = "ENUM64", }; +struct sort_datum { + int index; + int type_rank; + const char *sort_name; + const char *own_name; +}; + static const char *btf_int_enc_str(__u8 encoding) { switch (encoding) { @@ -460,9 +467,122 @@ static void __printf(2, 0) btf_dump_printf(void *ctx, vfprintf(stdout, fmt, args); } +static int btf_type_rank(const struct btf *btf, __u32 index, bool has_name) +{ + const struct btf_type *t = btf__type_by_id(btf, index); + const int kind = btf_kind(t); + const int max_rank = 10; + + if (t->name_off) + has_name = true; + + switch (kind) { + case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: + return has_name ? 1 : 0; + case BTF_KIND_INT: + case BTF_KIND_FLOAT: + return 2; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + return has_name ? 3 : max_rank; + case BTF_KIND_FUNC_PROTO: + return has_name ? 
4 : max_rank; + case BTF_KIND_ARRAY: + if (has_name) + return btf_type_rank(btf, btf_array(t)->type, has_name); + return max_rank; + case BTF_KIND_TYPE_TAG: + case BTF_KIND_CONST: + case BTF_KIND_PTR: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_TYPEDEF: + case BTF_KIND_DECL_TAG: + if (has_name) + return btf_type_rank(btf, t->type, has_name); + return max_rank; + default: + return max_rank; + } +} + +static const char *btf_type_sort_name(const struct btf *btf, __u32 index, bool from_ref) +{ + const struct btf_type *t = btf__type_by_id(btf, index); + + switch (btf_kind(t)) { + case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: { + int name_off = t->name_off; + + /* Use name of the first element for anonymous enums if allowed */ + if (!from_ref && !t->name_off && btf_vlen(t)) + name_off = btf_enum(t)->name_off; + + return btf__name_by_offset(btf, name_off); + } + case BTF_KIND_ARRAY: + return btf_type_sort_name(btf, btf_array(t)->type, true); + case BTF_KIND_TYPE_TAG: + case BTF_KIND_CONST: + case BTF_KIND_PTR: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_TYPEDEF: + case BTF_KIND_DECL_TAG: + return btf_type_sort_name(btf, t->type, true); + default: + return btf__name_by_offset(btf, t->name_off); + } + return NULL; +} + +static int btf_type_compare(const void *left, const void *right) +{ + const struct sort_datum *d1 = (const struct sort_datum *)left; + const struct sort_datum *d2 = (const struct sort_datum *)right; + int r; + + if (d1->type_rank != d2->type_rank) + return d1->type_rank < d2->type_rank ? 
-1 : 1; + + r = strcmp(d1->sort_name, d2->sort_name); + if (r) + return r; + + return strcmp(d1->own_name, d2->own_name); +} + +static struct sort_datum *sort_btf_c(const struct btf *btf) +{ + struct sort_datum *datums; + int n; + + n = btf__type_cnt(btf); + datums = malloc(sizeof(struct sort_datum) * n); + if (!datums) + return NULL; + + for (int i = 0; i < n; ++i) { + struct sort_datum *d = datums + i; + const struct btf_type *t = btf__type_by_id(btf, i); + + d->index = i; + d->type_rank = btf_type_rank(btf, i, false); + d->sort_name = btf_type_sort_name(btf, i, false); + d->own_name = btf__name_by_offset(btf, t->name_off); + } + + qsort(datums, n, sizeof(struct sort_datum), btf_type_compare); + + return datums; +} + static int dump_btf_c(const struct btf *btf, - __u32 *root_type_ids, int root_type_cnt) + __u32 *root_type_ids, int root_type_cnt, bool sort_dump) { + struct sort_datum *datums = NULL; struct btf_dump *d; int err = 0, i; @@ -486,8 +606,12 @@ static int dump_btf_c(const struct btf *btf, } else { int cnt = btf__type_cnt(btf); + if (sort_dump) + datums = sort_btf_c(btf); for (i = 1; i < cnt; i++) { - err = btf_dump__dump_type(d, i); + int idx = datums ? 
datums[i].index : i; + + err = btf_dump__dump_type(d, idx); if (err) goto done; } @@ -500,6 +624,7 @@ static int dump_btf_c(const struct btf *btf, printf("#endif /* __VMLINUX_H__ */\n"); done: + free(datums); btf_dump__free(d); return err; } @@ -549,10 +674,10 @@ static bool btf_is_kernel_module(__u32 btf_id) static int do_dump(int argc, char **argv) { + bool dump_c = false, sort_dump_c = true; struct btf *btf = NULL, *base = NULL; __u32 root_type_ids[2]; int root_type_cnt = 0; - bool dump_c = false; __u32 btf_id = -1; const char *src; int fd = -1; @@ -663,6 +788,9 @@ static int do_dump(int argc, char **argv) goto done; } NEXT_ARG(); + } else if (is_prefix(*argv, "unsorted")) { + sort_dump_c = false; + NEXT_ARG(); } else { p_err("unrecognized option: '%s'", *argv); err = -EINVAL; @@ -691,7 +819,7 @@ static int do_dump(int argc, char **argv) err = -ENOTSUP; goto done; } - err = dump_btf_c(btf, root_type_ids, root_type_cnt); + err = dump_btf_c(btf, root_type_ids, root_type_cnt, sort_dump_c); } else { err = dump_btf_raw(btf, root_type_ids, root_type_cnt); } @@ -1063,7 +1191,7 @@ static int do_help(int argc, char **argv) " %1$s %2$s help\n" "\n" " BTF_SRC := { id BTF_ID | prog PROG | map MAP [{key | value | kv | all}] | file FILE }\n" - " FORMAT := { raw | c }\n" + " FORMAT := { raw | c [unsorted] }\n" " " HELP_SPEC_MAP "\n" " " HELP_SPEC_PROGRAM "\n" " " HELP_SPEC_OPTIONS " |\n" -- cgit v1.2.3-73-gaa49b From e7b64f9d3f5b10186038201e0b91f734cbd7fc3d Mon Sep 17 00:00:00 2001 From: Artem Savkov Date: Mon, 13 May 2024 13:26:58 +0200 Subject: bpftool: Fix make dependencies for vmlinux.h With pre-generated vmlinux.h there is no dependency on neither vmlinux nor bootstrap bpftool. Define dependencies separately for both modes. This avoids needless rebuilds in some corner cases. 
Suggested-by: Jan Stancek Signed-off-by: Artem Savkov Signed-off-by: Andrii Nakryiko Acked-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20240513112658.43691-1-asavkov@redhat.com Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index dfa4f1bebbb3..ba927379eb20 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -204,10 +204,11 @@ ifeq ($(feature-clang-bpf-co-re),1) BUILD_BPF_SKELS := 1 -$(OUTPUT)vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL_BOOTSTRAP) ifeq ($(VMLINUX_H),) +$(OUTPUT)vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL_BOOTSTRAP) $(QUIET_GEN)$(BPFTOOL_BOOTSTRAP) btf dump file $< format c > $@ else +$(OUTPUT)vmlinux.h: $(VMLINUX_H) $(Q)cp "$(VMLINUX_H)" $@ endif -- cgit v1.2.3-73-gaa49b From f4aba3471cfb9ccf69b476463f19b4c50fef6b14 Mon Sep 17 00:00:00 2001 From: Ivan Babrou Date: Mon, 20 May 2024 15:51:49 -0700 Subject: bpftool: Un-const bpf_func_info to fix it for llvm 17 and newer LLVM 17 started treating const structs as constants: * https://github.com/llvm/llvm-project/commit/0b2d5b967d98 Combined with pointer laundering via ptr_to_u64, which takes a const ptr, but in reality treats the underlying memory as mutable, this makes clang always pass zero to btf__type_by_id, which breaks full name resolution. Disassembly before (LLVM 16) and after (LLVM 17): - 8b 75 cc mov -0x34(%rbp),%esi - e8 47 8d 02 00 call 3f5b0 + 31 f6 xor %esi,%esi + e8 a9 8c 02 00 call 3f510 It's a bigger project to fix this properly (and a question whether LLVM itself should detect this), but for right now let's just fix bpftool. 
For more information, see this thread in bpf mailing list: * https://lore.kernel.org/bpf/CABWYdi0ymezpYsQsPv7qzpx2fWuTkoD1-wG1eT-9x-TSREFrQg@mail.gmail.com/T/ Fixes: b662000aff84 ("bpftool: Adding support for BTF program names") Signed-off-by: Ivan Babrou Signed-off-by: Andrii Nakryiko Acked-by: Nick Desaulniers Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20240520225149.5517-1-ivan@cloudflare.com --- tools/bpf/bpftool/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 958e92acca8e..9b75639434b8 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -410,7 +410,7 @@ void get_prog_full_name(const struct bpf_prog_info *prog_info, int prog_fd, { const char *prog_name = prog_info->name; const struct btf_type *func_type; - const struct bpf_func_info finfo = {}; + struct bpf_func_info finfo = {}; struct bpf_prog_info info = {}; __u32 info_len = sizeof(info); struct btf *prog_btf = NULL; -- cgit v1.2.3-73-gaa49b From 6c8d7598dfed759bf1d9d0322b4c2b42eb7252d8 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 17 May 2024 14:21:46 +0800 Subject: selftests/bpf: Fix prog numbers in test_sockmap bpf_prog5 and bpf_prog7 are removed from progs/test_sockmap_kern.h in commit d79a32129b21 ("bpf: Selftests, remove prints from sockmap tests"), now there are only 9 progs in it, not 11: SEC("sk_skb1") int bpf_prog1(struct __sk_buff *skb) SEC("sk_skb2") int bpf_prog2(struct __sk_buff *skb) SEC("sk_skb3") int bpf_prog3(struct __sk_buff *skb) SEC("sockops") int bpf_sockmap(struct bpf_sock_ops *skops) SEC("sk_msg1") int bpf_prog4(struct sk_msg_md *msg) SEC("sk_msg2") int bpf_prog6(struct sk_msg_md *msg) SEC("sk_msg3") int bpf_prog8(struct sk_msg_md *msg) SEC("sk_msg4") int bpf_prog9(struct sk_msg_md *msg) SEC("sk_msg5") int bpf_prog10(struct sk_msg_md *msg) This patch updates the array sizes of prog_fd[], prog_attach_type[] and prog_type[] from 11 
to 9 accordingly. Fixes: d79a32129b21 ("bpf: Selftests, remove prints from sockmap tests") Signed-off-by: Geliang Tang Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/9c10d9f974f07fcb354a43a8eca67acb2fafc587.1715926605.git.tanggeliang@kylinos.cn --- tools/testing/selftests/bpf/test_sockmap.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index 92752f5eeded..4499b3cfc3a6 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -63,7 +63,7 @@ int passed; int failed; int map_fd[9]; struct bpf_map *maps[9]; -int prog_fd[11]; +int prog_fd[9]; int txmsg_pass; int txmsg_redir; @@ -1793,8 +1793,6 @@ int prog_attach_type[] = { BPF_SK_MSG_VERDICT, BPF_SK_MSG_VERDICT, BPF_SK_MSG_VERDICT, - BPF_SK_MSG_VERDICT, - BPF_SK_MSG_VERDICT, }; int prog_type[] = { @@ -1807,8 +1805,6 @@ int prog_type[] = { BPF_PROG_TYPE_SK_MSG, BPF_PROG_TYPE_SK_MSG, BPF_PROG_TYPE_SK_MSG, - BPF_PROG_TYPE_SK_MSG, - BPF_PROG_TYPE_SK_MSG, }; static int populate_progs(char *bpf_file) -- cgit v1.2.3-73-gaa49b From a87f34e742d279d54d529e4bc4763fdaab32a466 Mon Sep 17 00:00:00 2001 From: Brad Cowie Date: Wed, 22 May 2024 17:07:12 +1200 Subject: selftests/bpf: Update tests for new ct zone opts for nf_conntrack kfuncs Add test for allocating and looking up ct entry in a non-default ct zone with kfuncs bpf_{xdp,skb}_ct_alloc and bpf_{xdp,skb}_ct_lookup. Add negative tests for looking up ct entry in a different ct zone to where it was allocated and with a different direction. Update reserved test for old struct definition to test for ct_zone_id being set when opts size isn't NF_BPF_CT_OPTS_SZ (16). 
Signed-off-by: Brad Cowie Link: https://lore.kernel.org/r/20240522050712.732558-2-brad@faucet.nz Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/config | 1 + tools/testing/selftests/bpf/prog_tests/bpf_nf.c | 7 ++ tools/testing/selftests/bpf/progs/test_bpf_nf.c | 108 ++++++++++++++++++++++++ 3 files changed, 116 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index eeabd798bc3a..2fb16da78dce 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -80,6 +80,7 @@ CONFIG_NETFILTER_XT_TARGET_CT=y CONFIG_NETKIT=y CONFIG_NF_CONNTRACK=y CONFIG_NF_CONNTRACK_MARK=y +CONFIG_NF_CONNTRACK_ZONES=y CONFIG_NF_DEFRAG_IPV4=y CONFIG_NF_DEFRAG_IPV6=y CONFIG_NF_NAT=y diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c index b30ff6b3b81a..a4a1f93878d4 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c @@ -104,6 +104,7 @@ static void test_bpf_nf_ct(int mode) ASSERT_EQ(skel->bss->test_einval_bpf_tuple, -EINVAL, "Test EINVAL for NULL bpf_tuple"); ASSERT_EQ(skel->bss->test_einval_reserved, -EINVAL, "Test EINVAL for reserved not set to 0"); + ASSERT_EQ(skel->bss->test_einval_reserved_new, -EINVAL, "Test EINVAL for reserved in new struct not set to 0"); ASSERT_EQ(skel->bss->test_einval_netns_id, -EINVAL, "Test EINVAL for netns_id < -1"); ASSERT_EQ(skel->bss->test_einval_len_opts, -EINVAL, "Test EINVAL for len__opts != NF_BPF_CT_OPTS_SZ"); ASSERT_EQ(skel->bss->test_eproto_l4proto, -EPROTO, "Test EPROTO for l4proto != TCP or UDP"); @@ -122,6 +123,12 @@ static void test_bpf_nf_ct(int mode) ASSERT_EQ(skel->bss->test_exist_lookup_mark, 43, "Test existing connection lookup ctmark"); ASSERT_EQ(skel->data->test_snat_addr, 0, "Test for source natting"); ASSERT_EQ(skel->data->test_dnat_addr, 0, "Test for destination natting"); + 
ASSERT_EQ(skel->data->test_ct_zone_id_alloc_entry, 0, "Test for alloc new entry in specified ct zone"); + ASSERT_EQ(skel->data->test_ct_zone_id_insert_entry, 0, "Test for insert new entry in specified ct zone"); + ASSERT_EQ(skel->data->test_ct_zone_id_succ_lookup, 0, "Test for successful lookup in specified ct_zone"); + ASSERT_EQ(skel->bss->test_ct_zone_dir_enoent_lookup, -ENOENT, "Test ENOENT for lookup with wrong ct zone dir"); + ASSERT_EQ(skel->bss->test_ct_zone_id_enoent_lookup, -ENOENT, "Test ENOENT for lookup in wrong ct zone"); + end: if (client_fd != -1) close(client_fd); diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c index 77ad8adf68da..0289d8ce2b80 100644 --- a/tools/testing/selftests/bpf/progs/test_bpf_nf.c +++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c @@ -9,10 +9,14 @@ #define EINVAL 22 #define ENOENT 2 +#define NF_CT_ZONE_DIR_ORIG (1 << IP_CT_DIR_ORIGINAL) +#define NF_CT_ZONE_DIR_REPL (1 << IP_CT_DIR_REPLY) + extern unsigned long CONFIG_HZ __kconfig; int test_einval_bpf_tuple = 0; int test_einval_reserved = 0; +int test_einval_reserved_new = 0; int test_einval_netns_id = 0; int test_einval_len_opts = 0; int test_eproto_l4proto = 0; @@ -22,6 +26,11 @@ int test_eafnosupport = 0; int test_alloc_entry = -EINVAL; int test_insert_entry = -EAFNOSUPPORT; int test_succ_lookup = -ENOENT; +int test_ct_zone_id_alloc_entry = -EINVAL; +int test_ct_zone_id_insert_entry = -EAFNOSUPPORT; +int test_ct_zone_id_succ_lookup = -ENOENT; +int test_ct_zone_dir_enoent_lookup = 0; +int test_ct_zone_id_enoent_lookup = 0; u32 test_delta_timeout = 0; u32 test_status = 0; u32 test_insert_lookup_mark = 0; @@ -45,6 +54,17 @@ struct bpf_ct_opts___local { s32 netns_id; s32 error; u8 l4proto; + u8 dir; + u8 reserved[2]; +}; + +struct bpf_ct_opts___new { + s32 netns_id; + s32 error; + u8 l4proto; + u8 dir; + u16 ct_zone_id; + u8 ct_zone_dir; u8 reserved[3]; } __attribute__((preserve_access_index)); @@ -220,10 +240,97 
@@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32, } } +static __always_inline void +nf_ct_opts_new_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32, + struct bpf_ct_opts___new *, u32), + struct nf_conn *(*alloc_fn)(void *, struct bpf_sock_tuple *, u32, + struct bpf_ct_opts___new *, u32), + void *ctx) +{ + struct bpf_ct_opts___new opts_def = { .l4proto = IPPROTO_TCP, .netns_id = -1 }; + struct bpf_sock_tuple bpf_tuple; + struct nf_conn *ct; + + __builtin_memset(&bpf_tuple, 0, sizeof(bpf_tuple.ipv4)); + + opts_def.reserved[0] = 1; + ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, + sizeof(opts_def)); + opts_def.reserved[0] = 0; + if (ct) + bpf_ct_release(ct); + else + test_einval_reserved_new = opts_def.error; + + bpf_tuple.ipv4.saddr = bpf_get_prandom_u32(); /* src IP */ + bpf_tuple.ipv4.daddr = bpf_get_prandom_u32(); /* dst IP */ + bpf_tuple.ipv4.sport = bpf_get_prandom_u32(); /* src port */ + bpf_tuple.ipv4.dport = bpf_get_prandom_u32(); /* dst port */ + + /* use non-default ct zone */ + opts_def.ct_zone_id = 10; + opts_def.ct_zone_dir = NF_CT_ZONE_DIR_ORIG; + ct = alloc_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, + sizeof(opts_def)); + if (ct) { + __u16 sport = bpf_get_prandom_u32(); + __u16 dport = bpf_get_prandom_u32(); + union nf_inet_addr saddr = {}; + union nf_inet_addr daddr = {}; + struct nf_conn *ct_ins; + + bpf_ct_set_timeout(ct, 10000); + + /* snat */ + saddr.ip = bpf_get_prandom_u32(); + bpf_ct_set_nat_info(ct, &saddr, sport, NF_NAT_MANIP_SRC___local); + /* dnat */ + daddr.ip = bpf_get_prandom_u32(); + bpf_ct_set_nat_info(ct, &daddr, dport, NF_NAT_MANIP_DST___local); + + ct_ins = bpf_ct_insert_entry(ct); + if (ct_ins) { + struct nf_conn *ct_lk; + + /* entry should exist in same ct zone we inserted it */ + ct_lk = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), + &opts_def, sizeof(opts_def)); + if (ct_lk) { + bpf_ct_release(ct_lk); + test_ct_zone_id_succ_lookup = 0; + 
} + + /* entry should not exist with wrong direction */ + opts_def.ct_zone_dir = NF_CT_ZONE_DIR_REPL; + ct_lk = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), + &opts_def, sizeof(opts_def)); + opts_def.ct_zone_dir = NF_CT_ZONE_DIR_ORIG; + if (ct_lk) + bpf_ct_release(ct_lk); + else + test_ct_zone_dir_enoent_lookup = opts_def.error; + + /* entry should not exist in default ct zone */ + opts_def.ct_zone_id = 0; + ct_lk = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), + &opts_def, sizeof(opts_def)); + if (ct_lk) + bpf_ct_release(ct_lk); + else + test_ct_zone_id_enoent_lookup = opts_def.error; + + bpf_ct_release(ct_ins); + test_ct_zone_id_insert_entry = 0; + } + test_ct_zone_id_alloc_entry = 0; + } +} + SEC("xdp") int nf_xdp_ct_test(struct xdp_md *ctx) { nf_ct_test((void *)bpf_xdp_ct_lookup, (void *)bpf_xdp_ct_alloc, ctx); + nf_ct_opts_new_test((void *)bpf_xdp_ct_lookup, (void *)bpf_xdp_ct_alloc, ctx); return 0; } @@ -231,6 +338,7 @@ SEC("tc") int nf_skb_ct_test(struct __sk_buff *ctx) { nf_ct_test((void *)bpf_skb_ct_lookup, (void *)bpf_skb_ct_alloc, ctx); + nf_ct_opts_new_test((void *)bpf_skb_ct_lookup, (void *)bpf_skb_ct_alloc, ctx); return 0; } -- cgit v1.2.3-73-gaa49b From 1693c5db6ab8262e6f5263f9d211855959aa5acd Mon Sep 17 00:00:00 2001 From: Abhishek Chauhan Date: Thu, 9 May 2024 14:18:33 -0700 Subject: net: Add additional bit to support clockid_t timestamp type tstamp_type is now set based on actual clockid_t compressed into 2 bits. To make the design scalable for future needs this commit bring in the change to extend the tstamp_type:1 to tstamp_type:2 to support other clockid_t timestamp. We now support CLOCK_TAI as part of tstamp_type as part of this commit with existing support CLOCK_MONOTONIC and CLOCK_REALTIME. 
Signed-off-by: Abhishek Chauhan Reviewed-by: Willem de Bruijn Reviewed-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20240509211834.3235191-3-quic_abchauha@quicinc.com Signed-off-by: Martin KaFai Lau --- include/linux/skbuff.h | 18 +++++++++++------ include/uapi/linux/bpf.h | 15 +++++++++----- net/core/filter.c | 46 ++++++++++++++++++++++++------------------ net/ipv4/ip_output.c | 5 ++++- net/ipv4/raw.c | 2 +- net/ipv4/tcp_ipv4.c | 2 ++ net/ipv6/ip6_output.c | 5 ++++- net/ipv6/raw.c | 2 +- net/ipv6/tcp_ipv6.c | 10 +++++++-- net/packet/af_packet.c | 7 +++---- tools/include/uapi/linux/bpf.h | 15 +++++++++----- 11 files changed, 81 insertions(+), 46 deletions(-) (limited to 'tools') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 3a721cc3b644..1e5c97daaa37 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -709,6 +709,8 @@ typedef unsigned char *sk_buff_data_t; enum skb_tstamp_type { SKB_CLOCK_REALTIME, SKB_CLOCK_MONOTONIC, + SKB_CLOCK_TAI, + __SKB_CLOCK_MAX = SKB_CLOCK_TAI, }; /** @@ -957,7 +959,7 @@ struct sk_buff { /* private: */ __u8 __mono_tc_offset[0]; /* public: */ - __u8 tstamp_type:1; /* See skb_tstamp_type */ + __u8 tstamp_type:2; /* See skb_tstamp_type */ #ifdef CONFIG_NET_XGRESS __u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */ __u8 tc_skip_classify:1; @@ -1087,15 +1089,16 @@ struct sk_buff { #endif #define PKT_TYPE_OFFSET offsetof(struct sk_buff, __pkt_type_offset) -/* if you move tc_at_ingress or mono_delivery_time +/* if you move tc_at_ingress or tstamp_type * around, you also must adapt these constants. 
*/ #ifdef __BIG_ENDIAN_BITFIELD -#define SKB_MONO_DELIVERY_TIME_MASK (1 << 7) -#define TC_AT_INGRESS_MASK (1 << 6) +#define SKB_TSTAMP_TYPE_MASK (3 << 6) +#define SKB_TSTAMP_TYPE_RSHIFT (6) +#define TC_AT_INGRESS_MASK (1 << 5) #else -#define SKB_MONO_DELIVERY_TIME_MASK (1 << 0) -#define TC_AT_INGRESS_MASK (1 << 1) +#define SKB_TSTAMP_TYPE_MASK (3) +#define TC_AT_INGRESS_MASK (1 << 2) #endif #define SKB_BF_MONO_TC_OFFSET offsetof(struct sk_buff, __mono_tc_offset) @@ -4216,6 +4219,9 @@ static inline void skb_set_delivery_type_by_clockid(struct sk_buff *skb, case CLOCK_MONOTONIC: tstamp_type = SKB_CLOCK_MONOTONIC; break; + case CLOCK_TAI: + tstamp_type = SKB_CLOCK_TAI; + break; default: WARN_ON_ONCE(1); kt = 0; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 90706a47f6ff..25ea393cf084 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6207,12 +6207,17 @@ union { \ __u64 :64; \ } __attribute__((aligned(8))) +/* The enum used in skb->tstamp_type. It specifies the clock type + * of the time stored in the skb->tstamp. + */ enum { - BPF_SKB_TSTAMP_UNSPEC, - BPF_SKB_TSTAMP_DELIVERY_MONO, /* tstamp has mono delivery time */ - /* For any BPF_SKB_TSTAMP_* that the bpf prog cannot handle, - * the bpf prog should handle it like BPF_SKB_TSTAMP_UNSPEC - * and try to deduce it by ingress, egress or skb->sk->sk_clockid. + BPF_SKB_TSTAMP_UNSPEC = 0, /* DEPRECATED */ + BPF_SKB_TSTAMP_DELIVERY_MONO = 1, /* DEPRECATED */ + BPF_SKB_CLOCK_REALTIME = 0, + BPF_SKB_CLOCK_MONOTONIC = 1, + BPF_SKB_CLOCK_TAI = 2, + /* For any future BPF_SKB_CLOCK_* that the bpf prog cannot handle, + * the bpf prog can try to deduce it by ingress/egress/skb->sk->sk_clockid. 
*/ }; diff --git a/net/core/filter.c b/net/core/filter.c index a3781a796da4..c6edfe9f41bc 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7726,17 +7726,21 @@ BPF_CALL_3(bpf_skb_set_tstamp, struct sk_buff *, skb, return -EOPNOTSUPP; switch (tstamp_type) { - case BPF_SKB_TSTAMP_DELIVERY_MONO: + case BPF_SKB_CLOCK_REALTIME: + skb->tstamp = tstamp; + skb->tstamp_type = SKB_CLOCK_REALTIME; + break; + case BPF_SKB_CLOCK_MONOTONIC: if (!tstamp) return -EINVAL; skb->tstamp = tstamp; skb->tstamp_type = SKB_CLOCK_MONOTONIC; break; - case BPF_SKB_TSTAMP_UNSPEC: - if (tstamp) + case BPF_SKB_CLOCK_TAI: + if (!tstamp) return -EINVAL; - skb->tstamp = 0; - skb->tstamp_type = SKB_CLOCK_REALTIME; + skb->tstamp = tstamp; + skb->tstamp_type = SKB_CLOCK_TAI; break; default: return -EINVAL; @@ -9387,16 +9391,17 @@ static struct bpf_insn *bpf_convert_tstamp_type_read(const struct bpf_insn *si, { __u8 value_reg = si->dst_reg; __u8 skb_reg = si->src_reg; - /* AX is needed because src_reg and dst_reg could be the same */ - __u8 tmp_reg = BPF_REG_AX; - - *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, - SKB_BF_MONO_TC_OFFSET); - *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, - SKB_MONO_DELIVERY_TIME_MASK, 2); - *insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_UNSPEC); - *insn++ = BPF_JMP_A(1); - *insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_DELIVERY_MONO); + BUILD_BUG_ON(__SKB_CLOCK_MAX != (int)BPF_SKB_CLOCK_TAI); + BUILD_BUG_ON(SKB_CLOCK_REALTIME != (int)BPF_SKB_CLOCK_REALTIME); + BUILD_BUG_ON(SKB_CLOCK_MONOTONIC != (int)BPF_SKB_CLOCK_MONOTONIC); + BUILD_BUG_ON(SKB_CLOCK_TAI != (int)BPF_SKB_CLOCK_TAI); + *insn++ = BPF_LDX_MEM(BPF_B, value_reg, skb_reg, SKB_BF_MONO_TC_OFFSET); + *insn++ = BPF_ALU32_IMM(BPF_AND, value_reg, SKB_TSTAMP_TYPE_MASK); +#ifdef __BIG_ENDIAN_BITFIELD + *insn++ = BPF_ALU32_IMM(BPF_RSH, value_reg, SKB_TSTAMP_TYPE_RSHIFT); +#else + BUILD_BUG_ON(!(SKB_TSTAMP_TYPE_MASK & 0x1)); +#endif return insn; } @@ -9439,10 +9444,11 @@ static struct bpf_insn 
*bpf_convert_tstamp_read(const struct bpf_prog *prog, __u8 tmp_reg = BPF_REG_AX; *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, SKB_BF_MONO_TC_OFFSET); - *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, - TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK); - *insn++ = BPF_JMP32_IMM(BPF_JNE, tmp_reg, - TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK, 2); + /* check if ingress mask bits is set */ + *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, TC_AT_INGRESS_MASK, 1); + *insn++ = BPF_JMP_A(4); + *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, SKB_TSTAMP_TYPE_MASK, 1); + *insn++ = BPF_JMP_A(2); /* skb->tc_at_ingress && skb->tstamp_type, * read 0 as the (rcv) timestamp. */ @@ -9479,7 +9485,7 @@ static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog, /* goto */ *insn++ = BPF_JMP_A(2); /* : skb->tstamp_type */ - *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, ~SKB_MONO_DELIVERY_TIME_MASK); + *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, ~SKB_TSTAMP_TYPE_MASK); *insn++ = BPF_STX_MEM(BPF_B, skb_reg, tmp_reg, SKB_BF_MONO_TC_OFFSET); } #endif diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index fe86cadfa85b..b90d0f78ac80 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1457,7 +1457,10 @@ struct sk_buff *__ip_make_skb(struct sock *sk, skb->priority = (cork->tos != -1) ? 
cork->priority: READ_ONCE(sk->sk_priority); skb->mark = cork->mark; - skb->tstamp = cork->transmit_time; + if (sk_is_tcp(sk)) + skb_set_delivery_time(skb, cork->transmit_time, SKB_CLOCK_MONOTONIC); + else + skb_set_delivery_type_by_clockid(skb, cork->transmit_time, sk->sk_clockid); /* * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec * on dst refcount diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 4cb43401e0e0..1a0953650356 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -360,7 +360,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, skb->protocol = htons(ETH_P_IP); skb->priority = READ_ONCE(sk->sk_priority); skb->mark = sockc->mark; - skb->tstamp = sockc->transmit_time; + skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, sk->sk_clockid); skb_dst_set(skb, &rt->dst); *rtp = NULL; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 30ef0c8f5e92..8f70b8d1d1e5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -3625,6 +3625,8 @@ void __init tcp_v4_init(void) */ inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO; + sk->sk_clockid = CLOCK_MONOTONIC; + per_cpu(ipv4_tcp_sk, cpu) = sk; } if (register_pernet_subsys(&tcp_sk_ops)) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 1ab0f23d37bf..e7a19df3125e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1924,7 +1924,10 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, skb->priority = READ_ONCE(sk->sk_priority); skb->mark = cork->base.mark; - skb->tstamp = cork->base.transmit_time; + if (sk_is_tcp(sk)) + skb_set_delivery_time(skb, cork->base.transmit_time, SKB_CLOCK_MONOTONIC); + else + skb_set_delivery_type_by_clockid(skb, cork->base.transmit_time, sk->sk_clockid); ip6_cork_steal_dst(skb, cork); IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 2eedf255600b..f838366e8256 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -621,7 +621,7 @@ static int rawv6_send_hdrinc(struct 
sock *sk, struct msghdr *msg, int length, skb->protocol = htons(ETH_P_IPV6); skb->priority = READ_ONCE(sk->sk_priority); skb->mark = sockc->mark; - skb->tstamp = sockc->transmit_time; + skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, sk->sk_clockid); skb_put(skb, length); skb_reset_network_header(skb); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 8333005c5c2e..750aa681779c 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2387,8 +2387,14 @@ static struct inet_protosw tcpv6_protosw = { static int __net_init tcpv6_net_init(struct net *net) { - return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6, - SOCK_RAW, IPPROTO_TCP, net); + int res; + + res = inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6, + SOCK_RAW, IPPROTO_TCP, net); + if (!res) + net->ipv6.tcp_sk->sk_clockid = CLOCK_MONOTONIC; + + return res; } static void __net_exit tcpv6_net_exit(struct net *net) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ea3ebc160e25..fce390887591 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2056,8 +2056,7 @@ retry: skb->dev = dev; skb->priority = READ_ONCE(sk->sk_priority); skb->mark = READ_ONCE(sk->sk_mark); - skb->tstamp = sockc.transmit_time; - + skb_set_delivery_type_by_clockid(skb, sockc.transmit_time, sk->sk_clockid); skb_setup_tx_timestamp(skb, sockc.tsflags); if (unlikely(extra_len == 4)) @@ -2584,7 +2583,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb->dev = dev; skb->priority = READ_ONCE(po->sk.sk_priority); skb->mark = READ_ONCE(po->sk.sk_mark); - skb->tstamp = sockc->transmit_time; + skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, po->sk.sk_clockid); skb_setup_tx_timestamp(skb, sockc->tsflags); skb_zcopy_set_nouarg(skb, ph.raw); @@ -3062,7 +3061,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) skb->dev = dev; skb->priority = READ_ONCE(sk->sk_priority); skb->mark = sockc.mark; - skb->tstamp = 
sockc.transmit_time; + skb_set_delivery_type_by_clockid(skb, sockc.transmit_time, sk->sk_clockid); if (unlikely(extra_len == 4)) skb->no_fcs = 1; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 90706a47f6ff..25ea393cf084 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -6207,12 +6207,17 @@ union { \ __u64 :64; \ } __attribute__((aligned(8))) +/* The enum used in skb->tstamp_type. It specifies the clock type + * of the time stored in the skb->tstamp. + */ enum { - BPF_SKB_TSTAMP_UNSPEC, - BPF_SKB_TSTAMP_DELIVERY_MONO, /* tstamp has mono delivery time */ - /* For any BPF_SKB_TSTAMP_* that the bpf prog cannot handle, - * the bpf prog should handle it like BPF_SKB_TSTAMP_UNSPEC - * and try to deduce it by ingress, egress or skb->sk->sk_clockid. + BPF_SKB_TSTAMP_UNSPEC = 0, /* DEPRECATED */ + BPF_SKB_TSTAMP_DELIVERY_MONO = 1, /* DEPRECATED */ + BPF_SKB_CLOCK_REALTIME = 0, + BPF_SKB_CLOCK_MONOTONIC = 1, + BPF_SKB_CLOCK_TAI = 2, + /* For any future BPF_SKB_CLOCK_* that the bpf prog cannot handle, + * the bpf prog can try to deduce it by ingress/egress/skb->sk->sk_clockid. */ }; -- cgit v1.2.3-73-gaa49b From c34e3ab2a76e6a55a64e0d56acc5607062c2bad9 Mon Sep 17 00:00:00 2001 From: Abhishek Chauhan Date: Thu, 9 May 2024 14:18:34 -0700 Subject: selftests/bpf: Handle forwarding of UDP CLOCK_TAI packets With changes in the design to forward CLOCK_TAI in the skbuff framework, existing selftest framework needs modification to handle forwarding of UDP packets with CLOCK_TAI as clockid. 
Signed-off-by: Abhishek Chauhan Reviewed-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20240509211834.3235191-4-quic_abchauha@quicinc.com Signed-off-by: Martin KaFai Lau --- .../testing/selftests/bpf/prog_tests/ctx_rewrite.c | 10 +++--- .../testing/selftests/bpf/prog_tests/tc_redirect.c | 3 -- tools/testing/selftests/bpf/progs/test_tc_dtime.c | 39 ++++++++++------------ 3 files changed, 23 insertions(+), 29 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c index 3b7c57fe55a5..08b6391f2f56 100644 --- a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c +++ b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c @@ -69,15 +69,17 @@ static struct test_case test_cases[] = { { N(SCHED_CLS, struct __sk_buff, tstamp), .read = "r11 = *(u8 *)($ctx + sk_buff::__mono_tc_offset);" - "w11 &= 3;" - "if w11 != 0x3 goto pc+2;" + "if w11 & 0x4 goto pc+1;" + "goto pc+4;" + "if w11 & 0x3 goto pc+1;" + "goto pc+2;" "$dst = 0;" "goto pc+1;" "$dst = *(u64 *)($ctx + sk_buff::tstamp);", .write = "r11 = *(u8 *)($ctx + sk_buff::__mono_tc_offset);" - "if w11 & 0x2 goto pc+1;" + "if w11 & 0x4 goto pc+1;" "goto pc+2;" - "w11 &= -2;" + "w11 &= -4;" "*(u8 *)($ctx + sk_buff::__mono_tc_offset) = r11;" "*(u64 *)($ctx + sk_buff::tstamp) = $src;", }, diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index b1073d36d77a..327d51f59142 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -890,9 +890,6 @@ static void test_udp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd) ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0, dtime_cnt_str(t, INGRESS_FWDNS_P100)); - /* non mono delivery time is not forwarded */ - ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0, - dtime_cnt_str(t, INGRESS_FWDNS_P101)); for (i = EGRESS_FWDNS_P100; i < SET_DTIME; i++) 
ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i)); diff --git a/tools/testing/selftests/bpf/progs/test_tc_dtime.c b/tools/testing/selftests/bpf/progs/test_tc_dtime.c index 74ec09f040b7..ca8e8734d901 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_dtime.c +++ b/tools/testing/selftests/bpf/progs/test_tc_dtime.c @@ -222,17 +222,21 @@ int egress_host(struct __sk_buff *skb) return TC_ACT_OK; if (skb_proto(skb_type) == IPPROTO_TCP) { - if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO && + if (skb->tstamp_type == BPF_SKB_CLOCK_MONOTONIC && skb->tstamp) inc_dtimes(EGRESS_ENDHOST); else inc_errs(EGRESS_ENDHOST); - } else { - if (skb->tstamp_type == BPF_SKB_TSTAMP_UNSPEC && + } else if (skb_proto(skb_type) == IPPROTO_UDP) { + if (skb->tstamp_type == BPF_SKB_CLOCK_TAI && skb->tstamp) inc_dtimes(EGRESS_ENDHOST); else inc_errs(EGRESS_ENDHOST); + } else { + if (skb->tstamp_type == BPF_SKB_CLOCK_REALTIME && + skb->tstamp) + inc_errs(EGRESS_ENDHOST); } skb->tstamp = EGRESS_ENDHOST_MAGIC; @@ -252,7 +256,7 @@ int ingress_host(struct __sk_buff *skb) if (!skb_type) return TC_ACT_OK; - if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO && + if (skb->tstamp_type == BPF_SKB_CLOCK_MONOTONIC && skb->tstamp == EGRESS_FWDNS_MAGIC) inc_dtimes(INGRESS_ENDHOST); else @@ -315,7 +319,6 @@ int egress_fwdns_prio100(struct __sk_buff *skb) SEC("tc") int ingress_fwdns_prio101(struct __sk_buff *skb) { - __u64 expected_dtime = EGRESS_ENDHOST_MAGIC; int skb_type; skb_type = skb_get_type(skb); @@ -323,29 +326,24 @@ int ingress_fwdns_prio101(struct __sk_buff *skb) /* Should have handled in prio100 */ return TC_ACT_SHOT; - if (skb_proto(skb_type) == IPPROTO_UDP) - expected_dtime = 0; - if (skb->tstamp_type) { if (fwdns_clear_dtime() || - skb->tstamp_type != BPF_SKB_TSTAMP_DELIVERY_MONO || - skb->tstamp != expected_dtime) + (skb->tstamp_type != BPF_SKB_CLOCK_MONOTONIC && + skb->tstamp_type != BPF_SKB_CLOCK_TAI) || + skb->tstamp != EGRESS_ENDHOST_MAGIC) inc_errs(INGRESS_FWDNS_P101); else 
inc_dtimes(INGRESS_FWDNS_P101); } else { - if (!fwdns_clear_dtime() && expected_dtime) + if (!fwdns_clear_dtime()) inc_errs(INGRESS_FWDNS_P101); } - if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO) { + if (skb->tstamp_type == BPF_SKB_CLOCK_MONOTONIC) { skb->tstamp = INGRESS_FWDNS_MAGIC; } else { if (bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC, - BPF_SKB_TSTAMP_DELIVERY_MONO)) - inc_errs(SET_DTIME); - if (!bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC, - BPF_SKB_TSTAMP_UNSPEC)) + BPF_SKB_CLOCK_MONOTONIC)) inc_errs(SET_DTIME); } @@ -370,7 +368,7 @@ int egress_fwdns_prio101(struct __sk_buff *skb) if (skb->tstamp_type) { if (fwdns_clear_dtime() || - skb->tstamp_type != BPF_SKB_TSTAMP_DELIVERY_MONO || + skb->tstamp_type != BPF_SKB_CLOCK_MONOTONIC || skb->tstamp != INGRESS_FWDNS_MAGIC) inc_errs(EGRESS_FWDNS_P101); else @@ -380,14 +378,11 @@ int egress_fwdns_prio101(struct __sk_buff *skb) inc_errs(EGRESS_FWDNS_P101); } - if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO) { + if (skb->tstamp_type == BPF_SKB_CLOCK_MONOTONIC) { skb->tstamp = EGRESS_FWDNS_MAGIC; } else { if (bpf_skb_set_tstamp(skb, EGRESS_FWDNS_MAGIC, - BPF_SKB_TSTAMP_DELIVERY_MONO)) - inc_errs(SET_DTIME); - if (!bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC, - BPF_SKB_TSTAMP_UNSPEC)) + BPF_SKB_CLOCK_MONOTONIC)) inc_errs(SET_DTIME); } -- cgit v1.2.3-73-gaa49b From 7055968559a84e4ccbff8eeb621017eaff1b20aa Mon Sep 17 00:00:00 2001 From: Michal Koutný Date: Tue, 21 May 2024 11:21:29 +0200 Subject: selftests: cgroup: Lexicographic order in Makefile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will reduce number of conflicts when modifying the lists. 
Signed-off-by: Michal Koutný Signed-off-by: Tejun Heo --- tools/testing/selftests/cgroup/.gitignore | 10 +++++----- tools/testing/selftests/cgroup/Makefile | 23 ++++++++++++----------- 2 files changed, 17 insertions(+), 16 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore index 2732e0b29271..ec635a0ef488 100644 --- a/tools/testing/selftests/cgroup/.gitignore +++ b/tools/testing/selftests/cgroup/.gitignore @@ -1,11 +1,11 @@ # SPDX-License-Identifier: GPL-2.0-only -test_memcontrol test_core -test_freezer -test_kmem -test_kill test_cpu test_cpuset -test_zswap +test_freezer test_hugetlb_memcg +test_kill +test_kmem +test_memcontrol +test_zswap wait_inotify diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile index 16461dc0ffdf..b91f60f3402c 100644 --- a/tools/testing/selftests/cgroup/Makefile +++ b/tools/testing/selftests/cgroup/Makefile @@ -6,26 +6,27 @@ all: ${HELPER_PROGS} TEST_FILES := with_stress.sh TEST_PROGS := test_stress.sh test_cpuset_prs.sh test_cpuset_v1_hp.sh TEST_GEN_FILES := wait_inotify -TEST_GEN_PROGS = test_memcontrol -TEST_GEN_PROGS += test_kmem -TEST_GEN_PROGS += test_core -TEST_GEN_PROGS += test_freezer -TEST_GEN_PROGS += test_kill +# Keep the lists lexicographically sorted +TEST_GEN_PROGS = test_core TEST_GEN_PROGS += test_cpu TEST_GEN_PROGS += test_cpuset -TEST_GEN_PROGS += test_zswap +TEST_GEN_PROGS += test_freezer TEST_GEN_PROGS += test_hugetlb_memcg +TEST_GEN_PROGS += test_kill +TEST_GEN_PROGS += test_kmem +TEST_GEN_PROGS += test_memcontrol +TEST_GEN_PROGS += test_zswap LOCAL_HDRS += $(selfdir)/clone3/clone3_selftests.h $(selfdir)/pidfd/pidfd.h include ../lib.mk -$(OUTPUT)/test_memcontrol: cgroup_util.c -$(OUTPUT)/test_kmem: cgroup_util.c $(OUTPUT)/test_core: cgroup_util.c -$(OUTPUT)/test_freezer: cgroup_util.c -$(OUTPUT)/test_kill: cgroup_util.c $(OUTPUT)/test_cpu: cgroup_util.c $(OUTPUT)/test_cpuset: cgroup_util.c 
-$(OUTPUT)/test_zswap: cgroup_util.c +$(OUTPUT)/test_freezer: cgroup_util.c $(OUTPUT)/test_hugetlb_memcg: cgroup_util.c +$(OUTPUT)/test_kill: cgroup_util.c +$(OUTPUT)/test_kmem: cgroup_util.c +$(OUTPUT)/test_memcontrol: cgroup_util.c +$(OUTPUT)/test_zswap: cgroup_util.c -- cgit v1.2.3-73-gaa49b From 9f34c566027b623854dabc86fde052b44e5240be Mon Sep 17 00:00:00 2001 From: Michal Koutný Date: Tue, 21 May 2024 11:21:30 +0200 Subject: selftests: cgroup: Add basic tests for pids controller MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds (and wires in) new test program for checking basic pids controller functionality -- restricting tasks in a cgroup and correct event counting. Signed-off-by: Michal Koutný Signed-off-by: Tejun Heo --- tools/testing/selftests/cgroup/.gitignore | 1 + tools/testing/selftests/cgroup/Makefile | 2 + tools/testing/selftests/cgroup/test_pids.c | 178 +++++++++++++++++++++++++++++ 3 files changed, 181 insertions(+) create mode 100644 tools/testing/selftests/cgroup/test_pids.c (limited to 'tools') diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore index ec635a0ef488..952e4448bf07 100644 --- a/tools/testing/selftests/cgroup/.gitignore +++ b/tools/testing/selftests/cgroup/.gitignore @@ -7,5 +7,6 @@ test_hugetlb_memcg test_kill test_kmem test_memcontrol +test_pids test_zswap wait_inotify diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile index b91f60f3402c..1b897152bab6 100644 --- a/tools/testing/selftests/cgroup/Makefile +++ b/tools/testing/selftests/cgroup/Makefile @@ -15,6 +15,7 @@ TEST_GEN_PROGS += test_hugetlb_memcg TEST_GEN_PROGS += test_kill TEST_GEN_PROGS += test_kmem TEST_GEN_PROGS += test_memcontrol +TEST_GEN_PROGS += test_pids TEST_GEN_PROGS += test_zswap LOCAL_HDRS += $(selfdir)/clone3/clone3_selftests.h $(selfdir)/pidfd/pidfd.h @@ -29,4 +30,5 @@ $(OUTPUT)/test_hugetlb_memcg: cgroup_util.c 
$(OUTPUT)/test_kill: cgroup_util.c $(OUTPUT)/test_kmem: cgroup_util.c $(OUTPUT)/test_memcontrol: cgroup_util.c +$(OUTPUT)/test_pids: cgroup_util.c $(OUTPUT)/test_zswap: cgroup_util.c diff --git a/tools/testing/selftests/cgroup/test_pids.c b/tools/testing/selftests/cgroup/test_pids.c new file mode 100644 index 000000000000..9ecb83c6cc5c --- /dev/null +++ b/tools/testing/selftests/cgroup/test_pids.c @@ -0,0 +1,178 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include + +#include "../kselftest.h" +#include "cgroup_util.h" + +static int run_success(const char *cgroup, void *arg) +{ + return 0; +} + +static int run_pause(const char *cgroup, void *arg) +{ + return pause(); +} + +/* + * This test checks that pids.max prevents forking new children above the + * specified limit in the cgroup. + */ +static int test_pids_max(const char *root) +{ + int ret = KSFT_FAIL; + char *cg_pids; + int pid; + + cg_pids = cg_name(root, "pids_test"); + if (!cg_pids) + goto cleanup; + + if (cg_create(cg_pids)) + goto cleanup; + + if (cg_read_strcmp(cg_pids, "pids.max", "max\n")) + goto cleanup; + + if (cg_write(cg_pids, "pids.max", "2")) + goto cleanup; + + if (cg_enter_current(cg_pids)) + goto cleanup; + + pid = cg_run_nowait(cg_pids, run_pause, NULL); + if (pid < 0) + goto cleanup; + + if (cg_run_nowait(cg_pids, run_success, NULL) != -1 || errno != EAGAIN) + goto cleanup; + + if (kill(pid, SIGINT)) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + cg_enter_current(root); + cg_destroy(cg_pids); + free(cg_pids); + + return ret; +} + +/* + * This test checks that pids.events are counted in cgroup associated with pids.max + */ +static int test_pids_events(const char *root) +{ + int ret = KSFT_FAIL; + char *cg_parent = NULL, *cg_child = NULL; + int pid; + + cg_parent = cg_name(root, "pids_parent"); + cg_child = cg_name(cg_parent, "pids_child"); + if (!cg_parent || !cg_child) + goto cleanup; + + if 
(cg_create(cg_parent)) + goto cleanup; + if (cg_write(cg_parent, "cgroup.subtree_control", "+pids")) + goto cleanup; + if (cg_create(cg_child)) + goto cleanup; + + if (cg_write(cg_parent, "pids.max", "2")) + goto cleanup; + + if (cg_read_strcmp(cg_child, "pids.max", "max\n")) + goto cleanup; + + if (cg_enter_current(cg_child)) + goto cleanup; + + pid = cg_run_nowait(cg_child, run_pause, NULL); + if (pid < 0) + goto cleanup; + + if (cg_run_nowait(cg_child, run_success, NULL) != -1 || errno != EAGAIN) + goto cleanup; + + if (kill(pid, SIGINT)) + goto cleanup; + + if (cg_read_key_long(cg_child, "pids.events", "max ") != 0) + goto cleanup; + if (cg_read_key_long(cg_parent, "pids.events", "max ") != 1) + goto cleanup; + + + ret = KSFT_PASS; + +cleanup: + cg_enter_current(root); + if (cg_child) + cg_destroy(cg_child); + if (cg_parent) + cg_destroy(cg_parent); + free(cg_child); + free(cg_parent); + + return ret; +} + + + +#define T(x) { x, #x } +struct pids_test { + int (*fn)(const char *root); + const char *name; +} tests[] = { + T(test_pids_max), + T(test_pids_events), +}; +#undef T + +int main(int argc, char **argv) +{ + char root[PATH_MAX]; + + ksft_print_header(); + ksft_set_plan(ARRAY_SIZE(tests)); + if (cg_find_unified_root(root, sizeof(root), NULL)) + ksft_exit_skip("cgroup v2 isn't mounted\n"); + + /* + * Check that pids controller is available: + * pids is listed in cgroup.controllers + */ + if (cg_read_strstr(root, "cgroup.controllers", "pids")) + ksft_exit_skip("pids controller isn't available\n"); + + if (cg_read_strstr(root, "cgroup.subtree_control", "pids")) + if (cg_write(root, "cgroup.subtree_control", "+pids")) + ksft_exit_skip("Failed to set pids controller\n"); + + for (int i = 0; i < ARRAY_SIZE(tests); i++) { + switch (tests[i].fn(root)) { + case KSFT_PASS: + ksft_test_result_pass("%s\n", tests[i].name); + break; + case KSFT_SKIP: + ksft_test_result_skip("%s\n", tests[i].name); + break; + default: + ksft_test_result_fail("%s\n", tests[i].name); + 
break; + } + } + + ksft_finished(); +} -- cgit v1.2.3-73-gaa49b From 3241d46f5f5450ddff255a136f2ebf3282065435 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 14 May 2024 23:01:13 -0700 Subject: perf pmus: Sort/merge/aggregate PMUs like mrvl_ddr_pmu The mrvl_ddr_pmu is uncore and has a hexadecimal address suffix while the previous PMU sorting/merging code assumes uncore PMU names start with uncore_ and have a decimal suffix. Because of the previous assumption it isn't possible to wildcard the mrvl_ddr_pmu. Modify pmu_name_len_no_suffix but also remove the suffix number out argument, this is because we don't know if a suffix number of say 100 is in hexadecimal or decimal. As the only use of the suffix number is in comparisons, it is safe there to compare the values as hexadecimal. Modify perf_pmu__match_ignoring_suffix so that hexadecimal suffixes are ignored. Only allow hexadecimal suffixes to be greater than length 2 (ie 3 or more) so that S390's cpum_cf PMU doesn't lose its suffix. Change the return type of pmu_name_len_no_suffix to size_t to workaround GCC incorrectly determining the result could be negative. 
Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Ravi Bangoria Cc: James Clark Cc: Robin Murphy Cc: Stephane Eranian Cc: Will Deacon Cc: Thomas Richter Cc: Bharat Bhushan Cc: Bhaskara Budiredla Cc: Tuan Phan Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240515060114.3268149-2-irogers@google.com --- tools/perf/util/pmu.c | 33 +++++++++++++++---------- tools/perf/util/pmus.c | 67 +++++++++++++++++++++++++++++--------------------- tools/perf/util/pmus.h | 7 +++++- 3 files changed, 65 insertions(+), 42 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 888ce9912275..c94a91645b21 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -856,26 +856,34 @@ __weak const struct pmu_metrics_table *pmu_metrics_table__find(void) */ static bool perf_pmu__match_ignoring_suffix(const char *pmu_name, const char *tok) { - const char *p; + const char *p, *suffix; + bool has_hex = false; if (strncmp(pmu_name, tok, strlen(tok))) return false; - p = pmu_name + strlen(tok); + suffix = p = pmu_name + strlen(tok); if (*p == 0) return true; - if (*p == '_') + if (*p == '_') { ++p; + ++suffix; + } /* Ensure we end in a number */ while (1) { - if (!isdigit(*p)) + if (!isxdigit(*p)) return false; + if (!has_hex) + has_hex = !isdigit(*p); if (*(++p) == 0) break; } + if (has_hex) + return (p - suffix) > 2; + return true; } @@ -1788,10 +1796,10 @@ static char *format_alias(char *buf, int len, const struct perf_pmu *pmu, const struct perf_pmu_alias *alias, bool skip_duplicate_pmus) { struct parse_events_term *term; - int pmu_name_len = skip_duplicate_pmus - ? pmu_name_len_no_suffix(pmu->name, /*num=*/NULL) - : (int)strlen(pmu->name); - int used = snprintf(buf, len, "%.*s/%s", pmu_name_len, pmu->name, alias->name); + size_t pmu_name_len = skip_duplicate_pmus + ? 
pmu_name_len_no_suffix(pmu->name) + : strlen(pmu->name); + int used = snprintf(buf, len, "%.*s/%s", (int)pmu_name_len, pmu->name, alias->name); list_for_each_entry(term, &alias->terms.terms, list) { if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) @@ -1828,13 +1836,12 @@ int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus, pmu_aliases_parse(pmu); pmu_add_cpu_aliases(pmu); list_for_each_entry(event, &pmu->aliases, list) { - size_t buf_used; - int pmu_name_len; + size_t buf_used, pmu_name_len; info.pmu_name = event->pmu_name ?: pmu->name; pmu_name_len = skip_duplicate_pmus - ? pmu_name_len_no_suffix(info.pmu_name, /*num=*/NULL) - : (int)strlen(info.pmu_name); + ? pmu_name_len_no_suffix(info.pmu_name) + : strlen(info.pmu_name); info.alias = NULL; if (event->desc) { info.name = event->name; @@ -1859,7 +1866,7 @@ int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus, info.encoding_desc = buf + buf_used; parse_events_terms__to_strbuf(&event->terms, &sb); buf_used += snprintf(buf + buf_used, sizeof(buf) - buf_used, - "%.*s/%s/", pmu_name_len, info.pmu_name, sb.buf) + 1; + "%.*s/%s/", (int)pmu_name_len, info.pmu_name, sb.buf) + 1; info.topic = event->topic; info.str = sb.buf; info.deprecated = event->deprecated; diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c index b9b4c5eb5002..63b9cf9ccfa7 100644 --- a/tools/perf/util/pmus.c +++ b/tools/perf/util/pmus.c @@ -40,31 +40,52 @@ static bool read_sysfs_all_pmus; static void pmu_read_sysfs(bool core_only); -int pmu_name_len_no_suffix(const char *str, unsigned long *num) +size_t pmu_name_len_no_suffix(const char *str) { int orig_len, len; + bool has_hex_digits = false; orig_len = len = strlen(str); - /* Non-uncore PMUs have their full length, for example, i915. */ - if (!strstarts(str, "uncore_")) - return len; - - /* - * Count trailing digits and '_', if '_{num}' suffix isn't present use - * the full length. 
- */ - while (len > 0 && isdigit(str[len - 1])) + /* Count trailing digits. */ + while (len > 0 && isxdigit(str[len - 1])) { + if (!isdigit(str[len - 1])) + has_hex_digits = true; len--; + } if (len > 0 && len != orig_len && str[len - 1] == '_') { - if (num) - *num = strtoul(&str[len], NULL, 10); - return len - 1; + /* + * There is a '_{num}' suffix. For decimal suffixes any length + * will do, for hexadecimal ensure more than 2 hex digits so + * that S390's cpum_cf PMU doesn't match. + */ + if (!has_hex_digits || (orig_len - len) > 2) + return len - 1; } + /* Use the full length. */ return orig_len; } +int pmu_name_cmp(const char *lhs_pmu_name, const char *rhs_pmu_name) +{ + unsigned long lhs_num = 0, rhs_num = 0; + size_t lhs_pmu_name_len = pmu_name_len_no_suffix(lhs_pmu_name); + size_t rhs_pmu_name_len = pmu_name_len_no_suffix(rhs_pmu_name); + int ret = strncmp(lhs_pmu_name, rhs_pmu_name, + lhs_pmu_name_len < rhs_pmu_name_len ? lhs_pmu_name_len : rhs_pmu_name_len); + + if (lhs_pmu_name_len != rhs_pmu_name_len || ret != 0 || lhs_pmu_name_len == 0) + return ret; + + if (lhs_pmu_name_len + 1 < strlen(lhs_pmu_name)) + lhs_num = strtoul(&lhs_pmu_name[lhs_pmu_name_len + 1], NULL, 16); + if (rhs_pmu_name_len + 1 < strlen(rhs_pmu_name)) + rhs_num = strtoul(&rhs_pmu_name[rhs_pmu_name_len + 1], NULL, 16); + + return lhs_num < rhs_num ? -1 : (lhs_num > rhs_num ? 
1 : 0); +} + void perf_pmus__destroy(void) { struct perf_pmu *pmu, *tmp; @@ -167,20 +188,10 @@ static struct perf_pmu *perf_pmu__find2(int dirfd, const char *name) static int pmus_cmp(void *priv __maybe_unused, const struct list_head *lhs, const struct list_head *rhs) { - unsigned long lhs_num = 0, rhs_num = 0; struct perf_pmu *lhs_pmu = container_of(lhs, struct perf_pmu, list); struct perf_pmu *rhs_pmu = container_of(rhs, struct perf_pmu, list); - const char *lhs_pmu_name = lhs_pmu->name ?: ""; - const char *rhs_pmu_name = rhs_pmu->name ?: ""; - int lhs_pmu_name_len = pmu_name_len_no_suffix(lhs_pmu_name, &lhs_num); - int rhs_pmu_name_len = pmu_name_len_no_suffix(rhs_pmu_name, &rhs_num); - int ret = strncmp(lhs_pmu_name, rhs_pmu_name, - lhs_pmu_name_len < rhs_pmu_name_len ? lhs_pmu_name_len : rhs_pmu_name_len); - - if (lhs_pmu_name_len != rhs_pmu_name_len || ret != 0 || lhs_pmu_name_len == 0) - return ret; - return lhs_num < rhs_num ? -1 : (lhs_num > rhs_num ? 1 : 0); + return pmu_name_cmp(lhs_pmu->name ?: "", rhs_pmu->name ?: ""); } /* Add all pmus in sysfs to pmu list: */ @@ -300,11 +311,11 @@ static struct perf_pmu *perf_pmus__scan_skip_duplicates(struct perf_pmu *pmu) pmu_read_sysfs(/*core_only=*/false); pmu = list_prepare_entry(pmu, &core_pmus, list); } else - last_pmu_name_len = pmu_name_len_no_suffix(pmu->name ?: "", NULL); + last_pmu_name_len = pmu_name_len_no_suffix(pmu->name ?: ""); if (use_core_pmus) { list_for_each_entry_continue(pmu, &core_pmus, list) { - int pmu_name_len = pmu_name_len_no_suffix(pmu->name ?: "", /*num=*/NULL); + int pmu_name_len = pmu_name_len_no_suffix(pmu->name ?: ""); if (last_pmu_name_len == pmu_name_len && !strncmp(last_pmu_name, pmu->name ?: "", pmu_name_len)) @@ -316,7 +327,7 @@ static struct perf_pmu *perf_pmus__scan_skip_duplicates(struct perf_pmu *pmu) pmu = list_prepare_entry(pmu, &other_pmus, list); } list_for_each_entry_continue(pmu, &other_pmus, list) { - int pmu_name_len = pmu_name_len_no_suffix(pmu->name ?: "", 
/*num=*/NULL); + int pmu_name_len = pmu_name_len_no_suffix(pmu->name ?: ""); if (last_pmu_name_len == pmu_name_len && !strncmp(last_pmu_name, pmu->name ?: "", pmu_name_len)) @@ -566,7 +577,7 @@ void perf_pmus__print_raw_pmu_events(const struct print_callbacks *print_cb, voi .long_string = STRBUF_INIT, .num_formats = 0, }; - int len = pmu_name_len_no_suffix(pmu->name, /*num=*/NULL); + int len = pmu_name_len_no_suffix(pmu->name); const char *desc = "(see 'man perf-list' or 'man perf-record' on how to encode it)"; if (!pmu->is_core) diff --git a/tools/perf/util/pmus.h b/tools/perf/util/pmus.h index 9d4ded80b8e9..bdbff02324bb 100644 --- a/tools/perf/util/pmus.h +++ b/tools/perf/util/pmus.h @@ -2,10 +2,15 @@ #ifndef __PMUS_H #define __PMUS_H +#include +#include + struct perf_pmu; struct print_callbacks; -int pmu_name_len_no_suffix(const char *str, unsigned long *num); +size_t pmu_name_len_no_suffix(const char *str); +/* Exposed for testing only. */ +int pmu_name_cmp(const char *lhs_pmu_name, const char *rhs_pmu_name); void perf_pmus__destroy(void); -- cgit v1.2.3-73-gaa49b From 678be1ca30cc939e0180c85b4cc9150b3d5ef0c8 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 14 May 2024 23:01:14 -0700 Subject: perf tests: Add some pmu core functionality tests Test behavior of PMU names and comparisons wrt suffixes using Intel uncore_cha, marvell mrvl_ddr_pmu and S390's cpum_cf as examples. 
Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Ravi Bangoria Cc: James Clark Cc: Robin Murphy Cc: Stephane Eranian Cc: Will Deacon Cc: Thomas Richter Cc: Bharat Bhushan Cc: Bhaskara Budiredla Cc: Tuan Phan Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240515060114.3268149-3-irogers@google.com --- tools/perf/tests/pmu.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) (limited to 'tools') diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c index 06cc0e46cb28..cc88b5920c3e 100644 --- a/tools/perf/tests/pmu.c +++ b/tools/perf/tests/pmu.c @@ -3,6 +3,7 @@ #include "evsel.h" #include "parse-events.h" #include "pmu.h" +#include "pmus.h" #include "tests.h" #include "debug.h" #include "fncache.h" @@ -340,10 +341,108 @@ static int test__pmu_event_names(struct test_suite *test __maybe_unused, return ret; } +static const char * const uncore_chas[] = { + "uncore_cha_0", + "uncore_cha_1", + "uncore_cha_2", + "uncore_cha_3", + "uncore_cha_4", + "uncore_cha_5", + "uncore_cha_6", + "uncore_cha_7", + "uncore_cha_8", + "uncore_cha_9", + "uncore_cha_10", + "uncore_cha_11", + "uncore_cha_12", + "uncore_cha_13", + "uncore_cha_14", + "uncore_cha_15", + "uncore_cha_16", + "uncore_cha_17", + "uncore_cha_18", + "uncore_cha_19", + "uncore_cha_20", + "uncore_cha_21", + "uncore_cha_22", + "uncore_cha_23", + "uncore_cha_24", + "uncore_cha_25", + "uncore_cha_26", + "uncore_cha_27", + "uncore_cha_28", + "uncore_cha_29", + "uncore_cha_30", + "uncore_cha_31", +}; + +static const char * const mrvl_ddrs[] = { + "mrvl_ddr_pmu_87e1b0000000", + "mrvl_ddr_pmu_87e1b1000000", + "mrvl_ddr_pmu_87e1b2000000", + "mrvl_ddr_pmu_87e1b3000000", + "mrvl_ddr_pmu_87e1b4000000", + "mrvl_ddr_pmu_87e1b5000000", + "mrvl_ddr_pmu_87e1b6000000", + "mrvl_ddr_pmu_87e1b7000000", + "mrvl_ddr_pmu_87e1b8000000", + "mrvl_ddr_pmu_87e1b9000000", + "mrvl_ddr_pmu_87e1ba000000", + "mrvl_ddr_pmu_87e1bb000000", + "mrvl_ddr_pmu_87e1bc000000", + 
"mrvl_ddr_pmu_87e1bd000000", + "mrvl_ddr_pmu_87e1be000000", + "mrvl_ddr_pmu_87e1bf000000", +}; + +static int test__name_len(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +{ + TEST_ASSERT_VAL("cpu", pmu_name_len_no_suffix("cpu") == strlen("cpu")); + TEST_ASSERT_VAL("i915", pmu_name_len_no_suffix("i915") == strlen("i915")); + TEST_ASSERT_VAL("cpum_cf", pmu_name_len_no_suffix("cpum_cf") == strlen("cpum_cf")); + for (size_t i = 0; i < ARRAY_SIZE(uncore_chas); i++) { + TEST_ASSERT_VAL("Strips uncore_cha suffix", + pmu_name_len_no_suffix(uncore_chas[i]) == + strlen("uncore_cha")); + } + for (size_t i = 0; i < ARRAY_SIZE(mrvl_ddrs); i++) { + TEST_ASSERT_VAL("Strips mrvl_ddr_pmu suffix", + pmu_name_len_no_suffix(mrvl_ddrs[i]) == + strlen("mrvl_ddr_pmu")); + } + return TEST_OK; +} + +static int test__name_cmp(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +{ + TEST_ASSERT_EQUAL("cpu", pmu_name_cmp("cpu", "cpu"), 0); + TEST_ASSERT_EQUAL("i915", pmu_name_cmp("i915", "i915"), 0); + TEST_ASSERT_EQUAL("cpum_cf", pmu_name_cmp("cpum_cf", "cpum_cf"), 0); + TEST_ASSERT_VAL("i915", pmu_name_cmp("cpu", "i915") < 0); + TEST_ASSERT_VAL("i915", pmu_name_cmp("i915", "cpu") > 0); + TEST_ASSERT_VAL("cpum_cf", pmu_name_cmp("cpum_cf", "cpum_ce") > 0); + TEST_ASSERT_VAL("cpum_cf", pmu_name_cmp("cpum_cf", "cpum_d0") < 0); + for (size_t i = 1; i < ARRAY_SIZE(uncore_chas); i++) { + TEST_ASSERT_VAL("uncore_cha suffixes ordered lt", + pmu_name_cmp(uncore_chas[i-1], uncore_chas[i]) < 0); + TEST_ASSERT_VAL("uncore_cha suffixes ordered gt", + pmu_name_cmp(uncore_chas[i], uncore_chas[i-1]) > 0); + } + for (size_t i = 1; i < ARRAY_SIZE(mrvl_ddrs); i++) { + TEST_ASSERT_VAL("mrvl_ddr_pmu suffixes ordered lt", + pmu_name_cmp(mrvl_ddrs[i-1], mrvl_ddrs[i]) < 0); + TEST_ASSERT_VAL("mrvl_ddr_pmu suffixes ordered gt", + pmu_name_cmp(mrvl_ddrs[i], mrvl_ddrs[i-1]) > 0); + } + return TEST_OK; +} + static struct test_case tests__pmu[] = { TEST_CASE("Parsing with PMU format 
directory", pmu_format), TEST_CASE("Parsing with PMU event", pmu_events), TEST_CASE("PMU event names", pmu_event_names), + TEST_CASE("PMU name combining", name_len), + TEST_CASE("PMU name comparison", name_cmp), { .name = NULL, } }; -- cgit v1.2.3-73-gaa49b From cbd446b4db7efce27311f3186f81c2a2d906dd60 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 13 May 2024 22:24:02 -0700 Subject: perf arm-spe: Unaligned pointer work around Use get_unaligned_leXX instead of leXX_to_cpu to handle unaligned pointers. Such pointers occur with libFuzzer testing. A similar change for intel-pt was done in: https://lore.kernel.org/r/20231005190451.175568-6-adrian.hunter@intel.com Signed-off-by: Ian Rogers Reviewed-by: James Clark Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240514052402.3031871-1-irogers@google.com --- .../util/arm-spe-decoder/arm-spe-pkt-decoder.c | 23 +++++----------------- 1 file changed, 5 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index a454c6737563..7bf607d0f6d8 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -10,24 +10,11 @@ #include #include #include +#include +#include #include "arm-spe-pkt-decoder.h" -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -#define le16_to_cpu bswap_16 -#define le32_to_cpu bswap_32 -#define le64_to_cpu bswap_64 -#define memcpy_le64(d, s, n) do { \ - memcpy((d), (s), (n)); \ - *(d) = le64_to_cpu(*(d)); \ -} while (0) -#else -#define le16_to_cpu -#define le32_to_cpu -#define le64_to_cpu -#define memcpy_le64 memcpy -#endif - static const char * const arm_spe_packet_name[] = { [ARM_SPE_PAD] = "PAD", [ARM_SPE_END] = "END", @@ -70,9 +57,9 @@ static int arm_spe_get_payload(const unsigned char *buf, size_t len, switch (payload_len) { case 1: packet->payload = *(uint8_t *)buf; break; - case 2: packet->payload = 
le16_to_cpu(*(uint16_t *)buf); break; - case 4: packet->payload = le32_to_cpu(*(uint32_t *)buf); break; - case 8: packet->payload = le64_to_cpu(*(uint64_t *)buf); break; + case 2: packet->payload = get_unaligned_le16(buf); break; + case 4: packet->payload = get_unaligned_le32(buf); break; + case 8: packet->payload = get_unaligned_le64(buf); break; default: return ARM_SPE_BAD_PACKET; } -- cgit v1.2.3-73-gaa49b From 265b71153e1ac270546f1d0d2a59a565947f2ed3 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 17 May 2024 07:14:26 -0700 Subject: perf list: Fix the --no-desc option Currently, the --no-desc option in perf list isn't functioning as intended. This issue arises from the overwriting of struct option->desc with the opposite value of struct option->long_desc. Consequently, whatever parse_options() returns at struct option->desc gets overridden later, rendering the --desc or --no-desc arguments ineffective. To resolve this, set ->desc as true by default and allow parse_options() to adjust it accordingly. This adjustment will fix the --no-desc option while preserving the functionality of the other parameters. 
Signed-off-by: Breno Leitao Reviewed-by: Ian Rogers Cc: leit@meta.com Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240517141427.1905691-1-leitao@debian.org --- tools/perf/builtin-list.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 5cab31231551..82cb4b1010aa 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -162,7 +162,11 @@ static void default_print_event(void *ps, const char *pmu_name, const char *topi } else fputc('\n', fp); - if (desc && print_state->desc) { + if (long_desc && print_state->long_desc) { + fprintf(fp, "%*s", 8, "["); + wordwrap(fp, long_desc, 8, pager_get_columns(), 0); + fprintf(fp, "]\n"); + } else if (desc && print_state->desc) { char *desc_with_unit = NULL; int desc_len = -1; @@ -178,12 +182,6 @@ static void default_print_event(void *ps, const char *pmu_name, const char *topi fprintf(fp, "]\n"); free(desc_with_unit); } - long_desc = long_desc ?: desc; - if (long_desc && print_state->long_desc) { - fprintf(fp, "%*s", 8, "["); - wordwrap(fp, long_desc, 8, pager_get_columns(), 0); - fprintf(fp, "]\n"); - } if (print_state->detailed && encoding_desc) { fprintf(fp, "%*s", 8, ""); @@ -256,15 +254,14 @@ static void default_print_metric(void *ps, } fprintf(fp, " %s\n", name); - if (desc && print_state->desc) { - fprintf(fp, "%*s", 8, "["); - wordwrap(fp, desc, 8, pager_get_columns(), 0); - fprintf(fp, "]\n"); - } if (long_desc && print_state->long_desc) { fprintf(fp, "%*s", 8, "["); wordwrap(fp, long_desc, 8, pager_get_columns(), 0); fprintf(fp, "]\n"); + } else if (desc && print_state->desc) { + fprintf(fp, "%*s", 8, "["); + wordwrap(fp, desc, 8, pager_get_columns(), 0); + fprintf(fp, "]\n"); } if (expr && print_state->detailed) { fprintf(fp, "%*s", 8, "["); @@ -507,6 +504,7 @@ int cmd_list(int argc, const char **argv) int i, ret = 0; struct print_state default_ps = { .fp = stdout, + 
.desc = true, }; struct print_state json_ps = { .fp = stdout, @@ -579,7 +577,6 @@ int cmd_list(int argc, const char **argv) }; ps = &json_ps; } else { - default_ps.desc = !default_ps.long_desc; default_ps.last_topic = strdup(""); assert(default_ps.last_topic); default_ps.visited_metrics = strlist__new(NULL, NULL); -- cgit v1.2.3-73-gaa49b From eb4e7726279a344c82e3c23be396bcfd0a4d5669 Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Fri, 24 May 2024 14:18:40 +0100 Subject: libbpf: Configure log verbosity with env variable Configure logging verbosity by setting LIBBPF_LOG_LEVEL environment variable, which is applied only to default logger. Once user set their custom logging callback, it is up to them to handle filtering. Signed-off-by: Mykyta Yatsenko Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240524131840.114289-1-yatsenko@meta.com --- Documentation/bpf/libbpf/libbpf_overview.rst | 8 ++++++++ tools/lib/bpf/libbpf.c | 25 ++++++++++++++++++++++++- tools/lib/bpf/libbpf.h | 5 ++++- 3 files changed, 36 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/Documentation/bpf/libbpf/libbpf_overview.rst b/Documentation/bpf/libbpf/libbpf_overview.rst index f36a2d4ffea2..f4d22f0c62b0 100644 --- a/Documentation/bpf/libbpf/libbpf_overview.rst +++ b/Documentation/bpf/libbpf/libbpf_overview.rst @@ -219,6 +219,14 @@ compilation and skeleton generation. Using Libbpf-rs will make building user space part of the BPF application easier. Note that the BPF program themselves must still be written in plain C. +libbpf logging +============== + +By default, libbpf logs informational and warning messages to stderr. The +verbosity of these messages can be controlled by setting the environment +variable LIBBPF_LOG_LEVEL to either warn, info, or debug. A custom log +callback can be set using ``libbpf_set_print()``. 
+ Additional Documentation ======================== diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 5401f2df463d..d1627a2ca30b 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -229,7 +229,30 @@ static const char * const prog_type_name[] = { static int __base_pr(enum libbpf_print_level level, const char *format, va_list args) { - if (level == LIBBPF_DEBUG) + const char *env_var = "LIBBPF_LOG_LEVEL"; + static enum libbpf_print_level min_level = LIBBPF_INFO; + static bool initialized; + + if (!initialized) { + char *verbosity; + + initialized = true; + verbosity = getenv(env_var); + if (verbosity) { + if (strcasecmp(verbosity, "warn") == 0) + min_level = LIBBPF_WARN; + else if (strcasecmp(verbosity, "debug") == 0) + min_level = LIBBPF_DEBUG; + else if (strcasecmp(verbosity, "info") == 0) + min_level = LIBBPF_INFO; + else + fprintf(stderr, "libbpf: unrecognized '%s' envvar value: '%s', should be one of 'warn', 'debug', or 'info'.\n", + env_var, verbosity); + } + } + + /* if too verbose, skip logging */ + if (level > min_level) return 0; return vfprintf(stderr, format, args); diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index c3f77d9260fe..26e4e35528c5 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -98,7 +98,10 @@ typedef int (*libbpf_print_fn_t)(enum libbpf_print_level level, /** * @brief **libbpf_set_print()** sets user-provided log callback function to - * be used for libbpf warnings and informational messages. + * be used for libbpf warnings and informational messages. If the user callback + * is not set, messages are logged to stderr by default. The verbosity of these + * messages can be controlled by setting the environment variable + * LIBBPF_LOG_LEVEL to either warn, info, or debug. * @param fn The log print function. If NULL, libbpf won't print anything. * @return Pointer to old print function. 
* -- cgit v1.2.3-73-gaa49b From ed31adf6874db172e3212ac1ebaf701ed6190650 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Sat, 25 May 2024 20:08:15 +0800 Subject: selftests/bpf: Drop struct post_socket_opts It's not possible to have one generic/common "struct post_socket_opts" for all tests. It's better to have the individual test define its own callback opts struct. So this patch drops struct post_socket_opts, and changes the second parameter of post_socket_cb as "void *" type. Signed-off-by: Geliang Tang Link: https://lore.kernel.org/r/f8bda41c7cb9cb6979b2779f89fb3a684234304f.1716638248.git.tanggeliang@kylinos.cn Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/network_helpers.c | 2 +- tools/testing/selftests/bpf/network_helpers.h | 4 +--- tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c | 2 +- tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c | 4 ++-- 4 files changed, 5 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index 35250e6cde7f..4d776b78929c 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -133,7 +133,7 @@ int start_server(int family, int type, const char *addr_str, __u16 port, return __start_server(type, (struct sockaddr *)&addr, addrlen, &opts); } -static int reuseport_cb(int fd, const struct post_socket_opts *opts) +static int reuseport_cb(int fd, void *opts) { int on = 1; diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index 883c7ea9d8d5..40011e0f584b 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -21,8 +21,6 @@ typedef __u16 __sum16; #define VIP_NUM 5 #define MAGIC_BYTES 123 -struct post_socket_opts {}; - struct network_helper_opts { const char *cc; int timeout_ms; @@ -30,7 +28,7 @@ struct network_helper_opts { bool 
noconnect; int type; int proto; - int (*post_socket_cb)(int fd, const struct post_socket_opts *opts); + int (*post_socket_cb)(int fd, void *opts); }; /* ipv4 test vector */ diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c index 1d3a20f01b60..7cd8be2780ca 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c @@ -70,7 +70,7 @@ static void *server_thread(void *arg) return (void *)(long)err; } -static int custom_cb(int fd, const struct post_socket_opts *opts) +static int custom_cb(int fd, void *opts) { char buf; int err; diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c index 7b5fc98838cd..aebc58c24dc5 100644 --- a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c +++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c @@ -139,14 +139,14 @@ out: return ret; } -static int v6only_true(int fd, const struct post_socket_opts *opts) +static int v6only_true(int fd, void *opts) { int mode = true; return setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &mode, sizeof(mode)); } -static int v6only_false(int fd, const struct post_socket_opts *opts) +static int v6only_false(int fd, void *opts) { int mode = false; -- cgit v1.2.3-73-gaa49b From 6f802cb8988e8e41f2fdb74ac949d3a0ef9a9594 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Sat, 25 May 2024 20:08:16 +0800 Subject: selftests/bpf: Add start_server_str helper It's a tech debt that start_server() does not take the "opts" argument. It's pretty handy to have start_server() as a helper that takes string address. So this patch creates a new helper start_server_str(). Then start_server() can be a wrapper of it. 
Signed-off-by: Geliang Tang Link: https://lore.kernel.org/r/606e6cfd7e1aff8bc51ede49862eed0802e52170.1716638248.git.tanggeliang@kylinos.cn Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/network_helpers.c | 22 ++++++++++++++++------ tools/testing/selftests/bpf/network_helpers.h | 2 ++ 2 files changed, 18 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index 4d776b78929c..0e8266f439e4 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -118,19 +118,29 @@ error_close: return -1; } -int start_server(int family, int type, const char *addr_str, __u16 port, - int timeout_ms) +int start_server_str(int family, int type, const char *addr_str, __u16 port, + const struct network_helper_opts *opts) { - struct network_helper_opts opts = { - .timeout_ms = timeout_ms, - }; struct sockaddr_storage addr; socklen_t addrlen; + if (!opts) + opts = &default_opts; + if (make_sockaddr(family, addr_str, port, &addr, &addrlen)) return -1; - return __start_server(type, (struct sockaddr *)&addr, addrlen, &opts); + return __start_server(type, (struct sockaddr *)&addr, addrlen, opts); +} + +int start_server(int family, int type, const char *addr_str, __u16 port, + int timeout_ms) +{ + struct network_helper_opts opts = { + .timeout_ms = timeout_ms, + }; + + return start_server_str(family, type, addr_str, port, &opts); } static int reuseport_cb(int fd, void *opts) diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index 40011e0f584b..4e3e6afe7d3a 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -48,6 +48,8 @@ struct ipv6_packet { extern struct ipv6_packet pkt_v6; int settimeo(int fd, int timeout_ms); +int start_server_str(int family, int type, const char *addr_str, __u16 port, + const struct 
network_helper_opts *opts); int start_server(int family, int type, const char *addr, __u16 port, int timeout_ms); int *start_reuseport_server(int family, int type, const char *addr_str, -- cgit v1.2.3-73-gaa49b From e078255abd53ac44c9133fd98d51645dbd196123 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Sat, 25 May 2024 20:08:17 +0800 Subject: selftests/bpf: Use post_socket_cb in connect_to_fd_opts Since the post_socket_cb() callback is added in struct network_helper_opts, it makes sense to use it not only in __start_server(), but also in connect_to_fd_opts(). Then it can be used to set TCP_CONGESTION sockopt. Add a "void *" type member cb_opts into struct network_helper_opts, and add a new struct named cb_opts in prog_tests/bpf_tcp_ca.c, then cc can be moved into struct cb_opts from network_helper_opts. Define a new callback cc_cb() to set TCP_CONGESTION sockopt, and set it to post_socket_cb pointer of opts. Define a new cb_opts cubic, set it to cb_opts of opts. Pass this opts to connect_to_fd_opts() in test_dctcp_fallback(). 
Signed-off-by: Geliang Tang Link: https://lore.kernel.org/r/b512bb8d8f6854c9ea5c409b69d1bf37c6f272c6.1716638248.git.tanggeliang@kylinos.cn Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/network_helpers.c | 5 ++--- tools/testing/selftests/bpf/network_helpers.h | 2 +- tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c | 17 ++++++++++++++++- 3 files changed, 19 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index 0e8266f439e4..8502917b6c7b 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -348,9 +348,8 @@ int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts) if (settimeo(fd, opts->timeout_ms)) goto error_close; - if (opts->cc && opts->cc[0] && - setsockopt(fd, SOL_TCP, TCP_CONGESTION, opts->cc, - strlen(opts->cc) + 1)) + if (opts->post_socket_cb && + opts->post_socket_cb(fd, opts->cb_opts)) goto error_close; if (!opts->noconnect) diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index 4e3e6afe7d3a..11eea8e2e4f1 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -22,13 +22,13 @@ typedef __u16 __sum16; #define MAGIC_BYTES 123 struct network_helper_opts { - const char *cc; int timeout_ms; bool must_fail; bool noconnect; int type; int proto; int (*post_socket_cb)(int fd, void *opts); + void *cb_opts; }; /* ipv4 test vector */ diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index 0aca02532794..6f034059888a 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -23,6 +23,10 @@ static const unsigned int total_bytes = 10 * 1024 * 1024; static int expected_stg = 0xeB9F; +struct cb_opts { + const char *cc; 
+}; + static int settcpca(int fd, const char *tcp_ca) { int err; @@ -81,6 +85,13 @@ done: close(fd); } +static int cc_cb(int fd, void *opts) +{ + struct cb_opts *cb_opts = (struct cb_opts *)opts; + + return settcpca(fd, cb_opts->cc); +} + static void test_cubic(void) { struct bpf_cubic *cubic_skel; @@ -172,10 +183,13 @@ static void test_dctcp_fallback(void) { int err, lfd = -1, cli_fd = -1, srv_fd = -1; struct network_helper_opts opts = { - .cc = "cubic", + .post_socket_cb = cc_cb, }; struct bpf_dctcp *dctcp_skel; struct bpf_link *link = NULL; + struct cb_opts cubic = { + .cc = "cubic", + }; char srv_cc[16]; socklen_t cc_len = sizeof(srv_cc); @@ -195,6 +209,7 @@ static void test_dctcp_fallback(void) !ASSERT_OK(settcpca(lfd, "bpf_dctcp"), "lfd=>bpf_dctcp")) goto done; + opts.cb_opts = &cubic; cli_fd = connect_to_fd_opts(lfd, &opts); if (!ASSERT_GE(cli_fd, 0, "cli_fd")) goto done; -- cgit v1.2.3-73-gaa49b From 79b330c57debe6b15f441e999bb62042afd5b08e Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Sat, 25 May 2024 20:08:18 +0800 Subject: selftests/bpf: Use post_socket_cb in start_server_str This patch uses start_server_str() helper in test_dctcp_fallback() in bpf_tcp_ca.c, instead of using start_server() and settcpca(). For support opts in start_server_str() helper, opts->cb_opts needs to be passed to post_socket_cb() in __start_server(). 
Signed-off-by: Geliang Tang Link: https://lore.kernel.org/r/414c749321fa150435f7fe8e12c80fec8b447c78.1716638248.git.tanggeliang@kylinos.cn Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/network_helpers.c | 3 ++- tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c | 9 ++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index 8502917b6c7b..e20caef06aae 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -94,7 +94,8 @@ static int __start_server(int type, const struct sockaddr *addr, socklen_t addrl if (settimeo(fd, opts->timeout_ms)) goto error_close; - if (opts->post_socket_cb && opts->post_socket_cb(fd, NULL)) { + if (opts->post_socket_cb && + opts->post_socket_cb(fd, opts->cb_opts)) { log_err("Failed to call post_socket_cb"); goto error_close; } diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index 6f034059888a..7a523c7a0c6a 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -187,6 +187,9 @@ static void test_dctcp_fallback(void) }; struct bpf_dctcp *dctcp_skel; struct bpf_link *link = NULL; + struct cb_opts dctcp = { + .cc = "bpf_dctcp", + }; struct cb_opts cubic = { .cc = "cubic", }; @@ -204,9 +207,9 @@ static void test_dctcp_fallback(void) if (!ASSERT_OK_PTR(link, "dctcp link")) goto done; - lfd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); - if (!ASSERT_GE(lfd, 0, "lfd") || - !ASSERT_OK(settcpca(lfd, "bpf_dctcp"), "lfd=>bpf_dctcp")) + opts.cb_opts = &dctcp; + lfd = start_server_str(AF_INET6, SOCK_STREAM, "::1", 0, &opts); + if (!ASSERT_GE(lfd, 0, "lfd")) goto done; opts.cb_opts = &cubic; -- cgit v1.2.3-73-gaa49b From ed61271af5230cef9b9329bb1eacc1b1a9800d07 Mon Sep 17 00:00:00 2001 From: Geliang Tang 
Date: Sat, 25 May 2024 20:08:19 +0800 Subject: selftests/bpf: Use start_server_str in do_test in bpf_tcp_ca This patch uses new helper start_server_str() in do_test() in bpf_tcp_ca.c to accept a struct network_helper_opts argument instead of using start_server() and settcpca(). Then change the type of the first parameter of do_test() into a struct network_helper_opts one. Define its own cb_opts and opts for each test, set its own cc name into cb_opts.cc, and cc_cb() into post_socket_cb callback, then pass it to do_test(). Signed-off-by: Geliang Tang Link: https://lore.kernel.org/r/6e1b6555e3284e77c8aa60668c61a66c5f99aa37.1716638248.git.tanggeliang@kylinos.cn Signed-off-by: Martin KaFai Lau --- .../testing/selftests/bpf/prog_tests/bpf_tcp_ca.c | 66 ++++++++++++++++++---- 1 file changed, 55 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index 7a523c7a0c6a..ebc7d4616880 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -38,12 +38,14 @@ static int settcpca(int fd, const char *tcp_ca) return 0; } -static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map) +static void do_test(const struct network_helper_opts *opts, + const struct bpf_map *sk_stg_map) { + struct cb_opts *cb_opts = (struct cb_opts *)opts->cb_opts; int lfd = -1, fd = -1; int err; - lfd = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0); + lfd = start_server_str(AF_INET6, SOCK_STREAM, NULL, 0, opts); if (!ASSERT_NEQ(lfd, -1, "socket")) return; @@ -53,7 +55,7 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map) return; } - if (settcpca(lfd, tcp_ca) || settcpca(fd, tcp_ca)) + if (settcpca(fd, cb_opts->cc)) goto done; if (sk_stg_map) { @@ -94,6 +96,13 @@ static int cc_cb(int fd, void *opts) static void test_cubic(void) { + struct cb_opts cb_opts = { + .cc = "bpf_cubic", + }; 
+ struct network_helper_opts opts = { + .post_socket_cb = cc_cb, + .cb_opts = &cb_opts, + }; struct bpf_cubic *cubic_skel; struct bpf_link *link; @@ -107,7 +116,7 @@ static void test_cubic(void) return; } - do_test("bpf_cubic", NULL); + do_test(&opts, NULL); ASSERT_EQ(cubic_skel->bss->bpf_cubic_acked_called, 1, "pkts_acked called"); @@ -117,6 +126,13 @@ static void test_cubic(void) static void test_dctcp(void) { + struct cb_opts cb_opts = { + .cc = "bpf_dctcp", + }; + struct network_helper_opts opts = { + .post_socket_cb = cc_cb, + .cb_opts = &cb_opts, + }; struct bpf_dctcp *dctcp_skel; struct bpf_link *link; @@ -130,7 +146,7 @@ static void test_dctcp(void) return; } - do_test("bpf_dctcp", dctcp_skel->maps.sk_stg_map); + do_test(&opts, dctcp_skel->maps.sk_stg_map); ASSERT_EQ(dctcp_skel->bss->stg_result, expected_stg, "stg_result"); bpf_link__destroy(link); @@ -315,6 +331,13 @@ static void test_unsupp_cong_op(void) static void test_update_ca(void) { + struct cb_opts cb_opts = { + .cc = "tcp_ca_update", + }; + struct network_helper_opts opts = { + .post_socket_cb = cc_cb, + .cb_opts = &cb_opts, + }; struct tcp_ca_update *skel; struct bpf_link *link; int saved_ca1_cnt; @@ -327,14 +350,14 @@ static void test_update_ca(void) link = bpf_map__attach_struct_ops(skel->maps.ca_update_1); ASSERT_OK_PTR(link, "attach_struct_ops"); - do_test("tcp_ca_update", NULL); + do_test(&opts, NULL); saved_ca1_cnt = skel->bss->ca1_cnt; ASSERT_GT(saved_ca1_cnt, 0, "ca1_ca1_cnt"); err = bpf_link__update_map(link, skel->maps.ca_update_2); ASSERT_OK(err, "update_map"); - do_test("tcp_ca_update", NULL); + do_test(&opts, NULL); ASSERT_EQ(skel->bss->ca1_cnt, saved_ca1_cnt, "ca2_ca1_cnt"); ASSERT_GT(skel->bss->ca2_cnt, 0, "ca2_ca2_cnt"); @@ -344,6 +367,13 @@ static void test_update_ca(void) static void test_update_wrong(void) { + struct cb_opts cb_opts = { + .cc = "tcp_ca_update", + }; + struct network_helper_opts opts = { + .post_socket_cb = cc_cb, + .cb_opts = &cb_opts, + }; struct tcp_ca_update 
*skel; struct bpf_link *link; int saved_ca1_cnt; @@ -356,14 +386,14 @@ static void test_update_wrong(void) link = bpf_map__attach_struct_ops(skel->maps.ca_update_1); ASSERT_OK_PTR(link, "attach_struct_ops"); - do_test("tcp_ca_update", NULL); + do_test(&opts, NULL); saved_ca1_cnt = skel->bss->ca1_cnt; ASSERT_GT(saved_ca1_cnt, 0, "ca1_ca1_cnt"); err = bpf_link__update_map(link, skel->maps.ca_wrong); ASSERT_ERR(err, "update_map"); - do_test("tcp_ca_update", NULL); + do_test(&opts, NULL); ASSERT_GT(skel->bss->ca1_cnt, saved_ca1_cnt, "ca2_ca1_cnt"); bpf_link__destroy(link); @@ -372,6 +402,13 @@ static void test_update_wrong(void) static void test_mixed_links(void) { + struct cb_opts cb_opts = { + .cc = "tcp_ca_update", + }; + struct network_helper_opts opts = { + .post_socket_cb = cc_cb, + .cb_opts = &cb_opts, + }; struct tcp_ca_update *skel; struct bpf_link *link, *link_nl; int err; @@ -386,7 +423,7 @@ static void test_mixed_links(void) link = bpf_map__attach_struct_ops(skel->maps.ca_update_1); ASSERT_OK_PTR(link, "attach_struct_ops"); - do_test("tcp_ca_update", NULL); + do_test(&opts, NULL); ASSERT_GT(skel->bss->ca1_cnt, 0, "ca1_ca1_cnt"); err = bpf_link__update_map(link, skel->maps.ca_no_link); @@ -473,6 +510,13 @@ static void test_tcp_ca_kfunc(void) static void test_cc_cubic(void) { + struct cb_opts cb_opts = { + .cc = "bpf_cc_cubic", + }; + struct network_helper_opts opts = { + .post_socket_cb = cc_cb, + .cb_opts = &cb_opts, + }; struct bpf_cc_cubic *cc_cubic_skel; struct bpf_link *link; @@ -486,7 +530,7 @@ static void test_cc_cubic(void) return; } - do_test("bpf_cc_cubic", NULL); + do_test(&opts, NULL); bpf_link__destroy(link); bpf_cc_cubic__destroy(cc_cubic_skel); -- cgit v1.2.3-73-gaa49b From a93c83eca48a4ffb8e57cb0c7cc2e3935744d2c6 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 21 May 2024 15:35:55 -0700 Subject: perf docs: Fix typos Assorted typo fixes. 
Signed-off-by: Ian Rogers Reviewed-by: James Clark Cc: Changbin Du Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240521223555.858859-1-irogers@google.com --- tools/perf/Documentation/perf-kwork.txt | 4 ++-- tools/perf/Documentation/perf-mem.txt | 2 +- tools/perf/Documentation/perf-record.txt | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-kwork.txt b/tools/perf/Documentation/perf-kwork.txt index 109ace1d5e90..21e607669d78 100644 --- a/tools/perf/Documentation/perf-kwork.txt +++ b/tools/perf/Documentation/perf-kwork.txt @@ -1,4 +1,4 @@ -perf-kowrk(1) +perf-kwork(1) ============= NAME @@ -35,7 +35,7 @@ There are several variants of 'perf kwork': perf kwork top perf kwork top -b - By default it shows the individual work events such as irq, workqeueu, + By default it shows the individual work events such as irq, workqueue, including the run time and delay (time between raise and actually entry): Runtime start Runtime end Cpu Kwork name Runtime Delaytime diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt index 19862572e3f2..47456b212e99 100644 --- a/tools/perf/Documentation/perf-mem.txt +++ b/tools/perf/Documentation/perf-mem.txt @@ -21,7 +21,7 @@ and stores are sampled. Use the -t option to limit to loads or stores. Note that on Intel systems the memory latency reported is the use-latency, not the pure load (or store latency). Use latency includes any pipeline -queueing delays in addition to the memory subsystem latency. +queuing delays in addition to the memory subsystem latency. On Arm64 this uses SPE to sample load and store operations, therefore hardware and kernel support is required. See linkperf:perf-arm-spe[1] for a setup guide. 
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 6015fdd08fb6..77578c0a142a 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -311,7 +311,7 @@ OPTIONS User can change the size by passing the size after comma like "--call-graph dwarf,4096". - When "fp" recording is used, perf tries to save stack enties + When "fp" recording is used, perf tries to save stack entries up to the number specified in sysctl.kernel.perf_event_max_stack by default. User can change the number by passing it after comma like "--call-graph fp,32". -- cgit v1.2.3-73-gaa49b From 6ba7acdb93b4ecb554d5838fca3f5f0fcf9fff14 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Fri, 17 May 2024 10:30:34 +0800 Subject: selftests/bpf: Add selftest for bits iter Add test cases for the bits iter: - Positive cases - Bit mask representing a single word (8-byte unit) - Bit mask representing data spanning more than one word - The index of the set bit - Negative cases - bpf_iter_bits_destroy() is required after calling bpf_iter_bits_new() - bpf_iter_bits_destroy() can only destroy an initialized iter - bpf_iter_bits_next() must use an initialized iter - Bit mask representing zero words - Bit mask representing fewer words than expected - Case for ENOMEM - Case for NULL pointer Signed-off-by: Yafang Shao Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240517023034.48138-3-laoar.shao@gmail.com --- tools/testing/selftests/bpf/prog_tests/verifier.c | 2 + .../selftests/bpf/progs/verifier_bits_iter.c | 153 +++++++++++++++++++++ 2 files changed, 155 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/verifier_bits_iter.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index c60db8beeb73..8743340b5bf6 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ 
b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -85,6 +85,7 @@ #include "verifier_xadd.skel.h" #include "verifier_xdp.skel.h" #include "verifier_xdp_direct_packet_access.skel.h" +#include "verifier_bits_iter.skel.h" #define MAX_ENTRIES 11 @@ -200,6 +201,7 @@ void test_verifier_var_off(void) { RUN(verifier_var_off); } void test_verifier_xadd(void) { RUN(verifier_xadd); } void test_verifier_xdp(void) { RUN(verifier_xdp); } void test_verifier_xdp_direct_packet_access(void) { RUN(verifier_xdp_direct_packet_access); } +void test_verifier_bits_iter(void) { RUN(verifier_bits_iter); } static int init_test_val_map(struct bpf_object *obj, char *map_name) { diff --git a/tools/testing/selftests/bpf/progs/verifier_bits_iter.c b/tools/testing/selftests/bpf/progs/verifier_bits_iter.c new file mode 100644 index 000000000000..716113c2bce2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_bits_iter.c @@ -0,0 +1,153 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2024 Yafang Shao */ + +#include "vmlinux.h" +#include +#include + +#include "bpf_misc.h" +#include "task_kfunc_common.h" + +char _license[] SEC("license") = "GPL"; + +int bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, + u32 nr_bits) __ksym __weak; +int *bpf_iter_bits_next(struct bpf_iter_bits *it) __ksym __weak; +void bpf_iter_bits_destroy(struct bpf_iter_bits *it) __ksym __weak; + +SEC("iter.s/cgroup") +__description("bits iter without destroy") +__failure __msg("Unreleased reference") +int BPF_PROG(no_destroy, struct bpf_iter_meta *meta, struct cgroup *cgrp) +{ + struct bpf_iter_bits it; + u64 data = 1; + + bpf_iter_bits_new(&it, &data, 1); + bpf_iter_bits_next(&it); + return 0; +} + +SEC("iter/cgroup") +__description("uninitialized iter in ->next()") +__failure __msg("expected an initialized iter_bits as arg #1") +int BPF_PROG(next_uninit, struct bpf_iter_meta *meta, struct cgroup *cgrp) +{ + struct bpf_iter_bits *it = NULL; + + bpf_iter_bits_next(it); + return 
0; +} + +SEC("iter/cgroup") +__description("uninitialized iter in ->destroy()") +__failure __msg("expected an initialized iter_bits as arg #1") +int BPF_PROG(destroy_uninit, struct bpf_iter_meta *meta, struct cgroup *cgrp) +{ + struct bpf_iter_bits it = {}; + + bpf_iter_bits_destroy(&it); + return 0; +} + +SEC("syscall") +__description("null pointer") +__success __retval(0) +int null_pointer(void) +{ + int nr = 0; + int *bit; + + bpf_for_each(bits, bit, NULL, 1) + nr++; + return nr; +} + +SEC("syscall") +__description("bits copy") +__success __retval(10) +int bits_copy(void) +{ + u64 data = 0xf7310UL; /* 4 + 3 + 2 + 1 + 0*/ + int nr = 0; + int *bit; + + bpf_for_each(bits, bit, &data, 1) + nr++; + return nr; +} + +SEC("syscall") +__description("bits memalloc") +__success __retval(64) +int bits_memalloc(void) +{ + u64 data[2]; + int nr = 0; + int *bit; + + __builtin_memset(&data, 0xf0, sizeof(data)); /* 4 * 16 */ + bpf_for_each(bits, bit, &data[0], sizeof(data) / sizeof(u64)) + nr++; + return nr; +} + +SEC("syscall") +__description("bit index") +__success __retval(8) +int bit_index(void) +{ + u64 data = 0x100; + int bit_idx = 0; + int *bit; + + bpf_for_each(bits, bit, &data, 1) { + if (*bit == 0) + continue; + bit_idx = *bit; + } + return bit_idx; +} + +SEC("syscall") +__description("bits nomem") +__success __retval(0) +int bits_nomem(void) +{ + u64 data[4]; + int nr = 0; + int *bit; + + __builtin_memset(&data, 0xff, sizeof(data)); + bpf_for_each(bits, bit, &data[0], 513) /* Be greater than 512 */ + nr++; + return nr; +} + +SEC("syscall") +__description("fewer words") +__success __retval(1) +int fewer_words(void) +{ + u64 data[2] = {0x1, 0xff}; + int nr = 0; + int *bit; + + bpf_for_each(bits, bit, &data[0], 1) + nr++; + return nr; +} + +SEC("syscall") +__description("zero words") +__success __retval(0) +int zero_words(void) +{ + u64 data[2] = {0x1, 0xff}; + int nr = 0; + int *bit; + + bpf_for_each(bits, bit, &data[0], 0) + nr++; + return nr; +} -- cgit 
v1.2.3-73-gaa49b From c697f515b6390b17e4a54a6f93aedf27133929c5 Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Tue, 28 May 2024 15:06:49 +0100 Subject: doc: netlink: Fix generated .rst for multi-line docs Fix the newline replacement in ynl-gen-rst.py to put spaces between concatenated lines. This fixes the broken doc string formatting. See the dpll docs for an example of broken concatenation: https://docs.kernel.org/6.9/networking/netlink_spec/dpll.html#lock-status Signed-off-by: Donald Hunter Link: https://lore.kernel.org/r/20240528140652.9445-2-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-rst.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-rst.py b/tools/net/ynl/ynl-gen-rst.py index 657e881d2ea4..5c7465d6befa 100755 --- a/tools/net/ynl/ynl-gen-rst.py +++ b/tools/net/ynl/ynl-gen-rst.py @@ -49,7 +49,7 @@ def inline(text: str) -> str: def sanitize(text: str) -> str: """Remove newlines and multiple spaces""" # This is useful for some fields that are spread across multiple lines - return str(text).replace("\n", "").strip() + return str(text).replace("\n", " ").strip() def rst_fields(key: str, value: str, level: int = 0) -> str: -- cgit v1.2.3-73-gaa49b From ebf9004136c76b7b62fe628a4bc88b3e894b4b95 Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Tue, 28 May 2024 15:06:50 +0100 Subject: doc: netlink: Don't 'sanitize' op docstrings in generated .rst The doc strings for do/dump ops are emitted as toplevel .rst constructs so they can be multi-line. Pass multi-line text straight through to the .rst to retain any simple formatting from the .yaml This fixes e.g. 
list formatting for the pin-get docs in dpll.yaml: https://docs.kernel.org/6.9/networking/netlink_spec/dpll.html#pin-get Signed-off-by: Donald Hunter Link: https://lore.kernel.org/r/20240528140652.9445-3-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/dpll.yaml | 1 + tools/net/ynl/ynl-gen-rst.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/Documentation/netlink/specs/dpll.yaml b/Documentation/netlink/specs/dpll.yaml index 95b0eb1486bf..94132d30e0e0 100644 --- a/Documentation/netlink/specs/dpll.yaml +++ b/Documentation/netlink/specs/dpll.yaml @@ -479,6 +479,7 @@ operations: name: pin-get doc: | Get list of pins and its attributes. + - dump request without any attributes given - list all the pins in the system - dump request with target dpll - list all the pins registered with diff --git a/tools/net/ynl/ynl-gen-rst.py b/tools/net/ynl/ynl-gen-rst.py index 5c7465d6befa..1096a71d7867 100755 --- a/tools/net/ynl/ynl-gen-rst.py +++ b/tools/net/ynl/ynl-gen-rst.py @@ -178,7 +178,7 @@ def parse_operations(operations: List[Dict[str, Any]], namespace: str) -> str: for operation in operations: lines.append(rst_section(namespace, 'operation', operation["name"])) - lines.append(rst_paragraph(sanitize(operation["doc"])) + "\n") + lines.append(rst_paragraph(operation["doc"]) + "\n") for key in operation.keys(): if key in preprocessed: -- cgit v1.2.3-73-gaa49b From cb7351ac17862cf8d4e00831ce4aa27f53bda01c Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Tue, 28 May 2024 15:06:51 +0100 Subject: doc: netlink: Fix formatting of op flags in generated .rst Generate op flags as an inline list instead of a stringified python value. 
Signed-off-by: Donald Hunter Link: https://lore.kernel.org/r/20240528140652.9445-4-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-rst.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-rst.py b/tools/net/ynl/ynl-gen-rst.py index 1096a71d7867..a957725b20dc 100755 --- a/tools/net/ynl/ynl-gen-rst.py +++ b/tools/net/ynl/ynl-gen-rst.py @@ -172,7 +172,7 @@ def parse_do_attributes(attrs: Dict[str, Any], level: int = 0) -> str: def parse_operations(operations: List[Dict[str, Any]], namespace: str) -> str: """Parse operations block""" - preprocessed = ["name", "doc", "title", "do", "dump"] + preprocessed = ["name", "doc", "title", "do", "dump", "flags"] linkable = ["fixed-header", "attribute-set"] lines = [] @@ -188,6 +188,8 @@ def parse_operations(operations: List[Dict[str, Any]], namespace: str) -> str: if key in linkable: value = rst_ref(namespace, key, value) lines.append(rst_fields(key, value, 0)) + if 'flags' in operation: + lines.append(rst_fields('flags', rst_list_inline(operation['flags']))) if "do" in operation: lines.append(rst_paragraph(":do:", 0)) -- cgit v1.2.3-73-gaa49b From 9104feed4c6454b9a720e7e11047be7e5cd83487 Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Tue, 28 May 2024 15:06:52 +0100 Subject: doc: netlink: Fix op pre and post fields in generated .rst The generated .rst has pre and post headings without any values, e.g. 
here: https://docs.kernel.org/6.9/networking/netlink_spec/dpll.html#device-id-get Emit keys and values in the generated .rst Signed-off-by: Donald Hunter Link: https://lore.kernel.org/r/20240528140652.9445-5-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-rst.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-rst.py b/tools/net/ynl/ynl-gen-rst.py index a957725b20dc..6c56d0d726b4 100755 --- a/tools/net/ynl/ynl-gen-rst.py +++ b/tools/net/ynl/ynl-gen-rst.py @@ -156,7 +156,10 @@ def parse_do(do_dict: Dict[str, Any], level: int = 0) -> str: lines = [] for key in do_dict.keys(): lines.append(rst_paragraph(bold(key), level + 1)) - lines.append(parse_do_attributes(do_dict[key], level + 1) + "\n") + if key in ['request', 'reply']: + lines.append(parse_do_attributes(do_dict[key], level + 1) + "\n") + else: + lines.append(headroom(level + 2) + do_dict[key] + "\n") return "\n".join(lines) -- cgit v1.2.3-73-gaa49b From 92968dcc037fed045dab5c8e52b51255d77f5432 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Wed, 22 May 2024 11:35:41 +0800 Subject: perf trace beauty: Always show param if show_zero is set For some parameters, it is best to also display them when they are 0, e.g. flags. Here we only check the show_zero property and let arg printer handle special cases. 
Signed-off-by: Changbin Du Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240522033542.1359421-2-changbin.du@huawei.com --- tools/perf/builtin-trace.c | 29 +++++++---------------------- 1 file changed, 7 insertions(+), 22 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 51eca671c797..a36e98c7a1c5 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2091,17 +2091,11 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, val = syscall_arg_fmt__mask_val(&sc->arg_fmt[arg.idx], &arg, val); /* - * Suppress this argument if its value is zero and - * and we don't have a string associated in an - * strarray for it. - */ - if (val == 0 && - !trace->show_zeros && - !(sc->arg_fmt && - (sc->arg_fmt[arg.idx].show_zero || - sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAY || - sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAYS) && - sc->arg_fmt[arg.idx].parm)) + * Suppress this argument if its value is zero and show_zero + * property isn't set. + */ + if (val == 0 && !trace->show_zeros && + !(sc->arg_fmt && sc->arg_fmt[arg.idx].show_zero)) continue; printed += scnprintf(bf + printed, size - printed, "%s", printed ? ", " : ""); @@ -2796,17 +2790,8 @@ static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel, */ val = syscall_arg_fmt__mask_val(arg, &syscall_arg, val); - /* - * Suppress this argument if its value is zero and - * we don't have a string associated in an - * strarray for it. - */ - if (val == 0 && - !trace->show_zeros && - !((arg->show_zero || - arg->scnprintf == SCA_STRARRAY || - arg->scnprintf == SCA_STRARRAYS) && - arg->parm)) + /* Suppress this argument if its value is zero and show_zero property isn't set. */ + if (val == 0 && !trace->show_zeros && !arg->show_zero) continue; printed += scnprintf(bf + printed, size - printed, "%s", printed ? 
", " : ""); -- cgit v1.2.3-73-gaa49b From f975c13d2a34a335fc559aeff76dcaba456cced0 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Wed, 22 May 2024 11:35:42 +0800 Subject: perf trace beauty: Always show mmap prot even though PROT_NONE PROT_NONE is also useful information, so do not omit the mmap prot even though it is 0. syscall_arg__scnprintf_mmap_prot() could print PROT_NONE for prot 0. Before: PROT_NONE is not shown. $ sudo perf trace -e syscalls:sys_enter_mmap --filter prot==0 -- ls 0.000 ls/2979231 syscalls:sys_enter_mmap(len: 4220888, flags: PRIVATE|ANONYMOUS) After: PROT_NONE is displayed. $ sudo perf trace -e syscalls:sys_enter_mmap --filter prot==0 -- ls 0.000 ls/2975708 syscalls:sys_enter_mmap(len: 4220888, prot: NONE, flags: PRIVATE|ANONYMOUS) Signed-off-by: Changbin Du Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240522033542.1359421-3-changbin.du@huawei.com --- tools/perf/builtin-trace.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index a36e98c7a1c5..c42bc608954e 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1033,7 +1033,7 @@ static const struct syscall_fmt syscall_fmts[] = { #if defined(__s390x__) .alias = "old_mmap", #endif - .arg = { [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, + .arg = { [2] = { .scnprintf = SCA_MMAP_PROT, .show_zero = true, /* prot */ }, [3] = { .scnprintf = SCA_MMAP_FLAGS, /* flags */ .strtoul = STUL_STRARRAY_FLAGS, .parm = &strarray__mmap_flags, }, @@ -1050,7 +1050,7 @@ static const struct syscall_fmt syscall_fmts[] = { [4] = { .scnprintf = SCA_MOVE_MOUNT_FLAGS, /* flags */ }, }, }, { .name = "mprotect", .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ }, - [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, }, }, + [2] = { .scnprintf = SCA_MMAP_PROT, .show_zero = true, /* prot */ }, }, }, { .name = "mq_unlink", .arg = { [0] = { .scnprintf = SCA_FILENAME, /* u_name */ }, 
}, }, { .name = "mremap", .hexret = true, @@ -1084,7 +1084,7 @@ static const struct syscall_fmt syscall_fmts[] = { .arg = { [0] = { .scnprintf = SCA_INT, /* key */ }, }, }, { .name = "pkey_mprotect", .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ }, - [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, + [2] = { .scnprintf = SCA_MMAP_PROT, .show_zero = true, /* prot */ }, [3] = { .scnprintf = SCA_INT, /* pkey */ }, }, }, { .name = "poll", .timeout = true, }, { .name = "ppoll", .timeout = true, }, -- cgit v1.2.3-73-gaa49b From d163d60258c755845cbc9cfe0e45fca71e649488 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Sun, 19 May 2024 11:17:16 -0700 Subject: tools api io: Move filling the io buffer to its own function In general a read fills 4kb so filling the buffer is a 1 in 4096 operation, move it out of the io__get_char function to avoid some checking overhead and to better hint the function is good to inline. For perf's IO intensive internal (non-rigorous) benchmarks there's a small improvement to kallsyms-parsing with a default build. Before: ``` $ perf bench internals all Computing performance of single threaded perf event synthesis by synthesizing events on the perf process itself: Average synthesis took: 146.322 usec (+- 0.305 usec) Average num. events: 61.000 (+- 0.000) Average time per event 2.399 usec Average data synthesis took: 145.056 usec (+- 0.155 usec) Average num. events: 329.000 (+- 0.000) Average time per event 0.441 usec Average kallsyms__parse took: 162.313 ms (+- 0.599 ms) ... Computing performance of sysfs PMU event scan for 100 times Average core PMU scanning took: 53.720 usec (+- 7.823 usec) Average PMU scanning took: 375.145 usec (+- 23.974 usec) ``` After: ``` $ perf bench internals all Computing performance of single threaded perf event synthesis by synthesizing events on the perf process itself: Average synthesis took: 127.829 usec (+- 0.079 usec) Average num. 
events: 61.000 (+- 0.000) Average time per event 2.096 usec Average data synthesis took: 133.652 usec (+- 0.101 usec) Average num. events: 327.000 (+- 0.000) Average time per event 0.409 usec Average kallsyms__parse took: 150.415 ms (+- 0.313 ms) ... Computing performance of sysfs PMU event scan for 100 times Average core PMU scanning took: 47.790 usec (+- 1.178 usec) Average PMU scanning took: 376.945 usec (+- 23.683 usec) ``` Signed-off-by: Ian Rogers Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240519181716.4088459-1-irogers@google.com --- tools/lib/api/io.h | 69 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 38 insertions(+), 31 deletions(-) (limited to 'tools') diff --git a/tools/lib/api/io.h b/tools/lib/api/io.h index 84adf8102018..d3eb04d1bc89 100644 --- a/tools/lib/api/io.h +++ b/tools/lib/api/io.h @@ -43,48 +43,55 @@ static inline void io__init(struct io *io, int fd, io->eof = false; } -/* Reads one character from the "io" file with similar semantics to fgetc. */ -static inline int io__get_char(struct io *io) +/* Read from fd filling the buffer. Called when io->data == io->end. 
*/ +static inline int io__fill_buffer(struct io *io) { - char *ptr = io->data; + ssize_t n; if (io->eof) return -1; - if (ptr == io->end) { - ssize_t n; - - if (io->timeout_ms != 0) { - struct pollfd pfds[] = { - { - .fd = io->fd, - .events = POLLIN, - }, - }; - - n = poll(pfds, 1, io->timeout_ms); - if (n == 0) - errno = ETIMEDOUT; - if (n > 0 && !(pfds[0].revents & POLLIN)) { - errno = EIO; - n = -1; - } - if (n <= 0) { - io->eof = true; - return -1; - } + if (io->timeout_ms != 0) { + struct pollfd pfds[] = { + { + .fd = io->fd, + .events = POLLIN, + }, + }; + + n = poll(pfds, 1, io->timeout_ms); + if (n == 0) + errno = ETIMEDOUT; + if (n > 0 && !(pfds[0].revents & POLLIN)) { + errno = EIO; + n = -1; } - n = read(io->fd, io->buf, io->buf_len); - if (n <= 0) { io->eof = true; return -1; } - ptr = &io->buf[0]; - io->end = &io->buf[n]; } - io->data = ptr + 1; - return *ptr; + n = read(io->fd, io->buf, io->buf_len); + + if (n <= 0) { + io->eof = true; + return -1; + } + io->data = &io->buf[0]; + io->end = &io->buf[n]; + return 0; +} + +/* Reads one character from the "io" file with similar semantics to fgetc. */ +static inline int io__get_char(struct io *io) +{ + if (io->data == io->end) { + int ret = io__fill_buffer(io); + + if (ret) + return ret; + } + return *io->data++; } /* Read a hexadecimal value with no 0x prefix into the out argument hex. If the -- cgit v1.2.3-73-gaa49b From 63b9cbd7941aa9ec5cb61567042176c4ce04b020 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 24 May 2024 13:52:25 -0700 Subject: perf bpf filter: Give terms their own enum Give the term types their own enum so that additional terms can be added that don't correspond to a PERF_SAMPLE_xx flag. The term values are numerically ascending rather than bit field positions, this means they need translating to a PERF_SAMPLE_xx bit field in certain places using a shift. 
Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: John Fastabend Cc: Changbin Du Cc: Yang Jihong Cc: Andrii Nakryiko Cc: bpf@vger.kernel.org Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240524205227.244375-2-irogers@google.com --- tools/perf/util/bpf-filter.c | 28 +++++------ tools/perf/util/bpf-filter.h | 5 +- tools/perf/util/bpf-filter.l | 64 +++++++++++++------------- tools/perf/util/bpf-filter.y | 7 +-- tools/perf/util/bpf_skel/sample-filter.h | 37 ++++++++++++++- tools/perf/util/bpf_skel/sample_filter.bpf.c | 69 +++++++++++++++++++++------- 6 files changed, 141 insertions(+), 69 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/bpf-filter.c b/tools/perf/util/bpf-filter.c index b51544996046..f10148623a8e 100644 --- a/tools/perf/util/bpf-filter.c +++ b/tools/perf/util/bpf-filter.c @@ -17,11 +17,11 @@ #define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y)) -#define __PERF_SAMPLE_TYPE(st, opt) { st, #st, opt } -#define PERF_SAMPLE_TYPE(_st, opt) __PERF_SAMPLE_TYPE(PERF_SAMPLE_##_st, opt) +#define __PERF_SAMPLE_TYPE(tt, st, opt) { tt, #st, opt } +#define PERF_SAMPLE_TYPE(_st, opt) __PERF_SAMPLE_TYPE(PBF_TERM_##_st, PERF_SAMPLE_##_st, opt) static const struct perf_sample_info { - u64 type; + enum perf_bpf_filter_term type; const char *name; const char *option; } sample_table[] = { @@ -44,12 +44,12 @@ static const struct perf_sample_info { PERF_SAMPLE_TYPE(DATA_PAGE_SIZE, "--data-page-size"), }; -static const struct perf_sample_info *get_sample_info(u64 flags) +static const struct perf_sample_info *get_sample_info(enum perf_bpf_filter_term type) { size_t i; for (i = 0; i < ARRAY_SIZE(sample_table); i++) { - if (sample_table[i].type == flags) + if (sample_table[i].type == type) return &sample_table[i]; } return NULL; @@ -59,7 +59,8 @@ static int check_sample_flags(struct evsel *evsel, struct perf_bpf_filter_expr * { const struct perf_sample_info *info; - if (evsel->core.attr.sample_type & expr->sample_flags) + if (expr->term >= 
PBF_TERM_SAMPLE_START && expr->term <= PBF_TERM_SAMPLE_END && + (evsel->core.attr.sample_type & (1 << (expr->term - PBF_TERM_SAMPLE_START)))) return 0; if (expr->op == PBF_OP_GROUP_BEGIN) { @@ -72,10 +73,10 @@ static int check_sample_flags(struct evsel *evsel, struct perf_bpf_filter_expr * return 0; } - info = get_sample_info(expr->sample_flags); + info = get_sample_info(expr->term); if (info == NULL) { - pr_err("Error: %s event does not have sample flags %lx\n", - evsel__name(evsel), expr->sample_flags); + pr_err("Error: %s event does not have sample flags %d\n", + evsel__name(evsel), expr->term); return -1; } @@ -105,7 +106,7 @@ int perf_bpf_filter__prepare(struct evsel *evsel) struct perf_bpf_filter_entry entry = { .op = expr->op, .part = expr->part, - .flags = expr->sample_flags, + .term = expr->term, .value = expr->val, }; @@ -122,7 +123,7 @@ int perf_bpf_filter__prepare(struct evsel *evsel) struct perf_bpf_filter_entry group_entry = { .op = group->op, .part = group->part, - .flags = group->sample_flags, + .term = group->term, .value = group->val, }; bpf_map_update_elem(fd, &i, &group_entry, BPF_ANY); @@ -173,7 +174,8 @@ u64 perf_bpf_filter__lost_count(struct evsel *evsel) return skel ? 
skel->bss->dropped : 0; } -struct perf_bpf_filter_expr *perf_bpf_filter_expr__new(unsigned long sample_flags, int part, +struct perf_bpf_filter_expr *perf_bpf_filter_expr__new(enum perf_bpf_filter_term term, + int part, enum perf_bpf_filter_op op, unsigned long val) { @@ -181,7 +183,7 @@ struct perf_bpf_filter_expr *perf_bpf_filter_expr__new(unsigned long sample_flag expr = malloc(sizeof(*expr)); if (expr != NULL) { - expr->sample_flags = sample_flags; + expr->term = term; expr->part = part; expr->op = op; expr->val = val; diff --git a/tools/perf/util/bpf-filter.h b/tools/perf/util/bpf-filter.h index 7afd159411b8..cd6764442c16 100644 --- a/tools/perf/util/bpf-filter.h +++ b/tools/perf/util/bpf-filter.h @@ -11,14 +11,15 @@ struct perf_bpf_filter_expr { struct list_head groups; enum perf_bpf_filter_op op; int part; - unsigned long sample_flags; + enum perf_bpf_filter_term term; unsigned long val; }; struct evsel; #ifdef HAVE_BPF_SKEL -struct perf_bpf_filter_expr *perf_bpf_filter_expr__new(unsigned long sample_flags, int part, +struct perf_bpf_filter_expr *perf_bpf_filter_expr__new(enum perf_bpf_filter_term term, + int part, enum perf_bpf_filter_op op, unsigned long val); int perf_bpf_filter__parse(struct list_head *expr_head, const char *str); diff --git a/tools/perf/util/bpf-filter.l b/tools/perf/util/bpf-filter.l index d4ff0f1345cd..62c959813466 100644 --- a/tools/perf/util/bpf-filter.l +++ b/tools/perf/util/bpf-filter.l @@ -9,16 +9,16 @@ #include "bpf-filter.h" #include "bpf-filter-bison.h" -static int sample(unsigned long sample_flag) +static int sample(enum perf_bpf_filter_term term) { - perf_bpf_filter_lval.sample.type = sample_flag; + perf_bpf_filter_lval.sample.term = term; perf_bpf_filter_lval.sample.part = 0; return BFT_SAMPLE; } -static int sample_part(unsigned long sample_flag, int part) +static int sample_part(enum perf_bpf_filter_term term, int part) { - perf_bpf_filter_lval.sample.type = sample_flag; + perf_bpf_filter_lval.sample.term = term; 
perf_bpf_filter_lval.sample.part = part; return BFT_SAMPLE; } @@ -67,34 +67,34 @@ ident [_a-zA-Z][_a-zA-Z0-9]+ {num_hex} { return value(16); } {space} { } -ip { return sample(PERF_SAMPLE_IP); } -id { return sample(PERF_SAMPLE_ID); } -tid { return sample(PERF_SAMPLE_TID); } -pid { return sample_part(PERF_SAMPLE_TID, 1); } -cpu { return sample(PERF_SAMPLE_CPU); } -time { return sample(PERF_SAMPLE_TIME); } -addr { return sample(PERF_SAMPLE_ADDR); } -period { return sample(PERF_SAMPLE_PERIOD); } -txn { return sample(PERF_SAMPLE_TRANSACTION); } -weight { return sample(PERF_SAMPLE_WEIGHT); } -weight1 { return sample_part(PERF_SAMPLE_WEIGHT_STRUCT, 1); } -weight2 { return sample_part(PERF_SAMPLE_WEIGHT_STRUCT, 2); } -weight3 { return sample_part(PERF_SAMPLE_WEIGHT_STRUCT, 3); } -ins_lat { return sample_part(PERF_SAMPLE_WEIGHT_STRUCT, 2); } /* alias for weight2 */ -p_stage_cyc { return sample_part(PERF_SAMPLE_WEIGHT_STRUCT, 3); } /* alias for weight3 */ -retire_lat { return sample_part(PERF_SAMPLE_WEIGHT_STRUCT, 3); } /* alias for weight3 */ -phys_addr { return sample(PERF_SAMPLE_PHYS_ADDR); } -code_pgsz { return sample(PERF_SAMPLE_CODE_PAGE_SIZE); } -data_pgsz { return sample(PERF_SAMPLE_DATA_PAGE_SIZE); } -mem_op { return sample_part(PERF_SAMPLE_DATA_SRC, 1); } -mem_lvlnum { return sample_part(PERF_SAMPLE_DATA_SRC, 2); } -mem_lvl { return sample_part(PERF_SAMPLE_DATA_SRC, 2); } /* alias for mem_lvlnum */ -mem_snoop { return sample_part(PERF_SAMPLE_DATA_SRC, 3); } /* include snoopx */ -mem_remote { return sample_part(PERF_SAMPLE_DATA_SRC, 4); } -mem_lock { return sample_part(PERF_SAMPLE_DATA_SRC, 5); } -mem_dtlb { return sample_part(PERF_SAMPLE_DATA_SRC, 6); } -mem_blk { return sample_part(PERF_SAMPLE_DATA_SRC, 7); } -mem_hops { return sample_part(PERF_SAMPLE_DATA_SRC, 8); } +ip { return sample(PBF_TERM_IP); } +id { return sample(PBF_TERM_ID); } +tid { return sample(PBF_TERM_TID); } +pid { return sample_part(PBF_TERM_TID, 1); } +cpu { return sample(PBF_TERM_CPU); } +time 
{ return sample(PBF_TERM_TIME); } +addr { return sample(PBF_TERM_ADDR); } +period { return sample(PBF_TERM_PERIOD); } +txn { return sample(PBF_TERM_TRANSACTION); } +weight { return sample(PBF_TERM_WEIGHT); } +weight1 { return sample_part(PBF_TERM_WEIGHT_STRUCT, 1); } +weight2 { return sample_part(PBF_TERM_WEIGHT_STRUCT, 2); } +weight3 { return sample_part(PBF_TERM_WEIGHT_STRUCT, 3); } +ins_lat { return sample_part(PBF_TERM_WEIGHT_STRUCT, 2); } /* alias for weight2 */ +p_stage_cyc { return sample_part(PBF_TERM_WEIGHT_STRUCT, 3); } /* alias for weight3 */ +retire_lat { return sample_part(PBF_TERM_WEIGHT_STRUCT, 3); } /* alias for weight3 */ +phys_addr { return sample(PBF_TERM_PHYS_ADDR); } +code_pgsz { return sample(PBF_TERM_CODE_PAGE_SIZE); } +data_pgsz { return sample(PBF_TERM_DATA_PAGE_SIZE); } +mem_op { return sample_part(PBF_TERM_DATA_SRC, 1); } +mem_lvlnum { return sample_part(PBF_TERM_DATA_SRC, 2); } +mem_lvl { return sample_part(PBF_TERM_DATA_SRC, 2); } /* alias for mem_lvlnum */ +mem_snoop { return sample_part(PBF_TERM_DATA_SRC, 3); } /* include snoopx */ +mem_remote { return sample_part(PBF_TERM_DATA_SRC, 4); } +mem_lock { return sample_part(PBF_TERM_DATA_SRC, 5); } +mem_dtlb { return sample_part(PBF_TERM_DATA_SRC, 6); } +mem_blk { return sample_part(PBF_TERM_DATA_SRC, 7); } +mem_hops { return sample_part(PBF_TERM_DATA_SRC, 8); } "==" { return operator(PBF_OP_EQ); } "!=" { return operator(PBF_OP_NEQ); } diff --git a/tools/perf/util/bpf-filter.y b/tools/perf/util/bpf-filter.y index 0e4d6de3c2ad..0c56fccb8874 100644 --- a/tools/perf/util/bpf-filter.y +++ b/tools/perf/util/bpf-filter.y @@ -27,7 +27,7 @@ static void perf_bpf_filter_error(struct list_head *expr __maybe_unused, { unsigned long num; struct { - unsigned long type; + enum perf_bpf_filter_term term; int part; } sample; enum perf_bpf_filter_op op; @@ -62,7 +62,8 @@ filter_term BFT_LOGICAL_OR filter_expr if ($1->op == PBF_OP_GROUP_BEGIN) { expr = $1; } else { - expr = perf_bpf_filter_expr__new(0, 0, 
PBF_OP_GROUP_BEGIN, 1); + expr = perf_bpf_filter_expr__new(PBF_TERM_NONE, /*part=*/0, + PBF_OP_GROUP_BEGIN, /*val=*/1); list_add_tail(&$1->list, &expr->groups); } expr->val++; @@ -78,7 +79,7 @@ filter_expr filter_expr: BFT_SAMPLE BFT_OP BFT_NUM { - $$ = perf_bpf_filter_expr__new($1.type, $1.part, $2, $3); + $$ = perf_bpf_filter_expr__new($1.term, $1.part, $2, $3); } %% diff --git a/tools/perf/util/bpf_skel/sample-filter.h b/tools/perf/util/bpf_skel/sample-filter.h index 2e96e1ab084a..25f780022951 100644 --- a/tools/perf/util/bpf_skel/sample-filter.h +++ b/tools/perf/util/bpf_skel/sample-filter.h @@ -16,12 +16,45 @@ enum perf_bpf_filter_op { PBF_OP_GROUP_END, }; +enum perf_bpf_filter_term { + /* No term is in use. */ + PBF_TERM_NONE = 0, + /* Terms that correspond to PERF_SAMPLE_xx values. */ + PBF_TERM_SAMPLE_START = PBF_TERM_NONE + 1, + PBF_TERM_IP = PBF_TERM_SAMPLE_START + 0, /* SAMPLE_IP = 1U << 0 */ + PBF_TERM_TID = PBF_TERM_SAMPLE_START + 1, /* SAMPLE_TID = 1U << 1 */ + PBF_TERM_TIME = PBF_TERM_SAMPLE_START + 2, /* SAMPLE_TIME = 1U << 2 */ + PBF_TERM_ADDR = PBF_TERM_SAMPLE_START + 3, /* SAMPLE_ADDR = 1U << 3 */ + __PBF_UNUSED_TERM4 = PBF_TERM_SAMPLE_START + 4, /* SAMPLE_READ = 1U << 4 */ + __PBF_UNUSED_TERM5 = PBF_TERM_SAMPLE_START + 5, /* SAMPLE_CALLCHAIN = 1U << 5 */ + PBF_TERM_ID = PBF_TERM_SAMPLE_START + 6, /* SAMPLE_ID = 1U << 6 */ + PBF_TERM_CPU = PBF_TERM_SAMPLE_START + 7, /* SAMPLE_CPU = 1U << 7 */ + PBF_TERM_PERIOD = PBF_TERM_SAMPLE_START + 8, /* SAMPLE_PERIOD = 1U << 8 */ + __PBF_UNUSED_TERM9 = PBF_TERM_SAMPLE_START + 9, /* SAMPLE_STREAM_ID = 1U << 9 */ + __PBF_UNUSED_TERM10 = PBF_TERM_SAMPLE_START + 10, /* SAMPLE_RAW = 1U << 10 */ + __PBF_UNUSED_TERM11 = PBF_TERM_SAMPLE_START + 11, /* SAMPLE_BRANCH_STACK = 1U << 11 */ + __PBF_UNUSED_TERM12 = PBF_TERM_SAMPLE_START + 12, /* SAMPLE_REGS_USER = 1U << 12 */ + __PBF_UNUSED_TERM13 = PBF_TERM_SAMPLE_START + 13, /* SAMPLE_STACK_USER = 1U << 13 */ + PBF_TERM_WEIGHT = PBF_TERM_SAMPLE_START + 14, /* 
SAMPLE_WEIGHT = 1U << 14 */ + PBF_TERM_DATA_SRC = PBF_TERM_SAMPLE_START + 15, /* SAMPLE_DATA_SRC = 1U << 15 */ + __PBF_UNUSED_TERM16 = PBF_TERM_SAMPLE_START + 16, /* SAMPLE_IDENTIFIER = 1U << 16 */ + PBF_TERM_TRANSACTION = PBF_TERM_SAMPLE_START + 17, /* SAMPLE_TRANSACTION = 1U << 17 */ + __PBF_UNUSED_TERM18 = PBF_TERM_SAMPLE_START + 18, /* SAMPLE_REGS_INTR = 1U << 18 */ + PBF_TERM_PHYS_ADDR = PBF_TERM_SAMPLE_START + 19, /* SAMPLE_PHYS_ADDR = 1U << 19 */ + __PBF_UNUSED_TERM20 = PBF_TERM_SAMPLE_START + 20, /* SAMPLE_AUX = 1U << 20 */ + __PBF_UNUSED_TERM21 = PBF_TERM_SAMPLE_START + 21, /* SAMPLE_CGROUP = 1U << 21 */ + PBF_TERM_DATA_PAGE_SIZE = PBF_TERM_SAMPLE_START + 22, /* SAMPLE_DATA_PAGE_SIZE = 1U << 22 */ + PBF_TERM_CODE_PAGE_SIZE = PBF_TERM_SAMPLE_START + 23, /* SAMPLE_CODE_PAGE_SIZE = 1U << 23 */ + PBF_TERM_WEIGHT_STRUCT = PBF_TERM_SAMPLE_START + 24, /* SAMPLE_WEIGHT_STRUCT = 1U << 24 */ + PBF_TERM_SAMPLE_END = PBF_TERM_WEIGHT_STRUCT, +}; + /* BPF map entry for filtering */ struct perf_bpf_filter_entry { enum perf_bpf_filter_op op; __u32 part; /* sub-sample type info when it has multiple values */ - __u64 flags; /* perf sample type flags */ + enum perf_bpf_filter_term term; __u64 value; }; -#endif /* PERF_UTIL_BPF_SKEL_SAMPLE_FILTER_H */ \ No newline at end of file +#endif /* PERF_UTIL_BPF_SKEL_SAMPLE_FILTER_H */ diff --git a/tools/perf/util/bpf_skel/sample_filter.bpf.c b/tools/perf/util/bpf_skel/sample_filter.bpf.c index fb94f5280626..5ac1778ff66e 100644 --- a/tools/perf/util/bpf_skel/sample_filter.bpf.c +++ b/tools/perf/util/bpf_skel/sample_filter.bpf.c @@ -48,31 +48,54 @@ static inline __u64 perf_get_sample(struct bpf_perf_event_data_kern *kctx, { struct perf_sample_data___new *data = (void *)kctx->data; - if (!bpf_core_field_exists(data->sample_flags) || - (data->sample_flags & entry->flags) == 0) + if (!bpf_core_field_exists(data->sample_flags)) return 0; - switch (entry->flags) { - case PERF_SAMPLE_IP: +#define BUILD_CHECK_SAMPLE(x) \ + _Static_assert((1 
<< (PBF_TERM_##x - PBF_TERM_SAMPLE_START)) == PERF_SAMPLE_##x, \ + "Mismatched PBF term to sample bit " #x) + BUILD_CHECK_SAMPLE(IP); + BUILD_CHECK_SAMPLE(TID); + BUILD_CHECK_SAMPLE(TIME); + BUILD_CHECK_SAMPLE(ADDR); + BUILD_CHECK_SAMPLE(ID); + BUILD_CHECK_SAMPLE(CPU); + BUILD_CHECK_SAMPLE(PERIOD); + BUILD_CHECK_SAMPLE(WEIGHT); + BUILD_CHECK_SAMPLE(DATA_SRC); + BUILD_CHECK_SAMPLE(TRANSACTION); + BUILD_CHECK_SAMPLE(PHYS_ADDR); + BUILD_CHECK_SAMPLE(DATA_PAGE_SIZE); + BUILD_CHECK_SAMPLE(CODE_PAGE_SIZE); + BUILD_CHECK_SAMPLE(WEIGHT_STRUCT); +#undef BUILD_CHECK_SAMPLE + + /* For sample terms check the sample bit is set. */ + if (entry->term >= PBF_TERM_SAMPLE_START && entry->term <= PBF_TERM_SAMPLE_END && + (data->sample_flags & (1 << (entry->term - PBF_TERM_SAMPLE_START))) == 0) + return 0; + + switch (entry->term) { + case PBF_TERM_IP: return kctx->data->ip; - case PERF_SAMPLE_ID: + case PBF_TERM_ID: return kctx->data->id; - case PERF_SAMPLE_TID: + case PBF_TERM_TID: if (entry->part) return kctx->data->tid_entry.pid; else return kctx->data->tid_entry.tid; - case PERF_SAMPLE_CPU: + case PBF_TERM_CPU: return kctx->data->cpu_entry.cpu; - case PERF_SAMPLE_TIME: + case PBF_TERM_TIME: return kctx->data->time; - case PERF_SAMPLE_ADDR: + case PBF_TERM_ADDR: return kctx->data->addr; - case PERF_SAMPLE_PERIOD: + case PBF_TERM_PERIOD: return kctx->data->period; - case PERF_SAMPLE_TRANSACTION: + case PBF_TERM_TRANSACTION: return kctx->data->txn; - case PERF_SAMPLE_WEIGHT_STRUCT: + case PBF_TERM_WEIGHT_STRUCT: if (entry->part == 1) return kctx->data->weight.var1_dw; if (entry->part == 2) @@ -80,15 +103,15 @@ static inline __u64 perf_get_sample(struct bpf_perf_event_data_kern *kctx, if (entry->part == 3) return kctx->data->weight.var3_w; /* fall through */ - case PERF_SAMPLE_WEIGHT: + case PBF_TERM_WEIGHT: return kctx->data->weight.full; - case PERF_SAMPLE_PHYS_ADDR: + case PBF_TERM_PHYS_ADDR: return kctx->data->phys_addr; - case PERF_SAMPLE_CODE_PAGE_SIZE: + case 
PBF_TERM_CODE_PAGE_SIZE: return kctx->data->code_page_size; - case PERF_SAMPLE_DATA_PAGE_SIZE: + case PBF_TERM_DATA_PAGE_SIZE: return kctx->data->data_page_size; - case PERF_SAMPLE_DATA_SRC: + case PBF_TERM_DATA_SRC: if (entry->part == 1) return kctx->data->data_src.mem_op; if (entry->part == 2) @@ -117,6 +140,18 @@ static inline __u64 perf_get_sample(struct bpf_perf_event_data_kern *kctx, } /* return the whole word */ return kctx->data->data_src.val; + case PBF_TERM_NONE: + case __PBF_UNUSED_TERM4: + case __PBF_UNUSED_TERM5: + case __PBF_UNUSED_TERM9: + case __PBF_UNUSED_TERM10: + case __PBF_UNUSED_TERM11: + case __PBF_UNUSED_TERM12: + case __PBF_UNUSED_TERM13: + case __PBF_UNUSED_TERM16: + case __PBF_UNUSED_TERM18: + case __PBF_UNUSED_TERM20: + case __PBF_UNUSED_TERM21: default: break; } -- cgit v1.2.3-73-gaa49b From d92aa899fe0a66350303a1986d6dc7ec4b3a1ea7 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 24 May 2024 13:52:26 -0700 Subject: perf bpf filter: Add uid and gid terms Allow the BPF filter to use the uid and gid terms determined by the bpf_get_current_uid_gid BPF helper. For example, the following will record the cpu-clock event system wide discarding samples that don't belong to the current user. 
$ perf record -e cpu-clock --filter "uid == $(id -u)" -a sleep 0.1 Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: John Fastabend Cc: Changbin Du Cc: Yang Jihong Cc: Andrii Nakryiko Cc: bpf@vger.kernel.org Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240524205227.244375-3-irogers@google.com --- tools/perf/Documentation/perf-record.txt | 2 +- tools/perf/util/bpf-filter.c | 5 +++++ tools/perf/util/bpf-filter.l | 2 ++ tools/perf/util/bpf_skel/sample-filter.h | 3 +++ tools/perf/util/bpf_skel/sample_filter.bpf.c | 4 ++++ 5 files changed, 15 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 77578c0a142a..d6532ed97c02 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -200,7 +200,7 @@ OPTIONS ip, id, tid, pid, cpu, time, addr, period, txn, weight, phys_addr, code_pgsz, data_pgsz, weight1, weight2, weight3, ins_lat, retire_lat, p_stage_cyc, mem_op, mem_lvl, mem_snoop, mem_remote, mem_lock, - mem_dtlb, mem_blk, mem_hops + mem_dtlb, mem_blk, mem_hops, uid, gid The can be one of: ==, !=, >, >=, <, <=, & diff --git a/tools/perf/util/bpf-filter.c b/tools/perf/util/bpf-filter.c index f10148623a8e..04f98b6bb291 100644 --- a/tools/perf/util/bpf-filter.c +++ b/tools/perf/util/bpf-filter.c @@ -63,6 +63,11 @@ static int check_sample_flags(struct evsel *evsel, struct perf_bpf_filter_expr * (evsel->core.attr.sample_type & (1 << (expr->term - PBF_TERM_SAMPLE_START)))) return 0; + if (expr->term == PBF_TERM_UID || expr->term == PBF_TERM_GID) { + /* Not dependent on the sample_type as computed from a BPF helper. 
*/ + return 0; + } + if (expr->op == PBF_OP_GROUP_BEGIN) { struct perf_bpf_filter_expr *group; diff --git a/tools/perf/util/bpf-filter.l b/tools/perf/util/bpf-filter.l index 62c959813466..2a7c839f3fae 100644 --- a/tools/perf/util/bpf-filter.l +++ b/tools/perf/util/bpf-filter.l @@ -95,6 +95,8 @@ mem_lock { return sample_part(PBF_TERM_DATA_SRC, 5); } mem_dtlb { return sample_part(PBF_TERM_DATA_SRC, 6); } mem_blk { return sample_part(PBF_TERM_DATA_SRC, 7); } mem_hops { return sample_part(PBF_TERM_DATA_SRC, 8); } +uid { return sample(PBF_TERM_UID); } +gid { return sample(PBF_TERM_GID); } "==" { return operator(PBF_OP_EQ); } "!=" { return operator(PBF_OP_NEQ); } diff --git a/tools/perf/util/bpf_skel/sample-filter.h b/tools/perf/util/bpf_skel/sample-filter.h index 25f780022951..350efa121026 100644 --- a/tools/perf/util/bpf_skel/sample-filter.h +++ b/tools/perf/util/bpf_skel/sample-filter.h @@ -47,6 +47,9 @@ enum perf_bpf_filter_term { PBF_TERM_CODE_PAGE_SIZE = PBF_TERM_SAMPLE_START + 23, /* SAMPLE_CODE_PAGE_SIZE = 1U << 23 */ PBF_TERM_WEIGHT_STRUCT = PBF_TERM_SAMPLE_START + 24, /* SAMPLE_WEIGHT_STRUCT = 1U << 24 */ PBF_TERM_SAMPLE_END = PBF_TERM_WEIGHT_STRUCT, + /* Terms computed from BPF helpers. 
*/ + PBF_TERM_UID, + PBF_TERM_GID, }; /* BPF map entry for filtering */ diff --git a/tools/perf/util/bpf_skel/sample_filter.bpf.c b/tools/perf/util/bpf_skel/sample_filter.bpf.c index 5ac1778ff66e..f59985101973 100644 --- a/tools/perf/util/bpf_skel/sample_filter.bpf.c +++ b/tools/perf/util/bpf_skel/sample_filter.bpf.c @@ -140,6 +140,10 @@ static inline __u64 perf_get_sample(struct bpf_perf_event_data_kern *kctx, } /* return the whole word */ return kctx->data->data_src.val; + case PBF_TERM_UID: + return bpf_get_current_uid_gid() & 0xFFFFFFFF; + case PBF_TERM_GID: + return bpf_get_current_uid_gid() >> 32; case PBF_TERM_NONE: case __PBF_UNUSED_TERM4: case __PBF_UNUSED_TERM5: -- cgit v1.2.3-73-gaa49b From af752016340021d433a962063067e819dba889b1 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 24 May 2024 13:52:27 -0700 Subject: perf top: Allow filters on events Allow filters to be added to perf top events. One use is to work around issues with: ``` $ perf top --uid="$(id -u)" ``` which tries to scan /proc to find processes belonging to the uid and can fail if such a pid terminates between the scan and the perf_event_open, reporting: ``` Error: The sys_perf_event_open() syscall returned with 3 (No such process) for event (cycles:P). /bin/dmesg | grep -i perf may provide additional information. ``` A similar filter: ``` $ perf top -e cycles:P --filter "uid == $(id -u)" ``` doesn't fail this way.
Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: John Fastabend Cc: Changbin Du Cc: Yang Jihong Cc: Andrii Nakryiko Cc: bpf@vger.kernel.org Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240524205227.244375-4-irogers@google.com --- tools/perf/Documentation/perf-top.txt | 4 ++++ tools/perf/builtin-top.c | 9 +++++++++ 2 files changed, 13 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index a754875fa5bb..667e5102075e 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -43,6 +43,10 @@ Default is to monitor all CPUS. encoding with the layout of the event control registers as described by entries in /sys/bus/event_source/devices/cpu/format/*. +--filter=:: + Event filter. This option should follow an event selector (-e). For + syntax see linkperf:perf-record[1]. + -E :: --entries=:: Display this many functions. diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 1d6aef51c122..e8cbbf10d361 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1055,6 +1055,13 @@ try_again: } } + if (evlist__apply_filters(evlist, &counter)) { + pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n", + counter->filter ?: "BPF", evsel__name(counter), errno, + str_error_r(errno, msg, sizeof(msg))); + goto out_err; + } + if (evlist__mmap(evlist, opts->mmap_pages) < 0) { ui__error("Failed to mmap with %d (%s)\n", errno, str_error_r(errno, msg, sizeof(msg))); @@ -1462,6 +1469,8 @@ int cmd_top(int argc, const char **argv) OPT_CALLBACK('e', "event", &parse_events_option_args, "event", "event selector. 
use 'perf list' to list available events", parse_events_option), + OPT_CALLBACK(0, "filter", &top.evlist, "filter", + "event filter", parse_filter), OPT_U64('c', "count", &opts->user_interval, "event period to sample"), OPT_STRING('p', "pid", &target->pid, "pid", "profile events on existing process id"), -- cgit v1.2.3-73-gaa49b From edc96a2b4c793c21ffae285d3122b6a67a63da60 Mon Sep 17 00:00:00 2001 From: Andy Chiu Date: Fri, 10 May 2024 00:26:58 +0800 Subject: selftest: run vector prctl test for ZVE32X The minimal requirement for running Vector subextension on Linux is ZVE32X. So change the test accordingly to run prctl as long as it finds it. Signed-off-by: Andy Chiu Link: https://lore.kernel.org/r/20240510-zve-detection-v5-8-0711bdd26c12@sifive.com Signed-off-by: Palmer Dabbelt --- tools/testing/selftests/riscv/vector/vstate_prctl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/riscv/vector/vstate_prctl.c b/tools/testing/selftests/riscv/vector/vstate_prctl.c index 27668fb3b6d0..895177f6bf4c 100644 --- a/tools/testing/selftests/riscv/vector/vstate_prctl.c +++ b/tools/testing/selftests/riscv/vector/vstate_prctl.c @@ -88,16 +88,16 @@ int main(void) return -2; } - if (!(pair.value & RISCV_HWPROBE_IMA_V)) { + if (!(pair.value & RISCV_HWPROBE_EXT_ZVE32X)) { rc = prctl(PR_RISCV_V_GET_CONTROL); if (rc != -1 || errno != EINVAL) { - ksft_test_result_fail("GET_CONTROL should fail on kernel/hw without V\n"); + ksft_test_result_fail("GET_CONTROL should fail on kernel/hw without ZVE32X\n"); return -3; } rc = prctl(PR_RISCV_V_SET_CONTROL, PR_RISCV_V_VSTATE_CTRL_ON); if (rc != -1 || errno != EINVAL) { - ksft_test_result_fail("GET_CONTROL should fail on kernel/hw without V\n"); + ksft_test_result_fail("SET_CONTROL should fail on kernel/hw without ZVE32X\n"); return -4; } -- cgit v1.2.3-73-gaa49b From 46253c4ae96162a840ad65c1394de63796d7798a Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 22 May 2024 
10:09:36 +0200 Subject: selftests/bpf: use section names understood by libbpf in test_sockmap libbpf can deduce program type and attach type from the ELF section name. We don't need to pass it out-of-band if we switch to libbpf convention [1]. [1] https://docs.kernel.org/bpf/libbpf/program_types.html Signed-off-by: Jakub Sitnicki Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240522080936.2475833-1-jakub@cloudflare.com --- .../selftests/bpf/progs/test_sockmap_kern.h | 17 ++++++------ tools/testing/selftests/bpf/test_sockmap.c | 31 ---------------------- 2 files changed, 9 insertions(+), 39 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h index 99d2ea9fb658..3dff0813730b 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h +++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h @@ -92,7 +92,7 @@ struct { __uint(value_size, sizeof(int)); } tls_sock_map SEC(".maps"); -SEC("sk_skb1") +SEC("sk_skb/stream_parser") int bpf_prog1(struct __sk_buff *skb) { int *f, two = 2; @@ -104,7 +104,7 @@ int bpf_prog1(struct __sk_buff *skb) return skb->len; } -SEC("sk_skb2") +SEC("sk_skb/stream_verdict") int bpf_prog2(struct __sk_buff *skb) { __u32 lport = skb->local_port; @@ -151,7 +151,7 @@ static inline void bpf_write_pass(struct __sk_buff *skb, int offset) memcpy(c + offset, "PASS", 4); } -SEC("sk_skb3") +SEC("sk_skb/stream_verdict") int bpf_prog3(struct __sk_buff *skb) { int err, *f, ret = SK_PASS; @@ -233,7 +233,7 @@ int bpf_sockmap(struct bpf_sock_ops *skops) return 0; } -SEC("sk_msg1") +SEC("sk_msg") int bpf_prog4(struct sk_msg_md *msg) { int *bytes, zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5; @@ -263,7 +263,7 @@ int bpf_prog4(struct sk_msg_md *msg) return SK_PASS; } -SEC("sk_msg2") +SEC("sk_msg") int bpf_prog6(struct sk_msg_md *msg) { int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5, key = 0; @@ -308,7 
+308,7 @@ int bpf_prog6(struct sk_msg_md *msg) #endif } -SEC("sk_msg3") +SEC("sk_msg") int bpf_prog8(struct sk_msg_md *msg) { void *data_end = (void *)(long) msg->data_end; @@ -329,7 +329,8 @@ int bpf_prog8(struct sk_msg_md *msg) return SK_PASS; } -SEC("sk_msg4") + +SEC("sk_msg") int bpf_prog9(struct sk_msg_md *msg) { void *data_end = (void *)(long) msg->data_end; @@ -347,7 +348,7 @@ int bpf_prog9(struct sk_msg_md *msg) return SK_PASS; } -SEC("sk_msg5") +SEC("sk_msg") int bpf_prog10(struct sk_msg_md *msg) { int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop; diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index 4499b3cfc3a6..ddc6a9cef36f 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -1783,30 +1783,6 @@ char *map_names[] = { "tls_sock_map", }; -int prog_attach_type[] = { - BPF_SK_SKB_STREAM_PARSER, - BPF_SK_SKB_STREAM_VERDICT, - BPF_SK_SKB_STREAM_VERDICT, - BPF_CGROUP_SOCK_OPS, - BPF_SK_MSG_VERDICT, - BPF_SK_MSG_VERDICT, - BPF_SK_MSG_VERDICT, - BPF_SK_MSG_VERDICT, - BPF_SK_MSG_VERDICT, -}; - -int prog_type[] = { - BPF_PROG_TYPE_SK_SKB, - BPF_PROG_TYPE_SK_SKB, - BPF_PROG_TYPE_SK_SKB, - BPF_PROG_TYPE_SOCK_OPS, - BPF_PROG_TYPE_SK_MSG, - BPF_PROG_TYPE_SK_MSG, - BPF_PROG_TYPE_SK_MSG, - BPF_PROG_TYPE_SK_MSG, - BPF_PROG_TYPE_SK_MSG, -}; - static int populate_progs(char *bpf_file) { struct bpf_program *prog; @@ -1825,13 +1801,6 @@ static int populate_progs(char *bpf_file) return -1; } - bpf_object__for_each_program(prog, obj) { - bpf_program__set_type(prog, prog_type[i]); - bpf_program__set_expected_attach_type(prog, - prog_attach_type[i]); - i++; - } - i = bpf_object__load(obj); i = 0; bpf_object__for_each_program(prog, obj) { -- cgit v1.2.3-73-gaa49b From 020e6c22bd6e67592f38b47d0f1926a831482560 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Fri, 10 May 2024 15:36:57 -0700 Subject: kcsan: Add example to data_race() kerneldoc header Although the data_race() kerneldoc header accurately states what it does, some of the implications and usage patterns are non-obvious. Therefore, add a brief locking example and also state how to have KCSAN ignore accesses while also preventing the compiler from folding, spindling, or otherwise mutilating the access. [ paulmck: Apply Bart Van Assche feedback. ] [ paulmck: Apply feedback from Marco Elver. ] Reported-by: Bart Van Assche Signed-off-by: Paul E. McKenney Cc: Marco Elver Cc: Breno Leitao Cc: Jens Axboe --- include/linux/compiler.h | 10 ++++++++- .../memory-model/Documentation/access-marking.txt | 24 +++++++++++++++++++++- 2 files changed, 32 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 8c252e073bd8..68a24a3a6979 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -194,9 +194,17 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, * This data_race() macro is useful for situations in which data races * should be forgiven. One example is diagnostic code that accesses * shared variables but is not a part of the core synchronization design. + * For example, if accesses to a given variable are protected by a lock, + * except for diagnostic code, then the accesses under the lock should + * be plain C-language accesses and those in the diagnostic code should + * use data_race(). This way, KCSAN will complain if buggy lockless + * accesses to that variable are introduced, even if the buggy accesses + * are protected by READ_ONCE() or WRITE_ONCE(). * * This macro *does not* affect normal code generation, but is a hint - * to tooling that data races here are to be ignored. + * to tooling that data races here are to be ignored. 
If the access must + * be atomic *and* KCSAN should ignore the access, use both data_race() + * and READ_ONCE(), for example, data_race(READ_ONCE(x)). */ #define data_race(expr) \ ({ \ diff --git a/tools/memory-model/Documentation/access-marking.txt b/tools/memory-model/Documentation/access-marking.txt index 65778222183e..3377d01bb512 100644 --- a/tools/memory-model/Documentation/access-marking.txt +++ b/tools/memory-model/Documentation/access-marking.txt @@ -24,6 +24,11 @@ The Linux kernel provides the following access-marking options: 4. WRITE_ONCE(), for example, "WRITE_ONCE(a, b);" The various forms of atomic_set() also fit in here. +5. __data_racy, for example "int __data_racy a;" + +6. KCSAN's negative-marking assertions, ASSERT_EXCLUSIVE_ACCESS() + and ASSERT_EXCLUSIVE_WRITER(), are described in the + "ACCESS-DOCUMENTATION OPTIONS" section below. These may be used in combination, as shown in this admittedly improbable example: @@ -205,6 +210,23 @@ because doing otherwise prevents KCSAN from detecting violations of your code's synchronization rules. +Use of __data_racy +------------------ + +Adding the __data_racy type qualifier to the declaration of a variable +causes KCSAN to treat all accesses to that variable as if they were +enclosed by data_race(). However, __data_racy does not affect the +compiler, though one could imagine hardened kernel builds treating the +__data_racy type qualifier as if it was the volatile keyword. + +Note well that __data_racy is subject to the same pointer-declaration +rules as are other type qualifiers such as const and volatile. +For example: + + int __data_racy *p; // Pointer to data-racy data. + int *__data_racy p; // Data-racy pointer to non-data-racy data. + + ACCESS-DOCUMENTATION OPTIONS ============================ @@ -342,7 +364,7 @@ as follows: Because foo is read locklessly, all accesses are marked. The purpose of the ASSERT_EXCLUSIVE_WRITER() is to allow KCSAN to check for a buggy -concurrent lockless write. 
+concurrent write, whether marked or not. Lock-Protected Writes With Heuristic Lockless Reads -- cgit v1.2.3-73-gaa49b From 73287fe228721b05690e671adbcccc6cf5435be6 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Wed, 29 May 2024 23:59:39 -0700 Subject: bpf: pass bpf_struct_ops_link to callbacks in bpf_struct_ops. Pass an additional pointer of bpf_struct_ops_link to callback function reg, unreg, and update provided by subsystems defined in bpf_struct_ops. A bpf_struct_ops_map can be registered for multiple links. Passing a pointer of bpf_struct_ops_link helps subsystems to distinguish them. This pointer will be used in the later patches to let the subsystem initiate a detachment on a link that was registered to it previously. Signed-off-by: Kui-Feng Lee Link: https://lore.kernel.org/r/20240530065946.979330-2-thinker.li@gmail.com Signed-off-by: Martin KaFai Lau --- include/linux/bpf.h | 6 +++--- kernel/bpf/bpf_struct_ops.c | 10 +++++----- net/bpf/bpf_dummy_struct_ops.c | 4 ++-- net/ipv4/bpf_tcp_ca.c | 6 +++--- tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c | 4 ++-- tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c | 6 +++--- 6 files changed, 18 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5e694a308081..19f8836382fc 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1730,9 +1730,9 @@ struct bpf_struct_ops { int (*init_member)(const struct btf_type *t, const struct btf_member *member, void *kdata, const void *udata); - int (*reg)(void *kdata); - void (*unreg)(void *kdata); - int (*update)(void *kdata, void *old_kdata); + int (*reg)(void *kdata, struct bpf_link *link); + void (*unreg)(void *kdata, struct bpf_link *link); + int (*update)(void *kdata, void *old_kdata, struct bpf_link *link); int (*validate)(void *kdata); void *cfi_stubs; struct module *owner; diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 86c7884abaf8..1542dded7489 100644 --- 
a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -757,7 +757,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, goto unlock; } - err = st_ops->reg(kdata); + err = st_ops->reg(kdata, NULL); if (likely(!err)) { /* This refcnt increment on the map here after * 'st_ops->reg()' is secure since the state of the @@ -805,7 +805,7 @@ static long bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key) BPF_STRUCT_OPS_STATE_TOBEFREE); switch (prev_state) { case BPF_STRUCT_OPS_STATE_INUSE: - st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data); + st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, NULL); bpf_map_put(map); return 0; case BPF_STRUCT_OPS_STATE_TOBEFREE: @@ -1060,7 +1060,7 @@ static void bpf_struct_ops_map_link_dealloc(struct bpf_link *link) /* st_link->map can be NULL if * bpf_struct_ops_link_create() fails to register. */ - st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data); + st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, link); bpf_map_put(&st_map->map); } kfree(st_link); @@ -1125,7 +1125,7 @@ static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map goto err_out; } - err = st_map->st_ops_desc->st_ops->update(st_map->kvalue.data, old_st_map->kvalue.data); + err = st_map->st_ops_desc->st_ops->update(st_map->kvalue.data, old_st_map->kvalue.data, link); if (err) goto err_out; @@ -1176,7 +1176,7 @@ int bpf_struct_ops_link_create(union bpf_attr *attr) if (err) goto err_out; - err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data); + err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data, &link->link); if (err) { bpf_link_cleanup(&link_primer); link = NULL; diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c index 891cdf61c65a..3ea52b05adfb 100644 --- a/net/bpf/bpf_dummy_struct_ops.c +++ b/net/bpf/bpf_dummy_struct_ops.c @@ -272,12 +272,12 @@ static int bpf_dummy_init_member(const struct btf_type *t, return -EOPNOTSUPP; } -static int 
bpf_dummy_reg(void *kdata) +static int bpf_dummy_reg(void *kdata, struct bpf_link *link) { return -EOPNOTSUPP; } -static void bpf_dummy_unreg(void *kdata) +static void bpf_dummy_unreg(void *kdata, struct bpf_link *link) { } diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 18227757ec0c..3f88d0961e5b 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -260,17 +260,17 @@ static int bpf_tcp_ca_check_member(const struct btf_type *t, return 0; } -static int bpf_tcp_ca_reg(void *kdata) +static int bpf_tcp_ca_reg(void *kdata, struct bpf_link *link) { return tcp_register_congestion_control(kdata); } -static void bpf_tcp_ca_unreg(void *kdata) +static void bpf_tcp_ca_unreg(void *kdata, struct bpf_link *link) { tcp_unregister_congestion_control(kdata); } -static int bpf_tcp_ca_update(void *kdata, void *old_kdata) +static int bpf_tcp_ca_update(void *kdata, void *old_kdata, struct bpf_link *link) { return tcp_update_congestion_control(kdata, old_kdata); } diff --git a/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c b/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c index b1dd889d5d7d..948eb3962732 100644 --- a/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c +++ b/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c @@ -22,12 +22,12 @@ static int dummy_init_member(const struct btf_type *t, return 0; } -static int dummy_reg(void *kdata) +static int dummy_reg(void *kdata, struct bpf_link *link) { return 0; } -static void dummy_unreg(void *kdata) +static void dummy_unreg(void *kdata, struct bpf_link *link) { } diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 2a18bd320e92..0a09732cde4b 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -820,7 +820,7 @@ static const struct bpf_verifier_ops bpf_testmod_verifier_ops = { .is_valid_access = 
bpf_testmod_ops_is_valid_access, }; -static int bpf_dummy_reg(void *kdata) +static int bpf_dummy_reg(void *kdata, struct bpf_link *link) { struct bpf_testmod_ops *ops = kdata; @@ -835,7 +835,7 @@ static int bpf_dummy_reg(void *kdata) return 0; } -static void bpf_dummy_unreg(void *kdata) +static void bpf_dummy_unreg(void *kdata, struct bpf_link *link) { } @@ -871,7 +871,7 @@ struct bpf_struct_ops bpf_bpf_testmod_ops = { .owner = THIS_MODULE, }; -static int bpf_dummy_reg2(void *kdata) +static int bpf_dummy_reg2(void *kdata, struct bpf_link *link) { struct bpf_testmod_ops2 *ops = kdata; -- cgit v1.2.3-73-gaa49b From 1a4b858b6a045828de1b536cfab7819c50864ed6 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Wed, 29 May 2024 23:59:43 -0700 Subject: selftests/bpf: test struct_ops with epoll Verify whether a user space program is informed through epoll with EPOLLHUP when a struct_ops object is detached. The BPF code in selftests/bpf/progs/struct_ops_module.c has become complex. Therefore, struct_ops_detach.c has been added to segregate the BPF code for detachment tests from the BPF code for other tests based on the recommendation of Andrii Nakryiko. 
Suggested-by: Andrii Nakryiko Signed-off-by: Kui-Feng Lee Link: https://lore.kernel.org/r/20240530065946.979330-6-thinker.li@gmail.com Signed-off-by: Martin KaFai Lau --- .../bpf/prog_tests/test_struct_ops_module.c | 57 ++++++++++++++++++++++ .../selftests/bpf/progs/struct_ops_detach.c | 10 ++++ 2 files changed, 67 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/struct_ops_detach.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c index 29e183a80f49..bbcf12696a6b 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c +++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c @@ -3,9 +3,12 @@ #include #include +#include + #include "struct_ops_module.skel.h" #include "struct_ops_nulled_out_cb.skel.h" #include "struct_ops_forgotten_cb.skel.h" +#include "struct_ops_detach.skel.h" static void check_map_info(struct bpf_map_info *info) { @@ -242,6 +245,58 @@ cleanup: struct_ops_forgotten_cb__destroy(skel); } +/* Detach a link from a user space program */ +static void test_detach_link(void) +{ + struct epoll_event ev, events[2]; + struct struct_ops_detach *skel; + struct bpf_link *link = NULL; + int fd, epollfd = -1, nfds; + int err; + + skel = struct_ops_detach__open_and_load(); + if (!ASSERT_OK_PTR(skel, "struct_ops_detach__open_and_load")) + return; + + link = bpf_map__attach_struct_ops(skel->maps.testmod_do_detach); + if (!ASSERT_OK_PTR(link, "attach_struct_ops")) + goto cleanup; + + fd = bpf_link__fd(link); + if (!ASSERT_GE(fd, 0, "link_fd")) + goto cleanup; + + epollfd = epoll_create1(0); + if (!ASSERT_GE(epollfd, 0, "epoll_create1")) + goto cleanup; + + ev.events = EPOLLHUP; + ev.data.fd = fd; + err = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev); + if (!ASSERT_OK(err, "epoll_ctl")) + goto cleanup; + + err = bpf_link__detach(link); + if (!ASSERT_OK(err, "detach_link")) + goto cleanup; + + /* Wait for 
EPOLLHUP */ + nfds = epoll_wait(epollfd, events, 2, 500); + if (!ASSERT_EQ(nfds, 1, "epoll_wait")) + goto cleanup; + + if (!ASSERT_EQ(events[0].data.fd, fd, "epoll_wait_fd")) + goto cleanup; + if (!ASSERT_TRUE(events[0].events & EPOLLHUP, "events[0].events")) + goto cleanup; + +cleanup: + if (epollfd >= 0) + close(epollfd); + bpf_link__destroy(link); + struct_ops_detach__destroy(skel); +} + void serial_test_struct_ops_module(void) { if (test__start_subtest("struct_ops_load")) @@ -254,5 +309,7 @@ void serial_test_struct_ops_module(void) test_struct_ops_nulled_out_cb(); if (test__start_subtest("struct_ops_forgotten_cb")) test_struct_ops_forgotten_cb(); + if (test__start_subtest("test_detach_link")) + test_detach_link(); } diff --git a/tools/testing/selftests/bpf/progs/struct_ops_detach.c b/tools/testing/selftests/bpf/progs/struct_ops_detach.c new file mode 100644 index 000000000000..56b787a89876 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/struct_ops_detach.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#include +#include +#include "../bpf_testmod/bpf_testmod.h" + +char _license[] SEC("license") = "GPL"; + +SEC(".struct_ops.link") +struct bpf_testmod_ops testmod_do_detach; -- cgit v1.2.3-73-gaa49b From d14c1fac0c9722c4ec79589921c9e798601ca9d5 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Wed, 29 May 2024 23:59:46 -0700 Subject: bpftool: Change pid_iter.bpf.c to comply with the change of bpf_link_fops. To support epoll, a new instance of file_operations, bpf_link_fops_poll, has been added for links that support epoll. The pid_iter.bpf.c checks f_ops for links and other BPF objects. The check should fail for struct_ops links without this patch. 
Acked-by: Quentin Monnet Signed-off-by: Kui-Feng Lee Link: https://lore.kernel.org/r/20240530065946.979330-9-thinker.li@gmail.com Signed-off-by: Martin KaFai Lau --- tools/bpf/bpftool/skeleton/pid_iter.bpf.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c index 7bdbcac3cf62..948dde25034e 100644 --- a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c +++ b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c @@ -29,6 +29,7 @@ enum bpf_link_type___local { }; extern const void bpf_link_fops __ksym; +extern const void bpf_link_fops_poll __ksym __weak; extern const void bpf_map_fops __ksym; extern const void bpf_prog_fops __ksym; extern const void btf_fops __ksym; @@ -84,7 +85,11 @@ int iter(struct bpf_iter__task_file *ctx) fops = &btf_fops; break; case BPF_OBJ_LINK: - fops = &bpf_link_fops; + if (&bpf_link_fops_poll && + file->f_op == &bpf_link_fops_poll) + fops = &bpf_link_fops_poll; + else + fops = &bpf_link_fops; break; default: return 0; -- cgit v1.2.3-73-gaa49b From ccf23c916ca35239a924ec8649cc88b1ef25d3d9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 29 May 2024 12:20:31 -0700 Subject: tools: ynl: make the attr and msg helpers more C++ friendly Folks working on a C++ codegen would like to reuse the attribute helpers directly. Add the few necessary casts, it's not too ugly. 
Reviewed-by: Donald Hunter Reviewed-by: Nicolas Dichtel Link: https://lore.kernel.org/r/20240529192031.3785761-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl-priv.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl-priv.h b/tools/net/ynl/lib/ynl-priv.h index 6cf890080dc0..80791c34730c 100644 --- a/tools/net/ynl/lib/ynl-priv.h +++ b/tools/net/ynl/lib/ynl-priv.h @@ -79,7 +79,7 @@ static inline void *ynl_dump_obj_next(void *obj) struct ynl_dump_list_type *list; uptr -= offsetof(struct ynl_dump_list_type, data); - list = (void *)uptr; + list = (struct ynl_dump_list_type *)uptr; uptr = (unsigned long)list->next; uptr += offsetof(struct ynl_dump_list_type, data); @@ -139,7 +139,7 @@ int ynl_error_parse(struct ynl_parse_arg *yarg, const char *msg); static inline struct nlmsghdr *ynl_nlmsg_put_header(void *buf) { - struct nlmsghdr *nlh = buf; + struct nlmsghdr *nlh = (struct nlmsghdr *)buf; memset(nlh, 0, sizeof(*nlh)); nlh->nlmsg_len = NLMSG_HDRLEN; @@ -196,7 +196,7 @@ static inline void *ynl_attr_data(const struct nlattr *attr) static inline void *ynl_attr_data_end(const struct nlattr *attr) { - return ynl_attr_data(attr) + ynl_attr_data_len(attr); + return (char *)ynl_attr_data(attr) + ynl_attr_data_len(attr); } #define ynl_attr_for_each(attr, nlh, fixed_hdr_sz) \ @@ -228,7 +228,7 @@ ynl_attr_next(const void *end, const struct nlattr *prev) { struct nlattr *attr; - attr = (void *)((char *)prev + NLA_ALIGN(prev->nla_len)); + attr = (struct nlattr *)((char *)prev + NLA_ALIGN(prev->nla_len)); return ynl_attr_if_good(end, attr); } @@ -237,8 +237,8 @@ ynl_attr_first(const void *start, size_t len, size_t skip) { struct nlattr *attr; - attr = (void *)((char *)start + NLMSG_ALIGN(skip)); - return ynl_attr_if_good(start + len, attr); + attr = (struct nlattr *)((char *)start + NLMSG_ALIGN(skip)); + return ynl_attr_if_good((char *)start + len, attr); } static inline bool 
@@ -262,9 +262,9 @@ ynl_attr_nest_start(struct nlmsghdr *nlh, unsigned int attr_type) struct nlattr *attr; if (__ynl_attr_put_overflow(nlh, 0)) - return ynl_nlmsg_end_addr(nlh) - NLA_HDRLEN; + return (struct nlattr *)ynl_nlmsg_end_addr(nlh) - 1; - attr = ynl_nlmsg_end_addr(nlh); + attr = (struct nlattr *)ynl_nlmsg_end_addr(nlh); attr->nla_type = attr_type | NLA_F_NESTED; nlh->nlmsg_len += NLA_HDRLEN; @@ -286,7 +286,7 @@ ynl_attr_put(struct nlmsghdr *nlh, unsigned int attr_type, if (__ynl_attr_put_overflow(nlh, size)) return; - attr = ynl_nlmsg_end_addr(nlh); + attr = (struct nlattr *)ynl_nlmsg_end_addr(nlh); attr->nla_type = attr_type; attr->nla_len = NLA_HDRLEN + size; @@ -305,10 +305,10 @@ ynl_attr_put_str(struct nlmsghdr *nlh, unsigned int attr_type, const char *str) if (__ynl_attr_put_overflow(nlh, len)) return; - attr = ynl_nlmsg_end_addr(nlh); + attr = (struct nlattr *)ynl_nlmsg_end_addr(nlh); attr->nla_type = attr_type; - strcpy(ynl_attr_data(attr), str); + strcpy((char *)ynl_attr_data(attr), str); attr->nla_len = NLA_HDRLEN + NLA_ALIGN(len); nlh->nlmsg_len += NLMSG_ALIGN(attr->nla_len); -- cgit v1.2.3-73-gaa49b From 531876c80004ecff7bfdbd8ba6c6b48835ef5e22 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 29 May 2024 15:32:39 -0700 Subject: libbpf: keep FD_CLOEXEC flag when dup()'ing FD Make sure to preserve and/or enforce FD_CLOEXEC flag on duped FDs. Use dup3() with O_CLOEXEC flag for that. Without this fix libbpf effectively clears FD_CLOEXEC flag on each of BPF map/prog FD, which is definitely not the right or expected behavior. 
Reported-by: Lennart Poettering Fixes: bc308d011ab8 ("libbpf: call dup2() syscall directly") Signed-off-by: Andrii Nakryiko Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20240529223239.504241-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf_internal.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index a0dcfb82e455..7e7e686008c6 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -597,13 +597,9 @@ static inline int ensure_good_fd(int fd) return fd; } -static inline int sys_dup2(int oldfd, int newfd) +static inline int sys_dup3(int oldfd, int newfd, int flags) { -#ifdef __NR_dup2 - return syscall(__NR_dup2, oldfd, newfd); -#else - return syscall(__NR_dup3, oldfd, newfd, 0); -#endif + return syscall(__NR_dup3, oldfd, newfd, flags); } /* Point *fixed_fd* to the same file that *tmp_fd* points to. @@ -614,7 +610,7 @@ static inline int reuse_fd(int fixed_fd, int tmp_fd) { int err; - err = sys_dup2(tmp_fd, fixed_fd); + err = sys_dup3(tmp_fd, fixed_fd, O_CLOEXEC); err = err < 0 ? -errno : 0; close(tmp_fd); /* clean up temporary FD */ return err; -- cgit v1.2.3-73-gaa49b From 5b5233fb81bfecbfb7502178a9cf6790dde04a2c Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Fri, 31 May 2024 01:27:22 +0200 Subject: selftests: net: tests net.core.{r,w}mem_{default,max} sysctls in a netns Add a selftest which checks that the sysctl is present in a netns, that the value is read from the init one, and that it's readonly. 
Signed-off-by: Matteo Croce Link: https://lore.kernel.org/r/20240530232722.45255-3-technoboy85@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/netns-sysctl.sh | 40 +++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100755 tools/testing/selftests/net/netns-sysctl.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index bd01e4a0be2c..6da63d1831c1 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -53,6 +53,7 @@ TEST_PROGS += bind_bhash.sh TEST_PROGS += ip_local_port_range.sh TEST_PROGS += rps_default_mask.sh TEST_PROGS += big_tcp.sh +TEST_PROGS += netns-sysctl.sh TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any diff --git a/tools/testing/selftests/net/netns-sysctl.sh b/tools/testing/selftests/net/netns-sysctl.sh new file mode 100755 index 000000000000..45c34a3b9aae --- /dev/null +++ b/tools/testing/selftests/net/netns-sysctl.sh @@ -0,0 +1,40 @@ +#!/bin/bash -e +# SPDX-License-Identifier: GPL-2.0 +# +# This test checks that the network buffer sysctls are present +# in a network namespaces, and that they are readonly. + +source lib.sh + +cleanup() { + cleanup_ns $test_ns +} + +trap cleanup EXIT + +fail() { + echo "ERROR: $*" >&2 + exit 1 +} + +setup_ns test_ns + +for sc in {r,w}mem_{default,max}; do + # check that this is writable in a netns + [ -w "/proc/sys/net/core/$sc" ] || + fail "$sc isn't writable in the init netns!" + + # change the value in the host netns + sysctl -qw "net.core.$sc=300000" || + fail "Can't write $sc in init netns!" + + # check that the value is read from the init netns + [ "$(ip netns exec $test_ns sysctl -n "net.core.$sc")" -eq 300000 ] || + fail "Value for $sc mismatch!" 
+ + # check that this isn't writable in a netns + ip netns exec $test_ns [ -w "/proc/sys/net/core/$sc" ] && + fail "$sc is writable in a netns!" +done + +echo 'Test passed OK' -- cgit v1.2.3-73-gaa49b From dfa7c9ffa607235119e029b70ced72f29059f8f3 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Mon, 3 Jun 2024 00:41:10 +0100 Subject: selftests/bpf: Remove unused struct 'scale_test_def' 'scale_test_def' is unused since commit 3762a39ce85f ("selftests/bpf: Split out bpf_verif_scale selftests into multiple tests"). Remove it. Signed-off-by: Dr. David Alan Gilbert Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20240602234112.225107-2-linux@treblig.org --- tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c index 4c6ada5b270b..73f669014b69 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c @@ -45,12 +45,6 @@ err_out: return err; } -struct scale_test_def { - const char *file; - enum bpf_prog_type attach_type; - bool fails; -}; - static void scale_test(const char *file, enum bpf_prog_type attach_type, bool should_fail) -- cgit v1.2.3-73-gaa49b From 3f67639d8e582c89c79549c619b22a00dd330e4e Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Mon, 3 Jun 2024 00:41:11 +0100 Subject: selftests/bpf: Remove unused 'key_t' structs 'key_t' is unused in a couple of files since the original commit 60dd49ea6539 ("selftests/bpf: Add test for bpf array map iterators"). Remove it. Signed-off-by: Dr. 
David Alan Gilbert Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20240602234112.225107-3-linux@treblig.org --- tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c | 6 ------ tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c | 6 ------ 2 files changed, 12 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c index c5969ca6f26b..564835ba7d51 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c @@ -6,12 +6,6 @@ char _license[] SEC("license") = "GPL"; -struct key_t { - int a; - int b; - int c; -}; - struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(max_entries, 3); diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c index 85fa710fad90..9f0e0705b2bf 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c @@ -6,12 +6,6 @@ char _license[] SEC("license") = "GPL"; -struct key_t { - int a; - int b; - int c; -}; - struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); __uint(max_entries, 3); -- cgit v1.2.3-73-gaa49b From a450d36b05fa225b071ce9fbf522544caea06594 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Mon, 3 Jun 2024 00:41:12 +0100 Subject: selftests/bpf: Remove unused struct 'libcap' 'libcap' is unused since commit b1c2768a82b9 ("bpf: selftests: Remove libcap usage from test_verifier"). Remove it. Signed-off-by: Dr. 
David Alan Gilbert Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20240602234112.225107-4-linux@treblig.org --- tools/testing/selftests/bpf/test_verifier.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index df04bda1c927..610392dfc4fb 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1237,11 +1237,6 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, fixup_prog_kfuncs(prog, fd_array, test->fixup_kfunc_btf_id); } -struct libcap { - struct __user_cap_header_struct hdr; - struct __user_cap_data_struct data[2]; -}; - static int set_admin(bool admin) { int err; -- cgit v1.2.3-73-gaa49b From ce5249b91e34d81255c00950d415ebd4c3cae8d4 Mon Sep 17 00:00:00 2001 From: Swan Beaujard Date: Mon, 3 Jun 2024 00:58:12 +0200 Subject: bpftool: Fix typo in MAX_NUM_METRICS macro name Correct typo in bpftool profiler and change all instances of 'MATRICS' to 'METRICS' in the profiler.bpf.c file. 
Signed-off-by: Swan Beaujard Signed-off-by: Daniel Borkmann Acked-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20240602225812.81171-1-beaujardswan@gmail.com --- tools/bpf/bpftool/skeleton/profiler.bpf.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/skeleton/profiler.bpf.c b/tools/bpf/bpftool/skeleton/profiler.bpf.c index 2f80edc682f1..f48c783cb9f7 100644 --- a/tools/bpf/bpftool/skeleton/profiler.bpf.c +++ b/tools/bpf/bpftool/skeleton/profiler.bpf.c @@ -40,17 +40,17 @@ struct { const volatile __u32 num_cpu = 1; const volatile __u32 num_metric = 1; -#define MAX_NUM_MATRICS 4 +#define MAX_NUM_METRICS 4 SEC("fentry/XXX") int BPF_PROG(fentry_XXX) { - struct bpf_perf_event_value___local *ptrs[MAX_NUM_MATRICS]; + struct bpf_perf_event_value___local *ptrs[MAX_NUM_METRICS]; u32 key = bpf_get_smp_processor_id(); u32 i; /* look up before reading, to reduce error */ - for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) { + for (i = 0; i < num_metric && i < MAX_NUM_METRICS; i++) { u32 flag = i; ptrs[i] = bpf_map_lookup_elem(&fentry_readings, &flag); @@ -58,7 +58,7 @@ int BPF_PROG(fentry_XXX) return 0; } - for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) { + for (i = 0; i < num_metric && i < MAX_NUM_METRICS; i++) { struct bpf_perf_event_value___local reading; int err; @@ -99,14 +99,14 @@ fexit_update_maps(u32 id, struct bpf_perf_event_value___local *after) SEC("fexit/XXX") int BPF_PROG(fexit_XXX) { - struct bpf_perf_event_value___local readings[MAX_NUM_MATRICS]; + struct bpf_perf_event_value___local readings[MAX_NUM_METRICS]; u32 cpu = bpf_get_smp_processor_id(); u32 i, zero = 0; int err; u64 *count; /* read all events before updating the maps, to reduce error */ - for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) { + for (i = 0; i < num_metric && i < MAX_NUM_METRICS; i++) { err = bpf_perf_event_read_value(&events, cpu + i * num_cpu, (void *)(readings + i), sizeof(*readings)); @@ 
-116,7 +116,7 @@ int BPF_PROG(fexit_XXX) count = bpf_map_lookup_elem(&counts, &zero); if (count) { *count += 1; - for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) + for (i = 0; i < num_metric && i < MAX_NUM_METRICS; i++) fexit_update_maps(i, &readings[i]); } return 0; -- cgit v1.2.3-73-gaa49b From f626279dea33ba551839f2321511ad127e5a58e8 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Mon, 3 Jun 2024 00:55:29 +0100 Subject: KVM: selftests: remove unused struct 'memslot_antagonist_args' 'memslot_antagonist_args' is unused since the original commit f73a3446252e ("KVM: selftests: Add memslot modification stress test"). Remove it. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Zenghui Yu Link: https://lore.kernel.org/r/20240602235529.228204-1-linux@treblig.org Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/memslot_modification_stress_test.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c index 05fcf902e067..49f162573126 100644 --- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c +++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c @@ -53,12 +53,6 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) } } -struct memslot_antagonist_args { - struct kvm_vm *vm; - useconds_t delay; - uint64_t nr_modifications; -}; - static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay, uint64_t nr_modifications) { -- cgit v1.2.3-73-gaa49b From d95ba15b97847f4ae520db83bd98b61d50fb3068 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 23 May 2024 14:49:57 +0800 Subject: selftests/bpf: Fix tx_prog_fd values in test_sockmap The values of tx_prog_fd in run_options() should not be 0, so set it as -1 in else branch, and test it using "if (tx_prog_fd > 0)" condition, not "if (tx_prog_fd)" or "if (tx_prog_fd >= 0)". 
Signed-off-by: Geliang Tang Signed-off-by: Daniel Borkmann Tested-by: Jakub Sitnicki Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/08b20ffc544324d40939efeae93800772a91a58e.1716446893.git.tanggeliang@kylinos.cn --- tools/testing/selftests/bpf/test_sockmap.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index ddc6a9cef36f..892a690c4e7b 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -1027,9 +1027,9 @@ run: else if (txmsg_drop) tx_prog_fd = prog_fd[8]; else - tx_prog_fd = 0; + tx_prog_fd = -1; - if (tx_prog_fd) { + if (tx_prog_fd > 0) { int redir_fd, i = 0; err = bpf_prog_attach(tx_prog_fd, @@ -1285,7 +1285,7 @@ out: bpf_prog_detach2(prog_fd[0], map_fd[8], BPF_SK_SKB_STREAM_PARSER); bpf_prog_detach2(prog_fd[2], map_fd[8], BPF_SK_SKB_STREAM_VERDICT); - if (tx_prog_fd >= 0) + if (tx_prog_fd > 0) bpf_prog_detach2(tx_prog_fd, map_fd[1], BPF_SK_MSG_VERDICT); for (i = 0; i < 8; i++) { -- cgit v1.2.3-73-gaa49b From a9f0ea175948c21640ae1cc145e679db7fc45fa6 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 23 May 2024 14:49:58 +0800 Subject: selftests/bpf: Drop duplicate definition of i in test_sockmap There's already a definition of i in run_options() at the beginning, no need to define a new one in "if (tx_prog_fd > 0)" block. 
Signed-off-by: Geliang Tang Signed-off-by: Daniel Borkmann Tested-by: Jakub Sitnicki Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/8d690682330a59361562bca75d6903253d16f312.1716446893.git.tanggeliang@kylinos.cn --- tools/testing/selftests/bpf/test_sockmap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index 892a690c4e7b..6d724fea59fb 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -1030,7 +1030,7 @@ run: tx_prog_fd = -1; if (tx_prog_fd > 0) { - int redir_fd, i = 0; + int redir_fd; err = bpf_prog_attach(tx_prog_fd, map_fd[1], BPF_SK_MSG_VERDICT, 0); @@ -1041,6 +1041,7 @@ run: goto out; } + i = 0; err = bpf_map_update_elem(map_fd[1], &i, &c1, BPF_ANY); if (err) { fprintf(stderr, -- cgit v1.2.3-73-gaa49b From 3f32a115f61d31049e3e91d469bca849f712a979 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 23 May 2024 14:49:59 +0800 Subject: selftests/bpf: Use bpf_link attachments in test_sockmap Switch attachments to bpf_link using bpf_program__attach_sockmap() instead of bpf_prog_attach(). This patch adds a new array progs[] to replace prog_fd[] array, set in populate_progs() for each program in bpf object. And another new array links[] to save the attached bpf_link. It is initialized as NULL in populate_progs, set as the return values of bpf_program__attach_sockmap(), and detached by bpf_link__detach(). 
Signed-off-by: Geliang Tang Signed-off-by: Daniel Borkmann Tested-by: Jakub Sitnicki Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/32cf8376a810e2e9c719f8e4cfb97132ed2d1f9c.1716446893.git.tanggeliang@kylinos.cn --- tools/testing/selftests/bpf/test_sockmap.c | 59 ++++++++++++++++-------------- 1 file changed, 31 insertions(+), 28 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index 6d724fea59fb..81e7a57c727c 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -64,6 +64,8 @@ int failed; int map_fd[9]; struct bpf_map *maps[9]; int prog_fd[9]; +struct bpf_program *progs[9]; +struct bpf_link *links[9]; int txmsg_pass; int txmsg_redir; @@ -960,43 +962,39 @@ static int run_options(struct sockmap_options *options, int cg_fd, int test) /* Attach programs to sockmap */ if (!txmsg_omit_skb_parser) { - err = bpf_prog_attach(prog_fd[0], map_fd[0], - BPF_SK_SKB_STREAM_PARSER, 0); - if (err) { + links[0] = bpf_program__attach_sockmap(progs[0], map_fd[0]); + if (!links[0]) { fprintf(stderr, - "ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n", - prog_fd[0], map_fd[0], err, strerror(errno)); - return err; + "ERROR: bpf_program__attach_sockmap (sockmap %i->%i): (%s)\n", + bpf_program__fd(progs[0]), map_fd[0], strerror(errno)); + return -1; } } - err = bpf_prog_attach(prog_fd[1], map_fd[0], - BPF_SK_SKB_STREAM_VERDICT, 0); - if (err) { - fprintf(stderr, "ERROR: bpf_prog_attach (sockmap): %d (%s)\n", - err, strerror(errno)); - return err; + links[1] = bpf_program__attach_sockmap(progs[1], map_fd[0]); + if (!links[1]) { + fprintf(stderr, "ERROR: bpf_program__attach_sockmap (sockmap): (%s)\n", + strerror(errno)); + return -1; } /* Attach programs to TLS sockmap */ if (txmsg_ktls_skb) { if (!txmsg_omit_skb_parser) { - err = bpf_prog_attach(prog_fd[0], map_fd[8], - BPF_SK_SKB_STREAM_PARSER, 0); - if (err) { + links[2] = 
bpf_program__attach_sockmap(progs[0], map_fd[8]); + if (!links[2]) { fprintf(stderr, - "ERROR: bpf_prog_attach (TLS sockmap %i->%i): %d (%s)\n", - prog_fd[0], map_fd[8], err, strerror(errno)); - return err; + "ERROR: bpf_program__attach_sockmap (TLS sockmap %i->%i): (%s)\n", + bpf_program__fd(progs[0]), map_fd[8], strerror(errno)); + return -1; } } - err = bpf_prog_attach(prog_fd[2], map_fd[8], - BPF_SK_SKB_STREAM_VERDICT, 0); - if (err) { - fprintf(stderr, "ERROR: bpf_prog_attach (TLS sockmap): %d (%s)\n", - err, strerror(errno)); - return err; + links[3] = bpf_program__attach_sockmap(progs[2], map_fd[8]); + if (!links[3]) { + fprintf(stderr, "ERROR: bpf_program__attach_sockmap (TLS sockmap): (%s)\n", + strerror(errno)); + return -1; } } @@ -1281,10 +1279,11 @@ run: out: /* Detatch and zero all the maps */ bpf_prog_detach2(prog_fd[3], cg_fd, BPF_CGROUP_SOCK_OPS); - bpf_prog_detach2(prog_fd[0], map_fd[0], BPF_SK_SKB_STREAM_PARSER); - bpf_prog_detach2(prog_fd[1], map_fd[0], BPF_SK_SKB_STREAM_VERDICT); - bpf_prog_detach2(prog_fd[0], map_fd[8], BPF_SK_SKB_STREAM_PARSER); - bpf_prog_detach2(prog_fd[2], map_fd[8], BPF_SK_SKB_STREAM_VERDICT); + + for (i = 0; i < ARRAY_SIZE(links); i++) { + if (links[i]) + bpf_link__detach(links[i]); + } if (tx_prog_fd > 0) bpf_prog_detach2(tx_prog_fd, map_fd[1], BPF_SK_MSG_VERDICT); @@ -1805,6 +1804,7 @@ static int populate_progs(char *bpf_file) i = bpf_object__load(obj); i = 0; bpf_object__for_each_program(prog, obj) { + progs[i] = prog; prog_fd[i] = bpf_program__fd(prog); i++; } @@ -1819,6 +1819,9 @@ static int populate_progs(char *bpf_file) } } + for (i = 0; i < ARRAY_SIZE(links); i++) + links[i] = NULL; + return 0; } -- cgit v1.2.3-73-gaa49b From 24bb90a42633ea47256d4f13289dd3181236e028 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 23 May 2024 14:50:00 +0800 Subject: selftests/bpf: Replace tx_prog_fd with tx_prog in test_sockmap bpf_program__attach_sockmap() needs to take a parameter of type bpf_program instead of an fd, so 
tx_prog_fd becomes useless. This patch uses a pointer tx_prog to point to an item in progs[] array. Signed-off-by: Geliang Tang Signed-off-by: Daniel Borkmann Tested-by: Jakub Sitnicki Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/23b37f932c547dd1ebfe154bbc0b0e957be21ee6.1716446893.git.tanggeliang@kylinos.cn --- tools/testing/selftests/bpf/test_sockmap.c | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index 81e7a57c727c..c185c4242abb 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -954,7 +954,8 @@ enum { static int run_options(struct sockmap_options *options, int cg_fd, int test) { - int i, key, next_key, err, tx_prog_fd = -1, zero = 0; + int i, key, next_key, err, zero = 0; + struct bpf_program *tx_prog; /* If base test skip BPF setup */ if (test == BASE || test == BASE_SENDPAGE) @@ -1015,27 +1016,27 @@ run: /* Attach txmsg program to sockmap */ if (txmsg_pass) - tx_prog_fd = prog_fd[4]; + tx_prog = progs[4]; else if (txmsg_redir) - tx_prog_fd = prog_fd[5]; + tx_prog = progs[5]; else if (txmsg_apply) - tx_prog_fd = prog_fd[6]; + tx_prog = progs[6]; else if (txmsg_cork) - tx_prog_fd = prog_fd[7]; + tx_prog = progs[7]; else if (txmsg_drop) - tx_prog_fd = prog_fd[8]; + tx_prog = progs[8]; else - tx_prog_fd = -1; + tx_prog = NULL; - if (tx_prog_fd > 0) { + if (tx_prog) { int redir_fd; - err = bpf_prog_attach(tx_prog_fd, - map_fd[1], BPF_SK_MSG_VERDICT, 0); - if (err) { + links[4] = bpf_program__attach_sockmap(tx_prog, map_fd[1]); + if (!links[4]) { fprintf(stderr, - "ERROR: bpf_prog_attach (txmsg): %d (%s)\n", - err, strerror(errno)); + "ERROR: bpf_program__attach_sockmap (txmsg): (%s)\n", + strerror(errno)); + err = -1; goto out; } @@ -1285,9 +1286,6 @@ out: bpf_link__detach(links[i]); } - if (tx_prog_fd > 0) - 
bpf_prog_detach2(tx_prog_fd, map_fd[1], BPF_SK_MSG_VERDICT); - for (i = 0; i < 8; i++) { key = next_key = 0; bpf_map_update_elem(map_fd[i], &key, &zero, BPF_ANY); -- cgit v1.2.3-73-gaa49b From 467a0c79b5514d7301ae679770380679a8e32668 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 23 May 2024 14:50:01 +0800 Subject: selftests/bpf: Drop prog_fd array in test_sockmap The program fds can be got by using bpf_program__fd(progs[]), then prog_fd becomes useless. This patch drops it. Signed-off-by: Geliang Tang Signed-off-by: Daniel Borkmann Tested-by: Jakub Sitnicki Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/9a6335e4d8dbab23c0d8906074457ceddd61e74b.1716446893.git.tanggeliang@kylinos.cn --- tools/testing/selftests/bpf/test_sockmap.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index c185c4242abb..64b38a1a1b60 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -63,7 +63,6 @@ int passed; int failed; int map_fd[9]; struct bpf_map *maps[9]; -int prog_fd[9]; struct bpf_program *progs[9]; struct bpf_link *links[9]; @@ -1000,7 +999,7 @@ static int run_options(struct sockmap_options *options, int cg_fd, int test) } /* Attach to cgroups */ - err = bpf_prog_attach(prog_fd[3], cg_fd, BPF_CGROUP_SOCK_OPS, 0); + err = bpf_prog_attach(bpf_program__fd(progs[3]), cg_fd, BPF_CGROUP_SOCK_OPS, 0); if (err) { fprintf(stderr, "ERROR: bpf_prog_attach (groups): %d (%s)\n", err, strerror(errno)); @@ -1279,7 +1278,7 @@ run: fprintf(stderr, "unknown test\n"); out: /* Detatch and zero all the maps */ - bpf_prog_detach2(prog_fd[3], cg_fd, BPF_CGROUP_SOCK_OPS); + bpf_prog_detach2(bpf_program__fd(progs[3]), cg_fd, BPF_CGROUP_SOCK_OPS); for (i = 0; i < ARRAY_SIZE(links); i++) { if (links[i]) @@ -1803,7 +1802,6 @@ static int populate_progs(char *bpf_file) i = 0; 
bpf_object__for_each_program(prog, obj) { progs[i] = prog; - prog_fd[i] = bpf_program__fd(prog); i++; } -- cgit v1.2.3-73-gaa49b From dcb681b659f2a0a546752730c9daa92dc6120d52 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 23 May 2024 14:50:02 +0800 Subject: selftests/bpf: Fix size of map_fd in test_sockmap The array size of map_fd[] is 9, not 8. This patch changes it as a more general form: ARRAY_SIZE(map_fd). Signed-off-by: Geliang Tang Signed-off-by: Daniel Borkmann Tested-by: Jakub Sitnicki Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/0972529ee01ebf8a8fd2b310bdec90831c94be77.1716446893.git.tanggeliang@kylinos.cn --- tools/testing/selftests/bpf/test_sockmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index 64b38a1a1b60..e29f8203bd4f 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -1285,7 +1285,7 @@ out: bpf_link__detach(links[i]); } - for (i = 0; i < 8; i++) { + for (i = 0; i < ARRAY_SIZE(map_fd); i++) { key = next_key = 0; bpf_map_update_elem(map_fd[i], &key, &zero, BPF_ANY); while (bpf_map_get_next_key(map_fd[i], &key, &next_key) == 0) { -- cgit v1.2.3-73-gaa49b From de1b5ea789dc28066cc8dc634b6825bd6148f38b Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 23 May 2024 14:50:03 +0800 Subject: selftests/bpf: Check length of recv in test_sockmap The value of recv in msg_loop may be negative, like EWOULDBLOCK, so it's necessary to check if it is positive before accumulating it to bytes_recvd. 
Fixes: 16962b2404ac ("bpf: sockmap, add selftests") Signed-off-by: Geliang Tang Signed-off-by: Daniel Borkmann Tested-by: Jakub Sitnicki Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/5172563f7c7b2a2e953cef02e89fc34664a7b190.1716446893.git.tanggeliang@kylinos.cn --- tools/testing/selftests/bpf/test_sockmap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index e29f8203bd4f..9cba4ec844a5 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -681,7 +681,8 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, } } - s->bytes_recvd += recv; + if (recv > 0) + s->bytes_recvd += recv; if (opt->check_recved_len && s->bytes_recvd > total_bytes) { errno = EMSGSIZE; -- cgit v1.2.3-73-gaa49b From 49784c7979321c49a8055f5c588d24c34a8c55fc Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 23 May 2024 14:50:04 +0800 Subject: selftests/bpf: Drop duplicate bpf_map_lookup_elem in test_sockmap bpf_map_lookup_elem is invoked in bpf_prog3() already, no need to invoke it again. This patch drops it. 
Signed-off-by: Geliang Tang Signed-off-by: Daniel Borkmann Tested-by: Jakub Sitnicki Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/ea8458462b876ee445173e3effb535fd126137ed.1716446893.git.tanggeliang@kylinos.cn --- tools/testing/selftests/bpf/progs/test_sockmap_kern.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h index 3dff0813730b..f48f85f1bd70 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h +++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h @@ -177,9 +177,6 @@ int bpf_prog3(struct __sk_buff *skb) return bpf_sk_redirect_hash(skb, &tls_sock_map, &ret, flags); #endif } - f = bpf_map_lookup_elem(&sock_skb_opts, &one); - if (f && *f) - ret = SK_DROP; err = bpf_skb_adjust_room(skb, 4, 0, 0); if (err) return SK_DROP; -- cgit v1.2.3-73-gaa49b From c4c6c3b785a0b1426add15d078da61f899abeaac Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Thu, 23 May 2024 10:42:00 -0700 Subject: selftests/bpf: Test kptr arrays and kptrs in nested struct fields. Make sure that BPF programs can declare global kptr arrays and kptr fields in struct types that are the type of a global variable or the type of a nested descendant field in a global variable. An array with only one element is a special case: its element is treated like a non-array kptr field. Nested arrays are also tested to ensure they are handled properly. 
Acked-by: Eduard Zingerman Signed-off-by: Kui-Feng Lee Link: https://lore.kernel.org/r/20240523174202.461236-8-thinker.li@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/cpumask.c | 5 + .../testing/selftests/bpf/progs/cpumask_success.c | 171 +++++++++++++++++++++ 2 files changed, 176 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/cpumask.c b/tools/testing/selftests/bpf/prog_tests/cpumask.c index ecf89df78109..2570bd4b0cb2 100644 --- a/tools/testing/selftests/bpf/prog_tests/cpumask.c +++ b/tools/testing/selftests/bpf/prog_tests/cpumask.c @@ -18,6 +18,11 @@ static const char * const cpumask_success_testcases[] = { "test_insert_leave", "test_insert_remove_release", "test_global_mask_rcu", + "test_global_mask_array_one_rcu", + "test_global_mask_array_rcu", + "test_global_mask_array_l2_rcu", + "test_global_mask_nested_rcu", + "test_global_mask_nested_deep_rcu", "test_cpumask_weight", }; diff --git a/tools/testing/selftests/bpf/progs/cpumask_success.c b/tools/testing/selftests/bpf/progs/cpumask_success.c index 7a1e64c6c065..fd8106831c32 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_success.c +++ b/tools/testing/selftests/bpf/progs/cpumask_success.c @@ -12,6 +12,31 @@ char _license[] SEC("license") = "GPL"; int pid, nr_cpus; +struct kptr_nested { + struct bpf_cpumask __kptr * mask; +}; + +struct kptr_nested_pair { + struct bpf_cpumask __kptr * mask_1; + struct bpf_cpumask __kptr * mask_2; +}; + +struct kptr_nested_mid { + int dummy; + struct kptr_nested m; +}; + +struct kptr_nested_deep { + struct kptr_nested_mid ptrs[2]; + struct kptr_nested_pair ptr_pairs[3]; +}; + +private(MASK) static struct bpf_cpumask __kptr * global_mask_array[2]; +private(MASK) static struct bpf_cpumask __kptr * global_mask_array_l2[2][1]; +private(MASK) static struct bpf_cpumask __kptr * global_mask_array_one[1]; +private(MASK) static struct kptr_nested global_mask_nested[2]; +private(MASK_DEEP) static struct 
kptr_nested_deep global_mask_nested_deep; + static bool is_test_task(void) { int cur_pid = bpf_get_current_pid_tgid() >> 32; @@ -460,6 +485,152 @@ int BPF_PROG(test_global_mask_rcu, struct task_struct *task, u64 clone_flags) return 0; } +SEC("tp_btf/task_newtask") +int BPF_PROG(test_global_mask_array_one_rcu, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *local, *prev; + + if (!is_test_task()) + return 0; + + /* Kptr arrays with one element are special cased, being treated + * just like a single pointer. + */ + + local = create_cpumask(); + if (!local) + return 0; + + prev = bpf_kptr_xchg(&global_mask_array_one[0], local); + if (prev) { + bpf_cpumask_release(prev); + err = 3; + return 0; + } + + bpf_rcu_read_lock(); + local = global_mask_array_one[0]; + if (!local) { + err = 4; + bpf_rcu_read_unlock(); + return 0; + } + + bpf_rcu_read_unlock(); + + return 0; +} + +static int _global_mask_array_rcu(struct bpf_cpumask **mask0, + struct bpf_cpumask **mask1) +{ + struct bpf_cpumask *local; + + if (!is_test_task()) + return 0; + + /* Check if two kptrs in the array work and independently */ + + local = create_cpumask(); + if (!local) + return 0; + + bpf_rcu_read_lock(); + + local = bpf_kptr_xchg(mask0, local); + if (local) { + err = 1; + goto err_exit; + } + + /* [, NULL] */ + if (!*mask0 || *mask1) { + err = 2; + goto err_exit; + } + + local = create_cpumask(); + if (!local) { + err = 9; + goto err_exit; + } + + local = bpf_kptr_xchg(mask1, local); + if (local) { + err = 10; + goto err_exit; + } + + /* [, ] */ + if (!*mask0 || !*mask1 || *mask0 == *mask1) { + err = 11; + goto err_exit; + } + +err_exit: + if (local) + bpf_cpumask_release(local); + bpf_rcu_read_unlock(); + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_global_mask_array_rcu, struct task_struct *task, u64 clone_flags) +{ + return _global_mask_array_rcu(&global_mask_array[0], &global_mask_array[1]); +} + +SEC("tp_btf/task_newtask") +int 
BPF_PROG(test_global_mask_array_l2_rcu, struct task_struct *task, u64 clone_flags) +{ + return _global_mask_array_rcu(&global_mask_array_l2[0][0], &global_mask_array_l2[1][0]); +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_global_mask_nested_rcu, struct task_struct *task, u64 clone_flags) +{ + return _global_mask_array_rcu(&global_mask_nested[0].mask, &global_mask_nested[1].mask); +} + +/* Ensure that the field->offset has been correctly advanced from one + * nested struct or array sub-tree to another. In the case of + * kptr_nested_deep, it comprises two sub-trees: ktpr_1 and kptr_2. By + * calling bpf_kptr_xchg() on every single kptr in both nested sub-trees, + * the verifier should reject the program if the field->offset of any kptr + * is incorrect. + * + * For instance, if we have 10 kptrs in a nested struct and a program that + * accesses each kptr individually with bpf_kptr_xchg(), the compiler + * should emit instructions to access 10 different offsets if it works + * correctly. If the field->offset values of any pair of them are + * incorrectly the same, the number of unique offsets in btf_record for + * this nested struct should be less than 10. The verifier should fail to + * discover some of the offsets emitted by the compiler. + * + * Even if the field->offset values of kptrs are not duplicated, the + * verifier should fail to find a btf_field for the instruction accessing a + * kptr if the corresponding field->offset is pointing to a random + * incorrect offset. 
+ */ +SEC("tp_btf/task_newtask") +int BPF_PROG(test_global_mask_nested_deep_rcu, struct task_struct *task, u64 clone_flags) +{ + int r, i; + + r = _global_mask_array_rcu(&global_mask_nested_deep.ptrs[0].m.mask, + &global_mask_nested_deep.ptrs[1].m.mask); + if (r) + return r; + + for (i = 0; i < 3; i++) { + r = _global_mask_array_rcu(&global_mask_nested_deep.ptr_pairs[i].mask_1, + &global_mask_nested_deep.ptr_pairs[i].mask_2); + if (r) + return r; + } + return 0; +} + SEC("tp_btf/task_newtask") int BPF_PROG(test_cpumask_weight, struct task_struct *task, u64 clone_flags) { -- cgit v1.2.3-73-gaa49b From d55c765a9b2d54b53ef86a62d6209e2e5eb62585 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Thu, 23 May 2024 10:42:01 -0700 Subject: selftests/bpf: Test global bpf_rb_root arrays and fields in nested struct types. Make sure global arrays of bpf_rb_root and fields of bpf_rb_root in nested struct types work correctly. Signed-off-by: Kui-Feng Lee Link: https://lore.kernel.org/r/20240523174202.461236-9-thinker.li@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/rbtree.c | 47 +++++++++++++++ tools/testing/selftests/bpf/progs/rbtree.c | 77 +++++++++++++++++++++++++ 2 files changed, 124 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/rbtree.c b/tools/testing/selftests/bpf/prog_tests/rbtree.c index e9300c96607d..9818f06c97c5 100644 --- a/tools/testing/selftests/bpf/prog_tests/rbtree.c +++ b/tools/testing/selftests/bpf/prog_tests/rbtree.c @@ -31,6 +31,28 @@ static void test_rbtree_add_nodes(void) rbtree__destroy(skel); } +static void test_rbtree_add_nodes_nested(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct rbtree *skel; + int ret; + + skel = rbtree__open_and_load(); + if (!ASSERT_OK_PTR(skel, "rbtree__open_and_load")) + return; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_add_nodes_nested), 
&opts); + ASSERT_OK(ret, "rbtree_add_nodes_nested run"); + ASSERT_OK(opts.retval, "rbtree_add_nodes_nested retval"); + ASSERT_EQ(skel->data->less_callback_ran, 1, "rbtree_add_nodes_nested less_callback_ran"); + + rbtree__destroy(skel); +} + static void test_rbtree_add_and_remove(void) { LIBBPF_OPTS(bpf_test_run_opts, opts, @@ -53,6 +75,27 @@ static void test_rbtree_add_and_remove(void) rbtree__destroy(skel); } +static void test_rbtree_add_and_remove_array(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct rbtree *skel; + int ret; + + skel = rbtree__open_and_load(); + if (!ASSERT_OK_PTR(skel, "rbtree__open_and_load")) + return; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_add_and_remove_array), &opts); + ASSERT_OK(ret, "rbtree_add_and_remove_array"); + ASSERT_OK(opts.retval, "rbtree_add_and_remove_array retval"); + + rbtree__destroy(skel); +} + static void test_rbtree_first_and_remove(void) { LIBBPF_OPTS(bpf_test_run_opts, opts, @@ -104,8 +147,12 @@ void test_rbtree_success(void) { if (test__start_subtest("rbtree_add_nodes")) test_rbtree_add_nodes(); + if (test__start_subtest("rbtree_add_nodes_nested")) + test_rbtree_add_nodes_nested(); if (test__start_subtest("rbtree_add_and_remove")) test_rbtree_add_and_remove(); + if (test__start_subtest("rbtree_add_and_remove_array")) + test_rbtree_add_and_remove_array(); if (test__start_subtest("rbtree_first_and_remove")) test_rbtree_first_and_remove(); if (test__start_subtest("rbtree_api_release_aliasing")) diff --git a/tools/testing/selftests/bpf/progs/rbtree.c b/tools/testing/selftests/bpf/progs/rbtree.c index b09f4fffe57c..a3620c15c136 100644 --- a/tools/testing/selftests/bpf/progs/rbtree.c +++ b/tools/testing/selftests/bpf/progs/rbtree.c @@ -13,6 +13,15 @@ struct node_data { struct bpf_rb_node node; }; +struct root_nested_inner { + struct bpf_spin_lock glock; + struct bpf_rb_root root __contains(node_data, node); +}; + 
+struct root_nested { + struct root_nested_inner inner; +}; + long less_callback_ran = -1; long removed_key = -1; long first_data[2] = {-1, -1}; @@ -20,6 +29,9 @@ long first_data[2] = {-1, -1}; #define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8))) private(A) struct bpf_spin_lock glock; private(A) struct bpf_rb_root groot __contains(node_data, node); +private(A) struct bpf_rb_root groot_array[2] __contains(node_data, node); +private(A) struct bpf_rb_root groot_array_one[1] __contains(node_data, node); +private(B) struct root_nested groot_nested; static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b) { @@ -71,6 +83,12 @@ long rbtree_add_nodes(void *ctx) return __add_three(&groot, &glock); } +SEC("tc") +long rbtree_add_nodes_nested(void *ctx) +{ + return __add_three(&groot_nested.inner.root, &groot_nested.inner.glock); +} + SEC("tc") long rbtree_add_and_remove(void *ctx) { @@ -109,6 +127,65 @@ err_out: return 1; } +SEC("tc") +long rbtree_add_and_remove_array(void *ctx) +{ + struct bpf_rb_node *res1 = NULL, *res2 = NULL, *res3 = NULL; + struct node_data *nodes[3][2] = {{NULL, NULL}, {NULL, NULL}, {NULL, NULL}}; + struct node_data *n; + long k1 = -1, k2 = -1, k3 = -1; + int i, j; + + for (i = 0; i < 3; i++) { + for (j = 0; j < 2; j++) { + nodes[i][j] = bpf_obj_new(typeof(*nodes[i][j])); + if (!nodes[i][j]) + goto err_out; + nodes[i][j]->key = i * 2 + j; + } + } + + bpf_spin_lock(&glock); + for (i = 0; i < 2; i++) + for (j = 0; j < 2; j++) + bpf_rbtree_add(&groot_array[i], &nodes[i][j]->node, less); + for (j = 0; j < 2; j++) + bpf_rbtree_add(&groot_array_one[0], &nodes[2][j]->node, less); + res1 = bpf_rbtree_remove(&groot_array[0], &nodes[0][0]->node); + res2 = bpf_rbtree_remove(&groot_array[1], &nodes[1][0]->node); + res3 = bpf_rbtree_remove(&groot_array_one[0], &nodes[2][0]->node); + bpf_spin_unlock(&glock); + + if (res1) { + n = container_of(res1, struct node_data, node); + k1 = n->key; + bpf_obj_drop(n); + } + if (res2) { + n = 
container_of(res2, struct node_data, node); + k2 = n->key; + bpf_obj_drop(n); + } + if (res3) { + n = container_of(res3, struct node_data, node); + k3 = n->key; + bpf_obj_drop(n); + } + if (k1 != 0 || k2 != 2 || k3 != 4) + return 2; + + return 0; + +err_out: + for (i = 0; i < 3; i++) { + for (j = 0; j < 2; j++) { + if (nodes[i][j]) + bpf_obj_drop(nodes[i][j]); + } + } + return 1; +} + SEC("tc") long rbtree_first_and_remove(void *ctx) { -- cgit v1.2.3-73-gaa49b From 43d50ffb1f7e32865cdd343224659614d8b558b9 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Thu, 23 May 2024 10:42:02 -0700 Subject: selftests/bpf: Test global bpf_list_head arrays. Make sure global arrays of bpf_list_heads and fields of bpf_list_heads in nested struct types work correctly. Acked-by: Eduard Zingerman Signed-off-by: Kui-Feng Lee Link: https://lore.kernel.org/r/20240523174202.461236-10-thinker.li@gmail.com Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/linked_list.c | 12 +++++++ tools/testing/selftests/bpf/progs/linked_list.c | 42 ++++++++++++++++++++++ 2 files changed, 54 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c index 2fb89de63bd2..77d07e0a4a55 100644 --- a/tools/testing/selftests/bpf/prog_tests/linked_list.c +++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c @@ -183,6 +183,18 @@ static void test_linked_list_success(int mode, bool leave_in_map) if (!leave_in_map) clear_fields(skel->maps.bss_A); + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_push_pop_nested), &opts); + ASSERT_OK(ret, "global_list_push_pop_nested"); + ASSERT_OK(opts.retval, "global_list_push_pop_nested retval"); + if (!leave_in_map) + clear_fields(skel->maps.bss_A); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_array_push_pop), &opts); + ASSERT_OK(ret, "global_list_array_push_pop"); + ASSERT_OK(opts.retval, 
"global_list_array_push_pop retval"); + if (!leave_in_map) + clear_fields(skel->maps.bss_A); + if (mode == PUSH_POP) goto end; diff --git a/tools/testing/selftests/bpf/progs/linked_list.c b/tools/testing/selftests/bpf/progs/linked_list.c index 26205ca80679..f69bf3e30321 100644 --- a/tools/testing/selftests/bpf/progs/linked_list.c +++ b/tools/testing/selftests/bpf/progs/linked_list.c @@ -11,6 +11,22 @@ #include "linked_list.h" +struct head_nested_inner { + struct bpf_spin_lock lock; + struct bpf_list_head head __contains(foo, node2); +}; + +struct head_nested { + int dummy; + struct head_nested_inner inner; +}; + +private(C) struct bpf_spin_lock glock_c; +private(C) struct bpf_list_head ghead_array[2] __contains(foo, node2); +private(C) struct bpf_list_head ghead_array_one[1] __contains(foo, node2); + +private(D) struct head_nested ghead_nested; + static __always_inline int list_push_pop(struct bpf_spin_lock *lock, struct bpf_list_head *head, bool leave_in_map) { @@ -309,6 +325,32 @@ int global_list_push_pop(void *ctx) return test_list_push_pop(&glock, &ghead); } +SEC("tc") +int global_list_push_pop_nested(void *ctx) +{ + return test_list_push_pop(&ghead_nested.inner.lock, &ghead_nested.inner.head); +} + +SEC("tc") +int global_list_array_push_pop(void *ctx) +{ + int r; + + r = test_list_push_pop(&glock_c, &ghead_array[0]); + if (r) + return r; + + r = test_list_push_pop(&glock_c, &ghead_array[1]); + if (r) + return r; + + /* Arrays with only one element is a special case, being treated + * just like a bpf_list_head variable by the verifier, not an + * array. 
+ */ + return test_list_push_pop(&glock_c, &ghead_array_one[0]); +} + SEC("tc") int map_list_push_pop_multiple(void *ctx) { -- cgit v1.2.3-73-gaa49b From f7d4485fcedcb4978148bad5fcde570f63790323 Mon Sep 17 00:00:00 2001 From: Nick Forrington Date: Mon, 13 May 2024 09:14:12 +0000 Subject: perf lock info: Display both map and thread by default Change "perf lock info" argument handling to: Display both map and thread info (rather than an error) when neither are specified. Display both map and thread info (rather than just thread info) when both are requested. Signed-off-by: Nick Forrington Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240513091413.738537-2-nick.forrington@arm.com --- tools/perf/Documentation/perf-lock.txt | 4 ++-- tools/perf/builtin-lock.c | 27 +++++++++++++++------------ 2 files changed, 17 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt index f5938d616d75..57a940399de0 100644 --- a/tools/perf/Documentation/perf-lock.txt +++ b/tools/perf/Documentation/perf-lock.txt @@ -111,11 +111,11 @@ INFO OPTIONS -t:: --threads:: - dump thread list in perf.data + dump only the thread list in perf.data -m:: --map:: - dump map of lock instances (address:name table) + dump only the map of lock instances (address:name table) CONTENTION OPTIONS diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 7007d26fe654..0253184b3b58 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -1477,20 +1477,16 @@ static void dump_map(void) fprintf(lock_output, " %#llx: %s\n", (unsigned long long)st->addr, st->name); } -static int dump_info(void) +static void dump_info(void) { - int rc = 0; - if (info_threads) dump_threads(); - else if (info_map) + + if (info_map) { + if (info_threads) + fputc('\n', lock_output); dump_map(); - else { - rc = -1; - pr_err("Unknown type of information\n"); } - - return rc; } static const struct 
evsel_str_handler lock_tracepoints[] = { @@ -1992,7 +1988,7 @@ static int __cmd_report(bool display_info) setup_pager(); if (display_info) /* used for info subcommand */ - err = dump_info(); + dump_info(); else { combine_result(); sort_result(); @@ -2568,9 +2564,9 @@ int cmd_lock(int argc, const char **argv) const struct option info_options[] = { OPT_BOOLEAN('t', "threads", &info_threads, - "dump thread list in perf.data"), + "dump the thread list in perf.data"), OPT_BOOLEAN('m', "map", &info_map, - "map of lock instances (address:name table)"), + "dump the map of lock instances (address:name table)"), OPT_PARENT(lock_options) }; @@ -2684,6 +2680,13 @@ int cmd_lock(int argc, const char **argv) if (argc) usage_with_options(info_usage, info_options); } + + /* If neither threads nor map requested, display both */ + if (!info_threads && !info_map) { + info_threads = true; + info_map = true; + } + /* recycling report_lock_ops */ trace_handler = &report_lock_ops; rc = __cmd_report(true); -- cgit v1.2.3-73-gaa49b From f7abc0cfa8be0aa872842569583a69e6bdec0e76 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sun, 2 Jun 2024 01:05:05 +0100 Subject: perf genelf: remove unused struct 'options' 'options' has been unused since commit fa7f7e735495 ("perf jit: Move test functionality in to a test"). Remove it. Signed-off-by: Dr. 
David Alan Gilbert Reviewed-by: Ian Rogers Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240602000505.213032-1-linux@treblig.org --- tools/perf/util/genelf.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c index ac17a3cb59dc..c8f6bee1fa61 100644 --- a/tools/perf/util/genelf.c +++ b/tools/perf/util/genelf.c @@ -54,11 +54,6 @@ typedef struct { char name[0]; /* Start of the name+desc data */ } Elf_Note; -struct options { - char *output; - int fd; -}; - static char shd_string_table[] = { 0, '.', 't', 'e', 'x', 't', 0, /* 1 */ -- cgit v1.2.3-73-gaa49b From 35b944a997e25962122c3dea68b020e7fbb06cbd Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Mon, 3 Jun 2024 15:07:30 -0400 Subject: selftests/ftrace: Add function_graph tracer to func-filter-pid test The function tracer is tested to see if pid filtering works. Add a test to test function_graph tracer as well, but only if the function_graph tracer is enabled for the top level or instance. 
Link: https://lore.kernel.org/linux-trace-kernel/20240603190825.083048115@goodmis.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Alexei Starovoitov Cc: Florent Revest Cc: Martin KaFai Lau Cc: bpf Cc: Sven Schnelle Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: Alan Maguire Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Guo Ren Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- .../ftrace/test.d/ftrace/func-filter-pid.tc | 27 ++++++++++++++++++---- 1 file changed, 22 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc index 2f7211254529..c6fc9d31a496 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc @@ -14,6 +14,11 @@ if [ ! -f options/function-fork ]; then echo "no option for function-fork found. Option will not be tested." fi +if [ ! -f options/funcgraph-proc ]; then + do_funcgraph_proc=0 + echo "no option for function-fork found. Option will not be tested." 
+fi + read PID _ < /proc/self/stat if [ $do_function_fork -eq 1 ]; then @@ -21,12 +26,18 @@ if [ $do_function_fork -eq 1 ]; then orig_value=`grep function-fork trace_options` fi +if [ $do_funcgraph_proc -eq 1 ]; then + orig_value2=`cat options/funcgraph-proc` +fi + do_reset() { - if [ $do_function_fork -eq 0 ]; then - return + if [ $do_function_fork -eq 1 ]; then + echo $orig_value > trace_options fi - echo $orig_value > trace_options + if [ $do_funcgraph_proc -eq 1 ]; then + echo $orig_value2 > options/funcgraph-proc + fi } fail() { # msg @@ -36,13 +47,15 @@ fail() { # msg } do_test() { + TRACER=$1 + disable_tracing echo do_execve* > set_ftrace_filter echo $FUNCTION_FORK >> set_ftrace_filter echo $PID > set_ftrace_pid - echo function > current_tracer + echo $TRACER > current_tracer if [ $do_function_fork -eq 1 ]; then # don't allow children to be traced @@ -82,7 +95,11 @@ do_test() { fi } -do_test +do_test function +if grep -s function_graph available_tracers; then + do_test function_graph +fi + do_reset exit 0 -- cgit v1.2.3-73-gaa49b From 8d4e21bd4cca8013d2c6f55d42df85838d4ebce6 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Mon, 3 Jun 2024 15:07:31 -0400 Subject: selftests/ftrace: Add fgraph-multi.tc test Add a test that creates 3 instances and enables function_graph tracer in each as well as the top instance, where each will enable a filter (but one that traces all functions) and check that they are filtering properly. 
Link: https://lore.kernel.org/linux-trace-kernel/20240603190825.252845939@goodmis.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Alexei Starovoitov Cc: Florent Revest Cc: Martin KaFai Lau Cc: bpf Cc: Sven Schnelle Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: Alan Maguire Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Guo Ren Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- .../selftests/ftrace/test.d/ftrace/fgraph-multi.tc | 103 +++++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi.tc (limited to 'tools') diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi.tc new file mode 100644 index 000000000000..ff88f97e41fb --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi.tc @@ -0,0 +1,103 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: ftrace - function graph filters +# requires: set_ftrace_filter function_graph:tracer + +# Make sure that function graph filtering works + +INSTANCE1="instances/test1_$$" +INSTANCE2="instances/test2_$$" +INSTANCE3="instances/test3_$$" + +WD=`pwd` + +do_reset() { + cd $WD + if [ -d $INSTANCE1 ]; then + echo nop > $INSTANCE1/current_tracer + rmdir $INSTANCE1 + fi + if [ -d $INSTANCE2 ]; then + echo nop > $INSTANCE2/current_tracer + rmdir $INSTANCE2 + fi + if [ -d $INSTANCE3 ]; then + echo nop > $INSTANCE3/current_tracer + rmdir $INSTANCE3 + fi +} + +mkdir $INSTANCE1 +if ! grep -q function_graph $INSTANCE1/available_tracers; then + echo "function_graph not allowed with instances" + rmdir $INSTANCE1 + exit_unsupported +fi + +mkdir $INSTANCE2 +mkdir $INSTANCE3 + +fail() { # msg + do_reset + echo $1 + exit_fail +} + +disable_tracing +clear_trace + +do_test() { + REGEX=$1 + TEST=$2 + + # filter something, schedule is always good + if ! 
echo "$REGEX" > set_ftrace_filter; then + fail "can not enable filter $REGEX" + fi + + echo > trace + echo function_graph > current_tracer + enable_tracing + sleep 1 + # search for functions (has "{" or ";" on the line) + echo 0 > tracing_on + count=`cat trace | grep -v '^#' | grep -e '{' -e ';' | grep -v "$TEST" | wc -l` + echo 1 > tracing_on + if [ $count -ne 0 ]; then + fail "Graph filtering not working by itself against $TEST?" + fi + + # Make sure we did find something + echo 0 > tracing_on + count=`cat trace | grep -v '^#' | grep -e '{' -e ';' | grep "$TEST" | wc -l` + echo 1 > tracing_on + if [ $count -eq 0 ]; then + fail "No traces found with $TEST?" + fi +} + +do_test '*sched*' 'sched' +cd $INSTANCE1 +do_test '*lock*' 'lock' +cd $WD +cd $INSTANCE2 +do_test '*rcu*' 'rcu' +cd $WD +cd $INSTANCE3 +echo function_graph > current_tracer + +sleep 1 +count=`cat trace | grep -v '^#' | grep -e '{' -e ';' | grep "$TEST" | wc -l` +if [ $count -eq 0 ]; then + fail "No traces found with all tracing?" +fi + +cd $WD +echo nop > current_tracer +echo nop > $INSTANCE1/current_tracer +echo nop > $INSTANCE2/current_tracer +echo nop > $INSTANCE3/current_tracer + +do_reset + +exit 0 -- cgit v1.2.3-73-gaa49b From 61ce0ea7591fef2eb6e89ce40ffcc24fda4dbbc5 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Tue, 4 Jun 2024 00:07:00 -0700 Subject: selftests/bpf: Fix bpf_cookie and find_vma in nested VM bpf_cookie and find_vma are flaky in nested VMs, which is used by some CI systems. It turns out these failures are caused by unreliable perf event in nested VM. Fix these by: 1. Use PERF_COUNT_SW_CPU_CLOCK in find_vma; 2. Increase sample_freq in bpf_cookie. 
Signed-off-by: Song Liu Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240604070700.3032142-1-song@kernel.org --- tools/testing/selftests/bpf/prog_tests/bpf_cookie.c | 2 +- tools/testing/selftests/bpf/prog_tests/find_vma.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c index 4407ea428e77..070c52c312e5 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c @@ -451,7 +451,7 @@ static void pe_subtest(struct test_bpf_cookie *skel) attr.type = PERF_TYPE_SOFTWARE; attr.config = PERF_COUNT_SW_CPU_CLOCK; attr.freq = 1; - attr.sample_freq = 1000; + attr.sample_freq = 10000; pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC); if (!ASSERT_GE(pfd, 0, "perf_fd")) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/find_vma.c b/tools/testing/selftests/bpf/prog_tests/find_vma.c index 5165b38f0e59..f7619e0ade10 100644 --- a/tools/testing/selftests/bpf/prog_tests/find_vma.c +++ b/tools/testing/selftests/bpf/prog_tests/find_vma.c @@ -29,8 +29,8 @@ static int open_pe(void) /* create perf event */ attr.size = sizeof(attr); - attr.type = PERF_TYPE_HARDWARE; - attr.config = PERF_COUNT_HW_CPU_CYCLES; + attr.type = PERF_TYPE_SOFTWARE; + attr.config = PERF_COUNT_SW_CPU_CLOCK; attr.freq = 1; attr.sample_freq = 1000; pfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC); -- cgit v1.2.3-73-gaa49b From 898ac74c5b5f8b551a377d6a60ca4e30023ac9d2 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 4 Jun 2024 11:00:34 -0700 Subject: selftests/bpf: Ignore .llvm. 
suffix in kallsyms_find() I hit the following failure when running selftests with internal backported upstream kernel: test_ksyms:PASS:kallsyms_fopen 0 nsec test_ksyms:FAIL:ksym_find symbol 'bpf_link_fops' not found #123 ksyms:FAIL In /proc/kallsyms, we have $ cat /proc/kallsyms | grep bpf_link_fops ffffffff829f0cb0 d bpf_link_fops.llvm.12608678492448798416 The CONFIG_LTO_CLANG_THIN is enabled in the kernel which is responsible for bpf_link_fops.llvm.12608678492448798416 symbol name. In prog_tests/ksyms.c we have kallsyms_find("bpf_link_fops", &link_fops_addr) and kallsyms_find() compares "bpf_link_fops" with symbols in /proc/kallsyms in order to find the entry. With bpf_link_fops.llvm. in /proc/kallsyms, the kallsyms_find() failed. To fix the issue, in kallsyms_find(), if a symbol has suffix .llvm., that suffix will be ignored for comparison. This fixed the test failure. Signed-off-by: Yonghong Song Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240604180034.1356016-1-yonghong.song@linux.dev --- tools/testing/selftests/bpf/trace_helpers.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index 70e29f316fe7..465d196c7165 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -211,7 +211,7 @@ long ksym_get_addr(const char *name) */ int kallsyms_find(const char *sym, unsigned long long *addr) { - char type, name[500]; + char type, name[500], *match; unsigned long long value; int err = 0; FILE *f; @@ -221,6 +221,17 @@ int kallsyms_find(const char *sym, unsigned long long *addr) return -EINVAL; while (fscanf(f, "%llx %c %499s%*[^\n]\n", &value, &type, name) > 0) { + /* If CONFIG_LTO_CLANG_THIN is enabled, static variable/function + * symbols could be promoted to global due to cross-file inlining. + * For such cases, clang compiler will add .llvm. 
suffix + * to those symbols to avoid potential naming conflict. + * Let us ignore .llvm. suffix during symbol comparison. + */ + if (type == 'd') { + match = strstr(name, ".llvm."); + if (match) + *match = '\0'; + } if (strcmp(name, sym) == 0) { *addr = value; goto out; -- cgit v1.2.3-73-gaa49b From 0770ceaff2f6a084d4d020295cfba6c5ef278cf4 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sun, 2 Jun 2024 01:07:09 +0100 Subject: perf hisi-ptt: remove unused struct 'hisi_ptt_queue' 'hisi_ptt_queue' has been unused since the original commit 5e91e57e6809 ("perf auxtrace arm64: Add support for parsing HiSilicon PCIe Trace packet"). Remove it. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Jonathan Cameron Reviewed-by: Ian Rogers Cc: yangyicong@hisilicon.com Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240602000709.213116-1-linux@treblig.org --- tools/perf/util/hisi-ptt.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/hisi-ptt.c b/tools/perf/util/hisi-ptt.c index 52d0ce302ca0..37ea987017f6 100644 --- a/tools/perf/util/hisi-ptt.c +++ b/tools/perf/util/hisi-ptt.c @@ -35,11 +35,6 @@ struct hisi_ptt { u32 pmu_type; }; -struct hisi_ptt_queue { - struct hisi_ptt *ptt; - struct auxtrace_buffer *buffer; -}; - static enum hisi_ptt_pkt_type hisi_ptt_check_packet_type(unsigned char *buf) { uint32_t head = *(uint32_t *)buf; -- cgit v1.2.3-73-gaa49b From 721f4a6526daafca15634f30c9865e880da3e1d1 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Fri, 5 Apr 2024 01:58:21 +0000 Subject: mm/memblock: remove empty dummy entry The dummy entry is introduced in the initial implementation of lmb in commit 7c8c6b9776fb ("powerpc: Merge lmb.c and make MM initialization use it."). As the comment says the empty dummy entry is to simplify the code. /* Create a dummy zero size LMB which will get coalesced away later. * This simplifies the lmb_add() code below... 
*/ However, the current code was reimplemented by Tejun in commit 784656f9c680 ("memblock: Reimplement memblock_add_region()"), and this empty dummy entry no longer seems to benefit the code. Let's remove it. Signed-off-by: Wei Yang CC: Paul Mackerras CC: Tejun Heo CC: Mike Rapoport Link: https://lore.kernel.org/r/20240405015821.13411-1-richard.weiyang@gmail.com Signed-off-by: Mike Rapoport (IBM) --- mm/memblock.c | 7 ++----- tools/testing/memblock/tests/basic_api.c | 8 ++++---- tools/testing/memblock/tests/common.c | 4 ++-- 3 files changed, 8 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/mm/memblock.c b/mm/memblock.c index d09136e040d3..98d25689cf10 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -114,12 +114,10 @@ static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS struct memblock memblock __initdata_memblock = { .memory.regions = memblock_memory_init_regions, - .memory.cnt = 1, /* empty dummy entry */ .memory.max = INIT_MEMBLOCK_MEMORY_REGIONS, .memory.name = "memory", .reserved.regions = memblock_reserved_init_regions, - .reserved.cnt = 1, /* empty dummy entry */ .reserved.max = INIT_MEMBLOCK_RESERVED_REGIONS, .reserved.name = "reserved", @@ -130,7 +128,6 @@ struct memblock memblock __initdata_memblock = { #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP struct memblock_type physmem = { .regions = memblock_physmem_init_regions, - .cnt = 1, /* empty dummy entry */ .max = INIT_PHYSMEM_REGIONS, .name = "physmem", }; @@ -356,7 +353,6 @@ static void __init_memblock memblock_remove_region(struct memblock_type *type, u /* Special case for empty arrays */ if (type->cnt == 0) { WARN_ON(type->total_size != 0); - type->cnt = 1; type->regions[0].base = 0; type->regions[0].size = 0; type->regions[0].flags = 0; @@ -600,12 +596,13 @@ static int __init_memblock memblock_add_range(struct memblock_type *type, /* special case for empty array */ if (type->regions[0].size == 0) { - WARN_ON(type->cnt != 1 || type->total_size); + WARN_ON(type->cnt != 0 ||
type->total_size); type->regions[0].base = base; type->regions[0].size = size; type->regions[0].flags = flags; memblock_set_region_node(&type->regions[0], nid); type->total_size = size; + type->cnt = 1; return 0; } diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c index 57bf2688edfd..f317fe691fc4 100644 --- a/tools/testing/memblock/tests/basic_api.c +++ b/tools/testing/memblock/tests/basic_api.c @@ -15,12 +15,12 @@ static int memblock_initialization_check(void) PREFIX_PUSH(); ASSERT_NE(memblock.memory.regions, NULL); - ASSERT_EQ(memblock.memory.cnt, 1); + ASSERT_EQ(memblock.memory.cnt, 0); ASSERT_EQ(memblock.memory.max, EXPECTED_MEMBLOCK_REGIONS); ASSERT_EQ(strcmp(memblock.memory.name, "memory"), 0); ASSERT_NE(memblock.reserved.regions, NULL); - ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.cnt, 0); ASSERT_EQ(memblock.memory.max, EXPECTED_MEMBLOCK_REGIONS); ASSERT_EQ(strcmp(memblock.reserved.name, "reserved"), 0); @@ -1295,7 +1295,7 @@ static int memblock_remove_only_region_check(void) ASSERT_EQ(rgn->base, 0); ASSERT_EQ(rgn->size, 0); - ASSERT_EQ(memblock.memory.cnt, 1); + ASSERT_EQ(memblock.memory.cnt, 0); ASSERT_EQ(memblock.memory.total_size, 0); test_pass_pop(); @@ -1723,7 +1723,7 @@ static int memblock_free_only_region_check(void) ASSERT_EQ(rgn->base, 0); ASSERT_EQ(rgn->size, 0); - ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.cnt, 0); ASSERT_EQ(memblock.reserved.total_size, 0); test_pass_pop(); diff --git a/tools/testing/memblock/tests/common.c b/tools/testing/memblock/tests/common.c index f43b6f414983..c2c569f12178 100644 --- a/tools/testing/memblock/tests/common.c +++ b/tools/testing/memblock/tests/common.c @@ -40,13 +40,13 @@ void reset_memblock_regions(void) { memset(memblock.memory.regions, 0, memblock.memory.cnt * sizeof(struct memblock_region)); - memblock.memory.cnt = 1; + memblock.memory.cnt = 0; memblock.memory.max = INIT_MEMBLOCK_REGIONS; 
memblock.memory.total_size = 0; memset(memblock.reserved.regions, 0, memblock.reserved.cnt * sizeof(struct memblock_region)); - memblock.reserved.cnt = 1; + memblock.reserved.cnt = 0; memblock.reserved.max = INIT_MEMBLOCK_RESERVED_REGIONS; memblock.reserved.total_size = 0; } -- cgit v1.2.3-73-gaa49b From 3d3165193776ddacf59f101f0fa05cfab9f1a9ba Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 7 May 2024 07:58:27 +0000 Subject: memblock tests: add memblock_reserve_all_locations_check() Instead of adding the 129th memory block at the last position, let's try all possible positions. Signed-off-by: Wei Yang Link: https://lore.kernel.org/r/20240507075833.6346-2-richard.weiyang@gmail.com Signed-off-by: Mike Rapoport (IBM) --- tools/testing/memblock/tests/basic_api.c | 107 +++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) (limited to 'tools') diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c index f317fe691fc4..bd3ebbf6b697 100644 --- a/tools/testing/memblock/tests/basic_api.c +++ b/tools/testing/memblock/tests/basic_api.c @@ -982,6 +982,112 @@ static int memblock_reserve_many_check(void) return 0; } + +/* + * A test that trying to reserve the 129th memory block at all locations. + * Expect to trigger memblock_double_array() to double the + * memblock.memory.max, find a new valid memory as reserved.regions. + * + * 0 1 2 128 + * +-------+ +-------+ +-------+ +-------+ + * | 32K | | 32K | | 32K | ... | 32K | + * +-------+-------+-------+-------+-------+ +-------+ + * |<-32K->| |<-32K->| + * + */ +/* Keep the gap so these memory region will not be merged.
*/ +#define MEMORY_BASE(idx) (SZ_128K + (MEM_SIZE * 2) * (idx)) +static int memblock_reserve_all_locations_check(void) +{ + int i, skip; + void *orig_region; + struct region r = { + .base = SZ_16K, + .size = SZ_16K, + }; + phys_addr_t new_reserved_regions_size; + + PREFIX_PUSH(); + + /* Reserve the 129th memory block for all possible positions*/ + for (skip = 0; skip < INIT_MEMBLOCK_REGIONS + 1; skip++) { + reset_memblock_regions(); + memblock_allow_resize(); + + /* Add a valid memory region used by double_array(). */ + dummy_physical_memory_init(); + memblock_add(dummy_physical_memory_base(), MEM_SIZE); + + for (i = 0; i < INIT_MEMBLOCK_REGIONS + 1; i++) { + if (i == skip) + continue; + + /* Reserve some fakes memory region to fulfill the memblock. */ + memblock_reserve(MEMORY_BASE(i), MEM_SIZE); + + if (i < skip) { + ASSERT_EQ(memblock.reserved.cnt, i + 1); + ASSERT_EQ(memblock.reserved.total_size, (i + 1) * MEM_SIZE); + } else { + ASSERT_EQ(memblock.reserved.cnt, i); + ASSERT_EQ(memblock.reserved.total_size, i * MEM_SIZE); + } + } + + orig_region = memblock.reserved.regions; + + /* This reserve the 129 memory_region, and makes it double array. */ + memblock_reserve(MEMORY_BASE(skip), MEM_SIZE); + + /* + * This is the memory region size used by the doubled reserved.regions, + * and it has been reserved due to it has been used. The size is used to + * calculate the total_size that the memblock.reserved have now. + */ + new_reserved_regions_size = PAGE_ALIGN((INIT_MEMBLOCK_REGIONS * 2) * + sizeof(struct memblock_region)); + /* + * The double_array() will find a free memory region as the new + * reserved.regions, and the used memory region will be reserved, so + * there will be one more region exist in the reserved memblock. And the + * one more reserved region's size is new_reserved_regions_size. 
+ */ + ASSERT_EQ(memblock.reserved.cnt, INIT_MEMBLOCK_REGIONS + 2); + ASSERT_EQ(memblock.reserved.total_size, (INIT_MEMBLOCK_REGIONS + 1) * MEM_SIZE + + new_reserved_regions_size); + ASSERT_EQ(memblock.reserved.max, INIT_MEMBLOCK_REGIONS * 2); + + /* + * Now memblock_double_array() works fine. Let's check after the + * double_array(), the memblock_reserve() still works as normal. + */ + memblock_reserve(r.base, r.size); + ASSERT_EQ(memblock.reserved.regions[0].base, r.base); + ASSERT_EQ(memblock.reserved.regions[0].size, r.size); + + ASSERT_EQ(memblock.reserved.cnt, INIT_MEMBLOCK_REGIONS + 3); + ASSERT_EQ(memblock.reserved.total_size, (INIT_MEMBLOCK_REGIONS + 1) * MEM_SIZE + + new_reserved_regions_size + + r.size); + ASSERT_EQ(memblock.reserved.max, INIT_MEMBLOCK_REGIONS * 2); + + dummy_physical_memory_cleanup(); + + /* + * The current reserved.regions is occupying a range of memory that + * allocated from dummy_physical_memory_init(). After free the memory, + * we must not use it. So restore the origin memory region to make sure + * the tests can run as normal and not affected by the double array. + */ + memblock.reserved.regions = orig_region; + memblock.reserved.cnt = INIT_MEMBLOCK_RESERVED_REGIONS; + } + + test_pass_pop(); + + return 0; +} + static int memblock_reserve_checks(void) { prefix_reset(); @@ -997,6 +1103,7 @@ static int memblock_reserve_checks(void) memblock_reserve_between_check(); memblock_reserve_near_max_check(); memblock_reserve_many_check(); + memblock_reserve_all_locations_check(); prefix_pop(); -- cgit v1.2.3-73-gaa49b From f6df89c3582a337090ae1f37c3648bdb35da29f7 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 7 May 2024 07:58:28 +0000 Subject: memblock tests: add memblock_reserve_many_may_conflict_check() This may trigger the case fixed by commit 48c3b583bbdd ("mm/memblock: fix overlapping allocation when doubling reserved array"). This is done by adding the 129th reserve region into memblock.memory. 
If memblock_double_array() uses this reserved region as the new array, it fails. Signed-off-by: Wei Yang Link: https://lore.kernel.org/r/20240507075833.6346-3-richard.weiyang@gmail.com Signed-off-by: Mike Rapoport (IBM) --- tools/testing/memblock/tests/basic_api.c | 151 +++++++++++++++++++++++++++++++ tools/testing/memblock/tests/common.c | 4 +- tools/testing/memblock/tests/common.h | 1 + 3 files changed, 154 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c index bd3ebbf6b697..fdac82656d15 100644 --- a/tools/testing/memblock/tests/basic_api.c +++ b/tools/testing/memblock/tests/basic_api.c @@ -1088,6 +1088,156 @@ static int memblock_reserve_all_locations_check(void) return 0; } +/* + * A test that trying to reserve the 129th memory block at all locations. + * Expect to trigger memblock_double_array() to double the + * memblock.memory.max, find a new valid memory as reserved.regions. And make + * sure it doesn't conflict with the range we want to reserve. + * + * For example, we have 128 regions in reserved and now want to reserve + * the skipped one. Since reserved is full, memblock_double_array() would find + * an available range in memory for the new array. We intended to put two + * ranges in memory with one is the exact range of the skipped one. Before + * commit 48c3b583bbdd ("mm/memblock: fix overlapping allocation when doubling + * reserved array"), the new array would sits in the skipped range which is a + * conflict. The expected new array should be allocated from memory.regions[0]. + * + * 0 1 + * memory +-------+ +-------+ + * | 32K | | 32K | + * +-------+ ------+-------+-------+-------+ + * |<-32K->|<-32K->|<-32K->| + * + * 0 skipped 127 + * reserved +-------+ ......... +-------+ + * | 32K | . 32K . ... | 32K | + * +-------+-------+-------+ +-------+ + * |<-32K->| + * ^ + * | + * | + * skipped one + */ +/* Keep the gap so these memory region will not be merged.
*/ +#define MEMORY_BASE_OFFSET(idx, offset) ((offset) + (MEM_SIZE * 2) * (idx)) +static int memblock_reserve_many_may_conflict_check(void) +{ + int i, skip; + void *orig_region; + struct region r = { + .base = SZ_16K, + .size = SZ_16K, + }; + phys_addr_t new_reserved_regions_size; + + /* + * 0 1 129 + * +---+ +---+ +---+ + * |32K| |32K| .. |32K| + * +---+ +---+ +---+ + * + * Pre-allocate the range for 129 memory block + one range for double + * memblock.reserved.regions at idx 0. + */ + dummy_physical_memory_init(); + phys_addr_t memory_base = dummy_physical_memory_base(); + phys_addr_t offset = PAGE_ALIGN(memory_base); + + PREFIX_PUSH(); + + /* Reserve the 129th memory block for all possible positions*/ + for (skip = 1; skip <= INIT_MEMBLOCK_REGIONS + 1; skip++) { + reset_memblock_regions(); + memblock_allow_resize(); + + reset_memblock_attributes(); + /* Add a valid memory region used by double_array(). */ + memblock_add(MEMORY_BASE_OFFSET(0, offset), MEM_SIZE); + /* + * Add a memory region which will be reserved as 129th memory + * region. This is not expected to be used by double_array(). + */ + memblock_add(MEMORY_BASE_OFFSET(skip, offset), MEM_SIZE); + + for (i = 1; i <= INIT_MEMBLOCK_REGIONS + 1; i++) { + if (i == skip) + continue; + + /* Reserve some fakes memory region to fulfill the memblock. */ + memblock_reserve(MEMORY_BASE_OFFSET(i, offset), MEM_SIZE); + + if (i < skip) { + ASSERT_EQ(memblock.reserved.cnt, i); + ASSERT_EQ(memblock.reserved.total_size, i * MEM_SIZE); + } else { + ASSERT_EQ(memblock.reserved.cnt, i - 1); + ASSERT_EQ(memblock.reserved.total_size, (i - 1) * MEM_SIZE); + } + } + + orig_region = memblock.reserved.regions; + + /* This reserve the 129 memory_region, and makes it double array. */ + memblock_reserve(MEMORY_BASE_OFFSET(skip, offset), MEM_SIZE); + + /* + * This is the memory region size used by the doubled reserved.regions, + * and it has been reserved due to it has been used. 
The size is used to + * calculate the total_size that the memblock.reserved have now. + */ + new_reserved_regions_size = PAGE_ALIGN((INIT_MEMBLOCK_REGIONS * 2) * + sizeof(struct memblock_region)); + /* + * The double_array() will find a free memory region as the new + * reserved.regions, and the used memory region will be reserved, so + * there will be one more region exist in the reserved memblock. And the + * one more reserved region's size is new_reserved_regions_size. + */ + ASSERT_EQ(memblock.reserved.cnt, INIT_MEMBLOCK_REGIONS + 2); + ASSERT_EQ(memblock.reserved.total_size, (INIT_MEMBLOCK_REGIONS + 1) * MEM_SIZE + + new_reserved_regions_size); + ASSERT_EQ(memblock.reserved.max, INIT_MEMBLOCK_REGIONS * 2); + + /* + * The first reserved region is allocated for double array + * with the size of new_reserved_regions_size and the base to be + * MEMORY_BASE_OFFSET(0, offset) + SZ_32K - new_reserved_regions_size + */ + ASSERT_EQ(memblock.reserved.regions[0].base + memblock.reserved.regions[0].size, + MEMORY_BASE_OFFSET(0, offset) + SZ_32K); + ASSERT_EQ(memblock.reserved.regions[0].size, new_reserved_regions_size); + + /* + * Now memblock_double_array() works fine. Let's check after the + * double_array(), the memblock_reserve() still works as normal. + */ + memblock_reserve(r.base, r.size); + ASSERT_EQ(memblock.reserved.regions[0].base, r.base); + ASSERT_EQ(memblock.reserved.regions[0].size, r.size); + + ASSERT_EQ(memblock.reserved.cnt, INIT_MEMBLOCK_REGIONS + 3); + ASSERT_EQ(memblock.reserved.total_size, (INIT_MEMBLOCK_REGIONS + 1) * MEM_SIZE + + new_reserved_regions_size + + r.size); + ASSERT_EQ(memblock.reserved.max, INIT_MEMBLOCK_REGIONS * 2); + + /* + * The current reserved.regions is occupying a range of memory that + * allocated from dummy_physical_memory_init(). After free the memory, + * we must not use it. So restore the origin memory region to make sure + * the tests can run as normal and not affected by the double array. 
+ */ + memblock.reserved.regions = orig_region; + memblock.reserved.cnt = INIT_MEMBLOCK_RESERVED_REGIONS; + } + + dummy_physical_memory_cleanup(); + + test_pass_pop(); + + return 0; +} + static int memblock_reserve_checks(void) { prefix_reset(); @@ -1104,6 +1254,7 @@ static int memblock_reserve_checks(void) memblock_reserve_near_max_check(); memblock_reserve_many_check(); memblock_reserve_all_locations_check(); + memblock_reserve_many_may_conflict_check(); prefix_pop(); diff --git a/tools/testing/memblock/tests/common.c b/tools/testing/memblock/tests/common.c index c2c569f12178..3250c8e5124b 100644 --- a/tools/testing/memblock/tests/common.c +++ b/tools/testing/memblock/tests/common.c @@ -61,7 +61,7 @@ void reset_memblock_attributes(void) static inline void fill_memblock(void) { - memset(memory_block.base, 1, MEM_SIZE); + memset(memory_block.base, 1, PHYS_MEM_SIZE); } void setup_memblock(void) @@ -103,7 +103,7 @@ void setup_numa_memblock(const unsigned int node_fracs[]) void dummy_physical_memory_init(void) { - memory_block.base = malloc(MEM_SIZE); + memory_block.base = malloc(PHYS_MEM_SIZE); assert(memory_block.base); fill_memblock(); } diff --git a/tools/testing/memblock/tests/common.h b/tools/testing/memblock/tests/common.h index b5ec59aa62d7..2f26405562b0 100644 --- a/tools/testing/memblock/tests/common.h +++ b/tools/testing/memblock/tests/common.h @@ -12,6 +12,7 @@ #include <../selftests/kselftest.h> #define MEM_SIZE SZ_32K +#define PHYS_MEM_SIZE SZ_16M #define NUMA_NODES 8 #define INIT_MEMBLOCK_REGIONS 128 -- cgit v1.2.3-73-gaa49b From 1a879671bdfd14698a839f30de8e6d76e1e858fd Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 7 May 2024 07:58:30 +0000 Subject: memblock tests: add memblock_overlaps_region_checks Add a test case for memblock_overlaps_region(). 
Signed-off-by: Wei Yang Link: https://lore.kernel.org/r/20240507075833.6346-5-richard.weiyang@gmail.com Signed-off-by: Mike Rapoport (IBM) --- tools/testing/memblock/tests/basic_api.c | 48 ++++++++++++++++++++++++++++++++ tools/testing/memblock/tests/common.h | 3 ++ 2 files changed, 51 insertions(+) (limited to 'tools') diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c index fdac82656d15..67503089e6a0 100644 --- a/tools/testing/memblock/tests/basic_api.c +++ b/tools/testing/memblock/tests/basic_api.c @@ -2387,6 +2387,53 @@ static int memblock_trim_memory_checks(void) return 0; } +static int memblock_overlaps_region_check(void) +{ + struct region r = { + .base = SZ_1G, + .size = SZ_4M + }; + + PREFIX_PUSH(); + + reset_memblock_regions(); + memblock_add(r.base, r.size); + + /* Far Away */ + ASSERT_FALSE(memblock_overlaps_region(&memblock.memory, SZ_1M, SZ_1M)); + ASSERT_FALSE(memblock_overlaps_region(&memblock.memory, SZ_2G, SZ_1M)); + + /* Neighbor */ + ASSERT_FALSE(memblock_overlaps_region(&memblock.memory, SZ_1G - SZ_1M, SZ_1M)); + ASSERT_FALSE(memblock_overlaps_region(&memblock.memory, SZ_1G + SZ_4M, SZ_1M)); + + /* Partial Overlap */ + ASSERT_TRUE(memblock_overlaps_region(&memblock.memory, SZ_1G - SZ_1M, SZ_2M)); + ASSERT_TRUE(memblock_overlaps_region(&memblock.memory, SZ_1G + SZ_2M, SZ_2M)); + + /* Totally Overlap */ + ASSERT_TRUE(memblock_overlaps_region(&memblock.memory, SZ_1G, SZ_4M)); + ASSERT_TRUE(memblock_overlaps_region(&memblock.memory, SZ_1G - SZ_2M, SZ_8M)); + ASSERT_TRUE(memblock_overlaps_region(&memblock.memory, SZ_1G + SZ_1M, SZ_1M)); + + test_pass_pop(); + + return 0; +} + +static int memblock_overlaps_region_checks(void) +{ + prefix_reset(); + prefix_push("memblock_overlaps_region"); + test_print("Running memblock_overlaps_region tests...\n"); + + memblock_overlaps_region_check(); + + prefix_pop(); + + return 0; +} + int memblock_basic_checks(void) { memblock_initialization_check(); @@ -2396,6 
+2443,7 @@ int memblock_basic_checks(void) memblock_free_checks(); memblock_bottom_up_checks(); memblock_trim_memory_checks(); + memblock_overlaps_region_checks(); return 0; } diff --git a/tools/testing/memblock/tests/common.h b/tools/testing/memblock/tests/common.h index 2f26405562b0..e1138e06c903 100644 --- a/tools/testing/memblock/tests/common.h +++ b/tools/testing/memblock/tests/common.h @@ -40,6 +40,9 @@ enum test_flags { assert((_expected) == (_seen)); \ } while (0) +#define ASSERT_TRUE(_seen) ASSERT_EQ(true, _seen) +#define ASSERT_FALSE(_seen) ASSERT_EQ(false, _seen) + /** * ASSERT_NE(): * Check the condition -- cgit v1.2.3-73-gaa49b From b73f6b98bbd0b4c1fdcebc0c5b926349455035bf Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 7 May 2024 07:58:32 +0000 Subject: mm/memblock: use PAGE_ALIGN_DOWN to get pgend in free_memmap Leverage the macro PAGE_ALIGN_DOWN to get pgend. Signed-off-by: Wei Yang Link: https://lore.kernel.org/r/20240507075833.6346-7-richard.weiyang@gmail.com Signed-off-by: Mike Rapoport (IBM) --- mm/memblock.c | 2 +- tools/include/linux/mm.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/mm/memblock.c b/mm/memblock.c index da9a6c862a69..33a8b6f7b626 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -2039,7 +2039,7 @@ static void __init free_memmap(unsigned long start_pfn, unsigned long end_pfn) * downwards. 
*/ pg = PAGE_ALIGN(__pa(start_pg)); - pgend = __pa(end_pg) & PAGE_MASK; + pgend = PAGE_ALIGN_DOWN(__pa(end_pg)); /* * If there are free pages between these, free the section of the diff --git a/tools/include/linux/mm.h b/tools/include/linux/mm.h index dc0fc7125bc3..cad4f2927983 100644 --- a/tools/include/linux/mm.h +++ b/tools/include/linux/mm.h @@ -12,6 +12,7 @@ #define PHYS_ADDR_MAX (~(phys_addr_t)0) #define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE) +#define PAGE_ALIGN_DOWN(addr) ALIGN_DOWN(addr, PAGE_SIZE) #define __va(x) ((void *)((unsigned long)(x))) #define __pa(x) ((unsigned long)(x)) -- cgit v1.2.3-73-gaa49b From 68153bb2fffbe59804370e514482f95c4b2053ff Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 4 Jun 2024 17:16:25 -0700 Subject: libbpf: Add BTF field iterator Implement iterator-based type ID and string offset BTF field iterator. This is used extensively in BTF-handling code and BPF linker code for various sanity checks, rewriting IDs/offsets, etc. Currently this is implemented as visitor pattern calling custom callbacks, which makes the logic (especially in simple cases) unnecessarily obscure and harder to follow. Having equivalent functionality using iterator pattern makes for simpler to understand and maintain code. As we add more code for BTF processing logic in libbpf, it's best to switch to iterator pattern before adding more callback-based code. The idea for iterator-based implementation is to record offsets of necessary fields within fixed btf_type parts (which should be iterated just once), and, for kinds that have multiple members (based on vlen field), record where in each member necessary fields are located. Generic iteration code then just keeps track of last offset that was returned and handles N members correctly. Return type is just u32 pointer, where NULL is returned when all relevant fields were already iterated. 
Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Tested-by: Alan Maguire Acked-by: Eduard Zingerman Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20240605001629.4061937-2-andrii@kernel.org --- tools/lib/bpf/btf.c | 162 ++++++++++++++++++++++++++++++++++++++++ tools/lib/bpf/libbpf_internal.h | 24 ++++++ 2 files changed, 186 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 2d0840ef599a..50ff8b6eaf36 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -5133,6 +5133,168 @@ int btf_type_visit_str_offs(struct btf_type *t, str_off_visit_fn visit, void *ct return 0; } +int btf_field_iter_init(struct btf_field_iter *it, struct btf_type *t, enum btf_field_iter_kind iter_kind) +{ + it->p = NULL; + it->m_idx = -1; + it->off_idx = 0; + it->vlen = 0; + + switch (iter_kind) { + case BTF_FIELD_ITER_IDS: + switch (btf_kind(t)) { + case BTF_KIND_UNKN: + case BTF_KIND_INT: + case BTF_KIND_FLOAT: + case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: + it->desc = (struct btf_field_desc) {}; + break; + case BTF_KIND_FWD: + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + case BTF_KIND_VAR: + case BTF_KIND_DECL_TAG: + case BTF_KIND_TYPE_TAG: + it->desc = (struct btf_field_desc) { 1, {offsetof(struct btf_type, type)} }; + break; + case BTF_KIND_ARRAY: + it->desc = (struct btf_field_desc) { + 2, {sizeof(struct btf_type) + offsetof(struct btf_array, type), + sizeof(struct btf_type) + offsetof(struct btf_array, index_type)} + }; + break; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + it->desc = (struct btf_field_desc) { + 0, {}, + sizeof(struct btf_member), + 1, {offsetof(struct btf_member, type)} + }; + break; + case BTF_KIND_FUNC_PROTO: + it->desc = (struct btf_field_desc) { + 1, {offsetof(struct btf_type, type)}, + sizeof(struct btf_param), + 1, {offsetof(struct btf_param, type)} + }; + break; + case BTF_KIND_DATASEC: + 
it->desc = (struct btf_field_desc) { + 0, {}, + sizeof(struct btf_var_secinfo), + 1, {offsetof(struct btf_var_secinfo, type)} + }; + break; + default: + return -EINVAL; + } + break; + case BTF_FIELD_ITER_STRS: + switch (btf_kind(t)) { + case BTF_KIND_UNKN: + it->desc = (struct btf_field_desc) {}; + break; + case BTF_KIND_INT: + case BTF_KIND_FLOAT: + case BTF_KIND_FWD: + case BTF_KIND_ARRAY: + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + case BTF_KIND_VAR: + case BTF_KIND_DECL_TAG: + case BTF_KIND_TYPE_TAG: + case BTF_KIND_DATASEC: + it->desc = (struct btf_field_desc) { + 1, {offsetof(struct btf_type, name_off)} + }; + break; + case BTF_KIND_ENUM: + it->desc = (struct btf_field_desc) { + 1, {offsetof(struct btf_type, name_off)}, + sizeof(struct btf_enum), + 1, {offsetof(struct btf_enum, name_off)} + }; + break; + case BTF_KIND_ENUM64: + it->desc = (struct btf_field_desc) { + 1, {offsetof(struct btf_type, name_off)}, + sizeof(struct btf_enum64), + 1, {offsetof(struct btf_enum64, name_off)} + }; + break; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + it->desc = (struct btf_field_desc) { + 1, {offsetof(struct btf_type, name_off)}, + sizeof(struct btf_member), + 1, {offsetof(struct btf_member, name_off)} + }; + break; + case BTF_KIND_FUNC_PROTO: + it->desc = (struct btf_field_desc) { + 1, {offsetof(struct btf_type, name_off)}, + sizeof(struct btf_param), + 1, {offsetof(struct btf_param, name_off)} + }; + break; + default: + return -EINVAL; + } + break; + default: + return -EINVAL; + } + + if (it->desc.m_sz) + it->vlen = btf_vlen(t); + + it->p = t; + return 0; +} + +__u32 *btf_field_iter_next(struct btf_field_iter *it) +{ + if (!it->p) + return NULL; + + if (it->m_idx < 0) { + if (it->off_idx < it->desc.t_cnt) + return it->p + it->desc.t_offs[it->off_idx++]; + /* move to per-member iteration */ + it->m_idx = 0; + it->p += sizeof(struct btf_type); + it->off_idx = 0; + } + 
+ /* if type doesn't have members, stop */ + if (it->desc.m_sz == 0) { + it->p = NULL; + return NULL; + } + + if (it->off_idx >= it->desc.m_cnt) { + /* exhausted this member's fields, go to the next member */ + it->m_idx++; + it->p += it->desc.m_sz; + it->off_idx = 0; + } + + if (it->m_idx < it->vlen) + return it->p + it->desc.m_offs[it->off_idx++]; + + it->p = NULL; + return NULL; +} + int btf_ext_visit_type_ids(struct btf_ext *btf_ext, type_id_visit_fn visit, void *ctx) { const struct btf_ext_info *seg; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 7e7e686008c6..80f3d346db33 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -508,6 +508,30 @@ struct bpf_line_info_min { __u32 line_col; }; +enum btf_field_iter_kind { + BTF_FIELD_ITER_IDS, + BTF_FIELD_ITER_STRS, +}; + +struct btf_field_desc { + /* once-per-type offsets */ + int t_cnt, t_offs[2]; + /* member struct size, or zero, if no members */ + int m_sz; + /* repeated per-member offsets */ + int m_cnt, m_offs[1]; +}; + +struct btf_field_iter { + struct btf_field_desc desc; + void *p; + int m_idx; + int off_idx; + int vlen; +}; + +int btf_field_iter_init(struct btf_field_iter *it, struct btf_type *t, enum btf_field_iter_kind iter_kind); +__u32 *btf_field_iter_next(struct btf_field_iter *it); typedef int (*type_id_visit_fn)(__u32 *type_id, void *ctx); typedef int (*str_off_visit_fn)(__u32 *str_off, void *ctx); -- cgit v1.2.3-73-gaa49b From 2bce2c1cb2f0acbf619737a10575f99df0c43984 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 4 Jun 2024 17:16:26 -0700 Subject: libbpf: Make use of BTF field iterator in BPF linker code Switch all BPF linker code dealing with iterating BTF type ID and string offset fields to new btf_field_iter facilities. 
Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Tested-by: Alan Maguire Acked-by: Eduard Zingerman Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20240605001629.4061937-3-andrii@kernel.org --- tools/lib/bpf/btf.c | 4 +-- tools/lib/bpf/libbpf_internal.h | 4 +-- tools/lib/bpf/linker.c | 58 +++++++++++++++++++++++++---------------- 3 files changed, 40 insertions(+), 26 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 50ff8b6eaf36..d72260ac26a5 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -5267,7 +5267,7 @@ __u32 *btf_field_iter_next(struct btf_field_iter *it) return NULL; if (it->m_idx < 0) { - if (it->off_idx < it->desc.t_cnt) + if (it->off_idx < it->desc.t_off_cnt) return it->p + it->desc.t_offs[it->off_idx++]; /* move to per-member iteration */ it->m_idx = 0; @@ -5281,7 +5281,7 @@ __u32 *btf_field_iter_next(struct btf_field_iter *it) return NULL; } - if (it->off_idx >= it->desc.m_cnt) { + if (it->off_idx >= it->desc.m_off_cnt) { /* exhausted this member's fields, go to the next member */ it->m_idx++; it->p += it->desc.m_sz; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 80f3d346db33..96c0b0993f8b 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -515,11 +515,11 @@ enum btf_field_iter_kind { struct btf_field_desc { /* once-per-type offsets */ - int t_cnt, t_offs[2]; + int t_off_cnt, t_offs[2]; /* member struct size, or zero, if no members */ int m_sz; /* repeated per-member offsets */ - int m_cnt, m_offs[1]; + int m_off_cnt, m_offs[1]; }; struct btf_field_iter { diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index 0d4be829551b..fa11a671da3e 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -957,19 +957,33 @@ static int check_btf_str_off(__u32 *str_off, void *ctx) static int linker_sanity_check_btf(struct src_obj *obj) { struct btf_type *t; - int i, n, err = 0; + int i, n, err; if 
(!obj->btf) return 0; n = btf__type_cnt(obj->btf); for (i = 1; i < n; i++) { + struct btf_field_iter it; + __u32 *type_id, *str_off; + t = btf_type_by_id(obj->btf, i); - err = err ?: btf_type_visit_type_ids(t, check_btf_type_id, obj->btf); - err = err ?: btf_type_visit_str_offs(t, check_btf_str_off, obj->btf); + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_IDS); if (err) return err; + while ((type_id = btf_field_iter_next(&it))) { + if (*type_id >= n) + return -EINVAL; + } + + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_STRS); + if (err) + return err; + while ((str_off = btf_field_iter_next(&it))) { + if (!btf__str_by_offset(obj->btf, *str_off)) + return -EINVAL; + } } return 0; @@ -2234,26 +2248,10 @@ static int linker_fixup_btf(struct src_obj *obj) return 0; } -static int remap_type_id(__u32 *type_id, void *ctx) -{ - int *id_map = ctx; - int new_id = id_map[*type_id]; - - /* Error out if the type wasn't remapped. Ignore VOID which stays VOID. */ - if (new_id == 0 && *type_id != 0) { - pr_warn("failed to find new ID mapping for original BTF type ID %u\n", *type_id); - return -EINVAL; - } - - *type_id = id_map[*type_id]; - - return 0; -} - static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj) { const struct btf_type *t; - int i, j, n, start_id, id; + int i, j, n, start_id, id, err; const char *name; if (!obj->btf) @@ -2324,9 +2322,25 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj) n = btf__type_cnt(linker->btf); for (i = start_id; i < n; i++) { struct btf_type *dst_t = btf_type_by_id(linker->btf, i); + struct btf_field_iter it; + __u32 *type_id; - if (btf_type_visit_type_ids(dst_t, remap_type_id, obj->btf_type_map)) - return -EINVAL; + err = btf_field_iter_init(&it, dst_t, BTF_FIELD_ITER_IDS); + if (err) + return err; + + while ((type_id = btf_field_iter_next(&it))) { + int new_id = obj->btf_type_map[*type_id]; + + /* Error out if the type wasn't remapped. Ignore VOID which stays VOID. 
*/ + if (new_id == 0 && *type_id != 0) { + pr_warn("failed to find new ID mapping for original BTF type ID %u\n", + *type_id); + return -EINVAL; + } + + *type_id = obj->btf_type_map[*type_id]; + } } /* Rewrite VAR/FUNC underlying types (i.e., FUNC's FUNC_PROTO and VAR's -- cgit v1.2.3-73-gaa49b From c2641123696b572a3b059e1b45777317ba9f9086 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 4 Jun 2024 17:16:27 -0700 Subject: libbpf: Make use of BTF field iterator in BTF handling code Use new BTF field iterator logic to replace all the callback-based visitor calls. There are still .BTF.ext callback-based visitor APIs that should be converted, which will happen as a follow-up. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Tested-by: Alan Maguire Acked-by: Eduard Zingerman Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20240605001629.4061937-4-andrii@kernel.org --- tools/lib/bpf/btf.c | 76 +++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 54 insertions(+), 22 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index d72260ac26a5..0190fd819f58 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1739,9 +1739,8 @@ struct btf_pipe { struct hashmap *str_off_map; /* map string offsets from src to dst */ }; -static int btf_rewrite_str(__u32 *str_off, void *ctx) +static int btf_rewrite_str(struct btf_pipe *p, __u32 *str_off) { - struct btf_pipe *p = ctx; long mapped_off; int off, err; @@ -1774,7 +1773,9 @@ static int btf_rewrite_str(__u32 *str_off, void *ctx) int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_type *src_type) { struct btf_pipe p = { .src = src_btf, .dst = btf }; + struct btf_field_iter it; struct btf_type *t; + __u32 *str_off; int sz, err; sz = btf_type_size(src_type); @@ -1791,26 +1792,17 @@ int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_t memcpy(t, src_type, sz); - err = btf_type_visit_str_offs(t, 
btf_rewrite_str, &p); + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_STRS); if (err) return libbpf_err(err); - return btf_commit_type(btf, sz); -} - -static int btf_rewrite_type_ids(__u32 *type_id, void *ctx) -{ - struct btf *btf = ctx; - - if (!*type_id) /* nothing to do for VOID references */ - return 0; + while ((str_off = btf_field_iter_next(&it))) { + err = btf_rewrite_str(&p, str_off); + if (err) + return libbpf_err(err); + } - /* we haven't updated btf's type count yet, so - * btf->start_id + btf->nr_types - 1 is the type ID offset we should - * add to all newly added BTF types - */ - *type_id += btf->start_id + btf->nr_types - 1; - return 0; + return btf_commit_type(btf, sz); } static size_t btf_dedup_identity_hash_fn(long key, void *ctx); @@ -1858,6 +1850,9 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf) memcpy(t, src_btf->types_data, data_sz); for (i = 0; i < cnt; i++) { + struct btf_field_iter it; + __u32 *type_id, *str_off; + sz = btf_type_size(t); if (sz < 0) { /* unlikely, has to be corrupted src_btf */ @@ -1869,15 +1864,31 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf) *off = t - btf->types_data; /* add, dedup, and remap strings referenced by this BTF type */ - err = btf_type_visit_str_offs(t, btf_rewrite_str, &p); + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_STRS); if (err) goto err_out; + while ((str_off = btf_field_iter_next(&it))) { + err = btf_rewrite_str(&p, str_off); + if (err) + goto err_out; + } /* remap all type IDs referenced from this BTF type */ - err = btf_type_visit_type_ids(t, btf_rewrite_type_ids, btf); + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_IDS); if (err) goto err_out; + while ((type_id = btf_field_iter_next(&it))) { + if (!*type_id) /* nothing to do for VOID references */ + continue; + + /* we haven't updated btf's type count yet, so + * btf->start_id + btf->nr_types - 1 is the type ID offset we should + * add to all newly added BTF types + */ + *type_id += btf->start_id + 
btf->nr_types - 1; + } + /* go to next type data and type offset index entry */ t += sz; off++; @@ -3453,11 +3464,19 @@ static int btf_for_each_str_off(struct btf_dedup *d, str_off_visit_fn fn, void * int i, r; for (i = 0; i < d->btf->nr_types; i++) { + struct btf_field_iter it; struct btf_type *t = btf_type_by_id(d->btf, d->btf->start_id + i); + __u32 *str_off; - r = btf_type_visit_str_offs(t, fn, ctx); + r = btf_field_iter_init(&it, t, BTF_FIELD_ITER_STRS); if (r) return r; + + while ((str_off = btf_field_iter_next(&it))) { + r = fn(str_off, ctx); + if (r) + return r; + } } if (!d->btf_ext) @@ -4919,10 +4938,23 @@ static int btf_dedup_remap_types(struct btf_dedup *d) for (i = 0; i < d->btf->nr_types; i++) { struct btf_type *t = btf_type_by_id(d->btf, d->btf->start_id + i); + struct btf_field_iter it; + __u32 *type_id; - r = btf_type_visit_type_ids(t, btf_dedup_remap_type_id, d); + r = btf_field_iter_init(&it, t, BTF_FIELD_ITER_IDS); if (r) return r; + + while ((type_id = btf_field_iter_next(&it))) { + __u32 resolved_id, new_id; + + resolved_id = resolve_type_id(d, *type_id); + new_id = d->hypot_map[resolved_id]; + if (new_id > BTF_MAX_NR_TYPES) + return -EINVAL; + + *type_id = new_id; + } } if (!d->btf_ext) -- cgit v1.2.3-73-gaa49b From e1a8630291fde2a0edac2955e3df48587dac9906 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 4 Jun 2024 17:16:28 -0700 Subject: bpftool: Use BTF field iterator in btfgen Switch bpftool's code which is using libbpf-internal btf_type_visit_type_ids() helper to new btf_field_iter functionality. This makes bpftool code simpler, but also unblocks removing libbpf's btf_type_visit_type_ids() helper completely. 
Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Tested-by: Alan Maguire Reviewed-by: Quentin Monnet Acked-by: Eduard Zingerman Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20240605001629.4061937-5-andrii@kernel.org --- tools/bpf/bpftool/gen.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index b3979ddc0189..d244a7de387e 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -2379,15 +2379,6 @@ out: return err; } -static int btfgen_remap_id(__u32 *type_id, void *ctx) -{ - unsigned int *ids = ctx; - - *type_id = ids[*type_id]; - - return 0; -} - /* Generate BTF from relocation information previously recorded */ static struct btf *btfgen_get_btf(struct btfgen_info *info) { @@ -2467,10 +2458,15 @@ static struct btf *btfgen_get_btf(struct btfgen_info *info) /* second pass: fix up type ids */ for (i = 1; i < btf__type_cnt(btf_new); i++) { struct btf_type *btf_type = (struct btf_type *) btf__type_by_id(btf_new, i); + struct btf_field_iter it; + __u32 *type_id; - err = btf_type_visit_type_ids(btf_type, btfgen_remap_id, ids); + err = btf_field_iter_init(&it, btf_type, BTF_FIELD_ITER_IDS); if (err) goto err_out; + + while ((type_id = btf_field_iter_next(&it))) + *type_id = ids[*type_id]; } free(ids); -- cgit v1.2.3-73-gaa49b From 072088704433f75dacf9e33179dd7a81f0a238d4 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 4 Jun 2024 17:16:29 -0700 Subject: libbpf: Remove callback-based type/string BTF field visitor helpers Now that all libbpf/bpftool code switched to btf_field_iter, remove btf_type_visit_type_ids() and btf_type_visit_str_offs() callback-based helpers as not needed anymore. 
Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Tested-by: Alan Maguire Acked-by: Eduard Zingerman Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20240605001629.4061937-6-andrii@kernel.org --- tools/lib/bpf/btf.c | 130 ---------------------------------------- tools/lib/bpf/libbpf_internal.h | 2 - 2 files changed, 132 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 0190fd819f58..775ca55a541c 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -5035,136 +5035,6 @@ struct btf *btf__load_module_btf(const char *module_name, struct btf *vmlinux_bt return btf__parse_split(path, vmlinux_btf); } -int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ctx) -{ - int i, n, err; - - switch (btf_kind(t)) { - case BTF_KIND_INT: - case BTF_KIND_FLOAT: - case BTF_KIND_ENUM: - case BTF_KIND_ENUM64: - return 0; - - case BTF_KIND_FWD: - case BTF_KIND_CONST: - case BTF_KIND_VOLATILE: - case BTF_KIND_RESTRICT: - case BTF_KIND_PTR: - case BTF_KIND_TYPEDEF: - case BTF_KIND_FUNC: - case BTF_KIND_VAR: - case BTF_KIND_DECL_TAG: - case BTF_KIND_TYPE_TAG: - return visit(&t->type, ctx); - - case BTF_KIND_ARRAY: { - struct btf_array *a = btf_array(t); - - err = visit(&a->type, ctx); - err = err ?: visit(&a->index_type, ctx); - return err; - } - - case BTF_KIND_STRUCT: - case BTF_KIND_UNION: { - struct btf_member *m = btf_members(t); - - for (i = 0, n = btf_vlen(t); i < n; i++, m++) { - err = visit(&m->type, ctx); - if (err) - return err; - } - return 0; - } - - case BTF_KIND_FUNC_PROTO: { - struct btf_param *m = btf_params(t); - - err = visit(&t->type, ctx); - if (err) - return err; - for (i = 0, n = btf_vlen(t); i < n; i++, m++) { - err = visit(&m->type, ctx); - if (err) - return err; - } - return 0; - } - - case BTF_KIND_DATASEC: { - struct btf_var_secinfo *m = btf_var_secinfos(t); - - for (i = 0, n = btf_vlen(t); i < n; i++, m++) { - err = visit(&m->type, ctx); - if (err) - return err; - } - 
return 0; - } - - default: - return -EINVAL; - } -} - -int btf_type_visit_str_offs(struct btf_type *t, str_off_visit_fn visit, void *ctx) -{ - int i, n, err; - - err = visit(&t->name_off, ctx); - if (err) - return err; - - switch (btf_kind(t)) { - case BTF_KIND_STRUCT: - case BTF_KIND_UNION: { - struct btf_member *m = btf_members(t); - - for (i = 0, n = btf_vlen(t); i < n; i++, m++) { - err = visit(&m->name_off, ctx); - if (err) - return err; - } - break; - } - case BTF_KIND_ENUM: { - struct btf_enum *m = btf_enum(t); - - for (i = 0, n = btf_vlen(t); i < n; i++, m++) { - err = visit(&m->name_off, ctx); - if (err) - return err; - } - break; - } - case BTF_KIND_ENUM64: { - struct btf_enum64 *m = btf_enum64(t); - - for (i = 0, n = btf_vlen(t); i < n; i++, m++) { - err = visit(&m->name_off, ctx); - if (err) - return err; - } - break; - } - case BTF_KIND_FUNC_PROTO: { - struct btf_param *m = btf_params(t); - - for (i = 0, n = btf_vlen(t); i < n; i++, m++) { - err = visit(&m->name_off, ctx); - if (err) - return err; - } - break; - } - default: - break; - } - - return 0; -} - int btf_field_iter_init(struct btf_field_iter *it, struct btf_type *t, enum btf_field_iter_kind iter_kind) { it->p = NULL; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 96c0b0993f8b..e2f06609c624 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -535,8 +535,6 @@ __u32 *btf_field_iter_next(struct btf_field_iter *it); typedef int (*type_id_visit_fn)(__u32 *type_id, void *ctx); typedef int (*str_off_visit_fn)(__u32 *str_off, void *ctx); -int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ctx); -int btf_type_visit_str_offs(struct btf_type *t, str_off_visit_fn visit, void *ctx); int btf_ext_visit_type_ids(struct btf_ext *btf_ext, type_id_visit_fn visit, void *ctx); int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void *ctx); __s32 btf__find_by_name_kind_own(const struct btf 
*btf, const char *type_name, -- cgit v1.2.3-73-gaa49b From 5efe9688f9488e8e95177d03be2678021233e877 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 4 Jun 2024 15:25:50 -0400 Subject: ftrace/selftests: Fix pid test with function graph not showing pids The pid filtering test will set the pid filters and make sure that both function and function_graph tracing honor the filters. But the function_graph tracer test was failing because the PID was not being filtered properly. That's because the funcgraph-proc option wasn't getting set. Without that option the PID is not shown. Instead we get: + cat trace # tracer: function_graph # # CPU DURATION FUNCTION CALLS # | | | | | | | 3) ! 143.685 us | kernel_clone(); 3) ! 127.055 us | kernel_clone(); 1) ! 127.170 us | kernel_clone(); 3) ! 126.840 us | kernel_clone(); When we should be getting: + cat trace # tracer: function_graph # # CPU TASK/PID DURATION FUNCTION CALLS # | | | | | | | | | 4) bash-939 | # 1070.009 us | kernel_clone(); 4) bash-939 | # 1116.903 us | kernel_clone(); 5) bash-939 | ! 976.133 us | kernel_clone(); 5) bash-939 | ! 954.012 us | kernel_clone(); The test looks for the pids it is filtering and will fail if it cannot find them. Without the funcgraph-proc option set, the PIDs will not be displayed and the test will fail. 
Link: https://lore.kernel.org/all/Zl9JFnzKGuUM10X2@J2N7QTR9R3/ Link: https://lore.kernel.org/linux-trace-kernel/20240604152550.0c01d7cd@gandalf.local.home Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Fixes: 35b944a997e2 ("selftests/ftrace: Add function_graph tracer to func-filter-pid test") Reported-by: Mark Rutland Signed-off-by: Steven Rostedt (Google) Tested-by: Mark Rutland --- tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc index c6fc9d31a496..8dcce001881d 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc @@ -8,6 +8,7 @@ # Also test it on an instance directory do_function_fork=1 +do_funcgraph_proc=1 if [ ! -f options/function-fork ]; then do_function_fork=0 @@ -28,6 +29,7 @@ fi if [ $do_funcgraph_proc -eq 1 ]; then orig_value2=`cat options/funcgraph-proc` + echo 1 > options/funcgraph-proc fi do_reset() { -- cgit v1.2.3-73-gaa49b From 955edd872baf29740b714ffa093886918fc9a47b Mon Sep 17 00:00:00 2001 From: Lukasz Majewski Date: Mon, 3 Jun 2024 11:33:22 +0200 Subject: selftests: hsr: Extend the hsr_redbox.sh test to use fixed MAC addresses Fixed MAC addresses help with debugging as last four bytes identify the network namespace. Moreover, it allows to mimic the real life setup with for example bridge having the same MAC address on each port. 
Signed-off-by: Lukasz Majewski Link: https://lore.kernel.org/r/20240603093322.3150030-2-lukma@denx.de Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/hsr/hsr_redbox.sh | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/hsr/hsr_redbox.sh b/tools/testing/selftests/net/hsr/hsr_redbox.sh index 1f36785347c0..998103502d5d 100755 --- a/tools/testing/selftests/net/hsr/hsr_redbox.sh +++ b/tools/testing/selftests/net/hsr/hsr_redbox.sh @@ -96,6 +96,21 @@ setup_hsr_interfaces() ip -n "${ns4}" link set ns4eth1 up ip -n "${ns5}" link set ns5eth1 up + ip -net "$ns1" link set address 00:11:22:00:01:01 dev ns1eth1 + ip -net "$ns1" link set address 00:11:22:00:01:02 dev ns1eth2 + + ip -net "$ns2" link set address 00:11:22:00:02:01 dev ns2eth1 + ip -net "$ns2" link set address 00:11:22:00:02:02 dev ns2eth2 + ip -net "$ns2" link set address 00:11:22:00:02:03 dev ns2eth3 + + ip -net "$ns3" link set address 00:11:22:00:03:11 dev ns3eth1 + ip -net "$ns3" link set address 00:11:22:00:03:11 dev ns3eth2 + ip -net "$ns3" link set address 00:11:22:00:03:11 dev ns3eth3 + ip -net "$ns3" link set address 00:11:22:00:03:11 dev ns3br1 + + ip -net "$ns4" link set address 00:11:22:00:04:01 dev ns4eth1 + ip -net "$ns5" link set address 00:11:22:00:05:01 dev ns5eth1 + ip -net "${ns1}" link add name hsr1 type hsr slave1 ns1eth1 slave2 ns1eth2 supervision 45 version ${HSRv} proto 0 ip -net "${ns2}" link add name hsr2 type hsr slave1 ns2eth1 slave2 ns2eth2 interlink ns2eth3 supervision 45 version ${HSRv} proto 0 -- cgit v1.2.3-73-gaa49b From ed20142ed68c2b8819120508bc029e84d13cfe63 Mon Sep 17 00:00:00 2001 From: Lukasz Majewski Date: Mon, 3 Jun 2024 11:33:21 +0200 Subject: selftests: hsr: Extend the hsr_ping.sh test to use fixed MAC addresses Fixed MAC addresses help with debugging as last four bytes identify the network namespace. 
Signed-off-by: Lukasz Majewski Link: https://lore.kernel.org/r/20240603093322.3150030-1-lukma@denx.de Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/hsr/hsr_ping.sh | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh index 3684b813b0f6..f5d207fc770a 100755 --- a/tools/testing/selftests/net/hsr/hsr_ping.sh +++ b/tools/testing/selftests/net/hsr/hsr_ping.sh @@ -152,6 +152,15 @@ setup_hsr_interfaces() ip -net "$ns3" addr add 100.64.0.3/24 dev hsr3 ip -net "$ns3" addr add dead:beef:1::3/64 dev hsr3 nodad + ip -net "$ns1" link set address 00:11:22:00:01:01 dev ns1eth1 + ip -net "$ns1" link set address 00:11:22:00:01:02 dev ns1eth2 + + ip -net "$ns2" link set address 00:11:22:00:02:01 dev ns2eth1 + ip -net "$ns2" link set address 00:11:22:00:02:02 dev ns2eth2 + + ip -net "$ns3" link set address 00:11:22:00:03:01 dev ns3eth1 + ip -net "$ns3" link set address 00:11:22:00:03:02 dev ns3eth2 + # All Links up ip -net "$ns1" link set ns1eth1 up ip -net "$ns1" link set ns1eth2 up -- cgit v1.2.3-73-gaa49b From 9aa61d8ecb7f6f176ff8247a41a4f6eea8376112 Mon Sep 17 00:00:00 2001 From: Clément Le Goffic Date: Wed, 5 Jun 2024 16:04:53 +0200 Subject: perf: parse-events: Fix compilation error while defining DEBUG_PARSER MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compiling perf tool with 'PARSER_DEBUG=1' leads to errors: $> make -C tools/perf PARSER_DEBUG=1 NO_LIBTRACEEVENT=1 ...
CC util/expr-flex.o CC util/expr.o util/parse-events.c:33:12: error: redundant redeclaration of ‘parse_events_debug’ [-Werror=redundant-decls] 33 | extern int parse_events_debug; | ^~~~~~~~~~~~~~~~~~ In file included from util/parse-events.c:18: util/parse-events-bison.h:43:12: note: previous declaration of ‘parse_events_debug’ with type ‘int’ 43 | extern int parse_events_debug; | ^~~~~~~~~~~~~~~~~~ util/expr.c:27:12: error: redundant redeclaration of ‘expr_debug’ [-Werror=redundant-decls] 27 | extern int expr_debug; | ^~~~~~~~~~ In file included from util/expr.c:11: util/expr-bison.h:43:12: note: previous declaration of ‘expr_debug’ with type ‘int’ 43 | extern int expr_debug; | ^~~~~~~~~~ cc-1: all warnings being treated as errors Remove extern declaration from the parse-events.c file as there is a conflict with the ones generated using bison and yacc tools from the file parse-events.[ly]. Signed-off-by: Clément Le Goffic Reviewed-by: Ian Rogers Cc: James Clark Cc: John Garry Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240605140453.614862-1-clement.legoffic@foss.st.com --- tools/perf/util/expr.c | 4 ---- tools/perf/util/parse-events.c | 3 --- 2 files changed, 7 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index b8875aac8f87..b2536a59c44e 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -25,10 +25,6 @@ #include #include "pmu.h" -#ifdef PARSER_DEBUG -extern int expr_debug; -#endif - struct expr_id_data { union { struct { diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 6ed0f9c5581d..8d5fb05f20c2 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -31,9 +31,6 @@ #define MAX_NAME_LEN 100 -#ifdef PARSER_DEBUG -extern int parse_events_debug; -#endif static int get_config_terms(const struct parse_events_terms *head_config, struct list_head *head_terms); static int parse_events_terms__copy(const struct parse_events_terms
*src, -- cgit v1.2.3-73-gaa49b From 7015843afcaf68c132784c89528dfddc0005e483 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 5 Jun 2024 13:12:03 -0700 Subject: selftests/bpf: Fix send_signal test with nested CONFIG_PARAVIRT Alexei reported that send_signal test may fail with nested CONFIG_PARAVIRT configs. In this particular case, the base VM is AMD with 166 cpus, and I run selftests with regular qemu on top of that and indeed send_signal test failed. I also tried with an Intel box with 80 cpus and there is no issue. The main qemu command line includes: -enable-kvm -smp 16 -cpu host The failure log looks like: $ ./test_progs -t send_signal [ 48.501588] watchdog: BUG: soft lockup - CPU#9 stuck for 26s! [test_progs:2225] [ 48.503622] Modules linked in: bpf_testmod(O) [ 48.503622] CPU: 9 PID: 2225 Comm: test_progs Tainted: G O 6.9.0-08561-g2c1713a8f1c9-dirty #69 [ 48.507629] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.15.0-0-g2dd4b9b3f840-prebuilt.qemu.org 04/01/2014 [ 48.511635] RIP: 0010:handle_softirqs+0x71/0x290 [ 48.511635] Code: [...] 10 0a 00 00 00 31 c0 65 66 89 05 d5 f4 fa 7e fb bb ff ff ff ff <49> c7 c2 cb [ 48.518527] RSP: 0018:ffffc90000310fa0 EFLAGS: 00000246 [ 48.519579] RAX: 0000000000000000 RBX: 00000000ffffffff RCX: 00000000000006e0 [ 48.522526] RDX: 0000000000000006 RSI: ffff88810791ae80 RDI: 0000000000000000 [ 48.523587] RBP: ffffc90000fabc88 R08: 00000005a0af4f7f R09: 0000000000000000 [ 48.525525] R10: 0000000561d2f29c R11: 0000000000006534 R12: 0000000000000280 [ 48.528525] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 [ 48.528525] FS: 00007f2f2885cd00(0000) GS:ffff888237c40000(0000) knlGS:0000000000000000 [ 48.531600] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 48.535520] CR2: 00007f2f287059f0 CR3: 0000000106a28002 CR4: 00000000003706f0 [ 48.537538] Call Trace: [ 48.537538] [ 48.537538] ? watchdog_timer_fn+0x1cd/0x250 [ 48.539590] ? lockup_detector_update_enable+0x50/0x50 [ 48.539590] ? 
__hrtimer_run_queues+0xff/0x280 [ 48.542520] ? hrtimer_interrupt+0x103/0x230 [ 48.544524] ? __sysvec_apic_timer_interrupt+0x4f/0x140 [ 48.545522] ? sysvec_apic_timer_interrupt+0x3a/0x90 [ 48.547612] ? asm_sysvec_apic_timer_interrupt+0x1a/0x20 [ 48.547612] ? handle_softirqs+0x71/0x290 [ 48.547612] irq_exit_rcu+0x63/0x80 [ 48.551585] sysvec_apic_timer_interrupt+0x75/0x90 [ 48.552521] [ 48.553529] [ 48.553529] asm_sysvec_apic_timer_interrupt+0x1a/0x20 [ 48.555609] RIP: 0010:finish_task_switch.isra.0+0x90/0x260 [ 48.556526] Code: [...] 9f 58 0a 00 00 48 85 db 0f 85 89 01 00 00 4c 89 ff e8 53 d9 bd 00 fb 66 90 <4d> 85 ed 74 [ 48.562524] RSP: 0018:ffffc90000fabd38 EFLAGS: 00000282 [ 48.563589] RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffffff83385620 [ 48.563589] RDX: ffff888237c73ae4 RSI: 0000000000000000 RDI: ffff888237c6fd00 [ 48.568521] RBP: ffffc90000fabd68 R08: 0000000000000000 R09: 0000000000000000 [ 48.569528] R10: 0000000000000001 R11: 0000000000000000 R12: ffff8881009d0000 [ 48.573525] R13: ffff8881024e5400 R14: ffff88810791ae80 R15: ffff888237c6fd00 [ 48.575614] ? finish_task_switch.isra.0+0x8d/0x260 [ 48.576523] __schedule+0x364/0xac0 [ 48.577535] schedule+0x2e/0x110 [ 48.578555] pipe_read+0x301/0x400 [ 48.579589] ? destroy_sched_domains_rcu+0x30/0x30 [ 48.579589] vfs_read+0x2b3/0x2f0 [ 48.579589] ksys_read+0x8b/0xc0 [ 48.583590] do_syscall_64+0x3d/0xc0 [ 48.583590] entry_SYSCALL_64_after_hwframe+0x4b/0x53 [ 48.586525] RIP: 0033:0x7f2f28703fa1 [ 48.587592] Code: [...] 
00 00 00 0f 1f 44 00 00 f3 0f 1e fa 80 3d c5 23 14 00 00 74 13 31 c0 0f 05 <48> 3d 00 f0 [ 48.593534] RSP: 002b:00007ffd90f8cf88 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [ 48.595589] RAX: ffffffffffffffda RBX: 00007ffd90f8d5e8 RCX: 00007f2f28703fa1 [ 48.595589] RDX: 0000000000000001 RSI: 00007ffd90f8cfb0 RDI: 0000000000000006 [ 48.599592] RBP: 00007ffd90f8d2f0 R08: 0000000000000064 R09: 0000000000000000 [ 48.602527] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 [ 48.603589] R13: 00007ffd90f8d608 R14: 00007f2f288d8000 R15: 0000000000f6bdb0 [ 48.605527] In the test, two processes are communicating through pipe. Further debugging with strace found that the above splat is triggered as read() syscall could not receive the data even if the corresponding write() syscall in another process successfully wrote data into the pipe. The failed subtest is "send_signal_perf". The corresponding perf event has sample_period 1 and config PERF_COUNT_SW_CPU_CLOCK. sample_period 1 means every overflow event will trigger a call to the BPF program. So I suspect this may overwhelm the system. So I increased the sample_period to 100,000 and the test passed. The sample_period 10,000 still has the test failed. In other parts of selftest, e.g., [1], sample_freq is used instead. So I decided to use sample_freq = 1,000 since the test can pass as well. 
[1] https://lore.kernel.org/bpf/20240604070700.3032142-1-song@kernel.org/ Reported-by: Alexei Starovoitov Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20240605201203.2603846-1-yonghong.song@linux.dev --- tools/testing/selftests/bpf/prog_tests/send_signal.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c index 920aee41bd58..6cc69900b310 100644 --- a/tools/testing/selftests/bpf/prog_tests/send_signal.c +++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c @@ -156,7 +156,8 @@ static void test_send_signal_tracepoint(bool signal_thread) static void test_send_signal_perf(bool signal_thread) { struct perf_event_attr attr = { - .sample_period = 1, + .freq = 1, + .sample_freq = 1000, .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_CLOCK, }; -- cgit v1.2.3-73-gaa49b From b24862bac7b5db326716ad07bbff7b6ee3b09a59 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Wed, 5 Jun 2024 16:33:14 +0100 Subject: selftests/bpf: Add btf_field_iter selftests The added selftests verify that for every BTF kind we iterate correctly over constituent strings and ids. Signed-off-by: Alan Maguire Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20240605153314.3727466-1-alan.maguire@oracle.com --- .../selftests/bpf/prog_tests/btf_field_iter.c | 161 +++++++++++++++++++++ 1 file changed, 161 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/btf_field_iter.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/btf_field_iter.c b/tools/testing/selftests/bpf/prog_tests/btf_field_iter.c new file mode 100644 index 000000000000..32159d3eb281 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/btf_field_iter.c @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024, Oracle and/or its affiliates.
*/ + +#include +#include +#include "btf_helpers.h" +#include "bpf/libbpf_internal.h" + +struct field_data { + __u32 ids[5]; + const char *strs[5]; +} fields[] = { + { .ids = {}, .strs = {} }, + { .ids = {}, .strs = { "int" } }, + { .ids = {}, .strs = { "int64" } }, + { .ids = { 1 }, .strs = { "" } }, + { .ids = { 2, 1 }, .strs = { "" } }, + { .ids = { 3, 1 }, .strs = { "s1", "f1", "f2" } }, + { .ids = { 1, 5 }, .strs = { "u1", "f1", "f2" } }, + { .ids = {}, .strs = { "e1", "v1", "v2" } }, + { .ids = {}, .strs = { "fw1" } }, + { .ids = { 1 }, .strs = { "t" } }, + { .ids = { 2 }, .strs = { "" } }, + { .ids = { 1 }, .strs = { "" } }, + { .ids = { 3 }, .strs = { "" } }, + { .ids = { 1, 1, 3 }, .strs = { "", "p1", "p2" } }, + { .ids = { 13 }, .strs = { "func" } }, + { .ids = { 1 }, .strs = { "var1" } }, + { .ids = { 3 }, .strs = { "var2" } }, + { .ids = {}, .strs = { "float" } }, + { .ids = { 11 }, .strs = { "decltag" } }, + { .ids = { 6 }, .strs = { "typetag" } }, + { .ids = {}, .strs = { "e64", "eval1", "eval2", "eval3" } }, + { .ids = { 15, 16 }, .strs = { "datasec1" } } + +}; + +/* Fabricate BTF with various types and check BTF field iteration finds types, + * strings expected. 
+ */ +void test_btf_field_iter(void) +{ + struct btf *btf = NULL; + int id; + + btf = btf__new_empty(); + if (!ASSERT_OK_PTR(btf, "empty_btf")) + return; + + btf__add_int(btf, "int", 4, BTF_INT_SIGNED); /* [1] int */ + btf__add_int(btf, "int64", 8, BTF_INT_SIGNED); /* [2] int64 */ + btf__add_ptr(btf, 1); /* [3] int * */ + btf__add_array(btf, 1, 2, 3); /* [4] int64[3] */ + btf__add_struct(btf, "s1", 12); /* [5] struct s1 { */ + btf__add_field(btf, "f1", 3, 0, 0); /* int *f1; */ + btf__add_field(btf, "f2", 1, 0, 0); /* int f2; */ + /* } */ + btf__add_union(btf, "u1", 12); /* [6] union u1 { */ + btf__add_field(btf, "f1", 1, 0, 0); /* int f1; */ + btf__add_field(btf, "f2", 5, 0, 0); /* struct s1 f2; */ + /* } */ + btf__add_enum(btf, "e1", 4); /* [7] enum e1 { */ + btf__add_enum_value(btf, "v1", 1); /* v1 = 1; */ + btf__add_enum_value(btf, "v2", 2); /* v2 = 2; */ + /* } */ + + btf__add_fwd(btf, "fw1", BTF_FWD_STRUCT); /* [8] struct fw1; */ + btf__add_typedef(btf, "t", 1); /* [9] typedef int t; */ + btf__add_volatile(btf, 2); /* [10] volatile int64; */ + btf__add_const(btf, 1); /* [11] const int; */ + btf__add_restrict(btf, 3); /* [12] restrict int *; */ + btf__add_func_proto(btf, 1); /* [13] int (*)(int p1, int *p2); */ + btf__add_func_param(btf, "p1", 1); + btf__add_func_param(btf, "p2", 3); + + btf__add_func(btf, "func", BTF_FUNC_GLOBAL, 13);/* [14] int func(int p1, int *p2); */ + btf__add_var(btf, "var1", BTF_VAR_STATIC, 1); /* [15] static int var1; */ + btf__add_var(btf, "var2", BTF_VAR_STATIC, 3); /* [16] static int *var2; */ + btf__add_float(btf, "float", 4); /* [17] float; */ + btf__add_decl_tag(btf, "decltag", 11, -1); /* [18] decltag const int; */ + btf__add_type_tag(btf, "typetag", 6); /* [19] typetag union u1; */ + btf__add_enum64(btf, "e64", 8, true); /* [20] enum { */ + btf__add_enum64_value(btf, "eval1", 1000); /* eval1 = 1000, */ + btf__add_enum64_value(btf, "eval2", 2000); /* eval2 = 2000, */ + btf__add_enum64_value(btf, "eval3", 3000); /* eval3 = 3000 
*/ + /* } */ + btf__add_datasec(btf, "datasec1", 12); /* [21] datasec datasec1 */ + btf__add_datasec_var_info(btf, 15, 0, 4); + btf__add_datasec_var_info(btf, 16, 4, 8); + + VALIDATE_RAW_BTF( + btf, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] INT 'int64' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED", + "[3] PTR '(anon)' type_id=1", + "[4] ARRAY '(anon)' type_id=2 index_type_id=1 nr_elems=3", + "[5] STRUCT 's1' size=12 vlen=2\n" + "\t'f1' type_id=3 bits_offset=0\n" + "\t'f2' type_id=1 bits_offset=0", + "[6] UNION 'u1' size=12 vlen=2\n" + "\t'f1' type_id=1 bits_offset=0\n" + "\t'f2' type_id=5 bits_offset=0", + "[7] ENUM 'e1' encoding=UNSIGNED size=4 vlen=2\n" + "\t'v1' val=1\n" + "\t'v2' val=2", + "[8] FWD 'fw1' fwd_kind=struct", + "[9] TYPEDEF 't' type_id=1", + "[10] VOLATILE '(anon)' type_id=2", + "[11] CONST '(anon)' type_id=1", + "[12] RESTRICT '(anon)' type_id=3", + "[13] FUNC_PROTO '(anon)' ret_type_id=1 vlen=2\n" + "\t'p1' type_id=1\n" + "\t'p2' type_id=3", + "[14] FUNC 'func' type_id=13 linkage=global", + "[15] VAR 'var1' type_id=1, linkage=static", + "[16] VAR 'var2' type_id=3, linkage=static", + "[17] FLOAT 'float' size=4", + "[18] DECL_TAG 'decltag' type_id=11 component_idx=-1", + "[19] TYPE_TAG 'typetag' type_id=6", + "[20] ENUM64 'e64' encoding=SIGNED size=8 vlen=3\n" + "\t'eval1' val=1000\n" + "\t'eval2' val=2000\n" + "\t'eval3' val=3000", + "[21] DATASEC 'datasec1' size=12 vlen=2\n" + "\ttype_id=15 offset=0 size=4\n" + "\ttype_id=16 offset=4 size=8"); + + for (id = 1; id < btf__type_cnt(btf); id++) { + struct btf_type *t = btf_type_by_id(btf, id); + struct btf_field_iter it_strs, it_ids; + int str_idx = 0, id_idx = 0; + __u32 *next_str, *next_id; + + if (!ASSERT_OK_PTR(t, "btf_type_by_id")) + break; + if (!ASSERT_OK(btf_field_iter_init(&it_strs, t, BTF_FIELD_ITER_STRS), + "iter_init_strs")) + break; + if (!ASSERT_OK(btf_field_iter_init(&it_ids, t, BTF_FIELD_ITER_IDS), + "iter_init_ids")) + break; + while ((next_str = 
btf_field_iter_next(&it_strs))) { + const char *str = btf__str_by_offset(btf, *next_str); + + if (!ASSERT_OK(strcmp(fields[id].strs[str_idx], str), "field_str_match")) + break; + str_idx++; + } + /* ensure no more strings are expected */ + ASSERT_EQ(fields[id].strs[str_idx], NULL, "field_str_cnt"); + + while ((next_id = btf_field_iter_next(&it_ids))) { + if (!ASSERT_EQ(*next_id, fields[id].ids[id_idx], "field_id_match")) + break; + id_idx++; + } + /* ensure no more ids are expected */ + ASSERT_EQ(fields[id].ids[id_idx], 0, "field_id_cnt"); + } + btf__free(btf); +} -- cgit v1.2.3-73-gaa49b From 08ac454e258e38813afb906650f19acce3afd982 Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Wed, 5 Jun 2024 18:51:35 +0100 Subject: libbpf: Auto-attach struct_ops BPF maps in BPF skeleton Similarly to `bpf_program`, support `bpf_map` automatic attachment in `bpf_object__attach_skeleton`. Currently only struct_ops maps could be attached. On bpftool side, code-generate links in skeleton struct for struct_ops maps. Similarly to `bpf_program_skeleton`, set links in `bpf_map_skeleton`. On libbpf side, extend `bpf_map` with new `autoattach` field to support enabling or disabling autoattach functionality, introducing getter/setter for this field. `bpf_object__(attach|detach)_skeleton` is extended with attaching/detaching struct_ops maps logic. 
Signed-off-by: Mykyta Yatsenko Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240605175135.117127-1-yatsenko@meta.com --- tools/bpf/bpftool/gen.c | 36 ++++++++++++++++++++++++--- tools/lib/bpf/libbpf.c | 64 +++++++++++++++++++++++++++++++++++++++++++++--- tools/lib/bpf/libbpf.h | 18 ++++++++++++++ tools/lib/bpf/libbpf.map | 2 ++ 4 files changed, 113 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index d244a7de387e..4a4eedfcd479 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -848,7 +848,7 @@ out: } static void -codegen_maps_skeleton(struct bpf_object *obj, size_t map_cnt, bool mmaped) +codegen_maps_skeleton(struct bpf_object *obj, size_t map_cnt, bool mmaped, bool populate_links) { struct bpf_map *map; char ident[256]; @@ -888,6 +888,14 @@ codegen_maps_skeleton(struct bpf_object *obj, size_t map_cnt, bool mmaped) printf("\ts->maps[%zu].mmaped = (void **)&obj->%s;\n", i, ident); } + + if (populate_links && bpf_map__type(map) == BPF_MAP_TYPE_STRUCT_OPS) { + codegen("\ + \n\ + s->maps[%zu].link = &obj->links.%s;\n\ + ", + i, ident); + } i++; } } @@ -1141,7 +1149,7 @@ static void gen_st_ops_shadow_init(struct btf *btf, struct bpf_object *obj) static int do_skeleton(int argc, char **argv) { char header_guard[MAX_OBJ_NAME_LEN + sizeof("__SKEL_H__")]; - size_t map_cnt = 0, prog_cnt = 0, file_sz, mmap_sz; + size_t map_cnt = 0, prog_cnt = 0, attach_map_cnt = 0, file_sz, mmap_sz; DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts); char obj_name[MAX_OBJ_NAME_LEN] = "", *obj_data; struct bpf_object *obj = NULL; @@ -1225,6 +1233,10 @@ static int do_skeleton(int argc, char **argv) bpf_map__name(map)); continue; } + + if (bpf_map__type(map) == BPF_MAP_TYPE_STRUCT_OPS) + attach_map_cnt++; + map_cnt++; } bpf_object__for_each_program(prog, obj) { @@ -1297,6 +1309,9 @@ static int do_skeleton(int argc, char **argv) bpf_program__name(prog)); } printf("\t} progs;\n"); + } + + if 
(prog_cnt + attach_map_cnt) { printf("\tstruct {\n"); bpf_object__for_each_program(prog, obj) { if (use_loader) @@ -1306,6 +1321,19 @@ static int do_skeleton(int argc, char **argv) printf("\t\tstruct bpf_link *%s;\n", bpf_program__name(prog)); } + + bpf_object__for_each_map(map, obj) { + if (!get_map_ident(map, ident, sizeof(ident))) + continue; + if (bpf_map__type(map) != BPF_MAP_TYPE_STRUCT_OPS) + continue; + + if (use_loader) + printf("t\tint %s_fd;\n", ident); + else + printf("\t\tstruct bpf_link *%s;\n", ident); + } + printf("\t} links;\n"); } @@ -1448,7 +1476,7 @@ static int do_skeleton(int argc, char **argv) obj_name ); - codegen_maps_skeleton(obj, map_cnt, true /*mmaped*/); + codegen_maps_skeleton(obj, map_cnt, true /*mmaped*/, true /*links*/); codegen_progs_skeleton(obj, prog_cnt, true /*populate_links*/); codegen("\ @@ -1786,7 +1814,7 @@ static int do_subskeleton(int argc, char **argv) } } - codegen_maps_skeleton(obj, map_cnt, false /*mmaped*/); + codegen_maps_skeleton(obj, map_cnt, false /*mmaped*/, false /*links*/); codegen_progs_skeleton(obj, prog_cnt, false /*links*/); codegen("\ diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index d1627a2ca30b..4a28fac4908a 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -572,6 +572,7 @@ struct bpf_map { bool pinned; bool reused; bool autocreate; + bool autoattach; __u64 map_extra; }; @@ -1400,6 +1401,7 @@ static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name, map->def.value_size = type->size; map->def.max_entries = 1; map->def.map_flags = strcmp(sec_name, STRUCT_OPS_LINK_SEC) == 0 ? 
BPF_F_LINK : 0; + map->autoattach = true; map->st_ops = calloc(1, sizeof(*map->st_ops)); if (!map->st_ops) @@ -4819,6 +4821,20 @@ int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate) return 0; } +int bpf_map__set_autoattach(struct bpf_map *map, bool autoattach) +{ + if (!bpf_map__is_struct_ops(map)) + return libbpf_err(-EINVAL); + + map->autoattach = autoattach; + return 0; +} + +bool bpf_map__autoattach(const struct bpf_map *map) +{ + return map->autoattach; +} + int bpf_map__reuse_fd(struct bpf_map *map, int fd) { struct bpf_map_info info; @@ -12900,8 +12916,10 @@ struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map) __u32 zero = 0; int err, fd; - if (!bpf_map__is_struct_ops(map)) + if (!bpf_map__is_struct_ops(map)) { + pr_warn("map '%s': can't attach non-struct_ops map\n", map->name); return libbpf_err_ptr(-EINVAL); + } if (map->fd < 0) { pr_warn("map '%s': can't attach BPF map without FD (was it created?)\n", map->name); @@ -13945,6 +13963,35 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) */ } + /* Skeleton is created with earlier version of bpftool + * which does not support auto-attachment + */ + if (s->map_skel_sz < sizeof(struct bpf_map_skeleton)) + return 0; + + for (i = 0; i < s->map_cnt; i++) { + struct bpf_map *map = *s->maps[i].map; + struct bpf_link **link = s->maps[i].link; + + if (!map->autocreate || !map->autoattach) + continue; + + if (*link) + continue; + + /* only struct_ops maps can be attached */ + if (!bpf_map__is_struct_ops(map)) + continue; + *link = bpf_map__attach_struct_ops(map); + + if (!*link) { + err = -errno; + pr_warn("map '%s': failed to auto-attach: %d\n", + bpf_map__name(map), err); + return libbpf_err(err); + } + } + return 0; } @@ -13958,6 +14005,18 @@ void bpf_object__detach_skeleton(struct bpf_object_skeleton *s) bpf_link__destroy(*link); *link = NULL; } + + if (s->map_skel_sz < sizeof(struct bpf_map_skeleton)) + return; + + for (i = 0; i < s->map_cnt; i++) { + struct bpf_link 
**link = s->maps[i].link; + + if (link) { + bpf_link__destroy(*link); + *link = NULL; + } + } } void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s) @@ -13965,8 +14024,7 @@ void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s) if (!s) return; - if (s->progs) - bpf_object__detach_skeleton(s); + bpf_object__detach_skeleton(s); if (s->obj) bpf_object__close(*s->obj); free(s->maps); diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 26e4e35528c5..64a6a3d323e3 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -978,6 +978,23 @@ bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *map); LIBBPF_API int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate); LIBBPF_API bool bpf_map__autocreate(const struct bpf_map *map); +/** + * @brief **bpf_map__set_autoattach()** sets whether libbpf has to auto-attach + * map during BPF skeleton attach phase. + * @param map the BPF map instance + * @param autoattach whether to attach map during BPF skeleton attach phase + * @return 0 on success; negative error code, otherwise + */ +LIBBPF_API int bpf_map__set_autoattach(struct bpf_map *map, bool autoattach); + +/** + * @brief **bpf_map__autoattach()** returns whether BPF map is configured to + * auto-attach during BPF skeleton attach phase. 
+ * @param map the BPF map instance + * @return true if map is set to auto-attach during skeleton attach phase; false, otherwise + */ +LIBBPF_API bool bpf_map__autoattach(const struct bpf_map *map); + /** * @brief **bpf_map__fd()** gets the file descriptor of the passed * BPF map @@ -1672,6 +1689,7 @@ struct bpf_map_skeleton { const char *name; struct bpf_map **map; void **mmaped; + struct bpf_link **link; }; struct bpf_prog_skeleton { diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index c1ce8aa3520b..40595233dc7f 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -419,6 +419,8 @@ LIBBPF_1.4.0 { LIBBPF_1.5.0 { global: + bpf_map__autoattach; + bpf_map__set_autoattach; bpf_program__attach_sockmap; ring__consume_n; ring_buffer__consume_n; -- cgit v1.2.3-73-gaa49b From 1e029b73b7d1d8684e52961a7ecf74770d16651b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 29 May 2024 10:53:59 -0700 Subject: tools/memory-model: Add KCSAN LF mentorship session citation Add a citation to Marco's LF mentorship session presentation entitled "The Kernel Concurrency Sanitizer" [ paulmck: Apply Marco Elver feedback. ] Reported-by: Marco Elver Signed-off-by: Paul E. 
McKenney Acked-by: Andrea Parri Reviewed-by: Akira Yokosawa Acked-by: Marco Elver Cc: Alan Stern Cc: Will Deacon Cc: Peter Zijlstra Cc: Boqun Feng Cc: Nicholas Piggin Cc: David Howells Cc: Jade Alglave Cc: Luc Maranget Cc: Daniel Lustig Cc: Joel Fernandes Cc: --- tools/memory-model/Documentation/access-marking.txt | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/memory-model/Documentation/access-marking.txt b/tools/memory-model/Documentation/access-marking.txt index 65778222183e..f531b0837356 100644 --- a/tools/memory-model/Documentation/access-marking.txt +++ b/tools/memory-model/Documentation/access-marking.txt @@ -6,7 +6,8 @@ normal accesses to shared memory, that is "normal" as in accesses that do not use read-modify-write atomic operations. It also describes how to document these accesses, both with comments and with special assertions processed by the Kernel Concurrency Sanitizer (KCSAN). This discussion -builds on an earlier LWN article [1]. +builds on an earlier LWN article [1] and Linux Foundation mentorship +session [2]. ACCESS-MARKING OPTIONS @@ -31,7 +32,7 @@ example: WRITE_ONCE(a, b + data_race(c + d) + READ_ONCE(e)); Neither plain C-language accesses nor data_race() (#1 and #2 above) place -any sort of constraint on the compiler's choice of optimizations [2]. +any sort of constraint on the compiler's choice of optimizations [3]. In contrast, READ_ONCE() and WRITE_ONCE() (#3 and #4 above) restrict the compiler's use of code-motion and common-subexpression optimizations. Therefore, if a given access is involved in an intentional data race, @@ -594,5 +595,8 @@ REFERENCES [1] "Concurrency bugs should fear the big bad data-race detector (part 2)" https://lwn.net/Articles/816854/ -[2] "Who's afraid of a big bad optimizing compiler?" +[2] "The Kernel Concurrency Sanitizer" + https://www.linuxfoundation.org/webinars/the-kernel-concurrency-sanitizer + +[3] "Who's afraid of a big bad optimizing compiler?" 
https://lwn.net/Articles/793253/ -- cgit v1.2.3-73-gaa49b From 520c637bf0aa629ebbdbaf3236b50ad2684fc3f3 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 4 Jun 2024 20:59:35 -0700 Subject: tools/memory-model: Add access-marking.txt to README Given that access-marking.txt exists, this commit makes it easier to find. Reported-by: Akira Yokosawa Signed-off-by: Paul E. McKenney --- tools/memory-model/Documentation/README | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/memory-model/Documentation/README b/tools/memory-model/Documentation/README index db90a26dbdf4..304162743a5b 100644 --- a/tools/memory-model/Documentation/README +++ b/tools/memory-model/Documentation/README @@ -47,6 +47,10 @@ DESCRIPTION OF FILES README This file. +access-marking.txt + Guidelines for marking intentionally concurrent accesses to + shared memory. + cheatsheet.txt Quick-reference guide to the Linux-kernel memory model. -- cgit v1.2.3-73-gaa49b From 4c830eef806679dc243e191f962c488dd9d00708 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 6 Jun 2024 09:57:55 -0400 Subject: tools/memory-model: Fix bug in lock.cat Andrea reported that the following innocuous litmus test: C T {} P0(spinlock_t *x) { int r0; spin_lock(x); spin_unlock(x); r0 = spin_is_locked(x); } gives rise to a nonsensical empty result with no executions: $ herd7 -conf linux-kernel.cfg T.litmus Test T Required States 0 Ok Witnesses Positive: 0 Negative: 0 Condition forall (true) Observation T Never 0 0 Time T 0.00 Hash=6fa204e139ddddf2cb6fa963bad117c0 The problem is caused by a bug in the lock.cat part of the LKMM. Its computation of the rf relation for RU (read-unlocked) events is faulty; it implicitly assumes that every RU event must read from either a UL (unlock) event in another thread or from the lock's initial state. Neither is true in the litmus test above, so the computation yields no possible executions. 
The lock.cat code tries to make up for this deficiency by allowing RU events outside of critical sections to read from the last po-previous UL event. But it does this incorrectly, trying to keep these rfi links separate from the rfe links that might also be needed, and passing only the latter to herd7's cross() macro. The problem is fixed by merging the two sets of possible rf links for RU events and using them all in the call to cross(). Signed-off-by: Alan Stern Reported-by: Andrea Parri Closes: https://lore.kernel.org/linux-arch/ZlC0IkzpQdeGj+a3@andrea/ Tested-by: Andrea Parri Acked-by: Andrea Parri Fixes: 15553dcbca06 ("tools/memory-model: Add model support for spin_is_locked()") CC: Signed-off-by: Paul E. McKenney --- tools/memory-model/lock.cat | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/memory-model/lock.cat b/tools/memory-model/lock.cat index 53b5a492739d..21ba65086938 100644 --- a/tools/memory-model/lock.cat +++ b/tools/memory-model/lock.cat @@ -102,19 +102,19 @@ let rf-lf = rfe-lf | rfi-lf * within one of the lock's critical sections returns False. 
*) -(* rfi for RU events: an RU may read from the last po-previous UL *) -let rfi-ru = ([UL] ; po-loc ; [RU]) \ ([UL] ; po-loc ; [LKW] ; po-loc) - -(* rfe for RU events: an RU may read from an external UL or the initial write *) -let all-possible-rfe-ru = - let possible-rfe-ru r = +(* + * rf for RU events: an RU may read from an external UL or the initial write, + * or from the last po-previous UL + *) +let all-possible-rf-ru = + let possible-rf-ru r = let pair-to-relation p = p ++ 0 - in map pair-to-relation (((UL | IW) * {r}) & loc & ext) - in map possible-rfe-ru RU + in map pair-to-relation ((((UL | IW) * {r}) & loc & ext) | + (((UL * {r}) & po-loc) \ ([UL] ; po-loc ; [LKW] ; po-loc))) + in map possible-rf-ru RU (* Generate all rf relations for RU events *) -with rfe-ru from cross(all-possible-rfe-ru) -let rf-ru = rfe-ru | rfi-ru +with rf-ru from cross(all-possible-rf-ru) (* Final rf relation *) let rf = rf | rf-lf | rf-ru -- cgit v1.2.3-73-gaa49b From ea6ee1bac6034cb4e91bcc229ed1354ca1a024d5 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 6 Jun 2024 09:59:01 -0400 Subject: tools/memory-model: Code reorganization in lock.cat Code reorganization for the lock.cat file in tools/memory-model: Improve the efficiency by ruling out right at the start RU events (spin_is_locked() calls that return False) inside a critical section for the same lock. Improve the organization of the code for handling LF and RU events by pulling the definitions of the pair-to-relation macro out from two different complicated compound expressions, using a single standalone definition instead. Rewrite the calculations of the rf relation for LF and RU events, for greater clarity. Signed-off-by: Alan Stern Tested-by: Andrea Parri Acked-by: Andrea Parri Signed-off-by: Paul E. 
McKenney --- tools/memory-model/lock.cat | 56 ++++++++++++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 21 deletions(-) (limited to 'tools') diff --git a/tools/memory-model/lock.cat b/tools/memory-model/lock.cat index 21ba65086938..03c12efed66a 100644 --- a/tools/memory-model/lock.cat +++ b/tools/memory-model/lock.cat @@ -54,6 +54,12 @@ flag ~empty LKR \ domain(lk-rmw) as unpaired-LKR *) empty ([LKW] ; po-loc ; [LKR]) \ (po-loc ; [UL] ; po-loc) as lock-nest +(* + * In the same way, spin_is_locked() inside a critical section must always + * return True (no RU events can be in a critical section for the same lock). + *) +empty ([LKW] ; po-loc ; [RU]) \ (po-loc ; [UL] ; po-loc) as nested-is-locked + (* The final value of a spinlock should not be tested *) flag ~empty [FW] ; loc ; [ALL-LOCKS] as lock-final @@ -79,39 +85,47 @@ empty ([UNMATCHED-LKW] ; loc ; [UNMATCHED-LKW]) \ id as unmatched-locks (* rfi for LF events: link each LKW to the LF events in its critical section *) let rfi-lf = ([LKW] ; po-loc ; [LF]) \ ([LKW] ; po-loc ; [UL] ; po-loc) -(* rfe for LF events *) +(* Utility macro to convert a single pair to a single-edge relation *) +let pair-to-relation p = p ++ 0 + +(* + * If a given LF event e is outside a critical section, it cannot read + * internally but it may read from an LKW event in another thread. + * Compute the relation containing these possible edges. + *) +let possible-rfe-noncrit-lf e = (LKW * {e}) & loc & ext + +(* Compute set of sets of possible rfe edges for LF events *) let all-possible-rfe-lf = (* - * Given an LF event r, compute the possible rfe edges for that event - * (all those starting from LKW events in other threads), - * and then convert that relation to a set of single-edge relations. 
+ * Convert the possible-rfe-noncrit-lf relation for e + * to a set of single edges *) - let possible-rfe-lf r = - let pair-to-relation p = p ++ 0 - in map pair-to-relation ((LKW * {r}) & loc & ext) - (* Do this for each LF event r that isn't in rfi-lf *) - in map possible-rfe-lf (LF \ range(rfi-lf)) + let set-of-singleton-rfe-lf e = + map pair-to-relation (possible-rfe-noncrit-lf e) + (* Do this for each LF event e that isn't in rfi-lf *) + in map set-of-singleton-rfe-lf (LF \ range(rfi-lf)) (* Generate all rf relations for LF events *) with rfe-lf from cross(all-possible-rfe-lf) let rf-lf = rfe-lf | rfi-lf (* - * RU, i.e., spin_is_locked() returning False, is slightly different. - * We rely on the memory model to rule out cases where spin_is_locked() - * within one of the lock's critical sections returns False. + * A given RU event e may read internally from the last po-previous UL, + * or it may read from a UL event in another thread or the initial write. + * Compute the relation containing these possible edges. *) +let possible-rf-ru e = (((UL * {e}) & po-loc) \ + ([UL] ; po-loc ; [UL] ; po-loc)) | + (((UL | IW) * {e}) & loc & ext) -(* - * rf for RU events: an RU may read from an external UL or the initial write, - * or from the last po-previous UL - *) +(* Compute set of sets of possible rf edges for RU events *) let all-possible-rf-ru = - let possible-rf-ru r = - let pair-to-relation p = p ++ 0 - in map pair-to-relation ((((UL | IW) * {r}) & loc & ext) | - (((UL * {r}) & po-loc) \ ([UL] ; po-loc ; [LKW] ; po-loc))) - in map possible-rf-ru RU + (* Convert the possible-rf-ru relation for e to a set of single edges *) + let set-of-singleton-rf-ru e = + map pair-to-relation (possible-rf-ru e) + (* Do this for each RU event e *) + in map set-of-singleton-rf-ru RU (* Generate all rf relations for RU events *) with rf-ru from cross(all-possible-rf-ru) -- cgit v1.2.3-73-gaa49b From 0ac55d095d375e84fcdac5e51011613734e57854 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Wed, 8 May 2024 21:43:56 -0700 Subject: tools/rcu: Add rcu-updaters.sh script This commit adds a tools/rcu/rcu-updaters.sh script that uses bpftrace to print a histogram of the RCU update-side primitives invoked during the specified time interval, or until manually terminated if no interval is specified. Sample output on an idle laptop: @counts[poll_state_synchronize_rcu]: 6 @counts[synchronize_srcu]: 13 @counts[call_rcu_tasks_trace]: 25 @counts[synchronize_rcu]: 54 @counts[kvfree_call_rcu]: 428 @counts[call_rcu]: 2134 Note that when run on a kernel missing one or more of the symbols, this script will issue a diagnostic for each that is not found, but continue normally for the rest of the functions. Signed-off-by: Paul E. McKenney --- tools/rcu/rcu-updaters.sh | 52 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100755 tools/rcu/rcu-updaters.sh (limited to 'tools') diff --git a/tools/rcu/rcu-updaters.sh b/tools/rcu/rcu-updaters.sh new file mode 100755 index 000000000000..4ef1397927bb --- /dev/null +++ b/tools/rcu/rcu-updaters.sh @@ -0,0 +1,52 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0+ +# +# Run bpftrace to obtain a histogram of the types of primitives used to +# initiate RCU grace periods. The count associated with rcu_gp_init() +# is the number of normal (non-expedited) grace periods. +# +# Usage: rcu-updaters.sh [ duration-in-seconds ] +# +# Note that not all kernel builds have all of these functions. In those +# that do not, this script will issue a diagnostic for each that is not +# found, but continue normally for the rest of the functions. + +duration=${1} +if test -n "${duration}" +then + exitclause='interval:s:'"${duration}"' { exit(); }' +else + echo 'Hit control-C to end sample and print results.' 
+fi +bpftrace -e 'kprobe:kvfree_call_rcu, + kprobe:call_rcu, + kprobe:call_rcu_tasks, + kprobe:call_rcu_tasks_rude, + kprobe:call_rcu_tasks_trace, + kprobe:call_srcu, + kprobe:rcu_barrier, + kprobe:rcu_barrier_tasks, + kprobe:rcu_barrier_tasks_rude, + kprobe:rcu_barrier_tasks_trace, + kprobe:srcu_barrier, + kprobe:synchronize_rcu, + kprobe:synchronize_rcu_expedited, + kprobe:synchronize_rcu_tasks, + kprobe:synchronize_rcu_tasks_rude, + kprobe:synchronize_rcu_tasks_trace, + kprobe:synchronize_srcu, + kprobe:synchronize_srcu_expedited, + kprobe:get_state_synchronize_rcu, + kprobe:get_state_synchronize_rcu_full, + kprobe:start_poll_synchronize_rcu, + kprobe:start_poll_synchronize_rcu_expedited, + kprobe:start_poll_synchronize_rcu_full, + kprobe:start_poll_synchronize_rcu_expedited_full, + kprobe:poll_state_synchronize_rcu, + kprobe:poll_state_synchronize_rcu_full, + kprobe:cond_synchronize_rcu, + kprobe:cond_synchronize_rcu_full, + kprobe:start_poll_synchronize_srcu, + kprobe:poll_state_synchronize_srcu, + kprobe:rcu_gp_init + { @counts[func] = count(); } '"${exitclause}" -- cgit v1.2.3-73-gaa49b From 9abdfd8a212332c64f6d0a27fc2ad69e9e0335d1 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 30 May 2024 15:41:08 +0800 Subject: selftests/bpf: Use connect_to_fd_opts in do_test in bpf_tcp_ca This patch uses connect_to_fd_opts() instead of using connect_fd_to_fd() and settcpca() in do_test() in prog_tests/bpf_tcp_ca.c to accept a struct network_helper_opts argument. Then define a dctcp dedicated post_socket_cb callback stg_post_socket_cb(), invoking both settcpca() and bpf_map_update_elem() in it, and set it in test_dctcp(). For passing map_fd into stg_post_socket_cb() callback, a new member map_fd is added in struct cb_opts. Add another "const struct network_helper_opts *cli_opts" to do_test() to separate it from the server "opts". 
Signed-off-by: Geliang Tang Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/876ec90430865bc468e3b7f6fb2648420b075548.1717054461.git.tanggeliang@kylinos.cn --- .../testing/selftests/bpf/prog_tests/bpf_tcp_ca.c | 61 ++++++++++++---------- 1 file changed, 34 insertions(+), 27 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index ebc7d4616880..2f9d373feb0a 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -25,6 +25,7 @@ static int expected_stg = 0xeB9F; struct cb_opts { const char *cc; + int map_fd; }; static int settcpca(int fd, const char *tcp_ca) @@ -39,9 +40,9 @@ static int settcpca(int fd, const char *tcp_ca) } static void do_test(const struct network_helper_opts *opts, + const struct network_helper_opts *cli_opts, const struct bpf_map *sk_stg_map) { - struct cb_opts *cb_opts = (struct cb_opts *)opts->cb_opts; int lfd = -1, fd = -1; int err; @@ -49,25 +50,9 @@ static void do_test(const struct network_helper_opts *opts, if (!ASSERT_NEQ(lfd, -1, "socket")) return; - fd = socket(AF_INET6, SOCK_STREAM, 0); - if (!ASSERT_NEQ(fd, -1, "socket")) { - close(lfd); - return; - } - - if (settcpca(fd, cb_opts->cc)) - goto done; - - if (sk_stg_map) { - err = bpf_map_update_elem(bpf_map__fd(sk_stg_map), &fd, - &expected_stg, BPF_NOEXIST); - if (!ASSERT_OK(err, "bpf_map_update_elem(sk_stg_map)")) - goto done; - } - /* connect to server */ - err = connect_fd_to_fd(fd, lfd, 0); - if (!ASSERT_NEQ(err, -1, "connect")) + fd = connect_to_fd_opts(lfd, cli_opts); + if (!ASSERT_NEQ(fd, -1, "connect_to_fd_opts")) goto done; if (sk_stg_map) { @@ -116,7 +101,7 @@ static void test_cubic(void) return; } - do_test(&opts, NULL); + do_test(&opts, &opts, NULL); ASSERT_EQ(cubic_skel->bss->bpf_cubic_acked_called, 1, "pkts_acked called"); @@ -124,6 +109,23 @@ static void test_cubic(void) 
bpf_cubic__destroy(cubic_skel); } +static int stg_post_socket_cb(int fd, void *opts) +{ + struct cb_opts *cb_opts = (struct cb_opts *)opts; + int err; + + err = settcpca(fd, cb_opts->cc); + if (err) + return err; + + err = bpf_map_update_elem(cb_opts->map_fd, &fd, + &expected_stg, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(sk_stg_map)")) + return err; + + return 0; +} + static void test_dctcp(void) { struct cb_opts cb_opts = { @@ -133,6 +135,10 @@ static void test_dctcp(void) .post_socket_cb = cc_cb, .cb_opts = &cb_opts, }; + struct network_helper_opts cli_opts = { + .post_socket_cb = stg_post_socket_cb, + .cb_opts = &cb_opts, + }; struct bpf_dctcp *dctcp_skel; struct bpf_link *link; @@ -146,7 +152,8 @@ static void test_dctcp(void) return; } - do_test(&opts, dctcp_skel->maps.sk_stg_map); + cb_opts.map_fd = bpf_map__fd(dctcp_skel->maps.sk_stg_map); + do_test(&opts, &cli_opts, dctcp_skel->maps.sk_stg_map); ASSERT_EQ(dctcp_skel->bss->stg_result, expected_stg, "stg_result"); bpf_link__destroy(link); @@ -350,14 +357,14 @@ static void test_update_ca(void) link = bpf_map__attach_struct_ops(skel->maps.ca_update_1); ASSERT_OK_PTR(link, "attach_struct_ops"); - do_test(&opts, NULL); + do_test(&opts, &opts, NULL); saved_ca1_cnt = skel->bss->ca1_cnt; ASSERT_GT(saved_ca1_cnt, 0, "ca1_ca1_cnt"); err = bpf_link__update_map(link, skel->maps.ca_update_2); ASSERT_OK(err, "update_map"); - do_test(&opts, NULL); + do_test(&opts, &opts, NULL); ASSERT_EQ(skel->bss->ca1_cnt, saved_ca1_cnt, "ca2_ca1_cnt"); ASSERT_GT(skel->bss->ca2_cnt, 0, "ca2_ca2_cnt"); @@ -386,14 +393,14 @@ static void test_update_wrong(void) link = bpf_map__attach_struct_ops(skel->maps.ca_update_1); ASSERT_OK_PTR(link, "attach_struct_ops"); - do_test(&opts, NULL); + do_test(&opts, &opts, NULL); saved_ca1_cnt = skel->bss->ca1_cnt; ASSERT_GT(saved_ca1_cnt, 0, "ca1_ca1_cnt"); err = bpf_link__update_map(link, skel->maps.ca_wrong); ASSERT_ERR(err, "update_map"); - do_test(&opts, NULL); + do_test(&opts, &opts, 
NULL); ASSERT_GT(skel->bss->ca1_cnt, saved_ca1_cnt, "ca2_ca1_cnt"); bpf_link__destroy(link); @@ -423,7 +430,7 @@ static void test_mixed_links(void) link = bpf_map__attach_struct_ops(skel->maps.ca_update_1); ASSERT_OK_PTR(link, "attach_struct_ops"); - do_test(&opts, NULL); + do_test(&opts, &opts, NULL); ASSERT_GT(skel->bss->ca1_cnt, 0, "ca1_ca1_cnt"); err = bpf_link__update_map(link, skel->maps.ca_no_link); @@ -530,7 +537,7 @@ static void test_cc_cubic(void) return; } - do_test(&opts, NULL); + do_test(&opts, &opts, NULL); bpf_link__destroy(link); bpf_cc_cubic__destroy(cc_cubic_skel); -- cgit v1.2.3-73-gaa49b From fee97d0c9a14b5dd5cce0ec1df3a54a6b963f40c Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 30 May 2024 15:41:09 +0800 Subject: selftests/bpf: Add start_test helper in bpf_tcp_ca For moving the "if (sk_stg_map)" block out of do_test(), extract the code before this block as a new function start_test(). It creates server-side and client-side sockets and returns them to the caller. 
Signed-off-by: Geliang Tang Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/48f2921ff9be958f5d3d28fe6bb7269a61cafa9f.1717054461.git.tanggeliang@kylinos.cn --- .../testing/selftests/bpf/prog_tests/bpf_tcp_ca.c | 42 +++++++++++++++++----- 1 file changed, 33 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index 2f9d373feb0a..794651ce0629 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -39,6 +39,34 @@ static int settcpca(int fd, const char *tcp_ca) return 0; } +static bool start_test(char *addr_str, + const struct network_helper_opts *srv_opts, + const struct network_helper_opts *cli_opts, + int *srv_fd, int *cli_fd) +{ + *srv_fd = start_server_str(AF_INET6, SOCK_STREAM, addr_str, 0, srv_opts); + if (!ASSERT_NEQ(*srv_fd, -1, "start_server_str")) + goto err; + + /* connect to server */ + *cli_fd = connect_to_fd_opts(*srv_fd, cli_opts); + if (!ASSERT_NEQ(*cli_fd, -1, "connect_to_fd_opts")) + goto err; + + return true; + +err: + if (*srv_fd != -1) { + close(*srv_fd); + *srv_fd = -1; + } + if (*cli_fd != -1) { + close(*cli_fd); + *cli_fd = -1; + } + return false; +} + static void do_test(const struct network_helper_opts *opts, const struct network_helper_opts *cli_opts, const struct bpf_map *sk_stg_map) @@ -46,13 +74,7 @@ static void do_test(const struct network_helper_opts *opts, int lfd = -1, fd = -1; int err; - lfd = start_server_str(AF_INET6, SOCK_STREAM, NULL, 0, opts); - if (!ASSERT_NEQ(lfd, -1, "socket")) - return; - - /* connect to server */ - fd = connect_to_fd_opts(lfd, cli_opts); - if (!ASSERT_NEQ(fd, -1, "connect_to_fd_opts")) + if (!start_test(NULL, opts, cli_opts, &lfd, &fd)) goto done; if (sk_stg_map) { @@ -68,8 +90,10 @@ static void do_test(const struct network_helper_opts *opts, ASSERT_OK(send_recv_data(lfd, fd, total_bytes), 
"send_recv_data"); done: - close(lfd); - close(fd); + if (lfd != -1) + close(lfd); + if (fd != -1) + close(fd); } static int cc_cb(int fd, void *opts) -- cgit v1.2.3-73-gaa49b From 224eeb5598c30ee835dc9fea4c7ad85a8fb7eda4 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 30 May 2024 15:41:10 +0800 Subject: selftests/bpf: Use start_test in test_dctcp_fallback in bpf_tcp_ca The newly added helper start_test() can be used in test_dctcp_fallback() too, to replace start_server_str() and connect_to_fd_opts(). In that way, two network_helper_opts srv_opts and cli_opts are used instead of the previously shared opts. Signed-off-by: Geliang Tang Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/792ca3bb013fa06e618176da02d75e4f79a76733.1717054461.git.tanggeliang@kylinos.cn --- tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index 794651ce0629..d10217169ff8 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -229,17 +229,22 @@ static void test_invalid_license(void) static void test_dctcp_fallback(void) { int err, lfd = -1, cli_fd = -1, srv_fd = -1; - struct network_helper_opts opts = { - .post_socket_cb = cc_cb, - }; struct bpf_dctcp *dctcp_skel; struct bpf_link *link = NULL; struct cb_opts dctcp = { .cc = "bpf_dctcp", }; + struct network_helper_opts srv_opts = { + .post_socket_cb = cc_cb, + .cb_opts = &dctcp, + }; struct cb_opts cubic = { .cc = "cubic", }; + struct network_helper_opts cli_opts = { + .post_socket_cb = cc_cb, + .cb_opts = &cubic, + }; char srv_cc[16]; socklen_t cc_len = sizeof(srv_cc); @@ -254,14 +259,7 @@ static void test_dctcp_fallback(void) if (!ASSERT_OK_PTR(link, "dctcp link")) goto done; - opts.cb_opts = &dctcp; - lfd = start_server_str(AF_INET6, 
SOCK_STREAM, "::1", 0, &opts); - if (!ASSERT_GE(lfd, 0, "lfd")) - goto done; - - opts.cb_opts = &cubic; - cli_fd = connect_to_fd_opts(lfd, &opts); - if (!ASSERT_GE(cli_fd, 0, "cli_fd")) + if (!start_test("::1", &srv_opts, &cli_opts, &lfd, &cli_fd)) goto done; srv_fd = accept(lfd, NULL, 0); -- cgit v1.2.3-73-gaa49b From cd984b2ed62423eb3daceacb21d651115a612af6 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 30 May 2024 15:41:11 +0800 Subject: selftests/bpf: Use start_test in test_dctcp in bpf_tcp_ca The "if (sk_stg_map)" block in do_test() is only used by test_dctcp(), it makes sense to move it from do_test() into test_dctcp(). Then do_test() can be used by other tests except test_dctcp(). Signed-off-by: Geliang Tang Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/9938916627b9810c877e5c03a621bc0ba5acf5c5.1717054461.git.tanggeliang@kylinos.cn --- .../testing/selftests/bpf/prog_tests/bpf_tcp_ca.c | 27 ++++++++++++---------- 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index d10217169ff8..1b27d0232cbd 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -72,21 +72,10 @@ static void do_test(const struct network_helper_opts *opts, const struct bpf_map *sk_stg_map) { int lfd = -1, fd = -1; - int err; if (!start_test(NULL, opts, cli_opts, &lfd, &fd)) goto done; - if (sk_stg_map) { - int tmp_stg; - - err = bpf_map_lookup_elem(bpf_map__fd(sk_stg_map), &fd, - &tmp_stg); - if (!ASSERT_ERR(err, "bpf_map_lookup_elem(sk_stg_map)") || - !ASSERT_EQ(errno, ENOENT, "bpf_map_lookup_elem(sk_stg_map)")) - goto done; - } - ASSERT_OK(send_recv_data(lfd, fd, total_bytes), "send_recv_data"); done: @@ -163,6 +152,7 @@ static void test_dctcp(void) .post_socket_cb = stg_post_socket_cb, .cb_opts = &cb_opts, }; + int lfd = -1, fd = -1, tmp_stg, err; struct 
bpf_dctcp *dctcp_skel; struct bpf_link *link; @@ -177,11 +167,24 @@ static void test_dctcp(void) } cb_opts.map_fd = bpf_map__fd(dctcp_skel->maps.sk_stg_map); - do_test(&opts, &cli_opts, dctcp_skel->maps.sk_stg_map); + if (!start_test(NULL, &opts, &cli_opts, &lfd, &fd)) + goto done; + + err = bpf_map_lookup_elem(cb_opts.map_fd, &fd, &tmp_stg); + if (!ASSERT_ERR(err, "bpf_map_lookup_elem(sk_stg_map)") || + !ASSERT_EQ(errno, ENOENT, "bpf_map_lookup_elem(sk_stg_map)")) + goto done; + + ASSERT_OK(send_recv_data(lfd, fd, total_bytes), "send_recv_data"); ASSERT_EQ(dctcp_skel->bss->stg_result, expected_stg, "stg_result"); +done: bpf_link__destroy(link); bpf_dctcp__destroy(dctcp_skel); + if (lfd != -1) + close(lfd); + if (fd != -1) + close(fd); } static char *err_str; -- cgit v1.2.3-73-gaa49b From f85af9d955ac9601174e1c64f4b3308c1cae4a7e Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 30 May 2024 15:41:12 +0800 Subject: selftests/bpf: Drop useless arguments of do_test in bpf_tcp_ca bpf_map_lookup_elem() has been removed from do_test(), it makes the sk_stg_map argument of do_test() useless. In addition, two exactly the same opts are passed in all the places where do_test() is invoked, so cli_opts argument can be dropped too. This patch drops these two useless arguments of do_test() in bpf_tcp_ca.c. 
Signed-off-by: Geliang Tang Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/7056eab111d78a05bce29d2821228dc93f240de4.1717054461.git.tanggeliang@kylinos.cn --- tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index 1b27d0232cbd..67358adf5db3 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -67,13 +67,11 @@ err: return false; } -static void do_test(const struct network_helper_opts *opts, - const struct network_helper_opts *cli_opts, - const struct bpf_map *sk_stg_map) +static void do_test(const struct network_helper_opts *opts) { int lfd = -1, fd = -1; - if (!start_test(NULL, opts, cli_opts, &lfd, &fd)) + if (!start_test(NULL, opts, opts, &lfd, &fd)) goto done; ASSERT_OK(send_recv_data(lfd, fd, total_bytes), "send_recv_data"); @@ -114,7 +112,7 @@ static void test_cubic(void) return; } - do_test(&opts, &opts, NULL); + do_test(&opts); ASSERT_EQ(cubic_skel->bss->bpf_cubic_acked_called, 1, "pkts_acked called"); @@ -382,14 +380,14 @@ static void test_update_ca(void) link = bpf_map__attach_struct_ops(skel->maps.ca_update_1); ASSERT_OK_PTR(link, "attach_struct_ops"); - do_test(&opts, &opts, NULL); + do_test(&opts); saved_ca1_cnt = skel->bss->ca1_cnt; ASSERT_GT(saved_ca1_cnt, 0, "ca1_ca1_cnt"); err = bpf_link__update_map(link, skel->maps.ca_update_2); ASSERT_OK(err, "update_map"); - do_test(&opts, &opts, NULL); + do_test(&opts); ASSERT_EQ(skel->bss->ca1_cnt, saved_ca1_cnt, "ca2_ca1_cnt"); ASSERT_GT(skel->bss->ca2_cnt, 0, "ca2_ca2_cnt"); @@ -418,14 +416,14 @@ static void test_update_wrong(void) link = bpf_map__attach_struct_ops(skel->maps.ca_update_1); ASSERT_OK_PTR(link, "attach_struct_ops"); - do_test(&opts, &opts, NULL); + do_test(&opts); saved_ca1_cnt = 
skel->bss->ca1_cnt; ASSERT_GT(saved_ca1_cnt, 0, "ca1_ca1_cnt"); err = bpf_link__update_map(link, skel->maps.ca_wrong); ASSERT_ERR(err, "update_map"); - do_test(&opts, &opts, NULL); + do_test(&opts); ASSERT_GT(skel->bss->ca1_cnt, saved_ca1_cnt, "ca2_ca1_cnt"); bpf_link__destroy(link); @@ -455,7 +453,7 @@ static void test_mixed_links(void) link = bpf_map__attach_struct_ops(skel->maps.ca_update_1); ASSERT_OK_PTR(link, "attach_struct_ops"); - do_test(&opts, &opts, NULL); + do_test(&opts); ASSERT_GT(skel->bss->ca1_cnt, 0, "ca1_ca1_cnt"); err = bpf_link__update_map(link, skel->maps.ca_no_link); @@ -562,7 +560,7 @@ static void test_cc_cubic(void) return; } - do_test(&opts, &opts, NULL); + do_test(&opts); bpf_link__destroy(link); bpf_cc_cubic__destroy(cc_cubic_skel); -- cgit v1.2.3-73-gaa49b From a9700511fd50b9203a9a9d61b4874eb28571d5da Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Wed, 5 Jun 2024 16:44:42 +0200 Subject: perf script: netdev-times: add location parameter to consume_skb dd1b527831a3 ("net: add location to trace_consume_skb()") added a new parameter to the consume_skb tracepoint. Adapt the script to match. 
Signed-off-by: Lucas Stach Acked-by: Arnaldo Carvalho de Melo Cc: kernel@pengutronix.de Cc: patchwork-lst@pengutronix.de Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240605144442.1985270-1-l.stach@pengutronix.de --- tools/perf/scripts/python/netdev-times.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/scripts/python/netdev-times.py b/tools/perf/scripts/python/netdev-times.py index 00552eeb7178..30c4bccee5b2 100644 --- a/tools/perf/scripts/python/netdev-times.py +++ b/tools/perf/scripts/python/netdev-times.py @@ -293,7 +293,8 @@ def skb__kfree_skb(name, context, cpu, sec, nsec, pid, comm, callchain, skbaddr, location, protocol, reason) all_event_list.append(event_info) -def skb__consume_skb(name, context, cpu, sec, nsec, pid, comm, callchain, skbaddr): +def skb__consume_skb(name, context, cpu, sec, nsec, pid, comm, callchain, + skbaddr, location): event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, skbaddr) all_event_list.append(event_info) -- cgit v1.2.3-73-gaa49b From 0b90dfda222e38b7ca8dad6e098e36f5186f0b94 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 21 May 2024 09:51:07 -0700 Subject: perf maps: Fix use after free in __maps__fixup_overlap_and_insert In the case 'before' and 'after' are broken out from pos, maps_by_address may be changed by __maps__insert, as such it needs re-reading. Don't ignore the return value from __maps_insert. Fixes: 659ad3492b91 ("perf maps: Switch from rbtree to lazily sorted array for addresses") Signed-off-by: Ian Rogers Reviewed-by: James Clark Cc: Steinar H . 
Gunderson Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240521165109.708593-2-irogers@google.com --- tools/perf/util/maps.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c index 16b39db594f4..eaada3e0f5b4 100644 --- a/tools/perf/util/maps.c +++ b/tools/perf/util/maps.c @@ -741,7 +741,6 @@ static unsigned int first_ending_after(struct maps *maps, const struct map *map) */ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new) { - struct map **maps_by_address; int err = 0; FILE *fp = debug_file(); @@ -749,12 +748,12 @@ sort_again: if (!maps__maps_by_address_sorted(maps)) __maps__sort_by_address(maps); - maps_by_address = maps__maps_by_address(maps); /* * Iterate through entries where the end of the existing entry is * greater-than the new map's start. */ for (unsigned int i = first_ending_after(maps, new); i < maps__nr_maps(maps); ) { + struct map **maps_by_address = maps__maps_by_address(maps); struct map *pos = maps_by_address[i]; struct map *before = NULL, *after = NULL; @@ -821,8 +820,10 @@ sort_again: /* Maps are still ordered, go to next one. */ i++; if (after) { - __maps__insert(maps, after); + err = __maps__insert(maps, after); map__put(after); + if (err) + goto out_err; if (!maps__maps_by_address_sorted(maps)) { /* * Sorting broken so invariants don't @@ -851,7 +852,7 @@ sort_again: check_invariants(maps); } /* Add the map. */ - __maps__insert(maps, new); + err = __maps__insert(maps, new); out_err: return err; } -- cgit v1.2.3-73-gaa49b From aeefb04393f7525c0d5163f966f60d070b03ab99 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 21 May 2024 09:51:08 -0700 Subject: perf maps: Reduce sorting for overlapping mappings When an 'after' map is generated the 'new' map must be before it so terminate iterating and don't resort. 
If the entry 'pos' is entirely overlapped by the 'new' mapping then don't remove and insert the mapping, just replace - again to remove sorting. For a perf report on a perf.data file containing overlapping mappings the time numbers are: Before: real 0m9.856s user 0m9.637s sys 0m0.204s After: real 0m5.894s user 0m5.650s sys 0m0.231s Signed-off-by: Ian Rogers Reviewed-by: James Clark Cc: Steinar H . Gunderson Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240521165109.708593-3-irogers@google.com --- tools/perf/util/maps.c | 55 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 36 insertions(+), 19 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c index eaada3e0f5b4..f6b6df82f4cf 100644 --- a/tools/perf/util/maps.c +++ b/tools/perf/util/maps.c @@ -744,7 +744,6 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new) int err = 0; FILE *fp = debug_file(); -sort_again: if (!maps__maps_by_address_sorted(maps)) __maps__sort_by_address(maps); @@ -820,36 +819,54 @@ sort_again: /* Maps are still ordered, go to next one. */ i++; if (after) { - err = __maps__insert(maps, after); - map__put(after); - if (err) - goto out_err; - if (!maps__maps_by_address_sorted(maps)) { - /* - * Sorting broken so invariants don't - * hold, sort and go again. - */ - goto sort_again; - } /* - * Maps are still ordered, skip after and go to - * next one (terminate loop). + * 'before' and 'after' mean 'new' split the + * 'pos' mapping and therefore there are no + * later mappings. */ - i++; + err = __maps__insert(maps, new); + if (!err) + err = __maps__insert(maps, after); + map__put(after); + check_invariants(maps); + return err; } + check_invariants(maps); } else if (after) { + /* + * 'after' means 'new' split 'pos' and there are no + * later mappings. + */ map__put(maps_by_address[i]); - maps_by_address[i] = after; - /* Maps are ordered, go to next one. 
*/ - i++; + maps_by_address[i] = map__get(new); + err = __maps__insert(maps, after); + map__put(after); + check_invariants(maps); + return err; } else { + struct map *next = NULL; + + if (i + 1 < maps__nr_maps(maps)) + next = maps_by_address[i + 1]; + + if (!next || map__start(next) >= map__end(new)) { + /* + * Replace existing mapping and end knowing + * there aren't later overlapping or any + * mappings. + */ + map__put(maps_by_address[i]); + maps_by_address[i] = map__get(new); + check_invariants(maps); + return err; + } __maps__remove(maps, pos); + check_invariants(maps); /* * Maps are ordered but no need to increase `i` as the * later maps were moved down. */ } - check_invariants(maps); } /* Add the map. */ err = __maps__insert(maps, new); -- cgit v1.2.3-73-gaa49b From d2307fd4f9895b44361d491f8bf474866b8351a2 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 21 May 2024 09:51:09 -0700 Subject: perf maps: Add/use a sorted insert for fixup overlap and insert Data may have lots of overlapping mmaps. The regular insert adds at the end and relies on a later sort. For data with overlapping mappings the sort will happen during a subsequent maps__find or __maps__fixup_overlap_and_insert, there's never a period where the inserted maps buffer up and a single sort happens. To avoid back to back sorts, maintain the sort order when fixing up and inserting. Previously the first_ending_after search was O(log n) where n is the size of maps, and the insert was O(1) but because of the continuous sorting was becoming O(n*log(n)). With maintaining sort order, the insert now becomes O(n) for a memmove. For a perf report on a perf.data file containing overlapping mappings the time numbers are: Before: real 0m5.894s user 0m5.650s sys 0m0.231s After: real 0m0.675s user 0m0.454s sys 0m0.196s Signed-off-by: Ian Rogers Reviewed-by: James Clark Cc: Steinar H . 
Gunderson Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240521165109.708593-4-irogers@google.com --- tools/perf/util/maps.c | 65 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 59 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c index f6b6df82f4cf..432399cbe5dd 100644 --- a/tools/perf/util/maps.c +++ b/tools/perf/util/maps.c @@ -735,6 +735,60 @@ static unsigned int first_ending_after(struct maps *maps, const struct map *map) return first; } +static int __maps__insert_sorted(struct maps *maps, unsigned int first_after_index, + struct map *new1, struct map *new2) +{ + struct map **maps_by_address = maps__maps_by_address(maps); + struct map **maps_by_name = maps__maps_by_name(maps); + unsigned int nr_maps = maps__nr_maps(maps); + unsigned int nr_allocate = RC_CHK_ACCESS(maps)->nr_maps_allocated; + unsigned int to_add = new2 ? 2 : 1; + + assert(maps__maps_by_address_sorted(maps)); + assert(first_after_index == nr_maps || + map__end(new1) <= map__start(maps_by_address[first_after_index])); + assert(!new2 || map__end(new1) <= map__start(new2)); + assert(first_after_index == nr_maps || !new2 || + map__end(new2) <= map__start(maps_by_address[first_after_index])); + + if (nr_maps + to_add > nr_allocate) { + nr_allocate = !nr_allocate ? 32 : nr_allocate * 2; + + maps_by_address = realloc(maps_by_address, nr_allocate * sizeof(new1)); + if (!maps_by_address) + return -ENOMEM; + + maps__set_maps_by_address(maps, maps_by_address); + if (maps_by_name) { + maps_by_name = realloc(maps_by_name, nr_allocate * sizeof(new1)); + if (!maps_by_name) { + /* + * If by name fails, just disable by name and it will + * recompute next time it is required. 
+ */ + __maps__free_maps_by_name(maps); + } + maps__set_maps_by_name(maps, maps_by_name); + } + RC_CHK_ACCESS(maps)->nr_maps_allocated = nr_allocate; + } + memmove(&maps_by_address[first_after_index+to_add], + &maps_by_address[first_after_index], + (nr_maps - first_after_index) * sizeof(new1)); + maps_by_address[first_after_index] = map__get(new1); + if (maps_by_name) + maps_by_name[nr_maps] = map__get(new1); + if (new2) { + maps_by_address[first_after_index + 1] = map__get(new2); + if (maps_by_name) + maps_by_name[nr_maps + 1] = map__get(new2); + } + RC_CHK_ACCESS(maps)->nr_maps = nr_maps + to_add; + maps__set_maps_by_name_sorted(maps, false); + check_invariants(maps); + return 0; +} + /* * Adds new to maps, if new overlaps existing entries then the existing maps are * adjusted or removed so that new fits without overlapping any entries. @@ -743,6 +797,7 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new) { int err = 0; FILE *fp = debug_file(); + unsigned int i; if (!maps__maps_by_address_sorted(maps)) __maps__sort_by_address(maps); @@ -751,7 +806,7 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new) * Iterate through entries where the end of the existing entry is * greater-than the new map's start. */ - for (unsigned int i = first_ending_after(maps, new); i < maps__nr_maps(maps); ) { + for (i = first_ending_after(maps, new); i < maps__nr_maps(maps); ) { struct map **maps_by_address = maps__maps_by_address(maps); struct map *pos = maps_by_address[i]; struct map *before = NULL, *after = NULL; @@ -824,9 +879,7 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new) * 'pos' mapping and therefore there are no * later mappings. 
*/ - err = __maps__insert(maps, new); - if (!err) - err = __maps__insert(maps, after); + err = __maps__insert_sorted(maps, i, new, after); map__put(after); check_invariants(maps); return err; @@ -839,7 +892,7 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new) */ map__put(maps_by_address[i]); maps_by_address[i] = map__get(new); - err = __maps__insert(maps, after); + err = __maps__insert_sorted(maps, i + 1, after, NULL); map__put(after); check_invariants(maps); return err; @@ -869,7 +922,7 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new) } } /* Add the map. */ - err = __maps__insert(maps, new); + err = __maps__insert_sorted(maps, i, new, NULL); out_err: return err; } -- cgit v1.2.3-73-gaa49b From c9d197ec16013cb2a3d4efa9b459dcc1be795551 Mon Sep 17 00:00:00 2001 From: Todd Brandt Date: Fri, 31 May 2024 02:13:09 -0700 Subject: pm-graph: v5.12, fixes - fix S3 suspend fail double run by using fp.flush to /sys/power/state - when running turbostat print the return value - handle case where html files have binary data - max issues in summary-issues is now 100 (in case there are thousands) - add backup to dmidecode, use /sys/class/dmi/id/ in case /dev/mem fails - update summary page to use full mode (disk-platform instead of disk) Signed-off-by: Todd Brandt Signed-off-by: Rafael J. 
Wysocki --- tools/power/pm-graph/sleepgraph.py | 102 +++++++++++++++++++++++-------------- 1 file changed, 65 insertions(+), 37 deletions(-) (limited to 'tools') diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py index 40ad221e8881..b709c5f2b6f1 100755 --- a/tools/power/pm-graph/sleepgraph.py +++ b/tools/power/pm-graph/sleepgraph.py @@ -86,7 +86,7 @@ def ascii(text): # store system values and test parameters class SystemValues: title = 'SleepGraph' - version = '5.11' + version = '5.12' ansi = False rs = 0 display = '' @@ -1181,8 +1181,8 @@ class SystemValues: cmd = self.getExec('turbostat') rawout = keyline = valline = '' fullcmd = '%s -q -S echo freeze > %s' % (cmd, self.powerfile) - fp = Popen(['sh', '-c', fullcmd], stdout=PIPE, stderr=PIPE).stderr - for line in fp: + fp = Popen(['sh', '-c', fullcmd], stdout=PIPE, stderr=PIPE) + for line in fp.stderr: line = ascii(line) rawout += line if keyline and valline: @@ -1191,13 +1191,13 @@ class SystemValues: keyline = line.strip().split() elif keyline: valline = line.strip().split() - fp.close() + fp.wait() if not keyline or not valline or len(keyline) != len(valline): errmsg = 'unrecognized turbostat output:\n'+rawout.strip() self.vprint(errmsg) if not self.verbose: pprint(errmsg) - return '' + return (fp.returncode, '') if self.verbose: pprint(rawout.strip()) out = [] @@ -1207,7 +1207,7 @@ class SystemValues: if key == 'SYS%LPI' and not s0ixready and re.match('^[0\.]*$', val): continue out.append('%s=%s' % (key, val)) - return '|'.join(out) + return (fp.returncode, '|'.join(out)) def netfixon(self, net='both'): cmd = self.getExec('netfix') if not cmd: @@ -4343,7 +4343,8 @@ def createHTMLSummarySimple(testruns, htmlfile, title): list[mode]['data'].append([data['host'], data['kernel'], data['time'], tVal[0], tVal[1], data['url'], res, data['issues'], data['sus_worst'], data['sus_worsttime'], - data['res_worst'], data['res_worsttime'], pkgpc10, syslpi, wifi]) + data['res_worst'], 
data['res_worsttime'], pkgpc10, syslpi, wifi, + (data['fullmode'] if 'fullmode' in data else mode)]) idx = len(list[mode]['data']) - 1 if res.startswith('fail in'): res = 'fail' @@ -4449,7 +4450,7 @@ def createHTMLSummarySimple(testruns, htmlfile, title): elif idx == iMed[i]: tHigh[i] = ' id="%smed" class=medval title="Median"' % tag html += td.format("%d" % (list[mode]['data'].index(d) + 1)) # row - html += td.format(mode) # mode + html += td.format(d[15]) # mode html += td.format(d[0]) # host html += td.format(d[1]) # kernel html += td.format(d[2]) # time @@ -5524,7 +5525,9 @@ def executeSuspend(quiet=False): if ((mode == 'freeze') or (sv.memmode == 's2idle')) \ and sv.haveTurbostat(): # execution will pause here - turbo = sv.turbostat(s0ixready) + retval, turbo = sv.turbostat(s0ixready) + if retval != 0: + tdata['error'] ='turbostat returned %d' % retval if turbo: tdata['turbo'] = turbo else: @@ -5532,6 +5535,7 @@ def executeSuspend(quiet=False): pf.write(mode) # execution will pause here try: + pf.flush() pf.close() except Exception as e: tdata['error'] = str(e) @@ -5702,6 +5706,40 @@ def getModes(): fp.close() return modes +def dmidecode_backup(out, fatal=False): + cpath, spath, info = '/proc/cpuinfo', '/sys/class/dmi/id', { + 'bios-vendor': 'bios_vendor', + 'bios-version': 'bios_version', + 'bios-release-date': 'bios_date', + 'system-manufacturer': 'sys_vendor', + 'system-product-name': 'product_name', + 'system-version': 'product_version', + 'system-serial-number': 'product_serial', + 'baseboard-manufacturer': 'board_vendor', + 'baseboard-product-name': 'board_name', + 'baseboard-version': 'board_version', + 'baseboard-serial-number': 'board_serial', + 'chassis-manufacturer': 'chassis_vendor', + 'chassis-version': 'chassis_version', + 'chassis-serial-number': 'chassis_serial', + } + for key in info: + if key not in out: + val = sysvals.getVal(os.path.join(spath, info[key])).strip() + if val and val.lower() != 'to be filled by o.e.m.': + out[key] = val + if 
'processor-version' not in out and os.path.exists(cpath): + with open(cpath, 'r') as fp: + for line in fp: + m = re.match('^model\s*name\s*\:\s*(?P.*)', line) + if m: + out['processor-version'] = m.group('c').strip() + break + if fatal and len(out) < 1: + doError('dmidecode failed to get info from %s or %s' % \ + (sysvals.mempath, spath)) + return out + # Function: dmidecode # Description: # Read the bios tables and pull out system info @@ -5712,6 +5750,8 @@ def getModes(): # A dict object with all available key/values def dmidecode(mempath, fatal=False): out = dict() + if(not (os.path.exists(mempath) and os.access(mempath, os.R_OK))): + return dmidecode_backup(out, fatal) # the list of values to retrieve, with hardcoded (type, idx) info = { @@ -5727,24 +5767,14 @@ def dmidecode(mempath, fatal=False): 'baseboard-version': (2, 6), 'baseboard-serial-number': (2, 7), 'chassis-manufacturer': (3, 4), - 'chassis-type': (3, 5), 'chassis-version': (3, 6), 'chassis-serial-number': (3, 7), 'processor-manufacturer': (4, 7), 'processor-version': (4, 16), } - if(not os.path.exists(mempath)): - if(fatal): - doError('file does not exist: %s' % mempath) - return out - if(not os.access(mempath, os.R_OK)): - if(fatal): - doError('file is not readable: %s' % mempath) - return out # by default use legacy scan, but try to use EFI first - memaddr = 0xf0000 - memsize = 0x10000 + memaddr, memsize = 0xf0000, 0x10000 for ep in ['/sys/firmware/efi/systab', '/proc/efi/systab']: if not os.path.exists(ep) or not os.access(ep, os.R_OK): continue @@ -5765,11 +5795,7 @@ def dmidecode(mempath, fatal=False): fp.seek(memaddr) buf = fp.read(memsize) except: - if(fatal): - doError('DMI table is unreachable, sorry') - else: - pprint('WARNING: /dev/mem is not readable, ignoring DMI data') - return out + return dmidecode_backup(out, fatal) fp.close() # search for either an SM table or DMI table @@ -5785,10 +5811,7 @@ def dmidecode(mempath, fatal=False): break i += 16 if base == 0 and length == 0 and num 
== 0: - if(fatal): - doError('Neither SMBIOS nor DMI were found') - else: - return out + return dmidecode_backup(out, fatal) # read in the SM or DMI table try: @@ -5796,11 +5819,7 @@ def dmidecode(mempath, fatal=False): fp.seek(base) buf = fp.read(length) except: - if(fatal): - doError('DMI table is unreachable, sorry') - else: - pprint('WARNING: /dev/mem is not readable, ignoring DMI data') - return out + return dmidecode_backup(out, fatal) fp.close() # scan the table for the values we want @@ -6272,7 +6291,10 @@ def find_in_html(html, start, end, firstonly=True): return out def data_from_html(file, outpath, issues, fulldetail=False): - html = open(file, 'r').read() + try: + html = open(file, 'r').read() + except: + html = ascii(open(file, 'rb').read()) sysvals.htmlfile = os.path.relpath(file, outpath) # extract general info suspend = find_in_html(html, 'Kernel Suspend', 'ms') @@ -6307,8 +6329,9 @@ def data_from_html(file, outpath, issues, fulldetail=False): d.end = 999999999 d.dmesgtext = log.split('\n') tp = d.extractErrorInfo() - for msg in tp.msglist: - sysvals.errorSummary(issues, msg) + if len(issues) < 100: + for msg in tp.msglist: + sysvals.errorSummary(issues, msg) if stmp[2] == 'freeze': extra = d.turbostatInfo() elist = dict() @@ -6325,6 +6348,11 @@ def data_from_html(file, outpath, issues, fulldetail=False): line = find_in_html(log, '# netfix ', '\n') if line: extra['netfix'] = line + line = find_in_html(log, '# command ', '\n') + if line: + m = re.match('.* -m (?P\S*).*', line) + if m: + extra['fullmode'] = m.group('m') low = find_in_html(html, 'freeze time: ', ' ms') for lowstr in ['waking', '+']: if not low: -- cgit v1.2.3-73-gaa49b From 8b2f0cb63e6f5370b2c100f5e0f0288339344827 Mon Sep 17 00:00:00 2001 From: Todd Brandt Date: Fri, 31 May 2024 02:13:10 -0700 Subject: pm-graph: v5.12, code revamp for python3.12 sleepgraph/bootgraph function correctly in python3.12 but include a slew of deprecation warnings for unsupported regexes. 
This patch fixes up all the strings in the code so that it conforms with python3.12 standards. Signed-off-by: Todd Brandt Signed-off-by: Rafael J. Wysocki --- tools/power/pm-graph/bootgraph.py | 16 +- tools/power/pm-graph/sleepgraph.py | 1000 ++++++++++++++++++------------------ 2 files changed, 508 insertions(+), 508 deletions(-) (limited to 'tools') diff --git a/tools/power/pm-graph/bootgraph.py b/tools/power/pm-graph/bootgraph.py index f96f50e0c336..8a3ef94fe88f 100755 --- a/tools/power/pm-graph/bootgraph.py +++ b/tools/power/pm-graph/bootgraph.py @@ -77,12 +77,12 @@ class SystemValues(aslib.SystemValues): fp.close() self.testdir = datetime.now().strftime('boot-%y%m%d-%H%M%S') def kernelVersion(self, msg): - m = re.match('^[Ll]inux *[Vv]ersion *(?P\S*) .*', msg) + m = re.match(r'^[Ll]inux *[Vv]ersion *(?P\S*) .*', msg) if m: return m.group('v') return 'unknown' def checkFtraceKernelVersion(self): - m = re.match('^(?P[0-9]*)\.(?P[0-9]*)\.(?P[0-9]*).*', self.kernel) + m = re.match(r'^(?P[0-9]*)\.(?P[0-9]*)\.(?P[0-9]*).*', self.kernel) if m: val = tuple(map(int, m.groups())) if val >= (4, 10, 0): @@ -324,7 +324,7 @@ def parseKernelLog(): idx = line.find('[') if idx > 1: line = line[idx:] - m = re.match('[ \t]*(\[ *)(?P[0-9\.]*)(\]) (?P.*)', line) + m = re.match(r'[ \t]*(\[ *)(?P[0-9\.]*)(\]) (?P.*)', line) if(not m): continue ktime = float(m.group('ktime')) @@ -332,24 +332,24 @@ def parseKernelLog(): break msg = m.group('msg') data.dmesgtext.append(line) - if(ktime == 0.0 and re.match('^Linux version .*', msg)): + if(ktime == 0.0 and re.match(r'^Linux version .*', msg)): if(not sysvals.stamp['kernel']): sysvals.stamp['kernel'] = sysvals.kernelVersion(msg) continue - m = re.match('.* setting system clock to (?P[0-9\-]*)[ A-Z](?P[0-9:]*) UTC.*', msg) + m = re.match(r'.* setting system clock to (?P[0-9\-]*)[ A-Z](?P[0-9:]*) UTC.*', msg) if(m): bt = datetime.strptime(m.group('d')+' '+m.group('t'), '%Y-%m-%d %H:%M:%S') bt = bt - timedelta(seconds=int(ktime))
data.boottime = bt.strftime('%Y-%m-%d_%H:%M:%S') sysvals.stamp['time'] = bt.strftime('%B %d %Y, %I:%M:%S %p') continue - m = re.match('^calling *(?P.*)\+.* @ (?P

[0-9]*)', msg) + m = re.match(r'^calling *(?P.*)\+.* @ (?P

[0-9]*)', msg) if(m): func = m.group('f') pid = int(m.group('p')) devtemp[func] = (ktime, pid) continue - m = re.match('^initcall *(?P.*)\+.* returned (?P.*) after (?P.*) usecs', msg) + m = re.match(r'^initcall *(?P.*)\+.* returned (?P.*) after (?P.*) usecs', msg) if(m): data.valid = True data.end = ktime @@ -359,7 +359,7 @@ def parseKernelLog(): data.newAction(phase, f, pid, start, ktime, int(r), int(t)) del devtemp[f] continue - if(re.match('^Freeing unused kernel .*', msg)): + if(re.match(r'^Freeing unused kernel .*', msg)): data.tUserMode = ktime data.dmesg['kernel']['end'] = ktime data.dmesg['user']['start'] = ktime diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py index b709c5f2b6f1..ef87e63c05c7 100755 --- a/tools/power/pm-graph/sleepgraph.py +++ b/tools/power/pm-graph/sleepgraph.py @@ -420,11 +420,11 @@ class SystemValues: return value.format(**args) def setOutputFile(self): if self.dmesgfile != '': - m = re.match('(?P.*)_dmesg\.txt.*', self.dmesgfile) + m = re.match(r'(?P.*)_dmesg\.txt.*', self.dmesgfile) if(m): self.htmlfile = m.group('name')+'.html' if self.ftracefile != '': - m = re.match('(?P.*)_ftrace\.txt.*', self.ftracefile) + m = re.match(r'(?P.*)_ftrace\.txt.*', self.ftracefile) if(m): self.htmlfile = m.group('name')+'.html' def systemInfo(self, info): @@ -464,15 +464,15 @@ class SystemValues: if os.path.exists('/proc/cpuinfo'): with open('/proc/cpuinfo', 'r') as fp: for line in fp: - if re.match('^processor[ \t]*:[ \t]*[0-9]*', line): + if re.match(r'^processor[ \t]*:[ \t]*[0-9]*', line): self.cpucount += 1 if os.path.exists('/proc/meminfo'): with open('/proc/meminfo', 'r') as fp: for line in fp: - m = re.match('^MemTotal:[ \t]*(?P[0-9]*) *kB', line) + m = re.match(r'^MemTotal:[ \t]*(?P[0-9]*) *kB', line) if m: self.memtotal = int(m.group('sz')) - m = re.match('^MemFree:[ \t]*(?P[0-9]*) *kB', line) + m = re.match(r'^MemFree:[ \t]*(?P[0-9]*) *kB', line) if m: self.memfree = int(m.group('sz')) if 
os.path.exists('/etc/os-release'): @@ -539,7 +539,7 @@ class SystemValues: idx = line.find('[') if idx > 1: line = line[idx:] - m = re.match('[ \t]*(\[ *)(?P[0-9\.]*)(\]) (?P.*)', line) + m = re.match(r'[ \t]*(\[ *)(?P[0-9\.]*)(\]) (?P.*)', line) if(m): ktime = m.group('ktime') break @@ -553,7 +553,7 @@ class SystemValues: idx = line.find('[') if idx > 1: line = line[idx:] - m = re.match('[ \t]*(\[ *)(?P[0-9\.]*)(\]) (?P.*)', line) + m = re.match(r'[ \t]*(\[ *)(?P[0-9\.]*)(\]) (?P.*)', line) if(not m): continue ktime = float(m.group('ktime')) @@ -636,11 +636,11 @@ class SystemValues: # now process the args for arg in sorted(args): arglist[arg] = '' - m = re.match('.* '+arg+'=(?P.*) ', data); + m = re.match(r'.* '+arg+'=(?P.*) ', data); if m: arglist[arg] = m.group('arg') else: - m = re.match('.* '+arg+'=(?P.*)', data); + m = re.match(r'.* '+arg+'=(?P.*)', data); if m: arglist[arg] = m.group('arg') out = fmt.format(**arglist) @@ -989,7 +989,7 @@ class SystemValues: m = re.match(tp.ftrace_line_fmt, line) if(not m or 'device_pm_callback_start' not in line): continue - m = re.match('.*: (?P.*) (?P.*), parent: *(?P

.*), .*', m.group('msg')); + m = re.match(r'.*: (?P.*) (?P.*), parent: *(?P

.*), .*', m.group('msg')); if(not m): continue dev = m.group('d') @@ -999,7 +999,7 @@ class SystemValues: # now get the syspath for each target device for dirname, dirnames, filenames in os.walk('/sys/devices'): - if(re.match('.*/power', dirname) and 'async' in filenames): + if(re.match(r'.*/power', dirname) and 'async' in filenames): dev = dirname.split('/')[-2] if dev in props and (not props[dev].syspath or len(dirname) < len(props[dev].syspath)): props[dev].syspath = dirname[:-6] @@ -1143,12 +1143,12 @@ class SystemValues: elif value and os.path.exists(file): fp = open(file, 'r+') if fmt == 'radio': - m = re.match('.*\[(?P.*)\].*', fp.read()) + m = re.match(r'.*\[(?P.*)\].*', fp.read()) if m: self.cfgdef[file] = m.group('v') elif fmt == 'acpi': line = fp.read().strip().split('\n')[-1] - m = re.match('.* (?P[0-9A-Fx]*) .*', line) + m = re.match(r'.* (?P[0-9A-Fx]*) .*', line) if m: self.cfgdef[file] = m.group('v') else: @@ -1173,7 +1173,7 @@ class SystemValues: fp = Popen([cmd, '-v'], stdout=PIPE, stderr=PIPE).stderr out = ascii(fp.read()).strip() fp.close() - if re.match('turbostat version .*', out): + if re.match(r'turbostat version .*', out): self.vprint(out) return True return False @@ -1187,7 +1187,7 @@ class SystemValues: rawout += line if keyline and valline: continue - if re.match('(?i)Avg_MHz.*', line): + if re.match(r'(?i)Avg_MHz.*', line): keyline = line.strip().split() elif keyline: valline = line.strip().split() @@ -1204,7 +1204,7 @@ class SystemValues: for key in keyline: idx = keyline.index(key) val = valline[idx] - if key == 'SYS%LPI' and not s0ixready and re.match('^[0\.]*$', val): + if key == 'SYS%LPI' and not s0ixready and re.match(r'^[0\.]*$', val): continue out.append('%s=%s' % (key, val)) return (fp.returncode, '|'.join(out)) @@ -1232,7 +1232,7 @@ class SystemValues: except: return '' for line in reversed(w.split('\n')): - m = re.match(' *(?P.*): (?P[0-9a-f]*) .*', line) + m = re.match(r' *(?P.*): (?P[0-9a-f]*) .*', line) if not m or (dev and 
dev != m.group('dev')): continue return m.group('dev') @@ -1261,14 +1261,14 @@ class SystemValues: return arr = msg.split() for j in range(len(arr)): - if re.match('^[0-9,\-\.]*$', arr[j]): - arr[j] = '[0-9,\-\.]*' + if re.match(r'^[0-9,\-\.]*$', arr[j]): + arr[j] = r'[0-9,\-\.]*' else: arr[j] = arr[j]\ - .replace('\\', '\\\\').replace(']', '\]').replace('[', '\[')\ - .replace('.', '\.').replace('+', '\+').replace('*', '\*')\ - .replace('(', '\(').replace(')', '\)').replace('}', '\}')\ - .replace('{', '\{') + .replace('\\', r'\\\\').replace(']', r'\]').replace('[', r'\[')\ + .replace('.', r'\.').replace('+', r'\+').replace('*', r'\*')\ + .replace('(', r'\(').replace(')', r'\)').replace('}', r'\}')\ + .replace('{', r'\{') mstr = ' *'.join(arr) entry = { 'line': msg, @@ -1340,7 +1340,7 @@ class SystemValues: fp = Popen(xset.format('q').split(' '), stdout=PIPE).stdout ret = 'unknown' for line in fp: - m = re.match('[\s]*Monitor is (?P.*)', ascii(line)) + m = re.match(r'[\s]*Monitor is (?P.*)', ascii(line)) if(m and len(m.group('m')) >= 2): out = m.group('m').lower() ret = out[3:] if out[0:2] == 'in' else out @@ -1566,7 +1566,7 @@ class Data: i += 1 if tp.stampInfo(line, sysvals): continue - m = re.match('[ \t]*(\[ *)(?P[0-9\.]*)(\]) (?P.*)', line) + m = re.match(r'[ \t]*(\[ *)(?P[0-9\.]*)(\]) (?P.*)', line) if not m: continue t = float(m.group('ktime')) @@ -1574,7 +1574,7 @@ class Data: continue dir = 'suspend' if t < self.tSuspended else 'resume' msg = m.group('msg') - if re.match('capability: warning: .*', msg): + if re.match(r'capability: warning: .*', msg): continue for err in self.errlist: if re.match(self.errlist[err], msg): @@ -1679,8 +1679,8 @@ class Data: ubiquitous = False if kprobename in dtf and 'ub' in dtf[kprobename]: ubiquitous = True - mc = re.match('\(.*\) *(?P.*)', cdata) - mr = re.match('\((?P\S*).* arg1=(?P.*)', rdata) + mc = re.match(r'\(.*\) *(?P.*)', cdata) + mr = re.match(r'\((?P\S*).* arg1=(?P.*)', rdata) if mc and mr: c = 
mr.group('caller').split('+')[0] a = mc.group('args').strip() @@ -1997,7 +1997,7 @@ class Data: list = self.dmesg[phase]['list'] mydev = '' for devname in sorted(list): - if name == devname or re.match('^%s\[(?P[0-9]*)\]$' % name, devname): + if name == devname or re.match(r'^%s\[(?P[0-9]*)\]$' % name, devname): mydev = devname if mydev: return list[mydev] @@ -2099,7 +2099,7 @@ class Data: for dev in sorted(list): pdev = list[dev]['par'] pid = list[dev]['pid'] - if(pid < 0 or re.match('[0-9]*-[0-9]*\.[0-9]*[\.0-9]*\:[\.0-9]*$', pdev)): + if(pid < 0 or re.match(r'[0-9]*-[0-9]*\.[0-9]*[\.0-9]*\:[\.0-9]*$', pdev)): continue if pdev and pdev not in real and pdev not in rootlist: rootlist.append(pdev) @@ -2190,26 +2190,26 @@ class Data: if 'resume_complete' in dm: dm['resume_complete']['end'] = time def initcall_debug_call(self, line, quick=False): - m = re.match('.*(\[ *)(?P[0-9\.]*)(\]) .* (?P.*)\: '+\ - 'PM: *calling .* @ (?P.*), parent: (?P

.*)', line) + m = re.match(r'.*(\[ *)(?P[0-9\.]*)(\]) .* (?P.*)\: '+\ + r'PM: *calling .* @ (?P.*), parent: (?P

.*)', line) if not m: - m = re.match('.*(\[ *)(?P[0-9\.]*)(\]) .* (?P.*)\: '+\ - 'calling .* @ (?P.*), parent: (?P

.*)', line) + m = re.match(r'.*(\[ *)(?P[0-9\.]*)(\]) .* (?P.*)\: '+\ + r'calling .* @ (?P.*), parent: (?P

.*)', line) if not m: - m = re.match('.*(\[ *)(?P[0-9\.]*)(\]) calling '+\ - '(?P.*)\+ @ (?P.*), parent: (?P

.*)', line) + m = re.match(r'.*(\[ *)(?P[0-9\.]*)(\]) calling '+\ + r'(?P.*)\+ @ (?P.*), parent: (?P

.*)', line) if m: return True if quick else m.group('t', 'f', 'n', 'p') return False if quick else ('', '', '', '') def initcall_debug_return(self, line, quick=False): - m = re.match('.*(\[ *)(?P[0-9\.]*)(\]) .* (?P.*)\: PM: '+\ - '.* returned (?P[0-9]*) after (?P

[0-9]*) usecs', line) + m = re.match(r'.*(\[ *)(?P[0-9\.]*)(\]) .* (?P.*)\: PM: '+\ + r'.* returned (?P[0-9]*) after (?P
[0-9]*) usecs', line) if not m: - m = re.match('.*(\[ *)(?P[0-9\.]*)(\]) .* (?P.*)\: '+\ - '.* returned (?P[0-9]*) after (?P
[0-9]*) usecs', line) + m = re.match(r'.*(\[ *)(?P[0-9\.]*)(\]) .* (?P.*)\: '+\ + r'.* returned (?P[0-9]*) after (?P
[0-9]*) usecs', line) if not m: - m = re.match('.*(\[ *)(?P[0-9\.]*)(\]) call '+\ - '(?P.*)\+ returned .* after (?P
.*) usecs', line) + m = re.match(r'.*(\[ *)(?P[0-9\.]*)(\]) call '+\ + r'(?P.*)\+ returned .* after (?P
.*) usecs', line) if m: return True if quick else m.group('t', 'f', 'dt') return False if quick else ('', '', '') @@ -2294,28 +2294,28 @@ class FTraceLine: if not m and not d: return # is this a trace event - if(d == 'traceevent' or re.match('^ *\/\* *(?P.*) \*\/ *$', m)): + if(d == 'traceevent' or re.match(r'^ *\/\* *(?P.*) \*\/ *$', m)): if(d == 'traceevent'): # nop format trace event msg = m else: # function_graph format trace event - em = re.match('^ *\/\* *(?P.*) \*\/ *$', m) + em = re.match(r'^ *\/\* *(?P.*) \*\/ *$', m) msg = em.group('msg') - emm = re.match('^(?P.*?): (?P.*)', msg) + emm = re.match(r'^(?P.*?): (?P.*)', msg) if(emm): self.name = emm.group('msg') self.type = emm.group('call') else: self.name = msg - km = re.match('^(?P.*)_cal$', self.type) + km = re.match(r'^(?P.*)_cal$', self.type) if km: self.fcall = True self.fkprobe = True self.type = km.group('n') return - km = re.match('^(?P.*)_ret$', self.type) + km = re.match(r'^(?P.*)_ret$', self.type) if km: self.freturn = True self.fkprobe = True @@ -2327,7 +2327,7 @@ class FTraceLine: if(d): self.length = float(d)/1000000 # the indentation determines the depth - match = re.match('^(?P *)(?P.*)$', m) + match = re.match(r'^(?P *)(?P.*)$', m) if(not match): return self.depth = self.getDepth(match.group('d')) @@ -2337,7 +2337,7 @@ class FTraceLine: self.freturn = True if(len(m) > 1): # includes comment with function name - match = re.match('^} *\/\* *(?P.*) *\*\/$', m) + match = re.match(r'^} *\/\* *(?P.*) *\*\/$', m) if(match): self.name = match.group('n').strip() # function call @@ -2345,13 +2345,13 @@ class FTraceLine: self.fcall = True # function call with children if(m[-1] == '{'): - match = re.match('^(?P.*) *\(.*', m) + match = re.match(r'^(?P.*) *\(.*', m) if(match): self.name = match.group('n').strip() # function call with no children (leaf) elif(m[-1] == ';'): self.freturn = True - match = re.match('^(?P.*) *\(.*', m) + match = re.match(r'^(?P.*) *\(.*', m) if(match): self.name = 
match.group('n').strip() # something else (possibly a trace marker) @@ -2385,7 +2385,7 @@ class FTraceLine: return False else: if(self.type == 'suspend_resume' and - re.match('suspend_enter\[.*\] begin', self.name)): + re.match(r'suspend_enter\[.*\] begin', self.name)): return True return False def endMarker(self): @@ -2398,7 +2398,7 @@ class FTraceLine: return False else: if(self.type == 'suspend_resume' and - re.match('thaw_processes\[.*\] end', self.name)): + re.match(r'thaw_processes\[.*\] end', self.name)): return True return False @@ -2976,30 +2976,30 @@ class Timeline: # Description: # A list of values describing the properties of these test runs class TestProps: - stampfmt = '# [a-z]*-(?P[0-9]{2})(?P[0-9]{2})(?P[0-9]{2})-'+\ - '(?P[0-9]{2})(?P[0-9]{2})(?P[0-9]{2})'+\ - ' (?P.*) (?P.*) (?P.*)$' - wififmt = '^# wifi *(?P\S*) *(?P\S*) *(?P[0-9\.]+).*' - tstatfmt = '^# turbostat (?P\S*)' - testerrfmt = '^# enter_sleep_error (?P.*)' - sysinfofmt = '^# sysinfo .*' - cmdlinefmt = '^# command \| (?P.*)' - kparamsfmt = '^# kparams \| (?P.*)' - devpropfmt = '# Device Properties: .*' - pinfofmt = '# platform-(?P[a-z,A-Z,0-9,_]*): (?P.*)' - tracertypefmt = '# tracer: (?P.*)' - firmwarefmt = '# fwsuspend (?P[0-9]*) fwresume (?P[0-9]*)$' - procexecfmt = 'ps - (?P.*)$' - procmultifmt = '@(?P[0-9]*)\|(?P.*)$' + stampfmt = r'# [a-z]*-(?P[0-9]{2})(?P[0-9]{2})(?P[0-9]{2})-'+\ + r'(?P[0-9]{2})(?P[0-9]{2})(?P[0-9]{2})'+\ + r' (?P.*) (?P.*) (?P.*)$' + wififmt = r'^# wifi *(?P\S*) *(?P\S*) *(?P[0-9\.]+).*' + tstatfmt = r'^# turbostat (?P\S*)' + testerrfmt = r'^# enter_sleep_error (?P.*)' + sysinfofmt = r'^# sysinfo .*' + cmdlinefmt = r'^# command \| (?P.*)' + kparamsfmt = r'^# kparams \| (?P.*)' + devpropfmt = r'# Device Properties: .*' + pinfofmt = r'# platform-(?P[a-z,A-Z,0-9,_]*): (?P.*)' + tracertypefmt = r'# tracer: (?P.*)' + firmwarefmt = r'# fwsuspend (?P[0-9]*) fwresume (?P[0-9]*)$' + procexecfmt = r'ps - (?P.*)$' + procmultifmt = r'@(?P[0-9]*)\|(?P.*)$' 
ftrace_line_fmt_fg = \ - '^ *(?P