aboutsummaryrefslogtreecommitdiff
path: root/tools/perf/util/bpf_skel
AgeCommit message (Collapse)AuthorFilesLines
2021-12-16perf ftrace: Implement cpu and task filters in BPFNamhyung Kim1-0/+21
Honor cpu and task options to set up filters (by pid or tid) in the BPF program. For example, the following command will show latency of the mutex_lock for process 2570. # perf ftrace latency -b -T mutex_lock -p 2570 sleep 3 # DURATION | COUNT | GRAPH | 0 - 1 us | 675 | ############################## | 1 - 2 us | 9 | | 2 - 4 us | 0 | | 4 - 8 us | 0 | | 8 - 16 us | 0 | | 16 - 32 us | 0 | | 32 - 64 us | 0 | | 64 - 128 us | 0 | | 128 - 256 us | 0 | | 256 - 512 us | 0 | | 512 - 1024 us | 0 | | 1 - 2 ms | 0 | | 2 - 4 ms | 0 | | 4 - 8 ms | 0 | | 8 - 16 ms | 0 | | 16 - 32 ms | 0 | | 32 - 64 ms | 0 | | 64 - 128 ms | 0 | | 128 - 256 ms | 0 | | 256 - 512 ms | 0 | | 512 - 1024 ms | 0 | | 1 - ... s | 0 | | Committer testing: Looking at faults on a firefox process: # strace -e bpf perf ftrace latency -b -p 1674378 -T __handle_mm_fault bpf(BPF_PROG_LOAD, {prog_type=BPF_PROG_TYPE_SOCKET_FILTER, insn_cnt=2, insns=0x7ffee1fee740, license="GPL", log_level=0, log_size=0, log_buf=NULL, kern_version=KERNEL_VERSION(0, 0, 0), prog_flags=0, prog_name="", prog_ifindex=0, expected_attach_type=BPF_CGROUP_INET_INGRESS, prog_btf_fd=0, func_info_rec_size=0, func_info=NULL, func_info_cnt=0, line_info_rec_size=0, line_info=NULL, line_info_cnt=0, attach_btf_id=0, attach_prog_fd=0}, 128) = 3 bpf(BPF_BTF_LOAD, {btf="\237\353\1\0\30\0\0\0\0\0\0\0\20\0\0\0\20\0\0\0\5\0\0\0\1\0\0\0\0\0\0\1"..., btf_log_buf=NULL, btf_size=45, btf_log_size=0, btf_log_level=0}, 128) = 3 bpf(BPF_BTF_LOAD, {btf="\237\353\1\0\30\0\0\0\0\0\0\0000\0\0\0000\0\0\0\t\0\0\0\1\0\0\0\0\0\0\1"..., btf_log_buf=NULL, btf_size=81, btf_log_size=0, btf_log_level=0}, 128) = 3 bpf(BPF_BTF_LOAD, {btf="\237\353\1\0\30\0\0\0\0\0\0\08\0\0\08\0\0\0\t\0\0\0\0\0\0\0\0\0\0\1"..., btf_log_buf=NULL, btf_size=89, btf_log_size=0, btf_log_level=0}, 128) = 3 bpf(BPF_BTF_LOAD, {btf="\237\353\1\0\30\0\0\0\0\0\0\0\f\0\0\0\f\0\0\0\7\0\0\0\1\0\0\0\0\0\0\20"..., btf_log_buf=NULL, btf_size=43, btf_log_size=0, btf_log_level=0}, 128) = 3 bpf(BPF_BTF_LOAD, {btf="\237\353\1\0\30\0\0\0\0\0\0\0000\0\0\0000\0\0\0\t\0\0\0\1\0\0\0\0\0\0\1"..., btf_log_buf=NULL, btf_size=81, btf_log_size=0, btf_log_level=0}, 128) = 3 bpf(BPF_BTF_LOAD, {btf="\237\353\1\0\30\0\0\0\0\0\0\0000\0\0\0000\0\0\0\5\0\0\0\0\0\0\0\0\0\0\1"..., btf_log_buf=NULL, btf_size=77, btf_log_size=0, btf_log_level=0}, 128) = -1 EINVAL (Invalid argument) bpf(BPF_BTF_LOAD, {btf="\237\353\1\0\30\0\0\0\0\0\0\0 \3\0\0 \3\0\0\306\3\0\0\0\0\0\0\0\0\0\2"..., btf_log_buf=NULL, btf_size=1790, btf_log_size=0, btf_log_level=0}, 128) = 3 bpf(BPF_MAP_CREATE, {map_type=BPF_MAP_TYPE_ARRAY, key_size=4, value_size=32, max_entries=1, map_flags=0, inner_map_fd=0, map_name="", map_ifindex=0, btf_fd=0, btf_key_type_id=0, btf_value_type_id=0, btf_vmlinux_value_type_id=0}, 128) = 4 bpf(BPF_PROG_LOAD, {prog_type=BPF_PROG_TYPE_SOCKET_FILTER, insn_cnt=5, insns=0x7ffee1fee570, license="GPL", log_level=0, log_size=0, log_buf=NULL, kern_version=KERNEL_VERSION(0, 0, 0), prog_flags=0, prog_name="", prog_ifindex=0, expected_attach_type=BPF_CGROUP_INET_INGRESS, prog_btf_fd=0, func_info_rec_size=0, func_info=NULL, func_info_cnt=0, line_info_rec_size=0, line_info=NULL, line_info_cnt=0, attach_btf_id=0, attach_prog_fd=0}, 128) = 5 bpf(BPF_MAP_CREATE, {map_type=BPF_MAP_TYPE_ARRAY, key_size=4, value_size=4, max_entries=1, map_flags=BPF_F_MMAPABLE, inner_map_fd=0, map_name="", map_ifindex=0, btf_fd=0, btf_key_type_id=0, btf_value_type_id=0, btf_vmlinux_value_type_id=0}, 128) = 4 bpf(BPF_PROG_LOAD, {prog_type=BPF_PROG_TYPE_SOCKET_FILTER, insn_cnt=2, insns=0x7ffee1fee3c0, license="GPL", log_level=0, log_size=0, log_buf=NULL, kern_version=KERNEL_VERSION(0, 0, 0), prog_flags=0, prog_name="test", prog_ifindex=0, expected_attach_type=BPF_CGROUP_INET_INGRESS, prog_btf_fd=0, func_info_rec_size=0, func_info=NULL, func_info_cnt=0, line_info_rec_size=0, line_info=NULL, line_info_cnt=0, attach_btf_id=0, attach_prog_fd=0}, 128) = 4 bpf(BPF_MAP_CREATE, {map_type=BPF_MAP_TYPE_HASH, key_size=8, value_size=8, max_entries=10000, map_flags=0, inner_map_fd=0, map_name="functime", map_ifindex=0, btf_fd=3, btf_key_type_id=0, btf_value_type_id=0, btf_vmlinux_value_type_id=0}, 128) = 4 bpf(BPF_MAP_CREATE, {map_type=BPF_MAP_TYPE_HASH, key_size=4, value_size=1, max_entries=1, map_flags=0, inner_map_fd=0, map_name="cpu_filter", map_ifindex=0, btf_fd=3, btf_key_type_id=0, btf_value_type_id=0, btf_vmlinux_value_type_id=0}, 128) = 5 bpf(BPF_MAP_CREATE, {map_type=BPF_MAP_TYPE_HASH, key_size=4, value_size=1, max_entries=36, map_flags=0, inner_map_fd=0, map_name="task_filter", map_ifindex=0, btf_fd=3, btf_key_type_id=0, btf_value_type_id=0, btf_vmlinux_value_type_id=0}, 128) = 6 bpf(BPF_MAP_CREATE, {map_type=BPF_MAP_TYPE_PERCPU_ARRAY, key_size=4, value_size=8, max_entries=22, map_flags=0, inner_map_fd=0, map_name="latency", map_ifindex=0, btf_fd=3, btf_key_type_id=0, btf_value_type_id=0, btf_vmlinux_value_type_id=0}, 128) = 7 bpf(BPF_MAP_CREATE, {map_type=BPF_MAP_TYPE_ARRAY, key_size=4, value_size=12, max_entries=1, map_flags=BPF_F_MMAPABLE, inner_map_fd=0, map_name="func_lat.bss", map_ifindex=0, btf_fd=3, btf_key_type_id=0, btf_value_type_id=32, btf_vmlinux_value_type_id=0}, 128) = 8 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=8, key=0x7ffee1fee580, value=0x7f01d940a000, flags=BPF_ANY}, 128) = 0 bpf(BPF_PROG_LOAD, {prog_type=BPF_PROG_TYPE_KPROBE, insn_cnt=42, insns=0x1871f30, license="", log_level=0, log_size=0, log_buf=NULL, kern_version=KERNEL_VERSION(5, 14, 16), prog_flags=0, prog_name="func_begin", prog_ifindex=0, expected_attach_type=BPF_CGROUP_INET_INGRESS, prog_btf_fd=3, func_info_rec_size=8, func_info=0x18746a0, func_info_cnt=1, line_info_rec_size=16, line_info=0x1874550, line_info_cnt=20, attach_btf_id=0, attach_prog_fd=0}, 128) = 9 bpf(BPF_PROG_LOAD, {prog_type=BPF_PROG_TYPE_KPROBE, insn_cnt=99, insns=0x18769b0, license="", log_level=0, log_size=0, log_buf=NULL, kern_version=KERNEL_VERSION(5, 14, 16), prog_flags=0, prog_name="func_end", prog_ifindex=0, expected_attach_type=BPF_CGROUP_INET_INGRESS, prog_btf_fd=3, func_info_rec_size=8, func_info=0x188a640, func_info_cnt=1, line_info_rec_size=16, line_info=0x188a660, line_info_cnt=20, attach_btf_id=0, attach_prog_fd=0}, 128) = 10 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=6, key=0x7ffee1fee8e0, value=0x7ffee1fee8df, flags=BPF_ANY}, 128) = 0 bpf(BPF_PROG_LOAD, {prog_type=BPF_PROG_TYPE_TRACEPOINT, insn_cnt=2, insns=0x7ffee1fee3c0, license="GPL", log_level=0, log_size=0, log_buf=NULL, kern_version=KERNEL_VERSION(0, 0, 0), prog_flags=0, prog_name="", prog_ifindex=0, expected_attach_type=BPF_CGROUP_INET_INGRESS, prog_btf_fd=0, func_info_rec_size=0, func_info=NULL, func_info_cnt=0, line_info_rec_size=0, line_info=NULL, line_info_cnt=0, attach_btf_id=0, attach_prog_fd=0}, 128) = 12 bpf(BPF_LINK_CREATE, {link_create={prog_fd=12, target_fd=-1, attach_type=0x29 /* BPF_??? */, flags=0}}, 128) = -1 EINVAL (Invalid argument) ^Cstrace: Process 1702285 detached # DURATION | COUNT | GRAPH | 0 - 1 us | 109 | ################# | 1 - 2 us | 127 | ################### | 2 - 4 us | 36 | ##### | 4 - 8 us | 20 | ### | 8 - 16 us | 2 | | 16 - 32 us | 0 | | 32 - 64 us | 0 | | 64 - 128 us | 0 | | 128 - 256 us | 0 | | 256 - 512 us | 0 | | 512 - 1024 us | 0 | | 1 - 2 ms | 0 | | 2 - 4 ms | 0 | | 4 - 8 ms | 0 | | 8 - 16 ms | 0 | | 16 - 32 ms | 0 | | 32 - 64 ms | 0 | | 64 - 128 ms | 0 | | 128 - 256 ms | 0 | | 256 - 512 ms | 0 | | 512 - 1024 ms | 0 | | 1 - ... s | 0 | | # Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com> Cc: Changbin Du <changbin.du@gmail.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <songliubraving@fb.com> Cc: Stephane Eranian <eranian@google.com> Link: https://lore.kernel.org/r/20211215185154.360314-6-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2021-12-16perf ftrace: Add -b/--use-bpf option for latency subcommandNamhyung Kim1-0/+93
The -b/--use-bpf option is to use BPF to get latency info of kernel functions. It'd have better performance impact and I observed that latency of same function is smaller than before when using BPF. Committer testing: # strace -e bpf perf ftrace latency -b -T __handle_mm_fault -a sleep 1 bpf(BPF_PROG_LOAD, {prog_type=BPF_PROG_TYPE_SOCKET_FILTER, insn_cnt=2, insns=0x7fff51914e00, license="GPL", log_level=0, log_size=0, log_buf=NULL, kern_version=KERNEL_VERSION(0, 0, 0), prog_flags=0, prog_name="", prog_ifindex=0, expected_attach_type=BPF_CGROUP_INET_INGRESS, prog_btf_fd=0, func_info_rec_size=0, func_info=NULL, func_info_cnt=0, line_info_rec_size=0, line_info=NULL, line_info_cnt=0, attach_btf_id=0, attach_prog_fd=0}, 128) = 3 bpf(BPF_BTF_LOAD, {btf="\237\353\1\0\30\0\0\0\0\0\0\0\20\0\0\0\20\0\0\0\5\0\0\0\1\0\0\0\0\0\0\1"..., btf_log_buf=NULL, btf_size=45, btf_log_size=0, btf_log_level=0}, 128) = 3 bpf(BPF_BTF_LOAD, {btf="\237\353\1\0\30\0\0\0\0\0\0\0000\0\0\0000\0\0\0\t\0\0\0\1\0\0\0\0\0\0\1"..., btf_log_buf=NULL, btf_size=81, btf_log_size=0, btf_log_level=0}, 128) = 3 bpf(BPF_BTF_LOAD, {btf="\237\353\1\0\30\0\0\0\0\0\0\08\0\0\08\0\0\0\t\0\0\0\0\0\0\0\0\0\0\1"..., btf_log_buf=NULL, btf_size=89, btf_log_size=0, btf_log_level=0}, 128) = 3 bpf(BPF_BTF_LOAD, {btf="\237\353\1\0\30\0\0\0\0\0\0\0\f\0\0\0\f\0\0\0\7\0\0\0\1\0\0\0\0\0\0\20"..., btf_log_buf=NULL, btf_size=43, btf_log_size=0, btf_log_level=0}, 128) = 3 bpf(BPF_BTF_LOAD, {btf="\237\353\1\0\30\0\0\0\0\0\0\0000\0\0\0000\0\0\0\t\0\0\0\1\0\0\0\0\0\0\1"..., btf_log_buf=NULL, btf_size=81, btf_log_size=0, btf_log_level=0}, 128) = 3 bpf(BPF_BTF_LOAD, {btf="\237\353\1\0\30\0\0\0\0\0\0\0000\0\0\0000\0\0\0\5\0\0\0\0\0\0\0\0\0\0\1"..., btf_log_buf=NULL, btf_size=77, btf_log_size=0, btf_log_level=0}, 128) = -1 EINVAL (Invalid argument) bpf(BPF_BTF_LOAD, {btf="\237\353\1\0\30\0\0\0\0\0\0\0\350\2\0\0\350\2\0\0\353\2\0\0\0\0\0\0\0\0\0\2"..., btf_log_buf=NULL, btf_size=1515, btf_log_size=0, btf_log_level=0}, 128) = 3 bpf(BPF_MAP_CREATE, {map_type=BPF_MAP_TYPE_ARRAY, key_size=4, value_size=32, max_entries=1, map_flags=0, inner_map_fd=0, map_name="", map_ifindex=0, btf_fd=0, btf_key_type_id=0, btf_value_type_id=0, btf_vmlinux_value_type_id=0}, 128) = 4 bpf(BPF_PROG_LOAD, {prog_type=BPF_PROG_TYPE_SOCKET_FILTER, insn_cnt=5, insns=0x7fff51914c30, license="GPL", log_level=0, log_size=0, log_buf=NULL, kern_version=KERNEL_VERSION(0, 0, 0), prog_flags=0, prog_name="", prog_ifindex=0, expected_attach_type=BPF_CGROUP_INET_INGRESS, prog_btf_fd=0, func_info_rec_size=0, func_info=NULL, func_info_cnt=0, line_info_rec_size=0, line_info=NULL, line_info_cnt=0, attach_btf_id=0, attach_prog_fd=0}, 128) = 5 bpf(BPF_MAP_CREATE, {map_type=BPF_MAP_TYPE_ARRAY, key_size=4, value_size=4, max_entries=1, map_flags=BPF_F_MMAPABLE, inner_map_fd=0, map_name="", map_ifindex=0, btf_fd=0, btf_key_type_id=0, btf_value_type_id=0, btf_vmlinux_value_type_id=0}, 128) = 4 bpf(BPF_PROG_LOAD, {prog_type=BPF_PROG_TYPE_SOCKET_FILTER, insn_cnt=2, insns=0x7fff51914a80, license="GPL", log_level=0, log_size=0, log_buf=NULL, kern_version=KERNEL_VERSION(0, 0, 0), prog_flags=0, prog_name="test", prog_ifindex=0, expected_attach_type=BPF_CGROUP_INET_INGRESS, prog_btf_fd=0, func_info_rec_size=0, func_info=NULL, func_info_cnt=0, line_info_rec_size=0, line_info=NULL, line_info_cnt=0, attach_btf_id=0, attach_prog_fd=0}, 128) = 4 bpf(BPF_MAP_CREATE, {map_type=BPF_MAP_TYPE_HASH, key_size=8, value_size=8, max_entries=10000, map_flags=0, inner_map_fd=0, map_name="functime", map_ifindex=0, btf_fd=3, btf_key_type_id=0, btf_value_type_id=0, btf_vmlinux_value_type_id=0}, 128) = 4 bpf(BPF_MAP_CREATE, {map_type=BPF_MAP_TYPE_HASH, key_size=4, value_size=1, max_entries=1, map_flags=0, inner_map_fd=0, map_name="cpu_filter", map_ifindex=0, btf_fd=3, btf_key_type_id=0, btf_value_type_id=0, btf_vmlinux_value_type_id=0}, 128) = 5 bpf(BPF_MAP_CREATE, {map_type=BPF_MAP_TYPE_HASH, key_size=4, value_size=1, max_entries=1, map_flags=0, inner_map_fd=0, map_name="task_filter", map_ifindex=0, btf_fd=3, btf_key_type_id=0, btf_value_type_id=0, btf_vmlinux_value_type_id=0}, 128) = 7 bpf(BPF_MAP_CREATE, {map_type=BPF_MAP_TYPE_PERCPU_ARRAY, key_size=4, value_size=8, max_entries=22, map_flags=0, inner_map_fd=0, map_name="latency", map_ifindex=0, btf_fd=3, btf_key_type_id=0, btf_value_type_id=0, btf_vmlinux_value_type_id=0}, 128) = 8 bpf(BPF_MAP_CREATE, {map_type=BPF_MAP_TYPE_ARRAY, key_size=4, value_size=4, max_entries=1, map_flags=BPF_F_MMAPABLE, inner_map_fd=0, map_name="func_lat.bss", map_ifindex=0, btf_fd=3, btf_key_type_id=0, btf_value_type_id=30, btf_vmlinux_value_type_id=0}, 128) = 9 bpf(BPF_MAP_UPDATE_ELEM, {map_fd=9, key=0x7fff51914c40, value=0x7f6e99be2000, flags=BPF_ANY}, 128) = 0 bpf(BPF_PROG_LOAD, {prog_type=BPF_PROG_TYPE_KPROBE, insn_cnt=18, insns=0x11e4160, license="", log_level=0, log_size=0, log_buf=NULL, kern_version=KERNEL_VERSION(5, 14, 16), prog_flags=0, prog_name="func_begin", prog_ifindex=0, expected_attach_type=BPF_CGROUP_INET_INGRESS, prog_btf_fd=3, func_info_rec_size=8, func_info=0x11dfc50, func_info_cnt=1, line_info_rec_size=16, line_info=0x11e04c0, line_info_cnt=9, attach_btf_id=0, attach_prog_fd=0}, 128) = 10 bpf(BPF_PROG_LOAD, {prog_type=BPF_PROG_TYPE_KPROBE, insn_cnt=99, insns=0x11ded70, license="", log_level=0, log_size=0, log_buf=NULL, kern_version=KERNEL_VERSION(5, 14, 16), prog_flags=0, prog_name="func_end", prog_ifindex=0, expected_attach_type=BPF_CGROUP_INET_INGRESS, prog_btf_fd=3, func_info_rec_size=8, func_info=0x11dfc70, func_info_cnt=1, line_info_rec_size=16, line_info=0x11f6e10, line_info_cnt=20, attach_btf_id=0, attach_prog_fd=0}, 128) = 11 bpf(BPF_PROG_LOAD, {prog_type=BPF_PROG_TYPE_TRACEPOINT, insn_cnt=2, insns=0x7fff51914a80, license="GPL", log_level=0, log_size=0, log_buf=NULL, kern_version=KERNEL_VERSION(0, 0, 0), prog_flags=0, prog_name="", prog_ifindex=0, expected_attach_type=BPF_CGROUP_INET_INGRESS, prog_btf_fd=0, func_info_rec_size=0, func_info=NULL, func_info_cnt=0, line_info_rec_size=0, line_info=NULL, line_info_cnt=0, attach_btf_id=0, attach_prog_fd=0}, 128) = 13 bpf(BPF_LINK_CREATE, {link_create={prog_fd=13, target_fd=-1, attach_type=0x29 /* BPF_??? */, flags=0}}, 128) = -1 EINVAL (Invalid argument) --- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=1699992, si_uid=0, si_status=0, si_utime=0, si_stime=0} --- bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=8, key=0x7fff51914f84, value=0x11f6fa0, flags=BPF_ANY}, 128) = 0 # DURATION | COUNT | GRAPH | 0 - 1 us | 52 | ################### | 1 - 2 us | 36 | ############# | 2 - 4 us | 24 | ######### | 4 - 8 us | 7 | ## | 8 - 16 us | 1 | | 16 - 32 us | 0 | | 32 - 64 us | 0 | | 64 - 128 us | 0 | | 128 - 256 us | 0 | | 256 - 512 us | 0 | | 512 - 1024 us | 0 | | 1 - 2 ms | 0 | | 2 - 4 ms | 0 | | 4 - 8 ms | 0 | | 8 - 16 ms | 0 | | 16 - 32 ms | 0 | | 32 - 64 ms | 0 | | 64 - 128 ms | 0 | | 128 - 256 ms | 0 | | 256 - 512 ms | 0 | | 512 - 1024 ms | 0 | | 1 - ... s | 0 | | +++ exited with 0 +++ # Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com> Cc: Changbin Du <changbin.du@gmail.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <songliubraving@fb.com> Cc: Stephane Eranian <eranian@google.com> Link: https://lore.kernel.org/r/20211215185154.360314-5-namhyung@kernel.org [ Add missing util/cpumap.h include and removed unused 'fd' variable ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2021-12-06perf bpf_skel: Do not use typedef to avoid error on old clangSong Liu3-20/+26
When building bpf_skel with clang-10, typedef causes confusions like: libbpf: map 'prev_readings': unexpected def kind var. Fix this by removing the typedef. Fixes: 7fac83aaf2eecc9e ("perf stat: Introduce 'bperf' to share hardware PMCs with BPF") Reported-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Song Liu <songliubraving@fb.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Link: http://lore.kernel.org/lkml/BEF5C312-4331-4A60-AEC0-AD7617CB2BC4@fb.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2021-12-06perf bpf: Fix building perf with BUILD_BPF_SKEL=1 by default in more distrosSong Liu3-5/+3
Arnaldo reported that building all his containers with BUILD_BPF_SKEL=1 to then make this the default he found problems in some distros where the system linux/bpf.h file was being used and lacked this: util/bpf_skel/bperf_leader.bpf.c:13:20: error: use of undeclared identifier 'BPF_F_PRESERVE_ELEMS' __uint(map_flags, BPF_F_PRESERVE_ELEMS); So use instead the vmlinux.h file generated by bpftool from BTF info. This fixed these as well, getting the build back working on debian:11, debian:experimental and ubuntu:21.10: In file included from In file included from util/bpf_skel/bperf_leader.bpf.cutil/bpf_skel/bpf_prog_profiler.bpf.c::33: : In file included from In file included from /usr/include/linux/bpf.h/usr/include/linux/bpf.h::1111: : /usr/include/linux/types.h/usr/include/linux/types.h::55::1010:: In file included from util/bpf_skel/bperf_follower.bpf.c:3fatal errorfatal error: : : In file included from /usr/include/linux/bpf.h:'asm/types.h' file not found11'asm/types.h' file not found: /usr/include/linux/types.h:5:10: fatal error: 'asm/types.h' file not found #include <asm/types.h>#include <asm/types.h> ^~~~~~~~~~~~~ ^~~~~~~~~~~~~ #include <asm/types.h> ^~~~~~~~~~~~~ 1 error generated. Reported-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Song Liu <song@kernel.org> Tested-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Link: http://lore.kernel.org/lkml/CF175681-8101-43D1-ABDB-449E644BE986@fb.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2021-07-05perf stat: Enable BPF counter with --for-each-cgroupNamhyung Kim1-0/+191
Recently bperf was added to use BPF to count perf events for various purposes. This is an extension for the approach and targetting to cgroup usages. Unlike the other bperf, it doesn't share the events with other processes but it'd reduce unnecessary events (and the overhead of multiplexing) for each monitored cgroup within the perf session. When --for-each-cgroup is used with --bpf-counters, it will open cgroup-switches event per cpu internally and attach the new BPF program to read given perf_events and to aggregate the results for cgroups. It's only called when task is switched to a task in a different cgroup. Signed-off-by: Namhyung Kim <namhyung@kernel.org> Acked-by: Song Liu <songliubraving@fb.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Ian Rogers <irogers@google.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Link: http://lore.kernel.org/lkml/20210701211227.1403788-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2021-03-23perf stat: Introduce 'bperf' to share hardware PMCs with BPFSong Liu4-0/+143
The perf tool uses performance monitoring counters (PMCs) to monitor system performance. The PMCs are limited hardware resources. For example, Intel CPUs have 3x fixed PMCs and 4x programmable PMCs per cpu. Modern data center systems use these PMCs in many different ways: system level monitoring, (maybe nested) container level monitoring, per process monitoring, profiling (in sample mode), etc. In some cases, there are more active perf_events than available hardware PMCs. To allow all perf_events to have a chance to run, it is necessary to do expensive time multiplexing of events. On the other hand, many monitoring tools count the common metrics (cycles, instructions). It is a waste to have multiple tools create multiple perf_events of "cycles" and occupy multiple PMCs. bperf tries to reduce such wastes by allowing multiple perf_events of "cycles" or "instructions" (at different scopes) to share PMUs. Instead of having each perf-stat session to read its own perf_events, bperf uses BPF programs to read the perf_events and aggregate readings to BPF maps. Then, the perf-stat session(s) reads the values from these BPF maps. Please refer to the comment before the definition of bperf_ops for the description of bperf architecture. bperf is off by default. To enable it, pass --bpf-counters option to perf-stat. bperf uses a BPF hashmap to share information about BPF programs and maps used by bperf. This map is pinned to bpffs. The default path is /sys/fs/bpf/perf_attr_map. The user could change the path with option --bpf-attr-map. Committer testing: # dmesg|grep "Performance Events" -A5 [ 0.225277] Performance Events: Fam17h+ core perfctr, AMD PMU driver. [ 0.225280] ... version: 0 [ 0.225280] ... bit width: 48 [ 0.225281] ... generic registers: 6 [ 0.225281] ... value mask: 0000ffffffffffff [ 0.225281] ... max period: 00007fffffffffff # # for a in $(seq 6) ; do perf stat -a -e cycles,instructions sleep 100000 & done [1] 2436231 [2] 2436232 [3] 2436233 [4] 2436234 [5] 2436235 [6] 2436236 # perf stat -a -e cycles,instructions sleep 0.1 Performance counter stats for 'system wide': 310,326,987 cycles (41.87%) 236,143,290 instructions # 0.76 insn per cycle (41.87%) 0.100800885 seconds time elapsed # We can see that the counters were enabled for this workload 41.87% of the time. Now with --bpf-counters: # for a in $(seq 32) ; do perf stat --bpf-counters -a -e cycles,instructions sleep 100000 & done [1] 2436514 [2] 2436515 [3] 2436516 [4] 2436517 [5] 2436518 [6] 2436519 [7] 2436520 [8] 2436521 [9] 2436522 [10] 2436523 [11] 2436524 [12] 2436525 [13] 2436526 [14] 2436527 [15] 2436528 [16] 2436529 [17] 2436530 [18] 2436531 [19] 2436532 [20] 2436533 [21] 2436534 [22] 2436535 [23] 2436536 [24] 2436537 [25] 2436538 [26] 2436539 [27] 2436540 [28] 2436541 [29] 2436542 [30] 2436543 [31] 2436544 [32] 2436545 # # ls -la /sys/fs/bpf/perf_attr_map -rw-------. 1 root root 0 Mar 23 14:53 /sys/fs/bpf/perf_attr_map # bpftool map | grep bperf | wc -l 64 # # bpftool map | tail 1265: percpu_array name accum_readings flags 0x0 key 4B value 24B max_entries 1 memlock 4096B 1266: hash name filter flags 0x0 key 4B value 4B max_entries 1 memlock 4096B 1267: array name bperf_fo.bss flags 0x400 key 4B value 8B max_entries 1 memlock 4096B btf_id 996 pids perf(2436545) 1268: percpu_array name accum_readings flags 0x0 key 4B value 24B max_entries 1 memlock 4096B 1269: hash name filter flags 0x0 key 4B value 4B max_entries 1 memlock 4096B 1270: array name bperf_fo.bss flags 0x400 key 4B value 8B max_entries 1 memlock 4096B btf_id 997 pids perf(2436541) 1285: array name pid_iter.rodata flags 0x480 key 4B value 4B max_entries 1 memlock 4096B btf_id 1017 frozen pids bpftool(2437504) 1286: array flags 0x0 key 4B value 32B max_entries 1 memlock 4096B # # bpftool map dump id 1268 | tail value (CPU 21): 8f f3 bc ca 00 00 00 00 80 fd 2a d1 4d 00 00 00 80 fd 2a d1 4d 00 00 00 value (CPU 22): 7e d5 64 4d 00 00 00 00 a4 8a 2e ee 4d 00 00 00 a4 8a 2e ee 4d 00 00 00 value (CPU 23): a7 78 3e 06 01 00 00 00 b2 34 94 f6 4d 00 00 00 b2 34 94 f6 4d 00 00 00 Found 1 element # bpftool map dump id 1268 | tail value (CPU 21): c6 8b d9 ca 00 00 00 00 20 c6 fc 83 4e 00 00 00 20 c6 fc 83 4e 00 00 00 value (CPU 22): 9c b4 d2 4d 00 00 00 00 3e 0c df 89 4e 00 00 00 3e 0c df 89 4e 00 00 00 value (CPU 23): 18 43 66 06 01 00 00 00 5b 69 ed 83 4e 00 00 00 5b 69 ed 83 4e 00 00 00 Found 1 element # bpftool map dump id 1268 | tail value (CPU 21): f2 6e db ca 00 00 00 00 92 67 4c ba 4e 00 00 00 92 67 4c ba 4e 00 00 00 value (CPU 22): dc 8e e1 4d 00 00 00 00 d9 32 7a c5 4e 00 00 00 d9 32 7a c5 4e 00 00 00 value (CPU 23): bd 2b 73 06 01 00 00 00 7c 73 87 bf 4e 00 00 00 7c 73 87 bf 4e 00 00 00 Found 1 element # # perf stat --bpf-counters -a -e cycles,instructions sleep 0.1 Performance counter stats for 'system wide': 119,410,122 cycles 152,105,479 instructions # 1.27 insn per cycle 0.101395093 seconds time elapsed # See? We had the counters enabled all the time. Signed-off-by: Song Liu <songliubraving@fb.com> Reviewed-by: Jiri Olsa <jolsa@kernel.org> Acked-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: kernel-team@fb.com Link: http://lore.kernel.org/lkml/20210316211837.910506-2-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2021-03-06perf skel: Remove some unused variables.Ian Rogers1-2/+1
Fixes -Wall warnings. Signed-off-by: Ian Rogers <irogers@google.com> Acked-by: Song Liu <songliubraving@fb.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Link: http://lore.kernel.org/lkml/20210306080840.3785816-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2021-01-20perf stat: Enable counting events for BPF programsSong Liu1-0/+93
Introduce 'perf stat -b' option, which counts events for BPF programs, like: [root@localhost ~]# ~/perf stat -e ref-cycles,cycles -b 254 -I 1000 1.487903822 115,200 ref-cycles 1.487903822 86,012 cycles 2.489147029 80,560 ref-cycles 2.489147029 73,784 cycles 3.490341825 60,720 ref-cycles 3.490341825 37,797 cycles 4.491540887 37,120 ref-cycles 4.491540887 31,963 cycles The example above counts 'cycles' and 'ref-cycles' of BPF program of id 254. This is similar to bpftool-prog-profile command, but more flexible. 'perf stat -b' creates per-cpu perf_event and loads fentry/fexit BPF programs (monitor-progs) to the target BPF program (target-prog). The monitor-progs read perf_event before and after the target-prog, and aggregate the difference in a BPF map. Then the user space reads data from these maps. A new 'struct bpf_counter' is introduced to provide a common interface that uses BPF programs/maps to count perf events. Committer notes: Removed all but bpf_counter.h includes from evsel.h, not needed at all. Also BPF map lookups for PERCPU_ARRAYs need to have as its value receive buffer passed to the kernel libbpf_num_possible_cpus() entries, not evsel__nr_cpus(evsel), as the former uses /sys/devices/system/cpu/possible while the later uses /sys/devices/system/cpu/online, which may be less than the 'possible' number making the bpf map lookup overwrite memory and cause hard to debug memory corruption. We need to continue using evsel__nr_cpus(evsel) when accessing the perf_counts array tho, not to overwrite another are of memory :-) Signed-off-by: Song Liu <songliubraving@fb.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Link: https://lore.kernel.org/lkml/20210120163031.GU12699@kernel.org/ Acked-by: Namhyung Kim <namhyung@kernel.org> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: kernel-team@fb.com Link: http://lore.kernel.org/lkml/20201229214214.3413833-4-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2021-01-15perf build: Support build BPF skeletons with perfSong Liu1-0/+3
BPF programs are useful in perf to profile BPF programs. BPF skeleton is by far the easiest way to write BPF tools. Enable building BPF skeletons in util/bpf_skel. A dummy bpf skeleton is added. More bpf skeletons will be added for different use cases. Signed-off-by: Song Liu <songliubraving@fb.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Acked-by: Jiri Olsa <jolsa@redhat.com> Acked-by: Namhyung Kim <namhyung@kernel.org> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: kernel-team@fb.com Link: http://lore.kernel.org/lkml/20201229214214.3413833-3-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>