aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNamhyung Kim <[email protected]>2024-09-02 13:05:14 -0700
committerArnaldo Carvalho de Melo <[email protected]>2024-09-03 11:53:15 -0300
commit4afdc00c378f34943fb3a3cc08db4babdacb5c5b (patch)
tree39f609e112b16c9537a634f8833fd73135d14ab7
parent066fd840873f2187deb4a646c5f531a8dba2fd36 (diff)
perf lock contention: Constify control data for BPF
The control knobs set before loading BPF programs should be declared as 'const volatile' so that they can be optimized by the BPF core.

Committer testing:

root@x1:~# perf lock contention --use-bpf
contended total wait max wait avg wait type caller
5 31.57 us 14.93 us 6.31 us mutex btrfs_delayed_update_inode+0x43
1 16.91 us 16.91 us 16.91 us rwsem:R btrfs_tree_read_lock_nested+0x1b
1 15.13 us 15.13 us 15.13 us spinlock btrfs_getattr+0xd1
1 6.65 us 6.65 us 6.65 us rwsem:R btrfs_tree_read_lock_nested+0x1b
1 4.34 us 4.34 us 4.34 us spinlock process_one_work+0x1a9
root@x1:~#
root@x1:~# perf trace -e bpf --max-events 10 perf lock contention --use-bpf
0.000 ( 0.013 ms): :2948281/2948281 bpf(cmd: 36, uattr: 0x7ffd5f12d730, size: 8) = -1 EOPNOTSUPP (Operation not supported)
0.024 ( 0.120 ms): :2948281/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d460, size: 148) = 16
0.158 ( 0.034 ms): :2948281/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d520, size: 148) = 16
26.653 ( 0.154 ms): perf/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d3d0, size: 148) = 16
26.825 ( 0.014 ms): perf/2948281 bpf(uattr: 0x7ffd5f12d580, size: 80) = 16
87.924 ( 0.038 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d400, size: 40) = 16
87.988 ( 0.006 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d470, size: 40) = 16
88.019 ( 0.006 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d250, size: 40) = 16
88.029 ( 0.172 ms): perf/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d320, size: 148) = 17
88.217 ( 0.005 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d4d0, size: 40) = 16
root@x1:~#

Signed-off-by: Namhyung Kim <[email protected]>
Tested-by: Arnaldo Carvalho de Melo <[email protected]>
Cc: Adrian Hunter <[email protected]>
Cc: Ian Rogers <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Kan Liang <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Song Liu <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
-rw-r--r--tools/perf/util/bpf_lock_contention.c45
-rw-r--r--tools/perf/util/bpf_skel/lock_contention.bpf.c27
2 files changed, 38 insertions, 34 deletions
diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
index bc4e92c0c08b..41a1ad087895 100644
--- a/tools/perf/util/bpf_lock_contention.c
+++ b/tools/perf/util/bpf_lock_contention.c
@@ -46,14 +46,22 @@ int lock_contention_prepare(struct lock_contention *con)
else
bpf_map__set_max_entries(skel->maps.stacks, 1);
- if (target__has_cpu(target))
+ if (target__has_cpu(target)) {
+ skel->rodata->has_cpu = 1;
ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus);
- if (target__has_task(target))
+ }
+ if (target__has_task(target)) {
+ skel->rodata->has_task = 1;
ntasks = perf_thread_map__nr(evlist->core.threads);
- if (con->filters->nr_types)
+ }
+ if (con->filters->nr_types) {
+ skel->rodata->has_type = 1;
ntypes = con->filters->nr_types;
- if (con->filters->nr_cgrps)
+ }
+ if (con->filters->nr_cgrps) {
+ skel->rodata->has_cgroup = 1;
ncgrps = con->filters->nr_cgrps;
+ }
/* resolve lock name filters to addr */
if (con->filters->nr_syms) {
@@ -82,6 +90,7 @@ int lock_contention_prepare(struct lock_contention *con)
con->filters->addrs = addrs;
}
naddrs = con->filters->nr_addrs;
+ skel->rodata->has_addr = 1;
}
bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
@@ -90,6 +99,16 @@ int lock_contention_prepare(struct lock_contention *con)
bpf_map__set_max_entries(skel->maps.addr_filter, naddrs);
bpf_map__set_max_entries(skel->maps.cgroup_filter, ncgrps);
+ skel->rodata->stack_skip = con->stack_skip;
+ skel->rodata->aggr_mode = con->aggr_mode;
+ skel->rodata->needs_callstack = con->save_callstack;
+ skel->rodata->lock_owner = con->owner;
+
+ if (con->aggr_mode == LOCK_AGGR_CGROUP || con->filters->nr_cgrps) {
+ if (cgroup_is_v2("perf_event"))
+ skel->rodata->use_cgroup_v2 = 1;
+ }
+
if (lock_contention_bpf__load(skel) < 0) {
pr_err("Failed to load lock-contention BPF skeleton\n");
return -1;
@@ -99,7 +118,6 @@ int lock_contention_prepare(struct lock_contention *con)
u32 cpu;
u8 val = 1;
- skel->bss->has_cpu = 1;
fd = bpf_map__fd(skel->maps.cpu_filter);
for (i = 0; i < ncpus; i++) {
@@ -112,7 +130,6 @@ int lock_contention_prepare(struct lock_contention *con)
u32 pid;
u8 val = 1;
- skel->bss->has_task = 1;
fd = bpf_map__fd(skel->maps.task_filter);
for (i = 0; i < ntasks; i++) {
@@ -125,7 +142,6 @@ int lock_contention_prepare(struct lock_contention *con)
u32 pid = evlist->workload.pid;
u8 val = 1;
- skel->bss->has_task = 1;
fd = bpf_map__fd(skel->maps.task_filter);
bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
}
@@ -133,7 +149,6 @@ int lock_contention_prepare(struct lock_contention *con)
if (con->filters->nr_types) {
u8 val = 1;
- skel->bss->has_type = 1;
fd = bpf_map__fd(skel->maps.type_filter);
for (i = 0; i < con->filters->nr_types; i++)
@@ -143,7 +158,6 @@ int lock_contention_prepare(struct lock_contention *con)
if (con->filters->nr_addrs) {
u8 val = 1;
- skel->bss->has_addr = 1;
fd = bpf_map__fd(skel->maps.addr_filter);
for (i = 0; i < con->filters->nr_addrs; i++)
@@ -153,25 +167,14 @@ int lock_contention_prepare(struct lock_contention *con)
if (con->filters->nr_cgrps) {
u8 val = 1;
- skel->bss->has_cgroup = 1;
fd = bpf_map__fd(skel->maps.cgroup_filter);
for (i = 0; i < con->filters->nr_cgrps; i++)
bpf_map_update_elem(fd, &con->filters->cgrps[i], &val, BPF_ANY);
}
- /* these don't work well if in the rodata section */
- skel->bss->stack_skip = con->stack_skip;
- skel->bss->aggr_mode = con->aggr_mode;
- skel->bss->needs_callstack = con->save_callstack;
- skel->bss->lock_owner = con->owner;
-
- if (con->aggr_mode == LOCK_AGGR_CGROUP) {
- if (cgroup_is_v2("perf_event"))
- skel->bss->use_cgroup_v2 = 1;
-
+ if (con->aggr_mode == LOCK_AGGR_CGROUP)
read_all_cgroups(&con->cgroups);
- }
bpf_program__set_autoload(skel->progs.collect_lock_syms, false);
diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
index 52a876b42699..1069bda5d733 100644
--- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
+++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
@@ -117,21 +117,22 @@ struct mm_struct___new {
} __attribute__((preserve_access_index));
/* control flags */
-int enabled;
-int has_cpu;
-int has_task;
-int has_type;
-int has_addr;
-int has_cgroup;
-int needs_callstack;
-int stack_skip;
-int lock_owner;
-
-int use_cgroup_v2;
-int perf_subsys_id = -1;
+const volatile int has_cpu;
+const volatile int has_task;
+const volatile int has_type;
+const volatile int has_addr;
+const volatile int has_cgroup;
+const volatile int needs_callstack;
+const volatile int stack_skip;
+const volatile int lock_owner;
+const volatile int use_cgroup_v2;
/* determine the key of lock stat */
-int aggr_mode;
+const volatile int aggr_mode;
+
+int enabled;
+
+int perf_subsys_id = -1;
__u64 end_ts;