aboutsummaryrefslogtreecommitdiff
path: root/tools/perf/util/bpf_skel
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf/util/bpf_skel')
-rw-r--r--tools/perf/util/bpf_skel/.gitignore2
-rw-r--r--tools/perf/util/bpf_skel/lock_contention.bpf.c136
-rw-r--r--tools/perf/util/bpf_skel/lock_data.h17
-rw-r--r--tools/perf/util/bpf_skel/sample-filter.h27
-rw-r--r--tools/perf/util/bpf_skel/sample_filter.bpf.c196
-rw-r--r--tools/perf/util/bpf_skel/vmlinux.h173
6 files changed, 541 insertions, 10 deletions
diff --git a/tools/perf/util/bpf_skel/.gitignore b/tools/perf/util/bpf_skel/.gitignore
index 5263e9e6c5d8..7a1c832825de 100644
--- a/tools/perf/util/bpf_skel/.gitignore
+++ b/tools/perf/util/bpf_skel/.gitignore
@@ -1,3 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
.tmp
-*.skel.h \ No newline at end of file
+*.skel.h
diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
index e6007eaeda1a..8d3cfbb3cc65 100644
--- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
+++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
@@ -4,11 +4,12 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
+#include <asm-generic/errno-base.h>
#include "lock_data.h"
-/* default buffer size */
-#define MAX_ENTRIES 10240
+/* for collect_lock_syms(). 4096 was rejected by the verifier */
+#define MAX_CPUS 1024
/* lock contention flags from include/trace/events/lock.h */
#define LCB_F_SPIN (1U << 0)
@@ -58,6 +59,13 @@ struct {
struct {
__uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u64));
+ __uint(value_size, sizeof(__u32));
+ __uint(max_entries, MAX_ENTRIES);
+} lock_syms SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
__uint(key_size, sizeof(__u32));
__uint(value_size, sizeof(__u8));
__uint(max_entries, 1);
@@ -92,6 +100,14 @@ struct rw_semaphore___new {
atomic_long_t owner;
} __attribute__((preserve_access_index));
+struct mm_struct___old {
+ struct rw_semaphore mmap_sem;
+} __attribute__((preserve_access_index));
+
+struct mm_struct___new {
+ struct rw_semaphore mmap_lock;
+} __attribute__((preserve_access_index));
+
/* control flags */
int enabled;
int has_cpu;
@@ -106,7 +122,13 @@ int lock_owner;
int aggr_mode;
/* error stat */
-int lost;
+int task_fail;
+int stack_fail;
+int time_fail;
+int data_fail;
+
+int task_map_full;
+int data_map_full;
static inline int can_record(u64 *ctx)
{
@@ -159,11 +181,12 @@ static inline int update_task_data(struct task_struct *task)
return -1;
p = bpf_map_lookup_elem(&task_data, &pid);
- if (p == NULL) {
+ if (p == NULL && !task_map_full) {
struct contention_task_data data = {};
BPF_CORE_READ_STR_INTO(&data.comm, task, comm);
- bpf_map_update_elem(&task_data, &pid, &data, BPF_NOEXIST);
+ if (bpf_map_update_elem(&task_data, &pid, &data, BPF_NOEXIST) == -E2BIG)
+ task_map_full = 1;
}
return 0;
@@ -182,7 +205,13 @@ static inline struct task_struct *get_lock_owner(__u64 lock, __u32 flags)
struct mutex *mutex = (void *)lock;
owner = BPF_CORE_READ(mutex, owner.counter);
} else if (flags == LCB_F_READ || flags == LCB_F_WRITE) {
-#if __has_builtin(bpf_core_type_matches)
+ /*
+ * Support for the BPF_TYPE_MATCHES argument to the
+ * __builtin_preserve_type_info builtin was added at some point during
+ * development of clang 15 and it's what is needed for
+ * bpf_core_type_matches.
+ */
+#if __has_builtin(__builtin_preserve_type_info) && __clang_major__ >= 15
if (bpf_core_type_matches(struct rw_semaphore___old)) {
struct rw_semaphore___old *rwsem = (void *)lock;
owner = (unsigned long)BPF_CORE_READ(rwsem, owner);
@@ -204,6 +233,41 @@ static inline struct task_struct *get_lock_owner(__u64 lock, __u32 flags)
return task;
}
+static inline __u32 check_lock_type(__u64 lock, __u32 flags)
+{
+ struct task_struct *curr;
+ struct mm_struct___old *mm_old;
+ struct mm_struct___new *mm_new;
+
+ switch (flags) {
+ case LCB_F_READ: /* rwsem */
+ case LCB_F_WRITE:
+ curr = bpf_get_current_task_btf();
+ if (curr->mm == NULL)
+ break;
+ mm_new = (void *)curr->mm;
+ if (bpf_core_field_exists(mm_new->mmap_lock)) {
+ if (&mm_new->mmap_lock == (void *)lock)
+ return LCD_F_MMAP_LOCK;
+ break;
+ }
+ mm_old = (void *)curr->mm;
+ if (bpf_core_field_exists(mm_old->mmap_sem)) {
+ if (&mm_old->mmap_sem == (void *)lock)
+ return LCD_F_MMAP_LOCK;
+ }
+ break;
+ case LCB_F_SPIN: /* spinlock */
+ curr = bpf_get_current_task_btf();
+ if (&curr->sighand->siglock == (void *)lock)
+ return LCD_F_SIGHAND_LOCK;
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
SEC("tp_btf/contention_begin")
int contention_begin(u64 *ctx)
{
@@ -224,7 +288,7 @@ int contention_begin(u64 *ctx)
bpf_map_update_elem(&tstamp, &pid, &zero, BPF_ANY);
pelem = bpf_map_lookup_elem(&tstamp, &pid);
if (pelem == NULL) {
- lost++;
+ __sync_fetch_and_add(&task_fail, 1);
return 0;
}
}
@@ -237,7 +301,7 @@ int contention_begin(u64 *ctx)
pelem->stack_id = bpf_get_stackid(ctx, &stacks,
BPF_F_FAST_STACK_CMP | stack_skip);
if (pelem->stack_id < 0)
- lost++;
+ __sync_fetch_and_add(&stack_fail, 1);
} else if (aggr_mode == LOCK_AGGR_TASK) {
struct task_struct *task;
@@ -281,6 +345,11 @@ int contention_end(u64 *ctx)
return 0;
duration = bpf_ktime_get_ns() - pelem->timestamp;
+ if ((__s64)duration < 0) {
+ bpf_map_delete_elem(&tstamp, &pid);
+ __sync_fetch_and_add(&time_fail, 1);
+ return 0;
+ }
switch (aggr_mode) {
case LOCK_AGGR_CALLER:
@@ -306,6 +375,12 @@ int contention_end(u64 *ctx)
data = bpf_map_lookup_elem(&lock_stat, &key);
if (!data) {
+ if (data_map_full) {
+ bpf_map_delete_elem(&tstamp, &pid);
+ __sync_fetch_and_add(&data_fail, 1);
+ return 0;
+ }
+
struct contention_data first = {
.total_time = duration,
.max_time = duration,
@@ -313,8 +388,17 @@ int contention_end(u64 *ctx)
.count = 1,
.flags = pelem->flags,
};
+ int err;
+
+ if (aggr_mode == LOCK_AGGR_ADDR)
+ first.flags |= check_lock_type(pelem->lock, pelem->flags);
- bpf_map_update_elem(&lock_stat, &key, &first, BPF_NOEXIST);
+ err = bpf_map_update_elem(&lock_stat, &key, &first, BPF_NOEXIST);
+ if (err < 0) {
+ if (err == -E2BIG)
+ data_map_full = 1;
+ __sync_fetch_and_add(&data_fail, 1);
+ }
bpf_map_delete_elem(&tstamp, &pid);
return 0;
}
@@ -332,4 +416,38 @@ int contention_end(u64 *ctx)
return 0;
}
+extern struct rq runqueues __ksym;
+
+struct rq___old {
+ raw_spinlock_t lock;
+} __attribute__((preserve_access_index));
+
+struct rq___new {
+ raw_spinlock_t __lock;
+} __attribute__((preserve_access_index));
+
+SEC("raw_tp/bpf_test_finish")
+int BPF_PROG(collect_lock_syms)
+{
+ __u64 lock_addr, lock_off;
+ __u32 lock_flag;
+
+ if (bpf_core_field_exists(struct rq___new, __lock))
+ lock_off = offsetof(struct rq___new, __lock);
+ else
+ lock_off = offsetof(struct rq___old, lock);
+
+ for (int i = 0; i < MAX_CPUS; i++) {
+ struct rq *rq = bpf_per_cpu_ptr(&runqueues, i);
+
+ if (rq == NULL)
+ break;
+
+ lock_addr = (__u64)(void *)rq + lock_off;
+ lock_flag = LOCK_CLASS_RQLOCK;
+ bpf_map_update_elem(&lock_syms, &lock_addr, &lock_flag, BPF_ANY);
+ }
+ return 0;
+}
+
char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/perf/util/bpf_skel/lock_data.h b/tools/perf/util/bpf_skel/lock_data.h
index 3d35fd4407ac..260062a9f2ab 100644
--- a/tools/perf/util/bpf_skel/lock_data.h
+++ b/tools/perf/util/bpf_skel/lock_data.h
@@ -15,6 +15,18 @@ struct contention_task_data {
char comm[TASK_COMM_LEN];
};
+/* default buffer size */
+#define MAX_ENTRIES 16384
+
+/*
+ * Upper bits of the flags in the contention_data are used to identify
+ * some well-known locks which do not have symbols (non-global locks).
+ */
+#define LCD_F_MMAP_LOCK (1U << 31)
+#define LCD_F_SIGHAND_LOCK (1U << 30)
+
+#define LCB_F_MAX_FLAGS (1U << 7)
+
struct contention_data {
u64 total_time;
u64 min_time;
@@ -29,4 +41,9 @@ enum lock_aggr_mode {
LOCK_AGGR_CALLER,
};
+enum lock_class_sym {
+ LOCK_CLASS_NONE,
+ LOCK_CLASS_RQLOCK,
+};
+
#endif /* UTIL_BPF_SKEL_LOCK_DATA_H */
diff --git a/tools/perf/util/bpf_skel/sample-filter.h b/tools/perf/util/bpf_skel/sample-filter.h
new file mode 100644
index 000000000000..2e96e1ab084a
--- /dev/null
+++ b/tools/perf/util/bpf_skel/sample-filter.h
@@ -0,0 +1,27 @@
+#ifndef PERF_UTIL_BPF_SKEL_SAMPLE_FILTER_H
+#define PERF_UTIL_BPF_SKEL_SAMPLE_FILTER_H
+
+#define MAX_FILTERS 64
+
+/* supported filter operations */
+enum perf_bpf_filter_op {
+ PBF_OP_EQ,
+ PBF_OP_NEQ,
+ PBF_OP_GT,
+ PBF_OP_GE,
+ PBF_OP_LT,
+ PBF_OP_LE,
+ PBF_OP_AND,
+ PBF_OP_GROUP_BEGIN,
+ PBF_OP_GROUP_END,
+};
+
+/* BPF map entry for filtering */
+struct perf_bpf_filter_entry {
+ enum perf_bpf_filter_op op;
+ __u32 part; /* sub-sample type info when it has multiple values */
+ __u64 flags; /* perf sample type flags */
+ __u64 value;
+};
+
+#endif /* PERF_UTIL_BPF_SKEL_SAMPLE_FILTER_H */ \ No newline at end of file
diff --git a/tools/perf/util/bpf_skel/sample_filter.bpf.c b/tools/perf/util/bpf_skel/sample_filter.bpf.c
new file mode 100644
index 000000000000..cffe493af1ed
--- /dev/null
+++ b/tools/perf/util/bpf_skel/sample_filter.bpf.c
@@ -0,0 +1,196 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2023 Google
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+#include "sample-filter.h"
+
+/* BPF map that will be filled by user space */
+struct filters {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct perf_bpf_filter_entry);
+ __uint(max_entries, MAX_FILTERS);
+} filters SEC(".maps");
+
+int dropped;
+
+void *bpf_cast_to_kern_ctx(void *) __ksym;
+
+/* new kernel perf_sample_data definition */
+struct perf_sample_data___new {
+ __u64 sample_flags;
+} __attribute__((preserve_access_index));
+
+/* new kernel perf_mem_data_src definition */
+union perf_mem_data_src__new {
+ __u64 val;
+ struct {
+ __u64 mem_op:5, /* type of opcode */
+ mem_lvl:14, /* memory hierarchy level */
+ mem_snoop:5, /* snoop mode */
+ mem_lock:2, /* lock instr */
+ mem_dtlb:7, /* tlb access */
+ mem_lvl_num:4, /* memory hierarchy level number */
+ mem_remote:1, /* remote */
+ mem_snoopx:2, /* snoop mode, ext */
+ mem_blk:3, /* access blocked */
+ mem_hops:3, /* hop level */
+ mem_rsvd:18;
+ };
+};
+
+/* helper function to return the given perf sample data */
+static inline __u64 perf_get_sample(struct bpf_perf_event_data_kern *kctx,
+ struct perf_bpf_filter_entry *entry)
+{
+ struct perf_sample_data___new *data = (void *)kctx->data;
+
+ if (!bpf_core_field_exists(data->sample_flags) ||
+ (data->sample_flags & entry->flags) == 0)
+ return 0;
+
+ switch (entry->flags) {
+ case PERF_SAMPLE_IP:
+ return kctx->data->ip;
+ case PERF_SAMPLE_ID:
+ return kctx->data->id;
+ case PERF_SAMPLE_TID:
+ if (entry->part)
+ return kctx->data->tid_entry.pid;
+ else
+ return kctx->data->tid_entry.tid;
+ case PERF_SAMPLE_CPU:
+ return kctx->data->cpu_entry.cpu;
+ case PERF_SAMPLE_TIME:
+ return kctx->data->time;
+ case PERF_SAMPLE_ADDR:
+ return kctx->data->addr;
+ case PERF_SAMPLE_PERIOD:
+ return kctx->data->period;
+ case PERF_SAMPLE_TRANSACTION:
+ return kctx->data->txn;
+ case PERF_SAMPLE_WEIGHT_STRUCT:
+ if (entry->part == 1)
+ return kctx->data->weight.var1_dw;
+ if (entry->part == 2)
+ return kctx->data->weight.var2_w;
+ if (entry->part == 3)
+ return kctx->data->weight.var3_w;
+ /* fall through */
+ case PERF_SAMPLE_WEIGHT:
+ return kctx->data->weight.full;
+ case PERF_SAMPLE_PHYS_ADDR:
+ return kctx->data->phys_addr;
+ case PERF_SAMPLE_CODE_PAGE_SIZE:
+ return kctx->data->code_page_size;
+ case PERF_SAMPLE_DATA_PAGE_SIZE:
+ return kctx->data->data_page_size;
+ case PERF_SAMPLE_DATA_SRC:
+ if (entry->part == 1)
+ return kctx->data->data_src.mem_op;
+ if (entry->part == 2)
+ return kctx->data->data_src.mem_lvl_num;
+ if (entry->part == 3) {
+ __u32 snoop = kctx->data->data_src.mem_snoop;
+ __u32 snoopx = kctx->data->data_src.mem_snoopx;
+
+ return (snoopx << 5) | snoop;
+ }
+ if (entry->part == 4)
+ return kctx->data->data_src.mem_remote;
+ if (entry->part == 5)
+ return kctx->data->data_src.mem_lock;
+ if (entry->part == 6)
+ return kctx->data->data_src.mem_dtlb;
+ if (entry->part == 7)
+ return kctx->data->data_src.mem_blk;
+ if (entry->part == 8) {
+ union perf_mem_data_src__new *data = (void *)&kctx->data->data_src;
+
+ if (bpf_core_field_exists(data->mem_hops))
+ return data->mem_hops;
+
+ return 0;
+ }
+ /* return the whole word */
+ return kctx->data->data_src.val;
+ default:
+ break;
+ }
+ return 0;
+}
+
+#define CHECK_RESULT(data, op, val) \
+ if (!(data op val)) { \
+ if (!in_group) \
+ goto drop; \
+ } else if (in_group) { \
+ group_result = 1; \
+ }
+
+/* BPF program to be called from perf event overflow handler */
+SEC("perf_event")
+int perf_sample_filter(void *ctx)
+{
+ struct bpf_perf_event_data_kern *kctx;
+ struct perf_bpf_filter_entry *entry;
+ __u64 sample_data;
+ int in_group = 0;
+ int group_result = 0;
+ int i;
+
+ kctx = bpf_cast_to_kern_ctx(ctx);
+
+ for (i = 0; i < MAX_FILTERS; i++) {
+ int key = i; /* needed for verifier :( */
+
+ entry = bpf_map_lookup_elem(&filters, &key);
+ if (entry == NULL)
+ break;
+ sample_data = perf_get_sample(kctx, entry);
+
+ switch (entry->op) {
+ case PBF_OP_EQ:
+ CHECK_RESULT(sample_data, ==, entry->value)
+ break;
+ case PBF_OP_NEQ:
+ CHECK_RESULT(sample_data, !=, entry->value)
+ break;
+ case PBF_OP_GT:
+ CHECK_RESULT(sample_data, >, entry->value)
+ break;
+ case PBF_OP_GE:
+ CHECK_RESULT(sample_data, >=, entry->value)
+ break;
+ case PBF_OP_LT:
+ CHECK_RESULT(sample_data, <, entry->value)
+ break;
+ case PBF_OP_LE:
+ CHECK_RESULT(sample_data, <=, entry->value)
+ break;
+ case PBF_OP_AND:
+ CHECK_RESULT(sample_data, &, entry->value)
+ break;
+ case PBF_OP_GROUP_BEGIN:
+ in_group = 1;
+ group_result = 0;
+ break;
+ case PBF_OP_GROUP_END:
+ if (group_result == 0)
+ goto drop;
+ in_group = 0;
+ break;
+ }
+ }
+ /* generate sample data */
+ return 1;
+
+drop:
+ __sync_fetch_and_add(&dropped, 1);
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/perf/util/bpf_skel/vmlinux.h b/tools/perf/util/bpf_skel/vmlinux.h
new file mode 100644
index 000000000000..449b1ea91fc4
--- /dev/null
+++ b/tools/perf/util/bpf_skel/vmlinux.h
@@ -0,0 +1,173 @@
+#ifndef __VMLINUX_H
+#define __VMLINUX_H
+
+#include <linux/bpf.h>
+#include <linux/types.h>
+#include <linux/perf_event.h>
+#include <stdbool.h>
+
+// non-UAPI kernel data structures, used in the .bpf.c BPF tool component.
+
+// Just the fields used in these tools preserving the access index so that
+// libbpf can fixup offsets with the ones used in the kernel when loading the
+// BPF bytecode, if they differ from what is used here.
+
+typedef __u8 u8;
+typedef __u32 u32;
+typedef __u64 u64;
+typedef __s64 s64;
+
+typedef int pid_t;
+
+enum cgroup_subsys_id {
+ perf_event_cgrp_id = 8,
+};
+
+enum {
+ HI_SOFTIRQ = 0,
+ TIMER_SOFTIRQ,
+ NET_TX_SOFTIRQ,
+ NET_RX_SOFTIRQ,
+ BLOCK_SOFTIRQ,
+ IRQ_POLL_SOFTIRQ,
+ TASKLET_SOFTIRQ,
+ SCHED_SOFTIRQ,
+ HRTIMER_SOFTIRQ,
+ RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */
+
+ NR_SOFTIRQS
+};
+
+typedef struct {
+ s64 counter;
+} __attribute__((preserve_access_index)) atomic64_t;
+
+typedef atomic64_t atomic_long_t;
+
+struct raw_spinlock {
+ int rawlock;
+} __attribute__((preserve_access_index));
+
+typedef struct raw_spinlock raw_spinlock_t;
+
+typedef struct {
+ struct raw_spinlock rlock;
+} __attribute__((preserve_access_index)) spinlock_t;
+
+struct sighand_struct {
+ spinlock_t siglock;
+} __attribute__((preserve_access_index));
+
+struct rw_semaphore {
+ atomic_long_t owner;
+} __attribute__((preserve_access_index));
+
+struct mutex {
+ atomic_long_t owner;
+} __attribute__((preserve_access_index));
+
+struct kernfs_node {
+ u64 id;
+} __attribute__((preserve_access_index));
+
+struct cgroup {
+ struct kernfs_node *kn;
+ int level;
+} __attribute__((preserve_access_index));
+
+struct cgroup_subsys_state {
+ struct cgroup *cgroup;
+} __attribute__((preserve_access_index));
+
+struct css_set {
+ struct cgroup_subsys_state *subsys[13];
+ struct cgroup *dfl_cgrp;
+} __attribute__((preserve_access_index));
+
+struct mm_struct {
+ struct rw_semaphore mmap_lock;
+} __attribute__((preserve_access_index));
+
+struct task_struct {
+ unsigned int flags;
+ struct mm_struct *mm;
+ pid_t pid;
+ pid_t tgid;
+ char comm[16];
+ struct sighand_struct *sighand;
+ struct css_set *cgroups;
+} __attribute__((preserve_access_index));
+
+struct trace_entry {
+ short unsigned int type;
+ unsigned char flags;
+ unsigned char preempt_count;
+ int pid;
+} __attribute__((preserve_access_index));
+
+struct trace_event_raw_irq_handler_entry {
+ struct trace_entry ent;
+ int irq;
+ u32 __data_loc_name;
+ char __data[];
+} __attribute__((preserve_access_index));
+
+struct trace_event_raw_irq_handler_exit {
+ struct trace_entry ent;
+ int irq;
+ int ret;
+ char __data[];
+} __attribute__((preserve_access_index));
+
+struct trace_event_raw_softirq {
+ struct trace_entry ent;
+ unsigned int vec;
+ char __data[];
+} __attribute__((preserve_access_index));
+
+struct trace_event_raw_workqueue_execute_start {
+ struct trace_entry ent;
+ void *work;
+ void *function;
+ char __data[];
+} __attribute__((preserve_access_index));
+
+struct trace_event_raw_workqueue_execute_end {
+ struct trace_entry ent;
+ void *work;
+ void *function;
+ char __data[];
+} __attribute__((preserve_access_index));
+
+struct trace_event_raw_workqueue_activate_work {
+ struct trace_entry ent;
+ void *work;
+ char __data[];
+} __attribute__((preserve_access_index));
+
+struct perf_sample_data {
+ u64 addr;
+ u64 period;
+ union perf_sample_weight weight;
+ u64 txn;
+ union perf_mem_data_src data_src;
+ u64 ip;
+ struct {
+ u32 pid;
+ u32 tid;
+ } tid_entry;
+ u64 time;
+ u64 id;
+ struct {
+ u32 cpu;
+ } cpu_entry;
+ u64 phys_addr;
+ u64 data_page_size;
+ u64 code_page_size;
+} __attribute__((__aligned__(64))) __attribute__((preserve_access_index));
+
+struct bpf_perf_event_data_kern {
+ struct perf_sample_data *data;
+ struct perf_event *event;
+} __attribute__((preserve_access_index));
+#endif // __VMLINUX_H