aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Daniel Borkmann <[email protected]> 2018-03-08 02:22:34 +0100
committer: Daniel Borkmann <[email protected]> 2018-03-08 02:22:35 +0100
commit: 12ef9bda06a5536fc377bab636074848799d7749 (patch)
tree: 822b9bb7680c97b84f98f8d471b297637048d473
parent: a366e300ae9fc466d333e6d8f2bc5d58ed248041 (diff)
parent: 12fe12253c56a26e591ceefbdf0998b391022003 (diff)
Merge branch 'bpf-perf-sample-addr'
Teng Qin says:

====================
These patches add support that allows BPF programs attached to perf events to read the address values recorded with the perf events. These values are requested by specifying sample_type with PERF_SAMPLE_ADDR when calling perf_event_open().

The main motivation for these changes is to support building memory or lock access profiling and tracing tools. For example, on Intel CPUs, the recorded address values for supported memory or lock access perf events would be the access or lock target addresses from the PEBS buffer. Such information would be very valuable for building tools that help understand memory access or lock acquisition patterns.
====================

Signed-off-by: Daniel Borkmann <[email protected]>
-rw-r--r-- include/uapi/linux/bpf_perf_event.h | 1
-rw-r--r-- kernel/trace/bpf_trace.c | 20
-rw-r--r-- samples/bpf/trace_event_kern.c | 4
-rw-r--r-- samples/bpf/trace_event_user.c | 15
4 files changed, 36 insertions, 4 deletions
diff --git a/include/uapi/linux/bpf_perf_event.h b/include/uapi/linux/bpf_perf_event.h
index 8f95303f9d80..eb1b9d21250c 100644
--- a/include/uapi/linux/bpf_perf_event.h
+++ b/include/uapi/linux/bpf_perf_event.h
@@ -13,6 +13,7 @@
struct bpf_perf_event_data {
bpf_user_pt_regs_t regs;
__u64 sample_period;
+ __u64 addr;
};
#endif /* _UAPI__LINUX_BPF_PERF_EVENT_H__ */
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index c0a9e310d715..c634e093951f 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -726,8 +726,7 @@ const struct bpf_prog_ops tracepoint_prog_ops = {
static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
struct bpf_insn_access_aux *info)
{
- const int size_sp = FIELD_SIZEOF(struct bpf_perf_event_data,
- sample_period);
+ const int size_u64 = sizeof(u64);
if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
return false;
@@ -738,8 +737,13 @@ static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type
switch (off) {
case bpf_ctx_range(struct bpf_perf_event_data, sample_period):
- bpf_ctx_record_field_size(info, size_sp);
- if (!bpf_ctx_narrow_access_ok(off, size, size_sp))
+ bpf_ctx_record_field_size(info, size_u64);
+ if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
+ return false;
+ break;
+ case bpf_ctx_range(struct bpf_perf_event_data, addr):
+ bpf_ctx_record_field_size(info, size_u64);
+ if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
return false;
break;
default:
@@ -766,6 +770,14 @@ static u32 pe_prog_convert_ctx_access(enum bpf_access_type type,
bpf_target_off(struct perf_sample_data, period, 8,
target_size));
break;
+ case offsetof(struct bpf_perf_event_data, addr):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
+ data), si->dst_reg, si->src_reg,
+ offsetof(struct bpf_perf_event_data_kern, data));
+ *insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
+ bpf_target_off(struct perf_sample_data, addr, 8,
+ target_size));
+ break;
default:
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
regs), si->dst_reg, si->src_reg,
diff --git a/samples/bpf/trace_event_kern.c b/samples/bpf/trace_event_kern.c
index a77a583d94d4..7068fbdde951 100644
--- a/samples/bpf/trace_event_kern.c
+++ b/samples/bpf/trace_event_kern.c
@@ -39,6 +39,7 @@ int bpf_prog1(struct bpf_perf_event_data *ctx)
{
char time_fmt1[] = "Time Enabled: %llu, Time Running: %llu";
char time_fmt2[] = "Get Time Failed, ErrCode: %d";
+ char addr_fmt[] = "Address recorded on event: %llx";
char fmt[] = "CPU-%d period %lld ip %llx";
u32 cpu = bpf_get_smp_processor_id();
struct bpf_perf_event_value value_buf;
@@ -64,6 +65,9 @@ int bpf_prog1(struct bpf_perf_event_data *ctx)
else
bpf_trace_printk(time_fmt2, sizeof(time_fmt2), ret);
+ if (ctx->addr != 0)
+ bpf_trace_printk(addr_fmt, sizeof(addr_fmt), ctx->addr);
+
val = bpf_map_lookup_elem(&counts, &key);
if (val)
(*val)++;
diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c
index bf4f1b6d9a52..56f7a259a7c9 100644
--- a/samples/bpf/trace_event_user.c
+++ b/samples/bpf/trace_event_user.c
@@ -215,6 +215,17 @@ static void test_bpf_perf_event(void)
/* Intel Instruction Retired */
.config = 0xc0,
};
+ struct perf_event_attr attr_type_raw_lock_load = {
+ .sample_freq = SAMPLE_FREQ,
+ .freq = 1,
+ .type = PERF_TYPE_RAW,
+ /* Intel MEM_UOPS_RETIRED.LOCK_LOADS */
+ .config = 0x21d0,
+ /* Request to record lock address from PEBS */
+ .sample_type = PERF_SAMPLE_ADDR,
+ /* Record address value requires precise event */
+ .precise_ip = 2,
+ };
printf("Test HW_CPU_CYCLES\n");
test_perf_event_all_cpu(&attr_type_hw);
@@ -236,6 +247,10 @@ static void test_bpf_perf_event(void)
test_perf_event_all_cpu(&attr_type_raw);
test_perf_event_task(&attr_type_raw);
+ printf("Test Lock Load\n");
+ test_perf_event_all_cpu(&attr_type_raw_lock_load);
+ test_perf_event_task(&attr_type_raw_lock_load);
+
printf("*** PASS ***\n");
}