diff options
author | Daniel Borkmann <daniel@iogearbox.net> | 2018-03-28 22:55:20 +0200 |
---|---|---|
committer | Daniel Borkmann <daniel@iogearbox.net> | 2018-03-28 22:55:21 +0200 |
commit | f6ef56589374670b7c1939720dfa00212bd80a5b (patch) | |
tree | f8f5e66c8fba220b34783b0d38518192bc53bf39 /kernel/bpf/syscall.c | |
parent | 6f5c39fa5cd4a78c5432021e981aa8f79437a32c (diff) | |
parent | 3bbe0869884ceebffd59d5519c1d560207c6e116 (diff) |
Merge branch 'bpf-raw-tracepoints'
Alexei Starovoitov says:
====================
v7->v8:
- moved 'u32 num_args' from 'struct tracepoint' into 'struct bpf_raw_event_map'
that increases memory overhead, but can be optimized/compressed later.
Now it's zero changes in tracepoint.[ch]
v6->v7:
- adopted Steven's bpf_raw_tp_map section approach to find tracepoint
and corresponding bpf probe function instead of kallsyms approach.
dropped kernel_tracepoint_find_by_name() patch
v5->v6:
- avoid changing semantics of for_each_kernel_tracepoint() function, instead
introduce kernel_tracepoint_find_by_name() helper
v4->v5:
- adopted Daniel's fancy REPEAT macro in bpf_trace.c in patch 6
v3->v4:
- adopted Linus's CAST_TO_U64 macro to cast any integer, pointer, or small
struct to u64. That nicely reduced the size of patch 1
v2->v3:
- with Linus's suggestion introduced generic COUNT_ARGS and CONCATENATE macros
(or rather moved them from apparmor)
that cleaned up patch 6
- added patch 4 to refactor trace_iwlwifi_dev_ucode_error() from 17 args to 4
Now any tracepoint with >12 args will have build error
v1->v2:
- simplified api by combining bpf_raw_tp_open(name) + bpf_attach(prog_fd) into
bpf_raw_tp_open(name, prog_fd) as suggested by Daniel.
That simplifies bpf_detach as well, which is now a simple close() of the fd.
- fixed memory leak in error path which was spotted by Daniel.
- fixed bpf_get_stackid(), bpf_perf_event_output() called from raw tracepoints
- added more tests
- fixed allyesconfig build caught by buildbot
v1:
This patch set is a different way to address the pressing need to access
task_struct pointers in sched tracepoints from bpf programs.
The first approach simply added these pointers to sched tracepoints:
https://lkml.org/lkml/2017/12/14/753
which Peter nacked.
A few options were discussed and eventually the discussion converged on
doing bpf specific tracepoint_probe_register() probe functions.
Details here:
https://lkml.org/lkml/2017/12/20/929
Patch 1 is a kernel-wide cleanup converting pass-struct-by-value into
pass-struct-by-reference for tracepoints.
Patches 2 and 3 are minor cleanups to address allyesconfig build
Patch 4 refactors trace_iwlwifi_dev_ucode_error from 17 to 4 args
Patch 5 introduces COUNT_ARGS macro
Patch 6 introduces BPF_RAW_TRACEPOINT api.
Auto-cleanup and support for multiple concurrent users are must-have
features of a tracing api. For bpf raw tracepoints it looks like:
// load bpf prog with BPF_PROG_TYPE_RAW_TRACEPOINT type
prog_fd = bpf_prog_load(...);
// receive anon_inode fd for given bpf_raw_tracepoint
// and attach bpf program to it
raw_tp_fd = bpf_raw_tracepoint_open("xdp_exception", prog_fd);
Ctrl-C of tracing daemon or cmdline tool will automatically
detach bpf program, unload it and unregister tracepoint probe.
More details in patch 6.
Patch 7 - trivial support in libbpf
Patches 8, 9 - user space tests
samples/bpf/test_overhead performance on 1 cpu:
tracepoint    base  kprobe+bpf  tracepoint+bpf  raw_tracepoint+bpf
task_rename   1.1M  769K        947K            1.0M
urandom_read  789K  697K        750K            755K
====================
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Diffstat (limited to 'kernel/bpf/syscall.c')
-rw-r--r-- | kernel/bpf/syscall.c | 78 |
1 files changed, 78 insertions, 0 deletions
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 77d45bd9f507..95ca2523fa6e 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1315,6 +1315,81 @@ static int bpf_obj_get(const union bpf_attr *attr) attr->file_flags); } +struct bpf_raw_tracepoint { + struct bpf_raw_event_map *btp; + struct bpf_prog *prog; +}; + +static int bpf_raw_tracepoint_release(struct inode *inode, struct file *filp) +{ + struct bpf_raw_tracepoint *raw_tp = filp->private_data; + + if (raw_tp->prog) { + bpf_probe_unregister(raw_tp->btp, raw_tp->prog); + bpf_prog_put(raw_tp->prog); + } + kfree(raw_tp); + return 0; +} + +static const struct file_operations bpf_raw_tp_fops = { + .release = bpf_raw_tracepoint_release, + .read = bpf_dummy_read, + .write = bpf_dummy_write, +}; + +#define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd + +static int bpf_raw_tracepoint_open(const union bpf_attr *attr) +{ + struct bpf_raw_tracepoint *raw_tp; + struct bpf_raw_event_map *btp; + struct bpf_prog *prog; + char tp_name[128]; + int tp_fd, err; + + if (strncpy_from_user(tp_name, u64_to_user_ptr(attr->raw_tracepoint.name), + sizeof(tp_name) - 1) < 0) + return -EFAULT; + tp_name[sizeof(tp_name) - 1] = 0; + + btp = bpf_find_raw_tracepoint(tp_name); + if (!btp) + return -ENOENT; + + raw_tp = kzalloc(sizeof(*raw_tp), GFP_USER); + if (!raw_tp) + return -ENOMEM; + raw_tp->btp = btp; + + prog = bpf_prog_get_type(attr->raw_tracepoint.prog_fd, + BPF_PROG_TYPE_RAW_TRACEPOINT); + if (IS_ERR(prog)) { + err = PTR_ERR(prog); + goto out_free_tp; + } + + err = bpf_probe_register(raw_tp->btp, prog); + if (err) + goto out_put_prog; + + raw_tp->prog = prog; + tp_fd = anon_inode_getfd("bpf-raw-tracepoint", &bpf_raw_tp_fops, raw_tp, + O_CLOEXEC); + if (tp_fd < 0) { + bpf_probe_unregister(raw_tp->btp, prog); + err = tp_fd; + goto out_put_prog; + } + return tp_fd; + +out_put_prog: + bpf_prog_put(prog); +out_free_tp: + kfree(raw_tp); + return err; +} + #ifdef CONFIG_CGROUP_BPF #define 
BPF_PROG_ATTACH_LAST_FIELD attach_flags @@ -1925,6 +2000,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz case BPF_OBJ_GET_INFO_BY_FD: err = bpf_obj_get_info_by_fd(&attr, uattr); break; + case BPF_RAW_TRACEPOINT_OPEN: + err = bpf_raw_tracepoint_open(&attr); + break; default: err = -EINVAL; break; |