diff options
Diffstat (limited to 'kernel/trace')
-rw-r--r-- | kernel/trace/Makefile | 1 | ||||
-rw-r--r-- | kernel/trace/bpf_trace.c | 413 | ||||
-rw-r--r-- | kernel/trace/ftrace.c | 10 | ||||
-rw-r--r-- | kernel/trace/ring_buffer.c | 20 | ||||
-rw-r--r-- | kernel/trace/trace.c | 123 | ||||
-rw-r--r-- | kernel/trace/trace.h | 14 | ||||
-rw-r--r-- | kernel/trace/trace_btf.c | 122 | ||||
-rw-r--r-- | kernel/trace/trace_btf.h | 11 | ||||
-rw-r--r-- | kernel/trace/trace_entries.h | 2 | ||||
-rw-r--r-- | kernel/trace/trace_eprobe.c | 22 | ||||
-rw-r--r-- | kernel/trace/trace_events.c | 76 | ||||
-rw-r--r-- | kernel/trace/trace_events_filter.c | 315 | ||||
-rw-r--r-- | kernel/trace/trace_events_user.c | 15 | ||||
-rw-r--r-- | kernel/trace/trace_export.c | 9 | ||||
-rw-r--r-- | kernel/trace/trace_fprobe.c | 59 | ||||
-rw-r--r-- | kernel/trace/trace_hwlat.c | 2 | ||||
-rw-r--r-- | kernel/trace/trace_kprobe.c | 14 | ||||
-rw-r--r-- | kernel/trace/trace_probe.c | 499 | ||||
-rw-r--r-- | kernel/trace/trace_probe.h | 32 | ||||
-rw-r--r-- | kernel/trace/trace_syscalls.c | 12 | ||||
-rw-r--r-- | kernel/trace/trace_uprobe.c | 11 |
21 files changed, 1413 insertions, 369 deletions
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 64b61f67a403..057cd975d014 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -99,6 +99,7 @@ obj-$(CONFIG_KGDB_KDB) += trace_kdb.o endif obj-$(CONFIG_DYNAMIC_EVENTS) += trace_dynevent.o obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o +obj-$(CONFIG_PROBE_EVENTS_BTF_ARGS) += trace_btf.o obj-$(CONFIG_UPROBE_EVENTS) += trace_uprobe.o obj-$(CONFIG_BOOTTIME_TRACING) += trace_boot.o obj-$(CONFIG_FTRACE_RECORD_RECURSION) += trace_recursion_record.o diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index bd1a42b23f3f..a7264b2c17ad 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -23,6 +23,7 @@ #include <linux/sort.h> #include <linux/key.h> #include <linux/verification.h> +#include <linux/namei.h> #include <net/bpf_sk_storage.h> @@ -86,6 +87,9 @@ static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size, static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx); static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx); +static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx); +static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx); + /** * trace_call_bpf - invoke BPF program * @call: tracepoint event @@ -223,17 +227,6 @@ const struct bpf_func_proto bpf_probe_read_user_str_proto = { .arg3_type = ARG_ANYTHING, }; -static __always_inline int -bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr) -{ - int ret; - - ret = copy_from_kernel_nofault(dst, unsafe_ptr, size); - if (unlikely(ret < 0)) - memset(dst, 0, size); - return ret; -} - BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size, const void *, unsafe_ptr) { @@ -1066,7 +1059,16 @@ static unsigned long get_entry_ip(unsigned long fentry_ip) BPF_CALL_1(bpf_get_func_ip_kprobe, struct pt_regs *, regs) { - struct kprobe *kp = kprobe_running(); + struct bpf_trace_run_ctx *run_ctx __maybe_unused; + struct kprobe *kp; + +#ifdef CONFIG_UPROBES + run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx); + if (run_ctx->is_uprobe) + return ((struct uprobe_dispatch_data *)current->utask->vaddr)->bp_addr; +#endif + + kp = kprobe_running(); if (!kp || !(kp->flags & KPROBE_FLAG_ON_FUNC_ENTRY)) return 0; @@ -1105,6 +1107,30 @@ static const struct bpf_func_proto bpf_get_attach_cookie_proto_kmulti = { .arg1_type = ARG_PTR_TO_CTX, }; +BPF_CALL_1(bpf_get_func_ip_uprobe_multi, struct pt_regs *, regs) +{ + return bpf_uprobe_multi_entry_ip(current->bpf_ctx); +} + +static const struct bpf_func_proto bpf_get_func_ip_proto_uprobe_multi = { + .func = bpf_get_func_ip_uprobe_multi, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + +BPF_CALL_1(bpf_get_attach_cookie_uprobe_multi, struct pt_regs *, regs) +{ + return bpf_uprobe_multi_cookie(current->bpf_ctx); +} + +static const struct bpf_func_proto bpf_get_attach_cookie_proto_umulti = { + .func = bpf_get_attach_cookie_uprobe_multi, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + BPF_CALL_1(bpf_get_attach_cookie_trace, void *, ctx) { struct bpf_trace_run_ctx *run_ctx; @@ -1547,13 +1573,17 @@ kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_override_return_proto; #endif case BPF_FUNC_get_func_ip: - return prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI ? - &bpf_get_func_ip_proto_kprobe_multi : - &bpf_get_func_ip_proto_kprobe; + if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI) + return &bpf_get_func_ip_proto_kprobe_multi; + if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI) + return &bpf_get_func_ip_proto_uprobe_multi; + return &bpf_get_func_ip_proto_kprobe; case BPF_FUNC_get_attach_cookie: - return prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI ? - &bpf_get_attach_cookie_proto_kmulti : - &bpf_get_attach_cookie_proto_trace; + if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI) + return &bpf_get_attach_cookie_proto_kmulti; + if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI) + return &bpf_get_attach_cookie_proto_umulti; + return &bpf_get_attach_cookie_proto_trace; default: return bpf_tracing_func_proto(func_id, prog); } @@ -2376,9 +2406,13 @@ int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, if (is_tracepoint || is_syscall_tp) { *buf = is_tracepoint ? event->tp_event->tp->name : event->tp_event->name; - *fd_type = BPF_FD_TYPE_TRACEPOINT; - *probe_offset = 0x0; - *probe_addr = 0x0; + /* We allow NULL pointer for tracepoint */ + if (fd_type) + *fd_type = BPF_FD_TYPE_TRACEPOINT; + if (probe_offset) + *probe_offset = 0x0; + if (probe_addr) + *probe_addr = 0x0; } else { /* kprobe/uprobe */ err = -EOPNOTSUPP; @@ -2391,7 +2425,7 @@ int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, #ifdef CONFIG_UPROBE_EVENTS if (flags & TRACE_EVENT_FL_UPROBE) err = bpf_get_uprobe_info(event, fd_type, buf, - probe_offset, + probe_offset, probe_addr, event->attr.type == PERF_TYPE_TRACEPOINT); #endif } @@ -2476,6 +2510,7 @@ struct bpf_kprobe_multi_link { u32 cnt; u32 mods_cnt; struct module **mods; + u32 flags; }; struct bpf_kprobe_multi_run_ctx { @@ -2565,9 +2600,44 @@ static void bpf_kprobe_multi_link_dealloc(struct bpf_link *link) kfree(kmulti_link); } +static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link, + struct bpf_link_info *info) +{ + u64 __user *uaddrs = u64_to_user_ptr(info->kprobe_multi.addrs); + struct bpf_kprobe_multi_link *kmulti_link; + u32 ucount = info->kprobe_multi.count; + int err = 0, i; + + if (!uaddrs ^ !ucount) + return -EINVAL; + + kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link); + info->kprobe_multi.count = kmulti_link->cnt; + info->kprobe_multi.flags = kmulti_link->flags; + + if (!uaddrs) + return 0; + if (ucount < kmulti_link->cnt) + err = -ENOSPC; + else + ucount = kmulti_link->cnt; + + if (kallsyms_show_value(current_cred())) { + if (copy_to_user(uaddrs, kmulti_link->addrs, ucount * sizeof(u64))) + return -EFAULT; + } else { + for (i = 0; i < ucount; i++) { + if (put_user(0, uaddrs + i)) + return -EFAULT; + } + } + return err; +} + static const struct bpf_link_ops bpf_kprobe_multi_link_lops = { .release = bpf_kprobe_multi_link_release, .dealloc = bpf_kprobe_multi_link_dealloc, + .fill_link_info = bpf_kprobe_multi_link_fill_link_info, }; static void bpf_kprobe_multi_cookie_swap(void *a, void *b, int size, const void *priv) @@ -2881,6 +2951,7 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr link->addrs = addrs; link->cookies = cookies; link->cnt = cnt; + link->flags = flags; if (cookies) { /* @@ -2931,3 +3002,301 @@ static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx) return 0; } #endif + +#ifdef CONFIG_UPROBES +struct bpf_uprobe_multi_link; + +struct bpf_uprobe { + struct bpf_uprobe_multi_link *link; + loff_t offset; + u64 cookie; + struct uprobe_consumer consumer; +}; + +struct bpf_uprobe_multi_link { + struct path path; + struct bpf_link link; + u32 cnt; + struct bpf_uprobe *uprobes; + struct task_struct *task; +}; + +struct bpf_uprobe_multi_run_ctx { + struct bpf_run_ctx run_ctx; + unsigned long entry_ip; + struct bpf_uprobe *uprobe; +}; + +static void bpf_uprobe_unregister(struct path *path, struct bpf_uprobe *uprobes, + u32 cnt) +{ + u32 i; + + for (i = 0; i < cnt; i++) { + uprobe_unregister(d_real_inode(path->dentry), uprobes[i].offset, + &uprobes[i].consumer); + } +} + +static void bpf_uprobe_multi_link_release(struct bpf_link *link) +{ + struct bpf_uprobe_multi_link *umulti_link; + + umulti_link = container_of(link, struct bpf_uprobe_multi_link, link); + bpf_uprobe_unregister(&umulti_link->path, umulti_link->uprobes, umulti_link->cnt); +} + +static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link) +{ + struct bpf_uprobe_multi_link *umulti_link; + + umulti_link = container_of(link, struct bpf_uprobe_multi_link, link); + if (umulti_link->task) + put_task_struct(umulti_link->task); + path_put(&umulti_link->path); + kvfree(umulti_link->uprobes); + kfree(umulti_link); +} + +static const struct bpf_link_ops bpf_uprobe_multi_link_lops = { + .release = bpf_uprobe_multi_link_release, + .dealloc = bpf_uprobe_multi_link_dealloc, +}; + +static int uprobe_prog_run(struct bpf_uprobe *uprobe, + unsigned long entry_ip, + struct pt_regs *regs) +{ + struct bpf_uprobe_multi_link *link = uprobe->link; + struct bpf_uprobe_multi_run_ctx run_ctx = { + .entry_ip = entry_ip, + .uprobe = uprobe, + }; + struct bpf_prog *prog = link->link.prog; + bool sleepable = prog->aux->sleepable; + struct bpf_run_ctx *old_run_ctx; + int err = 0; + + if (link->task && current != link->task) + return 0; + + if (sleepable) + rcu_read_lock_trace(); + else + rcu_read_lock(); + + migrate_disable(); + + old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); + err = bpf_prog_run(link->link.prog, regs); + bpf_reset_run_ctx(old_run_ctx); + + migrate_enable(); + + if (sleepable) + rcu_read_unlock_trace(); + else + rcu_read_unlock(); + return err; +} + +static bool +uprobe_multi_link_filter(struct uprobe_consumer *con, enum uprobe_filter_ctx ctx, + struct mm_struct *mm) +{ + struct bpf_uprobe *uprobe; + + uprobe = container_of(con, struct bpf_uprobe, consumer); + return uprobe->link->task->mm == mm; +} + +static int +uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs) +{ + struct bpf_uprobe *uprobe; + + uprobe = container_of(con, struct bpf_uprobe, consumer); + return uprobe_prog_run(uprobe, instruction_pointer(regs), regs); +} + +static int +uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs) +{ + struct bpf_uprobe *uprobe; + + uprobe = container_of(con, struct bpf_uprobe, consumer); + return uprobe_prog_run(uprobe, func, regs); +} + +static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx) +{ + struct bpf_uprobe_multi_run_ctx *run_ctx; + + run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx, run_ctx); + return run_ctx->entry_ip; +} + +static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx) +{ + struct bpf_uprobe_multi_run_ctx *run_ctx; + + run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx, run_ctx); + return run_ctx->uprobe->cookie; +} + +int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) +{ + struct bpf_uprobe_multi_link *link = NULL; + unsigned long __user *uref_ctr_offsets; + unsigned long *ref_ctr_offsets = NULL; + struct bpf_link_primer link_primer; + struct bpf_uprobe *uprobes = NULL; + struct task_struct *task = NULL; + unsigned long __user *uoffsets; + u64 __user *ucookies; + void __user *upath; + u32 flags, cnt, i; + struct path path; + char *name; + pid_t pid; + int err; + + /* no support for 32bit archs yet */ + if (sizeof(u64) != sizeof(void *)) + return -EOPNOTSUPP; + + if (prog->expected_attach_type != BPF_TRACE_UPROBE_MULTI) + return -EINVAL; + + flags = attr->link_create.uprobe_multi.flags; + if (flags & ~BPF_F_UPROBE_MULTI_RETURN) + return -EINVAL; + + /* + * path, offsets and cnt are mandatory, + * ref_ctr_offsets and cookies are optional + */ + upath = u64_to_user_ptr(attr->link_create.uprobe_multi.path); + uoffsets = u64_to_user_ptr(attr->link_create.uprobe_multi.offsets); + cnt = attr->link_create.uprobe_multi.cnt; + + if (!upath || !uoffsets || !cnt) + return -EINVAL; + + uref_ctr_offsets = u64_to_user_ptr(attr->link_create.uprobe_multi.ref_ctr_offsets); + ucookies = u64_to_user_ptr(attr->link_create.uprobe_multi.cookies); + + name = strndup_user(upath, PATH_MAX); + if (IS_ERR(name)) { + err = PTR_ERR(name); + return err; + } + + err = kern_path(name, LOOKUP_FOLLOW, &path); + kfree(name); + if (err) + return err; + + if (!d_is_reg(path.dentry)) { + err = -EBADF; + goto error_path_put; + } + + pid = attr->link_create.uprobe_multi.pid; + if (pid) { + rcu_read_lock(); + task = get_pid_task(find_vpid(pid), PIDTYPE_PID); + rcu_read_unlock(); + if (!task) + goto error_path_put; + } + + err = -ENOMEM; + + link = kzalloc(sizeof(*link), GFP_KERNEL); + uprobes = kvcalloc(cnt, sizeof(*uprobes), GFP_KERNEL); + + if (!uprobes || !link) + goto error_free; + + if (uref_ctr_offsets) { + ref_ctr_offsets = kvcalloc(cnt, sizeof(*ref_ctr_offsets), GFP_KERNEL); + if (!ref_ctr_offsets) + goto error_free; + } + + for (i = 0; i < cnt; i++) { + if (ucookies && __get_user(uprobes[i].cookie, ucookies + i)) { + err = -EFAULT; + goto error_free; + } + if (uref_ctr_offsets && __get_user(ref_ctr_offsets[i], uref_ctr_offsets + i)) { + err = -EFAULT; + goto error_free; + } + if (__get_user(uprobes[i].offset, uoffsets + i)) { + err = -EFAULT; + goto error_free; + } + + uprobes[i].link = link; + + if (flags & BPF_F_UPROBE_MULTI_RETURN) + uprobes[i].consumer.ret_handler = uprobe_multi_link_ret_handler; + else + uprobes[i].consumer.handler = uprobe_multi_link_handler; + + if (pid) + uprobes[i].consumer.filter = uprobe_multi_link_filter; + } + + link->cnt = cnt; + link->uprobes = uprobes; + link->path = path; + link->task = task; + + bpf_link_init(&link->link, BPF_LINK_TYPE_UPROBE_MULTI, + &bpf_uprobe_multi_link_lops, prog); + + for (i = 0; i < cnt; i++) { + err = uprobe_register_refctr(d_real_inode(link->path.dentry), + uprobes[i].offset, + ref_ctr_offsets ? ref_ctr_offsets[i] : 0, + &uprobes[i].consumer); + if (err) { + bpf_uprobe_unregister(&path, uprobes, i); + goto error_free; + } + } + + err = bpf_link_prime(&link->link, &link_primer); + if (err) + goto error_free; + + kvfree(ref_ctr_offsets); + return bpf_link_settle(&link_primer); + +error_free: + kvfree(ref_ctr_offsets); + kvfree(uprobes); + kfree(link); + if (task) + put_task_struct(task); +error_path_put: + path_put(&path); + return err; +} +#else /* !CONFIG_UPROBES */ +int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) +{ + return -EOPNOTSUPP; +} +static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx) +{ + return 0; +} +static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx) +{ + return 0; +} +#endif /* CONFIG_UPROBES */ diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 05c0024815bf..8de8bec5f366 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -6779,8 +6779,7 @@ void ftrace_release_mod(struct module *mod) last_pg = &ftrace_pages_start; for (pg = ftrace_pages_start; pg; pg = *last_pg) { rec = &pg->records[0]; - if (within_module_core(rec->ip, mod) || - within_module_init(rec->ip, mod)) { + if (within_module(rec->ip, mod)) { /* * As core pages are first, the first * page should never be a module page. @@ -6852,8 +6851,7 @@ void ftrace_module_enable(struct module *mod) * not part of this module, then skip this pg, * which the "break" will do. */ - if (!within_module_core(rec->ip, mod) && - !within_module_init(rec->ip, mod)) + if (!within_module(rec->ip, mod)) break; /* Weak functions should still be ignored */ @@ -7142,9 +7140,7 @@ void ftrace_free_mem(struct module *mod, void *start_ptr, void *end_ptr) struct dyn_ftrace key; struct ftrace_mod_map *mod_map = NULL; struct ftrace_init_func *func, *func_next; - struct list_head clear_hash; - - INIT_LIST_HEAD(&clear_hash); + LIST_HEAD(clear_hash); key.ip = start; key.flags = end; /* overload flags, as it is unsigned long */ diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 52dea5dd5362..78502d4c7214 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -692,10 +692,7 @@ static void rb_time_set(rb_time_t *t, u64 val) static inline bool rb_time_read_cmpxchg(local_t *l, unsigned long expect, unsigned long set) { - unsigned long ret; - - ret = local_cmpxchg(l, expect, set); - return ret == expect; + return local_try_cmpxchg(l, &expect, set); } static bool rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set) @@ -752,9 +749,7 @@ static void rb_time_set(rb_time_t *t, u64 val) static bool rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set) { - u64 val; - val = local64_cmpxchg(&t->time, expect, set); - return val == expect; + return local64_try_cmpxchg(&t->time, &expect, set); } #endif @@ -1494,14 +1489,11 @@ static bool rb_head_page_replace(struct buffer_page *old, { unsigned long *ptr = (unsigned long *)&old->list.prev->next; unsigned long val; - unsigned long ret; val = *ptr & ~RB_FLAG_MASK; val |= RB_PAGE_HEAD; - ret = cmpxchg(ptr, val, (unsigned long)&new->list); - - return ret == val; + return try_cmpxchg(ptr, &val, (unsigned long)&new->list); } /* @@ -3003,7 +2995,6 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, { unsigned long new_index, old_index; struct buffer_page *bpage; - unsigned long index; unsigned long addr; u64 write_stamp; u64 delta; @@ -3060,8 +3051,9 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, */ old_index += write_mask; new_index += write_mask; - index = local_cmpxchg(&bpage->write, old_index, new_index); - if (index == old_index) { + + /* caution: old_index gets updated on cmpxchg failure */ + if (local_try_cmpxchg(&bpage->write, &old_index, new_index)) { /* update counters */ local_sub(event_length, &cpu_buffer->entries_bytes); return true; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8e64aaad5361..2b4ded753367 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3119,7 +3119,6 @@ static void __ftrace_trace_stack(struct trace_buffer *buffer, struct ftrace_stack *fstack; struct stack_entry *entry; int stackidx; - void *ptr; /* * Add one, for this function and the call to save_stack_trace() @@ -3157,32 +3156,16 @@ static void __ftrace_trace_stack(struct trace_buffer *buffer, nr_entries = stack_trace_save(fstack->calls, size, skip); } - size = nr_entries * sizeof(unsigned long); event = __trace_buffer_lock_reserve(buffer, TRACE_STACK, - (sizeof(*entry) - sizeof(entry->caller)) + size, + struct_size(entry, caller, nr_entries), trace_ctx); if (!event) goto out; - ptr = ring_buffer_event_data(event); - entry = ptr; - - /* - * For backward compatibility reasons, the entry->caller is an - * array of 8 slots to store the stack. This is also exported - * to user space. The amount allocated on the ring buffer actually - * holds enough for the stack specified by nr_entries. This will - * go into the location of entry->caller. Due to string fortifiers - * checking the size of the destination of memcpy() it triggers - * when it detects that size is greater than 8. To hide this from - * the fortifiers, we use "ptr" and pointer arithmetic to assign caller. - * - * The below is really just: - * memcpy(&entry->caller, fstack->calls, size); - */ - ptr += offsetof(typeof(*entry), caller); - memcpy(ptr, fstack->calls, size); + entry = ring_buffer_event_data(event); entry->size = nr_entries; + memcpy(&entry->caller, fstack->calls, + flex_array_size(entry, caller, nr_entries)); if (!call_filter_check_discard(call, entry, buffer, event)) __buffer_unlock_commit(buffer, event); @@ -4206,18 +4189,12 @@ static void *s_start(struct seq_file *m, loff_t *pos) loff_t l = 0; int cpu; - /* - * copy the tracer to avoid using a global lock all around. - * iter->trace is a copy of current_trace, the pointer to the - * name may be used instead of a strcmp(), as iter->trace->name - * will point to the same string as current_trace->name. - */ mutex_lock(&trace_types_lock); - if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name)) { + if (unlikely(tr->current_trace != iter->trace)) { /* Close iter->trace before switching to the new current tracer */ if (iter->trace->close) iter->trace->close(iter); - *iter->trace = *tr->current_trace; + iter->trace = tr->current_trace; /* Reopen the new current tracer */ if (iter->trace->open) iter->trace->open(iter); @@ -4829,6 +4806,25 @@ static const struct seq_operations tracer_seq_ops = { .show = s_show, }; +/* + * Note, as iter itself can be allocated and freed in different + * ways, this function is only used to free its content, and not + * the iterator itself. The only requirement to all the allocations + * is that it must zero all fields (kzalloc), as freeing works with + * ethier allocated content or NULL. + */ +static void free_trace_iter_content(struct trace_iterator *iter) +{ + /* The fmt is either NULL, allocated or points to static_fmt_buf */ + if (iter->fmt != static_fmt_buf) + kfree(iter->fmt); + + kfree(iter->temp); + kfree(iter->buffer_iter); + mutex_destroy(&iter->mutex); + free_cpumask_var(iter->started); +} + static struct trace_iterator * __tracing_open(struct inode *inode, struct file *file, bool snapshot) { @@ -4870,16 +4866,8 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) iter->fmt = NULL; iter->fmt_size = 0; - /* - * We make a copy of the current tracer to avoid concurrent - * changes on it while we are reading. - */ mutex_lock(&trace_types_lock); - iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL); - if (!iter->trace) - goto fail; - - *iter->trace = *tr->current_trace; + iter->trace = tr->current_trace; if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL)) goto fail; @@ -4944,9 +4932,7 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) fail: mutex_unlock(&trace_types_lock); - kfree(iter->trace); - kfree(iter->temp); - kfree(iter->buffer_iter); + free_trace_iter_content(iter); release: seq_release_private(inode, file); return ERR_PTR(-ENOMEM); @@ -5025,12 +5011,7 @@ static int tracing_release(struct inode *inode, struct file *file) mutex_unlock(&trace_types_lock); - mutex_destroy(&iter->mutex); - free_cpumask_var(iter->started); - kfree(iter->fmt); - kfree(iter->temp); - kfree(iter->trace); - kfree(iter->buffer_iter); + free_trace_iter_content(iter); seq_release_private(inode, file); return 0; @@ -5730,7 +5711,8 @@ static const char readme_msg[] = "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n" #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS - "\t $stack<index>, $stack, $retval, $comm, $arg<N>, <argname>\n" + "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n" + "\t <argname>[->field[->field|.field...]],\n" #else "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n" #endif @@ -6318,6 +6300,15 @@ static void set_buffer_entries(struct array_buffer *buf, unsigned long val) per_cpu_ptr(buf->data, cpu)->entries = val; } +static void update_buffer_entries(struct array_buffer *buf, int cpu) +{ + if (cpu == RING_BUFFER_ALL_CPUS) { + set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0)); + } else { + per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu); + } +} + #ifdef CONFIG_TRACER_MAX_TRACE /* resize @tr's buffer to the size of @size_tr's entries */ static int resize_buffer_duplicate_size(struct array_buffer *trace_buf, @@ -6396,18 +6387,12 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr, return ret; } - if (cpu == RING_BUFFER_ALL_CPUS) - set_buffer_entries(&tr->max_buffer, size); - else - per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size; + update_buffer_entries(&tr->max_buffer, cpu); out: #endif /* CONFIG_TRACER_MAX_TRACE */ - if (cpu == RING_BUFFER_ALL_CPUS) - set_buffer_entries(&tr->array_buffer, size); - else - per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size; + update_buffer_entries(&tr->array_buffer, cpu); return ret; } @@ -6825,10 +6810,7 @@ static int tracing_release_pipe(struct inode *inode, struct file *file) close_pipe_on_cpu(tr, iter->cpu_file); mutex_unlock(&trace_types_lock); - free_cpumask_var(iter->started); - kfree(iter->fmt); - kfree(iter->temp); - mutex_destroy(&iter->mutex); + free_trace_iter_content(iter); kfree(iter); trace_array_put(tr); @@ -7618,6 +7600,11 @@ out: return ret; } +static void tracing_swap_cpu_buffer(void *tr) +{ + update_max_tr_single((struct trace_array *)tr, current, smp_processor_id()); +} + static ssize_t tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) @@ -7676,13 +7663,15 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, ret = tracing_alloc_snapshot_instance(tr); if (ret < 0) break; - local_irq_disable(); /* Now, we're going to swap */ - if (iter->cpu_file == RING_BUFFER_ALL_CPUS) + if (iter->cpu_file == RING_BUFFER_ALL_CPUS) { + local_irq_disable(); update_max_tr(tr, current, smp_processor_id(), NULL); - else - update_max_tr_single(tr, current, iter->cpu_file); - local_irq_enable(); + local_irq_enable(); + } else { + smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer, + (void *)tr, 1); + } break; default: if (tr->allocated_snapshot) { @@ -9486,7 +9475,7 @@ static struct trace_array *trace_array_create(const char *name) if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL)) goto out_free_tr; - if (!alloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL)) + if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL)) goto out_free_tr; tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS; @@ -10431,7 +10420,7 @@ __init static int tracer_alloc_buffers(void) if (trace_create_savedcmd() < 0) goto out_free_temp_buffer; - if (!alloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL)) + if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL)) goto out_free_savedcmd; /* TODO: make the number of buffers hot pluggable with CPUS */ diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 73eaec158473..5669dd1f90d9 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -77,6 +77,16 @@ enum trace_type { #undef __array #define __array(type, item, size) type item[size]; +/* + * For backward compatibility, older user space expects to see the + * kernel_stack event with a fixed size caller field. But today the fix + * size is ignored by the kernel, and the real structure is dynamic. + * Expose to user space: "unsigned long caller[8];" but the real structure + * will be "unsigned long caller[] __counted_by(size)" + */ +#undef __stack_array +#define __stack_array(type, item, size, field) type item[] __counted_by(field); + #undef __array_desc #define __array_desc(type, container, item, size) @@ -596,7 +606,6 @@ trace_buffer_iter(struct trace_iterator *iter, int cpu) int tracer_init(struct tracer *t, struct trace_array *tr); int tracing_is_enabled(void); void tracing_reset_online_cpus(struct array_buffer *buf); -void tracing_reset_current(int cpu); void tracing_reset_all_online_cpus(void); void tracing_reset_all_online_cpus_unlocked(void); int tracing_open_generic(struct inode *inode, struct file *filp); @@ -697,7 +706,6 @@ void trace_filter_add_remove_task(struct trace_pid_list *pid_list, void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos); void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos); int trace_pid_show(struct seq_file *m, void *v); -void trace_free_pid_list(struct trace_pid_list *pid_list); int trace_pid_write(struct trace_pid_list *filtered_pids, struct trace_pid_list **new_pid_list, const char __user *ubuf, size_t cnt); @@ -1334,7 +1342,7 @@ struct trace_subsystem_dir { struct list_head list; struct event_subsystem *subsystem; struct trace_array *tr; - struct dentry *entry; + struct eventfs_file *ef; int ref_count; int nr_events; }; diff --git a/kernel/trace/trace_btf.c b/kernel/trace/trace_btf.c new file mode 100644 index 000000000000..ca224d53bfdc --- /dev/null +++ b/kernel/trace/trace_btf.c @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/btf.h> +#include <linux/kernel.h> +#include <linux/slab.h> + +#include "trace_btf.h" + +/* + * Find a function proto type by name, and return the btf_type with its btf + * in *@btf_p. Return NULL if not found. + * Note that caller has to call btf_put(*@btf_p) after using the btf_type. + */ +const struct btf_type *btf_find_func_proto(const char *func_name, struct btf **btf_p) +{ + const struct btf_type *t; + s32 id; + + id = bpf_find_btf_id(func_name, BTF_KIND_FUNC, btf_p); + if (id < 0) + return NULL; + + /* Get BTF_KIND_FUNC type */ + t = btf_type_by_id(*btf_p, id); + if (!t || !btf_type_is_func(t)) + goto err; + + /* The type of BTF_KIND_FUNC is BTF_KIND_FUNC_PROTO */ + t = btf_type_by_id(*btf_p, t->type); + if (!t || !btf_type_is_func_proto(t)) + goto err; + + return t; +err: + btf_put(*btf_p); + return NULL; +} + +/* + * Get function parameter with the number of parameters. + * This can return NULL if the function has no parameters. + * It can return -EINVAL if the @func_proto is not a function proto type. + */ +const struct btf_param *btf_get_func_param(const struct btf_type *func_proto, s32 *nr) +{ + if (!btf_type_is_func_proto(func_proto)) + return ERR_PTR(-EINVAL); + + *nr = btf_type_vlen(func_proto); + if (*nr > 0) + return (const struct btf_param *)(func_proto + 1); + else + return NULL; +} + +#define BTF_ANON_STACK_MAX 16 + +struct btf_anon_stack { + u32 tid; + u32 offset; +}; + +/* + * Find a member of data structure/union by name and return it. + * Return NULL if not found, or -EINVAL if parameter is invalid. + * If the member is an member of anonymous union/structure, the offset + * of that anonymous union/structure is stored into @anon_offset. Caller + * can calculate the correct offset from the root data structure by + * adding anon_offset to the member's offset. + */ +const struct btf_member *btf_find_struct_member(struct btf *btf, + const struct btf_type *type, + const char *member_name, + u32 *anon_offset) +{ + struct btf_anon_stack *anon_stack; + const struct btf_member *member; + u32 tid, cur_offset = 0; + const char *name; + int i, top = 0; + + anon_stack = kcalloc(BTF_ANON_STACK_MAX, sizeof(*anon_stack), GFP_KERNEL); + if (!anon_stack) + return ERR_PTR(-ENOMEM); + +retry: + if (!btf_type_is_struct(type)) { + member = ERR_PTR(-EINVAL); + goto out; + } + + for_each_member(i, type, member) { + if (!member->name_off) { + /* Anonymous union/struct: push it for later use */ + type = btf_type_skip_modifiers(btf, member->type, &tid); + if (type && top < BTF_ANON_STACK_MAX) { + anon_stack[top].tid = tid; + anon_stack[top++].offset = + cur_offset + member->offset; + } + } else { + name = btf_name_by_offset(btf, member->name_off); + if (name && !strcmp(member_name, name)) { + if (anon_offset) + *anon_offset = cur_offset; + goto out; + } + } + } + if (top > 0) { + /* Pop from the anonymous stack and retry */ + tid = anon_stack[--top].tid; + cur_offset = anon_stack[top].offset; + type = btf_type_by_id(btf, tid); + goto retry; + } + member = NULL; + +out: + kfree(anon_stack); + return member; +} + diff --git a/kernel/trace/trace_btf.h b/kernel/trace/trace_btf.h new file mode 100644 index 000000000000..4bc44bc261e6 --- /dev/null +++ b/kernel/trace/trace_btf.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/btf.h> + +const struct btf_type *btf_find_func_proto(const char *func_name, + struct btf **btf_p); +const struct btf_param *btf_get_func_param(const struct btf_type *func_proto, + s32 *nr); +const struct btf_member *btf_find_struct_member(struct btf *btf, + const struct btf_type *type, + const char *member_name, + u32 *anon_offset); diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index 340b2fa98218..c47422b20908 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -190,7 +190,7 @@ FTRACE_ENTRY(kernel_stack, stack_entry, F_STRUCT( __field( int, size ) - __array( unsigned long, caller, FTRACE_STACK_ENTRIES ) + __stack_array( unsigned long, caller, FTRACE_STACK_ENTRIES, size) ), F_printk("\t=> %ps\n\t=> %ps\n\t=> %ps\n" diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c index a0a704ba27db..72714cbf475c 100644 --- a/kernel/trace/trace_eprobe.c +++ b/kernel/trace/trace_eprobe.c @@ -41,6 +41,10 @@ struct eprobe_data { struct trace_eprobe *ep; }; + +#define for_each_trace_eprobe_tp(ep, _tp) \ + list_for_each_entry(ep, trace_probe_probe_list(_tp), tp.list) + static int __trace_eprobe_create(int argc, const char *argv[]); static void trace_event_probe_cleanup(struct trace_eprobe *ep) @@ -640,7 +644,7 @@ static int disable_eprobe(struct trace_eprobe *ep, static int enable_trace_eprobe(struct trace_event_call *call, struct trace_event_file *file) { - struct trace_probe *pos, *tp; + struct trace_probe *tp; struct trace_eprobe *ep; bool enabled; int ret = 0; @@ -662,8 +666,7 @@ static int enable_trace_eprobe(struct trace_event_call *call, if (enabled) return 0; - list_for_each_entry(pos, trace_probe_probe_list(tp), list) { - ep = container_of(pos, struct trace_eprobe, tp); + for_each_trace_eprobe_tp(ep, tp) { ret = enable_eprobe(ep, file); if (ret) break; @@ -680,8 +683,7 @@ static int enable_trace_eprobe(struct trace_event_call *call, */ WARN_ON_ONCE(ret != -ENOMEM); - list_for_each_entry(pos, trace_probe_probe_list(tp), list) { - ep = container_of(pos, struct trace_eprobe, tp); + for_each_trace_eprobe_tp(ep, tp) { disable_eprobe(ep, file->tr); if (!--cnt) break; @@ -699,7 +701,7 @@ static int enable_trace_eprobe(struct trace_event_call *call, static int disable_trace_eprobe(struct trace_event_call *call, struct trace_event_file *file) { - struct trace_probe *pos, *tp; + struct trace_probe *tp; struct trace_eprobe *ep; tp = trace_probe_primary_from_call(call); @@ -716,10 +718,8 @@ static int disable_trace_eprobe(struct trace_event_call *call, trace_probe_clear_flag(tp, TP_FLAG_PROFILE); if (!trace_probe_is_enabled(tp)) { - list_for_each_entry(pos, trace_probe_probe_list(tp), list) { - ep = container_of(pos, struct trace_eprobe, tp); + for_each_trace_eprobe_tp(ep, tp) disable_eprobe(ep, file->tr); - } } out: @@ -807,13 +807,11 @@ static int trace_eprobe_tp_update_arg(struct trace_eprobe *ep, const char *argv[ int ret; ret = traceprobe_parse_probe_arg(&ep->tp, i, argv[i], &ctx); - if (ret) - return ret; - /* Handle symbols "@" */ if (!ret) ret = traceprobe_update_arg(&ep->tp.args[i]); + traceprobe_finish_parse(&ctx); return ret; } diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 578f1f7d49a6..ed367d713be0 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -984,7 +984,7 @@ static void remove_subsystem(struct trace_subsystem_dir *dir) return; if (!--dir->nr_events) { - tracefs_remove(dir->entry); + eventfs_remove(dir->ef); list_del(&dir->list); __put_system_dir(dir); } @@ -1005,7 +1005,7 @@ static void remove_event_file_dir(struct trace_event_file *file) tracefs_remove(dir); } - + eventfs_remove(file->ef); list_del(&file->list); remove_subsystem(file->system); free_event_filter(file->filter); @@ -2291,13 +2291,13 @@ create_new_subsystem(const char *name) return NULL; } -static struct dentry * +static struct eventfs_file * event_subsystem_dir(struct trace_array *tr, const char *name, struct trace_event_file *file, struct dentry *parent) { struct event_subsystem *system, *iter; struct trace_subsystem_dir *dir; - struct dentry *entry; + int res; /* First see if we did not already create this dir */ list_for_each_entry(dir, &tr->systems, list) { @@ -2305,7 +2305,7 @@ event_subsystem_dir(struct trace_array *tr, const char *name, if (strcmp(system->name, name) == 0) { dir->nr_events++; file->system = dir; - return dir->entry; + return dir->ef; } } @@ -2329,8 +2329,8 @@ event_subsystem_dir(struct trace_array *tr, const char *name, } else __get_system(system); - dir->entry = tracefs_create_dir(name, parent); - if (!dir->entry) { + dir->ef = eventfs_add_subsystem_dir(name, parent); + if (IS_ERR(dir->ef)) { pr_warn("Failed to create system directory %s\n", name); __put_system(system); goto out_free; @@ -2345,22 +2345,22 @@ event_subsystem_dir(struct trace_array *tr, const char *name, /* the ftrace system is special, do not create enable or filter files */ if (strcmp(name, "ftrace") != 0) { - entry = tracefs_create_file("filter", TRACE_MODE_WRITE, - dir->entry, dir, + res = eventfs_add_file("filter", TRACE_MODE_WRITE, + dir->ef, dir, &ftrace_subsystem_filter_fops); - if (!entry) { + if (res) { kfree(system->filter); system->filter = NULL; pr_warn("Could not create tracefs '%s/filter' entry\n", name); } - trace_create_file("enable", TRACE_MODE_WRITE, dir->entry, dir, + eventfs_add_file("enable", TRACE_MODE_WRITE, dir->ef, dir, &ftrace_system_enable_fops); } list_add(&dir->list, &tr->systems); - return dir->entry; + return dir->ef; out_free: kfree(dir); @@ -2413,36 +2413,37 @@ static int event_create_dir(struct dentry *parent, struct trace_event_file *file) { struct trace_event_call *call = file->event_call; + struct eventfs_file *ef_subsystem = NULL; struct trace_array *tr = file->tr; - struct dentry *d_events; const char *name; int ret; /* * If the trace point header did not define TRACE_SYSTEM - * then the system would be called "TRACE_SYSTEM". + * then the system would be called "TRACE_SYSTEM". This should + * never happen. */ - if (strcmp(call->class->system, TRACE_SYSTEM) != 0) { - d_events = event_subsystem_dir(tr, call->class->system, file, parent); - if (!d_events) - return -ENOMEM; - } else - d_events = parent; + if (WARN_ON_ONCE(strcmp(call->class->system, TRACE_SYSTEM) == 0)) + return -ENODEV; + + ef_subsystem = event_subsystem_dir(tr, call->class->system, file, parent); + if (!ef_subsystem) + return -ENOMEM; name = trace_event_name(call); - file->dir = tracefs_create_dir(name, d_events); - if (!file->dir) { + file->ef = eventfs_add_dir(name, ef_subsystem); + if (IS_ERR(file->ef)) { pr_warn("Could not create tracefs '%s' directory\n", name); return -1; } if (call->class->reg && !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) - trace_create_file("enable", TRACE_MODE_WRITE, file->dir, file, + eventfs_add_file("enable", TRACE_MODE_WRITE, file->ef, file, &ftrace_enable_fops); #ifdef CONFIG_PERF_EVENTS if (call->event.type && call->class->reg) - trace_create_file("id", TRACE_MODE_READ, file->dir, + eventfs_add_file("id", TRACE_MODE_READ, file->ef, (void *)(long)call->event.type, &ftrace_event_id_fops); #endif @@ -2458,27 +2459,27 @@ event_create_dir(struct dentry *parent, struct trace_event_file *file) * triggers or filters. */ if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) { - trace_create_file("filter", TRACE_MODE_WRITE, file->dir, + eventfs_add_file("filter", TRACE_MODE_WRITE, file->ef, file, &ftrace_event_filter_fops); - trace_create_file("trigger", TRACE_MODE_WRITE, file->dir, + eventfs_add_file("trigger", TRACE_MODE_WRITE, file->ef, file, &event_trigger_fops); } #ifdef CONFIG_HIST_TRIGGERS - trace_create_file("hist", TRACE_MODE_READ, file->dir, file, + eventfs_add_file("hist", TRACE_MODE_READ, file->ef, file, &event_hist_fops); #endif #ifdef CONFIG_HIST_TRIGGERS_DEBUG - trace_create_file("hist_debug", TRACE_MODE_READ, file->dir, file, + eventfs_add_file("hist_debug", TRACE_MODE_READ, file->ef, file, &event_hist_debug_fops); #endif - trace_create_file("format", TRACE_MODE_READ, file->dir, call, + eventfs_add_file("format", TRACE_MODE_READ, file->ef, call, &ftrace_event_format_fops); #ifdef CONFIG_TRACE_EVENT_INJECT if (call->event.type && call->class->reg) - trace_create_file("inject", 0200, file->dir, file, + eventfs_add_file("inject", 0200, file->ef, file, &event_inject_fops); #endif @@ -3631,21 +3632,22 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) { struct dentry *d_events; struct dentry *entry; + int error = 0; entry = trace_create_file("set_event", TRACE_MODE_WRITE, parent, tr, &ftrace_set_event_fops); if (!entry) return -ENOMEM; - d_events = tracefs_create_dir("events", parent); - if (!d_events) { + d_events = eventfs_create_events_dir("events", parent); + if (IS_ERR(d_events)) { pr_warn("Could not create tracefs 'events' directory\n"); return -ENOMEM; } - entry = trace_create_file("enable", TRACE_MODE_WRITE, d_events, + error = eventfs_add_events_file("enable", TRACE_MODE_WRITE, d_events, tr, &ftrace_tr_enable_fops); - if (!entry) + if (error) return -ENOMEM; /* There are not as crucial, just warn if they are not created */ @@ -3658,11 +3660,11 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) &ftrace_set_event_notrace_pid_fops); /* ring buffer internal formats */ - trace_create_file("header_page", TRACE_MODE_READ, d_events, + eventfs_add_events_file("header_page", TRACE_MODE_READ, d_events, ring_buffer_print_page_header, &ftrace_show_header_fops); - trace_create_file("header_event", TRACE_MODE_READ, d_events, + eventfs_add_events_file("header_event", TRACE_MODE_READ, d_events, ring_buffer_print_entry_header, &ftrace_show_header_fops); @@ -3750,7 +3752,7 @@ int event_trace_del_tracer(struct trace_array *tr) down_write(&trace_event_sem); __trace_remove_event_dirs(tr); - tracefs_remove(tr->event_dir); + eventfs_remove_events_dir(tr->event_dir); up_write(&trace_event_sem); tr->event_dir = NULL; diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 1dad64267878..33264e510d16 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -46,15 +46,19 @@ static const char * ops[] = { OPS }; enum filter_pred_fn { FILTER_PRED_FN_NOP, FILTER_PRED_FN_64, + FILTER_PRED_FN_64_CPUMASK, FILTER_PRED_FN_S64, FILTER_PRED_FN_U64, FILTER_PRED_FN_32, + FILTER_PRED_FN_32_CPUMASK, FILTER_PRED_FN_S32, FILTER_PRED_FN_U32, FILTER_PRED_FN_16, + FILTER_PRED_FN_16_CPUMASK, FILTER_PRED_FN_S16, FILTER_PRED_FN_U16, FILTER_PRED_FN_8, + FILTER_PRED_FN_8_CPUMASK, FILTER_PRED_FN_S8, FILTER_PRED_FN_U8, FILTER_PRED_FN_COMM, @@ -64,21 +68,25 @@ enum filter_pred_fn { FILTER_PRED_FN_PCHAR_USER, FILTER_PRED_FN_PCHAR, FILTER_PRED_FN_CPU, + FILTER_PRED_FN_CPU_CPUMASK, + FILTER_PRED_FN_CPUMASK, + FILTER_PRED_FN_CPUMASK_CPU, FILTER_PRED_FN_FUNCTION, FILTER_PRED_FN_, FILTER_PRED_TEST_VISITED, }; struct filter_pred { - enum filter_pred_fn fn_num; - u64 val; - u64 val2; - struct regex regex; + struct regex *regex; + struct cpumask *mask; unsigned short *ops; struct ftrace_event_field *field; - int offset; + u64 val; + u64 val2; + enum filter_pred_fn fn_num; + int offset; int not; - int op; + int op; }; /* @@ -94,6 +102,8 @@ struct filter_pred { C(TOO_MANY_OPEN, "Too many '('"), \ C(TOO_MANY_CLOSE, "Too few '('"), \ C(MISSING_QUOTE, "Missing matching quote"), \ + C(MISSING_BRACE_OPEN, "Missing '{'"), \ + C(MISSING_BRACE_CLOSE, "Missing '}'"), \ C(OPERAND_TOO_LONG, "Operand too long"), \ C(EXPECT_STRING, "Expecting string field"), \ C(EXPECT_DIGIT, "Expecting numeric field"), \ @@ -103,6 +113,7 @@ struct filter_pred { C(BAD_SUBSYS_FILTER, "Couldn't find or set field in one of a subsystem's events"), \ C(TOO_MANY_PREDS, "Too many terms in predicate expression"), \ C(INVALID_FILTER, "Meaningless filter expression"), \ + C(INVALID_CPULIST, "Invalid cpulist"), \ C(IP_FIELD_ONLY, "Only 'ip' field is supported for function trace"), \ C(INVALID_VALUE, "Invalid value (did you forget quotes)?"), \ C(NO_FUNCTION, "Function not found"), \ @@ -186,6 +197,15 @@ enum { PROCESS_OR = 4, }; +static void free_predicate(struct filter_pred *pred) +{ + if (pred) { + kfree(pred->regex); + kfree(pred->mask); + kfree(pred); + } +} + /* * Without going into a formal proof, this explains the method that is used in * parsing the logical expressions. @@ -623,12 +643,64 @@ out_free: kfree(inverts); if (prog_stack) { for (i = 0; prog_stack[i].pred; i++) - kfree(prog_stack[i].pred); + free_predicate(prog_stack[i].pred); kfree(prog_stack); } return ERR_PTR(ret); } +static inline int +do_filter_cpumask(int op, const struct cpumask *mask, const struct cpumask *cmp) +{ + switch (op) { + case OP_EQ: + return cpumask_equal(mask, cmp); + case OP_NE: + return !cpumask_equal(mask, cmp); + case OP_BAND: + return cpumask_intersects(mask, cmp); + default: + return 0; + } +} + +/* Optimisation of do_filter_cpumask() for scalar fields */ +static inline int +do_filter_scalar_cpumask(int op, unsigned int cpu, const struct cpumask *mask) +{ + /* + * Per the weight-of-one cpumask optimisations, the mask passed in this + * function has a weight >= 2, so it is never equal to a single scalar. + */ + switch (op) { + case OP_EQ: + return false; + case OP_NE: + return true; + case OP_BAND: + return cpumask_test_cpu(cpu, mask); + default: + return 0; + } +} + +static inline int +do_filter_cpumask_scalar(int op, const struct cpumask *mask, unsigned int cpu) +{ + switch (op) { + case OP_EQ: + return cpumask_test_cpu(cpu, mask) && + cpumask_nth(1, mask) >= nr_cpu_ids; + case OP_NE: + return !cpumask_test_cpu(cpu, mask) || + cpumask_nth(1, mask) < nr_cpu_ids; + case OP_BAND: + return cpumask_test_cpu(cpu, mask); + default: + return 0; + } +} + enum pred_cmp_types { PRED_CMP_TYPE_NOP, PRED_CMP_TYPE_LT, @@ -672,6 +744,18 @@ static int filter_pred_##type(struct filter_pred *pred, void *event) \ } \ } +#define DEFINE_CPUMASK_COMPARISON_PRED(size) \ +static int filter_pred_##size##_cpumask(struct filter_pred *pred, void *event) \ +{ \ + u##size *addr = (u##size *)(event + pred->offset); \ + unsigned int cpu = *addr; \ + \ + if (cpu >= nr_cpu_ids) \ + return 0; \ + \ + return do_filter_scalar_cpumask(pred->op, cpu, pred->mask); \ +} + #define DEFINE_EQUALITY_PRED(size) \ static int filter_pred_##size(struct filter_pred *pred, void *event) \ { \ @@ -693,6 +777,11 @@ DEFINE_COMPARISON_PRED(u16); DEFINE_COMPARISON_PRED(s8); DEFINE_COMPARISON_PRED(u8); +DEFINE_CPUMASK_COMPARISON_PRED(64); +DEFINE_CPUMASK_COMPARISON_PRED(32); +DEFINE_CPUMASK_COMPARISON_PRED(16); +DEFINE_CPUMASK_COMPARISON_PRED(8); + DEFINE_EQUALITY_PRED(64); DEFINE_EQUALITY_PRED(32); DEFINE_EQUALITY_PRED(16); @@ -750,7 +839,7 @@ static int filter_pred_string(struct filter_pred *pred, void *event) char *addr = (char *)(event + pred->offset); int cmp, match; - cmp = pred->regex.match(addr, &pred->regex, pred->regex.field_len); + cmp = pred->regex->match(addr, pred->regex, pred->regex->field_len); match = cmp ^ pred->not; @@ -763,7 +852,7 @@ static __always_inline int filter_pchar(struct filter_pred *pred, char *str) int len; len = strlen(str) + 1; /* including tailing '\0' */ - cmp = pred->regex.match(str, &pred->regex, len); + cmp = pred->regex->match(str, pred->regex, len); match = cmp ^ pred->not; @@ -813,7 +902,7 @@ static int filter_pred_strloc(struct filter_pred *pred, void *event) char *addr = (char *)(event + str_loc); int cmp, match; - cmp = pred->regex.match(addr, &pred->regex, str_len); + cmp = pred->regex->match(addr, pred->regex, str_len); match = cmp ^ pred->not; @@ -836,7 +925,7 @@ static int filter_pred_strrelloc(struct filter_pred *pred, void *event) char *addr = (char *)(&item[1]) + str_loc; int cmp, match; - cmp = pred->regex.match(addr, &pred->regex, str_len); + cmp = pred->regex->match(addr, pred->regex, str_len); match = cmp ^ pred->not; @@ -869,12 +958,42 @@ static int filter_pred_cpu(struct filter_pred *pred, void *event) } } +/* Filter predicate for current CPU vs user-provided cpumask */ +static int filter_pred_cpu_cpumask(struct filter_pred *pred, void *event) +{ + int cpu = raw_smp_processor_id(); + + return do_filter_scalar_cpumask(pred->op, cpu, pred->mask); +} + +/* Filter predicate for cpumask field vs user-provided cpumask */ +static int filter_pred_cpumask(struct filter_pred *pred, void *event) +{ + u32 item = *(u32 *)(event + pred->offset); + int loc = item & 0xffff; + const struct cpumask *mask = (event + loc); + const struct cpumask *cmp = pred->mask; + + return do_filter_cpumask(pred->op, mask, cmp); +} + +/* Filter predicate for cpumask field vs user-provided scalar */ +static int filter_pred_cpumask_cpu(struct filter_pred *pred, void *event) +{ + u32 item = *(u32 *)(event + pred->offset); + int loc = item & 0xffff; + const struct cpumask *mask = (event + loc); + unsigned int cpu = pred->val; + + return do_filter_cpumask_scalar(pred->op, mask, cpu); +} + /* Filter predicate for COMM. */ static int filter_pred_comm(struct filter_pred *pred, void *event) { int cmp; - cmp = pred->regex.match(current->comm, &pred->regex, + cmp = pred->regex->match(current->comm, pred->regex, TASK_COMM_LEN); return cmp ^ pred->not; } @@ -1004,7 +1123,7 @@ enum regex_type filter_parse_regex(char *buff, int len, char **search, int *not) static void filter_build_regex(struct filter_pred *pred) { - struct regex *r = &pred->regex; + struct regex *r = pred->regex; char *search; enum regex_type type = MATCH_FULL; @@ -1169,7 +1288,7 @@ static void free_prog(struct event_filter *filter) return; for (i = 0; prog[i].pred; i++) - kfree(prog[i].pred); + free_predicate(prog[i].pred); kfree(prog); } @@ -1236,8 +1355,12 @@ static void filter_free_subsystem_filters(struct trace_subsystem_dir *dir, int filter_assign_type(const char *type) { - if (strstr(type, "__data_loc") && strstr(type, "char")) - return FILTER_DYN_STRING; + if (strstr(type, "__data_loc")) { + if (strstr(type, "char")) + return FILTER_DYN_STRING; + if (strstr(type, "cpumask_t")) + return FILTER_CPUMASK; + } if (strstr(type, "__rel_loc") && strstr(type, "char")) return FILTER_RDYN_STRING; @@ -1313,24 +1436,32 @@ static int filter_pred_fn_call(struct filter_pred *pred, void *event) switch (pred->fn_num) { case FILTER_PRED_FN_64: return filter_pred_64(pred, event); + case FILTER_PRED_FN_64_CPUMASK: + return filter_pred_64_cpumask(pred, event); case FILTER_PRED_FN_S64: return filter_pred_s64(pred, event); case FILTER_PRED_FN_U64: return filter_pred_u64(pred, event); case FILTER_PRED_FN_32: return filter_pred_32(pred, event); + case FILTER_PRED_FN_32_CPUMASK: + return filter_pred_32_cpumask(pred, event); case FILTER_PRED_FN_S32: return filter_pred_s32(pred, event); case FILTER_PRED_FN_U32: return filter_pred_u32(pred, event); case FILTER_PRED_FN_16: return filter_pred_16(pred, event); + case FILTER_PRED_FN_16_CPUMASK: + return filter_pred_16_cpumask(pred, event); case FILTER_PRED_FN_S16: return filter_pred_s16(pred, event); case FILTER_PRED_FN_U16: return filter_pred_u16(pred, event); case FILTER_PRED_FN_8: return filter_pred_8(pred, event); + case FILTER_PRED_FN_8_CPUMASK: + return filter_pred_8_cpumask(pred, event); case FILTER_PRED_FN_S8: return filter_pred_s8(pred, event); case FILTER_PRED_FN_U8: @@ -1349,6 +1480,12 @@ static int filter_pred_fn_call(struct filter_pred *pred, void *event) return filter_pred_pchar(pred, event); case FILTER_PRED_FN_CPU: return filter_pred_cpu(pred, event); + case FILTER_PRED_FN_CPU_CPUMASK: + return filter_pred_cpu_cpumask(pred, event); + case FILTER_PRED_FN_CPUMASK: + return filter_pred_cpumask(pred, event); + case FILTER_PRED_FN_CPUMASK_CPU: + return filter_pred_cpumask_cpu(pred, event); case FILTER_PRED_FN_FUNCTION: return filter_pred_function(pred, event); case FILTER_PRED_TEST_VISITED: @@ -1553,9 +1690,130 @@ static int parse_pred(const char *str, void *data, goto err_free; } - pred->regex.len = len; - strncpy(pred->regex.pattern, str + s, len); - pred->regex.pattern[len] = 0; + pred->regex = kzalloc(sizeof(*pred->regex), GFP_KERNEL); + if (!pred->regex) + goto err_mem; + pred->regex->len = len; + strncpy(pred->regex->pattern, str + s, len); + pred->regex->pattern[len] = 0; + + } else if (!strncmp(str + i, "CPUS", 4)) { + unsigned int maskstart; + bool single; + char *tmp; + + switch (field->filter_type) { + case FILTER_CPUMASK: + case FILTER_CPU: + case FILTER_OTHER: + break; + default: + parse_error(pe, FILT_ERR_ILLEGAL_FIELD_OP, pos + i); + goto err_free; + } + + switch (op) { + case OP_EQ: + case OP_NE: + case OP_BAND: + break; + default: + parse_error(pe, FILT_ERR_ILLEGAL_FIELD_OP, pos + i); + goto err_free; + } + + /* Skip CPUS */ + i += 4; + if (str[i++] != '{') { + parse_error(pe, FILT_ERR_MISSING_BRACE_OPEN, pos + i); + goto err_free; + } + maskstart = i; + + /* Walk the cpulist until closing } */ + for (; str[i] && str[i] != '}'; i++) + ; + + if (str[i] != '}') { + parse_error(pe, FILT_ERR_MISSING_BRACE_CLOSE, pos + i); + goto err_free; + } + + if (maskstart == i) { + parse_error(pe, FILT_ERR_INVALID_CPULIST, pos + i); + goto err_free; + } + + /* Copy the cpulist between { and } */ + tmp = kmalloc((i - maskstart) + 1, GFP_KERNEL); + if (!tmp) + goto err_mem; + + strscpy(tmp, str + maskstart, (i - maskstart) + 1); + pred->mask = kzalloc(cpumask_size(), GFP_KERNEL); + if (!pred->mask) { + kfree(tmp); + goto err_mem; + } + + /* Now parse it */ + if (cpulist_parse(tmp, pred->mask)) { + kfree(tmp); + parse_error(pe, FILT_ERR_INVALID_CPULIST, pos + i); + goto err_free; + } + kfree(tmp); + + /* Move along */ + i++; + + /* + * Optimisation: if the user-provided mask has a weight of one + * then we can treat it as a scalar input. + */ + single = cpumask_weight(pred->mask) == 1; + if (single) { + pred->val = cpumask_first(pred->mask); + kfree(pred->mask); + pred->mask = NULL; + } + + if (field->filter_type == FILTER_CPUMASK) { + pred->fn_num = single ? + FILTER_PRED_FN_CPUMASK_CPU : + FILTER_PRED_FN_CPUMASK; + } else if (field->filter_type == FILTER_CPU) { + if (single) { + if (pred->op == OP_BAND) + pred->op = OP_EQ; + + pred->fn_num = FILTER_PRED_FN_CPU; + } else { + pred->fn_num = FILTER_PRED_FN_CPU_CPUMASK; + } + } else if (single) { + if (pred->op == OP_BAND) + pred->op = OP_EQ; + + pred->fn_num = select_comparison_fn(pred->op, field->size, false); + if (pred->op == OP_NE) + pred->not = 1; + } else { + switch (field->size) { + case 8: + pred->fn_num = FILTER_PRED_FN_64_CPUMASK; + break; + case 4: + pred->fn_num = FILTER_PRED_FN_32_CPUMASK; + break; + case 2: + pred->fn_num = FILTER_PRED_FN_16_CPUMASK; + break; + case 1: + pred->fn_num = FILTER_PRED_FN_8_CPUMASK; + break; + } + } /* This is either a string, or an integer */ } else if (str[i] == '\'' || str[i] == '"') { @@ -1597,9 +1855,12 @@ static int parse_pred(const char *str, void *data, goto err_free; } - pred->regex.len = len; - strncpy(pred->regex.pattern, str + s, len); - pred->regex.pattern[len] = 0; + pred->regex = kzalloc(sizeof(*pred->regex), GFP_KERNEL); + if (!pred->regex) + goto err_mem; + pred->regex->len = len; + strncpy(pred->regex->pattern, str + s, len); + pred->regex->pattern[len] = 0; filter_build_regex(pred); @@ -1608,7 +1869,7 @@ static int parse_pred(const char *str, void *data, } else if (field->filter_type == FILTER_STATIC_STRING) { pred->fn_num = FILTER_PRED_FN_STRING; - pred->regex.field_len = field->size; + pred->regex->field_len = field->size; } else if (field->filter_type == FILTER_DYN_STRING) { pred->fn_num = FILTER_PRED_FN_STRLOC; @@ -1691,10 +1952,10 @@ static int parse_pred(const char *str, void *data, return i; err_free: - kfree(pred); + free_predicate(pred); return -EINVAL; err_mem: - kfree(pred); + free_predicate(pred); return -ENOMEM; } @@ -2287,8 +2548,8 @@ static int ftrace_function_set_filter_pred(struct filter_pred *pred, return ret; return __ftrace_function_set_filter(pred->op == OP_EQ, - pred->regex.pattern, - pred->regex.len, + pred->regex->pattern, + pred->regex->len, data); } diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c index 33cb6af31f39..6f046650e527 100644 --- a/kernel/trace/trace_events_user.c +++ b/kernel/trace/trace_events_user.c @@ -1328,14 +1328,14 @@ static int user_field_set_string(struct ftrace_event_field *field, static int user_event_set_print_fmt(struct user_event *user, char *buf, int len) { - struct ftrace_event_field *field, *next; + struct ftrace_event_field *field; struct list_head *head = &user->fields; int pos = 0, depth = 0; const char *str_func; pos += snprintf(buf + pos, LEN_OR_ZERO, "\""); - list_for_each_entry_safe_reverse(field, next, head, link) { + list_for_each_entry_reverse(field, head, link) { if (depth != 0) pos += snprintf(buf + pos, LEN_OR_ZERO, " "); @@ -1347,7 +1347,7 @@ static int user_event_set_print_fmt(struct user_event *user, char *buf, int len) pos += snprintf(buf + pos, LEN_OR_ZERO, "\""); - list_for_each_entry_safe_reverse(field, next, head, link) { + list_for_each_entry_reverse(field, head, link) { if (user_field_is_dyn_string(field->type, &str_func)) pos += snprintf(buf + pos, LEN_OR_ZERO, ", %s(%s)", str_func, field->name); @@ -1732,7 +1732,7 @@ static int user_event_create(const char *raw_command) static int user_event_show(struct seq_file *m, struct dyn_event *ev) { struct user_event *user = container_of(ev, struct user_event, devent); - struct ftrace_event_field *field, *next; + struct ftrace_event_field *field; struct list_head *head; int depth = 0; @@ -1740,7 +1740,7 @@ static int user_event_show(struct seq_file *m, struct dyn_event *ev) head = trace_get_fields(&user->call); - list_for_each_entry_safe_reverse(field, next, head, link) { + list_for_each_entry_reverse(field, head, link) { if (depth == 0) seq_puts(m, " "); else @@ -1816,13 +1816,14 @@ out: static bool user_fields_match(struct user_event *user, int argc, const char **argv) { - struct ftrace_event_field *field, *next; + struct ftrace_event_field *field; struct list_head *head = &user->fields; int i = 0; - list_for_each_entry_safe_reverse(field, next, head, link) + list_for_each_entry_reverse(field, head, link) { if (!user_field_match(field, argc, argv, &i)) return false; + } if (i != argc) return false; diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 58f3946081e2..1698fc22afa0 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -51,6 +51,9 @@ static int ftrace_event_register(struct trace_event_call *call, #undef __array #define __array(type, item, size) type item[size]; +#undef __stack_array +#define __stack_array(type, item, size, field) __array(type, item, size) + #undef __array_desc #define __array_desc(type, container, item, size) type item[size]; @@ -114,6 +117,9 @@ static void __always_unused ____ftrace_check_##name(void) \ is_signed_type(_type), .filter_type = FILTER_OTHER, \ .len = _len }, +#undef __stack_array +#define __stack_array(_type, _item, _len, _field) __array(_type, _item, _len) + #undef __array_desc #define __array_desc(_type, _container, _item, _len) __array(_type, _item, _len) @@ -149,6 +155,9 @@ static struct trace_event_fields ftrace_event_fields_##name[] = { \ #undef __array #define __array(type, item, len) +#undef __stack_array +#define __stack_array(type, item, len, field) + #undef __array_desc #define __array_desc(type, container, item, len) diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c index dfe2e546acdc..8bfe23af9c73 100644 --- a/kernel/trace/trace_fprobe.c +++ b/kernel/trace/trace_fprobe.c @@ -898,6 +898,46 @@ static struct tracepoint *find_tracepoint(const char *tp_name) return data.tpoint; } +static int parse_symbol_and_return(int argc, const char *argv[], + char **symbol, bool *is_return, + bool is_tracepoint) +{ + char *tmp = strchr(argv[1], '%'); + int i; + + if (tmp) { + int len = tmp - argv[1]; + + if (!is_tracepoint && !strcmp(tmp, "%return")) { + *is_return = true; + } else { + trace_probe_log_err(len, BAD_ADDR_SUFFIX); + return -EINVAL; + } + *symbol = kmemdup_nul(argv[1], len, GFP_KERNEL); + } else + *symbol = kstrdup(argv[1], GFP_KERNEL); + if (!*symbol) + return -ENOMEM; + + if (*is_return) + return 0; + + /* If there is $retval, this should be a return fprobe. */ + for (i = 2; i < argc; i++) { + tmp = strstr(argv[i], "$retval"); + if (tmp && !isalnum(tmp[7]) && tmp[7] != '_') { + *is_return = true; + /* + * NOTE: Don't check is_tracepoint here, because it will + * be checked when the argument is parsed. + */ + break; + } + } + return 0; +} + static int __trace_fprobe_create(int argc, const char *argv[]) { /* @@ -927,7 +967,7 @@ static int __trace_fprobe_create(int argc, const char *argv[]) struct trace_fprobe *tf = NULL; int i, len, new_argc = 0, ret = 0; bool is_return = false; - char *symbol = NULL, *tmp = NULL; + char *symbol = NULL; const char *event = NULL, *group = FPROBE_EVENT_SYSTEM; const char **new_argv = NULL; int maxactive = 0; @@ -983,20 +1023,10 @@ static int __trace_fprobe_create(int argc, const char *argv[]) trace_probe_log_set_index(1); /* a symbol(or tracepoint) must be specified */ - symbol = kstrdup(argv[1], GFP_KERNEL); - if (!symbol) - return -ENOMEM; + ret = parse_symbol_and_return(argc, argv, &symbol, &is_return, is_tracepoint); + if (ret < 0) + goto parse_error; - tmp = strchr(symbol, '%'); - if (tmp) { - if (!is_tracepoint && !strcmp(tmp, "%return")) { - *tmp = '\0'; - is_return = true; - } else { - trace_probe_log_err(tmp - symbol, BAD_ADDR_SUFFIX); - goto parse_error; - } - } if (!is_return && maxactive) { trace_probe_log_set_index(0); trace_probe_log_err(1, BAD_MAXACT_TYPE); @@ -1096,6 +1126,7 @@ static int __trace_fprobe_create(int argc, const char *argv[]) } out: + traceprobe_finish_parse(&ctx); trace_probe_log_clear(); kfree(new_argv); kfree(symbol); diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index 2f37a6e68aa9..b791524a6536 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -635,7 +635,7 @@ static int s_mode_show(struct seq_file *s, void *v) else seq_printf(s, "%s", thread_mode_str[mode]); - if (mode != MODE_MAX) + if (mode < MODE_MAX - 1) /* if mode is any but last */ seq_puts(s, " "); return 0; diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 23dba01831f7..3d7a180a8427 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -907,6 +907,7 @@ static int __trace_kprobe_create(int argc, const char *argv[]) } out: + traceprobe_finish_parse(&ctx); trace_probe_log_clear(); kfree(new_argv); kfree(symbol); @@ -1561,15 +1562,10 @@ int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type, *fd_type = trace_kprobe_is_return(tk) ? BPF_FD_TYPE_KRETPROBE : BPF_FD_TYPE_KPROBE; - if (tk->symbol) { - *symbol = tk->symbol; - *probe_offset = tk->rp.kp.offset; - *probe_addr = 0; - } else { - *symbol = NULL; - *probe_offset = 0; - *probe_addr = (unsigned long)tk->rp.kp.addr; - } + *probe_offset = tk->rp.kp.offset; + *probe_addr = kallsyms_show_value(current_cred()) ? + (unsigned long)tk->rp.kp.addr : 0; + *symbol = tk->symbol; return 0; } #endif /* CONFIG_PERF_EVENTS */ diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index c68a72707852..4dc74d73fc1d 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -12,6 +12,7 @@ #define pr_fmt(fmt) "trace_probe: " fmt #include <linux/bpf.h> +#include "trace_btf.h" #include "trace_probe.h" @@ -304,31 +305,90 @@ static int parse_trace_event_arg(char *arg, struct fetch_insn *code, #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS -static struct btf *traceprobe_get_btf(void) +static u32 btf_type_int(const struct btf_type *t) { - struct btf *btf = bpf_get_btf_vmlinux(); + return *(u32 *)(t + 1); +} - if (IS_ERR_OR_NULL(btf)) - return NULL; +static bool btf_type_is_char_ptr(struct btf *btf, const struct btf_type *type) +{ + const struct btf_type *real_type; + u32 intdata; + s32 tid; + + real_type = btf_type_skip_modifiers(btf, type->type, &tid); + if (!real_type) + return false; + + if (BTF_INFO_KIND(real_type->info) != BTF_KIND_INT) + return false; - return btf; + intdata = btf_type_int(real_type); + return !(BTF_INT_ENCODING(intdata) & BTF_INT_SIGNED) + && BTF_INT_BITS(intdata) == 8; } -static u32 btf_type_int(const struct btf_type *t) +static bool btf_type_is_char_array(struct btf *btf, const struct btf_type *type) { - return *(u32 *)(t + 1); + const struct btf_type *real_type; + const struct btf_array *array; + u32 intdata; + s32 tid; + + if (BTF_INFO_KIND(type->info) != BTF_KIND_ARRAY) + return false; + + array = (const struct btf_array *)(type + 1); + + real_type = btf_type_skip_modifiers(btf, array->type, &tid); + + intdata = btf_type_int(real_type); + return !(BTF_INT_ENCODING(intdata) & BTF_INT_SIGNED) + && BTF_INT_BITS(intdata) == 8; } -static const char *type_from_btf_id(struct btf *btf, s32 id) +static int check_prepare_btf_string_fetch(char *typename, + struct fetch_insn **pcode, + struct traceprobe_parse_context *ctx) +{ + struct btf *btf = ctx->btf; + + if (!btf || !ctx->last_type) + return 0; + + /* char [] does not need any change. */ + if (btf_type_is_char_array(btf, ctx->last_type)) + return 0; + + /* char * requires dereference the pointer. */ + if (btf_type_is_char_ptr(btf, ctx->last_type)) { + struct fetch_insn *code = *pcode + 1; + + if (code->op == FETCH_OP_END) { + trace_probe_log_err(ctx->offset, TOO_MANY_OPS); + return -E2BIG; + } + if (typename[0] == 'u') + code->op = FETCH_OP_UDEREF; + else + code->op = FETCH_OP_DEREF; + code->offset = 0; + *pcode = code; + return 0; + } + /* Other types are not available for string */ + trace_probe_log_err(ctx->offset, BAD_TYPE4STR); + return -EINVAL; +} + +static const char *fetch_type_from_btf_type(struct btf *btf, + const struct btf_type *type, + struct traceprobe_parse_context *ctx) { - const struct btf_type *t; u32 intdata; - s32 tid; /* TODO: const char * could be converted as a string */ - t = btf_type_skip_modifiers(btf, id, &tid); - - switch (BTF_INFO_KIND(t->info)) { + switch (BTF_INFO_KIND(type->info)) { case BTF_KIND_ENUM: /* enum is "int", so convert to "s32" */ return "s32"; @@ -341,7 +401,7 @@ static const char *type_from_btf_id(struct btf *btf, s32 id) else return "x32"; case BTF_KIND_INT: - intdata = btf_type_int(t); + intdata = btf_type_int(type); if (BTF_INT_ENCODING(intdata) & BTF_INT_SIGNED) { switch (BTF_INT_BITS(intdata)) { case 8: @@ -364,6 +424,10 @@ static const char *type_from_btf_id(struct btf *btf, s32 id) case 64: return "u64"; } + /* bitfield, size is encoded in the type */ + ctx->last_bitsize = BTF_INT_BITS(intdata); + ctx->last_bitoffs += BTF_INT_OFFSET(intdata); + return "u64"; } } /* TODO: support other types */ @@ -371,88 +435,223 @@ static const char *type_from_btf_id(struct btf *btf, s32 id) return NULL; } -static const struct btf_type *find_btf_func_proto(const char *funcname) +static int query_btf_context(struct traceprobe_parse_context *ctx) { - struct btf *btf = traceprobe_get_btf(); - const struct btf_type *t; - s32 id; + const struct btf_param *param; + const struct btf_type *type; + struct btf *btf; + s32 nr; - if (!btf || !funcname) - return ERR_PTR(-EINVAL); + if (ctx->btf) + return 0; + + if (!ctx->funcname) + return -EINVAL; + + type = btf_find_func_proto(ctx->funcname, &btf); + if (!type) + return -ENOENT; - id = btf_find_by_name_kind(btf, funcname, BTF_KIND_FUNC); - if (id <= 0) - return ERR_PTR(-ENOENT); + ctx->btf = btf; + ctx->proto = type; + + /* ctx->params is optional, since func(void) will not have params. */ + nr = 0; + param = btf_get_func_param(type, &nr); + if (!IS_ERR_OR_NULL(param)) { + /* Hide the first 'data' argument of tracepoint */ + if (ctx->flags & TPARG_FL_TPOINT) { + nr--; + param++; + } + } - /* Get BTF_KIND_FUNC type */ - t = btf_type_by_id(btf, id); - if (!t || !btf_type_is_func(t)) - return ERR_PTR(-ENOENT); + if (nr > 0) { + ctx->nr_params = nr; + ctx->params = param; + } else { + ctx->nr_params = 0; + ctx->params = NULL; + } - /* The type of BTF_KIND_FUNC is BTF_KIND_FUNC_PROTO */ - t = btf_type_by_id(btf, t->type); - if (!t || !btf_type_is_func_proto(t)) - return ERR_PTR(-ENOENT); + return 0; +} - return t; +static void clear_btf_context(struct traceprobe_parse_context *ctx) +{ + if (ctx->btf) { + btf_put(ctx->btf); + ctx->btf = NULL; + ctx->proto = NULL; + ctx->params = NULL; + ctx->nr_params = 0; + } } -static const struct btf_param *find_btf_func_param(const char *funcname, s32 *nr, - bool tracepoint) +/* Return 1 if the field separater is arrow operator ('->') */ +static int split_next_field(char *varname, char **next_field, + struct traceprobe_parse_context *ctx) { - const struct btf_param *param; - const struct btf_type *t; + char *field; + int ret = 0; + + field = strpbrk(varname, ".-"); + if (field) { + if (field[0] == '-' && field[1] == '>') { + field[0] = '\0'; + field += 2; + ret = 1; + } else if (field[0] == '.') { + field[0] = '\0'; + field += 1; + } else { + trace_probe_log_err(ctx->offset + field - varname, BAD_HYPHEN); + return -EINVAL; + } + *next_field = field; + } - if (!funcname || !nr) - return ERR_PTR(-EINVAL); + return ret; +} - t = find_btf_func_proto(funcname); - if (IS_ERR(t)) - return (const struct btf_param *)t; +/* + * Parse the field of data structure. The @type must be a pointer type + * pointing the target data structure type. + */ +static int parse_btf_field(char *fieldname, const struct btf_type *type, + struct fetch_insn **pcode, struct fetch_insn *end, + struct traceprobe_parse_context *ctx) +{ + struct fetch_insn *code = *pcode; + const struct btf_member *field; + u32 bitoffs, anon_offs; + char *next; + int is_ptr; + s32 tid; - *nr = btf_type_vlen(t); - param = (const struct btf_param *)(t + 1); + do { + /* Outer loop for solving arrow operator ('->') */ + if (BTF_INFO_KIND(type->info) != BTF_KIND_PTR) { + trace_probe_log_err(ctx->offset, NO_PTR_STRCT); + return -EINVAL; + } + /* Convert a struct pointer type to a struct type */ + type = btf_type_skip_modifiers(ctx->btf, type->type, &tid); + if (!type) { + trace_probe_log_err(ctx->offset, BAD_BTF_TID); + return -EINVAL; + } - /* Hide the first 'data' argument of tracepoint */ - if (tracepoint) { - (*nr)--; - param++; - } + bitoffs = 0; + do { + /* Inner loop for solving dot operator ('.') */ + next = NULL; + is_ptr = split_next_field(fieldname, &next, ctx); + if (is_ptr < 0) + return is_ptr; + + anon_offs = 0; + field = btf_find_struct_member(ctx->btf, type, fieldname, + &anon_offs); + if (!field) { + trace_probe_log_err(ctx->offset, NO_BTF_FIELD); + return -ENOENT; + } + /* Add anonymous structure/union offset */ + bitoffs += anon_offs; + + /* Accumulate the bit-offsets of the dot-connected fields */ + if (btf_type_kflag(type)) { + bitoffs += BTF_MEMBER_BIT_OFFSET(field->offset); + ctx->last_bitsize = BTF_MEMBER_BITFIELD_SIZE(field->offset); + } else { + bitoffs += field->offset; + ctx->last_bitsize = 0; + } - if (*nr > 0) - return param; - else - return NULL; + type = btf_type_skip_modifiers(ctx->btf, field->type, &tid); + if (!type) { + trace_probe_log_err(ctx->offset, BAD_BTF_TID); + return -EINVAL; + } + + ctx->offset += next - fieldname; + fieldname = next; + } while (!is_ptr && fieldname); + + if (++code == end) { + trace_probe_log_err(ctx->offset, TOO_MANY_OPS); + return -EINVAL; + } + code->op = FETCH_OP_DEREF; /* TODO: user deref support */ + code->offset = bitoffs / 8; + *pcode = code; + + ctx->last_bitoffs = bitoffs % 8; + ctx->last_type = type; + } while (fieldname); + + return 0; } -static int parse_btf_arg(const char *varname, struct fetch_insn *code, +static int parse_btf_arg(char *varname, + struct fetch_insn **pcode, struct fetch_insn *end, struct traceprobe_parse_context *ctx) { - struct btf *btf = traceprobe_get_btf(); + struct fetch_insn *code = *pcode; const struct btf_param *params; - int i; + const struct btf_type *type; + char *field = NULL; + int i, is_ptr, ret; + u32 tid; + + if (WARN_ON_ONCE(!ctx->funcname)) + return -EINVAL; - if (!btf) { - trace_probe_log_err(ctx->offset, NOSUP_BTFARG); + is_ptr = split_next_field(varname, &field, ctx); + if (is_ptr < 0) + return is_ptr; + if (!is_ptr && field) { + /* dot-connected field on an argument is not supported. */ + trace_probe_log_err(ctx->offset + field - varname, + NOSUP_DAT_ARG); return -EOPNOTSUPP; } - if (WARN_ON_ONCE(!ctx->funcname)) - return -EINVAL; + if (ctx->flags & TPARG_FL_RETURN) { + if (strcmp(varname, "$retval") != 0) { + trace_probe_log_err(ctx->offset, NO_BTFARG); + return -ENOENT; + } + code->op = FETCH_OP_RETVAL; + /* Check whether the function return type is not void */ + if (query_btf_context(ctx) == 0) { + if (ctx->proto->type == 0) { + trace_probe_log_err(ctx->offset, NO_RETVAL); + return -ENOENT; + } + tid = ctx->proto->type; + goto found; + } + if (field) { + trace_probe_log_err(ctx->offset + field - varname, + NO_BTF_ENTRY); + return -ENOENT; + } + return 0; + } - if (!ctx->params) { - params = find_btf_func_param(ctx->funcname, &ctx->nr_params, - ctx->flags & TPARG_FL_TPOINT); - if (IS_ERR_OR_NULL(params)) { + if (!ctx->btf) { + ret = query_btf_context(ctx); + if (ret < 0 || ctx->nr_params == 0) { trace_probe_log_err(ctx->offset, NO_BTF_ENTRY); return PTR_ERR(params); } - ctx->params = params; - } else - params = ctx->params; + } + params = ctx->params; for (i = 0; i < ctx->nr_params; i++) { - const char *name = btf_name_by_offset(btf, params[i].name_off); + const char *name = btf_name_by_offset(ctx->btf, params[i].name_off); if (name && !strcmp(name, varname)) { code->op = FETCH_OP_ARG; @@ -460,91 +659,114 @@ static int parse_btf_arg(const char *varname, struct fetch_insn *code, code->param = i + 1; else code->param = i; - return 0; + tid = params[i].type; + goto found; } } trace_probe_log_err(ctx->offset, NO_BTFARG); return -ENOENT; -} - -static const struct fetch_type *parse_btf_arg_type(int arg_idx, - struct traceprobe_parse_context *ctx) -{ - struct btf *btf = traceprobe_get_btf(); - const char *typestr = NULL; - if (btf && ctx->params) { - if (ctx->flags & TPARG_FL_TPOINT) - arg_idx--; - typestr = type_from_btf_id(btf, ctx->params[arg_idx].type); +found: + type = btf_type_skip_modifiers(ctx->btf, tid, &tid); + if (!type) { + trace_probe_log_err(ctx->offset, BAD_BTF_TID); + return -EINVAL; } - - return find_fetch_type(typestr, ctx->flags); + /* Initialize the last type information */ + ctx->last_type = type; + ctx->last_bitoffs = 0; + ctx->last_bitsize = 0; + if (field) { + ctx->offset += field - varname; + return parse_btf_field(field, type, pcode, end, ctx); + } + return 0; } -static const struct fetch_type *parse_btf_retval_type( +static const struct fetch_type *find_fetch_type_from_btf_type( struct traceprobe_parse_context *ctx) { - struct btf *btf = traceprobe_get_btf(); + struct btf *btf = ctx->btf; const char *typestr = NULL; - const struct btf_type *t; - if (btf && ctx->funcname) { - t = find_btf_func_proto(ctx->funcname); - if (!IS_ERR(t)) - typestr = type_from_btf_id(btf, t->type); - } + if (btf && ctx->last_type) + typestr = fetch_type_from_btf_type(btf, ctx->last_type, ctx); return find_fetch_type(typestr, ctx->flags); } -static bool is_btf_retval_void(const char *funcname) +static int parse_btf_bitfield(struct fetch_insn **pcode, + struct traceprobe_parse_context *ctx) { - const struct btf_type *t; + struct fetch_insn *code = *pcode; - t = find_btf_func_proto(funcname); - if (IS_ERR(t)) - return false; + if ((ctx->last_bitsize % 8 == 0) && ctx->last_bitoffs == 0) + return 0; + + code++; + if (code->op != FETCH_OP_NOP) { + trace_probe_log_err(ctx->offset, TOO_MANY_OPS); + return -EINVAL; + } + *pcode = code; - return t->type == 0; + code->op = FETCH_OP_MOD_BF; + code->lshift = 64 - (ctx->last_bitsize + ctx->last_bitoffs); + code->rshift = 64 - ctx->last_bitsize; + code->basesize = 64 / 8; + return 0; } + #else -static struct btf *traceprobe_get_btf(void) +static void clear_btf_context(struct traceprobe_parse_context *ctx) { - return NULL; + ctx->btf = NULL; } -static const struct btf_param *find_btf_func_param(const char *funcname, s32 *nr, - bool tracepoint) +static int query_btf_context(struct traceprobe_parse_context *ctx) { - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; } -static int parse_btf_arg(const char *varname, struct fetch_insn *code, +static int parse_btf_arg(char *varname, + struct fetch_insn **pcode, struct fetch_insn *end, struct traceprobe_parse_context *ctx) { trace_probe_log_err(ctx->offset, NOSUP_BTFARG); return -EOPNOTSUPP; } -#define parse_btf_arg_type(idx, ctx) \ - find_fetch_type(NULL, ctx->flags) +static int parse_btf_bitfield(struct fetch_insn **pcode, + struct traceprobe_parse_context *ctx) +{ + trace_probe_log_err(ctx->offset, NOSUP_BTFARG); + return -EOPNOTSUPP; +} -#define parse_btf_retval_type(ctx) \ +#define find_fetch_type_from_btf_type(ctx) \ find_fetch_type(NULL, ctx->flags) -#define is_btf_retval_void(funcname) (false) +static int check_prepare_btf_string_fetch(char *typename, + struct fetch_insn **pcode, + struct traceprobe_parse_context *ctx) +{ + return 0; +} #endif #define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long)) -static int parse_probe_vars(char *arg, const struct fetch_type *t, - struct fetch_insn *code, +/* Parse $vars. @orig_arg points '$', which syncs to @ctx->offset */ +static int parse_probe_vars(char *orig_arg, const struct fetch_type *t, + struct fetch_insn **pcode, + struct fetch_insn *end, struct traceprobe_parse_context *ctx) { - unsigned long param; + struct fetch_insn *code = *pcode; int err = TP_ERR_BAD_VAR; + char *arg = orig_arg + 1; + unsigned long param; int ret = 0; int len; @@ -563,18 +785,17 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t, goto inval; } - if (strcmp(arg, "retval") == 0) { - if (ctx->flags & TPARG_FL_RETURN) { - if ((ctx->flags & TPARG_FL_KERNEL) && - is_btf_retval_void(ctx->funcname)) { - err = TP_ERR_NO_RETVAL; - goto inval; - } + if (str_has_prefix(arg, "retval")) { + if (!(ctx->flags & TPARG_FL_RETURN)) { + err = TP_ERR_RETVAL_ON_PROBE; + goto inval; + } + if (!(ctx->flags & TPARG_FL_KERNEL) || + !IS_ENABLED(CONFIG_PROBE_EVENTS_BTF_ARGS)) { code->op = FETCH_OP_RETVAL; return 0; } - err = TP_ERR_RETVAL_ON_PROBE; - goto inval; + return parse_btf_arg(orig_arg, pcode, end, ctx); } len = str_has_prefix(arg, "stack"); @@ -676,7 +897,7 @@ parse_probe_arg(char *arg, const struct fetch_type *type, switch (arg[0]) { case '$': - ret = parse_probe_vars(arg + 1, type, code, ctx); + ret = parse_probe_vars(arg, type, pcode, end, ctx); break; case '%': /* named register */ @@ -795,6 +1016,8 @@ parse_probe_arg(char *arg, const struct fetch_type *type, code->op = deref; code->offset = offset; + /* Reset the last type if used */ + ctx->last_type = NULL; } break; case '\\': /* Immediate value */ @@ -818,7 +1041,7 @@ parse_probe_arg(char *arg, const struct fetch_type *type, trace_probe_log_err(ctx->offset, NOSUP_BTFARG); return -EINVAL; } - ret = parse_btf_arg(arg, code, ctx); + ret = parse_btf_arg(arg, pcode, end, ctx); break; } } @@ -964,17 +1187,22 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size, goto out; code[FETCH_INSN_MAX - 1].op = FETCH_OP_END; + ctx->last_type = NULL; ret = parse_probe_arg(arg, parg->type, &code, &code[FETCH_INSN_MAX - 1], ctx); if (ret) goto fail; /* Update storing type if BTF is available */ - if (IS_ENABLED(CONFIG_PROBE_EVENTS_BTF_ARGS) && !t) { - if (code->op == FETCH_OP_ARG) - parg->type = parse_btf_arg_type(code->param, ctx); - else if (code->op == FETCH_OP_RETVAL) - parg->type = parse_btf_retval_type(ctx); + if (IS_ENABLED(CONFIG_PROBE_EVENTS_BTF_ARGS) && + ctx->last_type) { + if (!t) { + parg->type = find_fetch_type_from_btf_type(ctx); + } else if (strstr(t, "string")) { + ret = check_prepare_btf_string_fetch(t, &code, ctx); + if (ret) + goto fail; + } } ret = -EINVAL; @@ -1048,6 +1276,11 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size, trace_probe_log_err(ctx->offset + t - arg, BAD_BITFIELD); goto fail; } + } else if (IS_ENABLED(CONFIG_PROBE_EVENTS_BTF_ARGS) && + ctx->last_type) { + ret = parse_btf_bitfield(&code, ctx); + if (ret) + goto fail; } ret = -EINVAL; /* Loop(Array) operation */ @@ -1231,7 +1464,6 @@ static int sprint_nth_btf_arg(int idx, const char *type, char *buf, int bufsize, struct traceprobe_parse_context *ctx) { - struct btf *btf = traceprobe_get_btf(); const char *name; int ret; @@ -1239,7 +1471,7 @@ static int sprint_nth_btf_arg(int idx, const char *type, trace_probe_log_err(0, NO_BTFARG); return -ENOENT; } - name = btf_name_by_offset(btf, ctx->params[idx].name_off); + name = btf_name_by_offset(ctx->btf, ctx->params[idx].name_off); if (!name) { trace_probe_log_err(0, NO_BTF_ENTRY); return -ENOENT; @@ -1260,7 +1492,6 @@ const char **traceprobe_expand_meta_args(int argc, const char *argv[], const struct btf_param *params = NULL; int i, j, n, used, ret, args_idx = -1; const char **new_argv = NULL; - int nr_params; ret = argv_has_var_arg(argc, argv, &args_idx, ctx); if (ret < 0) @@ -1271,9 +1502,8 @@ const char **traceprobe_expand_meta_args(int argc, const char *argv[], return NULL; } - params = find_btf_func_param(ctx->funcname, &nr_params, - ctx->flags & TPARG_FL_TPOINT); - if (IS_ERR_OR_NULL(params)) { + ret = query_btf_context(ctx); + if (ret < 0 || ctx->nr_params == 0) { if (args_idx != -1) { /* $arg* requires BTF info */ trace_probe_log_err(0, NOSUP_BTFARG); @@ -1282,8 +1512,6 @@ const char **traceprobe_expand_meta_args(int argc, const char *argv[], *new_argc = argc; return NULL; } - ctx->params = params; - ctx->nr_params = nr_params; if (args_idx >= 0) *new_argc = argc + ctx->nr_params - 1; @@ -1298,7 +1526,7 @@ const char **traceprobe_expand_meta_args(int argc, const char *argv[], for (i = 0, j = 0; i < argc; i++) { trace_probe_log_set_index(i + 2); if (i == args_idx) { - for (n = 0; n < nr_params; n++) { + for (n = 0; n < ctx->nr_params; n++) { ret = sprint_nth_btf_arg(n, "", buf + used, bufsize - used, ctx); if (ret < 0) @@ -1337,6 +1565,11 @@ error: return ERR_PTR(ret); } +void traceprobe_finish_parse(struct traceprobe_parse_context *ctx) +{ + clear_btf_context(ctx); +} + int traceprobe_update_arg(struct probe_arg *arg) { struct fetch_insn *code = arg->code; diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 01ea148723de..02b432ae7513 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -383,9 +383,15 @@ static inline bool tparg_is_function_entry(unsigned int flags) struct traceprobe_parse_context { struct trace_event_call *event; - const struct btf_param *params; - s32 nr_params; - const char *funcname; + /* BTF related parameters */ + const char *funcname; /* Function name in BTF */ + const struct btf_type *proto; /* Prototype of the function */ + const struct btf_param *params; /* Parameter of the function */ + s32 nr_params; /* The number of the parameters */ + struct btf *btf; /* The BTF to be used */ + const struct btf_type *last_type; /* Saved type */ + u32 last_bitoffs; /* Saved bitoffs */ + u32 last_bitsize; /* Saved bitsize */ unsigned int flags; int offset; }; @@ -400,6 +406,12 @@ const char **traceprobe_expand_meta_args(int argc, const char *argv[], extern int traceprobe_update_arg(struct probe_arg *arg); extern void traceprobe_free_probe_arg(struct probe_arg *arg); +/* + * If either traceprobe_parse_probe_arg() or traceprobe_expand_meta_args() is called, + * this MUST be called for clean up the context and return a resource. + */ +void traceprobe_finish_parse(struct traceprobe_parse_context *ctx); + extern int traceprobe_split_symbol_offset(char *symbol, long *offset); int traceprobe_parse_event_name(const char **pevent, const char **pgroup, char *buf, int offset); @@ -495,7 +507,14 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call, C(BAD_VAR_ARGS, "$arg* must be an independent parameter without name etc."),\ C(NOFENTRY_ARGS, "$arg* can be used only on function entry"), \ C(DOUBLE_ARGS, "$arg* can be used only once in the parameters"), \ - C(ARGS_2LONG, "$arg* failed because the argument list is too long"), + C(ARGS_2LONG, "$arg* failed because the argument list is too long"), \ + C(ARGIDX_2BIG, "$argN index is too big"), \ + C(NO_PTR_STRCT, "This is not a pointer to union/structure."), \ + C(NOSUP_DAT_ARG, "Non pointer structure/union argument is not supported."),\ + C(BAD_HYPHEN, "Failed to parse single hyphen. Forgot '>'?"), \ + C(NO_BTF_FIELD, "This field is not found."), \ + C(BAD_BTF_TID, "Failed to get BTF type info."),\ + C(BAD_TYPE4STR, "This type does not fit for string."), #undef C #define C(a, b) TP_ERR_##a @@ -519,3 +538,8 @@ void __trace_probe_log_err(int offset, int err); #define trace_probe_log_err(offs, err) \ __trace_probe_log_err(offs, TP_ERR_##err) + +struct uprobe_dispatch_data { + struct trace_uprobe *tu; + unsigned long bp_addr; +}; diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 942ddbdace4a..de753403cdaf 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -555,12 +555,15 @@ static int perf_call_bpf_enter(struct trace_event_call *call, struct pt_regs *re struct syscall_trace_enter *rec) { struct syscall_tp_t { - unsigned long long regs; + struct trace_entry ent; unsigned long syscall_nr; unsigned long args[SYSCALL_DEFINE_MAXARGS]; - } param; + } __aligned(8) param; int i; + BUILD_BUG_ON(sizeof(param.ent) < sizeof(void *)); + + /* bpf prog requires 'regs' to be the first member in the ctx (a.k.a. ¶m) */ *(struct pt_regs **)¶m = regs; param.syscall_nr = rec->nr; for (i = 0; i < sys_data->nb_args; i++) @@ -657,11 +660,12 @@ static int perf_call_bpf_exit(struct trace_event_call *call, struct pt_regs *reg struct syscall_trace_exit *rec) { struct syscall_tp_t { - unsigned long long regs; + struct trace_entry ent; unsigned long syscall_nr; unsigned long ret; - } param; + } __aligned(8) param; + /* bpf prog requires 'regs' to be the first member in the ctx (a.k.a. ¶m) */ *(struct pt_regs **)¶m = regs; param.syscall_nr = rec->nr; param.ret = rec->ret; diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 688bf579f2f1..99c051de412a 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -88,11 +88,6 @@ static struct trace_uprobe *to_trace_uprobe(struct dyn_event *ev) static int register_uprobe_event(struct trace_uprobe *tu); static int unregister_uprobe_event(struct trace_uprobe *tu); -struct uprobe_dispatch_data { - struct trace_uprobe *tu; - unsigned long bp_addr; -}; - static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs); static int uretprobe_dispatcher(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs); @@ -693,6 +688,7 @@ static int __trace_uprobe_create(int argc, const char **argv) trace_probe_log_set_index(i + 2); ret = traceprobe_parse_probe_arg(&tu->tp, i, argv[i], &ctx); + traceprobe_finish_parse(&ctx); if (ret) goto error; } @@ -1352,7 +1348,7 @@ static void __uprobe_perf_func(struct trace_uprobe *tu, if (bpf_prog_array_valid(call)) { u32 ret; - ret = bpf_prog_run_array_sleepable(call->prog_array, regs, bpf_prog_run); + ret = bpf_prog_run_array_uprobe(call->prog_array, regs, bpf_prog_run); if (!ret) return; } @@ -1418,7 +1414,7 @@ static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func, int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type, const char **filename, u64 *probe_offset, - bool perf_type_tracepoint) + u64 *probe_addr, bool perf_type_tracepoint) { const char *pevent = trace_event_name(event->tp_event); const char *group = event->tp_event->class->system; @@ -1435,6 +1431,7 @@ int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type, : BPF_FD_TYPE_UPROBE; *filename = tu->filename; *probe_offset = tu->offset; + *probe_addr = 0; return 0; } #endif /* CONFIG_PERF_EVENTS */ |