aboutsummaryrefslogtreecommitdiff
path: root/kernel/trace
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/trace')
-rw-r--r--kernel/trace/Makefile1
-rw-r--r--kernel/trace/bpf_trace.c413
-rw-r--r--kernel/trace/ftrace.c10
-rw-r--r--kernel/trace/ring_buffer.c20
-rw-r--r--kernel/trace/trace.c123
-rw-r--r--kernel/trace/trace.h14
-rw-r--r--kernel/trace/trace_btf.c122
-rw-r--r--kernel/trace/trace_btf.h11
-rw-r--r--kernel/trace/trace_entries.h2
-rw-r--r--kernel/trace/trace_eprobe.c22
-rw-r--r--kernel/trace/trace_events.c76
-rw-r--r--kernel/trace/trace_events_filter.c315
-rw-r--r--kernel/trace/trace_events_user.c15
-rw-r--r--kernel/trace/trace_export.c9
-rw-r--r--kernel/trace/trace_fprobe.c59
-rw-r--r--kernel/trace/trace_hwlat.c2
-rw-r--r--kernel/trace/trace_kprobe.c14
-rw-r--r--kernel/trace/trace_probe.c499
-rw-r--r--kernel/trace/trace_probe.h32
-rw-r--r--kernel/trace/trace_syscalls.c12
-rw-r--r--kernel/trace/trace_uprobe.c11
21 files changed, 1413 insertions, 369 deletions
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 64b61f67a403..057cd975d014 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -99,6 +99,7 @@ obj-$(CONFIG_KGDB_KDB) += trace_kdb.o
endif
obj-$(CONFIG_DYNAMIC_EVENTS) += trace_dynevent.o
obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o
+obj-$(CONFIG_PROBE_EVENTS_BTF_ARGS) += trace_btf.o
obj-$(CONFIG_UPROBE_EVENTS) += trace_uprobe.o
obj-$(CONFIG_BOOTTIME_TRACING) += trace_boot.o
obj-$(CONFIG_FTRACE_RECORD_RECURSION) += trace_recursion_record.o
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index bd1a42b23f3f..a7264b2c17ad 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -23,6 +23,7 @@
#include <linux/sort.h>
#include <linux/key.h>
#include <linux/verification.h>
+#include <linux/namei.h>
#include <net/bpf_sk_storage.h>
@@ -86,6 +87,9 @@ static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx);
static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx);
+static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx);
+static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx);
+
/**
* trace_call_bpf - invoke BPF program
* @call: tracepoint event
@@ -223,17 +227,6 @@ const struct bpf_func_proto bpf_probe_read_user_str_proto = {
.arg3_type = ARG_ANYTHING,
};
-static __always_inline int
-bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
-{
- int ret;
-
- ret = copy_from_kernel_nofault(dst, unsafe_ptr, size);
- if (unlikely(ret < 0))
- memset(dst, 0, size);
- return ret;
-}
-
BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size,
const void *, unsafe_ptr)
{
@@ -1066,7 +1059,16 @@ static unsigned long get_entry_ip(unsigned long fentry_ip)
BPF_CALL_1(bpf_get_func_ip_kprobe, struct pt_regs *, regs)
{
- struct kprobe *kp = kprobe_running();
+ struct bpf_trace_run_ctx *run_ctx __maybe_unused;
+ struct kprobe *kp;
+
+#ifdef CONFIG_UPROBES
+ run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx);
+ if (run_ctx->is_uprobe)
+ return ((struct uprobe_dispatch_data *)current->utask->vaddr)->bp_addr;
+#endif
+
+ kp = kprobe_running();
if (!kp || !(kp->flags & KPROBE_FLAG_ON_FUNC_ENTRY))
return 0;
@@ -1105,6 +1107,30 @@ static const struct bpf_func_proto bpf_get_attach_cookie_proto_kmulti = {
.arg1_type = ARG_PTR_TO_CTX,
};
+BPF_CALL_1(bpf_get_func_ip_uprobe_multi, struct pt_regs *, regs)
+{
+ return bpf_uprobe_multi_entry_ip(current->bpf_ctx);
+}
+
+static const struct bpf_func_proto bpf_get_func_ip_proto_uprobe_multi = {
+ .func = bpf_get_func_ip_uprobe_multi,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
+BPF_CALL_1(bpf_get_attach_cookie_uprobe_multi, struct pt_regs *, regs)
+{
+ return bpf_uprobe_multi_cookie(current->bpf_ctx);
+}
+
+static const struct bpf_func_proto bpf_get_attach_cookie_proto_umulti = {
+ .func = bpf_get_attach_cookie_uprobe_multi,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
BPF_CALL_1(bpf_get_attach_cookie_trace, void *, ctx)
{
struct bpf_trace_run_ctx *run_ctx;
@@ -1547,13 +1573,17 @@ kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_override_return_proto;
#endif
case BPF_FUNC_get_func_ip:
- return prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI ?
- &bpf_get_func_ip_proto_kprobe_multi :
- &bpf_get_func_ip_proto_kprobe;
+ if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI)
+ return &bpf_get_func_ip_proto_kprobe_multi;
+ if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI)
+ return &bpf_get_func_ip_proto_uprobe_multi;
+ return &bpf_get_func_ip_proto_kprobe;
case BPF_FUNC_get_attach_cookie:
- return prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI ?
- &bpf_get_attach_cookie_proto_kmulti :
- &bpf_get_attach_cookie_proto_trace;
+ if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI)
+ return &bpf_get_attach_cookie_proto_kmulti;
+ if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI)
+ return &bpf_get_attach_cookie_proto_umulti;
+ return &bpf_get_attach_cookie_proto_trace;
default:
return bpf_tracing_func_proto(func_id, prog);
}
@@ -2376,9 +2406,13 @@ int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
if (is_tracepoint || is_syscall_tp) {
*buf = is_tracepoint ? event->tp_event->tp->name
: event->tp_event->name;
- *fd_type = BPF_FD_TYPE_TRACEPOINT;
- *probe_offset = 0x0;
- *probe_addr = 0x0;
+ /* We allow NULL pointer for tracepoint */
+ if (fd_type)
+ *fd_type = BPF_FD_TYPE_TRACEPOINT;
+ if (probe_offset)
+ *probe_offset = 0x0;
+ if (probe_addr)
+ *probe_addr = 0x0;
} else {
/* kprobe/uprobe */
err = -EOPNOTSUPP;
@@ -2391,7 +2425,7 @@ int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
#ifdef CONFIG_UPROBE_EVENTS
if (flags & TRACE_EVENT_FL_UPROBE)
err = bpf_get_uprobe_info(event, fd_type, buf,
- probe_offset,
+ probe_offset, probe_addr,
event->attr.type == PERF_TYPE_TRACEPOINT);
#endif
}
@@ -2476,6 +2510,7 @@ struct bpf_kprobe_multi_link {
u32 cnt;
u32 mods_cnt;
struct module **mods;
+ u32 flags;
};
struct bpf_kprobe_multi_run_ctx {
@@ -2565,9 +2600,44 @@ static void bpf_kprobe_multi_link_dealloc(struct bpf_link *link)
kfree(kmulti_link);
}
+static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link,
+ struct bpf_link_info *info)
+{
+ u64 __user *uaddrs = u64_to_user_ptr(info->kprobe_multi.addrs);
+ struct bpf_kprobe_multi_link *kmulti_link;
+ u32 ucount = info->kprobe_multi.count;
+ int err = 0, i;
+
+ if (!uaddrs ^ !ucount)
+ return -EINVAL;
+
+ kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
+ info->kprobe_multi.count = kmulti_link->cnt;
+ info->kprobe_multi.flags = kmulti_link->flags;
+
+ if (!uaddrs)
+ return 0;
+ if (ucount < kmulti_link->cnt)
+ err = -ENOSPC;
+ else
+ ucount = kmulti_link->cnt;
+
+ if (kallsyms_show_value(current_cred())) {
+ if (copy_to_user(uaddrs, kmulti_link->addrs, ucount * sizeof(u64)))
+ return -EFAULT;
+ } else {
+ for (i = 0; i < ucount; i++) {
+ if (put_user(0, uaddrs + i))
+ return -EFAULT;
+ }
+ }
+ return err;
+}
+
static const struct bpf_link_ops bpf_kprobe_multi_link_lops = {
.release = bpf_kprobe_multi_link_release,
.dealloc = bpf_kprobe_multi_link_dealloc,
+ .fill_link_info = bpf_kprobe_multi_link_fill_link_info,
};
static void bpf_kprobe_multi_cookie_swap(void *a, void *b, int size, const void *priv)
@@ -2881,6 +2951,7 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
link->addrs = addrs;
link->cookies = cookies;
link->cnt = cnt;
+ link->flags = flags;
if (cookies) {
/*
@@ -2931,3 +3002,301 @@ static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
return 0;
}
#endif
+
+#ifdef CONFIG_UPROBES
+struct bpf_uprobe_multi_link;
+
+struct bpf_uprobe {
+ struct bpf_uprobe_multi_link *link;
+ loff_t offset;
+ u64 cookie;
+ struct uprobe_consumer consumer;
+};
+
+struct bpf_uprobe_multi_link {
+ struct path path;
+ struct bpf_link link;
+ u32 cnt;
+ struct bpf_uprobe *uprobes;
+ struct task_struct *task;
+};
+
+struct bpf_uprobe_multi_run_ctx {
+ struct bpf_run_ctx run_ctx;
+ unsigned long entry_ip;
+ struct bpf_uprobe *uprobe;
+};
+
+static void bpf_uprobe_unregister(struct path *path, struct bpf_uprobe *uprobes,
+ u32 cnt)
+{
+ u32 i;
+
+ for (i = 0; i < cnt; i++) {
+ uprobe_unregister(d_real_inode(path->dentry), uprobes[i].offset,
+ &uprobes[i].consumer);
+ }
+}
+
+static void bpf_uprobe_multi_link_release(struct bpf_link *link)
+{
+ struct bpf_uprobe_multi_link *umulti_link;
+
+ umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
+ bpf_uprobe_unregister(&umulti_link->path, umulti_link->uprobes, umulti_link->cnt);
+}
+
+static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link)
+{
+ struct bpf_uprobe_multi_link *umulti_link;
+
+ umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
+ if (umulti_link->task)
+ put_task_struct(umulti_link->task);
+ path_put(&umulti_link->path);
+ kvfree(umulti_link->uprobes);
+ kfree(umulti_link);
+}
+
+static const struct bpf_link_ops bpf_uprobe_multi_link_lops = {
+ .release = bpf_uprobe_multi_link_release,
+ .dealloc = bpf_uprobe_multi_link_dealloc,
+};
+
+static int uprobe_prog_run(struct bpf_uprobe *uprobe,
+ unsigned long entry_ip,
+ struct pt_regs *regs)
+{
+ struct bpf_uprobe_multi_link *link = uprobe->link;
+ struct bpf_uprobe_multi_run_ctx run_ctx = {
+ .entry_ip = entry_ip,
+ .uprobe = uprobe,
+ };
+ struct bpf_prog *prog = link->link.prog;
+ bool sleepable = prog->aux->sleepable;
+ struct bpf_run_ctx *old_run_ctx;
+ int err = 0;
+
+ if (link->task && current != link->task)
+ return 0;
+
+ if (sleepable)
+ rcu_read_lock_trace();
+ else
+ rcu_read_lock();
+
+ migrate_disable();
+
+ old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
+ err = bpf_prog_run(link->link.prog, regs);
+ bpf_reset_run_ctx(old_run_ctx);
+
+ migrate_enable();
+
+ if (sleepable)
+ rcu_read_unlock_trace();
+ else
+ rcu_read_unlock();
+ return err;
+}
+
+static bool
+uprobe_multi_link_filter(struct uprobe_consumer *con, enum uprobe_filter_ctx ctx,
+ struct mm_struct *mm)
+{
+ struct bpf_uprobe *uprobe;
+
+ uprobe = container_of(con, struct bpf_uprobe, consumer);
+ return uprobe->link->task->mm == mm;
+}
+
+static int
+uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs)
+{
+ struct bpf_uprobe *uprobe;
+
+ uprobe = container_of(con, struct bpf_uprobe, consumer);
+ return uprobe_prog_run(uprobe, instruction_pointer(regs), regs);
+}
+
+static int
+uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs)
+{
+ struct bpf_uprobe *uprobe;
+
+ uprobe = container_of(con, struct bpf_uprobe, consumer);
+ return uprobe_prog_run(uprobe, func, regs);
+}
+
+static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
+{
+ struct bpf_uprobe_multi_run_ctx *run_ctx;
+
+ run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx, run_ctx);
+ return run_ctx->entry_ip;
+}
+
+static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx)
+{
+ struct bpf_uprobe_multi_run_ctx *run_ctx;
+
+ run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx, run_ctx);
+ return run_ctx->uprobe->cookie;
+}
+
+int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+ struct bpf_uprobe_multi_link *link = NULL;
+ unsigned long __user *uref_ctr_offsets;
+ unsigned long *ref_ctr_offsets = NULL;
+ struct bpf_link_primer link_primer;
+ struct bpf_uprobe *uprobes = NULL;
+ struct task_struct *task = NULL;
+ unsigned long __user *uoffsets;
+ u64 __user *ucookies;
+ void __user *upath;
+ u32 flags, cnt, i;
+ struct path path;
+ char *name;
+ pid_t pid;
+ int err;
+
+ /* no support for 32bit archs yet */
+ if (sizeof(u64) != sizeof(void *))
+ return -EOPNOTSUPP;
+
+ if (prog->expected_attach_type != BPF_TRACE_UPROBE_MULTI)
+ return -EINVAL;
+
+ flags = attr->link_create.uprobe_multi.flags;
+ if (flags & ~BPF_F_UPROBE_MULTI_RETURN)
+ return -EINVAL;
+
+ /*
+ * path, offsets and cnt are mandatory,
+ * ref_ctr_offsets and cookies are optional
+ */
+ upath = u64_to_user_ptr(attr->link_create.uprobe_multi.path);
+ uoffsets = u64_to_user_ptr(attr->link_create.uprobe_multi.offsets);
+ cnt = attr->link_create.uprobe_multi.cnt;
+
+ if (!upath || !uoffsets || !cnt)
+ return -EINVAL;
+
+ uref_ctr_offsets = u64_to_user_ptr(attr->link_create.uprobe_multi.ref_ctr_offsets);
+ ucookies = u64_to_user_ptr(attr->link_create.uprobe_multi.cookies);
+
+ name = strndup_user(upath, PATH_MAX);
+ if (IS_ERR(name)) {
+ err = PTR_ERR(name);
+ return err;
+ }
+
+ err = kern_path(name, LOOKUP_FOLLOW, &path);
+ kfree(name);
+ if (err)
+ return err;
+
+ if (!d_is_reg(path.dentry)) {
+ err = -EBADF;
+ goto error_path_put;
+ }
+
+ pid = attr->link_create.uprobe_multi.pid;
+ if (pid) {
+ rcu_read_lock();
+ task = get_pid_task(find_vpid(pid), PIDTYPE_PID);
+ rcu_read_unlock();
+ if (!task)
+ goto error_path_put;
+ }
+
+ err = -ENOMEM;
+
+ link = kzalloc(sizeof(*link), GFP_KERNEL);
+ uprobes = kvcalloc(cnt, sizeof(*uprobes), GFP_KERNEL);
+
+ if (!uprobes || !link)
+ goto error_free;
+
+ if (uref_ctr_offsets) {
+ ref_ctr_offsets = kvcalloc(cnt, sizeof(*ref_ctr_offsets), GFP_KERNEL);
+ if (!ref_ctr_offsets)
+ goto error_free;
+ }
+
+ for (i = 0; i < cnt; i++) {
+ if (ucookies && __get_user(uprobes[i].cookie, ucookies + i)) {
+ err = -EFAULT;
+ goto error_free;
+ }
+ if (uref_ctr_offsets && __get_user(ref_ctr_offsets[i], uref_ctr_offsets + i)) {
+ err = -EFAULT;
+ goto error_free;
+ }
+ if (__get_user(uprobes[i].offset, uoffsets + i)) {
+ err = -EFAULT;
+ goto error_free;
+ }
+
+ uprobes[i].link = link;
+
+ if (flags & BPF_F_UPROBE_MULTI_RETURN)
+ uprobes[i].consumer.ret_handler = uprobe_multi_link_ret_handler;
+ else
+ uprobes[i].consumer.handler = uprobe_multi_link_handler;
+
+ if (pid)
+ uprobes[i].consumer.filter = uprobe_multi_link_filter;
+ }
+
+ link->cnt = cnt;
+ link->uprobes = uprobes;
+ link->path = path;
+ link->task = task;
+
+ bpf_link_init(&link->link, BPF_LINK_TYPE_UPROBE_MULTI,
+ &bpf_uprobe_multi_link_lops, prog);
+
+ for (i = 0; i < cnt; i++) {
+ err = uprobe_register_refctr(d_real_inode(link->path.dentry),
+ uprobes[i].offset,
+ ref_ctr_offsets ? ref_ctr_offsets[i] : 0,
+ &uprobes[i].consumer);
+ if (err) {
+ bpf_uprobe_unregister(&path, uprobes, i);
+ goto error_free;
+ }
+ }
+
+ err = bpf_link_prime(&link->link, &link_primer);
+ if (err)
+ goto error_free;
+
+ kvfree(ref_ctr_offsets);
+ return bpf_link_settle(&link_primer);
+
+error_free:
+ kvfree(ref_ctr_offsets);
+ kvfree(uprobes);
+ kfree(link);
+ if (task)
+ put_task_struct(task);
+error_path_put:
+ path_put(&path);
+ return err;
+}
+#else /* !CONFIG_UPROBES */
+int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+ return -EOPNOTSUPP;
+}
+static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx)
+{
+ return 0;
+}
+static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
+{
+ return 0;
+}
+#endif /* CONFIG_UPROBES */
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 05c0024815bf..8de8bec5f366 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -6779,8 +6779,7 @@ void ftrace_release_mod(struct module *mod)
last_pg = &ftrace_pages_start;
for (pg = ftrace_pages_start; pg; pg = *last_pg) {
rec = &pg->records[0];
- if (within_module_core(rec->ip, mod) ||
- within_module_init(rec->ip, mod)) {
+ if (within_module(rec->ip, mod)) {
/*
* As core pages are first, the first
* page should never be a module page.
@@ -6852,8 +6851,7 @@ void ftrace_module_enable(struct module *mod)
* not part of this module, then skip this pg,
* which the "break" will do.
*/
- if (!within_module_core(rec->ip, mod) &&
- !within_module_init(rec->ip, mod))
+ if (!within_module(rec->ip, mod))
break;
/* Weak functions should still be ignored */
@@ -7142,9 +7140,7 @@ void ftrace_free_mem(struct module *mod, void *start_ptr, void *end_ptr)
struct dyn_ftrace key;
struct ftrace_mod_map *mod_map = NULL;
struct ftrace_init_func *func, *func_next;
- struct list_head clear_hash;
-
- INIT_LIST_HEAD(&clear_hash);
+ LIST_HEAD(clear_hash);
key.ip = start;
key.flags = end; /* overload flags, as it is unsigned long */
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 52dea5dd5362..78502d4c7214 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -692,10 +692,7 @@ static void rb_time_set(rb_time_t *t, u64 val)
static inline bool
rb_time_read_cmpxchg(local_t *l, unsigned long expect, unsigned long set)
{
- unsigned long ret;
-
- ret = local_cmpxchg(l, expect, set);
- return ret == expect;
+ return local_try_cmpxchg(l, &expect, set);
}
static bool rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set)
@@ -752,9 +749,7 @@ static void rb_time_set(rb_time_t *t, u64 val)
static bool rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set)
{
- u64 val;
- val = local64_cmpxchg(&t->time, expect, set);
- return val == expect;
+ return local64_try_cmpxchg(&t->time, &expect, set);
}
#endif
@@ -1494,14 +1489,11 @@ static bool rb_head_page_replace(struct buffer_page *old,
{
unsigned long *ptr = (unsigned long *)&old->list.prev->next;
unsigned long val;
- unsigned long ret;
val = *ptr & ~RB_FLAG_MASK;
val |= RB_PAGE_HEAD;
- ret = cmpxchg(ptr, val, (unsigned long)&new->list);
-
- return ret == val;
+ return try_cmpxchg(ptr, &val, (unsigned long)&new->list);
}
/*
@@ -3003,7 +2995,6 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
{
unsigned long new_index, old_index;
struct buffer_page *bpage;
- unsigned long index;
unsigned long addr;
u64 write_stamp;
u64 delta;
@@ -3060,8 +3051,9 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
*/
old_index += write_mask;
new_index += write_mask;
- index = local_cmpxchg(&bpage->write, old_index, new_index);
- if (index == old_index) {
+
+ /* caution: old_index gets updated on cmpxchg failure */
+ if (local_try_cmpxchg(&bpage->write, &old_index, new_index)) {
/* update counters */
local_sub(event_length, &cpu_buffer->entries_bytes);
return true;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8e64aaad5361..2b4ded753367 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3119,7 +3119,6 @@ static void __ftrace_trace_stack(struct trace_buffer *buffer,
struct ftrace_stack *fstack;
struct stack_entry *entry;
int stackidx;
- void *ptr;
/*
* Add one, for this function and the call to save_stack_trace()
@@ -3157,32 +3156,16 @@ static void __ftrace_trace_stack(struct trace_buffer *buffer,
nr_entries = stack_trace_save(fstack->calls, size, skip);
}
- size = nr_entries * sizeof(unsigned long);
event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
- (sizeof(*entry) - sizeof(entry->caller)) + size,
+ struct_size(entry, caller, nr_entries),
trace_ctx);
if (!event)
goto out;
- ptr = ring_buffer_event_data(event);
- entry = ptr;
-
- /*
- * For backward compatibility reasons, the entry->caller is an
- * array of 8 slots to store the stack. This is also exported
- * to user space. The amount allocated on the ring buffer actually
- * holds enough for the stack specified by nr_entries. This will
- * go into the location of entry->caller. Due to string fortifiers
- * checking the size of the destination of memcpy() it triggers
- * when it detects that size is greater than 8. To hide this from
- * the fortifiers, we use "ptr" and pointer arithmetic to assign caller.
- *
- * The below is really just:
- * memcpy(&entry->caller, fstack->calls, size);
- */
- ptr += offsetof(typeof(*entry), caller);
- memcpy(ptr, fstack->calls, size);
+ entry = ring_buffer_event_data(event);
entry->size = nr_entries;
+ memcpy(&entry->caller, fstack->calls,
+ flex_array_size(entry, caller, nr_entries));
if (!call_filter_check_discard(call, entry, buffer, event))
__buffer_unlock_commit(buffer, event);
@@ -4206,18 +4189,12 @@ static void *s_start(struct seq_file *m, loff_t *pos)
loff_t l = 0;
int cpu;
- /*
- * copy the tracer to avoid using a global lock all around.
- * iter->trace is a copy of current_trace, the pointer to the
- * name may be used instead of a strcmp(), as iter->trace->name
- * will point to the same string as current_trace->name.
- */
mutex_lock(&trace_types_lock);
- if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name)) {
+ if (unlikely(tr->current_trace != iter->trace)) {
/* Close iter->trace before switching to the new current tracer */
if (iter->trace->close)
iter->trace->close(iter);
- *iter->trace = *tr->current_trace;
+ iter->trace = tr->current_trace;
/* Reopen the new current tracer */
if (iter->trace->open)
iter->trace->open(iter);
@@ -4829,6 +4806,25 @@ static const struct seq_operations tracer_seq_ops = {
.show = s_show,
};
+/*
+ * Note, as iter itself can be allocated and freed in different
+ * ways, this function is only used to free its content, and not
+ * the iterator itself. The only requirement to all the allocations
+ * is that it must zero all fields (kzalloc), as freeing works with
+ * ethier allocated content or NULL.
+ */
+static void free_trace_iter_content(struct trace_iterator *iter)
+{
+ /* The fmt is either NULL, allocated or points to static_fmt_buf */
+ if (iter->fmt != static_fmt_buf)
+ kfree(iter->fmt);
+
+ kfree(iter->temp);
+ kfree(iter->buffer_iter);
+ mutex_destroy(&iter->mutex);
+ free_cpumask_var(iter->started);
+}
+
static struct trace_iterator *
__tracing_open(struct inode *inode, struct file *file, bool snapshot)
{
@@ -4870,16 +4866,8 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot)
iter->fmt = NULL;
iter->fmt_size = 0;
- /*
- * We make a copy of the current tracer to avoid concurrent
- * changes on it while we are reading.
- */
mutex_lock(&trace_types_lock);
- iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
- if (!iter->trace)
- goto fail;
-
- *iter->trace = *tr->current_trace;
+ iter->trace = tr->current_trace;
if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
goto fail;
@@ -4944,9 +4932,7 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot)
fail:
mutex_unlock(&trace_types_lock);
- kfree(iter->trace);
- kfree(iter->temp);
- kfree(iter->buffer_iter);
+ free_trace_iter_content(iter);
release:
seq_release_private(inode, file);
return ERR_PTR(-ENOMEM);
@@ -5025,12 +5011,7 @@ static int tracing_release(struct inode *inode, struct file *file)
mutex_unlock(&trace_types_lock);
- mutex_destroy(&iter->mutex);
- free_cpumask_var(iter->started);
- kfree(iter->fmt);
- kfree(iter->temp);
- kfree(iter->trace);
- kfree(iter->buffer_iter);
+ free_trace_iter_content(iter);
seq_release_private(inode, file);
return 0;
@@ -5730,7 +5711,8 @@ static const char readme_msg[] =
"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
#ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
- "\t $stack<index>, $stack, $retval, $comm, $arg<N>, <argname>\n"
+ "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
+ "\t <argname>[->field[->field|.field...]],\n"
#else
"\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
#endif
@@ -6318,6 +6300,15 @@ static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
per_cpu_ptr(buf->data, cpu)->entries = val;
}
+static void update_buffer_entries(struct array_buffer *buf, int cpu)
+{
+ if (cpu == RING_BUFFER_ALL_CPUS) {
+ set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
+ } else {
+ per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
+ }
+}
+
#ifdef CONFIG_TRACER_MAX_TRACE
/* resize @tr's buffer to the size of @size_tr's entries */
static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
@@ -6396,18 +6387,12 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr,
return ret;
}
- if (cpu == RING_BUFFER_ALL_CPUS)
- set_buffer_entries(&tr->max_buffer, size);
- else
- per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
+ update_buffer_entries(&tr->max_buffer, cpu);
out:
#endif /* CONFIG_TRACER_MAX_TRACE */
- if (cpu == RING_BUFFER_ALL_CPUS)
- set_buffer_entries(&tr->array_buffer, size);
- else
- per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
+ update_buffer_entries(&tr->array_buffer, cpu);
return ret;
}
@@ -6825,10 +6810,7 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
close_pipe_on_cpu(tr, iter->cpu_file);
mutex_unlock(&trace_types_lock);
- free_cpumask_var(iter->started);
- kfree(iter->fmt);
- kfree(iter->temp);
- mutex_destroy(&iter->mutex);
+ free_trace_iter_content(iter);
kfree(iter);
trace_array_put(tr);
@@ -7618,6 +7600,11 @@ out:
return ret;
}
+static void tracing_swap_cpu_buffer(void *tr)
+{
+ update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
+}
+
static ssize_t
tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
loff_t *ppos)
@@ -7676,13 +7663,15 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
ret = tracing_alloc_snapshot_instance(tr);
if (ret < 0)
break;
- local_irq_disable();
/* Now, we're going to swap */
- if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
+ if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
+ local_irq_disable();
update_max_tr(tr, current, smp_processor_id(), NULL);
- else
- update_max_tr_single(tr, current, iter->cpu_file);
- local_irq_enable();
+ local_irq_enable();
+ } else {
+ smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
+ (void *)tr, 1);
+ }
break;
default:
if (tr->allocated_snapshot) {
@@ -9486,7 +9475,7 @@ static struct trace_array *trace_array_create(const char *name)
if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
goto out_free_tr;
- if (!alloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
+ if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
goto out_free_tr;
tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
@@ -10431,7 +10420,7 @@ __init static int tracer_alloc_buffers(void)
if (trace_create_savedcmd() < 0)
goto out_free_temp_buffer;
- if (!alloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
+ if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
goto out_free_savedcmd;
/* TODO: make the number of buffers hot pluggable with CPUS */
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 73eaec158473..5669dd1f90d9 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -77,6 +77,16 @@ enum trace_type {
#undef __array
#define __array(type, item, size) type item[size];
+/*
+ * For backward compatibility, older user space expects to see the
+ * kernel_stack event with a fixed size caller field. But today the fix
+ * size is ignored by the kernel, and the real structure is dynamic.
+ * Expose to user space: "unsigned long caller[8];" but the real structure
+ * will be "unsigned long caller[] __counted_by(size)"
+ */
+#undef __stack_array
+#define __stack_array(type, item, size, field) type item[] __counted_by(field);
+
#undef __array_desc
#define __array_desc(type, container, item, size)
@@ -596,7 +606,6 @@ trace_buffer_iter(struct trace_iterator *iter, int cpu)
int tracer_init(struct tracer *t, struct trace_array *tr);
int tracing_is_enabled(void);
void tracing_reset_online_cpus(struct array_buffer *buf);
-void tracing_reset_current(int cpu);
void tracing_reset_all_online_cpus(void);
void tracing_reset_all_online_cpus_unlocked(void);
int tracing_open_generic(struct inode *inode, struct file *filp);
@@ -697,7 +706,6 @@ void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos);
void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos);
int trace_pid_show(struct seq_file *m, void *v);
-void trace_free_pid_list(struct trace_pid_list *pid_list);
int trace_pid_write(struct trace_pid_list *filtered_pids,
struct trace_pid_list **new_pid_list,
const char __user *ubuf, size_t cnt);
@@ -1334,7 +1342,7 @@ struct trace_subsystem_dir {
struct list_head list;
struct event_subsystem *subsystem;
struct trace_array *tr;
- struct dentry *entry;
+ struct eventfs_file *ef;
int ref_count;
int nr_events;
};
diff --git a/kernel/trace/trace_btf.c b/kernel/trace/trace_btf.c
new file mode 100644
index 000000000000..ca224d53bfdc
--- /dev/null
+++ b/kernel/trace/trace_btf.c
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/btf.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+
+#include "trace_btf.h"
+
+/*
+ * Find a function proto type by name, and return the btf_type with its btf
+ * in *@btf_p. Return NULL if not found.
+ * Note that caller has to call btf_put(*@btf_p) after using the btf_type.
+ */
+const struct btf_type *btf_find_func_proto(const char *func_name, struct btf **btf_p)
+{
+ const struct btf_type *t;
+ s32 id;
+
+ id = bpf_find_btf_id(func_name, BTF_KIND_FUNC, btf_p);
+ if (id < 0)
+ return NULL;
+
+ /* Get BTF_KIND_FUNC type */
+ t = btf_type_by_id(*btf_p, id);
+ if (!t || !btf_type_is_func(t))
+ goto err;
+
+ /* The type of BTF_KIND_FUNC is BTF_KIND_FUNC_PROTO */
+ t = btf_type_by_id(*btf_p, t->type);
+ if (!t || !btf_type_is_func_proto(t))
+ goto err;
+
+ return t;
+err:
+ btf_put(*btf_p);
+ return NULL;
+}
+
+/*
+ * Get function parameter with the number of parameters.
+ * This can return NULL if the function has no parameters.
+ * It can return -EINVAL if the @func_proto is not a function proto type.
+ */
+const struct btf_param *btf_get_func_param(const struct btf_type *func_proto, s32 *nr)
+{
+ if (!btf_type_is_func_proto(func_proto))
+ return ERR_PTR(-EINVAL);
+
+ *nr = btf_type_vlen(func_proto);
+ if (*nr > 0)
+ return (const struct btf_param *)(func_proto + 1);
+ else
+ return NULL;
+}
+
+#define BTF_ANON_STACK_MAX 16
+
+struct btf_anon_stack {
+ u32 tid;
+ u32 offset;
+};
+
+/*
+ * Find a member of data structure/union by name and return it.
+ * Return NULL if not found, or -EINVAL if parameter is invalid.
+ * If the member is an member of anonymous union/structure, the offset
+ * of that anonymous union/structure is stored into @anon_offset. Caller
+ * can calculate the correct offset from the root data structure by
+ * adding anon_offset to the member's offset.
+ */
+const struct btf_member *btf_find_struct_member(struct btf *btf,
+ const struct btf_type *type,
+ const char *member_name,
+ u32 *anon_offset)
+{
+ struct btf_anon_stack *anon_stack;
+ const struct btf_member *member;
+ u32 tid, cur_offset = 0;
+ const char *name;
+ int i, top = 0;
+
+ anon_stack = kcalloc(BTF_ANON_STACK_MAX, sizeof(*anon_stack), GFP_KERNEL);
+ if (!anon_stack)
+ return ERR_PTR(-ENOMEM);
+
+retry:
+ if (!btf_type_is_struct(type)) {
+ member = ERR_PTR(-EINVAL);
+ goto out;
+ }
+
+ for_each_member(i, type, member) {
+ if (!member->name_off) {
+ /* Anonymous union/struct: push it for later use */
+ type = btf_type_skip_modifiers(btf, member->type, &tid);
+ if (type && top < BTF_ANON_STACK_MAX) {
+ anon_stack[top].tid = tid;
+ anon_stack[top++].offset =
+ cur_offset + member->offset;
+ }
+ } else {
+ name = btf_name_by_offset(btf, member->name_off);
+ if (name && !strcmp(member_name, name)) {
+ if (anon_offset)
+ *anon_offset = cur_offset;
+ goto out;
+ }
+ }
+ }
+ if (top > 0) {
+ /* Pop from the anonymous stack and retry */
+ tid = anon_stack[--top].tid;
+ cur_offset = anon_stack[top].offset;
+ type = btf_type_by_id(btf, tid);
+ goto retry;
+ }
+ member = NULL;
+
+out:
+ kfree(anon_stack);
+ return member;
+}
+
diff --git a/kernel/trace/trace_btf.h b/kernel/trace/trace_btf.h
new file mode 100644
index 000000000000..4bc44bc261e6
--- /dev/null
+++ b/kernel/trace/trace_btf.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/btf.h>
+
+const struct btf_type *btf_find_func_proto(const char *func_name,
+ struct btf **btf_p);
+const struct btf_param *btf_get_func_param(const struct btf_type *func_proto,
+ s32 *nr);
+const struct btf_member *btf_find_struct_member(struct btf *btf,
+ const struct btf_type *type,
+ const char *member_name,
+ u32 *anon_offset);
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index 340b2fa98218..c47422b20908 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -190,7 +190,7 @@ FTRACE_ENTRY(kernel_stack, stack_entry,
F_STRUCT(
__field( int, size )
- __array( unsigned long, caller, FTRACE_STACK_ENTRIES )
+ __stack_array( unsigned long, caller, FTRACE_STACK_ENTRIES, size)
),
F_printk("\t=> %ps\n\t=> %ps\n\t=> %ps\n"
diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c
index a0a704ba27db..72714cbf475c 100644
--- a/kernel/trace/trace_eprobe.c
+++ b/kernel/trace/trace_eprobe.c
@@ -41,6 +41,10 @@ struct eprobe_data {
struct trace_eprobe *ep;
};
+
+#define for_each_trace_eprobe_tp(ep, _tp) \
+ list_for_each_entry(ep, trace_probe_probe_list(_tp), tp.list)
+
static int __trace_eprobe_create(int argc, const char *argv[]);
static void trace_event_probe_cleanup(struct trace_eprobe *ep)
@@ -640,7 +644,7 @@ static int disable_eprobe(struct trace_eprobe *ep,
static int enable_trace_eprobe(struct trace_event_call *call,
struct trace_event_file *file)
{
- struct trace_probe *pos, *tp;
+ struct trace_probe *tp;
struct trace_eprobe *ep;
bool enabled;
int ret = 0;
@@ -662,8 +666,7 @@ static int enable_trace_eprobe(struct trace_event_call *call,
if (enabled)
return 0;
- list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
- ep = container_of(pos, struct trace_eprobe, tp);
+ for_each_trace_eprobe_tp(ep, tp) {
ret = enable_eprobe(ep, file);
if (ret)
break;
@@ -680,8 +683,7 @@ static int enable_trace_eprobe(struct trace_event_call *call,
*/
WARN_ON_ONCE(ret != -ENOMEM);
- list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
- ep = container_of(pos, struct trace_eprobe, tp);
+ for_each_trace_eprobe_tp(ep, tp) {
disable_eprobe(ep, file->tr);
if (!--cnt)
break;
@@ -699,7 +701,7 @@ static int enable_trace_eprobe(struct trace_event_call *call,
static int disable_trace_eprobe(struct trace_event_call *call,
struct trace_event_file *file)
{
- struct trace_probe *pos, *tp;
+ struct trace_probe *tp;
struct trace_eprobe *ep;
tp = trace_probe_primary_from_call(call);
@@ -716,10 +718,8 @@ static int disable_trace_eprobe(struct trace_event_call *call,
trace_probe_clear_flag(tp, TP_FLAG_PROFILE);
if (!trace_probe_is_enabled(tp)) {
- list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
- ep = container_of(pos, struct trace_eprobe, tp);
+ for_each_trace_eprobe_tp(ep, tp)
disable_eprobe(ep, file->tr);
- }
}
out:
@@ -807,13 +807,11 @@ static int trace_eprobe_tp_update_arg(struct trace_eprobe *ep, const char *argv[
int ret;
ret = traceprobe_parse_probe_arg(&ep->tp, i, argv[i], &ctx);
- if (ret)
- return ret;
-
/* Handle symbols "@" */
if (!ret)
ret = traceprobe_update_arg(&ep->tp.args[i]);
+ traceprobe_finish_parse(&ctx);
return ret;
}
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 578f1f7d49a6..ed367d713be0 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -984,7 +984,7 @@ static void remove_subsystem(struct trace_subsystem_dir *dir)
return;
if (!--dir->nr_events) {
- tracefs_remove(dir->entry);
+ eventfs_remove(dir->ef);
list_del(&dir->list);
__put_system_dir(dir);
}
@@ -1005,7 +1005,7 @@ static void remove_event_file_dir(struct trace_event_file *file)
tracefs_remove(dir);
}
-
+ eventfs_remove(file->ef);
list_del(&file->list);
remove_subsystem(file->system);
free_event_filter(file->filter);
@@ -2291,13 +2291,13 @@ create_new_subsystem(const char *name)
return NULL;
}
-static struct dentry *
+static struct eventfs_file *
event_subsystem_dir(struct trace_array *tr, const char *name,
struct trace_event_file *file, struct dentry *parent)
{
struct event_subsystem *system, *iter;
struct trace_subsystem_dir *dir;
- struct dentry *entry;
+ int res;
/* First see if we did not already create this dir */
list_for_each_entry(dir, &tr->systems, list) {
@@ -2305,7 +2305,7 @@ event_subsystem_dir(struct trace_array *tr, const char *name,
if (strcmp(system->name, name) == 0) {
dir->nr_events++;
file->system = dir;
- return dir->entry;
+ return dir->ef;
}
}
@@ -2329,8 +2329,8 @@ event_subsystem_dir(struct trace_array *tr, const char *name,
} else
__get_system(system);
- dir->entry = tracefs_create_dir(name, parent);
- if (!dir->entry) {
+ dir->ef = eventfs_add_subsystem_dir(name, parent);
+ if (IS_ERR(dir->ef)) {
pr_warn("Failed to create system directory %s\n", name);
__put_system(system);
goto out_free;
@@ -2345,22 +2345,22 @@ event_subsystem_dir(struct trace_array *tr, const char *name,
/* the ftrace system is special, do not create enable or filter files */
if (strcmp(name, "ftrace") != 0) {
- entry = tracefs_create_file("filter", TRACE_MODE_WRITE,
- dir->entry, dir,
+ res = eventfs_add_file("filter", TRACE_MODE_WRITE,
+ dir->ef, dir,
&ftrace_subsystem_filter_fops);
- if (!entry) {
+ if (res) {
kfree(system->filter);
system->filter = NULL;
pr_warn("Could not create tracefs '%s/filter' entry\n", name);
}
- trace_create_file("enable", TRACE_MODE_WRITE, dir->entry, dir,
+ eventfs_add_file("enable", TRACE_MODE_WRITE, dir->ef, dir,
&ftrace_system_enable_fops);
}
list_add(&dir->list, &tr->systems);
- return dir->entry;
+ return dir->ef;
out_free:
kfree(dir);
@@ -2413,36 +2413,37 @@ static int
event_create_dir(struct dentry *parent, struct trace_event_file *file)
{
struct trace_event_call *call = file->event_call;
+ struct eventfs_file *ef_subsystem = NULL;
struct trace_array *tr = file->tr;
- struct dentry *d_events;
const char *name;
int ret;
/*
* If the trace point header did not define TRACE_SYSTEM
- * then the system would be called "TRACE_SYSTEM".
+ * then the system would be called "TRACE_SYSTEM". This should
+ * never happen.
*/
- if (strcmp(call->class->system, TRACE_SYSTEM) != 0) {
- d_events = event_subsystem_dir(tr, call->class->system, file, parent);
- if (!d_events)
- return -ENOMEM;
- } else
- d_events = parent;
+ if (WARN_ON_ONCE(strcmp(call->class->system, TRACE_SYSTEM) == 0))
+ return -ENODEV;
+
+ ef_subsystem = event_subsystem_dir(tr, call->class->system, file, parent);
+ if (!ef_subsystem)
+ return -ENOMEM;
name = trace_event_name(call);
- file->dir = tracefs_create_dir(name, d_events);
- if (!file->dir) {
+ file->ef = eventfs_add_dir(name, ef_subsystem);
+ if (IS_ERR(file->ef)) {
pr_warn("Could not create tracefs '%s' directory\n", name);
return -1;
}
if (call->class->reg && !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
- trace_create_file("enable", TRACE_MODE_WRITE, file->dir, file,
+ eventfs_add_file("enable", TRACE_MODE_WRITE, file->ef, file,
&ftrace_enable_fops);
#ifdef CONFIG_PERF_EVENTS
if (call->event.type && call->class->reg)
- trace_create_file("id", TRACE_MODE_READ, file->dir,
+ eventfs_add_file("id", TRACE_MODE_READ, file->ef,
(void *)(long)call->event.type,
&ftrace_event_id_fops);
#endif
@@ -2458,27 +2459,27 @@ event_create_dir(struct dentry *parent, struct trace_event_file *file)
* triggers or filters.
*/
if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) {
- trace_create_file("filter", TRACE_MODE_WRITE, file->dir,
+ eventfs_add_file("filter", TRACE_MODE_WRITE, file->ef,
file, &ftrace_event_filter_fops);
- trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
+ eventfs_add_file("trigger", TRACE_MODE_WRITE, file->ef,
file, &event_trigger_fops);
}
#ifdef CONFIG_HIST_TRIGGERS
- trace_create_file("hist", TRACE_MODE_READ, file->dir, file,
+ eventfs_add_file("hist", TRACE_MODE_READ, file->ef, file,
&event_hist_fops);
#endif
#ifdef CONFIG_HIST_TRIGGERS_DEBUG
- trace_create_file("hist_debug", TRACE_MODE_READ, file->dir, file,
+ eventfs_add_file("hist_debug", TRACE_MODE_READ, file->ef, file,
&event_hist_debug_fops);
#endif
- trace_create_file("format", TRACE_MODE_READ, file->dir, call,
+ eventfs_add_file("format", TRACE_MODE_READ, file->ef, call,
&ftrace_event_format_fops);
#ifdef CONFIG_TRACE_EVENT_INJECT
if (call->event.type && call->class->reg)
- trace_create_file("inject", 0200, file->dir, file,
+ eventfs_add_file("inject", 0200, file->ef, file,
&event_inject_fops);
#endif
@@ -3631,21 +3632,22 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
{
struct dentry *d_events;
struct dentry *entry;
+ int error = 0;
entry = trace_create_file("set_event", TRACE_MODE_WRITE, parent,
tr, &ftrace_set_event_fops);
if (!entry)
return -ENOMEM;
- d_events = tracefs_create_dir("events", parent);
- if (!d_events) {
+ d_events = eventfs_create_events_dir("events", parent);
+ if (IS_ERR(d_events)) {
pr_warn("Could not create tracefs 'events' directory\n");
return -ENOMEM;
}
- entry = trace_create_file("enable", TRACE_MODE_WRITE, d_events,
+ error = eventfs_add_events_file("enable", TRACE_MODE_WRITE, d_events,
tr, &ftrace_tr_enable_fops);
- if (!entry)
+ if (error)
return -ENOMEM;
/* There are not as crucial, just warn if they are not created */
@@ -3658,11 +3660,11 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
&ftrace_set_event_notrace_pid_fops);
/* ring buffer internal formats */
- trace_create_file("header_page", TRACE_MODE_READ, d_events,
+ eventfs_add_events_file("header_page", TRACE_MODE_READ, d_events,
ring_buffer_print_page_header,
&ftrace_show_header_fops);
- trace_create_file("header_event", TRACE_MODE_READ, d_events,
+ eventfs_add_events_file("header_event", TRACE_MODE_READ, d_events,
ring_buffer_print_entry_header,
&ftrace_show_header_fops);
@@ -3750,7 +3752,7 @@ int event_trace_del_tracer(struct trace_array *tr)
down_write(&trace_event_sem);
__trace_remove_event_dirs(tr);
- tracefs_remove(tr->event_dir);
+ eventfs_remove_events_dir(tr->event_dir);
up_write(&trace_event_sem);
tr->event_dir = NULL;
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 1dad64267878..33264e510d16 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -46,15 +46,19 @@ static const char * ops[] = { OPS };
enum filter_pred_fn {
FILTER_PRED_FN_NOP,
FILTER_PRED_FN_64,
+ FILTER_PRED_FN_64_CPUMASK,
FILTER_PRED_FN_S64,
FILTER_PRED_FN_U64,
FILTER_PRED_FN_32,
+ FILTER_PRED_FN_32_CPUMASK,
FILTER_PRED_FN_S32,
FILTER_PRED_FN_U32,
FILTER_PRED_FN_16,
+ FILTER_PRED_FN_16_CPUMASK,
FILTER_PRED_FN_S16,
FILTER_PRED_FN_U16,
FILTER_PRED_FN_8,
+ FILTER_PRED_FN_8_CPUMASK,
FILTER_PRED_FN_S8,
FILTER_PRED_FN_U8,
FILTER_PRED_FN_COMM,
@@ -64,21 +68,25 @@ enum filter_pred_fn {
FILTER_PRED_FN_PCHAR_USER,
FILTER_PRED_FN_PCHAR,
FILTER_PRED_FN_CPU,
+ FILTER_PRED_FN_CPU_CPUMASK,
+ FILTER_PRED_FN_CPUMASK,
+ FILTER_PRED_FN_CPUMASK_CPU,
FILTER_PRED_FN_FUNCTION,
FILTER_PRED_FN_,
FILTER_PRED_TEST_VISITED,
};
struct filter_pred {
- enum filter_pred_fn fn_num;
- u64 val;
- u64 val2;
- struct regex regex;
+ struct regex *regex;
+ struct cpumask *mask;
unsigned short *ops;
struct ftrace_event_field *field;
- int offset;
+ u64 val;
+ u64 val2;
+ enum filter_pred_fn fn_num;
+ int offset;
int not;
- int op;
+ int op;
};
/*
@@ -94,6 +102,8 @@ struct filter_pred {
C(TOO_MANY_OPEN, "Too many '('"), \
C(TOO_MANY_CLOSE, "Too few '('"), \
C(MISSING_QUOTE, "Missing matching quote"), \
+ C(MISSING_BRACE_OPEN, "Missing '{'"), \
+ C(MISSING_BRACE_CLOSE, "Missing '}'"), \
C(OPERAND_TOO_LONG, "Operand too long"), \
C(EXPECT_STRING, "Expecting string field"), \
C(EXPECT_DIGIT, "Expecting numeric field"), \
@@ -103,6 +113,7 @@ struct filter_pred {
C(BAD_SUBSYS_FILTER, "Couldn't find or set field in one of a subsystem's events"), \
C(TOO_MANY_PREDS, "Too many terms in predicate expression"), \
C(INVALID_FILTER, "Meaningless filter expression"), \
+ C(INVALID_CPULIST, "Invalid cpulist"), \
C(IP_FIELD_ONLY, "Only 'ip' field is supported for function trace"), \
C(INVALID_VALUE, "Invalid value (did you forget quotes)?"), \
C(NO_FUNCTION, "Function not found"), \
@@ -186,6 +197,15 @@ enum {
PROCESS_OR = 4,
};
+static void free_predicate(struct filter_pred *pred)
+{
+ if (pred) {
+ kfree(pred->regex);
+ kfree(pred->mask);
+ kfree(pred);
+ }
+}
+
/*
* Without going into a formal proof, this explains the method that is used in
* parsing the logical expressions.
@@ -623,12 +643,64 @@ out_free:
kfree(inverts);
if (prog_stack) {
for (i = 0; prog_stack[i].pred; i++)
- kfree(prog_stack[i].pred);
+ free_predicate(prog_stack[i].pred);
kfree(prog_stack);
}
return ERR_PTR(ret);
}
+static inline int
+do_filter_cpumask(int op, const struct cpumask *mask, const struct cpumask *cmp)
+{
+ switch (op) {
+ case OP_EQ:
+ return cpumask_equal(mask, cmp);
+ case OP_NE:
+ return !cpumask_equal(mask, cmp);
+ case OP_BAND:
+ return cpumask_intersects(mask, cmp);
+ default:
+ return 0;
+ }
+}
+
+/* Optimisation of do_filter_cpumask() for scalar fields */
+static inline int
+do_filter_scalar_cpumask(int op, unsigned int cpu, const struct cpumask *mask)
+{
+ /*
+ * Per the weight-of-one cpumask optimisations, the mask passed in this
+ * function has a weight >= 2, so it is never equal to a single scalar.
+ */
+ switch (op) {
+ case OP_EQ:
+ return false;
+ case OP_NE:
+ return true;
+ case OP_BAND:
+ return cpumask_test_cpu(cpu, mask);
+ default:
+ return 0;
+ }
+}
+
+static inline int
+do_filter_cpumask_scalar(int op, const struct cpumask *mask, unsigned int cpu)
+{
+ switch (op) {
+ case OP_EQ:
+ return cpumask_test_cpu(cpu, mask) &&
+ cpumask_nth(1, mask) >= nr_cpu_ids;
+ case OP_NE:
+ return !cpumask_test_cpu(cpu, mask) ||
+ cpumask_nth(1, mask) < nr_cpu_ids;
+ case OP_BAND:
+ return cpumask_test_cpu(cpu, mask);
+ default:
+ return 0;
+ }
+}
+
enum pred_cmp_types {
PRED_CMP_TYPE_NOP,
PRED_CMP_TYPE_LT,
@@ -672,6 +744,18 @@ static int filter_pred_##type(struct filter_pred *pred, void *event) \
} \
}
+#define DEFINE_CPUMASK_COMPARISON_PRED(size) \
+static int filter_pred_##size##_cpumask(struct filter_pred *pred, void *event) \
+{ \
+ u##size *addr = (u##size *)(event + pred->offset); \
+ unsigned int cpu = *addr; \
+ \
+ if (cpu >= nr_cpu_ids) \
+ return 0; \
+ \
+ return do_filter_scalar_cpumask(pred->op, cpu, pred->mask); \
+}
+
#define DEFINE_EQUALITY_PRED(size) \
static int filter_pred_##size(struct filter_pred *pred, void *event) \
{ \
@@ -693,6 +777,11 @@ DEFINE_COMPARISON_PRED(u16);
DEFINE_COMPARISON_PRED(s8);
DEFINE_COMPARISON_PRED(u8);
+DEFINE_CPUMASK_COMPARISON_PRED(64);
+DEFINE_CPUMASK_COMPARISON_PRED(32);
+DEFINE_CPUMASK_COMPARISON_PRED(16);
+DEFINE_CPUMASK_COMPARISON_PRED(8);
+
DEFINE_EQUALITY_PRED(64);
DEFINE_EQUALITY_PRED(32);
DEFINE_EQUALITY_PRED(16);
@@ -750,7 +839,7 @@ static int filter_pred_string(struct filter_pred *pred, void *event)
char *addr = (char *)(event + pred->offset);
int cmp, match;
- cmp = pred->regex.match(addr, &pred->regex, pred->regex.field_len);
+ cmp = pred->regex->match(addr, pred->regex, pred->regex->field_len);
match = cmp ^ pred->not;
@@ -763,7 +852,7 @@ static __always_inline int filter_pchar(struct filter_pred *pred, char *str)
int len;
len = strlen(str) + 1; /* including tailing '\0' */
- cmp = pred->regex.match(str, &pred->regex, len);
+ cmp = pred->regex->match(str, pred->regex, len);
match = cmp ^ pred->not;
@@ -813,7 +902,7 @@ static int filter_pred_strloc(struct filter_pred *pred, void *event)
char *addr = (char *)(event + str_loc);
int cmp, match;
- cmp = pred->regex.match(addr, &pred->regex, str_len);
+ cmp = pred->regex->match(addr, pred->regex, str_len);
match = cmp ^ pred->not;
@@ -836,7 +925,7 @@ static int filter_pred_strrelloc(struct filter_pred *pred, void *event)
char *addr = (char *)(&item[1]) + str_loc;
int cmp, match;
- cmp = pred->regex.match(addr, &pred->regex, str_len);
+ cmp = pred->regex->match(addr, pred->regex, str_len);
match = cmp ^ pred->not;
@@ -869,12 +958,42 @@ static int filter_pred_cpu(struct filter_pred *pred, void *event)
}
}
+/* Filter predicate for current CPU vs user-provided cpumask */
+static int filter_pred_cpu_cpumask(struct filter_pred *pred, void *event)
+{
+ int cpu = raw_smp_processor_id();
+
+ return do_filter_scalar_cpumask(pred->op, cpu, pred->mask);
+}
+
+/* Filter predicate for cpumask field vs user-provided cpumask */
+static int filter_pred_cpumask(struct filter_pred *pred, void *event)
+{
+ u32 item = *(u32 *)(event + pred->offset);
+ int loc = item & 0xffff;
+ const struct cpumask *mask = (event + loc);
+ const struct cpumask *cmp = pred->mask;
+
+ return do_filter_cpumask(pred->op, mask, cmp);
+}
+
+/* Filter predicate for cpumask field vs user-provided scalar */
+static int filter_pred_cpumask_cpu(struct filter_pred *pred, void *event)
+{
+ u32 item = *(u32 *)(event + pred->offset);
+ int loc = item & 0xffff;
+ const struct cpumask *mask = (event + loc);
+ unsigned int cpu = pred->val;
+
+ return do_filter_cpumask_scalar(pred->op, mask, cpu);
+}
+
/* Filter predicate for COMM. */
static int filter_pred_comm(struct filter_pred *pred, void *event)
{
int cmp;
- cmp = pred->regex.match(current->comm, &pred->regex,
+ cmp = pred->regex->match(current->comm, pred->regex,
TASK_COMM_LEN);
return cmp ^ pred->not;
}
@@ -1004,7 +1123,7 @@ enum regex_type filter_parse_regex(char *buff, int len, char **search, int *not)
static void filter_build_regex(struct filter_pred *pred)
{
- struct regex *r = &pred->regex;
+ struct regex *r = pred->regex;
char *search;
enum regex_type type = MATCH_FULL;
@@ -1169,7 +1288,7 @@ static void free_prog(struct event_filter *filter)
return;
for (i = 0; prog[i].pred; i++)
- kfree(prog[i].pred);
+ free_predicate(prog[i].pred);
kfree(prog);
}
@@ -1236,8 +1355,12 @@ static void filter_free_subsystem_filters(struct trace_subsystem_dir *dir,
int filter_assign_type(const char *type)
{
- if (strstr(type, "__data_loc") && strstr(type, "char"))
- return FILTER_DYN_STRING;
+ if (strstr(type, "__data_loc")) {
+ if (strstr(type, "char"))
+ return FILTER_DYN_STRING;
+ if (strstr(type, "cpumask_t"))
+ return FILTER_CPUMASK;
+ }
if (strstr(type, "__rel_loc") && strstr(type, "char"))
return FILTER_RDYN_STRING;
@@ -1313,24 +1436,32 @@ static int filter_pred_fn_call(struct filter_pred *pred, void *event)
switch (pred->fn_num) {
case FILTER_PRED_FN_64:
return filter_pred_64(pred, event);
+ case FILTER_PRED_FN_64_CPUMASK:
+ return filter_pred_64_cpumask(pred, event);
case FILTER_PRED_FN_S64:
return filter_pred_s64(pred, event);
case FILTER_PRED_FN_U64:
return filter_pred_u64(pred, event);
case FILTER_PRED_FN_32:
return filter_pred_32(pred, event);
+ case FILTER_PRED_FN_32_CPUMASK:
+ return filter_pred_32_cpumask(pred, event);
case FILTER_PRED_FN_S32:
return filter_pred_s32(pred, event);
case FILTER_PRED_FN_U32:
return filter_pred_u32(pred, event);
case FILTER_PRED_FN_16:
return filter_pred_16(pred, event);
+ case FILTER_PRED_FN_16_CPUMASK:
+ return filter_pred_16_cpumask(pred, event);
case FILTER_PRED_FN_S16:
return filter_pred_s16(pred, event);
case FILTER_PRED_FN_U16:
return filter_pred_u16(pred, event);
case FILTER_PRED_FN_8:
return filter_pred_8(pred, event);
+ case FILTER_PRED_FN_8_CPUMASK:
+ return filter_pred_8_cpumask(pred, event);
case FILTER_PRED_FN_S8:
return filter_pred_s8(pred, event);
case FILTER_PRED_FN_U8:
@@ -1349,6 +1480,12 @@ static int filter_pred_fn_call(struct filter_pred *pred, void *event)
return filter_pred_pchar(pred, event);
case FILTER_PRED_FN_CPU:
return filter_pred_cpu(pred, event);
+ case FILTER_PRED_FN_CPU_CPUMASK:
+ return filter_pred_cpu_cpumask(pred, event);
+ case FILTER_PRED_FN_CPUMASK:
+ return filter_pred_cpumask(pred, event);
+ case FILTER_PRED_FN_CPUMASK_CPU:
+ return filter_pred_cpumask_cpu(pred, event);
case FILTER_PRED_FN_FUNCTION:
return filter_pred_function(pred, event);
case FILTER_PRED_TEST_VISITED:
@@ -1553,9 +1690,130 @@ static int parse_pred(const char *str, void *data,
goto err_free;
}
- pred->regex.len = len;
- strncpy(pred->regex.pattern, str + s, len);
- pred->regex.pattern[len] = 0;
+ pred->regex = kzalloc(sizeof(*pred->regex), GFP_KERNEL);
+ if (!pred->regex)
+ goto err_mem;
+ pred->regex->len = len;
+ strncpy(pred->regex->pattern, str + s, len);
+ pred->regex->pattern[len] = 0;
+
+ } else if (!strncmp(str + i, "CPUS", 4)) {
+ unsigned int maskstart;
+ bool single;
+ char *tmp;
+
+ switch (field->filter_type) {
+ case FILTER_CPUMASK:
+ case FILTER_CPU:
+ case FILTER_OTHER:
+ break;
+ default:
+ parse_error(pe, FILT_ERR_ILLEGAL_FIELD_OP, pos + i);
+ goto err_free;
+ }
+
+ switch (op) {
+ case OP_EQ:
+ case OP_NE:
+ case OP_BAND:
+ break;
+ default:
+ parse_error(pe, FILT_ERR_ILLEGAL_FIELD_OP, pos + i);
+ goto err_free;
+ }
+
+ /* Skip CPUS */
+ i += 4;
+ if (str[i++] != '{') {
+ parse_error(pe, FILT_ERR_MISSING_BRACE_OPEN, pos + i);
+ goto err_free;
+ }
+ maskstart = i;
+
+ /* Walk the cpulist until closing } */
+ for (; str[i] && str[i] != '}'; i++)
+ ;
+
+ if (str[i] != '}') {
+ parse_error(pe, FILT_ERR_MISSING_BRACE_CLOSE, pos + i);
+ goto err_free;
+ }
+
+ if (maskstart == i) {
+ parse_error(pe, FILT_ERR_INVALID_CPULIST, pos + i);
+ goto err_free;
+ }
+
+ /* Copy the cpulist between { and } */
+ tmp = kmalloc((i - maskstart) + 1, GFP_KERNEL);
+ if (!tmp)
+ goto err_mem;
+
+ strscpy(tmp, str + maskstart, (i - maskstart) + 1);
+ pred->mask = kzalloc(cpumask_size(), GFP_KERNEL);
+ if (!pred->mask) {
+ kfree(tmp);
+ goto err_mem;
+ }
+
+ /* Now parse it */
+ if (cpulist_parse(tmp, pred->mask)) {
+ kfree(tmp);
+ parse_error(pe, FILT_ERR_INVALID_CPULIST, pos + i);
+ goto err_free;
+ }
+ kfree(tmp);
+
+ /* Move along */
+ i++;
+
+ /*
+ * Optimisation: if the user-provided mask has a weight of one
+ * then we can treat it as a scalar input.
+ */
+ single = cpumask_weight(pred->mask) == 1;
+ if (single) {
+ pred->val = cpumask_first(pred->mask);
+ kfree(pred->mask);
+ pred->mask = NULL;
+ }
+
+ if (field->filter_type == FILTER_CPUMASK) {
+ pred->fn_num = single ?
+ FILTER_PRED_FN_CPUMASK_CPU :
+ FILTER_PRED_FN_CPUMASK;
+ } else if (field->filter_type == FILTER_CPU) {
+ if (single) {
+ if (pred->op == OP_BAND)
+ pred->op = OP_EQ;
+
+ pred->fn_num = FILTER_PRED_FN_CPU;
+ } else {
+ pred->fn_num = FILTER_PRED_FN_CPU_CPUMASK;
+ }
+ } else if (single) {
+ if (pred->op == OP_BAND)
+ pred->op = OP_EQ;
+
+ pred->fn_num = select_comparison_fn(pred->op, field->size, false);
+ if (pred->op == OP_NE)
+ pred->not = 1;
+ } else {
+ switch (field->size) {
+ case 8:
+ pred->fn_num = FILTER_PRED_FN_64_CPUMASK;
+ break;
+ case 4:
+ pred->fn_num = FILTER_PRED_FN_32_CPUMASK;
+ break;
+ case 2:
+ pred->fn_num = FILTER_PRED_FN_16_CPUMASK;
+ break;
+ case 1:
+ pred->fn_num = FILTER_PRED_FN_8_CPUMASK;
+ break;
+ }
+ }
/* This is either a string, or an integer */
} else if (str[i] == '\'' || str[i] == '"') {
@@ -1597,9 +1855,12 @@ static int parse_pred(const char *str, void *data,
goto err_free;
}
- pred->regex.len = len;
- strncpy(pred->regex.pattern, str + s, len);
- pred->regex.pattern[len] = 0;
+ pred->regex = kzalloc(sizeof(*pred->regex), GFP_KERNEL);
+ if (!pred->regex)
+ goto err_mem;
+ pred->regex->len = len;
+ strncpy(pred->regex->pattern, str + s, len);
+ pred->regex->pattern[len] = 0;
filter_build_regex(pred);
@@ -1608,7 +1869,7 @@ static int parse_pred(const char *str, void *data,
} else if (field->filter_type == FILTER_STATIC_STRING) {
pred->fn_num = FILTER_PRED_FN_STRING;
- pred->regex.field_len = field->size;
+ pred->regex->field_len = field->size;
} else if (field->filter_type == FILTER_DYN_STRING) {
pred->fn_num = FILTER_PRED_FN_STRLOC;
@@ -1691,10 +1952,10 @@ static int parse_pred(const char *str, void *data,
return i;
err_free:
- kfree(pred);
+ free_predicate(pred);
return -EINVAL;
err_mem:
- kfree(pred);
+ free_predicate(pred);
return -ENOMEM;
}
@@ -2287,8 +2548,8 @@ static int ftrace_function_set_filter_pred(struct filter_pred *pred,
return ret;
return __ftrace_function_set_filter(pred->op == OP_EQ,
- pred->regex.pattern,
- pred->regex.len,
+ pred->regex->pattern,
+ pred->regex->len,
data);
}
diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c
index 33cb6af31f39..6f046650e527 100644
--- a/kernel/trace/trace_events_user.c
+++ b/kernel/trace/trace_events_user.c
@@ -1328,14 +1328,14 @@ static int user_field_set_string(struct ftrace_event_field *field,
static int user_event_set_print_fmt(struct user_event *user, char *buf, int len)
{
- struct ftrace_event_field *field, *next;
+ struct ftrace_event_field *field;
struct list_head *head = &user->fields;
int pos = 0, depth = 0;
const char *str_func;
pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
- list_for_each_entry_safe_reverse(field, next, head, link) {
+ list_for_each_entry_reverse(field, head, link) {
if (depth != 0)
pos += snprintf(buf + pos, LEN_OR_ZERO, " ");
@@ -1347,7 +1347,7 @@ static int user_event_set_print_fmt(struct user_event *user, char *buf, int len)
pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
- list_for_each_entry_safe_reverse(field, next, head, link) {
+ list_for_each_entry_reverse(field, head, link) {
if (user_field_is_dyn_string(field->type, &str_func))
pos += snprintf(buf + pos, LEN_OR_ZERO,
", %s(%s)", str_func, field->name);
@@ -1732,7 +1732,7 @@ static int user_event_create(const char *raw_command)
static int user_event_show(struct seq_file *m, struct dyn_event *ev)
{
struct user_event *user = container_of(ev, struct user_event, devent);
- struct ftrace_event_field *field, *next;
+ struct ftrace_event_field *field;
struct list_head *head;
int depth = 0;
@@ -1740,7 +1740,7 @@ static int user_event_show(struct seq_file *m, struct dyn_event *ev)
head = trace_get_fields(&user->call);
- list_for_each_entry_safe_reverse(field, next, head, link) {
+ list_for_each_entry_reverse(field, head, link) {
if (depth == 0)
seq_puts(m, " ");
else
@@ -1816,13 +1816,14 @@ out:
static bool user_fields_match(struct user_event *user, int argc,
const char **argv)
{
- struct ftrace_event_field *field, *next;
+ struct ftrace_event_field *field;
struct list_head *head = &user->fields;
int i = 0;
- list_for_each_entry_safe_reverse(field, next, head, link)
+ list_for_each_entry_reverse(field, head, link) {
if (!user_field_match(field, argc, argv, &i))
return false;
+ }
if (i != argc)
return false;
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 58f3946081e2..1698fc22afa0 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -51,6 +51,9 @@ static int ftrace_event_register(struct trace_event_call *call,
#undef __array
#define __array(type, item, size) type item[size];
+#undef __stack_array
+#define __stack_array(type, item, size, field) __array(type, item, size)
+
#undef __array_desc
#define __array_desc(type, container, item, size) type item[size];
@@ -114,6 +117,9 @@ static void __always_unused ____ftrace_check_##name(void) \
is_signed_type(_type), .filter_type = FILTER_OTHER, \
.len = _len },
+#undef __stack_array
+#define __stack_array(_type, _item, _len, _field) __array(_type, _item, _len)
+
#undef __array_desc
#define __array_desc(_type, _container, _item, _len) __array(_type, _item, _len)
@@ -149,6 +155,9 @@ static struct trace_event_fields ftrace_event_fields_##name[] = { \
#undef __array
#define __array(type, item, len)
+#undef __stack_array
+#define __stack_array(type, item, len, field)
+
#undef __array_desc
#define __array_desc(type, container, item, len)
diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
index dfe2e546acdc..8bfe23af9c73 100644
--- a/kernel/trace/trace_fprobe.c
+++ b/kernel/trace/trace_fprobe.c
@@ -898,6 +898,46 @@ static struct tracepoint *find_tracepoint(const char *tp_name)
return data.tpoint;
}
+static int parse_symbol_and_return(int argc, const char *argv[],
+ char **symbol, bool *is_return,
+ bool is_tracepoint)
+{
+ char *tmp = strchr(argv[1], '%');
+ int i;
+
+ if (tmp) {
+ int len = tmp - argv[1];
+
+ if (!is_tracepoint && !strcmp(tmp, "%return")) {
+ *is_return = true;
+ } else {
+ trace_probe_log_err(len, BAD_ADDR_SUFFIX);
+ return -EINVAL;
+ }
+ *symbol = kmemdup_nul(argv[1], len, GFP_KERNEL);
+ } else
+ *symbol = kstrdup(argv[1], GFP_KERNEL);
+ if (!*symbol)
+ return -ENOMEM;
+
+ if (*is_return)
+ return 0;
+
+ /* If there is $retval, this should be a return fprobe. */
+ for (i = 2; i < argc; i++) {
+ tmp = strstr(argv[i], "$retval");
+ if (tmp && !isalnum(tmp[7]) && tmp[7] != '_') {
+ *is_return = true;
+ /*
+ * NOTE: Don't check is_tracepoint here, because it will
+ * be checked when the argument is parsed.
+ */
+ break;
+ }
+ }
+ return 0;
+}
+
static int __trace_fprobe_create(int argc, const char *argv[])
{
/*
@@ -927,7 +967,7 @@ static int __trace_fprobe_create(int argc, const char *argv[])
struct trace_fprobe *tf = NULL;
int i, len, new_argc = 0, ret = 0;
bool is_return = false;
- char *symbol = NULL, *tmp = NULL;
+ char *symbol = NULL;
const char *event = NULL, *group = FPROBE_EVENT_SYSTEM;
const char **new_argv = NULL;
int maxactive = 0;
@@ -983,20 +1023,10 @@ static int __trace_fprobe_create(int argc, const char *argv[])
trace_probe_log_set_index(1);
/* a symbol(or tracepoint) must be specified */
- symbol = kstrdup(argv[1], GFP_KERNEL);
- if (!symbol)
- return -ENOMEM;
+ ret = parse_symbol_and_return(argc, argv, &symbol, &is_return, is_tracepoint);
+ if (ret < 0)
+ goto parse_error;
- tmp = strchr(symbol, '%');
- if (tmp) {
- if (!is_tracepoint && !strcmp(tmp, "%return")) {
- *tmp = '\0';
- is_return = true;
- } else {
- trace_probe_log_err(tmp - symbol, BAD_ADDR_SUFFIX);
- goto parse_error;
- }
- }
if (!is_return && maxactive) {
trace_probe_log_set_index(0);
trace_probe_log_err(1, BAD_MAXACT_TYPE);
@@ -1096,6 +1126,7 @@ static int __trace_fprobe_create(int argc, const char *argv[])
}
out:
+ traceprobe_finish_parse(&ctx);
trace_probe_log_clear();
kfree(new_argv);
kfree(symbol);
diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c
index 2f37a6e68aa9..b791524a6536 100644
--- a/kernel/trace/trace_hwlat.c
+++ b/kernel/trace/trace_hwlat.c
@@ -635,7 +635,7 @@ static int s_mode_show(struct seq_file *s, void *v)
else
seq_printf(s, "%s", thread_mode_str[mode]);
- if (mode != MODE_MAX)
+ if (mode < MODE_MAX - 1) /* if mode is any but last */
seq_puts(s, " ");
return 0;
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 23dba01831f7..3d7a180a8427 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -907,6 +907,7 @@ static int __trace_kprobe_create(int argc, const char *argv[])
}
out:
+ traceprobe_finish_parse(&ctx);
trace_probe_log_clear();
kfree(new_argv);
kfree(symbol);
@@ -1561,15 +1562,10 @@ int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type,
*fd_type = trace_kprobe_is_return(tk) ? BPF_FD_TYPE_KRETPROBE
: BPF_FD_TYPE_KPROBE;
- if (tk->symbol) {
- *symbol = tk->symbol;
- *probe_offset = tk->rp.kp.offset;
- *probe_addr = 0;
- } else {
- *symbol = NULL;
- *probe_offset = 0;
- *probe_addr = (unsigned long)tk->rp.kp.addr;
- }
+ *probe_offset = tk->rp.kp.offset;
+ *probe_addr = kallsyms_show_value(current_cred()) ?
+ (unsigned long)tk->rp.kp.addr : 0;
+ *symbol = tk->symbol;
return 0;
}
#endif /* CONFIG_PERF_EVENTS */
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index c68a72707852..4dc74d73fc1d 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -12,6 +12,7 @@
#define pr_fmt(fmt) "trace_probe: " fmt
#include <linux/bpf.h>
+#include "trace_btf.h"
#include "trace_probe.h"
@@ -304,31 +305,90 @@ static int parse_trace_event_arg(char *arg, struct fetch_insn *code,
#ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
-static struct btf *traceprobe_get_btf(void)
+static u32 btf_type_int(const struct btf_type *t)
{
- struct btf *btf = bpf_get_btf_vmlinux();
+ return *(u32 *)(t + 1);
+}
- if (IS_ERR_OR_NULL(btf))
- return NULL;
+static bool btf_type_is_char_ptr(struct btf *btf, const struct btf_type *type)
+{
+ const struct btf_type *real_type;
+ u32 intdata;
+ s32 tid;
+
+ real_type = btf_type_skip_modifiers(btf, type->type, &tid);
+ if (!real_type)
+ return false;
+
+ if (BTF_INFO_KIND(real_type->info) != BTF_KIND_INT)
+ return false;
- return btf;
+ intdata = btf_type_int(real_type);
+ return !(BTF_INT_ENCODING(intdata) & BTF_INT_SIGNED)
+ && BTF_INT_BITS(intdata) == 8;
}
-static u32 btf_type_int(const struct btf_type *t)
+static bool btf_type_is_char_array(struct btf *btf, const struct btf_type *type)
{
- return *(u32 *)(t + 1);
+ const struct btf_type *real_type;
+ const struct btf_array *array;
+ u32 intdata;
+ s32 tid;
+
+ if (BTF_INFO_KIND(type->info) != BTF_KIND_ARRAY)
+ return false;
+
+ array = (const struct btf_array *)(type + 1);
+
+ real_type = btf_type_skip_modifiers(btf, array->type, &tid);
+
+ intdata = btf_type_int(real_type);
+ return !(BTF_INT_ENCODING(intdata) & BTF_INT_SIGNED)
+ && BTF_INT_BITS(intdata) == 8;
}
-static const char *type_from_btf_id(struct btf *btf, s32 id)
+static int check_prepare_btf_string_fetch(char *typename,
+ struct fetch_insn **pcode,
+ struct traceprobe_parse_context *ctx)
+{
+ struct btf *btf = ctx->btf;
+
+ if (!btf || !ctx->last_type)
+ return 0;
+
+ /* char [] does not need any change. */
+ if (btf_type_is_char_array(btf, ctx->last_type))
+ return 0;
+
+ /* char * requires dereference the pointer. */
+ if (btf_type_is_char_ptr(btf, ctx->last_type)) {
+ struct fetch_insn *code = *pcode + 1;
+
+ if (code->op == FETCH_OP_END) {
+ trace_probe_log_err(ctx->offset, TOO_MANY_OPS);
+ return -E2BIG;
+ }
+ if (typename[0] == 'u')
+ code->op = FETCH_OP_UDEREF;
+ else
+ code->op = FETCH_OP_DEREF;
+ code->offset = 0;
+ *pcode = code;
+ return 0;
+ }
+ /* Other types are not available for string */
+ trace_probe_log_err(ctx->offset, BAD_TYPE4STR);
+ return -EINVAL;
+}
+
+static const char *fetch_type_from_btf_type(struct btf *btf,
+ const struct btf_type *type,
+ struct traceprobe_parse_context *ctx)
{
- const struct btf_type *t;
u32 intdata;
- s32 tid;
/* TODO: const char * could be converted as a string */
- t = btf_type_skip_modifiers(btf, id, &tid);
-
- switch (BTF_INFO_KIND(t->info)) {
+ switch (BTF_INFO_KIND(type->info)) {
case BTF_KIND_ENUM:
/* enum is "int", so convert to "s32" */
return "s32";
@@ -341,7 +401,7 @@ static const char *type_from_btf_id(struct btf *btf, s32 id)
else
return "x32";
case BTF_KIND_INT:
- intdata = btf_type_int(t);
+ intdata = btf_type_int(type);
if (BTF_INT_ENCODING(intdata) & BTF_INT_SIGNED) {
switch (BTF_INT_BITS(intdata)) {
case 8:
@@ -364,6 +424,10 @@ static const char *type_from_btf_id(struct btf *btf, s32 id)
case 64:
return "u64";
}
+ /* bitfield, size is encoded in the type */
+ ctx->last_bitsize = BTF_INT_BITS(intdata);
+ ctx->last_bitoffs += BTF_INT_OFFSET(intdata);
+ return "u64";
}
}
/* TODO: support other types */
@@ -371,88 +435,223 @@ static const char *type_from_btf_id(struct btf *btf, s32 id)
return NULL;
}
-static const struct btf_type *find_btf_func_proto(const char *funcname)
+static int query_btf_context(struct traceprobe_parse_context *ctx)
{
- struct btf *btf = traceprobe_get_btf();
- const struct btf_type *t;
- s32 id;
+ const struct btf_param *param;
+ const struct btf_type *type;
+ struct btf *btf;
+ s32 nr;
- if (!btf || !funcname)
- return ERR_PTR(-EINVAL);
+ if (ctx->btf)
+ return 0;
+
+ if (!ctx->funcname)
+ return -EINVAL;
+
+ type = btf_find_func_proto(ctx->funcname, &btf);
+ if (!type)
+ return -ENOENT;
- id = btf_find_by_name_kind(btf, funcname, BTF_KIND_FUNC);
- if (id <= 0)
- return ERR_PTR(-ENOENT);
+ ctx->btf = btf;
+ ctx->proto = type;
+
+ /* ctx->params is optional, since func(void) will not have params. */
+ nr = 0;
+ param = btf_get_func_param(type, &nr);
+ if (!IS_ERR_OR_NULL(param)) {
+ /* Hide the first 'data' argument of tracepoint */
+ if (ctx->flags & TPARG_FL_TPOINT) {
+ nr--;
+ param++;
+ }
+ }
- /* Get BTF_KIND_FUNC type */
- t = btf_type_by_id(btf, id);
- if (!t || !btf_type_is_func(t))
- return ERR_PTR(-ENOENT);
+ if (nr > 0) {
+ ctx->nr_params = nr;
+ ctx->params = param;
+ } else {
+ ctx->nr_params = 0;
+ ctx->params = NULL;
+ }
- /* The type of BTF_KIND_FUNC is BTF_KIND_FUNC_PROTO */
- t = btf_type_by_id(btf, t->type);
- if (!t || !btf_type_is_func_proto(t))
- return ERR_PTR(-ENOENT);
+ return 0;
+}
- return t;
+static void clear_btf_context(struct traceprobe_parse_context *ctx)
+{
+ if (ctx->btf) {
+ btf_put(ctx->btf);
+ ctx->btf = NULL;
+ ctx->proto = NULL;
+ ctx->params = NULL;
+ ctx->nr_params = 0;
+ }
}
-static const struct btf_param *find_btf_func_param(const char *funcname, s32 *nr,
- bool tracepoint)
+/* Return 1 if the field separater is arrow operator ('->') */
+static int split_next_field(char *varname, char **next_field,
+ struct traceprobe_parse_context *ctx)
{
- const struct btf_param *param;
- const struct btf_type *t;
+ char *field;
+ int ret = 0;
+
+ field = strpbrk(varname, ".-");
+ if (field) {
+ if (field[0] == '-' && field[1] == '>') {
+ field[0] = '\0';
+ field += 2;
+ ret = 1;
+ } else if (field[0] == '.') {
+ field[0] = '\0';
+ field += 1;
+ } else {
+ trace_probe_log_err(ctx->offset + field - varname, BAD_HYPHEN);
+ return -EINVAL;
+ }
+ *next_field = field;
+ }
- if (!funcname || !nr)
- return ERR_PTR(-EINVAL);
+ return ret;
+}
- t = find_btf_func_proto(funcname);
- if (IS_ERR(t))
- return (const struct btf_param *)t;
+/*
+ * Parse the field of data structure. The @type must be a pointer type
+ * pointing the target data structure type.
+ */
+static int parse_btf_field(char *fieldname, const struct btf_type *type,
+ struct fetch_insn **pcode, struct fetch_insn *end,
+ struct traceprobe_parse_context *ctx)
+{
+ struct fetch_insn *code = *pcode;
+ const struct btf_member *field;
+ u32 bitoffs, anon_offs;
+ char *next;
+ int is_ptr;
+ s32 tid;
- *nr = btf_type_vlen(t);
- param = (const struct btf_param *)(t + 1);
+ do {
+ /* Outer loop for solving arrow operator ('->') */
+ if (BTF_INFO_KIND(type->info) != BTF_KIND_PTR) {
+ trace_probe_log_err(ctx->offset, NO_PTR_STRCT);
+ return -EINVAL;
+ }
+ /* Convert a struct pointer type to a struct type */
+ type = btf_type_skip_modifiers(ctx->btf, type->type, &tid);
+ if (!type) {
+ trace_probe_log_err(ctx->offset, BAD_BTF_TID);
+ return -EINVAL;
+ }
- /* Hide the first 'data' argument of tracepoint */
- if (tracepoint) {
- (*nr)--;
- param++;
- }
+ bitoffs = 0;
+ do {
+ /* Inner loop for solving dot operator ('.') */
+ next = NULL;
+ is_ptr = split_next_field(fieldname, &next, ctx);
+ if (is_ptr < 0)
+ return is_ptr;
+
+ anon_offs = 0;
+ field = btf_find_struct_member(ctx->btf, type, fieldname,
+ &anon_offs);
+ if (!field) {
+ trace_probe_log_err(ctx->offset, NO_BTF_FIELD);
+ return -ENOENT;
+ }
+ /* Add anonymous structure/union offset */
+ bitoffs += anon_offs;
+
+ /* Accumulate the bit-offsets of the dot-connected fields */
+ if (btf_type_kflag(type)) {
+ bitoffs += BTF_MEMBER_BIT_OFFSET(field->offset);
+ ctx->last_bitsize = BTF_MEMBER_BITFIELD_SIZE(field->offset);
+ } else {
+ bitoffs += field->offset;
+ ctx->last_bitsize = 0;
+ }
- if (*nr > 0)
- return param;
- else
- return NULL;
+ type = btf_type_skip_modifiers(ctx->btf, field->type, &tid);
+ if (!type) {
+ trace_probe_log_err(ctx->offset, BAD_BTF_TID);
+ return -EINVAL;
+ }
+
+ ctx->offset += next - fieldname;
+ fieldname = next;
+ } while (!is_ptr && fieldname);
+
+ if (++code == end) {
+ trace_probe_log_err(ctx->offset, TOO_MANY_OPS);
+ return -EINVAL;
+ }
+ code->op = FETCH_OP_DEREF; /* TODO: user deref support */
+ code->offset = bitoffs / 8;
+ *pcode = code;
+
+ ctx->last_bitoffs = bitoffs % 8;
+ ctx->last_type = type;
+ } while (fieldname);
+
+ return 0;
}
-static int parse_btf_arg(const char *varname, struct fetch_insn *code,
+static int parse_btf_arg(char *varname,
+ struct fetch_insn **pcode, struct fetch_insn *end,
struct traceprobe_parse_context *ctx)
{
- struct btf *btf = traceprobe_get_btf();
+ struct fetch_insn *code = *pcode;
const struct btf_param *params;
- int i;
+ const struct btf_type *type;
+ char *field = NULL;
+ int i, is_ptr, ret;
+ u32 tid;
+
+ if (WARN_ON_ONCE(!ctx->funcname))
+ return -EINVAL;
- if (!btf) {
- trace_probe_log_err(ctx->offset, NOSUP_BTFARG);
+ is_ptr = split_next_field(varname, &field, ctx);
+ if (is_ptr < 0)
+ return is_ptr;
+ if (!is_ptr && field) {
+ /* dot-connected field on an argument is not supported. */
+ trace_probe_log_err(ctx->offset + field - varname,
+ NOSUP_DAT_ARG);
return -EOPNOTSUPP;
}
- if (WARN_ON_ONCE(!ctx->funcname))
- return -EINVAL;
+ if (ctx->flags & TPARG_FL_RETURN) {
+ if (strcmp(varname, "$retval") != 0) {
+ trace_probe_log_err(ctx->offset, NO_BTFARG);
+ return -ENOENT;
+ }
+ code->op = FETCH_OP_RETVAL;
+ /* Check whether the function return type is not void */
+ if (query_btf_context(ctx) == 0) {
+ if (ctx->proto->type == 0) {
+ trace_probe_log_err(ctx->offset, NO_RETVAL);
+ return -ENOENT;
+ }
+ tid = ctx->proto->type;
+ goto found;
+ }
+ if (field) {
+ trace_probe_log_err(ctx->offset + field - varname,
+ NO_BTF_ENTRY);
+ return -ENOENT;
+ }
+ return 0;
+ }
- if (!ctx->params) {
- params = find_btf_func_param(ctx->funcname, &ctx->nr_params,
- ctx->flags & TPARG_FL_TPOINT);
- if (IS_ERR_OR_NULL(params)) {
+ if (!ctx->btf) {
+ ret = query_btf_context(ctx);
+ if (ret < 0 || ctx->nr_params == 0) {
trace_probe_log_err(ctx->offset, NO_BTF_ENTRY);
return PTR_ERR(params);
}
- ctx->params = params;
- } else
- params = ctx->params;
+ }
+ params = ctx->params;
for (i = 0; i < ctx->nr_params; i++) {
- const char *name = btf_name_by_offset(btf, params[i].name_off);
+ const char *name = btf_name_by_offset(ctx->btf, params[i].name_off);
if (name && !strcmp(name, varname)) {
code->op = FETCH_OP_ARG;
@@ -460,91 +659,114 @@ static int parse_btf_arg(const char *varname, struct fetch_insn *code,
code->param = i + 1;
else
code->param = i;
- return 0;
+ tid = params[i].type;
+ goto found;
}
}
trace_probe_log_err(ctx->offset, NO_BTFARG);
return -ENOENT;
-}
-
-static const struct fetch_type *parse_btf_arg_type(int arg_idx,
- struct traceprobe_parse_context *ctx)
-{
- struct btf *btf = traceprobe_get_btf();
- const char *typestr = NULL;
- if (btf && ctx->params) {
- if (ctx->flags & TPARG_FL_TPOINT)
- arg_idx--;
- typestr = type_from_btf_id(btf, ctx->params[arg_idx].type);
+found:
+ type = btf_type_skip_modifiers(ctx->btf, tid, &tid);
+ if (!type) {
+ trace_probe_log_err(ctx->offset, BAD_BTF_TID);
+ return -EINVAL;
}
-
- return find_fetch_type(typestr, ctx->flags);
+ /* Initialize the last type information */
+ ctx->last_type = type;
+ ctx->last_bitoffs = 0;
+ ctx->last_bitsize = 0;
+ if (field) {
+ ctx->offset += field - varname;
+ return parse_btf_field(field, type, pcode, end, ctx);
+ }
+ return 0;
}
-static const struct fetch_type *parse_btf_retval_type(
+static const struct fetch_type *find_fetch_type_from_btf_type(
struct traceprobe_parse_context *ctx)
{
- struct btf *btf = traceprobe_get_btf();
+ struct btf *btf = ctx->btf;
const char *typestr = NULL;
- const struct btf_type *t;
- if (btf && ctx->funcname) {
- t = find_btf_func_proto(ctx->funcname);
- if (!IS_ERR(t))
- typestr = type_from_btf_id(btf, t->type);
- }
+ if (btf && ctx->last_type)
+ typestr = fetch_type_from_btf_type(btf, ctx->last_type, ctx);
return find_fetch_type(typestr, ctx->flags);
}
-static bool is_btf_retval_void(const char *funcname)
+static int parse_btf_bitfield(struct fetch_insn **pcode,
+ struct traceprobe_parse_context *ctx)
{
- const struct btf_type *t;
+ struct fetch_insn *code = *pcode;
- t = find_btf_func_proto(funcname);
- if (IS_ERR(t))
- return false;
+ if ((ctx->last_bitsize % 8 == 0) && ctx->last_bitoffs == 0)
+ return 0;
+
+ code++;
+ if (code->op != FETCH_OP_NOP) {
+ trace_probe_log_err(ctx->offset, TOO_MANY_OPS);
+ return -EINVAL;
+ }
+ *pcode = code;
- return t->type == 0;
+ code->op = FETCH_OP_MOD_BF;
+ code->lshift = 64 - (ctx->last_bitsize + ctx->last_bitoffs);
+ code->rshift = 64 - ctx->last_bitsize;
+ code->basesize = 64 / 8;
+ return 0;
}
+
#else
-static struct btf *traceprobe_get_btf(void)
+static void clear_btf_context(struct traceprobe_parse_context *ctx)
{
- return NULL;
+ ctx->btf = NULL;
}
-static const struct btf_param *find_btf_func_param(const char *funcname, s32 *nr,
- bool tracepoint)
+static int query_btf_context(struct traceprobe_parse_context *ctx)
{
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
}
-static int parse_btf_arg(const char *varname, struct fetch_insn *code,
+static int parse_btf_arg(char *varname,
+ struct fetch_insn **pcode, struct fetch_insn *end,
struct traceprobe_parse_context *ctx)
{
trace_probe_log_err(ctx->offset, NOSUP_BTFARG);
return -EOPNOTSUPP;
}
-#define parse_btf_arg_type(idx, ctx) \
- find_fetch_type(NULL, ctx->flags)
+static int parse_btf_bitfield(struct fetch_insn **pcode,
+ struct traceprobe_parse_context *ctx)
+{
+ trace_probe_log_err(ctx->offset, NOSUP_BTFARG);
+ return -EOPNOTSUPP;
+}
-#define parse_btf_retval_type(ctx) \
+#define find_fetch_type_from_btf_type(ctx) \
find_fetch_type(NULL, ctx->flags)
-#define is_btf_retval_void(funcname) (false)
+static int check_prepare_btf_string_fetch(char *typename,
+ struct fetch_insn **pcode,
+ struct traceprobe_parse_context *ctx)
+{
+ return 0;
+}
#endif
#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))
-static int parse_probe_vars(char *arg, const struct fetch_type *t,
- struct fetch_insn *code,
+/* Parse $vars. @orig_arg points '$', which syncs to @ctx->offset */
+static int parse_probe_vars(char *orig_arg, const struct fetch_type *t,
+ struct fetch_insn **pcode,
+ struct fetch_insn *end,
struct traceprobe_parse_context *ctx)
{
- unsigned long param;
+ struct fetch_insn *code = *pcode;
int err = TP_ERR_BAD_VAR;
+ char *arg = orig_arg + 1;
+ unsigned long param;
int ret = 0;
int len;
@@ -563,18 +785,17 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t,
goto inval;
}
- if (strcmp(arg, "retval") == 0) {
- if (ctx->flags & TPARG_FL_RETURN) {
- if ((ctx->flags & TPARG_FL_KERNEL) &&
- is_btf_retval_void(ctx->funcname)) {
- err = TP_ERR_NO_RETVAL;
- goto inval;
- }
+ if (str_has_prefix(arg, "retval")) {
+ if (!(ctx->flags & TPARG_FL_RETURN)) {
+ err = TP_ERR_RETVAL_ON_PROBE;
+ goto inval;
+ }
+ if (!(ctx->flags & TPARG_FL_KERNEL) ||
+ !IS_ENABLED(CONFIG_PROBE_EVENTS_BTF_ARGS)) {
code->op = FETCH_OP_RETVAL;
return 0;
}
- err = TP_ERR_RETVAL_ON_PROBE;
- goto inval;
+ return parse_btf_arg(orig_arg, pcode, end, ctx);
}
len = str_has_prefix(arg, "stack");
@@ -676,7 +897,7 @@ parse_probe_arg(char *arg, const struct fetch_type *type,
switch (arg[0]) {
case '$':
- ret = parse_probe_vars(arg + 1, type, code, ctx);
+ ret = parse_probe_vars(arg, type, pcode, end, ctx);
break;
case '%': /* named register */
@@ -795,6 +1016,8 @@ parse_probe_arg(char *arg, const struct fetch_type *type,
code->op = deref;
code->offset = offset;
+ /* Reset the last type if used */
+ ctx->last_type = NULL;
}
break;
case '\\': /* Immediate value */
@@ -818,7 +1041,7 @@ parse_probe_arg(char *arg, const struct fetch_type *type,
trace_probe_log_err(ctx->offset, NOSUP_BTFARG);
return -EINVAL;
}
- ret = parse_btf_arg(arg, code, ctx);
+ ret = parse_btf_arg(arg, pcode, end, ctx);
break;
}
}
@@ -964,17 +1187,22 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size,
goto out;
code[FETCH_INSN_MAX - 1].op = FETCH_OP_END;
+ ctx->last_type = NULL;
ret = parse_probe_arg(arg, parg->type, &code, &code[FETCH_INSN_MAX - 1],
ctx);
if (ret)
goto fail;
/* Update storing type if BTF is available */
- if (IS_ENABLED(CONFIG_PROBE_EVENTS_BTF_ARGS) && !t) {
- if (code->op == FETCH_OP_ARG)
- parg->type = parse_btf_arg_type(code->param, ctx);
- else if (code->op == FETCH_OP_RETVAL)
- parg->type = parse_btf_retval_type(ctx);
+ if (IS_ENABLED(CONFIG_PROBE_EVENTS_BTF_ARGS) &&
+ ctx->last_type) {
+ if (!t) {
+ parg->type = find_fetch_type_from_btf_type(ctx);
+ } else if (strstr(t, "string")) {
+ ret = check_prepare_btf_string_fetch(t, &code, ctx);
+ if (ret)
+ goto fail;
+ }
}
ret = -EINVAL;
@@ -1048,6 +1276,11 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size,
trace_probe_log_err(ctx->offset + t - arg, BAD_BITFIELD);
goto fail;
}
+ } else if (IS_ENABLED(CONFIG_PROBE_EVENTS_BTF_ARGS) &&
+ ctx->last_type) {
+ ret = parse_btf_bitfield(&code, ctx);
+ if (ret)
+ goto fail;
}
ret = -EINVAL;
/* Loop(Array) operation */
@@ -1231,7 +1464,6 @@ static int sprint_nth_btf_arg(int idx, const char *type,
char *buf, int bufsize,
struct traceprobe_parse_context *ctx)
{
- struct btf *btf = traceprobe_get_btf();
const char *name;
int ret;
@@ -1239,7 +1471,7 @@ static int sprint_nth_btf_arg(int idx, const char *type,
trace_probe_log_err(0, NO_BTFARG);
return -ENOENT;
}
- name = btf_name_by_offset(btf, ctx->params[idx].name_off);
+ name = btf_name_by_offset(ctx->btf, ctx->params[idx].name_off);
if (!name) {
trace_probe_log_err(0, NO_BTF_ENTRY);
return -ENOENT;
@@ -1260,7 +1492,6 @@ const char **traceprobe_expand_meta_args(int argc, const char *argv[],
const struct btf_param *params = NULL;
int i, j, n, used, ret, args_idx = -1;
const char **new_argv = NULL;
- int nr_params;
ret = argv_has_var_arg(argc, argv, &args_idx, ctx);
if (ret < 0)
@@ -1271,9 +1502,8 @@ const char **traceprobe_expand_meta_args(int argc, const char *argv[],
return NULL;
}
- params = find_btf_func_param(ctx->funcname, &nr_params,
- ctx->flags & TPARG_FL_TPOINT);
- if (IS_ERR_OR_NULL(params)) {
+ ret = query_btf_context(ctx);
+ if (ret < 0 || ctx->nr_params == 0) {
if (args_idx != -1) {
/* $arg* requires BTF info */
trace_probe_log_err(0, NOSUP_BTFARG);
@@ -1282,8 +1512,6 @@ const char **traceprobe_expand_meta_args(int argc, const char *argv[],
*new_argc = argc;
return NULL;
}
- ctx->params = params;
- ctx->nr_params = nr_params;
if (args_idx >= 0)
*new_argc = argc + ctx->nr_params - 1;
@@ -1298,7 +1526,7 @@ const char **traceprobe_expand_meta_args(int argc, const char *argv[],
for (i = 0, j = 0; i < argc; i++) {
trace_probe_log_set_index(i + 2);
if (i == args_idx) {
- for (n = 0; n < nr_params; n++) {
+ for (n = 0; n < ctx->nr_params; n++) {
ret = sprint_nth_btf_arg(n, "", buf + used,
bufsize - used, ctx);
if (ret < 0)
@@ -1337,6 +1565,11 @@ error:
return ERR_PTR(ret);
}
+void traceprobe_finish_parse(struct traceprobe_parse_context *ctx)
+{
+ clear_btf_context(ctx);
+}
+
int traceprobe_update_arg(struct probe_arg *arg)
{
struct fetch_insn *code = arg->code;
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 01ea148723de..02b432ae7513 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -383,9 +383,15 @@ static inline bool tparg_is_function_entry(unsigned int flags)
struct traceprobe_parse_context {
struct trace_event_call *event;
- const struct btf_param *params;
- s32 nr_params;
- const char *funcname;
+ /* BTF related parameters */
+ const char *funcname; /* Function name in BTF */
+ const struct btf_type *proto; /* Prototype of the function */
+ const struct btf_param *params; /* Parameter of the function */
+ s32 nr_params; /* The number of the parameters */
+ struct btf *btf; /* The BTF to be used */
+ const struct btf_type *last_type; /* Saved type */
+ u32 last_bitoffs; /* Saved bitoffs */
+ u32 last_bitsize; /* Saved bitsize */
unsigned int flags;
int offset;
};
@@ -400,6 +406,12 @@ const char **traceprobe_expand_meta_args(int argc, const char *argv[],
extern int traceprobe_update_arg(struct probe_arg *arg);
extern void traceprobe_free_probe_arg(struct probe_arg *arg);
+/*
+ * If either traceprobe_parse_probe_arg() or traceprobe_expand_meta_args() is called,
+ * this MUST be called for clean up the context and return a resource.
+ */
+void traceprobe_finish_parse(struct traceprobe_parse_context *ctx);
+
extern int traceprobe_split_symbol_offset(char *symbol, long *offset);
int traceprobe_parse_event_name(const char **pevent, const char **pgroup,
char *buf, int offset);
@@ -495,7 +507,14 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call,
C(BAD_VAR_ARGS, "$arg* must be an independent parameter without name etc."),\
C(NOFENTRY_ARGS, "$arg* can be used only on function entry"), \
C(DOUBLE_ARGS, "$arg* can be used only once in the parameters"), \
- C(ARGS_2LONG, "$arg* failed because the argument list is too long"),
+ C(ARGS_2LONG, "$arg* failed because the argument list is too long"), \
+ C(ARGIDX_2BIG, "$argN index is too big"), \
+ C(NO_PTR_STRCT, "This is not a pointer to union/structure."), \
+ C(NOSUP_DAT_ARG, "Non pointer structure/union argument is not supported."),\
+ C(BAD_HYPHEN, "Failed to parse single hyphen. Forgot '>'?"), \
+ C(NO_BTF_FIELD, "This field is not found."), \
+ C(BAD_BTF_TID, "Failed to get BTF type info."),\
+ C(BAD_TYPE4STR, "This type does not fit for string."),
#undef C
#define C(a, b) TP_ERR_##a
@@ -519,3 +538,8 @@ void __trace_probe_log_err(int offset, int err);
#define trace_probe_log_err(offs, err) \
__trace_probe_log_err(offs, TP_ERR_##err)
+
+struct uprobe_dispatch_data {
+ struct trace_uprobe *tu;
+ unsigned long bp_addr;
+};
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 942ddbdace4a..de753403cdaf 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -555,12 +555,15 @@ static int perf_call_bpf_enter(struct trace_event_call *call, struct pt_regs *re
struct syscall_trace_enter *rec)
{
struct syscall_tp_t {
- unsigned long long regs;
+ struct trace_entry ent;
unsigned long syscall_nr;
unsigned long args[SYSCALL_DEFINE_MAXARGS];
- } param;
+ } __aligned(8) param;
int i;
+ BUILD_BUG_ON(sizeof(param.ent) < sizeof(void *));
+
+ /* bpf prog requires 'regs' to be the first member in the ctx (a.k.a. &param) */
*(struct pt_regs **)&param = regs;
param.syscall_nr = rec->nr;
for (i = 0; i < sys_data->nb_args; i++)
@@ -657,11 +660,12 @@ static int perf_call_bpf_exit(struct trace_event_call *call, struct pt_regs *reg
struct syscall_trace_exit *rec)
{
struct syscall_tp_t {
- unsigned long long regs;
+ struct trace_entry ent;
unsigned long syscall_nr;
unsigned long ret;
- } param;
+ } __aligned(8) param;
+ /* bpf prog requires 'regs' to be the first member in the ctx (a.k.a. &param) */
*(struct pt_regs **)&param = regs;
param.syscall_nr = rec->nr;
param.ret = rec->ret;
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 688bf579f2f1..99c051de412a 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -88,11 +88,6 @@ static struct trace_uprobe *to_trace_uprobe(struct dyn_event *ev)
static int register_uprobe_event(struct trace_uprobe *tu);
static int unregister_uprobe_event(struct trace_uprobe *tu);
-struct uprobe_dispatch_data {
- struct trace_uprobe *tu;
- unsigned long bp_addr;
-};
-
static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs);
static int uretprobe_dispatcher(struct uprobe_consumer *con,
unsigned long func, struct pt_regs *regs);
@@ -693,6 +688,7 @@ static int __trace_uprobe_create(int argc, const char **argv)
trace_probe_log_set_index(i + 2);
ret = traceprobe_parse_probe_arg(&tu->tp, i, argv[i], &ctx);
+ traceprobe_finish_parse(&ctx);
if (ret)
goto error;
}
@@ -1352,7 +1348,7 @@ static void __uprobe_perf_func(struct trace_uprobe *tu,
if (bpf_prog_array_valid(call)) {
u32 ret;
- ret = bpf_prog_run_array_sleepable(call->prog_array, regs, bpf_prog_run);
+ ret = bpf_prog_run_array_uprobe(call->prog_array, regs, bpf_prog_run);
if (!ret)
return;
}
@@ -1418,7 +1414,7 @@ static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func,
int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type,
const char **filename, u64 *probe_offset,
- bool perf_type_tracepoint)
+ u64 *probe_addr, bool perf_type_tracepoint)
{
const char *pevent = trace_event_name(event->tp_event);
const char *group = event->tp_event->class->system;
@@ -1435,6 +1431,7 @@ int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type,
: BPF_FD_TYPE_UPROBE;
*filename = tu->filename;
*probe_offset = tu->offset;
+ *probe_addr = 0;
return 0;
}
#endif /* CONFIG_PERF_EVENTS */