aboutsummaryrefslogtreecommitdiff
path: root/kernel/trace
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/trace')
-rw-r--r--kernel/trace/Kconfig6
-rw-r--r--kernel/trace/Makefile2
-rw-r--r--kernel/trace/bpf_trace.c126
-rw-r--r--kernel/trace/fgraph.c626
-rw-r--r--kernel/trace/ftrace.c512
-rw-r--r--kernel/trace/ftrace_internal.h75
-rw-r--r--kernel/trace/ring_buffer.c106
-rw-r--r--kernel/trace/trace.c106
-rw-r--r--kernel/trace/trace.h70
-rw-r--r--kernel/trace/trace_dynevent.c217
-rw-r--r--kernel/trace/trace_dynevent.h119
-rw-r--r--kernel/trace/trace_event_perf.c7
-rw-r--r--kernel/trace/trace_events.c10
-rw-r--r--kernel/trace/trace_events_filter.c14
-rw-r--r--kernel/trace/trace_events_hist.c595
-rw-r--r--kernel/trace/trace_events_trigger.c6
-rw-r--r--kernel/trace/trace_functions_graph.c311
-rw-r--r--kernel/trace/trace_irqsoff.c29
-rw-r--r--kernel/trace/trace_kprobe.c751
-rw-r--r--kernel/trace/trace_output.c38
-rw-r--r--kernel/trace/trace_preemptirq.c5
-rw-r--r--kernel/trace/trace_printk.c2
-rw-r--r--kernel/trace/trace_probe.c751
-rw-r--r--kernel/trace/trace_probe.h294
-rw-r--r--kernel/trace/trace_probe_tmpl.h218
-rw-r--r--kernel/trace/trace_sched_wakeup.c272
-rw-r--r--kernel/trace/trace_selftest.c8
-rw-r--r--kernel/trace/trace_stack.c10
-rw-r--r--kernel/trace/trace_uprobe.c561
29 files changed, 3363 insertions, 2484 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 5e3de28c7677..fa8b1fe824f3 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -461,6 +461,7 @@ config KPROBE_EVENTS
bool "Enable kprobes-based dynamic events"
select TRACING
select PROBE_EVENTS
+ select DYNAMIC_EVENTS
default y
help
This allows the user to add tracing events (similar to tracepoints)
@@ -500,6 +501,7 @@ config UPROBE_EVENTS
depends on PERF_EVENTS
select UPROBES
select PROBE_EVENTS
+ select DYNAMIC_EVENTS
select TRACING
default y
help
@@ -518,6 +520,9 @@ config BPF_EVENTS
help
This allows the user to attach BPF programs to kprobe events.
+config DYNAMIC_EVENTS
+ def_bool n
+
config PROBE_EVENTS
def_bool n
@@ -630,6 +635,7 @@ config HIST_TRIGGERS
depends on ARCH_HAVE_NMI_SAFE_CMPXCHG
select TRACING_MAP
select TRACING
+ select DYNAMIC_EVENTS
default n
help
Hist triggers allow one or more arbitrary trace event fields
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index f81dadbc7c4a..c2b2148bb1d2 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -57,6 +57,7 @@ obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
+obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += fgraph.o
ifeq ($(CONFIG_BLOCK),y)
obj-$(CONFIG_EVENT_TRACING) += blktrace.o
endif
@@ -78,6 +79,7 @@ endif
ifeq ($(CONFIG_TRACING),y)
obj-$(CONFIG_KGDB_KDB) += trace_kdb.o
endif
+obj-$(CONFIG_DYNAMIC_EVENTS) += trace_dynevent.o
obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o
obj-$(CONFIG_UPROBE_EVENTS) += trace_uprobe.o
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 08fcfe440c63..d64c00afceb5 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -17,6 +17,43 @@
#include "trace_probe.h"
#include "trace.h"
+#ifdef CONFIG_MODULES
+struct bpf_trace_module {
+ struct module *module;
+ struct list_head list;
+};
+
+static LIST_HEAD(bpf_trace_modules);
+static DEFINE_MUTEX(bpf_module_mutex);
+
+static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name)
+{
+ struct bpf_raw_event_map *btp, *ret = NULL;
+ struct bpf_trace_module *btm;
+ unsigned int i;
+
+ mutex_lock(&bpf_module_mutex);
+ list_for_each_entry(btm, &bpf_trace_modules, list) {
+ for (i = 0; i < btm->module->num_bpf_raw_events; ++i) {
+ btp = &btm->module->bpf_raw_events[i];
+ if (!strcmp(btp->tp->name, name)) {
+ if (try_module_get(btm->module))
+ ret = btp;
+ goto out;
+ }
+ }
+ }
+out:
+ mutex_unlock(&bpf_module_mutex);
+ return ret;
+}
+#else
+static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name)
+{
+ return NULL;
+}
+#endif /* CONFIG_MODULES */
+
u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
@@ -133,7 +170,7 @@ BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
return -EPERM;
if (unlikely(uaccess_kernel()))
return -EPERM;
- if (!access_ok(VERIFY_WRITE, unsafe_ptr, size))
+ if (!access_ok(unsafe_ptr, size))
return -EPERM;
return probe_kernel_write(unsafe_ptr, src, size);
@@ -196,11 +233,13 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
i++;
} else if (fmt[i] == 'p' || fmt[i] == 's') {
mod[fmt_cnt]++;
- i++;
- if (!isspace(fmt[i]) && !ispunct(fmt[i]) && fmt[i] != 0)
+ /* disallow any further format extensions */
+ if (fmt[i + 1] != 0 &&
+ !isspace(fmt[i + 1]) &&
+ !ispunct(fmt[i + 1]))
return -EINVAL;
fmt_cnt++;
- if (fmt[i - 1] == 's') {
+ if (fmt[i] == 's') {
if (str_seen)
/* allow only one '%s' per fmt string */
return -EINVAL;
@@ -392,8 +431,7 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
if (unlikely(event->oncpu != cpu))
return -EOPNOTSUPP;
- perf_event_output(event, sd, regs);
- return 0;
+ return perf_event_output(event, sd, regs);
}
BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
@@ -1074,7 +1112,7 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info)
extern struct bpf_raw_event_map __start__bpf_raw_tp[];
extern struct bpf_raw_event_map __stop__bpf_raw_tp[];
-struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name)
+struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name)
{
struct bpf_raw_event_map *btp = __start__bpf_raw_tp;
@@ -1082,7 +1120,16 @@ struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name)
if (!strcmp(btp->tp->name, name))
return btp;
}
- return NULL;
+
+ return bpf_get_raw_tracepoint_module(name);
+}
+
+void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp)
+{
+ struct module *mod = __module_address((unsigned long)btp);
+
+ if (mod)
+ module_put(mod);
}
static __always_inline
@@ -1156,22 +1203,12 @@ static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *
int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
{
- int err;
-
- mutex_lock(&bpf_event_mutex);
- err = __bpf_probe_register(btp, prog);
- mutex_unlock(&bpf_event_mutex);
- return err;
+ return __bpf_probe_register(btp, prog);
}
int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
{
- int err;
-
- mutex_lock(&bpf_event_mutex);
- err = tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog);
- mutex_unlock(&bpf_event_mutex);
- return err;
+ return tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog);
}
int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
@@ -1220,3 +1257,52 @@ int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
return err;
}
+
+#ifdef CONFIG_MODULES
+int bpf_event_notify(struct notifier_block *nb, unsigned long op, void *module)
+{
+ struct bpf_trace_module *btm, *tmp;
+ struct module *mod = module;
+
+ if (mod->num_bpf_raw_events == 0 ||
+ (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING))
+ return 0;
+
+ mutex_lock(&bpf_module_mutex);
+
+ switch (op) {
+ case MODULE_STATE_COMING:
+ btm = kzalloc(sizeof(*btm), GFP_KERNEL);
+ if (btm) {
+ btm->module = module;
+ list_add(&btm->list, &bpf_trace_modules);
+ }
+ break;
+ case MODULE_STATE_GOING:
+ list_for_each_entry_safe(btm, tmp, &bpf_trace_modules, list) {
+ if (btm->module == module) {
+ list_del(&btm->list);
+ kfree(btm);
+ break;
+ }
+ }
+ break;
+ }
+
+ mutex_unlock(&bpf_module_mutex);
+
+ return 0;
+}
+
+static struct notifier_block bpf_module_nb = {
+ .notifier_call = bpf_event_notify,
+};
+
+int __init bpf_event_init(void)
+{
+ register_module_notifier(&bpf_module_nb);
+ return 0;
+}
+
+fs_initcall(bpf_event_init);
+#endif /* CONFIG_MODULES */
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
new file mode 100644
index 000000000000..8dfd5021b933
--- /dev/null
+++ b/kernel/trace/fgraph.c
@@ -0,0 +1,626 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Infrastructure to took into function calls and returns.
+ * Copyright (c) 2008-2009 Frederic Weisbecker <[email protected]>
+ * Mostly borrowed from function tracer which
+ * is Copyright (c) Steven Rostedt <[email protected]>
+ *
+ * Highly modified by Steven Rostedt (VMware).
+ */
+#include <linux/suspend.h>
+#include <linux/ftrace.h>
+#include <linux/slab.h>
+
+#include <trace/events/sched.h>
+
+#include "ftrace_internal.h"
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+#define ASSIGN_OPS_HASH(opsname, val) \
+ .func_hash = val, \
+ .local_hash.regex_lock = __MUTEX_INITIALIZER(opsname.local_hash.regex_lock),
+#else
+#define ASSIGN_OPS_HASH(opsname, val)
+#endif
+
+static bool kill_ftrace_graph;
+int ftrace_graph_active;
+
+/* Both enabled by default (can be cleared by function_graph tracer flags */
+static bool fgraph_sleep_time = true;
+
+/**
+ * ftrace_graph_is_dead - returns true if ftrace_graph_stop() was called
+ *
+ * ftrace_graph_stop() is called when a severe error is detected in
+ * the function graph tracing. This function is called by the critical
+ * paths of function graph to keep those paths from doing any more harm.
+ */
+bool ftrace_graph_is_dead(void)
+{
+ return kill_ftrace_graph;
+}
+
+/**
+ * ftrace_graph_stop - set to permanently disable function graph tracincg
+ *
+ * In case of an error int function graph tracing, this is called
+ * to try to keep function graph tracing from causing any more harm.
+ * Usually this is pretty severe and this is called to try to at least
+ * get a warning out to the user.
+ */
+void ftrace_graph_stop(void)
+{
+ kill_ftrace_graph = true;
+}
+
+/* Add a function return address to the trace stack on thread info.*/
+static int
+ftrace_push_return_trace(unsigned long ret, unsigned long func,
+ unsigned long frame_pointer, unsigned long *retp)
+{
+ unsigned long long calltime;
+ int index;
+
+ if (unlikely(ftrace_graph_is_dead()))
+ return -EBUSY;
+
+ if (!current->ret_stack)
+ return -EBUSY;
+
+ /*
+ * We must make sure the ret_stack is tested before we read
+ * anything else.
+ */
+ smp_rmb();
+
+ /* The return trace stack is full */
+ if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
+ atomic_inc(&current->trace_overrun);
+ return -EBUSY;
+ }
+
+ calltime = trace_clock_local();
+
+ index = ++current->curr_ret_stack;
+ barrier();
+ current->ret_stack[index].ret = ret;
+ current->ret_stack[index].func = func;
+ current->ret_stack[index].calltime = calltime;
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+ current->ret_stack[index].fp = frame_pointer;
+#endif
+#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+ current->ret_stack[index].retp = retp;
+#endif
+ return 0;
+}
+
+int function_graph_enter(unsigned long ret, unsigned long func,
+ unsigned long frame_pointer, unsigned long *retp)
+{
+ struct ftrace_graph_ent trace;
+
+ trace.func = func;
+ trace.depth = ++current->curr_ret_depth;
+
+ if (ftrace_push_return_trace(ret, func, frame_pointer, retp))
+ goto out;
+
+ /* Only trace if the calling function expects to */
+ if (!ftrace_graph_entry(&trace))
+ goto out_ret;
+
+ return 0;
+ out_ret:
+ current->curr_ret_stack--;
+ out:
+ current->curr_ret_depth--;
+ return -EBUSY;
+}
+
+/* Retrieve a function return address to the trace stack on thread info.*/
+static void
+ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
+ unsigned long frame_pointer)
+{
+ int index;
+
+ index = current->curr_ret_stack;
+
+ if (unlikely(index < 0 || index >= FTRACE_RETFUNC_DEPTH)) {
+ ftrace_graph_stop();
+ WARN_ON(1);
+ /* Might as well panic, otherwise we have no where to go */
+ *ret = (unsigned long)panic;
+ return;
+ }
+
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+ /*
+ * The arch may choose to record the frame pointer used
+ * and check it here to make sure that it is what we expect it
+ * to be. If gcc does not set the place holder of the return
+ * address in the frame pointer, and does a copy instead, then
+ * the function graph trace will fail. This test detects this
+ * case.
+ *
+ * Currently, x86_32 with optimize for size (-Os) makes the latest
+ * gcc do the above.
+ *
+ * Note, -mfentry does not use frame pointers, and this test
+ * is not needed if CC_USING_FENTRY is set.
+ */
+ if (unlikely(current->ret_stack[index].fp != frame_pointer)) {
+ ftrace_graph_stop();
+ WARN(1, "Bad frame pointer: expected %lx, received %lx\n"
+ " from func %ps return to %lx\n",
+ current->ret_stack[index].fp,
+ frame_pointer,
+ (void *)current->ret_stack[index].func,
+ current->ret_stack[index].ret);
+ *ret = (unsigned long)panic;
+ return;
+ }
+#endif
+
+ *ret = current->ret_stack[index].ret;
+ trace->func = current->ret_stack[index].func;
+ trace->calltime = current->ret_stack[index].calltime;
+ trace->overrun = atomic_read(&current->trace_overrun);
+ trace->depth = current->curr_ret_depth--;
+ /*
+ * We still want to trace interrupts coming in if
+ * max_depth is set to 1. Make sure the decrement is
+ * seen before ftrace_graph_return.
+ */
+ barrier();
+}
+
+/*
+ * Hibernation protection.
+ * The state of the current task is too much unstable during
+ * suspend/restore to disk. We want to protect against that.
+ */
+static int
+ftrace_suspend_notifier_call(struct notifier_block *bl, unsigned long state,
+ void *unused)
+{
+ switch (state) {
+ case PM_HIBERNATION_PREPARE:
+ pause_graph_tracing();
+ break;
+
+ case PM_POST_HIBERNATION:
+ unpause_graph_tracing();
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block ftrace_suspend_notifier = {
+ .notifier_call = ftrace_suspend_notifier_call,
+};
+
+/*
+ * Send the trace to the ring-buffer.
+ * @return the original return address.
+ */
+unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
+{
+ struct ftrace_graph_ret trace;
+ unsigned long ret;
+
+ ftrace_pop_return_trace(&trace, &ret, frame_pointer);
+ trace.rettime = trace_clock_local();
+ ftrace_graph_return(&trace);
+ /*
+ * The ftrace_graph_return() may still access the current
+ * ret_stack structure, we need to make sure the update of
+ * curr_ret_stack is after that.
+ */
+ barrier();
+ current->curr_ret_stack--;
+
+ if (unlikely(!ret)) {
+ ftrace_graph_stop();
+ WARN_ON(1);
+ /* Might as well panic. What else to do? */
+ ret = (unsigned long)panic;
+ }
+
+ return ret;
+}
+
+/**
+ * ftrace_graph_get_ret_stack - return the entry of the shadow stack
+ * @task: The task to read the shadow stack from
+ * @idx: Index down the shadow stack
+ *
+ * Return the ret_struct on the shadow stack of the @task at the
+ * call graph at @idx starting with zero. If @idx is zero, it
+ * will return the last saved ret_stack entry. If it is greater than
+ * zero, it will return the corresponding ret_stack for the depth
+ * of saved return addresses.
+ */
+struct ftrace_ret_stack *
+ftrace_graph_get_ret_stack(struct task_struct *task, int idx)
+{
+ idx = task->curr_ret_stack - idx;
+
+ if (idx >= 0 && idx <= task->curr_ret_stack)
+ return &task->ret_stack[idx];
+
+ return NULL;
+}
+
+/**
+ * ftrace_graph_ret_addr - convert a potentially modified stack return address
+ * to its original value
+ *
+ * This function can be called by stack unwinding code to convert a found stack
+ * return address ('ret') to its original value, in case the function graph
+ * tracer has modified it to be 'return_to_handler'. If the address hasn't
+ * been modified, the unchanged value of 'ret' is returned.
+ *
+ * 'idx' is a state variable which should be initialized by the caller to zero
+ * before the first call.
+ *
+ * 'retp' is a pointer to the return address on the stack. It's ignored if
+ * the arch doesn't have HAVE_FUNCTION_GRAPH_RET_ADDR_PTR defined.
+ */
+#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
+ unsigned long ret, unsigned long *retp)
+{
+ int index = task->curr_ret_stack;
+ int i;
+
+ if (ret != (unsigned long)return_to_handler)
+ return ret;
+
+ if (index < 0)
+ return ret;
+
+ for (i = 0; i <= index; i++)
+ if (task->ret_stack[i].retp == retp)
+ return task->ret_stack[i].ret;
+
+ return ret;
+}
+#else /* !HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */
+unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
+ unsigned long ret, unsigned long *retp)
+{
+ int task_idx;
+
+ if (ret != (unsigned long)return_to_handler)
+ return ret;
+
+ task_idx = task->curr_ret_stack;
+
+ if (!task->ret_stack || task_idx < *idx)
+ return ret;
+
+ task_idx -= *idx;
+ (*idx)++;
+
+ return task->ret_stack[task_idx].ret;
+}
+#endif /* HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */
+
+static struct ftrace_ops graph_ops = {
+ .func = ftrace_stub,
+ .flags = FTRACE_OPS_FL_RECURSION_SAFE |
+ FTRACE_OPS_FL_INITIALIZED |
+ FTRACE_OPS_FL_PID |
+ FTRACE_OPS_FL_STUB,
+#ifdef FTRACE_GRAPH_TRAMP_ADDR
+ .trampoline = FTRACE_GRAPH_TRAMP_ADDR,
+ /* trampoline_size is only needed for dynamically allocated tramps */
+#endif
+ ASSIGN_OPS_HASH(graph_ops, &global_ops.local_hash)
+};
+
+void ftrace_graph_sleep_time_control(bool enable)
+{
+ fgraph_sleep_time = enable;
+}
+
+int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
+{
+ return 0;
+}
+
+/* The callbacks that hook a function */
+trace_func_graph_ret_t ftrace_graph_return =
+ (trace_func_graph_ret_t)ftrace_stub;
+trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub;
+static trace_func_graph_ent_t __ftrace_graph_entry = ftrace_graph_entry_stub;
+
+/* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */
+static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
+{
+ int i;
+ int ret = 0;
+ int start = 0, end = FTRACE_RETSTACK_ALLOC_SIZE;
+ struct task_struct *g, *t;
+
+ for (i = 0; i < FTRACE_RETSTACK_ALLOC_SIZE; i++) {
+ ret_stack_list[i] =
+ kmalloc_array(FTRACE_RETFUNC_DEPTH,
+ sizeof(struct ftrace_ret_stack),
+ GFP_KERNEL);
+ if (!ret_stack_list[i]) {
+ start = 0;
+ end = i;
+ ret = -ENOMEM;
+ goto free;
+ }
+ }
+
+ read_lock(&tasklist_lock);
+ do_each_thread(g, t) {
+ if (start == end) {
+ ret = -EAGAIN;
+ goto unlock;
+ }
+
+ if (t->ret_stack == NULL) {
+ atomic_set(&t->tracing_graph_pause, 0);
+ atomic_set(&t->trace_overrun, 0);
+ t->curr_ret_stack = -1;
+ t->curr_ret_depth = -1;
+ /* Make sure the tasks see the -1 first: */
+ smp_wmb();
+ t->ret_stack = ret_stack_list[start++];
+ }
+ } while_each_thread(g, t);
+
+unlock:
+ read_unlock(&tasklist_lock);
+free:
+ for (i = start; i < end; i++)
+ kfree(ret_stack_list[i]);
+ return ret;
+}
+
+static void
+ftrace_graph_probe_sched_switch(void *ignore, bool preempt,
+ struct task_struct *prev, struct task_struct *next)
+{
+ unsigned long long timestamp;
+ int index;
+
+ /*
+ * Does the user want to count the time a function was asleep.
+ * If so, do not update the time stamps.
+ */
+ if (fgraph_sleep_time)
+ return;
+
+ timestamp = trace_clock_local();
+
+ prev->ftrace_timestamp = timestamp;
+
+ /* only process tasks that we timestamped */
+ if (!next->ftrace_timestamp)
+ return;
+
+ /*
+ * Update all the counters in next to make up for the
+ * time next was sleeping.
+ */
+ timestamp -= next->ftrace_timestamp;
+
+ for (index = next->curr_ret_stack; index >= 0; index--)
+ next->ret_stack[index].calltime += timestamp;
+}
+
+static int ftrace_graph_entry_test(struct ftrace_graph_ent *trace)
+{
+ if (!ftrace_ops_test(&global_ops, trace->func, NULL))
+ return 0;
+ return __ftrace_graph_entry(trace);
+}
+
+/*
+ * The function graph tracer should only trace the functions defined
+ * by set_ftrace_filter and set_ftrace_notrace. If another function
+ * tracer ops is registered, the graph tracer requires testing the
+ * function against the global ops, and not just trace any function
+ * that any ftrace_ops registered.
+ */
+void update_function_graph_func(void)
+{
+ struct ftrace_ops *op;
+ bool do_test = false;
+
+ /*
+ * The graph and global ops share the same set of functions
+ * to test. If any other ops is on the list, then
+ * the graph tracing needs to test if its the function
+ * it should call.
+ */
+ do_for_each_ftrace_op(op, ftrace_ops_list) {
+ if (op != &global_ops && op != &graph_ops &&
+ op != &ftrace_list_end) {
+ do_test = true;
+ /* in double loop, break out with goto */
+ goto out;
+ }
+ } while_for_each_ftrace_op(op);
+ out:
+ if (do_test)
+ ftrace_graph_entry = ftrace_graph_entry_test;
+ else
+ ftrace_graph_entry = __ftrace_graph_entry;
+}
+
+static DEFINE_PER_CPU(struct ftrace_ret_stack *, idle_ret_stack);
+
+static void
+graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack)
+{
+ atomic_set(&t->tracing_graph_pause, 0);
+ atomic_set(&t->trace_overrun, 0);
+ t->ftrace_timestamp = 0;
+ /* make curr_ret_stack visible before we add the ret_stack */
+ smp_wmb();
+ t->ret_stack = ret_stack;
+}
+
+/*
+ * Allocate a return stack for the idle task. May be the first
+ * time through, or it may be done by CPU hotplug online.
+ */
+void ftrace_graph_init_idle_task(struct task_struct *t, int cpu)
+{
+ t->curr_ret_stack = -1;
+ t->curr_ret_depth = -1;
+ /*
+ * The idle task has no parent, it either has its own
+ * stack or no stack at all.
+ */
+ if (t->ret_stack)
+ WARN_ON(t->ret_stack != per_cpu(idle_ret_stack, cpu));
+
+ if (ftrace_graph_active) {
+ struct ftrace_ret_stack *ret_stack;
+
+ ret_stack = per_cpu(idle_ret_stack, cpu);
+ if (!ret_stack) {
+ ret_stack =
+ kmalloc_array(FTRACE_RETFUNC_DEPTH,
+ sizeof(struct ftrace_ret_stack),
+ GFP_KERNEL);
+ if (!ret_stack)
+ return;
+ per_cpu(idle_ret_stack, cpu) = ret_stack;
+ }
+ graph_init_task(t, ret_stack);
+ }
+}
+
+/* Allocate a return stack for newly created task */
+void ftrace_graph_init_task(struct task_struct *t)
+{
+ /* Make sure we do not use the parent ret_stack */
+ t->ret_stack = NULL;
+ t->curr_ret_stack = -1;
+ t->curr_ret_depth = -1;
+
+ if (ftrace_graph_active) {
+ struct ftrace_ret_stack *ret_stack;
+
+ ret_stack = kmalloc_array(FTRACE_RETFUNC_DEPTH,
+ sizeof(struct ftrace_ret_stack),
+ GFP_KERNEL);
+ if (!ret_stack)
+ return;
+ graph_init_task(t, ret_stack);
+ }
+}
+
+void ftrace_graph_exit_task(struct task_struct *t)
+{
+ struct ftrace_ret_stack *ret_stack = t->ret_stack;
+
+ t->ret_stack = NULL;
+ /* NULL must become visible to IRQs before we free it: */
+ barrier();
+
+ kfree(ret_stack);
+}
+
+/* Allocate a return stack for each task */
+static int start_graph_tracing(void)
+{
+ struct ftrace_ret_stack **ret_stack_list;
+ int ret, cpu;
+
+ ret_stack_list = kmalloc_array(FTRACE_RETSTACK_ALLOC_SIZE,
+ sizeof(struct ftrace_ret_stack *),
+ GFP_KERNEL);
+
+ if (!ret_stack_list)
+ return -ENOMEM;
+
+ /* The cpu_boot init_task->ret_stack will never be freed */
+ for_each_online_cpu(cpu) {
+ if (!idle_task(cpu)->ret_stack)
+ ftrace_graph_init_idle_task(idle_task(cpu), cpu);
+ }
+
+ do {
+ ret = alloc_retstack_tasklist(ret_stack_list);
+ } while (ret == -EAGAIN);
+
+ if (!ret) {
+ ret = register_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
+ if (ret)
+ pr_info("ftrace_graph: Couldn't activate tracepoint"
+ " probe to kernel_sched_switch\n");
+ }
+
+ kfree(ret_stack_list);
+ return ret;
+}
+
+int register_ftrace_graph(struct fgraph_ops *gops)
+{
+ int ret = 0;
+
+ mutex_lock(&ftrace_lock);
+
+ /* we currently allow only one tracer registered at a time */
+ if (ftrace_graph_active) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ register_pm_notifier(&ftrace_suspend_notifier);
+
+ ftrace_graph_active++;
+ ret = start_graph_tracing();
+ if (ret) {
+ ftrace_graph_active--;
+ goto out;
+ }
+
+ ftrace_graph_return = gops->retfunc;
+
+ /*
+ * Update the indirect function to the entryfunc, and the
+ * function that gets called to the entry_test first. Then
+ * call the update fgraph entry function to determine if
+ * the entryfunc should be called directly or not.
+ */
+ __ftrace_graph_entry = gops->entryfunc;
+ ftrace_graph_entry = ftrace_graph_entry_test;
+ update_function_graph_func();
+
+ ret = ftrace_startup(&graph_ops, FTRACE_START_FUNC_RET);
+out:
+ mutex_unlock(&ftrace_lock);
+ return ret;
+}
+
+void unregister_ftrace_graph(struct fgraph_ops *gops)
+{
+ mutex_lock(&ftrace_lock);
+
+ if (unlikely(!ftrace_graph_active))
+ goto out;
+
+ ftrace_graph_active--;
+ ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
+ ftrace_graph_entry = ftrace_graph_entry_stub;
+ __ftrace_graph_entry = ftrace_graph_entry_stub;
+ ftrace_shutdown(&graph_ops, FTRACE_STOP_FUNC_RET);
+ unregister_pm_notifier(&ftrace_suspend_notifier);
+ unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
+
+ out:
+ mutex_unlock(&ftrace_lock);
+}
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index f536f601bd46..aac7847c0214 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -19,7 +19,6 @@
#include <linux/sched/task.h>
#include <linux/kallsyms.h>
#include <linux/seq_file.h>
-#include <linux/suspend.h>
#include <linux/tracefs.h>
#include <linux/hardirq.h>
#include <linux/kthread.h>
@@ -40,6 +39,7 @@
#include <asm/sections.h>
#include <asm/setup.h>
+#include "ftrace_internal.h"
#include "trace_output.h"
#include "trace_stat.h"
@@ -77,7 +77,12 @@
#define ASSIGN_OPS_HASH(opsname, val)
#endif
-static struct ftrace_ops ftrace_list_end __read_mostly = {
+enum {
+ FTRACE_MODIFY_ENABLE_FL = (1 << 0),
+ FTRACE_MODIFY_MAY_SLEEP_FL = (1 << 1),
+};
+
+struct ftrace_ops ftrace_list_end __read_mostly = {
.func = ftrace_stub,
.flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_STUB,
INIT_OPS_HASH(ftrace_list_end)
@@ -112,11 +117,11 @@ static void ftrace_update_trampoline(struct ftrace_ops *ops);
*/
static int ftrace_disabled __read_mostly;
-static DEFINE_MUTEX(ftrace_lock);
+DEFINE_MUTEX(ftrace_lock);
-static struct ftrace_ops __rcu *ftrace_ops_list __read_mostly = &ftrace_list_end;
+struct ftrace_ops __rcu *ftrace_ops_list __read_mostly = &ftrace_list_end;
ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
-static struct ftrace_ops global_ops;
+struct ftrace_ops global_ops;
#if ARCH_SUPPORTS_FTRACE_OPS
static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
@@ -127,26 +132,6 @@ static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip);
#define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops)
#endif
-/*
- * Traverse the ftrace_global_list, invoking all entries. The reason that we
- * can use rcu_dereference_raw_notrace() is that elements removed from this list
- * are simply leaked, so there is no need to interact with a grace-period
- * mechanism. The rcu_dereference_raw_notrace() calls are needed to handle
- * concurrent insertions into the ftrace_global_list.
- *
- * Silly Alpha and silly pointer-speculation compiler optimizations!
- */
-#define do_for_each_ftrace_op(op, list) \
- op = rcu_dereference_raw_notrace(list); \
- do
-
-/*
- * Optimized for just a single item in the list (as that is the normal case).
- */
-#define while_for_each_ftrace_op(op) \
- while (likely(op = rcu_dereference_raw_notrace((op)->next)) && \
- unlikely((op) != &ftrace_list_end))
-
static inline void ftrace_ops_init(struct ftrace_ops *ops)
{
#ifdef CONFIG_DYNAMIC_FTRACE
@@ -173,7 +158,7 @@ static void ftrace_sync(struct work_struct *work)
{
/*
* This function is just a stub to implement a hard force
- * of synchronize_sched(). This requires synchronizing
+ * of synchronize_rcu(). This requires synchronizing
* tasks even in userspace and idle.
*
* Yes, function tracing is rude.
@@ -186,18 +171,6 @@ static void ftrace_sync_ipi(void *data)
smp_rmb();
}
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-static void update_function_graph_func(void);
-
-/* Both enabled by default (can be cleared by function_graph tracer flags */
-static bool fgraph_sleep_time = true;
-static bool fgraph_graph_time = true;
-
-#else
-static inline void update_function_graph_func(void) { }
-#endif
-
-
static ftrace_func_t ftrace_ops_get_list_func(struct ftrace_ops *ops)
{
/*
@@ -334,7 +307,7 @@ static int remove_ftrace_ops(struct ftrace_ops __rcu **list,
static void ftrace_update_trampoline(struct ftrace_ops *ops);
-static int __register_ftrace_function(struct ftrace_ops *ops)
+int __register_ftrace_function(struct ftrace_ops *ops)
{
if (ops->flags & FTRACE_OPS_FL_DELETED)
return -EINVAL;
@@ -375,7 +348,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
return 0;
}
-static int __unregister_ftrace_function(struct ftrace_ops *ops)
+int __unregister_ftrace_function(struct ftrace_ops *ops)
{
int ret;
@@ -815,9 +788,16 @@ function_profile_call(unsigned long ip, unsigned long parent_ip,
}
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+static bool fgraph_graph_time = true;
+
+void ftrace_graph_graph_time_control(bool enable)
+{
+ fgraph_graph_time = enable;
+}
+
static int profile_graph_entry(struct ftrace_graph_ent *trace)
{
- int index = trace->depth;
+ struct ftrace_ret_stack *ret_stack;
function_profile_call(trace->func, 0, NULL, NULL);
@@ -825,14 +805,16 @@ static int profile_graph_entry(struct ftrace_graph_ent *trace)
if (!current->ret_stack)
return 0;
- if (index >= 0 && index < FTRACE_RETFUNC_DEPTH)
- current->ret_stack[index].subtime = 0;
+ ret_stack = ftrace_graph_get_ret_stack(current, 0);
+ if (ret_stack)
+ ret_stack->subtime = 0;
return 1;
}
static void profile_graph_return(struct ftrace_graph_ret *trace)
{
+ struct ftrace_ret_stack *ret_stack;
struct ftrace_profile_stat *stat;
unsigned long long calltime;
struct ftrace_profile *rec;
@@ -850,16 +832,15 @@ static void profile_graph_return(struct ftrace_graph_ret *trace)
calltime = trace->rettime - trace->calltime;
if (!fgraph_graph_time) {
- int index;
-
- index = trace->depth;
/* Append this call time to the parent time to subtract */
- if (index)
- current->ret_stack[index - 1].subtime += calltime;
+ ret_stack = ftrace_graph_get_ret_stack(current, 1);
+ if (ret_stack)
+ ret_stack->subtime += calltime;
- if (current->ret_stack[index].subtime < calltime)
- calltime -= current->ret_stack[index].subtime;
+ ret_stack = ftrace_graph_get_ret_stack(current, 0);
+ if (ret_stack && ret_stack->subtime < calltime)
+ calltime -= ret_stack->subtime;
else
calltime = 0;
}
@@ -874,15 +855,19 @@ static void profile_graph_return(struct ftrace_graph_ret *trace)
local_irq_restore(flags);
}
+static struct fgraph_ops fprofiler_ops = {
+ .entryfunc = &profile_graph_entry,
+ .retfunc = &profile_graph_return,
+};
+
static int register_ftrace_profiler(void)
{
- return register_ftrace_graph(&profile_graph_return,
- &profile_graph_entry);
+ return register_ftrace_graph(&fprofiler_ops);
}
static void unregister_ftrace_profiler(void)
{
- unregister_ftrace_graph();
+ unregister_ftrace_graph(&fprofiler_ops);
}
#else
static struct ftrace_ops ftrace_profile_ops __read_mostly = {
@@ -934,7 +919,7 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf,
ftrace_profile_enabled = 0;
/*
* unregister_ftrace_profiler calls stop_machine
- * so this acts like an synchronize_sched.
+ * so this acts like an synchronize_rcu.
*/
unregister_ftrace_profiler();
}
@@ -1021,12 +1006,6 @@ static __init void ftrace_profile_tracefs(struct dentry *d_tracer)
}
#endif /* CONFIG_FUNCTION_PROFILER */
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-static int ftrace_graph_active;
-#else
-# define ftrace_graph_active 0
-#endif
-
#ifdef CONFIG_DYNAMIC_FTRACE
static struct ftrace_ops *removed_ops;
@@ -1067,7 +1046,7 @@ static const struct ftrace_hash empty_hash = {
};
#define EMPTY_HASH ((struct ftrace_hash *)&empty_hash)
-static struct ftrace_ops global_ops = {
+struct ftrace_ops global_ops = {
.func = ftrace_stub,
.local_hash.notrace_hash = EMPTY_HASH,
.local_hash.filter_hash = EMPTY_HASH,
@@ -1086,7 +1065,7 @@ struct ftrace_ops *ftrace_ops_trampoline(unsigned long addr)
/*
* Some of the ops may be dynamically allocated,
- * they are freed after a synchronize_sched().
+ * they are freed after a synchronize_rcu().
*/
preempt_disable_notrace();
@@ -1286,7 +1265,7 @@ static void free_ftrace_hash_rcu(struct ftrace_hash *hash)
{
if (!hash || hash == EMPTY_HASH)
return;
- call_rcu_sched(&hash->rcu, __free_ftrace_hash_rcu);
+ call_rcu(&hash->rcu, __free_ftrace_hash_rcu);
}
void ftrace_free_filter(struct ftrace_ops *ops)
@@ -1501,9 +1480,9 @@ static bool hash_contains_ip(unsigned long ip,
* the ip is not in the ops->notrace_hash.
*
* This needs to be called with preemption disabled as
- * the hashes are freed with call_rcu_sched().
+ * the hashes are freed with call_rcu().
*/
-static int
+int
ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs)
{
struct ftrace_ops_hash hash;
@@ -2415,10 +2394,12 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
return -1; /* unknow ftrace bug */
}
-void __weak ftrace_replace_code(int enable)
+void __weak ftrace_replace_code(int mod_flags)
{
struct dyn_ftrace *rec;
struct ftrace_page *pg;
+ int enable = mod_flags & FTRACE_MODIFY_ENABLE_FL;
+ int schedulable = mod_flags & FTRACE_MODIFY_MAY_SLEEP_FL;
int failed;
if (unlikely(ftrace_disabled))
@@ -2435,6 +2416,8 @@ void __weak ftrace_replace_code(int enable)
/* Stop processing */
return;
}
+ if (schedulable)
+ cond_resched();
} while_for_each_ftrace_rec();
}
@@ -2548,8 +2531,12 @@ int __weak ftrace_arch_code_modify_post_process(void)
void ftrace_modify_all_code(int command)
{
int update = command & FTRACE_UPDATE_TRACE_FUNC;
+ int mod_flags = 0;
int err = 0;
+ if (command & FTRACE_MAY_SLEEP)
+ mod_flags = FTRACE_MODIFY_MAY_SLEEP_FL;
+
/*
* If the ftrace_caller calls a ftrace_ops func directly,
* we need to make sure that it only traces functions it
@@ -2567,9 +2554,9 @@ void ftrace_modify_all_code(int command)
}
if (command & FTRACE_UPDATE_CALLS)
- ftrace_replace_code(1);
+ ftrace_replace_code(mod_flags | FTRACE_MODIFY_ENABLE_FL);
else if (command & FTRACE_DISABLE_CALLS)
- ftrace_replace_code(0);
+ ftrace_replace_code(mod_flags);
if (update && ftrace_trace_function != ftrace_ops_list_func) {
function_trace_op = set_function_trace_op;
@@ -2682,7 +2669,7 @@ static void ftrace_startup_all(int command)
update_all_ops = false;
}
-static int ftrace_startup(struct ftrace_ops *ops, int command)
+int ftrace_startup(struct ftrace_ops *ops, int command)
{
int ret;
@@ -2724,7 +2711,7 @@ static int ftrace_startup(struct ftrace_ops *ops, int command)
return 0;
}
-static int ftrace_shutdown(struct ftrace_ops *ops, int command)
+int ftrace_shutdown(struct ftrace_ops *ops, int command)
{
int ret;
@@ -4496,7 +4483,7 @@ unregister_ftrace_function_probe_func(char *glob, struct trace_array *tr,
if (ftrace_enabled && !ftrace_hash_empty(hash))
ftrace_run_modify_code(&probe->ops, FTRACE_UPDATE_CALLS,
&old_hash_ops);
- synchronize_sched();
+ synchronize_rcu();
hlist_for_each_entry_safe(entry, tmp, &hhd, hlist) {
hlist_del(&entry->hlist);
@@ -5314,7 +5301,7 @@ ftrace_graph_release(struct inode *inode, struct file *file)
mutex_unlock(&graph_lock);
/* Wait till all users are no longer using the old hash */
- synchronize_sched();
+ synchronize_rcu();
free_ftrace_hash(old_hash);
}
@@ -5460,6 +5447,7 @@ void ftrace_destroy_filter_files(struct ftrace_ops *ops)
if (ops->flags & FTRACE_OPS_FL_ENABLED)
ftrace_shutdown(ops, 0);
ops->flags |= FTRACE_OPS_FL_DELETED;
+ ftrace_free_filter(ops);
mutex_unlock(&ftrace_lock);
}
@@ -5707,7 +5695,7 @@ void ftrace_release_mod(struct module *mod)
list_for_each_entry_safe(mod_map, n, &ftrace_mod_maps, list) {
if (mod_map->mod == mod) {
list_del_rcu(&mod_map->list);
- call_rcu_sched(&mod_map->rcu, ftrace_free_mod_map);
+ call_rcu(&mod_map->rcu, ftrace_free_mod_map);
break;
}
}
@@ -5927,7 +5915,7 @@ ftrace_mod_address_lookup(unsigned long addr, unsigned long *size,
struct ftrace_mod_map *mod_map;
const char *ret = NULL;
- /* mod_map is freed via call_rcu_sched() */
+ /* mod_map is freed via call_rcu() */
preempt_disable();
list_for_each_entry_rcu(mod_map, &ftrace_mod_maps, list) {
ret = ftrace_func_address_lookup(mod_map, addr, size, off, sym);
@@ -6177,7 +6165,7 @@ void ftrace_init_trace_array(struct trace_array *tr)
}
#else
-static struct ftrace_ops global_ops = {
+struct ftrace_ops global_ops = {
.func = ftrace_stub,
.flags = FTRACE_OPS_FL_RECURSION_SAFE |
FTRACE_OPS_FL_INITIALIZED |
@@ -6194,31 +6182,10 @@ core_initcall(ftrace_nodyn_init);
static inline int ftrace_init_dyn_tracefs(struct dentry *d_tracer) { return 0; }
static inline void ftrace_startup_enable(int command) { }
static inline void ftrace_startup_all(int command) { }
-/* Keep as macros so we do not need to define the commands */
-# define ftrace_startup(ops, command) \
- ({ \
- int ___ret = __register_ftrace_function(ops); \
- if (!___ret) \
- (ops)->flags |= FTRACE_OPS_FL_ENABLED; \
- ___ret; \
- })
-# define ftrace_shutdown(ops, command) \
- ({ \
- int ___ret = __unregister_ftrace_function(ops); \
- if (!___ret) \
- (ops)->flags &= ~FTRACE_OPS_FL_ENABLED; \
- ___ret; \
- })
# define ftrace_startup_sysctl() do { } while (0)
# define ftrace_shutdown_sysctl() do { } while (0)
-static inline int
-ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs)
-{
- return 1;
-}
-
static void ftrace_update_trampoline(struct ftrace_ops *ops)
{
}
@@ -6262,7 +6229,7 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
/*
* Some of the ops may be dynamically allocated,
- * they must be freed after a synchronize_sched().
+ * they must be freed after a synchronize_rcu().
*/
preempt_disable_notrace();
@@ -6433,7 +6400,7 @@ static void clear_ftrace_pids(struct trace_array *tr)
rcu_assign_pointer(tr->function_pids, NULL);
/* Wait till all users are no longer using pid filtering */
- synchronize_sched();
+ synchronize_rcu();
trace_free_pid_list(pid_list);
}
@@ -6580,7 +6547,7 @@ ftrace_pid_write(struct file *filp, const char __user *ubuf,
rcu_assign_pointer(tr->function_pids, pid_list);
if (filtered_pids) {
- synchronize_sched();
+ synchronize_rcu();
trace_free_pid_list(filtered_pids);
} else if (pid_list) {
/* Register a probe to set whether to ignore the tracing of a task */
@@ -6745,350 +6712,3 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
mutex_unlock(&ftrace_lock);
return ret;
}
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-
-static struct ftrace_ops graph_ops = {
- .func = ftrace_stub,
- .flags = FTRACE_OPS_FL_RECURSION_SAFE |
- FTRACE_OPS_FL_INITIALIZED |
- FTRACE_OPS_FL_PID |
- FTRACE_OPS_FL_STUB,
-#ifdef FTRACE_GRAPH_TRAMP_ADDR
- .trampoline = FTRACE_GRAPH_TRAMP_ADDR,
- /* trampoline_size is only needed for dynamically allocated tramps */
-#endif
- ASSIGN_OPS_HASH(graph_ops, &global_ops.local_hash)
-};
-
-void ftrace_graph_sleep_time_control(bool enable)
-{
- fgraph_sleep_time = enable;
-}
-
-void ftrace_graph_graph_time_control(bool enable)
-{
- fgraph_graph_time = enable;
-}
-
-int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
-{
- return 0;
-}
-
-/* The callbacks that hook a function */
-trace_func_graph_ret_t ftrace_graph_return =
- (trace_func_graph_ret_t)ftrace_stub;
-trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub;
-static trace_func_graph_ent_t __ftrace_graph_entry = ftrace_graph_entry_stub;
-
-/* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */
-static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
-{
- int i;
- int ret = 0;
- int start = 0, end = FTRACE_RETSTACK_ALLOC_SIZE;
- struct task_struct *g, *t;
-
- for (i = 0; i < FTRACE_RETSTACK_ALLOC_SIZE; i++) {
- ret_stack_list[i] =
- kmalloc_array(FTRACE_RETFUNC_DEPTH,
- sizeof(struct ftrace_ret_stack),
- GFP_KERNEL);
- if (!ret_stack_list[i]) {
- start = 0;
- end = i;
- ret = -ENOMEM;
- goto free;
- }
- }
-
- read_lock(&tasklist_lock);
- do_each_thread(g, t) {
- if (start == end) {
- ret = -EAGAIN;
- goto unlock;
- }
-
- if (t->ret_stack == NULL) {
- atomic_set(&t->tracing_graph_pause, 0);
- atomic_set(&t->trace_overrun, 0);
- t->curr_ret_stack = -1;
- /* Make sure the tasks see the -1 first: */
- smp_wmb();
- t->ret_stack = ret_stack_list[start++];
- }
- } while_each_thread(g, t);
-
-unlock:
- read_unlock(&tasklist_lock);
-free:
- for (i = start; i < end; i++)
- kfree(ret_stack_list[i]);
- return ret;
-}
-
-static void
-ftrace_graph_probe_sched_switch(void *ignore, bool preempt,
- struct task_struct *prev, struct task_struct *next)
-{
- unsigned long long timestamp;
- int index;
-
- /*
- * Does the user want to count the time a function was asleep.
- * If so, do not update the time stamps.
- */
- if (fgraph_sleep_time)
- return;
-
- timestamp = trace_clock_local();
-
- prev->ftrace_timestamp = timestamp;
-
- /* only process tasks that we timestamped */
- if (!next->ftrace_timestamp)
- return;
-
- /*
- * Update all the counters in next to make up for the
- * time next was sleeping.
- */
- timestamp -= next->ftrace_timestamp;
-
- for (index = next->curr_ret_stack; index >= 0; index--)
- next->ret_stack[index].calltime += timestamp;
-}
-
-/* Allocate a return stack for each task */
-static int start_graph_tracing(void)
-{
- struct ftrace_ret_stack **ret_stack_list;
- int ret, cpu;
-
- ret_stack_list = kmalloc_array(FTRACE_RETSTACK_ALLOC_SIZE,
- sizeof(struct ftrace_ret_stack *),
- GFP_KERNEL);
-
- if (!ret_stack_list)
- return -ENOMEM;
-
- /* The cpu_boot init_task->ret_stack will never be freed */
- for_each_online_cpu(cpu) {
- if (!idle_task(cpu)->ret_stack)
- ftrace_graph_init_idle_task(idle_task(cpu), cpu);
- }
-
- do {
- ret = alloc_retstack_tasklist(ret_stack_list);
- } while (ret == -EAGAIN);
-
- if (!ret) {
- ret = register_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
- if (ret)
- pr_info("ftrace_graph: Couldn't activate tracepoint"
- " probe to kernel_sched_switch\n");
- }
-
- kfree(ret_stack_list);
- return ret;
-}
-
-/*
- * Hibernation protection.
- * The state of the current task is too much unstable during
- * suspend/restore to disk. We want to protect against that.
- */
-static int
-ftrace_suspend_notifier_call(struct notifier_block *bl, unsigned long state,
- void *unused)
-{
- switch (state) {
- case PM_HIBERNATION_PREPARE:
- pause_graph_tracing();
- break;
-
- case PM_POST_HIBERNATION:
- unpause_graph_tracing();
- break;
- }
- return NOTIFY_DONE;
-}
-
-static int ftrace_graph_entry_test(struct ftrace_graph_ent *trace)
-{
- if (!ftrace_ops_test(&global_ops, trace->func, NULL))
- return 0;
- return __ftrace_graph_entry(trace);
-}
-
-/*
- * The function graph tracer should only trace the functions defined
- * by set_ftrace_filter and set_ftrace_notrace. If another function
- * tracer ops is registered, the graph tracer requires testing the
- * function against the global ops, and not just trace any function
- * that any ftrace_ops registered.
- */
-static void update_function_graph_func(void)
-{
- struct ftrace_ops *op;
- bool do_test = false;
-
- /*
- * The graph and global ops share the same set of functions
- * to test. If any other ops is on the list, then
- * the graph tracing needs to test if its the function
- * it should call.
- */
- do_for_each_ftrace_op(op, ftrace_ops_list) {
- if (op != &global_ops && op != &graph_ops &&
- op != &ftrace_list_end) {
- do_test = true;
- /* in double loop, break out with goto */
- goto out;
- }
- } while_for_each_ftrace_op(op);
- out:
- if (do_test)
- ftrace_graph_entry = ftrace_graph_entry_test;
- else
- ftrace_graph_entry = __ftrace_graph_entry;
-}
-
-static struct notifier_block ftrace_suspend_notifier = {
- .notifier_call = ftrace_suspend_notifier_call,
-};
-
-int register_ftrace_graph(trace_func_graph_ret_t retfunc,
- trace_func_graph_ent_t entryfunc)
-{
- int ret = 0;
-
- mutex_lock(&ftrace_lock);
-
- /* we currently allow only one tracer registered at a time */
- if (ftrace_graph_active) {
- ret = -EBUSY;
- goto out;
- }
-
- register_pm_notifier(&ftrace_suspend_notifier);
-
- ftrace_graph_active++;
- ret = start_graph_tracing();
- if (ret) {
- ftrace_graph_active--;
- goto out;
- }
-
- ftrace_graph_return = retfunc;
-
- /*
- * Update the indirect function to the entryfunc, and the
- * function that gets called to the entry_test first. Then
- * call the update fgraph entry function to determine if
- * the entryfunc should be called directly or not.
- */
- __ftrace_graph_entry = entryfunc;
- ftrace_graph_entry = ftrace_graph_entry_test;
- update_function_graph_func();
-
- ret = ftrace_startup(&graph_ops, FTRACE_START_FUNC_RET);
-out:
- mutex_unlock(&ftrace_lock);
- return ret;
-}
-
-void unregister_ftrace_graph(void)
-{
- mutex_lock(&ftrace_lock);
-
- if (unlikely(!ftrace_graph_active))
- goto out;
-
- ftrace_graph_active--;
- ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
- ftrace_graph_entry = ftrace_graph_entry_stub;
- __ftrace_graph_entry = ftrace_graph_entry_stub;
- ftrace_shutdown(&graph_ops, FTRACE_STOP_FUNC_RET);
- unregister_pm_notifier(&ftrace_suspend_notifier);
- unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
-
- out:
- mutex_unlock(&ftrace_lock);
-}
-
-static DEFINE_PER_CPU(struct ftrace_ret_stack *, idle_ret_stack);
-
-static void
-graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack)
-{
- atomic_set(&t->tracing_graph_pause, 0);
- atomic_set(&t->trace_overrun, 0);
- t->ftrace_timestamp = 0;
- /* make curr_ret_stack visible before we add the ret_stack */
- smp_wmb();
- t->ret_stack = ret_stack;
-}
-
-/*
- * Allocate a return stack for the idle task. May be the first
- * time through, or it may be done by CPU hotplug online.
- */
-void ftrace_graph_init_idle_task(struct task_struct *t, int cpu)
-{
- t->curr_ret_stack = -1;
- /*
- * The idle task has no parent, it either has its own
- * stack or no stack at all.
- */
- if (t->ret_stack)
- WARN_ON(t->ret_stack != per_cpu(idle_ret_stack, cpu));
-
- if (ftrace_graph_active) {
- struct ftrace_ret_stack *ret_stack;
-
- ret_stack = per_cpu(idle_ret_stack, cpu);
- if (!ret_stack) {
- ret_stack =
- kmalloc_array(FTRACE_RETFUNC_DEPTH,
- sizeof(struct ftrace_ret_stack),
- GFP_KERNEL);
- if (!ret_stack)
- return;
- per_cpu(idle_ret_stack, cpu) = ret_stack;
- }
- graph_init_task(t, ret_stack);
- }
-}
-
-/* Allocate a return stack for newly created task */
-void ftrace_graph_init_task(struct task_struct *t)
-{
- /* Make sure we do not use the parent ret_stack */
- t->ret_stack = NULL;
- t->curr_ret_stack = -1;
-
- if (ftrace_graph_active) {
- struct ftrace_ret_stack *ret_stack;
-
- ret_stack = kmalloc_array(FTRACE_RETFUNC_DEPTH,
- sizeof(struct ftrace_ret_stack),
- GFP_KERNEL);
- if (!ret_stack)
- return;
- graph_init_task(t, ret_stack);
- }
-}
-
-void ftrace_graph_exit_task(struct task_struct *t)
-{
- struct ftrace_ret_stack *ret_stack = t->ret_stack;
-
- t->ret_stack = NULL;
- /* NULL must become visible to IRQs before we free it: */
- barrier();
-
- kfree(ret_stack);
-}
-#endif
diff --git a/kernel/trace/ftrace_internal.h b/kernel/trace/ftrace_internal.h
new file mode 100644
index 000000000000..0515a2096f90
--- /dev/null
+++ b/kernel/trace/ftrace_internal.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_KERNEL_FTRACE_INTERNAL_H
+#define _LINUX_KERNEL_FTRACE_INTERNAL_H
+
+#ifdef CONFIG_FUNCTION_TRACER
+
+/*
+ * Traverse the ftrace_global_list, invoking all entries. The reason that we
+ * can use rcu_dereference_raw_notrace() is that elements removed from this list
+ * are simply leaked, so there is no need to interact with a grace-period
+ * mechanism. The rcu_dereference_raw_notrace() calls are needed to handle
+ * concurrent insertions into the ftrace_global_list.
+ *
+ * Silly Alpha and silly pointer-speculation compiler optimizations!
+ */
+#define do_for_each_ftrace_op(op, list) \
+ op = rcu_dereference_raw_notrace(list); \
+ do
+
+/*
+ * Optimized for just a single item in the list (as that is the normal case).
+ */
+#define while_for_each_ftrace_op(op) \
+ while (likely(op = rcu_dereference_raw_notrace((op)->next)) && \
+ unlikely((op) != &ftrace_list_end))
+
+extern struct ftrace_ops __rcu *ftrace_ops_list;
+extern struct ftrace_ops ftrace_list_end;
+extern struct mutex ftrace_lock;
+extern struct ftrace_ops global_ops;
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+int ftrace_startup(struct ftrace_ops *ops, int command);
+int ftrace_shutdown(struct ftrace_ops *ops, int command);
+int ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs);
+
+#else /* !CONFIG_DYNAMIC_FTRACE */
+
+int __register_ftrace_function(struct ftrace_ops *ops);
+int __unregister_ftrace_function(struct ftrace_ops *ops);
+/* Keep as macros so we do not need to define the commands */
+# define ftrace_startup(ops, command) \
+ ({ \
+ int ___ret = __register_ftrace_function(ops); \
+ if (!___ret) \
+ (ops)->flags |= FTRACE_OPS_FL_ENABLED; \
+ ___ret; \
+ })
+# define ftrace_shutdown(ops, command) \
+ ({ \
+ int ___ret = __unregister_ftrace_function(ops); \
+ if (!___ret) \
+ (ops)->flags &= ~FTRACE_OPS_FL_ENABLED; \
+ ___ret; \
+ })
+static inline int
+ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs)
+{
+ return 1;
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+extern int ftrace_graph_active;
+void update_function_graph_func(void);
+#else /* !CONFIG_FUNCTION_GRAPH_TRACER */
+# define ftrace_graph_active 0
+static inline void update_function_graph_func(void) { }
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+#else /* !CONFIG_FUNCTION_TRACER */
+#endif /* CONFIG_FUNCTION_TRACER */
+
+#endif
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 65bd4616220d..06e864a334bb 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -487,6 +487,10 @@ struct ring_buffer_per_cpu {
local_t dropped_events;
local_t committing;
local_t commits;
+ local_t pages_touched;
+ local_t pages_read;
+ long last_pages_touch;
+ size_t shortest_full;
unsigned long read;
unsigned long read_bytes;
u64 write_stamp;
@@ -529,6 +533,41 @@ struct ring_buffer_iter {
u64 read_stamp;
};
+/**
+ * ring_buffer_nr_pages - get the number of buffer pages in the ring buffer
+ * @buffer: The ring_buffer to get the number of pages from
+ * @cpu: The cpu of the ring_buffer to get the number of pages from
+ *
+ * Returns the number of pages used by a per_cpu buffer of the ring buffer.
+ */
+size_t ring_buffer_nr_pages(struct ring_buffer *buffer, int cpu)
+{
+ return buffer->buffers[cpu]->nr_pages;
+}
+
+/**
+ * ring_buffer_nr_pages_dirty - get the number of used pages in the ring buffer
+ * @buffer: The ring_buffer to get the number of pages from
+ * @cpu: The cpu of the ring_buffer to get the number of pages from
+ *
+ * Returns the number of pages that have content in the ring buffer.
+ */
+size_t ring_buffer_nr_dirty_pages(struct ring_buffer *buffer, int cpu)
+{
+ size_t read;
+ size_t cnt;
+
+ read = local_read(&buffer->buffers[cpu]->pages_read);
+ cnt = local_read(&buffer->buffers[cpu]->pages_touched);
+ /* The reader can read an empty page, but not more than that */
+ if (cnt < read) {
+ WARN_ON_ONCE(read > cnt + 1);
+ return 0;
+ }
+
+ return cnt - read;
+}
+
/*
* rb_wake_up_waiters - wake up tasks waiting for ring buffer input
*
@@ -556,7 +595,7 @@ static void rb_wake_up_waiters(struct irq_work *work)
* as data is added to any of the @buffer's cpu buffers. Otherwise
* it will wait for data to be added to a specific cpu buffer.
*/
-int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full)
+int ring_buffer_wait(struct ring_buffer *buffer, int cpu, int full)
{
struct ring_buffer_per_cpu *uninitialized_var(cpu_buffer);
DEFINE_WAIT(wait);
@@ -571,7 +610,7 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full)
if (cpu == RING_BUFFER_ALL_CPUS) {
work = &buffer->irq_work;
/* Full only makes sense on per cpu reads */
- full = false;
+ full = 0;
} else {
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return -ENODEV;
@@ -623,15 +662,22 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full)
!ring_buffer_empty_cpu(buffer, cpu)) {
unsigned long flags;
bool pagebusy;
+ size_t nr_pages;
+ size_t dirty;
if (!full)
break;
raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
+ nr_pages = cpu_buffer->nr_pages;
+ dirty = ring_buffer_nr_dirty_pages(buffer, cpu);
+ if (!cpu_buffer->shortest_full ||
+ cpu_buffer->shortest_full < full)
+ cpu_buffer->shortest_full = full;
raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
-
- if (!pagebusy)
+ if (!pagebusy &&
+ (!nr_pages || (dirty * 100) > full * nr_pages))
break;
}
@@ -1054,6 +1100,7 @@ static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer,
old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write);
old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries);
+ local_inc(&cpu_buffer->pages_touched);
/*
* Just make sure we have seen our old_write and synchronize
* with any interrupts that come in.
@@ -1834,7 +1881,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
* There could have been a race between checking
* record_disable and incrementing it.
*/
- synchronize_sched();
+ synchronize_rcu();
for_each_buffer_cpu(buffer, cpu) {
cpu_buffer = buffer->buffers[cpu];
rb_check_pages(cpu_buffer);
@@ -2586,7 +2633,9 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
static __always_inline void
rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
{
- bool pagebusy;
+ size_t nr_pages;
+ size_t dirty;
+ size_t full;
if (buffer->irq_work.waiters_pending) {
buffer->irq_work.waiters_pending = false;
@@ -2600,14 +2649,27 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
irq_work_queue(&cpu_buffer->irq_work.work);
}
- pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
+ if (cpu_buffer->last_pages_touch == local_read(&cpu_buffer->pages_touched))
+ return;
- if (!pagebusy && cpu_buffer->irq_work.full_waiters_pending) {
- cpu_buffer->irq_work.wakeup_full = true;
- cpu_buffer->irq_work.full_waiters_pending = false;
- /* irq_work_queue() supplies it's own memory barriers */
- irq_work_queue(&cpu_buffer->irq_work.work);
- }
+ if (cpu_buffer->reader_page == cpu_buffer->commit_page)
+ return;
+
+ if (!cpu_buffer->irq_work.full_waiters_pending)
+ return;
+
+ cpu_buffer->last_pages_touch = local_read(&cpu_buffer->pages_touched);
+
+ full = cpu_buffer->shortest_full;
+ nr_pages = cpu_buffer->nr_pages;
+ dirty = ring_buffer_nr_dirty_pages(buffer, cpu_buffer->cpu);
+ if (full && nr_pages && (dirty * 100) <= full * nr_pages)
+ return;
+
+ cpu_buffer->irq_work.wakeup_full = true;
+ cpu_buffer->irq_work.full_waiters_pending = false;
+ /* irq_work_queue() supplies it's own memory barriers */
+ irq_work_queue(&cpu_buffer->irq_work.work);
}
/*
@@ -3151,7 +3213,7 @@ static bool rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
* This prevents all writes to the buffer. Any attempt to write
* to the buffer after this will fail and return NULL.
*
- * The caller should call synchronize_sched() after this.
+ * The caller should call synchronize_rcu() after this.
*/
void ring_buffer_record_disable(struct ring_buffer *buffer)
{
@@ -3253,7 +3315,7 @@ bool ring_buffer_record_is_set_on(struct ring_buffer *buffer)
* This prevents all writes to the buffer. Any attempt to write
* to the buffer after this will fail and return NULL.
*
- * The caller should call synchronize_sched() after this.
+ * The caller should call synchronize_rcu() after this.
*/
void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu)
{
@@ -3732,13 +3794,15 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
goto spin;
/*
- * Yeah! We succeeded in replacing the page.
+ * Yay! We succeeded in replacing the page.
*
* Now make the new head point back to the reader page.
*/
rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list;
rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
+ local_inc(&cpu_buffer->pages_read);
+
/* Finally update the reader page to the new head */
cpu_buffer->reader_page = reader;
cpu_buffer->reader_page->read = 0;
@@ -4191,7 +4255,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_read_prepare);
void
ring_buffer_read_prepare_sync(void)
{
- synchronize_sched();
+ synchronize_rcu();
}
EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync);
@@ -4334,6 +4398,10 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
local_set(&cpu_buffer->entries, 0);
local_set(&cpu_buffer->committing, 0);
local_set(&cpu_buffer->commits, 0);
+ local_set(&cpu_buffer->pages_touched, 0);
+ local_set(&cpu_buffer->pages_read, 0);
+ cpu_buffer->last_pages_touch = 0;
+ cpu_buffer->shortest_full = 0;
cpu_buffer->read = 0;
cpu_buffer->read_bytes = 0;
@@ -4363,7 +4431,7 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
atomic_inc(&cpu_buffer->record_disabled);
/* Make sure all commits have finished */
- synchronize_sched();
+ synchronize_rcu();
raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
@@ -4496,7 +4564,7 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
goto out;
/*
- * We can't do a synchronize_sched here because this
+ * We can't do a synchronize_rcu here because this
* function can be called in atomic context.
* Normally this will be called from the same CPU as cpu.
* If not it's up to the caller to protect this.
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index bf6f1d70484d..c4238b441624 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1431,7 +1431,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
}
#endif /* CONFIG_TRACER_MAX_TRACE */
-static int wait_on_pipe(struct trace_iterator *iter, bool full)
+static int wait_on_pipe(struct trace_iterator *iter, int full)
{
/* Iterators are static, they should be filled or empty */
if (trace_buffer_iter(iter, iter->cpu_file))
@@ -1681,7 +1681,7 @@ void tracing_reset(struct trace_buffer *buf, int cpu)
ring_buffer_record_disable(buffer);
/* Make sure all commits have finished */
- synchronize_sched();
+ synchronize_rcu();
ring_buffer_reset_cpu(buffer, cpu);
ring_buffer_record_enable(buffer);
@@ -1698,7 +1698,7 @@ void tracing_reset_online_cpus(struct trace_buffer *buf)
ring_buffer_record_disable(buffer);
/* Make sure all commits have finished */
- synchronize_sched();
+ synchronize_rcu();
buf->time_start = buffer_ftrace_now(buf, buf->cpu);
@@ -2250,7 +2250,7 @@ void trace_buffered_event_disable(void)
preempt_enable();
/* Wait for all current users to finish */
- synchronize_sched();
+ synchronize_rcu();
for_each_tracing_cpu(cpu) {
free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
@@ -2452,7 +2452,7 @@ static inline void ftrace_exports_disable(void)
static_branch_disable(&ftrace_exports_enabled);
}
-void ftrace_exports(struct ring_buffer_event *event)
+static void ftrace_exports(struct ring_buffer_event *event)
{
struct trace_export *export;
@@ -2727,6 +2727,7 @@ void trace_dump_stack(int skip)
__ftrace_trace_stack(global_trace.trace_buffer.buffer,
flags, skip, preempt_count(), NULL);
}
+EXPORT_SYMBOL_GPL(trace_dump_stack);
static DEFINE_PER_CPU(int, user_stack_count);
@@ -3383,6 +3384,8 @@ static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file
const char tgid_space[] = " ";
const char space[] = " ";
+ print_event_info(buf, m);
+
seq_printf(m, "# %s _-----=> irqs-off\n",
tgid ? tgid_space : space);
seq_printf(m, "# %s / _----=> need-resched\n",
@@ -4407,13 +4410,15 @@ static int trace_set_options(struct trace_array *tr, char *option)
int neg = 0;
int ret;
size_t orig_len = strlen(option);
+ int len;
cmp = strstrip(option);
- if (strncmp(cmp, "no", 2) == 0) {
+ len = str_has_prefix(cmp, "no");
+ if (len)
neg = 1;
- cmp += 2;
- }
+
+ cmp += len;
mutex_lock(&trace_types_lock);
@@ -4603,6 +4608,10 @@ static const char readme_msg[] =
"\t\t\t traces\n"
#endif
#endif /* CONFIG_STACK_TRACER */
+#ifdef CONFIG_DYNAMIC_EVENTS
+ " dynamic_events\t\t- Add/remove/show the generic dynamic events\n"
+ "\t\t\t Write into this file to define/undefine new trace events.\n"
+#endif
#ifdef CONFIG_KPROBE_EVENTS
" kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
"\t\t\t Write into this file to define/undefine new trace events.\n"
@@ -4615,19 +4624,32 @@ static const char readme_msg[] =
"\t accepts: event-definitions (one definition per line)\n"
"\t Format: p[:[<group>/]<event>] <place> [<args>]\n"
"\t r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
+#ifdef CONFIG_HIST_TRIGGERS
+ "\t s:[synthetic/]<event> <field> [<field>]\n"
+#endif
"\t -:[<group>/]<event>\n"
#ifdef CONFIG_KPROBE_EVENTS
"\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
"place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
#endif
#ifdef CONFIG_UPROBE_EVENTS
- "\t place: <path>:<offset>\n"
+ " place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
#endif
"\t args: <name>=fetcharg[:type]\n"
"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
+#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
+ "\t $stack<index>, $stack, $retval, $comm, $arg<N>\n"
+#else
"\t $stack<index>, $stack, $retval, $comm\n"
- "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
- "\t b<bit-width>@<bit-offset>/<container-size>\n"
+#endif
+ "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
+ "\t b<bit-width>@<bit-offset>/<container-size>,\n"
+ "\t <type>\\[<array-size>\\]\n"
+#ifdef CONFIG_HIST_TRIGGERS
+ "\t field: <stype> <name>;\n"
+ "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
+ "\t [unsigned] char/int/long\n"
+#endif
#endif
" events/\t\t- Directory containing all trace event subsystems:\n"
" enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
@@ -5392,7 +5414,7 @@ static int tracing_set_tracer(struct trace_array *tr, const char *buf)
if (tr->current_trace->reset)
tr->current_trace->reset(tr);
- /* Current trace needs to be nop_trace before synchronize_sched */
+ /* Current trace needs to be nop_trace before synchronize_rcu */
tr->current_trace = &nop_trace;
#ifdef CONFIG_TRACER_MAX_TRACE
@@ -5406,7 +5428,7 @@ static int tracing_set_tracer(struct trace_array *tr, const char *buf)
* The update_max_tr is called from interrupts disabled
* so a synchronized_sched() is sufficient.
*/
- synchronize_sched();
+ synchronize_rcu();
free_snapshot(tr);
}
#endif
@@ -5687,7 +5709,7 @@ static int tracing_wait_pipe(struct file *filp)
mutex_unlock(&iter->mutex);
- ret = wait_on_pipe(iter, false);
+ ret = wait_on_pipe(iter, 0);
mutex_lock(&iter->mutex);
@@ -6745,7 +6767,7 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
if ((filp->f_flags & O_NONBLOCK))
return -EAGAIN;
- ret = wait_on_pipe(iter, false);
+ ret = wait_on_pipe(iter, 0);
if (ret)
return ret;
@@ -6942,7 +6964,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
goto out;
- ret = wait_on_pipe(iter, true);
+ ret = wait_on_pipe(iter, iter->tr->buffer_percent);
if (ret)
goto out;
@@ -7656,6 +7678,53 @@ static const struct file_operations rb_simple_fops = {
.llseek = default_llseek,
};
+static ssize_t
+buffer_percent_read(struct file *filp, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct trace_array *tr = filp->private_data;
+ char buf[64];
+ int r;
+
+ r = tr->buffer_percent;
+ r = sprintf(buf, "%d\n", r);
+
+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t
+buffer_percent_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct trace_array *tr = filp->private_data;
+ unsigned long val;
+ int ret;
+
+ ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+ if (ret)
+ return ret;
+
+ if (val > 100)
+ return -EINVAL;
+
+ if (!val)
+ val = 1;
+
+ tr->buffer_percent = val;
+
+ (*ppos)++;
+
+ return cnt;
+}
+
+static const struct file_operations buffer_percent_fops = {
+ .open = tracing_open_generic_tr,
+ .read = buffer_percent_read,
+ .write = buffer_percent_write,
+ .release = tracing_release_generic_tr,
+ .llseek = default_llseek,
+};
+
struct dentry *trace_instance_dir;
static void
@@ -7964,6 +8033,11 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
trace_create_file("timestamp_mode", 0444, d_tracer, tr,
&trace_time_stamp_mode_fops);
+ tr->buffer_percent = 50;
+
+ trace_create_file("buffer_percent", 0444, d_tracer,
+ tr, &buffer_percent_fops);
+
create_trace_options_dir(tr);
#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 3b8c0e24ab30..08900828d282 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -247,6 +247,7 @@ struct trace_array {
int clock_id;
int nr_topts;
bool clear_trace;
+ int buffer_percent;
struct tracer *current_trace;
unsigned int trace_flags;
unsigned char trace_flags_index[TRACE_FLAGS_MAX_SIZE];
@@ -512,12 +513,51 @@ enum {
* can only be modified by current, we can reuse trace_recursion.
*/
TRACE_IRQ_BIT,
+
+ /* Set if the function is in the set_graph_function file */
+ TRACE_GRAPH_BIT,
+
+ /*
+ * In the very unlikely case that an interrupt came in
+ * at a start of graph tracing, and we want to trace
+ * the function in that interrupt, the depth can be greater
+ * than zero, because of the preempted start of a previous
+ * trace. In an even more unlikely case, depth could be 2
+ * if a softirq interrupted the start of graph tracing,
+ * followed by an interrupt preempting a start of graph
+ * tracing in the softirq, and depth can even be 3
+ * if an NMI came in at the start of an interrupt function
+ * that preempted a softirq start of a function that
+ * preempted normal context!!!! Luckily, it can't be
+ * greater than 3, so the next two bits are a mask
+ * of what the depth is when we set TRACE_GRAPH_BIT
+ */
+
+ TRACE_GRAPH_DEPTH_START_BIT,
+ TRACE_GRAPH_DEPTH_END_BIT,
+
+ /*
+ * To implement set_graph_notrace, if this bit is set, we ignore
+ * function graph tracing of called functions, until the return
+ * function is called to clear it.
+ */
+ TRACE_GRAPH_NOTRACE_BIT,
};
#define trace_recursion_set(bit) do { (current)->trace_recursion |= (1<<(bit)); } while (0)
#define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(1<<(bit)); } while (0)
#define trace_recursion_test(bit) ((current)->trace_recursion & (1<<(bit)))
+#define trace_recursion_depth() \
+ (((current)->trace_recursion >> TRACE_GRAPH_DEPTH_START_BIT) & 3)
+#define trace_recursion_set_depth(depth) \
+ do { \
+ current->trace_recursion &= \
+ ~(3 << TRACE_GRAPH_DEPTH_START_BIT); \
+ current->trace_recursion |= \
+ ((depth) & 3) << TRACE_GRAPH_DEPTH_START_BIT; \
+ } while (0)
+
#define TRACE_CONTEXT_BITS 4
#define TRACE_FTRACE_START TRACE_FTRACE_BIT
@@ -823,7 +863,12 @@ static __always_inline bool ftrace_hash_empty(struct ftrace_hash *hash)
#define TRACE_GRAPH_PRINT_FILL_MASK (0x3 << TRACE_GRAPH_PRINT_FILL_SHIFT)
extern void ftrace_graph_sleep_time_control(bool enable);
+
+#ifdef CONFIG_FUNCTION_PROFILER
extern void ftrace_graph_graph_time_control(bool enable);
+#else
+static inline void ftrace_graph_graph_time_control(bool enable) { }
+#endif
extern enum print_line_t
print_graph_function_flags(struct trace_iterator *iter, u32 flags);
@@ -843,8 +888,9 @@ extern void __trace_graph_return(struct trace_array *tr,
extern struct ftrace_hash *ftrace_graph_hash;
extern struct ftrace_hash *ftrace_graph_notrace_hash;
-static inline int ftrace_graph_addr(unsigned long addr)
+static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace)
{
+ unsigned long addr = trace->func;
int ret = 0;
preempt_disable_notrace();
@@ -855,6 +901,14 @@ static inline int ftrace_graph_addr(unsigned long addr)
}
if (ftrace_lookup_ip(ftrace_graph_hash, addr)) {
+
+ /*
+ * This needs to be cleared on the return functions
+ * when the depth is zero.
+ */
+ trace_recursion_set(TRACE_GRAPH_BIT);
+ trace_recursion_set_depth(trace->depth);
+
/*
* If no irqs are to be traced, but a set_graph_function
* is set, and called by an interrupt handler, we still
@@ -872,6 +926,13 @@ out:
return ret;
}
+static inline void ftrace_graph_addr_finish(struct ftrace_graph_ret *trace)
+{
+ if (trace_recursion_test(TRACE_GRAPH_BIT) &&
+ trace->depth == trace_recursion_depth())
+ trace_recursion_clear(TRACE_GRAPH_BIT);
+}
+
static inline int ftrace_graph_notrace_addr(unsigned long addr)
{
int ret = 0;
@@ -885,7 +946,7 @@ static inline int ftrace_graph_notrace_addr(unsigned long addr)
return ret;
}
#else
-static inline int ftrace_graph_addr(unsigned long addr)
+static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace)
{
return 1;
}
@@ -894,6 +955,8 @@ static inline int ftrace_graph_notrace_addr(unsigned long addr)
{
return 0;
}
+static inline void ftrace_graph_addr_finish(struct ftrace_graph_ret *trace)
+{ }
#endif /* CONFIG_DYNAMIC_FTRACE */
extern unsigned int fgraph_max_depth;
@@ -901,7 +964,8 @@ extern unsigned int fgraph_max_depth;
static inline bool ftrace_graph_ignore_func(struct ftrace_graph_ent *trace)
{
/* trace it when it is-nested-in or is a function enabled. */
- return !(trace->depth || ftrace_graph_addr(trace->func)) ||
+ return !(trace_recursion_test(TRACE_GRAPH_BIT) ||
+ ftrace_graph_addr(trace)) ||
(trace->depth < 0) ||
(fgraph_max_depth && trace->depth >= fgraph_max_depth);
}
diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c
new file mode 100644
index 000000000000..dd1f43588d70
--- /dev/null
+++ b/kernel/trace/trace_dynevent.c
@@ -0,0 +1,217 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Generic dynamic event control interface
+ *
+ * Copyright (C) 2018 Masami Hiramatsu <[email protected]>
+ */
+
+#include <linux/debugfs.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/tracefs.h>
+
+#include "trace.h"
+#include "trace_dynevent.h"
+
+static DEFINE_MUTEX(dyn_event_ops_mutex);
+static LIST_HEAD(dyn_event_ops_list);
+
+int dyn_event_register(struct dyn_event_operations *ops)
+{
+ if (!ops || !ops->create || !ops->show || !ops->is_busy ||
+ !ops->free || !ops->match)
+ return -EINVAL;
+
+ INIT_LIST_HEAD(&ops->list);
+ mutex_lock(&dyn_event_ops_mutex);
+ list_add_tail(&ops->list, &dyn_event_ops_list);
+ mutex_unlock(&dyn_event_ops_mutex);
+ return 0;
+}
+
+int dyn_event_release(int argc, char **argv, struct dyn_event_operations *type)
+{
+ struct dyn_event *pos, *n;
+ char *system = NULL, *event, *p;
+ int ret = -ENOENT;
+
+ if (argv[0][0] == '-') {
+ if (argv[0][1] != ':')
+ return -EINVAL;
+ event = &argv[0][2];
+ } else {
+ event = strchr(argv[0], ':');
+ if (!event)
+ return -EINVAL;
+ event++;
+ }
+
+ p = strchr(event, '/');
+ if (p) {
+ system = event;
+ event = p + 1;
+ *p = '\0';
+ }
+ if (event[0] == '\0')
+ return -EINVAL;
+
+ mutex_lock(&event_mutex);
+ for_each_dyn_event_safe(pos, n) {
+ if (type && type != pos->ops)
+ continue;
+ if (pos->ops->match(system, event, pos)) {
+ ret = pos->ops->free(pos);
+ break;
+ }
+ }
+ mutex_unlock(&event_mutex);
+
+ return ret;
+}
+
+static int create_dyn_event(int argc, char **argv)
+{
+ struct dyn_event_operations *ops;
+ int ret;
+
+ if (argv[0][0] == '-' || argv[0][0] == '!')
+ return dyn_event_release(argc, argv, NULL);
+
+ mutex_lock(&dyn_event_ops_mutex);
+ list_for_each_entry(ops, &dyn_event_ops_list, list) {
+ ret = ops->create(argc, (const char **)argv);
+ if (!ret || ret != -ECANCELED)
+ break;
+ }
+ mutex_unlock(&dyn_event_ops_mutex);
+ if (ret == -ECANCELED)
+ ret = -EINVAL;
+
+ return ret;
+}
+
+/* Protected by event_mutex */
+LIST_HEAD(dyn_event_list);
+
+void *dyn_event_seq_start(struct seq_file *m, loff_t *pos)
+{
+ mutex_lock(&event_mutex);
+ return seq_list_start(&dyn_event_list, *pos);
+}
+
+void *dyn_event_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ return seq_list_next(v, &dyn_event_list, pos);
+}
+
+void dyn_event_seq_stop(struct seq_file *m, void *v)
+{
+ mutex_unlock(&event_mutex);
+}
+
+static int dyn_event_seq_show(struct seq_file *m, void *v)
+{
+ struct dyn_event *ev = v;
+
+ if (ev && ev->ops)
+ return ev->ops->show(m, ev);
+
+ return 0;
+}
+
+static const struct seq_operations dyn_event_seq_op = {
+ .start = dyn_event_seq_start,
+ .next = dyn_event_seq_next,
+ .stop = dyn_event_seq_stop,
+ .show = dyn_event_seq_show
+};
+
+/*
+ * dyn_events_release_all - Release all specific events
+ * @type: the dyn_event_operations * which filters releasing events
+ *
+ * This releases all events which ->ops matches @type. If @type is NULL,
+ * all events are released.
+ * Return -EBUSY if any of them are in use, and return other errors when
+ * it failed to free the given event. Except for -EBUSY, event releasing
+ * process will be aborted at that point and there may be some other
+ * releasable events on the list.
+ */
+int dyn_events_release_all(struct dyn_event_operations *type)
+{
+ struct dyn_event *ev, *tmp;
+ int ret = 0;
+
+ mutex_lock(&event_mutex);
+ for_each_dyn_event(ev) {
+ if (type && ev->ops != type)
+ continue;
+ if (ev->ops->is_busy(ev)) {
+ ret = -EBUSY;
+ goto out;
+ }
+ }
+ for_each_dyn_event_safe(ev, tmp) {
+ if (type && ev->ops != type)
+ continue;
+ ret = ev->ops->free(ev);
+ if (ret)
+ break;
+ }
+out:
+ mutex_unlock(&event_mutex);
+
+ return ret;
+}
+
+static int dyn_event_open(struct inode *inode, struct file *file)
+{
+ int ret;
+
+ if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
+ ret = dyn_events_release_all(NULL);
+ if (ret < 0)
+ return ret;
+ }
+
+ return seq_open(file, &dyn_event_seq_op);
+}
+
+static ssize_t dyn_event_write(struct file *file, const char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ return trace_parse_run_command(file, buffer, count, ppos,
+ create_dyn_event);
+}
+
+static const struct file_operations dynamic_events_ops = {
+ .owner = THIS_MODULE,
+ .open = dyn_event_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+ .write = dyn_event_write,
+};
+
+/* Make a tracefs interface for controlling dynamic events */
+static __init int init_dynamic_event(void)
+{
+ struct dentry *d_tracer;
+ struct dentry *entry;
+
+ d_tracer = tracing_init_dentry();
+ if (IS_ERR(d_tracer))
+ return 0;
+
+ entry = tracefs_create_file("dynamic_events", 0644, d_tracer,
+ NULL, &dynamic_events_ops);
+
+ /* Event list interface */
+ if (!entry)
+ pr_warn("Could not create tracefs 'dynamic_events' entry\n");
+
+ return 0;
+}
+fs_initcall(init_dynamic_event);
diff --git a/kernel/trace/trace_dynevent.h b/kernel/trace/trace_dynevent.h
new file mode 100644
index 000000000000..8c334064e4d6
--- /dev/null
+++ b/kernel/trace/trace_dynevent.h
@@ -0,0 +1,119 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Common header file for generic dynamic events.
+ */
+
+#ifndef _TRACE_DYNEVENT_H
+#define _TRACE_DYNEVENT_H
+
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/seq_file.h>
+
+#include "trace.h"
+
+struct dyn_event;
+
+/**
+ * struct dyn_event_operations - Methods for each type of dynamic events
+ *
+ * These methods must be set for each type, since there is no default method.
+ * Before using this for dyn_event_init(), it must be registered by
+ * dyn_event_register().
+ *
+ * @create: Parse and create event method. This is invoked when user passes
+ * a event definition to dynamic_events interface. This must not destruct
+ * the arguments and return -ECANCELED if given arguments doesn't match its
+ * command prefix.
+ * @show: Showing method. This is invoked when user reads the event definitions
+ * via dynamic_events interface.
+ * @is_busy: Check whether given event is busy so that it can not be deleted.
+ * Return true if it is busy, otherwides false.
+ * @free: Delete the given event. Return 0 if success, otherwides error.
+ * @match: Check whether given event and system name match this event.
+ * Return true if it matches, otherwides false.
+ *
+ * Except for @create, these methods are called under holding event_mutex.
+ */
+struct dyn_event_operations {
+ struct list_head list;
+ int (*create)(int argc, const char *argv[]);
+ int (*show)(struct seq_file *m, struct dyn_event *ev);
+ bool (*is_busy)(struct dyn_event *ev);
+ int (*free)(struct dyn_event *ev);
+ bool (*match)(const char *system, const char *event,
+ struct dyn_event *ev);
+};
+
+/* Register new dyn_event type -- must be called at first */
+int dyn_event_register(struct dyn_event_operations *ops);
+
+/**
+ * struct dyn_event - Dynamic event list header
+ *
+ * The dyn_event structure encapsulates a list and a pointer to the operators
+ * for making a global list of dynamic events.
+ * User must includes this in each event structure, so that those events can
+ * be added/removed via dynamic_events interface.
+ */
+struct dyn_event {
+ struct list_head list;
+ struct dyn_event_operations *ops;
+};
+
+extern struct list_head dyn_event_list;
+
+static inline
+int dyn_event_init(struct dyn_event *ev, struct dyn_event_operations *ops)
+{
+ if (!ev || !ops)
+ return -EINVAL;
+
+ INIT_LIST_HEAD(&ev->list);
+ ev->ops = ops;
+ return 0;
+}
+
+static inline int dyn_event_add(struct dyn_event *ev)
+{
+ lockdep_assert_held(&event_mutex);
+
+ if (!ev || !ev->ops)
+ return -EINVAL;
+
+ list_add_tail(&ev->list, &dyn_event_list);
+ return 0;
+}
+
+static inline void dyn_event_remove(struct dyn_event *ev)
+{
+ lockdep_assert_held(&event_mutex);
+ list_del_init(&ev->list);
+}
+
+void *dyn_event_seq_start(struct seq_file *m, loff_t *pos);
+void *dyn_event_seq_next(struct seq_file *m, void *v, loff_t *pos);
+void dyn_event_seq_stop(struct seq_file *m, void *v);
+int dyn_events_release_all(struct dyn_event_operations *type);
+int dyn_event_release(int argc, char **argv, struct dyn_event_operations *type);
+
+/*
+ * for_each_dyn_event - iterate over the dyn_event list
+ * @pos: the struct dyn_event * to use as a loop cursor
+ *
+ * This is just a basement of for_each macro. Wrap this for
+ * each actual event structure with ops filtering.
+ */
+#define for_each_dyn_event(pos) \
+ list_for_each_entry(pos, &dyn_event_list, list)
+
+/*
+ * for_each_dyn_event - iterate over the dyn_event list safely
+ * @pos: the struct dyn_event * to use as a loop cursor
+ * @n: the struct dyn_event * to use as temporary storage
+ */
+#define for_each_dyn_event_safe(pos, n) \
+ list_for_each_entry_safe(pos, n, &dyn_event_list, list)
+
+#endif
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 69a3fe926e8c..76217bbef815 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -290,7 +290,8 @@ void perf_kprobe_destroy(struct perf_event *p_event)
#endif /* CONFIG_KPROBE_EVENTS */
#ifdef CONFIG_UPROBE_EVENTS
-int perf_uprobe_init(struct perf_event *p_event, bool is_retprobe)
+int perf_uprobe_init(struct perf_event *p_event,
+ unsigned long ref_ctr_offset, bool is_retprobe)
{
int ret;
char *path = NULL;
@@ -312,8 +313,8 @@ int perf_uprobe_init(struct perf_event *p_event, bool is_retprobe)
goto out;
}
- tp_event = create_local_trace_uprobe(
- path, p_event->attr.probe_offset, is_retprobe);
+ tp_event = create_local_trace_uprobe(path, p_event->attr.probe_offset,
+ ref_ctr_offset, is_retprobe);
if (IS_ERR(tp_event)) {
ret = PTR_ERR(tp_event);
goto out;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index f94be0c2827b..5b3b0c3c8a47 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1251,7 +1251,7 @@ static int f_show(struct seq_file *m, void *v)
*/
array_descriptor = strchr(field->type, '[');
- if (!strncmp(field->type, "__data_loc", 10))
+ if (str_has_prefix(field->type, "__data_loc"))
array_descriptor = NULL;
if (!array_descriptor)
@@ -2309,7 +2309,8 @@ static void __add_event_to_tracers(struct trace_event_call *call);
int trace_add_event_call(struct trace_event_call *call)
{
int ret;
- mutex_lock(&event_mutex);
+ lockdep_assert_held(&event_mutex);
+
mutex_lock(&trace_types_lock);
ret = __register_event(call, NULL);
@@ -2317,7 +2318,6 @@ int trace_add_event_call(struct trace_event_call *call)
__add_event_to_tracers(call);
mutex_unlock(&trace_types_lock);
- mutex_unlock(&event_mutex);
return ret;
}
@@ -2371,13 +2371,13 @@ int trace_remove_event_call(struct trace_event_call *call)
{
int ret;
- mutex_lock(&event_mutex);
+ lockdep_assert_held(&event_mutex);
+
mutex_lock(&trace_types_lock);
down_write(&trace_event_sem);
ret = probe_remove_event_call(call);
up_write(&trace_event_sem);
mutex_unlock(&trace_types_lock);
- mutex_unlock(&event_mutex);
return ret;
}
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 84a65173b1e9..217ef481fbbb 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -570,11 +570,13 @@ predicate_parse(const char *str, int nr_parens, int nr_preds,
}
}
+ kfree(op_stack);
+ kfree(inverts);
return prog;
out_free:
kfree(op_stack);
- kfree(prog_stack);
kfree(inverts);
+ kfree(prog_stack);
return ERR_PTR(ret);
}
@@ -1299,7 +1301,7 @@ static int parse_pred(const char *str, void *data,
/* go past the last quote */
i++;
- } else if (isdigit(str[i])) {
+ } else if (isdigit(str[i]) || str[i] == '-') {
/* Make sure the field is not a string */
if (is_string_field(field)) {
@@ -1312,6 +1314,9 @@ static int parse_pred(const char *str, void *data,
goto err_free;
}
+ if (str[i] == '-')
+ i++;
+
/* We allow 0xDEADBEEF */
while (isalnum(str[i]))
i++;
@@ -1614,7 +1619,7 @@ static int process_system_preds(struct trace_subsystem_dir *dir,
/*
* The calls can still be using the old filters.
- * Do a synchronize_sched() and to ensure all calls are
+ * Do a synchronize_rcu() and to ensure all calls are
* done with them before we free them.
*/
tracepoint_synchronize_unregister();
@@ -1718,6 +1723,7 @@ static int create_filter(struct trace_event_call *call,
err = process_preds(call, filter_string, *filterp, pe);
if (err && set_str)
append_filter_err(pe, *filterp);
+ create_filter_finish(pe);
return err;
}
@@ -1845,7 +1851,7 @@ int apply_subsystem_event_filter(struct trace_subsystem_dir *dir,
if (filter) {
/*
* No event actually uses the system filter
- * we can free it without synchronize_sched().
+ * we can free it without synchronize_rcu().
*/
__free_filter(system->filter);
system->filter = filter;
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index d239004aaf29..449d90cfa151 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -15,6 +15,7 @@
#include "tracing_map.h"
#include "trace.h"
+#include "trace_dynevent.h"
#define SYNTH_SYSTEM "synthetic"
#define SYNTH_FIELDS_MAX 16
@@ -39,6 +40,16 @@ enum field_op_id {
FIELD_OP_UNARY_MINUS,
};
+/*
+ * A hist_var (histogram variable) contains variable information for
+ * hist_fields having the HIST_FIELD_FL_VAR or HIST_FIELD_FL_VAR_REF
+ * flag set. A hist_var has a variable name e.g. ts0, and is
+ * associated with a given histogram trigger, as specified by
+ * hist_data. The hist_var idx is the unique index assigned to the
+ * variable by the hist trigger's tracing_map. The idx is what is
+ * used to set a variable's value and, by a variable reference, to
+ * retrieve it.
+ */
struct hist_var {
char *name;
struct hist_trigger_data *hist_data;
@@ -55,12 +66,29 @@ struct hist_field {
const char *type;
struct hist_field *operands[HIST_FIELD_OPERANDS_MAX];
struct hist_trigger_data *hist_data;
+
+ /*
+ * Variable fields contain variable-specific info in var.
+ */
struct hist_var var;
enum field_op_id operator;
char *system;
char *event_name;
+
+ /*
+ * The name field is used for EXPR and VAR_REF fields. VAR
+ * fields contain the variable name in var.name.
+ */
char *name;
- unsigned int var_idx;
+
+ /*
+ * When a histogram trigger is hit, if it has any references
+ * to variables, the values of those variables are collected
+ * into a var_ref_vals array by resolve_var_refs(). The
+ * current value of each variable is read from the tracing_map
+ * using the hist field's hist_var.idx and entered into the
+ * var_ref_idx entry i.e. var_ref_vals[var_ref_idx].
+ */
unsigned int var_ref_idx;
bool read_once;
};
@@ -279,8 +307,6 @@ struct hist_trigger_data {
struct action_data *actions[HIST_ACTIONS_MAX];
unsigned int n_actions;
- struct hist_field *synth_var_refs[SYNTH_FIELDS_MAX];
- unsigned int n_synth_var_refs;
struct field_var *field_vars[SYNTH_FIELDS_MAX];
unsigned int n_field_vars;
unsigned int n_field_var_str;
@@ -292,6 +318,21 @@ struct hist_trigger_data {
unsigned int n_max_var_str;
};
+static int synth_event_create(int argc, const char **argv);
+static int synth_event_show(struct seq_file *m, struct dyn_event *ev);
+static int synth_event_release(struct dyn_event *ev);
+static bool synth_event_is_busy(struct dyn_event *ev);
+static bool synth_event_match(const char *system, const char *event,
+ struct dyn_event *ev);
+
+static struct dyn_event_operations synth_event_ops = {
+ .create = synth_event_create,
+ .show = synth_event_show,
+ .is_busy = synth_event_is_busy,
+ .free = synth_event_release,
+ .match = synth_event_match,
+};
+
struct synth_field {
char *type;
char *name;
@@ -301,7 +342,7 @@ struct synth_field {
};
struct synth_event {
- struct list_head list;
+ struct dyn_event devent;
int ref;
char *name;
struct synth_field **fields;
@@ -312,6 +353,32 @@ struct synth_event {
struct tracepoint *tp;
};
+static bool is_synth_event(struct dyn_event *ev)
+{
+ return ev->ops == &synth_event_ops;
+}
+
+static struct synth_event *to_synth_event(struct dyn_event *ev)
+{
+ return container_of(ev, struct synth_event, devent);
+}
+
+static bool synth_event_is_busy(struct dyn_event *ev)
+{
+ struct synth_event *event = to_synth_event(ev);
+
+ return event->ref != 0;
+}
+
+static bool synth_event_match(const char *system, const char *event,
+ struct dyn_event *ev)
+{
+ struct synth_event *sev = to_synth_event(ev);
+
+ return strcmp(sev->name, event) == 0 &&
+ (!system || strcmp(system, SYNTH_SYSTEM) == 0);
+}
+
struct action_data;
typedef void (*action_fn_t) (struct hist_trigger_data *hist_data,
@@ -326,6 +393,14 @@ struct action_data {
union {
struct {
+ /*
+ * When a histogram trigger is hit, the values of any
+ * references to variables, including variables being passed
+ * as parameters to synthetic events, are collected into a
+ * var_ref_vals array. This var_ref_idx is the index of the
+ * first param in the array to be passed to the synthetic
+ * event invocation.
+ */
unsigned int var_ref_idx;
char *match_event;
char *match_event_system;
@@ -402,9 +477,6 @@ static bool have_hist_err(void)
return false;
}
-static LIST_HEAD(synth_event_list);
-static DEFINE_MUTEX(synth_event_mutex);
-
struct synth_trace_event {
struct trace_entry ent;
u64 fields[];
@@ -446,7 +518,7 @@ static int synth_event_define_fields(struct trace_event_call *call)
static bool synth_field_signed(char *type)
{
- if (strncmp(type, "u", 1) == 0)
+ if (str_has_prefix(type, "u"))
return false;
return true;
@@ -469,7 +541,7 @@ static int synth_field_string_size(char *type)
start = strstr(type, "char[");
if (start == NULL)
return -EINVAL;
- start += strlen("char[");
+ start += sizeof("char[") - 1;
end = strchr(type, ']');
if (!end || end < start)
@@ -738,14 +810,12 @@ static void free_synth_field(struct synth_field *field)
kfree(field);
}
-static struct synth_field *parse_synth_field(int argc, char **argv,
+static struct synth_field *parse_synth_field(int argc, const char **argv,
int *consumed)
{
struct synth_field *field;
- const char *prefix = NULL;
- char *field_type = argv[0], *field_name;
+ const char *prefix = NULL, *field_type = argv[0], *field_name, *array;
int len, ret = 0;
- char *array;
if (field_type[0] == ';')
field_type++;
@@ -762,20 +832,31 @@ static struct synth_field *parse_synth_field(int argc, char **argv,
*consumed = 2;
}
- len = strlen(field_name);
- if (field_name[len - 1] == ';')
- field_name[len - 1] = '\0';
-
field = kzalloc(sizeof(*field), GFP_KERNEL);
if (!field)
return ERR_PTR(-ENOMEM);
- len = strlen(field_type) + 1;
+ len = strlen(field_name);
array = strchr(field_name, '[');
if (array)
+ len -= strlen(array);
+ else if (field_name[len - 1] == ';')
+ len--;
+
+ field->name = kmemdup_nul(field_name, len, GFP_KERNEL);
+ if (!field->name) {
+ ret = -ENOMEM;
+ goto free;
+ }
+
+ if (field_type[0] == ';')
+ field_type++;
+ len = strlen(field_type) + 1;
+ if (array)
len += strlen(array);
if (prefix)
len += strlen(prefix);
+
field->type = kzalloc(len, GFP_KERNEL);
if (!field->type) {
ret = -ENOMEM;
@@ -786,7 +867,8 @@ static struct synth_field *parse_synth_field(int argc, char **argv,
strcat(field->type, field_type);
if (array) {
strcat(field->type, array);
- *array = '\0';
+ if (field->type[len - 1] == ';')
+ field->type[len - 1] = '\0';
}
field->size = synth_field_size(field->type);
@@ -800,11 +882,6 @@ static struct synth_field *parse_synth_field(int argc, char **argv,
field->is_signed = synth_field_signed(field->type);
- field->name = kstrdup(field_name, GFP_KERNEL);
- if (!field->name) {
- ret = -ENOMEM;
- goto free;
- }
out:
return field;
free:
@@ -868,9 +945,13 @@ static inline void trace_synth(struct synth_event *event, u64 *var_ref_vals,
static struct synth_event *find_synth_event(const char *name)
{
+ struct dyn_event *pos;
struct synth_event *event;
- list_for_each_entry(event, &synth_event_list, list) {
+ for_each_dyn_event(pos) {
+ if (!is_synth_event(pos))
+ continue;
+ event = to_synth_event(pos);
if (strcmp(event->name, name) == 0)
return event;
}
@@ -959,7 +1040,7 @@ static void free_synth_event(struct synth_event *event)
kfree(event);
}
-static struct synth_event *alloc_synth_event(char *event_name, int n_fields,
+static struct synth_event *alloc_synth_event(const char *name, int n_fields,
struct synth_field **fields)
{
struct synth_event *event;
@@ -971,7 +1052,7 @@ static struct synth_event *alloc_synth_event(char *event_name, int n_fields,
goto out;
}
- event->name = kstrdup(event_name, GFP_KERNEL);
+ event->name = kstrdup(name, GFP_KERNEL);
if (!event->name) {
kfree(event);
event = ERR_PTR(-ENOMEM);
@@ -985,6 +1066,8 @@ static struct synth_event *alloc_synth_event(char *event_name, int n_fields,
goto out;
}
+ dyn_event_init(&event->devent, &synth_event_ops);
+
for (i = 0; i < n_fields; i++)
event->fields[i] = fields[i];
@@ -1008,29 +1091,11 @@ struct hist_var_data {
struct hist_trigger_data *hist_data;
};
-static void add_or_delete_synth_event(struct synth_event *event, int delete)
-{
- if (delete)
- free_synth_event(event);
- else {
- mutex_lock(&synth_event_mutex);
- if (!find_synth_event(event->name))
- list_add(&event->list, &synth_event_list);
- else
- free_synth_event(event);
- mutex_unlock(&synth_event_mutex);
- }
-}
-
-static int create_synth_event(int argc, char **argv)
+static int __create_synth_event(int argc, const char *name, const char **argv)
{
struct synth_field *field, *fields[SYNTH_FIELDS_MAX];
struct synth_event *event = NULL;
- bool delete_event = false;
int i, consumed = 0, n_fields = 0, ret = 0;
- char *name;
-
- mutex_lock(&synth_event_mutex);
/*
* Argument syntax:
@@ -1038,40 +1103,19 @@ static int create_synth_event(int argc, char **argv)
* - Remove synthetic event: !<event_name> field[;field] ...
* where 'field' = type field_name
*/
- if (argc < 1) {
- ret = -EINVAL;
- goto out;
- }
- name = argv[0];
- if (name[0] == '!') {
- delete_event = true;
- name++;
- }
+ if (name[0] == '\0' || argc < 1)
+ return -EINVAL;
+
+ mutex_lock(&event_mutex);
event = find_synth_event(name);
if (event) {
- if (delete_event) {
- if (event->ref) {
- event = NULL;
- ret = -EBUSY;
- goto out;
- }
- list_del(&event->list);
- goto out;
- }
- event = NULL;
ret = -EEXIST;
goto out;
- } else if (delete_event)
- goto out;
-
- if (argc < 2) {
- ret = -EINVAL;
- goto out;
}
- for (i = 1; i < argc - 1; i++) {
+ for (i = 0; i < argc - 1; i++) {
if (strcmp(argv[i], ";") == 0)
continue;
if (n_fields == SYNTH_FIELDS_MAX) {
@@ -1099,83 +1143,91 @@ static int create_synth_event(int argc, char **argv)
event = NULL;
goto err;
}
+ ret = register_synth_event(event);
+ if (!ret)
+ dyn_event_add(&event->devent);
+ else
+ free_synth_event(event);
out:
- mutex_unlock(&synth_event_mutex);
-
- if (event) {
- if (delete_event) {
- ret = unregister_synth_event(event);
- add_or_delete_synth_event(event, !ret);
- } else {
- ret = register_synth_event(event);
- add_or_delete_synth_event(event, ret);
- }
- }
+ mutex_unlock(&event_mutex);
return ret;
err:
- mutex_unlock(&synth_event_mutex);
-
for (i = 0; i < n_fields; i++)
free_synth_field(fields[i]);
- free_synth_event(event);
- return ret;
+ goto out;
}
-static int release_all_synth_events(void)
+static int create_or_delete_synth_event(int argc, char **argv)
{
- struct list_head release_events;
- struct synth_event *event, *e;
- int ret = 0;
-
- INIT_LIST_HEAD(&release_events);
-
- mutex_lock(&synth_event_mutex);
+ const char *name = argv[0];
+ struct synth_event *event = NULL;
+ int ret;
- list_for_each_entry(event, &synth_event_list, list) {
- if (event->ref) {
- mutex_unlock(&synth_event_mutex);
- return -EBUSY;
- }
+ /* trace_run_command() ensures argc != 0 */
+ if (name[0] == '!') {
+ mutex_lock(&event_mutex);
+ event = find_synth_event(name + 1);
+ if (event) {
+ if (event->ref)
+ ret = -EBUSY;
+ else {
+ ret = unregister_synth_event(event);
+ if (!ret) {
+ dyn_event_remove(&event->devent);
+ free_synth_event(event);
+ }
+ }
+ } else
+ ret = -ENOENT;
+ mutex_unlock(&event_mutex);
+ return ret;
}
- list_splice_init(&event->list, &release_events);
+ ret = __create_synth_event(argc - 1, name, (const char **)argv + 1);
+ return ret == -ECANCELED ? -EINVAL : ret;
+}
- mutex_unlock(&synth_event_mutex);
+static int synth_event_create(int argc, const char **argv)
+{
+ const char *name = argv[0];
+ int len;
- list_for_each_entry_safe(event, e, &release_events, list) {
- list_del(&event->list);
+ if (name[0] != 's' || name[1] != ':')
+ return -ECANCELED;
+ name += 2;
- ret = unregister_synth_event(event);
- add_or_delete_synth_event(event, !ret);
+ /* This interface accepts group name prefix */
+ if (strchr(name, '/')) {
+ len = sizeof(SYNTH_SYSTEM "/") - 1;
+ if (strncmp(name, SYNTH_SYSTEM "/", len))
+ return -EINVAL;
+ name += len;
}
-
- return ret;
+ return __create_synth_event(argc - 1, name, argv + 1);
}
-
-static void *synth_events_seq_start(struct seq_file *m, loff_t *pos)
+static int synth_event_release(struct dyn_event *ev)
{
- mutex_lock(&synth_event_mutex);
+ struct synth_event *event = to_synth_event(ev);
+ int ret;
- return seq_list_start(&synth_event_list, *pos);
-}
+ if (event->ref)
+ return -EBUSY;
-static void *synth_events_seq_next(struct seq_file *m, void *v, loff_t *pos)
-{
- return seq_list_next(v, &synth_event_list, pos);
-}
+ ret = unregister_synth_event(event);
+ if (ret)
+ return ret;
-static void synth_events_seq_stop(struct seq_file *m, void *v)
-{
- mutex_unlock(&synth_event_mutex);
+ dyn_event_remove(ev);
+ free_synth_event(event);
+ return 0;
}
-static int synth_events_seq_show(struct seq_file *m, void *v)
+static int __synth_event_show(struct seq_file *m, struct synth_event *event)
{
struct synth_field *field;
- struct synth_event *event = v;
unsigned int i;
seq_printf(m, "%s\t", event->name);
@@ -1193,11 +1245,30 @@ static int synth_events_seq_show(struct seq_file *m, void *v)
return 0;
}
+static int synth_event_show(struct seq_file *m, struct dyn_event *ev)
+{
+ struct synth_event *event = to_synth_event(ev);
+
+ seq_printf(m, "s:%s/", event->class.system);
+
+ return __synth_event_show(m, event);
+}
+
+static int synth_events_seq_show(struct seq_file *m, void *v)
+{
+ struct dyn_event *ev = v;
+
+ if (!is_synth_event(ev))
+ return 0;
+
+ return __synth_event_show(m, to_synth_event(ev));
+}
+
static const struct seq_operations synth_events_seq_op = {
- .start = synth_events_seq_start,
- .next = synth_events_seq_next,
- .stop = synth_events_seq_stop,
- .show = synth_events_seq_show
+ .start = dyn_event_seq_start,
+ .next = dyn_event_seq_next,
+ .stop = dyn_event_seq_stop,
+ .show = synth_events_seq_show,
};
static int synth_events_open(struct inode *inode, struct file *file)
@@ -1205,7 +1276,7 @@ static int synth_events_open(struct inode *inode, struct file *file)
int ret;
if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
- ret = release_all_synth_events();
+ ret = dyn_events_release_all(&synth_event_ops);
if (ret < 0)
return ret;
}
@@ -1218,7 +1289,7 @@ static ssize_t synth_events_write(struct file *file,
size_t count, loff_t *ppos)
{
return trace_parse_run_command(file, buffer, count, ppos,
- create_synth_event);
+ create_or_delete_synth_event);
}
static const struct file_operations synth_events_fops = {
@@ -1255,82 +1326,73 @@ static u64 hist_field_cpu(struct hist_field *hist_field,
return cpu;
}
+/**
+ * check_field_for_var_ref - Check if a VAR_REF field references a variable
+ * @hist_field: The VAR_REF field to check
+ * @var_data: The hist trigger that owns the variable
+ * @var_idx: The trigger variable identifier
+ *
+ * Check the given VAR_REF field to see whether or not it references
+ * the given variable associated with the given trigger.
+ *
+ * Return: The VAR_REF field if it does reference the variable, NULL if not
+ */
static struct hist_field *
check_field_for_var_ref(struct hist_field *hist_field,
struct hist_trigger_data *var_data,
unsigned int var_idx)
{
- struct hist_field *found = NULL;
-
- if (hist_field && hist_field->flags & HIST_FIELD_FL_VAR_REF) {
- if (hist_field->var.idx == var_idx &&
- hist_field->var.hist_data == var_data) {
- found = hist_field;
- }
- }
-
- return found;
-}
-
-static struct hist_field *
-check_field_for_var_refs(struct hist_trigger_data *hist_data,
- struct hist_field *hist_field,
- struct hist_trigger_data *var_data,
- unsigned int var_idx,
- unsigned int level)
-{
- struct hist_field *found = NULL;
- unsigned int i;
+ WARN_ON(!(hist_field && hist_field->flags & HIST_FIELD_FL_VAR_REF));
- if (level > 3)
- return found;
-
- if (!hist_field)
- return found;
-
- found = check_field_for_var_ref(hist_field, var_data, var_idx);
- if (found)
- return found;
-
- for (i = 0; i < HIST_FIELD_OPERANDS_MAX; i++) {
- struct hist_field *operand;
-
- operand = hist_field->operands[i];
- found = check_field_for_var_refs(hist_data, operand, var_data,
- var_idx, level + 1);
- if (found)
- return found;
- }
+ if (hist_field && hist_field->var.idx == var_idx &&
+ hist_field->var.hist_data == var_data)
+ return hist_field;
- return found;
+ return NULL;
}
+/**
+ * find_var_ref - Check if a trigger has a reference to a trigger variable
+ * @hist_data: The hist trigger that might have a reference to the variable
+ * @var_data: The hist trigger that owns the variable
+ * @var_idx: The trigger variable identifier
+ *
+ * Check the list of var_refs[] on the first hist trigger to see
+ * whether any of them are references to the variable on the second
+ * trigger.
+ *
+ * Return: The VAR_REF field referencing the variable if so, NULL if not
+ */
static struct hist_field *find_var_ref(struct hist_trigger_data *hist_data,
struct hist_trigger_data *var_data,
unsigned int var_idx)
{
- struct hist_field *hist_field, *found = NULL;
+ struct hist_field *hist_field;
unsigned int i;
- for_each_hist_field(i, hist_data) {
- hist_field = hist_data->fields[i];
- found = check_field_for_var_refs(hist_data, hist_field,
- var_data, var_idx, 0);
- if (found)
- return found;
- }
-
- for (i = 0; i < hist_data->n_synth_var_refs; i++) {
- hist_field = hist_data->synth_var_refs[i];
- found = check_field_for_var_refs(hist_data, hist_field,
- var_data, var_idx, 0);
- if (found)
- return found;
+ for (i = 0; i < hist_data->n_var_refs; i++) {
+ hist_field = hist_data->var_refs[i];
+ if (check_field_for_var_ref(hist_field, var_data, var_idx))
+ return hist_field;
}
- return found;
+ return NULL;
}
+/**
+ * find_any_var_ref - Check if there is a reference to a given trigger variable
+ * @hist_data: The hist trigger
+ * @var_idx: The trigger variable identifier
+ *
+ * Check to see whether the given variable is currently referenced by
+ * any other trigger.
+ *
+ * The trigger the variable is defined on is explicitly excluded - the
+ * assumption being that a self-reference doesn't prevent a trigger
+ * from being removed.
+ *
+ * Return: The VAR_REF field referencing the variable if so, NULL if not
+ */
static struct hist_field *find_any_var_ref(struct hist_trigger_data *hist_data,
unsigned int var_idx)
{
@@ -1349,6 +1411,19 @@ static struct hist_field *find_any_var_ref(struct hist_trigger_data *hist_data,
return found;
}
+/**
+ * check_var_refs - Check if there is a reference to any of trigger's variables
+ * @hist_data: The hist trigger
+ *
+ * A trigger can define one or more variables. If any one of them is
+ * currently referenced by any other trigger, this function will
+ * determine that.
+
+ * Typically used to determine whether or not a trigger can be removed
+ * - if there are any references to a trigger's variables, it cannot.
+ *
+ * Return: True if there is a reference to any of trigger's variables
+ */
static bool check_var_refs(struct hist_trigger_data *hist_data)
{
struct hist_field *field;
@@ -1806,8 +1881,8 @@ static int parse_action(char *str, struct hist_trigger_attrs *attrs)
if (attrs->n_actions >= HIST_ACTIONS_MAX)
return ret;
- if ((strncmp(str, "onmatch(", strlen("onmatch(")) == 0) ||
- (strncmp(str, "onmax(", strlen("onmax(")) == 0)) {
+ if ((str_has_prefix(str, "onmatch(")) ||
+ (str_has_prefix(str, "onmax("))) {
attrs->action_str[attrs->n_actions] = kstrdup(str, GFP_KERNEL);
if (!attrs->action_str[attrs->n_actions]) {
ret = -ENOMEM;
@@ -1824,34 +1899,34 @@ static int parse_assignment(char *str, struct hist_trigger_attrs *attrs)
{
int ret = 0;
- if ((strncmp(str, "key=", strlen("key=")) == 0) ||
- (strncmp(str, "keys=", strlen("keys=")) == 0)) {
+ if ((str_has_prefix(str, "key=")) ||
+ (str_has_prefix(str, "keys="))) {
attrs->keys_str = kstrdup(str, GFP_KERNEL);
if (!attrs->keys_str) {
ret = -ENOMEM;
goto out;
}
- } else if ((strncmp(str, "val=", strlen("val=")) == 0) ||
- (strncmp(str, "vals=", strlen("vals=")) == 0) ||
- (strncmp(str, "values=", strlen("values=")) == 0)) {
+ } else if ((str_has_prefix(str, "val=")) ||
+ (str_has_prefix(str, "vals=")) ||
+ (str_has_prefix(str, "values="))) {
attrs->vals_str = kstrdup(str, GFP_KERNEL);
if (!attrs->vals_str) {
ret = -ENOMEM;
goto out;
}
- } else if (strncmp(str, "sort=", strlen("sort=")) == 0) {
+ } else if (str_has_prefix(str, "sort=")) {
attrs->sort_key_str = kstrdup(str, GFP_KERNEL);
if (!attrs->sort_key_str) {
ret = -ENOMEM;
goto out;
}
- } else if (strncmp(str, "name=", strlen("name=")) == 0) {
+ } else if (str_has_prefix(str, "name=")) {
attrs->name = kstrdup(str, GFP_KERNEL);
if (!attrs->name) {
ret = -ENOMEM;
goto out;
}
- } else if (strncmp(str, "clock=", strlen("clock=")) == 0) {
+ } else if (str_has_prefix(str, "clock=")) {
strsep(&str, "=");
if (!str) {
ret = -EINVAL;
@@ -1864,7 +1939,7 @@ static int parse_assignment(char *str, struct hist_trigger_attrs *attrs)
ret = -ENOMEM;
goto out;
}
- } else if (strncmp(str, "size=", strlen("size=")) == 0) {
+ } else if (str_has_prefix(str, "size=")) {
int map_bits = parse_map_size(str);
if (map_bits < 0) {
@@ -2149,6 +2224,15 @@ static int contains_operator(char *str)
return field_op;
}
+static void __destroy_hist_field(struct hist_field *hist_field)
+{
+ kfree(hist_field->var.name);
+ kfree(hist_field->name);
+ kfree(hist_field->type);
+
+ kfree(hist_field);
+}
+
static void destroy_hist_field(struct hist_field *hist_field,
unsigned int level)
{
@@ -2160,14 +2244,13 @@ static void destroy_hist_field(struct hist_field *hist_field,
if (!hist_field)
return;
+ if (hist_field->flags & HIST_FIELD_FL_VAR_REF)
+ return; /* var refs will be destroyed separately */
+
for (i = 0; i < HIST_FIELD_OPERANDS_MAX; i++)
destroy_hist_field(hist_field->operands[i], level + 1);
- kfree(hist_field->var.name);
- kfree(hist_field->name);
- kfree(hist_field->type);
-
- kfree(hist_field);
+ __destroy_hist_field(hist_field);
}
static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data,
@@ -2294,6 +2377,12 @@ static void destroy_hist_fields(struct hist_trigger_data *hist_data)
hist_data->fields[i] = NULL;
}
}
+
+ for (i = 0; i < hist_data->n_var_refs; i++) {
+ WARN_ON(!(hist_data->var_refs[i]->flags & HIST_FIELD_FL_VAR_REF));
+ __destroy_hist_field(hist_data->var_refs[i]);
+ hist_data->var_refs[i] = NULL;
+ }
}
static int init_var_ref(struct hist_field *ref_field,
@@ -2352,7 +2441,23 @@ static int init_var_ref(struct hist_field *ref_field,
goto out;
}
-static struct hist_field *create_var_ref(struct hist_field *var_field,
+/**
+ * create_var_ref - Create a variable reference and attach it to trigger
+ * @hist_data: The trigger that will be referencing the variable
+ * @var_field: The VAR field to create a reference to
+ * @system: The optional system string
+ * @event_name: The optional event_name string
+ *
+ * Given a variable hist_field, create a VAR_REF hist_field that
+ * represents a reference to it.
+ *
+ * This function also adds the reference to the trigger that
+ * now references the variable.
+ *
+ * Return: The VAR_REF field if successful, NULL if not
+ */
+static struct hist_field *create_var_ref(struct hist_trigger_data *hist_data,
+ struct hist_field *var_field,
char *system, char *event_name)
{
unsigned long flags = HIST_FIELD_FL_VAR_REF;
@@ -2364,6 +2469,9 @@ static struct hist_field *create_var_ref(struct hist_field *var_field,
destroy_hist_field(ref_field, 0);
return NULL;
}
+
+ hist_data->var_refs[hist_data->n_var_refs] = ref_field;
+ ref_field->var_ref_idx = hist_data->n_var_refs++;
}
return ref_field;
@@ -2437,7 +2545,8 @@ static struct hist_field *parse_var_ref(struct hist_trigger_data *hist_data,
var_field = find_event_var(hist_data, system, event_name, var_name);
if (var_field)
- ref_field = create_var_ref(var_field, system, event_name);
+ ref_field = create_var_ref(hist_data, var_field,
+ system, event_name);
if (!ref_field)
hist_err_event("Couldn't find variable: $",
@@ -2555,8 +2664,6 @@ static struct hist_field *parse_atom(struct hist_trigger_data *hist_data,
if (!s) {
hist_field = parse_var_ref(hist_data, ref_system, ref_event, ref_var);
if (hist_field) {
- hist_data->var_refs[hist_data->n_var_refs] = hist_field;
- hist_field->var_ref_idx = hist_data->n_var_refs++;
if (var_name) {
hist_field = create_alias(hist_data, hist_field, var_name);
if (!hist_field) {
@@ -3330,7 +3437,6 @@ static int onmax_create(struct hist_trigger_data *hist_data,
unsigned int var_ref_idx = hist_data->n_var_refs;
struct field_var *field_var;
char *onmax_var_str, *param;
- unsigned long flags;
unsigned int i;
int ret = 0;
@@ -3347,18 +3453,10 @@ static int onmax_create(struct hist_trigger_data *hist_data,
return -EINVAL;
}
- flags = HIST_FIELD_FL_VAR_REF;
- ref_field = create_hist_field(hist_data, NULL, flags, NULL);
+ ref_field = create_var_ref(hist_data, var_field, NULL, NULL);
if (!ref_field)
return -ENOMEM;
- if (init_var_ref(ref_field, var_field, NULL, NULL)) {
- destroy_hist_field(ref_field, 0);
- ret = -ENOMEM;
- goto out;
- }
- hist_data->var_refs[hist_data->n_var_refs] = ref_field;
- ref_field->var_ref_idx = hist_data->n_var_refs++;
data->onmax.var = ref_field;
data->fn = onmax_save;
@@ -3460,7 +3558,7 @@ static struct action_data *onmax_parse(char *str)
if (!onmax_fn_name || !str)
goto free;
- if (strncmp(onmax_fn_name, "save", strlen("save")) == 0) {
+ if (str_has_prefix(onmax_fn_name, "save")) {
char *params = strsep(&str, ")");
if (!params) {
@@ -3491,7 +3589,7 @@ static void onmatch_destroy(struct action_data *data)
{
unsigned int i;
- mutex_lock(&synth_event_mutex);
+ lockdep_assert_held(&event_mutex);
kfree(data->onmatch.match_event);
kfree(data->onmatch.match_event_system);
@@ -3504,8 +3602,6 @@ static void onmatch_destroy(struct action_data *data)
data->onmatch.synth_event->ref--;
kfree(data);
-
- mutex_unlock(&synth_event_mutex);
}
static void destroy_field_var(struct field_var *field_var)
@@ -3537,23 +3633,6 @@ static void save_field_var(struct hist_trigger_data *hist_data,
}
-static void destroy_synth_var_refs(struct hist_trigger_data *hist_data)
-{
- unsigned int i;
-
- for (i = 0; i < hist_data->n_synth_var_refs; i++)
- destroy_hist_field(hist_data->synth_var_refs[i], 0);
-}
-
-static void save_synth_var_ref(struct hist_trigger_data *hist_data,
- struct hist_field *var_ref)
-{
- hist_data->synth_var_refs[hist_data->n_synth_var_refs++] = var_ref;
-
- hist_data->var_refs[hist_data->n_var_refs] = var_ref;
- var_ref->var_ref_idx = hist_data->n_var_refs++;
-}
-
static int check_synth_field(struct synth_event *event,
struct hist_field *hist_field,
unsigned int field_pos)
@@ -3656,15 +3735,14 @@ static int onmatch_create(struct hist_trigger_data *hist_data,
struct synth_event *event;
int ret = 0;
- mutex_lock(&synth_event_mutex);
+ lockdep_assert_held(&event_mutex);
+
event = find_synth_event(data->onmatch.synth_event_name);
if (!event) {
hist_err("onmatch: Couldn't find synthetic event: ", data->onmatch.synth_event_name);
- mutex_unlock(&synth_event_mutex);
return -EINVAL;
}
event->ref++;
- mutex_unlock(&synth_event_mutex);
var_ref_idx = hist_data->n_var_refs;
@@ -3706,14 +3784,14 @@ static int onmatch_create(struct hist_trigger_data *hist_data,
}
if (check_synth_field(event, hist_field, field_pos) == 0) {
- var_ref = create_var_ref(hist_field, system, event_name);
+ var_ref = create_var_ref(hist_data, hist_field,
+ system, event_name);
if (!var_ref) {
kfree(p);
ret = -ENOMEM;
goto err;
}
- save_synth_var_ref(hist_data, var_ref);
field_pos++;
kfree(p);
continue;
@@ -3738,9 +3816,7 @@ static int onmatch_create(struct hist_trigger_data *hist_data,
out:
return ret;
err:
- mutex_lock(&synth_event_mutex);
event->ref--;
- mutex_unlock(&synth_event_mutex);
goto out;
}
@@ -4266,12 +4342,13 @@ static int parse_actions(struct hist_trigger_data *hist_data)
unsigned int i;
int ret = 0;
char *str;
+ int len;
for (i = 0; i < hist_data->attrs->n_actions; i++) {
str = hist_data->attrs->action_str[i];
- if (strncmp(str, "onmatch(", strlen("onmatch(")) == 0) {
- char *action_str = str + strlen("onmatch(");
+ if ((len = str_has_prefix(str, "onmatch("))) {
+ char *action_str = str + len;
data = onmatch_parse(tr, action_str);
if (IS_ERR(data)) {
@@ -4279,8 +4356,8 @@ static int parse_actions(struct hist_trigger_data *hist_data)
break;
}
data->fn = action_trace;
- } else if (strncmp(str, "onmax(", strlen("onmax(")) == 0) {
- char *action_str = str + strlen("onmax(");
+ } else if ((len = str_has_prefix(str, "onmax("))) {
+ char *action_str = str + len;
data = onmax_parse(action_str);
if (IS_ERR(data)) {
@@ -4459,7 +4536,6 @@ static void destroy_hist_data(struct hist_trigger_data *hist_data)
destroy_actions(hist_data);
destroy_field_vars(hist_data);
destroy_field_var_hists(hist_data);
- destroy_synth_var_refs(hist_data);
kfree(hist_data);
}
@@ -5448,6 +5524,8 @@ static void hist_unreg_all(struct trace_event_file *file)
struct synth_event *se;
const char *se_name;
+ lockdep_assert_held(&event_mutex);
+
if (hist_file_check_refs(file))
return;
@@ -5457,12 +5535,10 @@ static void hist_unreg_all(struct trace_event_file *file)
list_del_rcu(&test->list);
trace_event_trigger_enable_disable(file, 0);
- mutex_lock(&synth_event_mutex);
se_name = trace_event_name(file->event_call);
se = find_synth_event(se_name);
if (se)
se->ref--;
- mutex_unlock(&synth_event_mutex);
update_cond_flag(file);
if (hist_data->enable_timestamps)
@@ -5488,6 +5564,8 @@ static int event_hist_trigger_func(struct event_command *cmd_ops,
char *trigger, *p;
int ret = 0;
+ lockdep_assert_held(&event_mutex);
+
if (glob && strlen(glob)) {
last_cmd_set(param);
hist_err_clear();
@@ -5514,9 +5592,9 @@ static int event_hist_trigger_func(struct event_command *cmd_ops,
p++;
continue;
}
- if (p >= param + strlen(param) - strlen("if") - 1)
+ if (p >= param + strlen(param) - (sizeof("if") - 1) - 1)
return -EINVAL;
- if (*(p + strlen("if")) != ' ' && *(p + strlen("if")) != '\t') {
+ if (*(p + sizeof("if") - 1) != ' ' && *(p + sizeof("if") - 1) != '\t') {
p++;
continue;
}
@@ -5578,14 +5656,10 @@ static int event_hist_trigger_func(struct event_command *cmd_ops,
}
cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file);
-
- mutex_lock(&synth_event_mutex);
se_name = trace_event_name(file->event_call);
se = find_synth_event(se_name);
if (se)
se->ref--;
- mutex_unlock(&synth_event_mutex);
-
ret = 0;
goto out_free;
}
@@ -5621,13 +5695,10 @@ enable:
if (ret)
goto out_unreg;
- mutex_lock(&synth_event_mutex);
se_name = trace_event_name(file->event_call);
se = find_synth_event(se_name);
if (se)
se->ref++;
- mutex_unlock(&synth_event_mutex);
-
/* Just return zero, not the number of registered triggers */
ret = 0;
out:
@@ -5810,6 +5881,12 @@ static __init int trace_events_hist_init(void)
struct dentry *d_tracer;
int err = 0;
+ err = dyn_event_register(&synth_event_ops);
+ if (err) {
+ pr_warn("Could not register synth_event_ops\n");
+ return err;
+ }
+
d_tracer = tracing_init_dentry();
if (IS_ERR(d_tracer)) {
err = PTR_ERR(d_tracer);
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index 2152d1e530cb..cd12ecb66eb9 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -732,8 +732,10 @@ int set_trigger_filter(char *filter_str,
/* The filter is for the 'trigger' event, not the triggered event */
ret = create_event_filter(file->event_call, filter_str, false, &filter);
- if (ret)
- goto out;
+ /*
+ * If create_event_filter() fails, filter still needs to be freed.
+ * Which the calling code will do with data->filter.
+ */
assign:
tmp = rcu_access_pointer(data->filter);
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 169b3c44ee97..c2af1560e856 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -16,33 +16,6 @@
#include "trace.h"
#include "trace_output.h"
-static bool kill_ftrace_graph;
-
-/**
- * ftrace_graph_is_dead - returns true if ftrace_graph_stop() was called
- *
- * ftrace_graph_stop() is called when a severe error is detected in
- * the function graph tracing. This function is called by the critical
- * paths of function graph to keep those paths from doing any more harm.
- */
-bool ftrace_graph_is_dead(void)
-{
- return kill_ftrace_graph;
-}
-
-/**
- * ftrace_graph_stop - set to permanently disable function graph tracincg
- *
- * In case of an error int function graph tracing, this is called
- * to try to keep function graph tracing from causing any more harm.
- * Usually this is pretty severe and this is called to try to at least
- * get a warning out to the user.
- */
-void ftrace_graph_stop(void)
-{
- kill_ftrace_graph = true;
-}
-
/* When set, irq functions will be ignored */
static int ftrace_graph_skip_irqs;
@@ -87,8 +60,12 @@ static struct tracer_opt trace_opts[] = {
{ TRACER_OPT(funcgraph-tail, TRACE_GRAPH_PRINT_TAIL) },
/* Include sleep time (scheduled out) between entry and return */
{ TRACER_OPT(sleep-time, TRACE_GRAPH_SLEEP_TIME) },
+
+#ifdef CONFIG_FUNCTION_PROFILER
/* Include time within nested functions */
{ TRACER_OPT(graph-time, TRACE_GRAPH_GRAPH_TIME) },
+#endif
+
{ } /* Empty entry */
};
@@ -117,231 +94,6 @@ static void
print_graph_duration(struct trace_array *tr, unsigned long long duration,
struct trace_seq *s, u32 flags);
-/* Add a function return address to the trace stack on thread info.*/
-int
-ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
- unsigned long frame_pointer, unsigned long *retp)
-{
- unsigned long long calltime;
- int index;
-
- if (unlikely(ftrace_graph_is_dead()))
- return -EBUSY;
-
- if (!current->ret_stack)
- return -EBUSY;
-
- /*
- * We must make sure the ret_stack is tested before we read
- * anything else.
- */
- smp_rmb();
-
- /* The return trace stack is full */
- if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
- atomic_inc(&current->trace_overrun);
- return -EBUSY;
- }
-
- /*
- * The curr_ret_stack is an index to ftrace return stack of
- * current task. Its value should be in [0, FTRACE_RETFUNC_
- * DEPTH) when the function graph tracer is used. To support
- * filtering out specific functions, it makes the index
- * negative by subtracting huge value (FTRACE_NOTRACE_DEPTH)
- * so when it sees a negative index the ftrace will ignore
- * the record. And the index gets recovered when returning
- * from the filtered function by adding the FTRACE_NOTRACE_
- * DEPTH and then it'll continue to record functions normally.
- *
- * The curr_ret_stack is initialized to -1 and get increased
- * in this function. So it can be less than -1 only if it was
- * filtered out via ftrace_graph_notrace_addr() which can be
- * set from set_graph_notrace file in tracefs by user.
- */
- if (current->curr_ret_stack < -1)
- return -EBUSY;
-
- calltime = trace_clock_local();
-
- index = ++current->curr_ret_stack;
- if (ftrace_graph_notrace_addr(func))
- current->curr_ret_stack -= FTRACE_NOTRACE_DEPTH;
- barrier();
- current->ret_stack[index].ret = ret;
- current->ret_stack[index].func = func;
- current->ret_stack[index].calltime = calltime;
-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
- current->ret_stack[index].fp = frame_pointer;
-#endif
-#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
- current->ret_stack[index].retp = retp;
-#endif
- *depth = current->curr_ret_stack;
-
- return 0;
-}
-
-/* Retrieve a function return address to the trace stack on thread info.*/
-static void
-ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
- unsigned long frame_pointer)
-{
- int index;
-
- index = current->curr_ret_stack;
-
- /*
- * A negative index here means that it's just returned from a
- * notrace'd function. Recover index to get an original
- * return address. See ftrace_push_return_trace().
- *
- * TODO: Need to check whether the stack gets corrupted.
- */
- if (index < 0)
- index += FTRACE_NOTRACE_DEPTH;
-
- if (unlikely(index < 0 || index >= FTRACE_RETFUNC_DEPTH)) {
- ftrace_graph_stop();
- WARN_ON(1);
- /* Might as well panic, otherwise we have no where to go */
- *ret = (unsigned long)panic;
- return;
- }
-
-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
- /*
- * The arch may choose to record the frame pointer used
- * and check it here to make sure that it is what we expect it
- * to be. If gcc does not set the place holder of the return
- * address in the frame pointer, and does a copy instead, then
- * the function graph trace will fail. This test detects this
- * case.
- *
- * Currently, x86_32 with optimize for size (-Os) makes the latest
- * gcc do the above.
- *
- * Note, -mfentry does not use frame pointers, and this test
- * is not needed if CC_USING_FENTRY is set.
- */
- if (unlikely(current->ret_stack[index].fp != frame_pointer)) {
- ftrace_graph_stop();
- WARN(1, "Bad frame pointer: expected %lx, received %lx\n"
- " from func %ps return to %lx\n",
- current->ret_stack[index].fp,
- frame_pointer,
- (void *)current->ret_stack[index].func,
- current->ret_stack[index].ret);
- *ret = (unsigned long)panic;
- return;
- }
-#endif
-
- *ret = current->ret_stack[index].ret;
- trace->func = current->ret_stack[index].func;
- trace->calltime = current->ret_stack[index].calltime;
- trace->overrun = atomic_read(&current->trace_overrun);
- trace->depth = index;
-}
-
-/*
- * Send the trace to the ring-buffer.
- * @return the original return address.
- */
-unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
-{
- struct ftrace_graph_ret trace;
- unsigned long ret;
-
- ftrace_pop_return_trace(&trace, &ret, frame_pointer);
- trace.rettime = trace_clock_local();
- barrier();
- current->curr_ret_stack--;
- /*
- * The curr_ret_stack can be less than -1 only if it was
- * filtered out and it's about to return from the function.
- * Recover the index and continue to trace normal functions.
- */
- if (current->curr_ret_stack < -1) {
- current->curr_ret_stack += FTRACE_NOTRACE_DEPTH;
- return ret;
- }
-
- /*
- * The trace should run after decrementing the ret counter
- * in case an interrupt were to come in. We don't want to
- * lose the interrupt if max_depth is set.
- */
- ftrace_graph_return(&trace);
-
- if (unlikely(!ret)) {
- ftrace_graph_stop();
- WARN_ON(1);
- /* Might as well panic. What else to do? */
- ret = (unsigned long)panic;
- }
-
- return ret;
-}
-
-/**
- * ftrace_graph_ret_addr - convert a potentially modified stack return address
- * to its original value
- *
- * This function can be called by stack unwinding code to convert a found stack
- * return address ('ret') to its original value, in case the function graph
- * tracer has modified it to be 'return_to_handler'. If the address hasn't
- * been modified, the unchanged value of 'ret' is returned.
- *
- * 'idx' is a state variable which should be initialized by the caller to zero
- * before the first call.
- *
- * 'retp' is a pointer to the return address on the stack. It's ignored if
- * the arch doesn't have HAVE_FUNCTION_GRAPH_RET_ADDR_PTR defined.
- */
-#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
-unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
- unsigned long ret, unsigned long *retp)
-{
- int index = task->curr_ret_stack;
- int i;
-
- if (ret != (unsigned long)return_to_handler)
- return ret;
-
- if (index < -1)
- index += FTRACE_NOTRACE_DEPTH;
-
- if (index < 0)
- return ret;
-
- for (i = 0; i <= index; i++)
- if (task->ret_stack[i].retp == retp)
- return task->ret_stack[i].ret;
-
- return ret;
-}
-#else /* !HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */
-unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
- unsigned long ret, unsigned long *retp)
-{
- int task_idx;
-
- if (ret != (unsigned long)return_to_handler)
- return ret;
-
- task_idx = task->curr_ret_stack;
-
- if (!task->ret_stack || task_idx < *idx)
- return ret;
-
- task_idx -= *idx;
- (*idx)++;
-
- return task->ret_stack[task_idx].ret;
-}
-#endif /* HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */
-
int __trace_graph_entry(struct trace_array *tr,
struct ftrace_graph_ent *trace,
unsigned long flags,
@@ -382,6 +134,18 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
int cpu;
int pc;
+ if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT))
+ return 0;
+
+ if (ftrace_graph_notrace_addr(trace->func)) {
+ trace_recursion_set(TRACE_GRAPH_NOTRACE_BIT);
+ /*
+ * Need to return 1 to have the return called
+ * that will clear the NOTRACE bit.
+ */
+ return 1;
+ }
+
if (!ftrace_trace_task(tr))
return 0;
@@ -482,6 +246,13 @@ void trace_graph_return(struct ftrace_graph_ret *trace)
int cpu;
int pc;
+ ftrace_graph_addr_finish(trace);
+
+ if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT)) {
+ trace_recursion_clear(TRACE_GRAPH_NOTRACE_BIT);
+ return;
+ }
+
local_irq_save(flags);
cpu = raw_smp_processor_id();
data = per_cpu_ptr(tr->trace_buffer.data, cpu);
@@ -505,6 +276,13 @@ void set_graph_array(struct trace_array *tr)
static void trace_graph_thresh_return(struct ftrace_graph_ret *trace)
{
+ ftrace_graph_addr_finish(trace);
+
+ if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT)) {
+ trace_recursion_clear(TRACE_GRAPH_NOTRACE_BIT);
+ return;
+ }
+
if (tracing_thresh &&
(trace->rettime - trace->calltime < tracing_thresh))
return;
@@ -512,17 +290,25 @@ static void trace_graph_thresh_return(struct ftrace_graph_ret *trace)
trace_graph_return(trace);
}
+static struct fgraph_ops funcgraph_thresh_ops = {
+ .entryfunc = &trace_graph_entry,
+ .retfunc = &trace_graph_thresh_return,
+};
+
+static struct fgraph_ops funcgraph_ops = {
+ .entryfunc = &trace_graph_entry,
+ .retfunc = &trace_graph_return,
+};
+
static int graph_trace_init(struct trace_array *tr)
{
int ret;
set_graph_array(tr);
if (tracing_thresh)
- ret = register_ftrace_graph(&trace_graph_thresh_return,
- &trace_graph_entry);
+ ret = register_ftrace_graph(&funcgraph_thresh_ops);
else
- ret = register_ftrace_graph(&trace_graph_return,
- &trace_graph_entry);
+ ret = register_ftrace_graph(&funcgraph_ops);
if (ret)
return ret;
tracing_start_cmdline_record();
@@ -533,7 +319,10 @@ static int graph_trace_init(struct trace_array *tr)
static void graph_trace_reset(struct trace_array *tr)
{
tracing_stop_cmdline_record();
- unregister_ftrace_graph();
+ if (tracing_thresh)
+ unregister_ftrace_graph(&funcgraph_thresh_ops);
+ else
+ unregister_ftrace_graph(&funcgraph_ops);
}
static int graph_trace_update_thresh(struct trace_array *tr)
@@ -843,10 +632,6 @@ print_graph_entry_leaf(struct trace_iterator *iter,
cpu_data = per_cpu_ptr(data->cpu_data, cpu);
- /* If a graph tracer ignored set_graph_notrace */
- if (call->depth < -1)
- call->depth += FTRACE_NOTRACE_DEPTH;
-
/*
* Comments display at + 1 to depth. Since
* this is a leaf function, keep the comments
@@ -889,10 +674,6 @@ print_graph_entry_nested(struct trace_iterator *iter,
struct fgraph_cpu_data *cpu_data;
int cpu = iter->cpu;
- /* If a graph tracer ignored set_graph_notrace */
- if (call->depth < -1)
- call->depth += FTRACE_NOTRACE_DEPTH;
-
cpu_data = per_cpu_ptr(data->cpu_data, cpu);
cpu_data->depth = call->depth;
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index b7357f9f82a3..d42a473b8240 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -14,6 +14,7 @@
#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/ftrace.h>
+#include <linux/kprobes.h>
#include "trace.h"
@@ -208,6 +209,8 @@ static void irqsoff_graph_return(struct ftrace_graph_ret *trace)
unsigned long flags;
int pc;
+ ftrace_graph_addr_finish(trace);
+
if (!func_prolog_dec(tr, &data, &flags))
return;
@@ -216,6 +219,11 @@ static void irqsoff_graph_return(struct ftrace_graph_ret *trace)
atomic_dec(&data->disabled);
}
+static struct fgraph_ops fgraph_ops = {
+ .entryfunc = &irqsoff_graph_entry,
+ .retfunc = &irqsoff_graph_return,
+};
+
static void irqsoff_trace_open(struct trace_iterator *iter)
{
if (is_graph(iter->tr))
@@ -270,13 +278,6 @@ __trace_function(struct trace_array *tr,
#else
#define __trace_function trace_function
-#ifdef CONFIG_FUNCTION_TRACER
-static int irqsoff_graph_entry(struct ftrace_graph_ent *trace)
-{
- return -1;
-}
-#endif
-
static enum print_line_t irqsoff_print_line(struct trace_iterator *iter)
{
return TRACE_TYPE_UNHANDLED;
@@ -286,7 +287,6 @@ static void irqsoff_trace_open(struct trace_iterator *iter) { }
static void irqsoff_trace_close(struct trace_iterator *iter) { }
#ifdef CONFIG_FUNCTION_TRACER
-static void irqsoff_graph_return(struct ftrace_graph_ret *trace) { }
static void irqsoff_print_header(struct seq_file *s)
{
trace_default_header(s);
@@ -366,7 +366,7 @@ out:
__trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
}
-static inline void
+static nokprobe_inline void
start_critical_timing(unsigned long ip, unsigned long parent_ip, int pc)
{
int cpu;
@@ -402,7 +402,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip, int pc)
atomic_dec(&data->disabled);
}
-static inline void
+static nokprobe_inline void
stop_critical_timing(unsigned long ip, unsigned long parent_ip, int pc)
{
int cpu;
@@ -444,6 +444,7 @@ void start_critical_timings(void)
start_critical_timing(CALLER_ADDR0, CALLER_ADDR1, pc);
}
EXPORT_SYMBOL_GPL(start_critical_timings);
+NOKPROBE_SYMBOL(start_critical_timings);
void stop_critical_timings(void)
{
@@ -453,6 +454,7 @@ void stop_critical_timings(void)
stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1, pc);
}
EXPORT_SYMBOL_GPL(stop_critical_timings);
+NOKPROBE_SYMBOL(stop_critical_timings);
#ifdef CONFIG_FUNCTION_TRACER
static bool function_enabled;
@@ -466,8 +468,7 @@ static int register_irqsoff_function(struct trace_array *tr, int graph, int set)
return 0;
if (graph)
- ret = register_ftrace_graph(&irqsoff_graph_return,
- &irqsoff_graph_entry);
+ ret = register_ftrace_graph(&fgraph_ops);
else
ret = register_ftrace_function(tr->ops);
@@ -483,7 +484,7 @@ static void unregister_irqsoff_function(struct trace_array *tr, int graph)
return;
if (graph)
- unregister_ftrace_graph();
+ unregister_ftrace_graph(&fgraph_ops);
else
unregister_ftrace_function(tr->ops);
@@ -613,6 +614,7 @@ void tracer_hardirqs_on(unsigned long a0, unsigned long a1)
if (!preempt_trace(pc) && irq_trace())
stop_critical_timing(a0, a1, pc);
}
+NOKPROBE_SYMBOL(tracer_hardirqs_on);
void tracer_hardirqs_off(unsigned long a0, unsigned long a1)
{
@@ -621,6 +623,7 @@ void tracer_hardirqs_off(unsigned long a0, unsigned long a1)
if (!preempt_trace(pc) && irq_trace())
start_critical_timing(a0, a1, pc);
}
+NOKPROBE_SYMBOL(tracer_hardirqs_off);
static int irqsoff_tracer_init(struct trace_array *tr)
{
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index c30032367aab..99592c27465e 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -12,23 +12,59 @@
#include <linux/rculist.h>
#include <linux/error-injection.h>
+#include "trace_dynevent.h"
#include "trace_kprobe_selftest.h"
#include "trace_probe.h"
+#include "trace_probe_tmpl.h"
#define KPROBE_EVENT_SYSTEM "kprobes"
#define KRETPROBE_MAXACTIVE_MAX 4096
+static int trace_kprobe_create(int argc, const char **argv);
+static int trace_kprobe_show(struct seq_file *m, struct dyn_event *ev);
+static int trace_kprobe_release(struct dyn_event *ev);
+static bool trace_kprobe_is_busy(struct dyn_event *ev);
+static bool trace_kprobe_match(const char *system, const char *event,
+ struct dyn_event *ev);
+
+static struct dyn_event_operations trace_kprobe_ops = {
+ .create = trace_kprobe_create,
+ .show = trace_kprobe_show,
+ .is_busy = trace_kprobe_is_busy,
+ .free = trace_kprobe_release,
+ .match = trace_kprobe_match,
+};
+
/**
* Kprobe event core functions
*/
struct trace_kprobe {
- struct list_head list;
+ struct dyn_event devent;
struct kretprobe rp; /* Use rp.kp for kprobe use */
unsigned long __percpu *nhit;
const char *symbol; /* symbol name */
struct trace_probe tp;
};
+static bool is_trace_kprobe(struct dyn_event *ev)
+{
+ return ev->ops == &trace_kprobe_ops;
+}
+
+static struct trace_kprobe *to_trace_kprobe(struct dyn_event *ev)
+{
+ return container_of(ev, struct trace_kprobe, devent);
+}
+
+/**
+ * for_each_trace_kprobe - iterate over the trace_kprobe list
+ * @pos: the struct trace_kprobe * for each entry
+ * @dpos: the struct dyn_event * to use as a loop cursor
+ */
+#define for_each_trace_kprobe(pos, dpos) \
+ for_each_dyn_event(dpos) \
+ if (is_trace_kprobe(dpos) && (pos = to_trace_kprobe(dpos)))
+
#define SIZEOF_TRACE_KPROBE(n) \
(offsetof(struct trace_kprobe, tp.args) + \
(sizeof(struct probe_arg) * (n)))
@@ -61,9 +97,39 @@ static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk,
return strncmp(mod->name, name, len) == 0 && name[len] == ':';
}
-static nokprobe_inline bool trace_kprobe_is_on_module(struct trace_kprobe *tk)
+static nokprobe_inline bool trace_kprobe_module_exist(struct trace_kprobe *tk)
+{
+ char *p;
+ bool ret;
+
+ if (!tk->symbol)
+ return false;
+ p = strchr(tk->symbol, ':');
+ if (!p)
+ return true;
+ *p = '\0';
+ mutex_lock(&module_mutex);
+ ret = !!find_module(tk->symbol);
+ mutex_unlock(&module_mutex);
+ *p = ':';
+
+ return ret;
+}
+
+static bool trace_kprobe_is_busy(struct dyn_event *ev)
{
- return !!strchr(trace_kprobe_symbol(tk), ':');
+ struct trace_kprobe *tk = to_trace_kprobe(ev);
+
+ return trace_probe_is_enabled(&tk->tp);
+}
+
+static bool trace_kprobe_match(const char *system, const char *event,
+ struct dyn_event *ev)
+{
+ struct trace_kprobe *tk = to_trace_kprobe(ev);
+
+ return strcmp(trace_event_name(&tk->tp.call), event) == 0 &&
+ (!system || strcmp(tk->tp.call.class->system, system) == 0);
}
static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk)
@@ -113,191 +179,10 @@ bool trace_kprobe_error_injectable(struct trace_event_call *call)
static int register_kprobe_event(struct trace_kprobe *tk);
static int unregister_kprobe_event(struct trace_kprobe *tk);
-static DEFINE_MUTEX(probe_lock);
-static LIST_HEAD(probe_list);
-
static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
static int kretprobe_dispatcher(struct kretprobe_instance *ri,
struct pt_regs *regs);
-/* Memory fetching by symbol */
-struct symbol_cache {
- char *symbol;
- long offset;
- unsigned long addr;
-};
-
-unsigned long update_symbol_cache(struct symbol_cache *sc)
-{
- sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol);
-
- if (sc->addr)
- sc->addr += sc->offset;
-
- return sc->addr;
-}
-
-void free_symbol_cache(struct symbol_cache *sc)
-{
- kfree(sc->symbol);
- kfree(sc);
-}
-
-struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
-{
- struct symbol_cache *sc;
-
- if (!sym || strlen(sym) == 0)
- return NULL;
-
- sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
- if (!sc)
- return NULL;
-
- sc->symbol = kstrdup(sym, GFP_KERNEL);
- if (!sc->symbol) {
- kfree(sc);
- return NULL;
- }
- sc->offset = offset;
- update_symbol_cache(sc);
-
- return sc;
-}
-
-/*
- * Kprobes-specific fetch functions
- */
-#define DEFINE_FETCH_stack(type) \
-static void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs, \
- void *offset, void *dest) \
-{ \
- *(type *)dest = (type)regs_get_kernel_stack_nth(regs, \
- (unsigned int)((unsigned long)offset)); \
-} \
-NOKPROBE_SYMBOL(FETCH_FUNC_NAME(stack, type));
-
-DEFINE_BASIC_FETCH_FUNCS(stack)
-/* No string on the stack entry */
-#define fetch_stack_string NULL
-#define fetch_stack_string_size NULL
-
-#define DEFINE_FETCH_memory(type) \
-static void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs, \
- void *addr, void *dest) \
-{ \
- type retval; \
- if (probe_kernel_address(addr, retval)) \
- *(type *)dest = 0; \
- else \
- *(type *)dest = retval; \
-} \
-NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, type));
-
-DEFINE_BASIC_FETCH_FUNCS(memory)
-/*
- * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
- * length and relative data location.
- */
-static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
- void *addr, void *dest)
-{
- int maxlen = get_rloc_len(*(u32 *)dest);
- u8 *dst = get_rloc_data(dest);
- long ret;
-
- if (!maxlen)
- return;
-
- /*
- * Try to get string again, since the string can be changed while
- * probing.
- */
- ret = strncpy_from_unsafe(dst, addr, maxlen);
-
- if (ret < 0) { /* Failed to fetch string */
- dst[0] = '\0';
- *(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest));
- } else {
- *(u32 *)dest = make_data_rloc(ret, get_rloc_offs(*(u32 *)dest));
- }
-}
-NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string));
-
-/* Return the length of string -- including null terminal byte */
-static void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
- void *addr, void *dest)
-{
- mm_segment_t old_fs;
- int ret, len = 0;
- u8 c;
-
- old_fs = get_fs();
- set_fs(KERNEL_DS);
- pagefault_disable();
-
- do {
- ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1);
- len++;
- } while (c && ret == 0 && len < MAX_STRING_SIZE);
-
- pagefault_enable();
- set_fs(old_fs);
-
- if (ret < 0) /* Failed to check the length */
- *(u32 *)dest = 0;
- else
- *(u32 *)dest = len;
-}
-NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string_size));
-
-#define DEFINE_FETCH_symbol(type) \
-void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs, void *data, void *dest)\
-{ \
- struct symbol_cache *sc = data; \
- if (sc->addr) \
- fetch_memory_##type(regs, (void *)sc->addr, dest); \
- else \
- *(type *)dest = 0; \
-} \
-NOKPROBE_SYMBOL(FETCH_FUNC_NAME(symbol, type));
-
-DEFINE_BASIC_FETCH_FUNCS(symbol)
-DEFINE_FETCH_symbol(string)
-DEFINE_FETCH_symbol(string_size)
-
-/* kprobes don't support file_offset fetch methods */
-#define fetch_file_offset_u8 NULL
-#define fetch_file_offset_u16 NULL
-#define fetch_file_offset_u32 NULL
-#define fetch_file_offset_u64 NULL
-#define fetch_file_offset_string NULL
-#define fetch_file_offset_string_size NULL
-
-/* Fetch type information table */
-static const struct fetch_type kprobes_fetch_type_table[] = {
- /* Special types */
- [FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string,
- sizeof(u32), 1, "__data_loc char[]"),
- [FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32,
- string_size, sizeof(u32), 0, "u32"),
- /* Basic types */
- ASSIGN_FETCH_TYPE(u8, u8, 0),
- ASSIGN_FETCH_TYPE(u16, u16, 0),
- ASSIGN_FETCH_TYPE(u32, u32, 0),
- ASSIGN_FETCH_TYPE(u64, u64, 0),
- ASSIGN_FETCH_TYPE(s8, u8, 1),
- ASSIGN_FETCH_TYPE(s16, u16, 1),
- ASSIGN_FETCH_TYPE(s32, u32, 1),
- ASSIGN_FETCH_TYPE(s64, u64, 1),
- ASSIGN_FETCH_TYPE_ALIAS(x8, u8, u8, 0),
- ASSIGN_FETCH_TYPE_ALIAS(x16, u16, u16, 0),
- ASSIGN_FETCH_TYPE_ALIAS(x32, u32, u32, 0),
- ASSIGN_FETCH_TYPE_ALIAS(x64, u64, u64, 0),
-
- ASSIGN_FETCH_TYPE_END
-};
-
/*
* Allocate new trace_probe and initialize it (including kprobes).
*/
@@ -355,7 +240,7 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group,
if (!tk->tp.class.system)
goto error;
- INIT_LIST_HEAD(&tk->list);
+ dyn_event_init(&tk->devent, &trace_kprobe_ops);
INIT_LIST_HEAD(&tk->tp.files);
return tk;
error:
@@ -370,6 +255,9 @@ static void free_trace_kprobe(struct trace_kprobe *tk)
{
int i;
+ if (!tk)
+ return;
+
for (i = 0; i < tk->tp.nr_args; i++)
traceprobe_free_probe_arg(&tk->tp.args[i]);
@@ -383,9 +271,10 @@ static void free_trace_kprobe(struct trace_kprobe *tk)
static struct trace_kprobe *find_trace_kprobe(const char *event,
const char *group)
{
+ struct dyn_event *pos;
struct trace_kprobe *tk;
- list_for_each_entry(tk, &probe_list, list)
+ for_each_trace_kprobe(tk, pos)
if (strcmp(trace_event_name(&tk->tp.call), event) == 0 &&
strcmp(tk->tp.call.class->system, group) == 0)
return tk;
@@ -484,7 +373,7 @@ disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
* created with perf_event_open. We don't need to wait for these
* trace_kprobes
*/
- if (list_empty(&tk->list))
+ if (list_empty(&tk->devent.list))
wait = 0;
out:
if (wait) {
@@ -496,7 +385,7 @@ disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
* event_call related objects, which will be accessed in
* the kprobe_trace_func/kretprobe_trace_func.
*/
- synchronize_sched();
+ synchronize_rcu();
kfree(link); /* Ignored if link == NULL */
}
@@ -540,8 +429,11 @@ static int __register_trace_kprobe(struct trace_kprobe *tk)
return -EINVAL;
}
- for (i = 0; i < tk->tp.nr_args; i++)
- traceprobe_update_arg(&tk->tp.args[i]);
+ for (i = 0; i < tk->tp.nr_args; i++) {
+ ret = traceprobe_update_arg(&tk->tp.args[i]);
+ if (ret)
+ return ret;
+ }
/* Set/clear disabled flag according to tp->flag */
if (trace_probe_is_enabled(&tk->tp))
@@ -554,19 +446,13 @@ static int __register_trace_kprobe(struct trace_kprobe *tk)
else
ret = register_kprobe(&tk->rp.kp);
- if (ret == 0)
+ if (ret == 0) {
tk->tp.flags |= TP_FLAG_REGISTERED;
- else {
- if (ret == -ENOENT && trace_kprobe_is_on_module(tk)) {
- pr_warn("This probe might be able to register after target module is loaded. Continue.\n");
- ret = 0;
- } else if (ret == -EILSEQ) {
- pr_warn("Probing address(0x%p) is not an instruction boundary.\n",
- tk->rp.kp.addr);
- ret = -EINVAL;
- }
+ } else if (ret == -EILSEQ) {
+ pr_warn("Probing address(0x%p) is not an instruction boundary.\n",
+ tk->rp.kp.addr);
+ ret = -EINVAL;
}
-
return ret;
}
@@ -585,7 +471,7 @@ static void __unregister_trace_kprobe(struct trace_kprobe *tk)
}
}
-/* Unregister a trace_probe and probe_event: call with locking probe_lock */
+/* Unregister a trace_probe and probe_event */
static int unregister_trace_kprobe(struct trace_kprobe *tk)
{
/* Enabled event can not be unregistered */
@@ -597,7 +483,7 @@ static int unregister_trace_kprobe(struct trace_kprobe *tk)
return -EBUSY;
__unregister_trace_kprobe(tk);
- list_del(&tk->list);
+ dyn_event_remove(&tk->devent);
return 0;
}
@@ -608,7 +494,7 @@ static int register_trace_kprobe(struct trace_kprobe *tk)
struct trace_kprobe *old_tk;
int ret;
- mutex_lock(&probe_lock);
+ mutex_lock(&event_mutex);
/* Delete old (same name) event if exist */
old_tk = find_trace_kprobe(trace_event_name(&tk->tp.call),
@@ -629,13 +515,18 @@ static int register_trace_kprobe(struct trace_kprobe *tk)
/* Register k*probe */
ret = __register_trace_kprobe(tk);
+ if (ret == -ENOENT && !trace_kprobe_module_exist(tk)) {
+ pr_warn("This probe might be able to register after target module is loaded. Continue.\n");
+ ret = 0;
+ }
+
if (ret < 0)
unregister_kprobe_event(tk);
else
- list_add_tail(&tk->list, &probe_list);
+ dyn_event_add(&tk->devent);
end:
- mutex_unlock(&probe_lock);
+ mutex_unlock(&event_mutex);
return ret;
}
@@ -644,6 +535,7 @@ static int trace_kprobe_module_callback(struct notifier_block *nb,
unsigned long val, void *data)
{
struct module *mod = data;
+ struct dyn_event *pos;
struct trace_kprobe *tk;
int ret;
@@ -651,8 +543,8 @@ static int trace_kprobe_module_callback(struct notifier_block *nb,
return NOTIFY_DONE;
/* Update probes on coming module */
- mutex_lock(&probe_lock);
- list_for_each_entry(tk, &probe_list, list) {
+ mutex_lock(&event_mutex);
+ for_each_trace_kprobe(tk, pos) {
if (trace_kprobe_within_module(tk, mod)) {
/* Don't need to check busy - this should have gone. */
__unregister_trace_kprobe(tk);
@@ -663,7 +555,7 @@ static int trace_kprobe_module_callback(struct notifier_block *nb,
mod->name, ret);
}
}
- mutex_unlock(&probe_lock);
+ mutex_unlock(&event_mutex);
return NOTIFY_DONE;
}
@@ -681,7 +573,7 @@ static inline void sanitize_event_name(char *name)
*name = '_';
}
-static int create_trace_kprobe(int argc, char **argv)
+static int trace_kprobe_create(int argc, const char *argv[])
{
/*
* Argument syntax:
@@ -705,35 +597,43 @@ static int create_trace_kprobe(int argc, char **argv)
* FETCHARG:TYPE : use TYPE instead of unsigned long.
*/
struct trace_kprobe *tk;
- int i, ret = 0;
- bool is_return = false, is_delete = false;
- char *symbol = NULL, *event = NULL, *group = NULL;
+ int i, len, ret = 0;
+ bool is_return = false;
+ char *symbol = NULL, *tmp = NULL;
+ const char *event = NULL, *group = KPROBE_EVENT_SYSTEM;
int maxactive = 0;
- char *arg;
long offset = 0;
void *addr = NULL;
char buf[MAX_EVENT_NAME_LEN];
+ unsigned int flags = TPARG_FL_KERNEL;
- /* argc must be >= 1 */
- if (argv[0][0] == 'p')
- is_return = false;
- else if (argv[0][0] == 'r')
+ switch (argv[0][0]) {
+ case 'r':
is_return = true;
- else if (argv[0][0] == '-')
- is_delete = true;
- else {
- pr_info("Probe definition must be started with 'p', 'r' or"
- " '-'.\n");
- return -EINVAL;
+ flags |= TPARG_FL_RETURN;
+ break;
+ case 'p':
+ break;
+ default:
+ return -ECANCELED;
}
+ if (argc < 2)
+ return -ECANCELED;
event = strchr(&argv[0][1], ':');
- if (event) {
- event[0] = '\0';
+ if (event)
event++;
- }
+
if (is_return && isdigit(argv[0][1])) {
- ret = kstrtouint(&argv[0][1], 0, &maxactive);
+ if (event)
+ len = event - &argv[0][1] - 1;
+ else
+ len = strlen(&argv[0][1]);
+ if (len > MAX_EVENT_NAME_LEN - 1)
+ return -E2BIG;
+ memcpy(buf, &argv[0][1], len);
+ buf[len] = '\0';
+ ret = kstrtouint(buf, 0, &maxactive);
if (ret) {
pr_info("Failed to parse maxactive.\n");
return ret;
@@ -748,70 +648,37 @@ static int create_trace_kprobe(int argc, char **argv)
}
}
- if (event) {
- if (strchr(event, '/')) {
- group = event;
- event = strchr(group, '/') + 1;
- event[-1] = '\0';
- if (strlen(group) == 0) {
- pr_info("Group name is not specified\n");
- return -EINVAL;
- }
- }
- if (strlen(event) == 0) {
- pr_info("Event name is not specified\n");
- return -EINVAL;
- }
- }
- if (!group)
- group = KPROBE_EVENT_SYSTEM;
-
- if (is_delete) {
- if (!event) {
- pr_info("Delete command needs an event name.\n");
- return -EINVAL;
- }
- mutex_lock(&probe_lock);
- tk = find_trace_kprobe(event, group);
- if (!tk) {
- mutex_unlock(&probe_lock);
- pr_info("Event %s/%s doesn't exist.\n", group, event);
- return -ENOENT;
- }
- /* delete an event */
- ret = unregister_trace_kprobe(tk);
- if (ret == 0)
- free_trace_kprobe(tk);
- mutex_unlock(&probe_lock);
- return ret;
- }
-
- if (argc < 2) {
- pr_info("Probe point is not specified.\n");
- return -EINVAL;
- }
-
/* try to parse an address. if that fails, try to read the
* input as a symbol. */
if (kstrtoul(argv[1], 0, (unsigned long *)&addr)) {
+ /* Check whether uprobe event specified */
+ if (strchr(argv[1], '/') && strchr(argv[1], ':'))
+ return -ECANCELED;
/* a symbol specified */
- symbol = argv[1];
+ symbol = kstrdup(argv[1], GFP_KERNEL);
+ if (!symbol)
+ return -ENOMEM;
/* TODO: support .init module functions */
ret = traceprobe_split_symbol_offset(symbol, &offset);
if (ret || offset < 0 || offset > UINT_MAX) {
pr_info("Failed to parse either an address or a symbol.\n");
- return ret;
+ goto out;
}
- if (offset && is_return &&
- !kprobe_on_func_entry(NULL, symbol, offset)) {
+ if (kprobe_on_func_entry(NULL, symbol, offset))
+ flags |= TPARG_FL_FENTRY;
+ if (offset && is_return && !(flags & TPARG_FL_FENTRY)) {
pr_info("Given offset is not valid for return probe.\n");
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
}
argc -= 2; argv += 2;
- /* setup a probe */
- if (!event) {
+ if (event) {
+ ret = traceprobe_parse_event_name(&event, &group, buf);
+ if (ret)
+ goto out;
+ } else {
/* Make a new event name */
if (symbol)
snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld",
@@ -822,122 +689,67 @@ static int create_trace_kprobe(int argc, char **argv)
sanitize_event_name(buf);
event = buf;
}
+
+ /* setup a probe */
tk = alloc_trace_kprobe(group, event, addr, symbol, offset, maxactive,
argc, is_return);
if (IS_ERR(tk)) {
pr_info("Failed to allocate trace_probe.(%d)\n",
(int)PTR_ERR(tk));
- return PTR_ERR(tk);
+ ret = PTR_ERR(tk);
+ goto out;
}
/* parse arguments */
- ret = 0;
for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
- struct probe_arg *parg = &tk->tp.args[i];
-
- /* Increment count for freeing args in error case */
- tk->tp.nr_args++;
-
- /* Parse argument name */
- arg = strchr(argv[i], '=');
- if (arg) {
- *arg++ = '\0';
- parg->name = kstrdup(argv[i], GFP_KERNEL);
- } else {
- arg = argv[i];
- /* If argument name is omitted, set "argN" */
- snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1);
- parg->name = kstrdup(buf, GFP_KERNEL);
- }
-
- if (!parg->name) {
- pr_info("Failed to allocate argument[%d] name.\n", i);
+ tmp = kstrdup(argv[i], GFP_KERNEL);
+ if (!tmp) {
ret = -ENOMEM;
goto error;
}
- if (!is_good_name(parg->name)) {
- pr_info("Invalid argument[%d] name: %s\n",
- i, parg->name);
- ret = -EINVAL;
- goto error;
- }
-
- if (traceprobe_conflict_field_name(parg->name,
- tk->tp.args, i)) {
- pr_info("Argument[%d] name '%s' conflicts with "
- "another field.\n", i, argv[i]);
- ret = -EINVAL;
- goto error;
- }
-
- /* Parse fetch argument */
- ret = traceprobe_parse_probe_arg(arg, &tk->tp.size, parg,
- is_return, true,
- kprobes_fetch_type_table);
- if (ret) {
- pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
+ ret = traceprobe_parse_probe_arg(&tk->tp, i, tmp, flags);
+ kfree(tmp);
+ if (ret)
goto error;
- }
}
ret = register_trace_kprobe(tk);
if (ret)
goto error;
- return 0;
+out:
+ kfree(symbol);
+ return ret;
error:
free_trace_kprobe(tk);
- return ret;
+ goto out;
}
-static int release_all_trace_kprobes(void)
+static int create_or_delete_trace_kprobe(int argc, char **argv)
{
- struct trace_kprobe *tk;
- int ret = 0;
-
- mutex_lock(&probe_lock);
- /* Ensure no probe is in use. */
- list_for_each_entry(tk, &probe_list, list)
- if (trace_probe_is_enabled(&tk->tp)) {
- ret = -EBUSY;
- goto end;
- }
- /* TODO: Use batch unregistration */
- while (!list_empty(&probe_list)) {
- tk = list_entry(probe_list.next, struct trace_kprobe, list);
- ret = unregister_trace_kprobe(tk);
- if (ret)
- goto end;
- free_trace_kprobe(tk);
- }
+ int ret;
-end:
- mutex_unlock(&probe_lock);
+ if (argv[0][0] == '-')
+ return dyn_event_release(argc, argv, &trace_kprobe_ops);
- return ret;
+ ret = trace_kprobe_create(argc, (const char **)argv);
+ return ret == -ECANCELED ? -EINVAL : ret;
}
-/* Probes listing interfaces */
-static void *probes_seq_start(struct seq_file *m, loff_t *pos)
+static int trace_kprobe_release(struct dyn_event *ev)
{
- mutex_lock(&probe_lock);
- return seq_list_start(&probe_list, *pos);
-}
+ struct trace_kprobe *tk = to_trace_kprobe(ev);
+ int ret = unregister_trace_kprobe(tk);
-static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
-{
- return seq_list_next(v, &probe_list, pos);
-}
-
-static void probes_seq_stop(struct seq_file *m, void *v)
-{
- mutex_unlock(&probe_lock);
+ if (!ret)
+ free_trace_kprobe(tk);
+ return ret;
}
-static int probes_seq_show(struct seq_file *m, void *v)
+static int trace_kprobe_show(struct seq_file *m, struct dyn_event *ev)
{
- struct trace_kprobe *tk = v;
+ struct trace_kprobe *tk = to_trace_kprobe(ev);
int i;
seq_putc(m, trace_kprobe_is_return(tk) ? 'r' : 'p');
@@ -959,10 +771,20 @@ static int probes_seq_show(struct seq_file *m, void *v)
return 0;
}
+static int probes_seq_show(struct seq_file *m, void *v)
+{
+ struct dyn_event *ev = v;
+
+ if (!is_trace_kprobe(ev))
+ return 0;
+
+ return trace_kprobe_show(m, ev);
+}
+
static const struct seq_operations probes_seq_op = {
- .start = probes_seq_start,
- .next = probes_seq_next,
- .stop = probes_seq_stop,
+ .start = dyn_event_seq_start,
+ .next = dyn_event_seq_next,
+ .stop = dyn_event_seq_stop,
.show = probes_seq_show
};
@@ -971,7 +793,7 @@ static int probes_open(struct inode *inode, struct file *file)
int ret;
if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
- ret = release_all_trace_kprobes();
+ ret = dyn_events_release_all(&trace_kprobe_ops);
if (ret < 0)
return ret;
}
@@ -983,7 +805,7 @@ static ssize_t probes_write(struct file *file, const char __user *buffer,
size_t count, loff_t *ppos)
{
return trace_parse_run_command(file, buffer, count, ppos,
- create_trace_kprobe);
+ create_or_delete_trace_kprobe);
}
static const struct file_operations kprobe_events_ops = {
@@ -998,8 +820,13 @@ static const struct file_operations kprobe_events_ops = {
/* Probes profiling interfaces */
static int probes_profile_seq_show(struct seq_file *m, void *v)
{
- struct trace_kprobe *tk = v;
+ struct dyn_event *ev = v;
+ struct trace_kprobe *tk;
+ if (!is_trace_kprobe(ev))
+ return 0;
+
+ tk = to_trace_kprobe(ev);
seq_printf(m, " %-44s %15lu %15lu\n",
trace_event_name(&tk->tp.call),
trace_kprobe_nhit(tk),
@@ -1009,9 +836,9 @@ static int probes_profile_seq_show(struct seq_file *m, void *v)
}
static const struct seq_operations profile_seq_op = {
- .start = probes_seq_start,
- .next = probes_seq_next,
- .stop = probes_seq_stop,
+ .start = dyn_event_seq_start,
+ .next = dyn_event_seq_next,
+ .stop = dyn_event_seq_stop,
.show = probes_profile_seq_show
};
@@ -1028,6 +855,98 @@ static const struct file_operations kprobe_profile_ops = {
.release = seq_release,
};
+/* Kprobe specific fetch functions */
+
+/* Return the length of string -- including null terminal byte */
+static nokprobe_inline int
+fetch_store_strlen(unsigned long addr)
+{
+ int ret, len = 0;
+ u8 c;
+
+ do {
+ ret = probe_kernel_read(&c, (u8 *)addr + len, 1);
+ len++;
+ } while (c && ret == 0 && len < MAX_STRING_SIZE);
+
+ return (ret < 0) ? ret : len;
+}
+
+/*
+ * Fetch a null-terminated string. Caller MUST set *(u32 *)buf with max
+ * length and relative data location.
+ */
+static nokprobe_inline int
+fetch_store_string(unsigned long addr, void *dest, void *base)
+{
+ int maxlen = get_loc_len(*(u32 *)dest);
+ u8 *dst = get_loc_data(dest, base);
+ long ret;
+
+ if (unlikely(!maxlen))
+ return -ENOMEM;
+ /*
+ * Try to get string again, since the string can be changed while
+ * probing.
+ */
+ ret = strncpy_from_unsafe(dst, (void *)addr, maxlen);
+
+ if (ret >= 0)
+ *(u32 *)dest = make_data_loc(ret, (void *)dst - base);
+ return ret;
+}
+
+static nokprobe_inline int
+probe_mem_read(void *dest, void *src, size_t size)
+{
+ return probe_kernel_read(dest, src, size);
+}
+
+/* Note that we don't verify it, since the code does not come from user space */
+static int
+process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest,
+ void *base)
+{
+ unsigned long val;
+
+retry:
+ /* 1st stage: get value from context */
+ switch (code->op) {
+ case FETCH_OP_REG:
+ val = regs_get_register(regs, code->param);
+ break;
+ case FETCH_OP_STACK:
+ val = regs_get_kernel_stack_nth(regs, code->param);
+ break;
+ case FETCH_OP_STACKP:
+ val = kernel_stack_pointer(regs);
+ break;
+ case FETCH_OP_RETVAL:
+ val = regs_return_value(regs);
+ break;
+ case FETCH_OP_IMM:
+ val = code->immediate;
+ break;
+ case FETCH_OP_COMM:
+ val = (unsigned long)current->comm;
+ break;
+#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
+ case FETCH_OP_ARG:
+ val = regs_get_kernel_argument(regs, code->param);
+ break;
+#endif
+ case FETCH_NOP_SYMBOL: /* Ignore a place holder */
+ code++;
+ goto retry;
+ default:
+ return -EILSEQ;
+ }
+ code++;
+
+ return process_fetch_insn_bottom(code, val, dest, base);
+}
+NOKPROBE_SYMBOL(process_fetch_insn)
+
/* Kprobe handler */
static nokprobe_inline void
__kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs,
@@ -1059,7 +978,7 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs,
entry = ring_buffer_event_data(event);
entry->ip = (unsigned long)tk->rp.kp.addr;
- store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
+ store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize);
event_trigger_unlock_commit_regs(trace_file, buffer, event,
entry, irq_flags, pc, regs);
@@ -1108,7 +1027,7 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
entry = ring_buffer_event_data(event);
entry->func = (unsigned long)tk->rp.kp.addr;
entry->ret_ip = (unsigned long)ri->ret_addr;
- store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
+ store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize);
event_trigger_unlock_commit_regs(trace_file, buffer, event,
entry, irq_flags, pc, regs);
@@ -1133,8 +1052,6 @@ print_kprobe_event(struct trace_iterator *iter, int flags,
struct kprobe_trace_entry_head *field;
struct trace_seq *s = &iter->seq;
struct trace_probe *tp;
- u8 *data;
- int i;
field = (struct kprobe_trace_entry_head *)iter->ent;
tp = container_of(event, struct trace_probe, call.event);
@@ -1146,11 +1063,9 @@ print_kprobe_event(struct trace_iterator *iter, int flags,
trace_seq_putc(s, ')');
- data = (u8 *)&field[1];
- for (i = 0; i < tp->nr_args; i++)
- if (!tp->args[i].type->print(s, tp->args[i].name,
- data + tp->args[i].offset, field))
- goto out;
+ if (print_probe_args(s, tp->args, tp->nr_args,
+ (u8 *)&field[1], field) < 0)
+ goto out;
trace_seq_putc(s, '\n');
out:
@@ -1164,8 +1079,6 @@ print_kretprobe_event(struct trace_iterator *iter, int flags,
struct kretprobe_trace_entry_head *field;
struct trace_seq *s = &iter->seq;
struct trace_probe *tp;
- u8 *data;
- int i;
field = (struct kretprobe_trace_entry_head *)iter->ent;
tp = container_of(event, struct trace_probe, call.event);
@@ -1182,11 +1095,9 @@ print_kretprobe_event(struct trace_iterator *iter, int flags,
trace_seq_putc(s, ')');
- data = (u8 *)&field[1];
- for (i = 0; i < tp->nr_args; i++)
- if (!tp->args[i].type->print(s, tp->args[i].name,
- data + tp->args[i].offset, field))
- goto out;
+ if (print_probe_args(s, tp->args, tp->nr_args,
+ (u8 *)&field[1], field) < 0)
+ goto out;
trace_seq_putc(s, '\n');
@@ -1197,49 +1108,25 @@ print_kretprobe_event(struct trace_iterator *iter, int flags,
static int kprobe_event_define_fields(struct trace_event_call *event_call)
{
- int ret, i;
+ int ret;
struct kprobe_trace_entry_head field;
struct trace_kprobe *tk = (struct trace_kprobe *)event_call->data;
DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
- /* Set argument names as fields */
- for (i = 0; i < tk->tp.nr_args; i++) {
- struct probe_arg *parg = &tk->tp.args[i];
-
- ret = trace_define_field(event_call, parg->type->fmttype,
- parg->name,
- sizeof(field) + parg->offset,
- parg->type->size,
- parg->type->is_signed,
- FILTER_OTHER);
- if (ret)
- return ret;
- }
- return 0;
+
+ return traceprobe_define_arg_fields(event_call, sizeof(field), &tk->tp);
}
static int kretprobe_event_define_fields(struct trace_event_call *event_call)
{
- int ret, i;
+ int ret;
struct kretprobe_trace_entry_head field;
struct trace_kprobe *tk = (struct trace_kprobe *)event_call->data;
DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
- /* Set argument names as fields */
- for (i = 0; i < tk->tp.nr_args; i++) {
- struct probe_arg *parg = &tk->tp.args[i];
-
- ret = trace_define_field(event_call, parg->type->fmttype,
- parg->name,
- sizeof(field) + parg->offset,
- parg->type->size,
- parg->type->is_signed,
- FILTER_OTHER);
- if (ret)
- return ret;
- }
- return 0;
+
+ return traceprobe_define_arg_fields(event_call, sizeof(field), &tk->tp);
}
#ifdef CONFIG_PERF_EVENTS
@@ -1286,7 +1173,7 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
entry->ip = (unsigned long)tk->rp.kp.addr;
memset(&entry[1], 0, dsize);
- store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
+ store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize);
perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
head, NULL);
return 0;
@@ -1322,7 +1209,7 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
entry->func = (unsigned long)tk->rp.kp.addr;
entry->ret_ip = (unsigned long)ri->ret_addr;
- store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
+ store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize);
perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
head, NULL);
}
@@ -1457,7 +1344,7 @@ static int register_kprobe_event(struct trace_kprobe *tk)
init_trace_event_call(tk, call);
- if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0)
+ if (traceprobe_set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0)
return -ENOMEM;
ret = register_trace_event(&call->event);
if (!ret) {
@@ -1496,7 +1383,7 @@ create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
char *event;
/*
- * local trace_kprobes are not added to probe_list, so they are never
+ * local trace_kprobes are not added to dyn_event, so they are never
* searched in find_trace_kprobe(). Therefore, there is no concern of
* duplicated name here.
*/
@@ -1514,7 +1401,7 @@ create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
init_trace_event_call(tk, &tk->tp.call);
- if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0) {
+ if (traceprobe_set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0) {
ret = -ENOMEM;
goto error;
}
@@ -1554,6 +1441,11 @@ static __init int init_kprobe_trace(void)
{
struct dentry *d_tracer;
struct dentry *entry;
+ int ret;
+
+ ret = dyn_event_register(&trace_kprobe_ops);
+ if (ret)
+ return ret;
if (register_module_notifier(&trace_kprobe_module_nb))
return -EINVAL;
@@ -1611,9 +1503,8 @@ static __init int kprobe_trace_self_tests_init(void)
pr_info("Testing kprobe tracing: ");
- ret = trace_run_command("p:testprobe kprobe_trace_selftest_target "
- "$stack $stack0 +0($stack)",
- create_trace_kprobe);
+ ret = trace_run_command("p:testprobe kprobe_trace_selftest_target $stack $stack0 +0($stack)",
+ create_or_delete_trace_kprobe);
if (WARN_ON_ONCE(ret)) {
pr_warn("error on probing function entry.\n");
warn++;
@@ -1633,8 +1524,8 @@ static __init int kprobe_trace_self_tests_init(void)
}
}
- ret = trace_run_command("r:testprobe2 kprobe_trace_selftest_target "
- "$retval", create_trace_kprobe);
+ ret = trace_run_command("r:testprobe2 kprobe_trace_selftest_target $retval",
+ create_or_delete_trace_kprobe);
if (WARN_ON_ONCE(ret)) {
pr_warn("error on probing function return.\n");
warn++;
@@ -1704,20 +1595,24 @@ static __init int kprobe_trace_self_tests_init(void)
disable_trace_kprobe(tk, file);
}
- ret = trace_run_command("-:testprobe", create_trace_kprobe);
+ ret = trace_run_command("-:testprobe", create_or_delete_trace_kprobe);
if (WARN_ON_ONCE(ret)) {
pr_warn("error on deleting a probe.\n");
warn++;
}
- ret = trace_run_command("-:testprobe2", create_trace_kprobe);
+ ret = trace_run_command("-:testprobe2", create_or_delete_trace_kprobe);
if (WARN_ON_ONCE(ret)) {
pr_warn("error on deleting a probe.\n");
warn++;
}
end:
- release_all_trace_kprobes();
+ ret = dyn_events_release_all(&trace_kprobe_ops);
+ if (WARN_ON_ONCE(ret)) {
+ pr_warn("error on cleaning up probes.\n");
+ warn++;
+ }
/*
* Wait for the optimizer work to finish. Otherwise it might fiddle
* with probes in already freed __init text.
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 6e6cc64faa38..54373d93e251 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -339,43 +339,24 @@ static inline const char *kretprobed(const char *name)
#endif /* CONFIG_KRETPROBES */
static void
-seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
+seq_print_sym(struct trace_seq *s, unsigned long address, bool offset)
{
- char str[KSYM_SYMBOL_LEN];
#ifdef CONFIG_KALLSYMS
- const char *name;
-
- kallsyms_lookup(address, NULL, NULL, NULL, str);
-
- name = kretprobed(str);
-
- if (name && strlen(name)) {
- trace_seq_printf(s, fmt, name);
- return;
- }
-#endif
- snprintf(str, KSYM_SYMBOL_LEN, "0x%08lx", address);
- trace_seq_printf(s, fmt, str);
-}
-
-static void
-seq_print_sym_offset(struct trace_seq *s, const char *fmt,
- unsigned long address)
-{
char str[KSYM_SYMBOL_LEN];
-#ifdef CONFIG_KALLSYMS
const char *name;
- sprint_symbol(str, address);
+ if (offset)
+ sprint_symbol(str, address);
+ else
+ kallsyms_lookup(address, NULL, NULL, NULL, str);
name = kretprobed(str);
if (name && strlen(name)) {
- trace_seq_printf(s, fmt, name);
+ trace_seq_puts(s, name);
return;
}
#endif
- snprintf(str, KSYM_SYMBOL_LEN, "0x%08lx", address);
- trace_seq_printf(s, fmt, str);
+ trace_seq_printf(s, "0x%08lx", address);
}
#ifndef CONFIG_64BIT
@@ -424,10 +405,7 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
goto out;
}
- if (sym_flags & TRACE_ITER_SYM_OFFSET)
- seq_print_sym_offset(s, "%s", ip);
- else
- seq_print_sym_short(s, "%s", ip);
+ seq_print_sym(s, ip, sym_flags & TRACE_ITER_SYM_OFFSET);
if (sym_flags & TRACE_ITER_SYM_ADDR)
trace_seq_printf(s, " <" IP_FMT ">", ip);
diff --git a/kernel/trace/trace_preemptirq.c b/kernel/trace/trace_preemptirq.c
index 71f553cceb3c..4d8e99fdbbbe 100644
--- a/kernel/trace/trace_preemptirq.c
+++ b/kernel/trace/trace_preemptirq.c
@@ -9,6 +9,7 @@
#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/ftrace.h>
+#include <linux/kprobes.h>
#include "trace.h"
#define CREATE_TRACE_POINTS
@@ -30,6 +31,7 @@ void trace_hardirqs_on(void)
lockdep_hardirqs_on(CALLER_ADDR0);
}
EXPORT_SYMBOL(trace_hardirqs_on);
+NOKPROBE_SYMBOL(trace_hardirqs_on);
void trace_hardirqs_off(void)
{
@@ -43,6 +45,7 @@ void trace_hardirqs_off(void)
lockdep_hardirqs_off(CALLER_ADDR0);
}
EXPORT_SYMBOL(trace_hardirqs_off);
+NOKPROBE_SYMBOL(trace_hardirqs_off);
__visible void trace_hardirqs_on_caller(unsigned long caller_addr)
{
@@ -56,6 +59,7 @@ __visible void trace_hardirqs_on_caller(unsigned long caller_addr)
lockdep_hardirqs_on(CALLER_ADDR0);
}
EXPORT_SYMBOL(trace_hardirqs_on_caller);
+NOKPROBE_SYMBOL(trace_hardirqs_on_caller);
__visible void trace_hardirqs_off_caller(unsigned long caller_addr)
{
@@ -69,6 +73,7 @@ __visible void trace_hardirqs_off_caller(unsigned long caller_addr)
lockdep_hardirqs_off(CALLER_ADDR0);
}
EXPORT_SYMBOL(trace_hardirqs_off_caller);
+NOKPROBE_SYMBOL(trace_hardirqs_off_caller);
#endif /* CONFIG_TRACE_IRQFLAGS */
#ifdef CONFIG_TRACE_PREEMPT_TOGGLE
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index b0875b327f5c..c3fd849d4a8f 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -115,7 +115,7 @@ static int module_trace_bprintk_format_notify(struct notifier_block *self,
* section, then we need to read the link list pointers. The trick is
* we pass the address of the string to the seq function just like
* we do for the kernel core formats. To get back the structure that
- * holds the format, we simply use containerof() and then go to the
+ * holds the format, we simply use container_of() and then go to the
* next format in the list.
*/
static const char **
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index e99c3ce7aa65..9962cb5da8ac 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -26,14 +26,12 @@ const char *reserved_field_names[] = {
/* Printing in basic type function template */
#define DEFINE_BASIC_PRINT_TYPE_FUNC(tname, type, fmt) \
-int PRINT_TYPE_FUNC_NAME(tname)(struct trace_seq *s, const char *name, \
- void *data, void *ent) \
+int PRINT_TYPE_FUNC_NAME(tname)(struct trace_seq *s, void *data, void *ent)\
{ \
- trace_seq_printf(s, " %s=" fmt, name, *(type *)data); \
+ trace_seq_printf(s, fmt, *(type *)data); \
return !trace_seq_has_overflowed(s); \
} \
-const char PRINT_TYPE_FMT_NAME(tname)[] = fmt; \
-NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(tname));
+const char PRINT_TYPE_FMT_NAME(tname)[] = fmt;
DEFINE_BASIC_PRINT_TYPE_FUNC(u8, u8, "%u")
DEFINE_BASIC_PRINT_TYPE_FUNC(u16, u16, "%u")
@@ -48,193 +46,52 @@ DEFINE_BASIC_PRINT_TYPE_FUNC(x16, u16, "0x%x")
DEFINE_BASIC_PRINT_TYPE_FUNC(x32, u32, "0x%x")
DEFINE_BASIC_PRINT_TYPE_FUNC(x64, u64, "0x%Lx")
+int PRINT_TYPE_FUNC_NAME(symbol)(struct trace_seq *s, void *data, void *ent)
+{
+ trace_seq_printf(s, "%pS", (void *)*(unsigned long *)data);
+ return !trace_seq_has_overflowed(s);
+}
+const char PRINT_TYPE_FMT_NAME(symbol)[] = "%pS";
+
/* Print type function for string type */
-int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, const char *name,
- void *data, void *ent)
+int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, void *data, void *ent)
{
int len = *(u32 *)data >> 16;
if (!len)
- trace_seq_printf(s, " %s=(fault)", name);
+ trace_seq_puts(s, "(fault)");
else
- trace_seq_printf(s, " %s=\"%s\"", name,
+ trace_seq_printf(s, "\"%s\"",
(const char *)get_loc_data(data, ent));
return !trace_seq_has_overflowed(s);
}
-NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(string));
const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\"";
-#define CHECK_FETCH_FUNCS(method, fn) \
- (((FETCH_FUNC_NAME(method, u8) == fn) || \
- (FETCH_FUNC_NAME(method, u16) == fn) || \
- (FETCH_FUNC_NAME(method, u32) == fn) || \
- (FETCH_FUNC_NAME(method, u64) == fn) || \
- (FETCH_FUNC_NAME(method, string) == fn) || \
- (FETCH_FUNC_NAME(method, string_size) == fn)) \
- && (fn != NULL))
-
-/* Data fetch function templates */
-#define DEFINE_FETCH_reg(type) \
-void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, void *offset, void *dest) \
-{ \
- *(type *)dest = (type)regs_get_register(regs, \
- (unsigned int)((unsigned long)offset)); \
-} \
-NOKPROBE_SYMBOL(FETCH_FUNC_NAME(reg, type));
-DEFINE_BASIC_FETCH_FUNCS(reg)
-/* No string on the register */
-#define fetch_reg_string NULL
-#define fetch_reg_string_size NULL
-
-#define DEFINE_FETCH_retval(type) \
-void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs, \
- void *dummy, void *dest) \
-{ \
- *(type *)dest = (type)regs_return_value(regs); \
-} \
-NOKPROBE_SYMBOL(FETCH_FUNC_NAME(retval, type));
-DEFINE_BASIC_FETCH_FUNCS(retval)
-/* No string on the retval */
-#define fetch_retval_string NULL
-#define fetch_retval_string_size NULL
-
-/* Dereference memory access function */
-struct deref_fetch_param {
- struct fetch_param orig;
- long offset;
- fetch_func_t fetch;
- fetch_func_t fetch_size;
+/* Fetch type information table */
+static const struct fetch_type probe_fetch_types[] = {
+ /* Special types */
+ __ASSIGN_FETCH_TYPE("string", string, string, sizeof(u32), 1,
+ "__data_loc char[]"),
+ /* Basic types */
+ ASSIGN_FETCH_TYPE(u8, u8, 0),
+ ASSIGN_FETCH_TYPE(u16, u16, 0),
+ ASSIGN_FETCH_TYPE(u32, u32, 0),
+ ASSIGN_FETCH_TYPE(u64, u64, 0),
+ ASSIGN_FETCH_TYPE(s8, u8, 1),
+ ASSIGN_FETCH_TYPE(s16, u16, 1),
+ ASSIGN_FETCH_TYPE(s32, u32, 1),
+ ASSIGN_FETCH_TYPE(s64, u64, 1),
+ ASSIGN_FETCH_TYPE_ALIAS(x8, u8, u8, 0),
+ ASSIGN_FETCH_TYPE_ALIAS(x16, u16, u16, 0),
+ ASSIGN_FETCH_TYPE_ALIAS(x32, u32, u32, 0),
+ ASSIGN_FETCH_TYPE_ALIAS(x64, u64, u64, 0),
+ ASSIGN_FETCH_TYPE_ALIAS(symbol, ADDR_FETCH_TYPE, ADDR_FETCH_TYPE, 0),
+
+ ASSIGN_FETCH_TYPE_END
};
-#define DEFINE_FETCH_deref(type) \
-void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs, \
- void *data, void *dest) \
-{ \
- struct deref_fetch_param *dprm = data; \
- unsigned long addr; \
- call_fetch(&dprm->orig, regs, &addr); \
- if (addr) { \
- addr += dprm->offset; \
- dprm->fetch(regs, (void *)addr, dest); \
- } else \
- *(type *)dest = 0; \
-} \
-NOKPROBE_SYMBOL(FETCH_FUNC_NAME(deref, type));
-DEFINE_BASIC_FETCH_FUNCS(deref)
-DEFINE_FETCH_deref(string)
-
-void FETCH_FUNC_NAME(deref, string_size)(struct pt_regs *regs,
- void *data, void *dest)
-{
- struct deref_fetch_param *dprm = data;
- unsigned long addr;
-
- call_fetch(&dprm->orig, regs, &addr);
- if (addr && dprm->fetch_size) {
- addr += dprm->offset;
- dprm->fetch_size(regs, (void *)addr, dest);
- } else
- *(string_size *)dest = 0;
-}
-NOKPROBE_SYMBOL(FETCH_FUNC_NAME(deref, string_size));
-
-static void update_deref_fetch_param(struct deref_fetch_param *data)
-{
- if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
- update_deref_fetch_param(data->orig.data);
- else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
- update_symbol_cache(data->orig.data);
-}
-NOKPROBE_SYMBOL(update_deref_fetch_param);
-
-static void free_deref_fetch_param(struct deref_fetch_param *data)
-{
- if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
- free_deref_fetch_param(data->orig.data);
- else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
- free_symbol_cache(data->orig.data);
- kfree(data);
-}
-NOKPROBE_SYMBOL(free_deref_fetch_param);
-
-/* Bitfield fetch function */
-struct bitfield_fetch_param {
- struct fetch_param orig;
- unsigned char hi_shift;
- unsigned char low_shift;
-};
-
-#define DEFINE_FETCH_bitfield(type) \
-void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs, \
- void *data, void *dest) \
-{ \
- struct bitfield_fetch_param *bprm = data; \
- type buf = 0; \
- call_fetch(&bprm->orig, regs, &buf); \
- if (buf) { \
- buf <<= bprm->hi_shift; \
- buf >>= bprm->low_shift; \
- } \
- *(type *)dest = buf; \
-} \
-NOKPROBE_SYMBOL(FETCH_FUNC_NAME(bitfield, type));
-DEFINE_BASIC_FETCH_FUNCS(bitfield)
-#define fetch_bitfield_string NULL
-#define fetch_bitfield_string_size NULL
-
-static void
-update_bitfield_fetch_param(struct bitfield_fetch_param *data)
-{
- /*
- * Don't check the bitfield itself, because this must be the
- * last fetch function.
- */
- if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
- update_deref_fetch_param(data->orig.data);
- else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
- update_symbol_cache(data->orig.data);
-}
-
-static void
-free_bitfield_fetch_param(struct bitfield_fetch_param *data)
-{
- /*
- * Don't check the bitfield itself, because this must be the
- * last fetch function.
- */
- if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
- free_deref_fetch_param(data->orig.data);
- else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
- free_symbol_cache(data->orig.data);
-
- kfree(data);
-}
-
-void FETCH_FUNC_NAME(comm, string)(struct pt_regs *regs,
- void *data, void *dest)
-{
- int maxlen = get_rloc_len(*(u32 *)dest);
- u8 *dst = get_rloc_data(dest);
- long ret;
-
- if (!maxlen)
- return;
-
- ret = strlcpy(dst, current->comm, maxlen);
- *(u32 *)dest = make_data_rloc(ret, get_rloc_offs(*(u32 *)dest));
-}
-NOKPROBE_SYMBOL(FETCH_FUNC_NAME(comm, string));
-
-void FETCH_FUNC_NAME(comm, string_size)(struct pt_regs *regs,
- void *data, void *dest)
-{
- *(u32 *)dest = strlen(current->comm) + 1;
-}
-NOKPROBE_SYMBOL(FETCH_FUNC_NAME(comm, string_size));
-
-static const struct fetch_type *find_fetch_type(const char *type,
- const struct fetch_type *ftbl)
+static const struct fetch_type *find_fetch_type(const char *type)
{
int i;
@@ -255,58 +112,27 @@ static const struct fetch_type *find_fetch_type(const char *type,
switch (bs) {
case 8:
- return find_fetch_type("u8", ftbl);
+ return find_fetch_type("u8");
case 16:
- return find_fetch_type("u16", ftbl);
+ return find_fetch_type("u16");
case 32:
- return find_fetch_type("u32", ftbl);
+ return find_fetch_type("u32");
case 64:
- return find_fetch_type("u64", ftbl);
+ return find_fetch_type("u64");
default:
goto fail;
}
}
- for (i = 0; ftbl[i].name; i++) {
- if (strcmp(type, ftbl[i].name) == 0)
- return &ftbl[i];
+ for (i = 0; probe_fetch_types[i].name; i++) {
+ if (strcmp(type, probe_fetch_types[i].name) == 0)
+ return &probe_fetch_types[i];
}
fail:
return NULL;
}
-/* Special function : only accept unsigned long */
-static void fetch_kernel_stack_address(struct pt_regs *regs, void *dummy, void *dest)
-{
- *(unsigned long *)dest = kernel_stack_pointer(regs);
-}
-NOKPROBE_SYMBOL(fetch_kernel_stack_address);
-
-static void fetch_user_stack_address(struct pt_regs *regs, void *dummy, void *dest)
-{
- *(unsigned long *)dest = user_stack_pointer(regs);
-}
-NOKPROBE_SYMBOL(fetch_user_stack_address);
-
-static fetch_func_t get_fetch_size_function(const struct fetch_type *type,
- fetch_func_t orig_fn,
- const struct fetch_type *ftbl)
-{
- int i;
-
- if (type != &ftbl[FETCH_TYPE_STRING])
- return NULL; /* Only string type needs size function */
-
- for (i = 0; i < FETCH_MTD_END; i++)
- if (type->fetch[i] == orig_fn)
- return ftbl[FETCH_TYPE_STRSIZE].fetch[i];
-
- WARN_ON(1); /* This should not happen */
-
- return NULL;
-}
-
/* Split symbol and offset. */
int traceprobe_split_symbol_offset(char *symbol, long *offset)
{
@@ -328,44 +154,75 @@ int traceprobe_split_symbol_offset(char *symbol, long *offset)
return 0;
}
+/* @buf must has MAX_EVENT_NAME_LEN size */
+int traceprobe_parse_event_name(const char **pevent, const char **pgroup,
+ char *buf)
+{
+ const char *slash, *event = *pevent;
+
+ slash = strchr(event, '/');
+ if (slash) {
+ if (slash == event) {
+ pr_info("Group name is not specified\n");
+ return -EINVAL;
+ }
+ if (slash - event + 1 > MAX_EVENT_NAME_LEN) {
+ pr_info("Group name is too long\n");
+ return -E2BIG;
+ }
+ strlcpy(buf, event, slash - event + 1);
+ *pgroup = buf;
+ *pevent = slash + 1;
+ }
+ if (strlen(event) == 0) {
+ pr_info("Event name is not specified\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))
static int parse_probe_vars(char *arg, const struct fetch_type *t,
- struct fetch_param *f, bool is_return,
- bool is_kprobe)
+ struct fetch_insn *code, unsigned int flags)
{
- int ret = 0;
unsigned long param;
+ int ret = 0;
+ int len;
if (strcmp(arg, "retval") == 0) {
- if (is_return)
- f->fn = t->fetch[FETCH_MTD_retval];
+ if (flags & TPARG_FL_RETURN)
+ code->op = FETCH_OP_RETVAL;
else
ret = -EINVAL;
- } else if (strncmp(arg, "stack", 5) == 0) {
- if (arg[5] == '\0') {
- if (strcmp(t->name, DEFAULT_FETCH_TYPE_STR))
- return -EINVAL;
-
- if (is_kprobe)
- f->fn = fetch_kernel_stack_address;
- else
- f->fn = fetch_user_stack_address;
- } else if (isdigit(arg[5])) {
- ret = kstrtoul(arg + 5, 10, &param);
- if (ret || (is_kprobe && param > PARAM_MAX_STACK))
+ } else if ((len = str_has_prefix(arg, "stack"))) {
+ if (arg[len] == '\0') {
+ code->op = FETCH_OP_STACKP;
+ } else if (isdigit(arg[len])) {
+ ret = kstrtoul(arg + len, 10, &param);
+ if (ret || ((flags & TPARG_FL_KERNEL) &&
+ param > PARAM_MAX_STACK))
ret = -EINVAL;
else {
- f->fn = t->fetch[FETCH_MTD_stack];
- f->data = (void *)param;
+ code->op = FETCH_OP_STACK;
+ code->param = (unsigned int)param;
}
} else
ret = -EINVAL;
} else if (strcmp(arg, "comm") == 0) {
- if (strcmp(t->name, "string") != 0 &&
- strcmp(t->name, "string_size") != 0)
+ code->op = FETCH_OP_COMM;
+#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
+ } else if (((flags & TPARG_FL_MASK) ==
+ (TPARG_FL_KERNEL | TPARG_FL_FENTRY)) &&
+ (len = str_has_prefix(arg, "arg"))) {
+ if (!isdigit(arg[len]))
return -EINVAL;
- f->fn = t->fetch[FETCH_MTD_comm];
+ ret = kstrtoul(arg + len, 10, &param);
+ if (ret || !param || param > PARAM_MAX_STACK)
+ return -EINVAL;
+ code->op = FETCH_OP_ARG;
+ code->param = (unsigned int)param - 1;
+#endif
} else
ret = -EINVAL;
@@ -373,25 +230,27 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t,
}
/* Recursive argument parser */
-static int parse_probe_arg(char *arg, const struct fetch_type *t,
- struct fetch_param *f, bool is_return, bool is_kprobe,
- const struct fetch_type *ftbl)
+static int
+parse_probe_arg(char *arg, const struct fetch_type *type,
+ struct fetch_insn **pcode, struct fetch_insn *end,
+ unsigned int flags)
{
+ struct fetch_insn *code = *pcode;
unsigned long param;
- long offset;
+ long offset = 0;
char *tmp;
int ret = 0;
switch (arg[0]) {
case '$':
- ret = parse_probe_vars(arg + 1, t, f, is_return, is_kprobe);
+ ret = parse_probe_vars(arg + 1, type, code, flags);
break;
case '%': /* named register */
ret = regs_query_register_offset(arg + 1);
if (ret >= 0) {
- f->fn = t->fetch[FETCH_MTD_reg];
- f->data = (void *)(unsigned long)ret;
+ code->op = FETCH_OP_REG;
+ code->param = (unsigned int)ret;
ret = 0;
}
break;
@@ -401,33 +260,42 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t,
ret = kstrtoul(arg + 1, 0, &param);
if (ret)
break;
-
- f->fn = t->fetch[FETCH_MTD_memory];
- f->data = (void *)param;
+ /* load address */
+ code->op = FETCH_OP_IMM;
+ code->immediate = param;
} else if (arg[1] == '+') {
/* kprobes don't support file offsets */
- if (is_kprobe)
+ if (flags & TPARG_FL_KERNEL)
return -EINVAL;
ret = kstrtol(arg + 2, 0, &offset);
if (ret)
break;
- f->fn = t->fetch[FETCH_MTD_file_offset];
- f->data = (void *)offset;
+ code->op = FETCH_OP_FOFFS;
+ code->immediate = (unsigned long)offset; // imm64?
} else {
/* uprobes don't support symbols */
- if (!is_kprobe)
+ if (!(flags & TPARG_FL_KERNEL))
return -EINVAL;
- ret = traceprobe_split_symbol_offset(arg + 1, &offset);
- if (ret)
- break;
+ /* Preserve symbol for updating */
+ code->op = FETCH_NOP_SYMBOL;
+ code->data = kstrdup(arg + 1, GFP_KERNEL);
+ if (!code->data)
+ return -ENOMEM;
+ if (++code == end)
+ return -E2BIG;
- f->data = alloc_symbol_cache(arg + 1, offset);
- if (f->data)
- f->fn = t->fetch[FETCH_MTD_symbol];
+ code->op = FETCH_OP_IMM;
+ code->immediate = 0;
}
+ /* These are fetching from memory */
+ if (++code == end)
+ return -E2BIG;
+ *pcode = code;
+ code->op = FETCH_OP_DEREF;
+ code->offset = offset;
break;
case '+': /* deref memory */
@@ -435,11 +303,10 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t,
case '-':
tmp = strchr(arg, '(');
if (!tmp)
- break;
+ return -EINVAL;
*tmp = '\0';
ret = kstrtol(arg, 0, &offset);
-
if (ret)
break;
@@ -447,36 +314,27 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t,
tmp = strrchr(arg, ')');
if (tmp) {
- struct deref_fetch_param *dprm;
- const struct fetch_type *t2;
+ const struct fetch_type *t2 = find_fetch_type(NULL);
- t2 = find_fetch_type(NULL, ftbl);
*tmp = '\0';
- dprm = kzalloc(sizeof(struct deref_fetch_param), GFP_KERNEL);
-
- if (!dprm)
- return -ENOMEM;
-
- dprm->offset = offset;
- dprm->fetch = t->fetch[FETCH_MTD_memory];
- dprm->fetch_size = get_fetch_size_function(t,
- dprm->fetch, ftbl);
- ret = parse_probe_arg(arg, t2, &dprm->orig, is_return,
- is_kprobe, ftbl);
+ ret = parse_probe_arg(arg, t2, &code, end, flags);
if (ret)
- kfree(dprm);
- else {
- f->fn = t->fetch[FETCH_MTD_deref];
- f->data = (void *)dprm;
- }
+ break;
+ if (code->op == FETCH_OP_COMM)
+ return -EINVAL;
+ if (++code == end)
+ return -E2BIG;
+ *pcode = code;
+
+ code->op = FETCH_OP_DEREF;
+ code->offset = offset;
}
break;
}
- if (!ret && !f->fn) { /* Parsed, but do not find fetch method */
- pr_info("%s type has no corresponding fetch method.\n", t->name);
+ if (!ret && code->op == FETCH_OP_NOP) {
+ /* Parsed, but do not find fetch method */
ret = -EINVAL;
}
-
return ret;
}
@@ -485,22 +343,15 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t,
/* Bitfield type needs to be parsed into a fetch function */
static int __parse_bitfield_probe_arg(const char *bf,
const struct fetch_type *t,
- struct fetch_param *f)
+ struct fetch_insn **pcode)
{
- struct bitfield_fetch_param *bprm;
+ struct fetch_insn *code = *pcode;
unsigned long bw, bo;
char *tail;
if (*bf != 'b')
return 0;
- bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
- if (!bprm)
- return -ENOMEM;
-
- bprm->orig = *f;
- f->fn = t->fetch[FETCH_MTD_bitfield];
- f->data = (void *)bprm;
bw = simple_strtoul(bf + 1, &tail, 0); /* Use simple one */
if (bw == 0 || *tail != '@')
@@ -511,20 +362,26 @@ static int __parse_bitfield_probe_arg(const char *bf,
if (tail == bf || *tail != '/')
return -EINVAL;
+ code++;
+ if (code->op != FETCH_OP_NOP)
+ return -E2BIG;
+ *pcode = code;
- bprm->hi_shift = BYTES_TO_BITS(t->size) - (bw + bo);
- bprm->low_shift = bprm->hi_shift + bo;
+ code->op = FETCH_OP_MOD_BF;
+ code->lshift = BYTES_TO_BITS(t->size) - (bw + bo);
+ code->rshift = BYTES_TO_BITS(t->size) - bw;
+ code->basesize = t->size;
return (BYTES_TO_BITS(t->size) < (bw + bo)) ? -EINVAL : 0;
}
/* String length checking wrapper */
-int traceprobe_parse_probe_arg(char *arg, ssize_t *size,
- struct probe_arg *parg, bool is_return, bool is_kprobe,
- const struct fetch_type *ftbl)
+static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size,
+ struct probe_arg *parg, unsigned int flags)
{
- const char *t;
- int ret;
+ struct fetch_insn *code, *scode, *tmp = NULL;
+ char *t, *t2;
+ int ret, len;
if (strlen(arg) > MAX_ARGSTR_LEN) {
pr_info("Argument is too long.: %s\n", arg);
@@ -535,43 +392,135 @@ int traceprobe_parse_probe_arg(char *arg, ssize_t *size,
pr_info("Failed to allocate memory for command '%s'.\n", arg);
return -ENOMEM;
}
- t = strchr(parg->comm, ':');
+ t = strchr(arg, ':');
if (t) {
- arg[t - parg->comm] = '\0';
- t++;
+ *t = '\0';
+ t2 = strchr(++t, '[');
+ if (t2) {
+ *t2 = '\0';
+ parg->count = simple_strtoul(t2 + 1, &t2, 0);
+ if (strcmp(t2, "]") || parg->count == 0)
+ return -EINVAL;
+ if (parg->count > MAX_ARRAY_LEN)
+ return -E2BIG;
+ }
}
/*
* The default type of $comm should be "string", and it can't be
* dereferenced.
*/
if (!t && strcmp(arg, "$comm") == 0)
- t = "string";
- parg->type = find_fetch_type(t, ftbl);
+ parg->type = find_fetch_type("string");
+ else
+ parg->type = find_fetch_type(t);
if (!parg->type) {
pr_info("Unsupported type: %s\n", t);
return -EINVAL;
}
parg->offset = *size;
- *size += parg->type->size;
- ret = parse_probe_arg(arg, parg->type, &parg->fetch, is_return,
- is_kprobe, ftbl);
-
- if (ret >= 0 && t != NULL)
- ret = __parse_bitfield_probe_arg(t, parg->type, &parg->fetch);
-
- if (ret >= 0) {
- parg->fetch_size.fn = get_fetch_size_function(parg->type,
- parg->fetch.fn,
- ftbl);
- parg->fetch_size.data = parg->fetch.data;
+ *size += parg->type->size * (parg->count ?: 1);
+
+ if (parg->count) {
+ len = strlen(parg->type->fmttype) + 6;
+ parg->fmt = kmalloc(len, GFP_KERNEL);
+ if (!parg->fmt)
+ return -ENOMEM;
+ snprintf(parg->fmt, len, "%s[%d]", parg->type->fmttype,
+ parg->count);
}
+ code = tmp = kzalloc(sizeof(*code) * FETCH_INSN_MAX, GFP_KERNEL);
+ if (!code)
+ return -ENOMEM;
+ code[FETCH_INSN_MAX - 1].op = FETCH_OP_END;
+
+ ret = parse_probe_arg(arg, parg->type, &code, &code[FETCH_INSN_MAX - 1],
+ flags);
+ if (ret)
+ goto fail;
+
+ /* Store operation */
+ if (!strcmp(parg->type->name, "string")) {
+ if (code->op != FETCH_OP_DEREF && code->op != FETCH_OP_IMM &&
+ code->op != FETCH_OP_COMM) {
+ pr_info("string only accepts memory or address.\n");
+ ret = -EINVAL;
+ goto fail;
+ }
+ if (code->op != FETCH_OP_DEREF || parg->count) {
+ /*
+ * IMM and COMM is pointing actual address, those must
+ * be kept, and if parg->count != 0, this is an array
+ * of string pointers instead of string address itself.
+ */
+ code++;
+ if (code->op != FETCH_OP_NOP) {
+ ret = -E2BIG;
+ goto fail;
+ }
+ }
+ code->op = FETCH_OP_ST_STRING; /* In DEREF case, replace it */
+ code->size = parg->type->size;
+ parg->dynamic = true;
+ } else if (code->op == FETCH_OP_DEREF) {
+ code->op = FETCH_OP_ST_MEM;
+ code->size = parg->type->size;
+ } else {
+ code++;
+ if (code->op != FETCH_OP_NOP) {
+ ret = -E2BIG;
+ goto fail;
+ }
+ code->op = FETCH_OP_ST_RAW;
+ code->size = parg->type->size;
+ }
+ scode = code;
+ /* Modify operation */
+ if (t != NULL) {
+ ret = __parse_bitfield_probe_arg(t, parg->type, &code);
+ if (ret)
+ goto fail;
+ }
+ /* Loop(Array) operation */
+ if (parg->count) {
+ if (scode->op != FETCH_OP_ST_MEM &&
+ scode->op != FETCH_OP_ST_STRING) {
+ pr_info("array only accepts memory or address\n");
+ ret = -EINVAL;
+ goto fail;
+ }
+ code++;
+ if (code->op != FETCH_OP_NOP) {
+ ret = -E2BIG;
+ goto fail;
+ }
+ code->op = FETCH_OP_LP_ARRAY;
+ code->param = parg->count;
+ }
+ code++;
+ code->op = FETCH_OP_END;
+
+ /* Shrink down the code buffer */
+ parg->code = kzalloc(sizeof(*code) * (code - tmp + 1), GFP_KERNEL);
+ if (!parg->code)
+ ret = -ENOMEM;
+ else
+ memcpy(parg->code, tmp, sizeof(*code) * (code - tmp + 1));
+
+fail:
+ if (ret) {
+ for (code = tmp; code < tmp + FETCH_INSN_MAX; code++)
+ if (code->op == FETCH_NOP_SYMBOL)
+ kfree(code->data);
+ }
+ kfree(tmp);
+
return ret;
}
/* Return 1 if name is reserved or already used by another argument */
-int traceprobe_conflict_field_name(const char *name,
- struct probe_arg *args, int narg)
+static int traceprobe_conflict_field_name(const char *name,
+ struct probe_arg *args, int narg)
{
int i;
@@ -586,35 +535,104 @@ int traceprobe_conflict_field_name(const char *name,
return 0;
}
-void traceprobe_update_arg(struct probe_arg *arg)
+int traceprobe_parse_probe_arg(struct trace_probe *tp, int i, char *arg,
+ unsigned int flags)
{
- if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn))
- update_bitfield_fetch_param(arg->fetch.data);
- else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
- update_deref_fetch_param(arg->fetch.data);
- else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn))
- update_symbol_cache(arg->fetch.data);
+ struct probe_arg *parg = &tp->args[i];
+ char *body;
+ int ret;
+
+ /* Increment count for freeing args in error case */
+ tp->nr_args++;
+
+ body = strchr(arg, '=');
+ if (body) {
+ parg->name = kmemdup_nul(arg, body - arg, GFP_KERNEL);
+ body++;
+ } else {
+ /* If argument name is omitted, set "argN" */
+ parg->name = kasprintf(GFP_KERNEL, "arg%d", i + 1);
+ body = arg;
+ }
+ if (!parg->name)
+ return -ENOMEM;
+
+ if (!is_good_name(parg->name)) {
+ pr_info("Invalid argument[%d] name: %s\n",
+ i, parg->name);
+ return -EINVAL;
+ }
+
+ if (traceprobe_conflict_field_name(parg->name, tp->args, i)) {
+ pr_info("Argument[%d]: '%s' conflicts with another field.\n",
+ i, parg->name);
+ return -EINVAL;
+ }
+
+ /* Parse fetch argument */
+ ret = traceprobe_parse_probe_arg_body(body, &tp->size, parg, flags);
+ if (ret)
+ pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
+ return ret;
}
void traceprobe_free_probe_arg(struct probe_arg *arg)
{
- if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn))
- free_bitfield_fetch_param(arg->fetch.data);
- else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
- free_deref_fetch_param(arg->fetch.data);
- else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn))
- free_symbol_cache(arg->fetch.data);
+ struct fetch_insn *code = arg->code;
+ while (code && code->op != FETCH_OP_END) {
+ if (code->op == FETCH_NOP_SYMBOL)
+ kfree(code->data);
+ code++;
+ }
+ kfree(arg->code);
kfree(arg->name);
kfree(arg->comm);
+ kfree(arg->fmt);
+}
+
+int traceprobe_update_arg(struct probe_arg *arg)
+{
+ struct fetch_insn *code = arg->code;
+ long offset;
+ char *tmp;
+ char c;
+ int ret = 0;
+
+ while (code && code->op != FETCH_OP_END) {
+ if (code->op == FETCH_NOP_SYMBOL) {
+ if (code[1].op != FETCH_OP_IMM)
+ return -EINVAL;
+
+ tmp = strpbrk(code->data, "+-");
+ if (tmp)
+ c = *tmp;
+ ret = traceprobe_split_symbol_offset(code->data,
+ &offset);
+ if (ret)
+ return ret;
+
+ code[1].immediate =
+ (unsigned long)kallsyms_lookup_name(code->data);
+ if (tmp)
+ *tmp = c;
+ if (!code[1].immediate)
+ return -ENOENT;
+ code[1].immediate += offset;
+ }
+ code++;
+ }
+ return 0;
}
+/* When len=0, we just calculate the needed length */
+#define LEN_OR_ZERO (len ? len - pos : 0)
static int __set_print_fmt(struct trace_probe *tp, char *buf, int len,
bool is_return)
{
- int i;
+ struct probe_arg *parg;
+ int i, j;
int pos = 0;
-
const char *fmt, *arg;
if (!is_return) {
@@ -625,35 +643,51 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len,
arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
}
- /* When len=0, we just calculate the needed length */
-#define LEN_OR_ZERO (len ? len - pos : 0)
-
pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt);
for (i = 0; i < tp->nr_args; i++) {
- pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s",
- tp->args[i].name, tp->args[i].type->fmt);
+ parg = tp->args + i;
+ pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=", parg->name);
+ if (parg->count) {
+ pos += snprintf(buf + pos, LEN_OR_ZERO, "{%s",
+ parg->type->fmt);
+ for (j = 1; j < parg->count; j++)
+ pos += snprintf(buf + pos, LEN_OR_ZERO, ",%s",
+ parg->type->fmt);
+ pos += snprintf(buf + pos, LEN_OR_ZERO, "}");
+ } else
+ pos += snprintf(buf + pos, LEN_OR_ZERO, "%s",
+ parg->type->fmt);
}
pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);
for (i = 0; i < tp->nr_args; i++) {
- if (strcmp(tp->args[i].type->name, "string") == 0)
+ parg = tp->args + i;
+ if (parg->count) {
+ if (strcmp(parg->type->name, "string") == 0)
+ fmt = ", __get_str(%s[%d])";
+ else
+ fmt = ", REC->%s[%d]";
+ for (j = 0; j < parg->count; j++)
+ pos += snprintf(buf + pos, LEN_OR_ZERO,
+ fmt, parg->name, j);
+ } else {
+ if (strcmp(parg->type->name, "string") == 0)
+ fmt = ", __get_str(%s)";
+ else
+ fmt = ", REC->%s";
pos += snprintf(buf + pos, LEN_OR_ZERO,
- ", __get_str(%s)",
- tp->args[i].name);
- else
- pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
- tp->args[i].name);
+ fmt, parg->name);
+ }
}
-#undef LEN_OR_ZERO
-
/* return the length of print_fmt */
return pos;
}
+#undef LEN_OR_ZERO
-int set_print_fmt(struct trace_probe *tp, bool is_return)
+int traceprobe_set_print_fmt(struct trace_probe *tp, bool is_return)
{
int len;
char *print_fmt;
@@ -670,3 +704,28 @@ int set_print_fmt(struct trace_probe *tp, bool is_return)
return 0;
}
+
+int traceprobe_define_arg_fields(struct trace_event_call *event_call,
+ size_t offset, struct trace_probe *tp)
+{
+ int ret, i;
+
+ /* Set argument names as fields */
+ for (i = 0; i < tp->nr_args; i++) {
+ struct probe_arg *parg = &tp->args[i];
+ const char *fmt = parg->type->fmttype;
+ int size = parg->type->size;
+
+ if (parg->fmt)
+ fmt = parg->fmt;
+ if (parg->count)
+ size *= parg->count;
+ ret = trace_define_field(event_call, fmt, parg->name,
+ offset + parg->offset, size,
+ parg->type->is_signed,
+ FILTER_OTHER);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 5f52668e165d..8a63f8bc01bc 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -23,6 +23,7 @@
#include <linux/stringify.h>
#include <linux/limits.h>
#include <linux/uaccess.h>
+#include <linux/bitops.h>
#include <asm/bitsperlong.h>
#include "trace.h"
@@ -30,6 +31,7 @@
#define MAX_TRACE_ARGS 128
#define MAX_ARGSTR_LEN 63
+#define MAX_ARRAY_LEN 64
#define MAX_STRING_SIZE PATH_MAX
/* Reserved field names */
@@ -54,50 +56,74 @@
#define TP_FLAG_PROFILE 2
#define TP_FLAG_REGISTERED 4
+/* data_loc: data location, compatible with u32 */
+#define make_data_loc(len, offs) \
+ (((u32)(len) << 16) | ((u32)(offs) & 0xffff))
+#define get_loc_len(dl) ((u32)(dl) >> 16)
+#define get_loc_offs(dl) ((u32)(dl) & 0xffff)
-/* data_rloc: data relative location, compatible with u32 */
-#define make_data_rloc(len, roffs) \
- (((u32)(len) << 16) | ((u32)(roffs) & 0xffff))
-#define get_rloc_len(dl) ((u32)(dl) >> 16)
-#define get_rloc_offs(dl) ((u32)(dl) & 0xffff)
-
-/*
- * Convert data_rloc to data_loc:
- * data_rloc stores the offset from data_rloc itself, but data_loc
- * stores the offset from event entry.
- */
-#define convert_rloc_to_loc(dl, offs) ((u32)(dl) + (offs))
-
-static nokprobe_inline void *get_rloc_data(u32 *dl)
+static nokprobe_inline void *get_loc_data(u32 *dl, void *ent)
{
- return (u8 *)dl + get_rloc_offs(*dl);
+ return (u8 *)ent + get_loc_offs(*dl);
}
-/* For data_loc conversion */
-static nokprobe_inline void *get_loc_data(u32 *dl, void *ent)
+static nokprobe_inline u32 update_data_loc(u32 loc, int consumed)
{
- return (u8 *)ent + get_rloc_offs(*dl);
+ u32 maxlen = get_loc_len(loc);
+ u32 offset = get_loc_offs(loc);
+
+ return make_data_loc(maxlen - consumed, offset + consumed);
}
-/* Data fetch function type */
-typedef void (*fetch_func_t)(struct pt_regs *, void *, void *);
/* Printing function type */
-typedef int (*print_type_func_t)(struct trace_seq *, const char *, void *, void *);
-
-/* Fetch types */
-enum {
- FETCH_MTD_reg = 0,
- FETCH_MTD_stack,
- FETCH_MTD_retval,
- FETCH_MTD_comm,
- FETCH_MTD_memory,
- FETCH_MTD_symbol,
- FETCH_MTD_deref,
- FETCH_MTD_bitfield,
- FETCH_MTD_file_offset,
- FETCH_MTD_END,
+typedef int (*print_type_func_t)(struct trace_seq *, void *, void *);
+
+enum fetch_op {
+ FETCH_OP_NOP = 0,
+ // Stage 1 (load) ops
+ FETCH_OP_REG, /* Register : .param = offset */
+ FETCH_OP_STACK, /* Stack : .param = index */
+ FETCH_OP_STACKP, /* Stack pointer */
+ FETCH_OP_RETVAL, /* Return value */
+ FETCH_OP_IMM, /* Immediate : .immediate */
+ FETCH_OP_COMM, /* Current comm */
+ FETCH_OP_ARG, /* Function argument : .param */
+ FETCH_OP_FOFFS, /* File offset: .immediate */
+ // Stage 2 (dereference) op
+ FETCH_OP_DEREF, /* Dereference: .offset */
+ // Stage 3 (store) ops
+ FETCH_OP_ST_RAW, /* Raw: .size */
+ FETCH_OP_ST_MEM, /* Mem: .offset, .size */
+ FETCH_OP_ST_STRING, /* String: .offset, .size */
+ // Stage 4 (modify) op
+ FETCH_OP_MOD_BF, /* Bitfield: .basesize, .lshift, .rshift */
+ // Stage 5 (loop) op
+ FETCH_OP_LP_ARRAY, /* Array: .param = loop count */
+ FETCH_OP_END,
+ FETCH_NOP_SYMBOL, /* Unresolved Symbol holder */
};
+struct fetch_insn {
+ enum fetch_op op;
+ union {
+ unsigned int param;
+ struct {
+ unsigned int size;
+ int offset;
+ };
+ struct {
+ unsigned char basesize;
+ unsigned char lshift;
+ unsigned char rshift;
+ };
+ unsigned long immediate;
+ void *data;
+ };
+};
+
+/* fetch + deref*N + store + mod + end <= 16, this allows N=12, enough */
+#define FETCH_INSN_MAX 16
+
/* Fetch type information table */
struct fetch_type {
const char *name; /* Name of type */
@@ -106,13 +132,6 @@ struct fetch_type {
print_type_func_t print; /* Print functions */
const char *fmt; /* Fromat string */
const char *fmttype; /* Name in format file */
- /* Fetch functions */
- fetch_func_t fetch[FETCH_MTD_END];
-};
-
-struct fetch_param {
- fetch_func_t fn;
- void *data;
};
/* For defining macros, define string/string_size types */
@@ -124,8 +143,7 @@ typedef u32 string_size;
/* Printing in basic type function template */
#define DECLARE_BASIC_PRINT_TYPE_FUNC(type) \
-int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, const char *name, \
- void *data, void *ent); \
+int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, void *data, void *ent);\
extern const char PRINT_TYPE_FMT_NAME(type)[]
DECLARE_BASIC_PRINT_TYPE_FUNC(u8);
@@ -142,57 +160,7 @@ DECLARE_BASIC_PRINT_TYPE_FUNC(x32);
DECLARE_BASIC_PRINT_TYPE_FUNC(x64);
DECLARE_BASIC_PRINT_TYPE_FUNC(string);
-
-#define FETCH_FUNC_NAME(method, type) fetch_##method##_##type
-
-/* Declare macro for basic types */
-#define DECLARE_FETCH_FUNC(method, type) \
-extern void FETCH_FUNC_NAME(method, type)(struct pt_regs *regs, \
- void *data, void *dest)
-
-#define DECLARE_BASIC_FETCH_FUNCS(method) \
-DECLARE_FETCH_FUNC(method, u8); \
-DECLARE_FETCH_FUNC(method, u16); \
-DECLARE_FETCH_FUNC(method, u32); \
-DECLARE_FETCH_FUNC(method, u64)
-
-DECLARE_BASIC_FETCH_FUNCS(reg);
-#define fetch_reg_string NULL
-#define fetch_reg_string_size NULL
-
-DECLARE_BASIC_FETCH_FUNCS(retval);
-#define fetch_retval_string NULL
-#define fetch_retval_string_size NULL
-
-DECLARE_BASIC_FETCH_FUNCS(symbol);
-DECLARE_FETCH_FUNC(symbol, string);
-DECLARE_FETCH_FUNC(symbol, string_size);
-
-DECLARE_BASIC_FETCH_FUNCS(deref);
-DECLARE_FETCH_FUNC(deref, string);
-DECLARE_FETCH_FUNC(deref, string_size);
-
-DECLARE_BASIC_FETCH_FUNCS(bitfield);
-#define fetch_bitfield_string NULL
-#define fetch_bitfield_string_size NULL
-
-/* comm only makes sense as a string */
-#define fetch_comm_u8 NULL
-#define fetch_comm_u16 NULL
-#define fetch_comm_u32 NULL
-#define fetch_comm_u64 NULL
-DECLARE_FETCH_FUNC(comm, string);
-DECLARE_FETCH_FUNC(comm, string_size);
-
-/*
- * Define macro for basic types - we don't need to define s* types, because
- * we have to care only about bitwidth at recording time.
- */
-#define DEFINE_BASIC_FETCH_FUNCS(method) \
-DEFINE_FETCH_##method(u8) \
-DEFINE_FETCH_##method(u16) \
-DEFINE_FETCH_##method(u32) \
-DEFINE_FETCH_##method(u64)
+DECLARE_BASIC_PRINT_TYPE_FUNC(symbol);
/* Default (unsigned long) fetch type */
#define __DEFAULT_FETCH_TYPE(t) x##t
@@ -200,8 +168,9 @@ DEFINE_FETCH_##method(u64)
#define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG)
#define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE)
-#define ASSIGN_FETCH_FUNC(method, type) \
- [FETCH_MTD_##method] = FETCH_FUNC_NAME(method, type)
+#define __ADDR_FETCH_TYPE(t) u##t
+#define _ADDR_FETCH_TYPE(t) __ADDR_FETCH_TYPE(t)
+#define ADDR_FETCH_TYPE _ADDR_FETCH_TYPE(BITS_PER_LONG)
#define __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype) \
{.name = _name, \
@@ -210,64 +179,23 @@ DEFINE_FETCH_##method(u64)
.print = PRINT_TYPE_FUNC_NAME(ptype), \
.fmt = PRINT_TYPE_FMT_NAME(ptype), \
.fmttype = _fmttype, \
- .fetch = { \
-ASSIGN_FETCH_FUNC(reg, ftype), \
-ASSIGN_FETCH_FUNC(stack, ftype), \
-ASSIGN_FETCH_FUNC(retval, ftype), \
-ASSIGN_FETCH_FUNC(comm, ftype), \
-ASSIGN_FETCH_FUNC(memory, ftype), \
-ASSIGN_FETCH_FUNC(symbol, ftype), \
-ASSIGN_FETCH_FUNC(deref, ftype), \
-ASSIGN_FETCH_FUNC(bitfield, ftype), \
-ASSIGN_FETCH_FUNC(file_offset, ftype), \
- } \
}
-
+#define _ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype) \
+ __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, #_fmttype)
#define ASSIGN_FETCH_TYPE(ptype, ftype, sign) \
- __ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #ptype)
+ _ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, ptype)
/* If ptype is an alias of atype, use this macro (show atype in format) */
#define ASSIGN_FETCH_TYPE_ALIAS(ptype, atype, ftype, sign) \
- __ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #atype)
+ _ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, atype)
#define ASSIGN_FETCH_TYPE_END {}
-
-#define FETCH_TYPE_STRING 0
-#define FETCH_TYPE_STRSIZE 1
+#define MAX_ARRAY_LEN 64
#ifdef CONFIG_KPROBE_EVENTS
-struct symbol_cache;
-unsigned long update_symbol_cache(struct symbol_cache *sc);
-void free_symbol_cache(struct symbol_cache *sc);
-struct symbol_cache *alloc_symbol_cache(const char *sym, long offset);
bool trace_kprobe_on_func_entry(struct trace_event_call *call);
bool trace_kprobe_error_injectable(struct trace_event_call *call);
#else
-/* uprobes do not support symbol fetch methods */
-#define fetch_symbol_u8 NULL
-#define fetch_symbol_u16 NULL
-#define fetch_symbol_u32 NULL
-#define fetch_symbol_u64 NULL
-#define fetch_symbol_string NULL
-#define fetch_symbol_string_size NULL
-
-struct symbol_cache {
-};
-static inline unsigned long __used update_symbol_cache(struct symbol_cache *sc)
-{
- return 0;
-}
-
-static inline void __used free_symbol_cache(struct symbol_cache *sc)
-{
-}
-
-static inline struct symbol_cache * __used
-alloc_symbol_cache(const char *sym, long offset)
-{
- return NULL;
-}
-
static inline bool trace_kprobe_on_func_entry(struct trace_event_call *call)
{
return false;
@@ -280,11 +208,13 @@ static inline bool trace_kprobe_error_injectable(struct trace_event_call *call)
#endif /* CONFIG_KPROBE_EVENTS */
struct probe_arg {
- struct fetch_param fetch;
- struct fetch_param fetch_size;
+ struct fetch_insn *code;
+ bool dynamic;/* Dynamic array (string) is used */
unsigned int offset; /* Offset from argument entry */
+ unsigned int count; /* Array count */
const char *name; /* Name of this argument */
const char *comm; /* Command of this argument */
+ char *fmt; /* Format string if needed */
const struct fetch_type *type; /* Type of this argument */
};
@@ -313,12 +243,6 @@ static inline bool trace_probe_is_registered(struct trace_probe *tp)
return !!(tp->flags & TP_FLAG_REGISTERED);
}
-static nokprobe_inline void call_fetch(struct fetch_param *fprm,
- struct pt_regs *regs, void *dest)
-{
- return fprm->fn(regs, fprm->data, dest);
-}
-
/* Check the name is good for event/group/fields */
static inline bool is_good_name(const char *name)
{
@@ -343,67 +267,22 @@ find_event_file_link(struct trace_probe *tp, struct trace_event_file *file)
return NULL;
}
-extern int traceprobe_parse_probe_arg(char *arg, ssize_t *size,
- struct probe_arg *parg, bool is_return, bool is_kprobe,
- const struct fetch_type *ftbl);
+#define TPARG_FL_RETURN BIT(0)
+#define TPARG_FL_KERNEL BIT(1)
+#define TPARG_FL_FENTRY BIT(2)
+#define TPARG_FL_MASK GENMASK(2, 0)
-extern int traceprobe_conflict_field_name(const char *name,
- struct probe_arg *args, int narg);
+extern int traceprobe_parse_probe_arg(struct trace_probe *tp, int i,
+ char *arg, unsigned int flags);
-extern void traceprobe_update_arg(struct probe_arg *arg);
+extern int traceprobe_update_arg(struct probe_arg *arg);
extern void traceprobe_free_probe_arg(struct probe_arg *arg);
extern int traceprobe_split_symbol_offset(char *symbol, long *offset);
+extern int traceprobe_parse_event_name(const char **pevent,
+ const char **pgroup, char *buf);
-/* Sum up total data length for dynamic arraies (strings) */
-static nokprobe_inline int
-__get_data_size(struct trace_probe *tp, struct pt_regs *regs)
-{
- int i, ret = 0;
- u32 len;
-
- for (i = 0; i < tp->nr_args; i++)
- if (unlikely(tp->args[i].fetch_size.fn)) {
- call_fetch(&tp->args[i].fetch_size, regs, &len);
- ret += len;
- }
-
- return ret;
-}
-
-/* Store the value of each argument */
-static nokprobe_inline void
-store_trace_args(int ent_size, struct trace_probe *tp, struct pt_regs *regs,
- u8 *data, int maxlen)
-{
- int i;
- u32 end = tp->size;
- u32 *dl; /* Data (relative) location */
-
- for (i = 0; i < tp->nr_args; i++) {
- if (unlikely(tp->args[i].fetch_size.fn)) {
- /*
- * First, we set the relative location and
- * maximum data length to *dl
- */
- dl = (u32 *)(data + tp->args[i].offset);
- *dl = make_data_rloc(maxlen, end - tp->args[i].offset);
- /* Then try to fetch string or dynamic array data */
- call_fetch(&tp->args[i].fetch, regs, dl);
- /* Reduce maximum length */
- end += get_rloc_len(*dl);
- maxlen -= get_rloc_len(*dl);
- /* Trick here, convert data_rloc to data_loc */
- *dl = convert_rloc_to_loc(*dl,
- ent_size + tp->args[i].offset);
- } else
- /* Just fetching data normally */
- call_fetch(&tp->args[i].fetch, regs,
- data + tp->args[i].offset);
- }
-}
-
-extern int set_print_fmt(struct trace_probe *tp, bool is_return);
+extern int traceprobe_set_print_fmt(struct trace_probe *tp, bool is_return);
#ifdef CONFIG_PERF_EVENTS
extern struct trace_event_call *
@@ -412,6 +291,9 @@ create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
extern void destroy_local_trace_kprobe(struct trace_event_call *event_call);
extern struct trace_event_call *
-create_local_trace_uprobe(char *name, unsigned long offs, bool is_return);
+create_local_trace_uprobe(char *name, unsigned long offs,
+ unsigned long ref_ctr_offset, bool is_return);
extern void destroy_local_trace_uprobe(struct trace_event_call *event_call);
#endif
+extern int traceprobe_define_arg_fields(struct trace_event_call *event_call,
+ size_t offset, struct trace_probe *tp);
diff --git a/kernel/trace/trace_probe_tmpl.h b/kernel/trace/trace_probe_tmpl.h
new file mode 100644
index 000000000000..4737bb8c07a3
--- /dev/null
+++ b/kernel/trace/trace_probe_tmpl.h
@@ -0,0 +1,218 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Traceprobe fetch helper inlines
+ */
+
+static nokprobe_inline void
+fetch_store_raw(unsigned long val, struct fetch_insn *code, void *buf)
+{
+ switch (code->size) {
+ case 1:
+ *(u8 *)buf = (u8)val;
+ break;
+ case 2:
+ *(u16 *)buf = (u16)val;
+ break;
+ case 4:
+ *(u32 *)buf = (u32)val;
+ break;
+ case 8:
+ //TBD: 32bit signed
+ *(u64 *)buf = (u64)val;
+ break;
+ default:
+ *(unsigned long *)buf = val;
+ }
+}
+
+static nokprobe_inline void
+fetch_apply_bitfield(struct fetch_insn *code, void *buf)
+{
+ switch (code->basesize) {
+ case 1:
+ *(u8 *)buf <<= code->lshift;
+ *(u8 *)buf >>= code->rshift;
+ break;
+ case 2:
+ *(u16 *)buf <<= code->lshift;
+ *(u16 *)buf >>= code->rshift;
+ break;
+ case 4:
+ *(u32 *)buf <<= code->lshift;
+ *(u32 *)buf >>= code->rshift;
+ break;
+ case 8:
+ *(u64 *)buf <<= code->lshift;
+ *(u64 *)buf >>= code->rshift;
+ break;
+ }
+}
+
+/*
+ * These functions must be defined for each callsite.
+ * Return consumed dynamic data size (>= 0), or error (< 0).
+ * If dest is NULL, don't store result and return required dynamic data size.
+ */
+static int
+process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs,
+ void *dest, void *base);
+static nokprobe_inline int fetch_store_strlen(unsigned long addr);
+static nokprobe_inline int
+fetch_store_string(unsigned long addr, void *dest, void *base);
+static nokprobe_inline int
+probe_mem_read(void *dest, void *src, size_t size);
+
+/* From the 2nd stage, routine is same */
+static nokprobe_inline int
+process_fetch_insn_bottom(struct fetch_insn *code, unsigned long val,
+ void *dest, void *base)
+{
+ struct fetch_insn *s3 = NULL;
+ int total = 0, ret = 0, i = 0;
+ u32 loc = 0;
+ unsigned long lval = val;
+
+stage2:
+ /* 2nd stage: dereference memory if needed */
+ while (code->op == FETCH_OP_DEREF) {
+ lval = val;
+ ret = probe_mem_read(&val, (void *)val + code->offset,
+ sizeof(val));
+ if (ret)
+ return ret;
+ code++;
+ }
+
+ s3 = code;
+stage3:
+ /* 3rd stage: store value to buffer */
+ if (unlikely(!dest)) {
+ if (code->op == FETCH_OP_ST_STRING) {
+ ret += fetch_store_strlen(val + code->offset);
+ code++;
+ goto array;
+ } else
+ return -EILSEQ;
+ }
+
+ switch (code->op) {
+ case FETCH_OP_ST_RAW:
+ fetch_store_raw(val, code, dest);
+ break;
+ case FETCH_OP_ST_MEM:
+ probe_mem_read(dest, (void *)val + code->offset, code->size);
+ break;
+ case FETCH_OP_ST_STRING:
+ loc = *(u32 *)dest;
+ ret = fetch_store_string(val + code->offset, dest, base);
+ break;
+ default:
+ return -EILSEQ;
+ }
+ code++;
+
+ /* 4th stage: modify stored value if needed */
+ if (code->op == FETCH_OP_MOD_BF) {
+ fetch_apply_bitfield(code, dest);
+ code++;
+ }
+
+array:
+ /* the last stage: Loop on array */
+ if (code->op == FETCH_OP_LP_ARRAY) {
+ total += ret;
+ if (++i < code->param) {
+ code = s3;
+ if (s3->op != FETCH_OP_ST_STRING) {
+ dest += s3->size;
+ val += s3->size;
+ goto stage3;
+ }
+ code--;
+ val = lval + sizeof(char *);
+ if (dest) {
+ dest += sizeof(u32);
+ *(u32 *)dest = update_data_loc(loc, ret);
+ }
+ goto stage2;
+ }
+ code++;
+ ret = total;
+ }
+
+ return code->op == FETCH_OP_END ? ret : -EILSEQ;
+}
+
+/* Sum up total data length for dynamic arraies (strings) */
+static nokprobe_inline int
+__get_data_size(struct trace_probe *tp, struct pt_regs *regs)
+{
+ struct probe_arg *arg;
+ int i, len, ret = 0;
+
+ for (i = 0; i < tp->nr_args; i++) {
+ arg = tp->args + i;
+ if (unlikely(arg->dynamic)) {
+ len = process_fetch_insn(arg->code, regs, NULL, NULL);
+ if (len > 0)
+ ret += len;
+ }
+ }
+
+ return ret;
+}
+
+/* Store the value of each argument */
+static nokprobe_inline void
+store_trace_args(void *data, struct trace_probe *tp, struct pt_regs *regs,
+ int header_size, int maxlen)
+{
+ struct probe_arg *arg;
+ void *base = data - header_size;
+ void *dyndata = data + tp->size;
+ u32 *dl; /* Data location */
+ int ret, i;
+
+ for (i = 0; i < tp->nr_args; i++) {
+ arg = tp->args + i;
+ dl = data + arg->offset;
+ /* Point the dynamic data area if needed */
+ if (unlikely(arg->dynamic))
+ *dl = make_data_loc(maxlen, dyndata - base);
+ ret = process_fetch_insn(arg->code, regs, dl, base);
+ if (unlikely(ret < 0 && arg->dynamic)) {
+ *dl = make_data_loc(0, dyndata - base);
+ } else {
+ dyndata += ret;
+ maxlen -= ret;
+ }
+ }
+}
+
+static inline int
+print_probe_args(struct trace_seq *s, struct probe_arg *args, int nr_args,
+ u8 *data, void *field)
+{
+ void *p;
+ int i, j;
+
+ for (i = 0; i < nr_args; i++) {
+ struct probe_arg *a = args + i;
+
+ trace_seq_printf(s, " %s=", a->name);
+ if (likely(!a->count)) {
+ if (!a->type->print(s, data + a->offset, field))
+ return -ENOMEM;
+ continue;
+ }
+ trace_seq_putc(s, '{');
+ p = data + a->offset;
+ for (j = 0; j < a->count; j++) {
+ if (!a->type->print(s, p, field))
+ return -ENOMEM;
+ trace_seq_putc(s, j == a->count - 1 ? '}' : ',');
+ p += a->type->size;
+ }
+ }
+ return 0;
+}
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index a86b303e6c67..4ea7e6845efb 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -35,26 +35,19 @@ static arch_spinlock_t wakeup_lock =
static void wakeup_reset(struct trace_array *tr);
static void __wakeup_reset(struct trace_array *tr);
+static int start_func_tracer(struct trace_array *tr, int graph);
+static void stop_func_tracer(struct trace_array *tr, int graph);
static int save_flags;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-static int wakeup_display_graph(struct trace_array *tr, int set);
# define is_graph(tr) ((tr)->trace_flags & TRACE_ITER_DISPLAY_GRAPH)
#else
-static inline int wakeup_display_graph(struct trace_array *tr, int set)
-{
- return 0;
-}
# define is_graph(tr) false
#endif
-
#ifdef CONFIG_FUNCTION_TRACER
-static int wakeup_graph_entry(struct ftrace_graph_ent *trace);
-static void wakeup_graph_return(struct ftrace_graph_ret *trace);
-
static bool function_enabled;
/*
@@ -104,122 +97,8 @@ out_enable:
return 0;
}
-/*
- * wakeup uses its own tracer function to keep the overhead down:
- */
-static void
-wakeup_tracer_call(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *op, struct pt_regs *pt_regs)
-{
- struct trace_array *tr = wakeup_trace;
- struct trace_array_cpu *data;
- unsigned long flags;
- int pc;
-
- if (!func_prolog_preempt_disable(tr, &data, &pc))
- return;
-
- local_irq_save(flags);
- trace_function(tr, ip, parent_ip, flags, pc);
- local_irq_restore(flags);
-
- atomic_dec(&data->disabled);
- preempt_enable_notrace();
-}
-
-static int register_wakeup_function(struct trace_array *tr, int graph, int set)
-{
- int ret;
-
- /* 'set' is set if TRACE_ITER_FUNCTION is about to be set */
- if (function_enabled || (!set && !(tr->trace_flags & TRACE_ITER_FUNCTION)))
- return 0;
-
- if (graph)
- ret = register_ftrace_graph(&wakeup_graph_return,
- &wakeup_graph_entry);
- else
- ret = register_ftrace_function(tr->ops);
-
- if (!ret)
- function_enabled = true;
-
- return ret;
-}
-
-static void unregister_wakeup_function(struct trace_array *tr, int graph)
-{
- if (!function_enabled)
- return;
-
- if (graph)
- unregister_ftrace_graph();
- else
- unregister_ftrace_function(tr->ops);
-
- function_enabled = false;
-}
-
-static int wakeup_function_set(struct trace_array *tr, u32 mask, int set)
-{
- if (!(mask & TRACE_ITER_FUNCTION))
- return 0;
-
- if (set)
- register_wakeup_function(tr, is_graph(tr), 1);
- else
- unregister_wakeup_function(tr, is_graph(tr));
- return 1;
-}
-#else
-static int register_wakeup_function(struct trace_array *tr, int graph, int set)
-{
- return 0;
-}
-static void unregister_wakeup_function(struct trace_array *tr, int graph) { }
-static int wakeup_function_set(struct trace_array *tr, u32 mask, int set)
-{
- return 0;
-}
-#endif /* CONFIG_FUNCTION_TRACER */
-
-static int wakeup_flag_changed(struct trace_array *tr, u32 mask, int set)
-{
- struct tracer *tracer = tr->current_trace;
-
- if (wakeup_function_set(tr, mask, set))
- return 0;
-
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- if (mask & TRACE_ITER_DISPLAY_GRAPH)
- return wakeup_display_graph(tr, set);
-#endif
-
- return trace_keep_overwrite(tracer, mask, set);
-}
-static int start_func_tracer(struct trace_array *tr, int graph)
-{
- int ret;
-
- ret = register_wakeup_function(tr, graph, 0);
-
- if (!ret && tracing_is_enabled())
- tracer_enabled = 1;
- else
- tracer_enabled = 0;
-
- return ret;
-}
-
-static void stop_func_tracer(struct trace_array *tr, int graph)
-{
- tracer_enabled = 0;
-
- unregister_wakeup_function(tr, graph);
-}
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static int wakeup_display_graph(struct trace_array *tr, int set)
{
if (!(is_graph(tr) ^ set))
@@ -270,6 +149,8 @@ static void wakeup_graph_return(struct ftrace_graph_ret *trace)
unsigned long flags;
int pc;
+ ftrace_graph_addr_finish(trace);
+
if (!func_prolog_preempt_disable(tr, &data, &pc))
return;
@@ -281,6 +162,11 @@ static void wakeup_graph_return(struct ftrace_graph_ret *trace)
return;
}
+static struct fgraph_ops fgraph_wakeup_ops = {
+ .entryfunc = &wakeup_graph_entry,
+ .retfunc = &wakeup_graph_return,
+};
+
static void wakeup_trace_open(struct trace_iterator *iter)
{
if (is_graph(iter->tr))
@@ -316,20 +202,87 @@ static void wakeup_print_header(struct seq_file *s)
else
trace_default_header(s);
}
+#endif /* else CONFIG_FUNCTION_GRAPH_TRACER */
+/*
+ * wakeup uses its own tracer function to keep the overhead down:
+ */
static void
-__trace_function(struct trace_array *tr,
- unsigned long ip, unsigned long parent_ip,
- unsigned long flags, int pc)
+wakeup_tracer_call(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *op, struct pt_regs *pt_regs)
{
- if (is_graph(tr))
- trace_graph_function(tr, ip, parent_ip, flags, pc);
+ struct trace_array *tr = wakeup_trace;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+ int pc;
+
+ if (!func_prolog_preempt_disable(tr, &data, &pc))
+ return;
+
+ local_irq_save(flags);
+ trace_function(tr, ip, parent_ip, flags, pc);
+ local_irq_restore(flags);
+
+ atomic_dec(&data->disabled);
+ preempt_enable_notrace();
+}
+
+static int register_wakeup_function(struct trace_array *tr, int graph, int set)
+{
+ int ret;
+
+ /* 'set' is set if TRACE_ITER_FUNCTION is about to be set */
+ if (function_enabled || (!set && !(tr->trace_flags & TRACE_ITER_FUNCTION)))
+ return 0;
+
+ if (graph)
+ ret = register_ftrace_graph(&fgraph_wakeup_ops);
else
- trace_function(tr, ip, parent_ip, flags, pc);
+ ret = register_ftrace_function(tr->ops);
+
+ if (!ret)
+ function_enabled = true;
+
+ return ret;
}
-#else
-#define __trace_function trace_function
+static void unregister_wakeup_function(struct trace_array *tr, int graph)
+{
+ if (!function_enabled)
+ return;
+
+ if (graph)
+ unregister_ftrace_graph(&fgraph_wakeup_ops);
+ else
+ unregister_ftrace_function(tr->ops);
+
+ function_enabled = false;
+}
+
+static int wakeup_function_set(struct trace_array *tr, u32 mask, int set)
+{
+ if (!(mask & TRACE_ITER_FUNCTION))
+ return 0;
+
+ if (set)
+ register_wakeup_function(tr, is_graph(tr), 1);
+ else
+ unregister_wakeup_function(tr, is_graph(tr));
+ return 1;
+}
+#else /* CONFIG_FUNCTION_TRACER */
+static int register_wakeup_function(struct trace_array *tr, int graph, int set)
+{
+ return 0;
+}
+static void unregister_wakeup_function(struct trace_array *tr, int graph) { }
+static int wakeup_function_set(struct trace_array *tr, u32 mask, int set)
+{
+ return 0;
+}
+#endif /* else CONFIG_FUNCTION_TRACER */
+
+#ifndef CONFIG_FUNCTION_GRAPH_TRACER
static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
{
return TRACE_TYPE_UNHANDLED;
@@ -338,23 +291,58 @@ static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
static void wakeup_trace_open(struct trace_iterator *iter) { }
static void wakeup_trace_close(struct trace_iterator *iter) { }
-#ifdef CONFIG_FUNCTION_TRACER
-static int wakeup_graph_entry(struct ftrace_graph_ent *trace)
-{
- return -1;
-}
-static void wakeup_graph_return(struct ftrace_graph_ret *trace) { }
static void wakeup_print_header(struct seq_file *s)
{
trace_default_header(s);
}
-#else
-static void wakeup_print_header(struct seq_file *s)
+#endif /* !CONFIG_FUNCTION_GRAPH_TRACER */
+
+static void
+__trace_function(struct trace_array *tr,
+ unsigned long ip, unsigned long parent_ip,
+ unsigned long flags, int pc)
{
- trace_latency_header(s);
+ if (is_graph(tr))
+ trace_graph_function(tr, ip, parent_ip, flags, pc);
+ else
+ trace_function(tr, ip, parent_ip, flags, pc);
+}
+
+static int wakeup_flag_changed(struct trace_array *tr, u32 mask, int set)
+{
+ struct tracer *tracer = tr->current_trace;
+
+ if (wakeup_function_set(tr, mask, set))
+ return 0;
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ if (mask & TRACE_ITER_DISPLAY_GRAPH)
+ return wakeup_display_graph(tr, set);
+#endif
+
+ return trace_keep_overwrite(tracer, mask, set);
+}
+
+static int start_func_tracer(struct trace_array *tr, int graph)
+{
+ int ret;
+
+ ret = register_wakeup_function(tr, graph, 0);
+
+ if (!ret && tracing_is_enabled())
+ tracer_enabled = 1;
+ else
+ tracer_enabled = 0;
+
+ return ret;
+}
+
+static void stop_func_tracer(struct trace_array *tr, int graph)
+{
+ tracer_enabled = 0;
+
+ unregister_wakeup_function(tr, graph);
}
-#endif /* CONFIG_FUNCTION_TRACER */
-#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
/*
* Should this new latency be reported/recorded?
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 11e9daa4a568..9d402e7fc949 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -741,6 +741,11 @@ static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace)
return trace_graph_entry(trace);
}
+static struct fgraph_ops fgraph_ops __initdata = {
+ .entryfunc = &trace_graph_entry_watchdog,
+ .retfunc = &trace_graph_return,
+};
+
/*
* Pretty much the same than for the function tracer from which the selftest
* has been borrowed.
@@ -765,8 +770,7 @@ trace_selftest_startup_function_graph(struct tracer *trace,
*/
tracing_reset_online_cpus(&tr->trace_buffer);
set_graph_array(tr);
- ret = register_ftrace_graph(&trace_graph_return,
- &trace_graph_entry_watchdog);
+ ret = register_ftrace_graph(&fgraph_ops);
if (ret) {
warn_failed_init_tracer(trace, ret);
goto out;
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 4237eba4ef20..eec648a0d673 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -111,7 +111,7 @@ check_stack(unsigned long ip, unsigned long *stack)
stack_trace_max_size = this_size;
stack_trace_max.nr_entries = 0;
- stack_trace_max.skip = 3;
+ stack_trace_max.skip = 0;
save_stack_trace(&stack_trace_max);
@@ -286,7 +286,7 @@ __next(struct seq_file *m, loff_t *pos)
{
long n = *pos - 1;
- if (n > stack_trace_max.nr_entries || stack_dump_trace[n] == ULONG_MAX)
+ if (n >= stack_trace_max.nr_entries || stack_dump_trace[n] == ULONG_MAX)
return NULL;
m->private = (void *)n;
@@ -448,8 +448,10 @@ static char stack_trace_filter_buf[COMMAND_LINE_SIZE+1] __initdata;
static __init int enable_stacktrace(char *str)
{
- if (strncmp(str, "_filter=", 8) == 0)
- strncpy(stack_trace_filter_buf, str+8, COMMAND_LINE_SIZE);
+ int len;
+
+ if ((len = str_has_prefix(str, "_filter=")))
+ strncpy(stack_trace_filter_buf, str + len, COMMAND_LINE_SIZE);
stack_tracer_enabled = 1;
last_stack_tracer_enabled = 1;
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index e696667da29a..9bde07c06362 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -5,8 +5,9 @@
* Copyright (C) IBM Corporation, 2010-2012
* Author: Srikar Dronamraju <[email protected]>
*/
-#define pr_fmt(fmt) "trace_kprobe: " fmt
+#define pr_fmt(fmt) "trace_uprobe: " fmt
+#include <linux/ctype.h>
#include <linux/module.h>
#include <linux/uaccess.h>
#include <linux/uprobes.h>
@@ -14,7 +15,9 @@
#include <linux/string.h>
#include <linux/rculist.h>
+#include "trace_dynevent.h"
#include "trace_probe.h"
+#include "trace_probe_tmpl.h"
#define UPROBE_EVENT_SYSTEM "uprobes"
@@ -36,21 +39,56 @@ struct trace_uprobe_filter {
struct list_head perf_events;
};
+static int trace_uprobe_create(int argc, const char **argv);
+static int trace_uprobe_show(struct seq_file *m, struct dyn_event *ev);
+static int trace_uprobe_release(struct dyn_event *ev);
+static bool trace_uprobe_is_busy(struct dyn_event *ev);
+static bool trace_uprobe_match(const char *system, const char *event,
+ struct dyn_event *ev);
+
+static struct dyn_event_operations trace_uprobe_ops = {
+ .create = trace_uprobe_create,
+ .show = trace_uprobe_show,
+ .is_busy = trace_uprobe_is_busy,
+ .free = trace_uprobe_release,
+ .match = trace_uprobe_match,
+};
+
/*
* uprobe event core functions
*/
struct trace_uprobe {
- struct list_head list;
+ struct dyn_event devent;
struct trace_uprobe_filter filter;
struct uprobe_consumer consumer;
struct path path;
struct inode *inode;
char *filename;
unsigned long offset;
+ unsigned long ref_ctr_offset;
unsigned long nhit;
struct trace_probe tp;
};
+static bool is_trace_uprobe(struct dyn_event *ev)
+{
+ return ev->ops == &trace_uprobe_ops;
+}
+
+static struct trace_uprobe *to_trace_uprobe(struct dyn_event *ev)
+{
+ return container_of(ev, struct trace_uprobe, devent);
+}
+
+/**
+ * for_each_trace_uprobe - iterate over the trace_uprobe list
+ * @pos: the struct trace_uprobe * for each entry
+ * @dpos: the struct dyn_event * to use as a loop cursor
+ */
+#define for_each_trace_uprobe(pos, dpos) \
+ for_each_dyn_event(dpos) \
+ if (is_trace_uprobe(dpos) && (pos = to_trace_uprobe(dpos)))
+
#define SIZEOF_TRACE_UPROBE(n) \
(offsetof(struct trace_uprobe, tp.args) + \
(sizeof(struct probe_arg) * (n)))
@@ -58,9 +96,6 @@ struct trace_uprobe {
static int register_uprobe_event(struct trace_uprobe *tu);
static int unregister_uprobe_event(struct trace_uprobe *tu);
-static DEFINE_MUTEX(uprobe_lock);
-static LIST_HEAD(uprobe_list);
-
struct uprobe_dispatch_data {
struct trace_uprobe *tu;
unsigned long bp_addr;
@@ -98,74 +133,59 @@ static unsigned long get_user_stack_nth(struct pt_regs *regs, unsigned int n)
/*
* Uprobes-specific fetch functions
*/
-#define DEFINE_FETCH_stack(type) \
-static void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs, \
- void *offset, void *dest) \
-{ \
- *(type *)dest = (type)get_user_stack_nth(regs, \
- ((unsigned long)offset)); \
-}
-DEFINE_BASIC_FETCH_FUNCS(stack)
-/* No string on the stack entry */
-#define fetch_stack_string NULL
-#define fetch_stack_string_size NULL
-
-#define DEFINE_FETCH_memory(type) \
-static void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs, \
- void *addr, void *dest) \
-{ \
- type retval; \
- void __user *vaddr = (void __force __user *) addr; \
- \
- if (copy_from_user(&retval, vaddr, sizeof(type))) \
- *(type *)dest = 0; \
- else \
- *(type *) dest = retval; \
+static nokprobe_inline int
+probe_mem_read(void *dest, void *src, size_t size)
+{
+ void __user *vaddr = (void __force __user *)src;
+
+ return copy_from_user(dest, vaddr, size) ? -EFAULT : 0;
}
-DEFINE_BASIC_FETCH_FUNCS(memory)
/*
* Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
* length and relative data location.
*/
-static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
- void *addr, void *dest)
+static nokprobe_inline int
+fetch_store_string(unsigned long addr, void *dest, void *base)
{
long ret;
- u32 rloc = *(u32 *)dest;
- int maxlen = get_rloc_len(rloc);
- u8 *dst = get_rloc_data(dest);
+ u32 loc = *(u32 *)dest;
+ int maxlen = get_loc_len(loc);
+ u8 *dst = get_loc_data(dest, base);
void __user *src = (void __force __user *) addr;
- if (!maxlen)
- return;
+ if (unlikely(!maxlen))
+ return -ENOMEM;
ret = strncpy_from_user(dst, src, maxlen);
- if (ret == maxlen)
- dst[--ret] = '\0';
-
- if (ret < 0) { /* Failed to fetch string */
- ((u8 *)get_rloc_data(dest))[0] = '\0';
- *(u32 *)dest = make_data_rloc(0, get_rloc_offs(rloc));
- } else {
- *(u32 *)dest = make_data_rloc(ret, get_rloc_offs(rloc));
+ if (ret >= 0) {
+ if (ret == maxlen)
+ dst[ret - 1] = '\0';
+ else
+ /*
+ * Include the terminating null byte. In this case it
+ * was copied by strncpy_from_user but not accounted
+ * for in ret.
+ */
+ ret++;
+ *(u32 *)dest = make_data_loc(ret, (void *)dst - base);
}
+
+ return ret;
}
-static void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
- void *addr, void *dest)
+/* Return the length of string -- including null terminal byte */
+static nokprobe_inline int
+fetch_store_strlen(unsigned long addr)
{
int len;
void __user *vaddr = (void __force __user *) addr;
len = strnlen_user(vaddr, MAX_STRING_SIZE);
- if (len == 0 || len > MAX_STRING_SIZE) /* Failed to check length */
- *(u32 *)dest = 0;
- else
- *(u32 *)dest = len;
+ return (len > MAX_STRING_SIZE) ? 0 : len;
}
-static unsigned long translate_user_vaddr(void *file_offset)
+static unsigned long translate_user_vaddr(unsigned long file_offset)
{
unsigned long base_addr;
struct uprobe_dispatch_data *udd;
@@ -173,44 +193,44 @@ static unsigned long translate_user_vaddr(void *file_offset)
udd = (void *) current->utask->vaddr;
base_addr = udd->bp_addr - udd->tu->offset;
- return base_addr + (unsigned long)file_offset;
+ return base_addr + file_offset;
}
-#define DEFINE_FETCH_file_offset(type) \
-static void FETCH_FUNC_NAME(file_offset, type)(struct pt_regs *regs, \
- void *offset, void *dest)\
-{ \
- void *vaddr = (void *)translate_user_vaddr(offset); \
- \
- FETCH_FUNC_NAME(memory, type)(regs, vaddr, dest); \
+/* Note that we don't verify it, since the code does not come from user space */
+static int
+process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest,
+ void *base)
+{
+ unsigned long val;
+
+ /* 1st stage: get value from context */
+ switch (code->op) {
+ case FETCH_OP_REG:
+ val = regs_get_register(regs, code->param);
+ break;
+ case FETCH_OP_STACK:
+ val = get_user_stack_nth(regs, code->param);
+ break;
+ case FETCH_OP_STACKP:
+ val = user_stack_pointer(regs);
+ break;
+ case FETCH_OP_RETVAL:
+ val = regs_return_value(regs);
+ break;
+ case FETCH_OP_IMM:
+ val = code->immediate;
+ break;
+ case FETCH_OP_FOFFS:
+ val = translate_user_vaddr(code->immediate);
+ break;
+ default:
+ return -EILSEQ;
+ }
+ code++;
+
+ return process_fetch_insn_bottom(code, val, dest, base);
}
-DEFINE_BASIC_FETCH_FUNCS(file_offset)
-DEFINE_FETCH_file_offset(string)
-DEFINE_FETCH_file_offset(string_size)
-
-/* Fetch type information table */
-static const struct fetch_type uprobes_fetch_type_table[] = {
- /* Special types */
- [FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string,
- sizeof(u32), 1, "__data_loc char[]"),
- [FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32,
- string_size, sizeof(u32), 0, "u32"),
- /* Basic types */
- ASSIGN_FETCH_TYPE(u8, u8, 0),
- ASSIGN_FETCH_TYPE(u16, u16, 0),
- ASSIGN_FETCH_TYPE(u32, u32, 0),
- ASSIGN_FETCH_TYPE(u64, u64, 0),
- ASSIGN_FETCH_TYPE(s8, u8, 1),
- ASSIGN_FETCH_TYPE(s16, u16, 1),
- ASSIGN_FETCH_TYPE(s32, u32, 1),
- ASSIGN_FETCH_TYPE(s64, u64, 1),
- ASSIGN_FETCH_TYPE_ALIAS(x8, u8, u8, 0),
- ASSIGN_FETCH_TYPE_ALIAS(x16, u16, u16, 0),
- ASSIGN_FETCH_TYPE_ALIAS(x32, u32, u32, 0),
- ASSIGN_FETCH_TYPE_ALIAS(x64, u64, u64, 0),
-
- ASSIGN_FETCH_TYPE_END
-};
+NOKPROBE_SYMBOL(process_fetch_insn)
static inline void init_trace_uprobe_filter(struct trace_uprobe_filter *filter)
{
@@ -229,6 +249,22 @@ static inline bool is_ret_probe(struct trace_uprobe *tu)
return tu->consumer.ret_handler != NULL;
}
+static bool trace_uprobe_is_busy(struct dyn_event *ev)
+{
+ struct trace_uprobe *tu = to_trace_uprobe(ev);
+
+ return trace_probe_is_enabled(&tu->tp);
+}
+
+static bool trace_uprobe_match(const char *system, const char *event,
+ struct dyn_event *ev)
+{
+ struct trace_uprobe *tu = to_trace_uprobe(ev);
+
+ return strcmp(trace_event_name(&tu->tp.call), event) == 0 &&
+ (!system || strcmp(tu->tp.call.class->system, system) == 0);
+}
+
/*
* Allocate new trace_uprobe and initialize it (including uprobes).
*/
@@ -256,7 +292,7 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret)
if (!tu->tp.class.system)
goto error;
- INIT_LIST_HEAD(&tu->list);
+ dyn_event_init(&tu->devent, &trace_uprobe_ops);
INIT_LIST_HEAD(&tu->tp.files);
tu->consumer.handler = uprobe_dispatcher;
if (is_ret)
@@ -275,6 +311,9 @@ static void free_trace_uprobe(struct trace_uprobe *tu)
{
int i;
+ if (!tu)
+ return;
+
for (i = 0; i < tu->tp.nr_args; i++)
traceprobe_free_probe_arg(&tu->tp.args[i]);
@@ -287,9 +326,10 @@ static void free_trace_uprobe(struct trace_uprobe *tu)
static struct trace_uprobe *find_probe_event(const char *event, const char *group)
{
+ struct dyn_event *pos;
struct trace_uprobe *tu;
- list_for_each_entry(tu, &uprobe_list, list)
+ for_each_trace_uprobe(tu, pos)
if (strcmp(trace_event_name(&tu->tp.call), event) == 0 &&
strcmp(tu->tp.call.class->system, group) == 0)
return tu;
@@ -297,7 +337,7 @@ static struct trace_uprobe *find_probe_event(const char *event, const char *grou
return NULL;
}
-/* Unregister a trace_uprobe and probe_event: call with locking uprobe_lock */
+/* Unregister a trace_uprobe and probe_event */
static int unregister_trace_uprobe(struct trace_uprobe *tu)
{
int ret;
@@ -306,22 +346,56 @@ static int unregister_trace_uprobe(struct trace_uprobe *tu)
if (ret)
return ret;
- list_del(&tu->list);
+ dyn_event_remove(&tu->devent);
free_trace_uprobe(tu);
return 0;
}
+/*
+ * Uprobe with multiple reference counter is not allowed. i.e.
+ * If inode and offset matches, reference counter offset *must*
+ * match as well. Though, there is one exception: If user is
+ * replacing old trace_uprobe with new one(same group/event),
+ * then we allow same uprobe with new reference counter as far
+ * as the new one does not conflict with any other existing
+ * ones.
+ */
+static struct trace_uprobe *find_old_trace_uprobe(struct trace_uprobe *new)
+{
+ struct dyn_event *pos;
+ struct trace_uprobe *tmp, *old = NULL;
+ struct inode *new_inode = d_real_inode(new->path.dentry);
+
+ old = find_probe_event(trace_event_name(&new->tp.call),
+ new->tp.call.class->system);
+
+ for_each_trace_uprobe(tmp, pos) {
+ if ((old ? old != tmp : true) &&
+ new_inode == d_real_inode(tmp->path.dentry) &&
+ new->offset == tmp->offset &&
+ new->ref_ctr_offset != tmp->ref_ctr_offset) {
+ pr_warn("Reference counter offset mismatch.");
+ return ERR_PTR(-EINVAL);
+ }
+ }
+ return old;
+}
+
/* Register a trace_uprobe and probe_event */
static int register_trace_uprobe(struct trace_uprobe *tu)
{
struct trace_uprobe *old_tu;
int ret;
- mutex_lock(&uprobe_lock);
+ mutex_lock(&event_mutex);
/* register as an event */
- old_tu = find_probe_event(trace_event_name(&tu->tp.call),
- tu->tp.call.class->system);
+ old_tu = find_old_trace_uprobe(tu);
+ if (IS_ERR(old_tu)) {
+ ret = PTR_ERR(old_tu);
+ goto end;
+ }
+
if (old_tu) {
/* delete old event */
ret = unregister_trace_uprobe(old_tu);
@@ -335,10 +409,10 @@ static int register_trace_uprobe(struct trace_uprobe *tu)
goto end;
}
- list_add_tail(&tu->list, &uprobe_list);
+ dyn_event_add(&tu->devent);
end:
- mutex_unlock(&uprobe_lock);
+ mutex_unlock(&event_mutex);
return ret;
}
@@ -349,95 +423,74 @@ end:
*
* - Remove uprobe: -:[GRP/]EVENT
*/
-static int create_trace_uprobe(int argc, char **argv)
+static int trace_uprobe_create(int argc, const char **argv)
{
struct trace_uprobe *tu;
- char *arg, *event, *group, *filename;
+ const char *event = NULL, *group = UPROBE_EVENT_SYSTEM;
+ char *arg, *filename, *rctr, *rctr_end, *tmp;
char buf[MAX_EVENT_NAME_LEN];
struct path path;
- unsigned long offset;
- bool is_delete, is_return;
+ unsigned long offset, ref_ctr_offset;
+ bool is_return = false;
int i, ret;
ret = 0;
- is_delete = false;
- is_return = false;
- event = NULL;
- group = NULL;
+ ref_ctr_offset = 0;
/* argc must be >= 1 */
- if (argv[0][0] == '-')
- is_delete = true;
- else if (argv[0][0] == 'r')
+ if (argv[0][0] == 'r')
is_return = true;
- else if (argv[0][0] != 'p') {
- pr_info("Probe definition must be started with 'p', 'r' or '-'.\n");
- return -EINVAL;
- }
+ else if (argv[0][0] != 'p' || argc < 2)
+ return -ECANCELED;
- if (argv[0][1] == ':') {
+ if (argv[0][1] == ':')
event = &argv[0][2];
- arg = strchr(event, '/');
- if (arg) {
- group = event;
- event = arg + 1;
- event[-1] = '\0';
+ if (!strchr(argv[1], '/'))
+ return -ECANCELED;
- if (strlen(group) == 0) {
- pr_info("Group name is not specified\n");
- return -EINVAL;
- }
- }
- if (strlen(event) == 0) {
- pr_info("Event name is not specified\n");
- return -EINVAL;
- }
- }
- if (!group)
- group = UPROBE_EVENT_SYSTEM;
-
- if (is_delete) {
- int ret;
-
- if (!event) {
- pr_info("Delete command needs an event name.\n");
- return -EINVAL;
- }
- mutex_lock(&uprobe_lock);
- tu = find_probe_event(event, group);
-
- if (!tu) {
- mutex_unlock(&uprobe_lock);
- pr_info("Event %s/%s doesn't exist.\n", group, event);
- return -ENOENT;
- }
- /* delete an event */
- ret = unregister_trace_uprobe(tu);
- mutex_unlock(&uprobe_lock);
- return ret;
- }
+ filename = kstrdup(argv[1], GFP_KERNEL);
+ if (!filename)
+ return -ENOMEM;
- if (argc < 2) {
- pr_info("Probe point is not specified.\n");
- return -EINVAL;
- }
/* Find the last occurrence, in case the path contains ':' too. */
- arg = strrchr(argv[1], ':');
- if (!arg)
- return -EINVAL;
+ arg = strrchr(filename, ':');
+ if (!arg || !isdigit(arg[1])) {
+ kfree(filename);
+ return -ECANCELED;
+ }
*arg++ = '\0';
- filename = argv[1];
ret = kern_path(filename, LOOKUP_FOLLOW, &path);
- if (ret)
+ if (ret) {
+ kfree(filename);
return ret;
-
+ }
if (!d_is_reg(path.dentry)) {
ret = -EINVAL;
goto fail_address_parse;
}
+ /* Parse reference counter offset if specified. */
+ rctr = strchr(arg, '(');
+ if (rctr) {
+ rctr_end = strchr(rctr, ')');
+ if (rctr > rctr_end || *(rctr_end + 1) != 0) {
+ ret = -EINVAL;
+ pr_info("Invalid reference counter offset.\n");
+ goto fail_address_parse;
+ }
+
+ *rctr++ = '\0';
+ *rctr_end = '\0';
+ ret = kstrtoul(rctr, 0, &ref_ctr_offset);
+ if (ret) {
+ pr_info("Invalid reference counter offset.\n");
+ goto fail_address_parse;
+ }
+ }
+
+ /* Parse uprobe offset. */
ret = kstrtoul(arg, 0, &offset);
if (ret)
goto fail_address_parse;
@@ -446,7 +499,11 @@ static int create_trace_uprobe(int argc, char **argv)
argv += 2;
/* setup a probe */
- if (!event) {
+ if (event) {
+ ret = traceprobe_parse_event_name(&event, &group, buf);
+ if (ret)
+ goto fail_address_parse;
+ } else {
char *tail;
char *ptr;
@@ -472,62 +529,23 @@ static int create_trace_uprobe(int argc, char **argv)
goto fail_address_parse;
}
tu->offset = offset;
+ tu->ref_ctr_offset = ref_ctr_offset;
tu->path = path;
- tu->filename = kstrdup(filename, GFP_KERNEL);
-
- if (!tu->filename) {
- pr_info("Failed to allocate filename.\n");
- ret = -ENOMEM;
- goto error;
- }
+ tu->filename = filename;
/* parse arguments */
- ret = 0;
for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
- struct probe_arg *parg = &tu->tp.args[i];
-
- /* Increment count for freeing args in error case */
- tu->tp.nr_args++;
-
- /* Parse argument name */
- arg = strchr(argv[i], '=');
- if (arg) {
- *arg++ = '\0';
- parg->name = kstrdup(argv[i], GFP_KERNEL);
- } else {
- arg = argv[i];
- /* If argument name is omitted, set "argN" */
- snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1);
- parg->name = kstrdup(buf, GFP_KERNEL);
- }
-
- if (!parg->name) {
- pr_info("Failed to allocate argument[%d] name.\n", i);
+ tmp = kstrdup(argv[i], GFP_KERNEL);
+ if (!tmp) {
ret = -ENOMEM;
goto error;
}
- if (!is_good_name(parg->name)) {
- pr_info("Invalid argument[%d] name: %s\n", i, parg->name);
- ret = -EINVAL;
- goto error;
- }
-
- if (traceprobe_conflict_field_name(parg->name, tu->tp.args, i)) {
- pr_info("Argument[%d] name '%s' conflicts with "
- "another field.\n", i, argv[i]);
- ret = -EINVAL;
- goto error;
- }
-
- /* Parse fetch argument */
- ret = traceprobe_parse_probe_arg(arg, &tu->tp.size, parg,
- is_return, false,
- uprobes_fetch_type_table);
- if (ret) {
- pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
+ ret = traceprobe_parse_probe_arg(&tu->tp, i, tmp,
+ is_return ? TPARG_FL_RETURN : 0);
+ kfree(tmp);
+ if (ret)
goto error;
- }
}
ret = register_trace_uprobe(tu);
@@ -541,48 +559,35 @@ error:
fail_address_parse:
path_put(&path);
+ kfree(filename);
pr_info("Failed to parse address or file.\n");
return ret;
}
-static int cleanup_all_probes(void)
+static int create_or_delete_trace_uprobe(int argc, char **argv)
{
- struct trace_uprobe *tu;
- int ret = 0;
+ int ret;
- mutex_lock(&uprobe_lock);
- while (!list_empty(&uprobe_list)) {
- tu = list_entry(uprobe_list.next, struct trace_uprobe, list);
- ret = unregister_trace_uprobe(tu);
- if (ret)
- break;
- }
- mutex_unlock(&uprobe_lock);
- return ret;
-}
+ if (argv[0][0] == '-')
+ return dyn_event_release(argc, argv, &trace_uprobe_ops);
-/* Probes listing interfaces */
-static void *probes_seq_start(struct seq_file *m, loff_t *pos)
-{
- mutex_lock(&uprobe_lock);
- return seq_list_start(&uprobe_list, *pos);
+ ret = trace_uprobe_create(argc, (const char **)argv);
+ return ret == -ECANCELED ? -EINVAL : ret;
}
-static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
+static int trace_uprobe_release(struct dyn_event *ev)
{
- return seq_list_next(v, &uprobe_list, pos);
-}
+ struct trace_uprobe *tu = to_trace_uprobe(ev);
-static void probes_seq_stop(struct seq_file *m, void *v)
-{
- mutex_unlock(&uprobe_lock);
+ return unregister_trace_uprobe(tu);
}
-static int probes_seq_show(struct seq_file *m, void *v)
+/* Probes listing interfaces */
+static int trace_uprobe_show(struct seq_file *m, struct dyn_event *ev)
{
- struct trace_uprobe *tu = v;
+ struct trace_uprobe *tu = to_trace_uprobe(ev);
char c = is_ret_probe(tu) ? 'r' : 'p';
int i;
@@ -590,6 +595,9 @@ static int probes_seq_show(struct seq_file *m, void *v)
trace_event_name(&tu->tp.call), tu->filename,
(int)(sizeof(void *) * 2), tu->offset);
+ if (tu->ref_ctr_offset)
+ seq_printf(m, "(0x%lx)", tu->ref_ctr_offset);
+
for (i = 0; i < tu->tp.nr_args; i++)
seq_printf(m, " %s=%s", tu->tp.args[i].name, tu->tp.args[i].comm);
@@ -597,11 +605,21 @@ static int probes_seq_show(struct seq_file *m, void *v)
return 0;
}
+static int probes_seq_show(struct seq_file *m, void *v)
+{
+ struct dyn_event *ev = v;
+
+ if (!is_trace_uprobe(ev))
+ return 0;
+
+ return trace_uprobe_show(m, ev);
+}
+
static const struct seq_operations probes_seq_op = {
- .start = probes_seq_start,
- .next = probes_seq_next,
- .stop = probes_seq_stop,
- .show = probes_seq_show
+ .start = dyn_event_seq_start,
+ .next = dyn_event_seq_next,
+ .stop = dyn_event_seq_stop,
+ .show = probes_seq_show
};
static int probes_open(struct inode *inode, struct file *file)
@@ -609,7 +627,7 @@ static int probes_open(struct inode *inode, struct file *file)
int ret;
if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
- ret = cleanup_all_probes();
+ ret = dyn_events_release_all(&trace_uprobe_ops);
if (ret)
return ret;
}
@@ -620,7 +638,8 @@ static int probes_open(struct inode *inode, struct file *file)
static ssize_t probes_write(struct file *file, const char __user *buffer,
size_t count, loff_t *ppos)
{
- return trace_parse_run_command(file, buffer, count, ppos, create_trace_uprobe);
+ return trace_parse_run_command(file, buffer, count, ppos,
+ create_or_delete_trace_uprobe);
}
static const struct file_operations uprobe_events_ops = {
@@ -635,17 +654,22 @@ static const struct file_operations uprobe_events_ops = {
/* Probes profiling interfaces */
static int probes_profile_seq_show(struct seq_file *m, void *v)
{
- struct trace_uprobe *tu = v;
+ struct dyn_event *ev = v;
+ struct trace_uprobe *tu;
+ if (!is_trace_uprobe(ev))
+ return 0;
+
+ tu = to_trace_uprobe(ev);
seq_printf(m, " %s %-44s %15lu\n", tu->filename,
trace_event_name(&tu->tp.call), tu->nhit);
return 0;
}
static const struct seq_operations profile_seq_op = {
- .start = probes_seq_start,
- .next = probes_seq_next,
- .stop = probes_seq_stop,
+ .start = dyn_event_seq_start,
+ .next = dyn_event_seq_next,
+ .stop = dyn_event_seq_stop,
.show = probes_profile_seq_show
};
@@ -833,7 +857,6 @@ print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *e
struct trace_seq *s = &iter->seq;
struct trace_uprobe *tu;
u8 *data;
- int i;
entry = (struct uprobe_trace_entry_head *)iter->ent;
tu = container_of(event, struct trace_uprobe, tp.call.event);
@@ -850,12 +873,8 @@ print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *e
data = DATAOF_TRACE_ENTRY(entry, false);
}
- for (i = 0; i < tu->tp.nr_args; i++) {
- struct probe_arg *parg = &tu->tp.args[i];
-
- if (!parg->type->print(s, parg->name, data + parg->offset, entry))
- goto out;
- }
+ if (print_probe_args(s, tu->tp.args, tu->tp.nr_args, data, entry) < 0)
+ goto out;
trace_seq_putc(s, '\n');
@@ -905,7 +924,13 @@ probe_event_enable(struct trace_uprobe *tu, struct trace_event_file *file,
tu->consumer.filter = filter;
tu->inode = d_real_inode(tu->path.dentry);
- ret = uprobe_register(tu->inode, tu->offset, &tu->consumer);
+ if (tu->ref_ctr_offset) {
+ ret = uprobe_register_refctr(tu->inode, tu->offset,
+ tu->ref_ctr_offset, &tu->consumer);
+ } else {
+ ret = uprobe_register(tu->inode, tu->offset, &tu->consumer);
+ }
+
if (ret)
goto err_buffer;
@@ -958,7 +983,7 @@ probe_event_disable(struct trace_uprobe *tu, struct trace_event_file *file)
static int uprobe_event_define_fields(struct trace_event_call *event_call)
{
- int ret, i, size;
+ int ret, size;
struct uprobe_trace_entry_head field;
struct trace_uprobe *tu = event_call->data;
@@ -970,19 +995,8 @@ static int uprobe_event_define_fields(struct trace_event_call *event_call)
DEFINE_FIELD(unsigned long, vaddr[0], FIELD_STRING_IP, 0);
size = SIZEOF_TRACE_ENTRY(false);
}
- /* Set argument names as fields */
- for (i = 0; i < tu->tp.nr_args; i++) {
- struct probe_arg *parg = &tu->tp.args[i];
-
- ret = trace_define_field(event_call, parg->type->fmttype,
- parg->name, size + parg->offset,
- parg->type->size, parg->type->is_signed,
- FILTER_OTHER);
- if (ret)
- return ret;
- }
- return 0;
+ return traceprobe_define_arg_fields(event_call, size, &tu->tp);
}
#ifdef CONFIG_PERF_EVENTS
@@ -1233,7 +1247,7 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
ucb = uprobe_buffer_get();
- store_trace_args(esize, &tu->tp, regs, ucb->buf, dsize);
+ store_trace_args(ucb->buf, &tu->tp, regs, esize, dsize);
if (tu->tp.flags & TP_FLAG_TRACE)
ret |= uprobe_trace_func(tu, regs, ucb, dsize);
@@ -1268,7 +1282,7 @@ static int uretprobe_dispatcher(struct uprobe_consumer *con,
esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
ucb = uprobe_buffer_get();
- store_trace_args(esize, &tu->tp, regs, ucb->buf, dsize);
+ store_trace_args(ucb->buf, &tu->tp, regs, esize, dsize);
if (tu->tp.flags & TP_FLAG_TRACE)
uretprobe_trace_func(tu, func, regs, ucb, dsize);
@@ -1304,7 +1318,7 @@ static int register_uprobe_event(struct trace_uprobe *tu)
init_trace_event_call(tu, call);
- if (set_print_fmt(&tu->tp, is_ret_probe(tu)) < 0)
+ if (traceprobe_set_print_fmt(&tu->tp, is_ret_probe(tu)) < 0)
return -ENOMEM;
ret = register_trace_event(&call->event);
@@ -1340,7 +1354,8 @@ static int unregister_uprobe_event(struct trace_uprobe *tu)
#ifdef CONFIG_PERF_EVENTS
struct trace_event_call *
-create_local_trace_uprobe(char *name, unsigned long offs, bool is_return)
+create_local_trace_uprobe(char *name, unsigned long offs,
+ unsigned long ref_ctr_offset, bool is_return)
{
struct trace_uprobe *tu;
struct path path;
@@ -1356,7 +1371,7 @@ create_local_trace_uprobe(char *name, unsigned long offs, bool is_return)
}
/*
- * local trace_kprobes are not added to probe_list, so they are never
+ * local trace_kprobes are not added to dyn_event, so they are never
* searched in find_trace_kprobe(). Therefore, there is no concern of
* duplicated name "DUMMY_EVENT" here.
*/
@@ -1372,10 +1387,11 @@ create_local_trace_uprobe(char *name, unsigned long offs, bool is_return)
tu->offset = offs;
tu->path = path;
+ tu->ref_ctr_offset = ref_ctr_offset;
tu->filename = kstrdup(name, GFP_KERNEL);
init_trace_event_call(tu, &tu->tp.call);
- if (set_print_fmt(&tu->tp, is_ret_probe(tu)) < 0) {
+ if (traceprobe_set_print_fmt(&tu->tp, is_ret_probe(tu)) < 0) {
ret = -ENOMEM;
goto error;
}
@@ -1403,6 +1419,11 @@ void destroy_local_trace_uprobe(struct trace_event_call *event_call)
static __init int init_uprobe_trace(void)
{
struct dentry *d_tracer;
+ int ret;
+
+ ret = dyn_event_register(&trace_uprobe_ops);
+ if (ret)
+ return ret;
d_tracer = tracing_init_dentry();
if (IS_ERR(d_tracer))