aboutsummaryrefslogtreecommitdiff
path: root/kernel/trace
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/trace')
-rw-r--r--kernel/trace/Kconfig14
-rw-r--r--kernel/trace/blktrace.c20
-rw-r--r--kernel/trace/bpf_trace.c95
-rw-r--r--kernel/trace/ftrace.c42
-rw-r--r--kernel/trace/ring_buffer.c7
-rw-r--r--kernel/trace/trace.c108
-rw-r--r--kernel/trace/trace.h107
-rw-r--r--kernel/trace/trace_eprobe.c38
-rw-r--r--kernel/trace/trace_events.c24
-rw-r--r--kernel/trace/trace_events_filter.c139
-rw-r--r--kernel/trace/trace_events_hist.c121
-rw-r--r--kernel/trace/trace_events_inject.c11
-rw-r--r--kernel/trace/trace_events_synth.c26
-rw-r--r--kernel/trace/trace_events_trigger.c424
-rw-r--r--kernel/trace/trace_hwlat.c6
-rw-r--r--kernel/trace/trace_kprobe.c44
-rw-r--r--kernel/trace/trace_osnoise.c26
-rw-r--r--kernel/trace/trace_output.c4
-rw-r--r--kernel/trace/trace_probe.c5
-rw-r--r--kernel/trace/trace_syscalls.c6
-rw-r--r--kernel/trace/trace_uprobe.c39
-rw-r--r--kernel/trace/tracing_map.c3
22 files changed, 979 insertions, 330 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 420ff4bc67fd..f468767bc287 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -915,6 +915,20 @@ config EVENT_TRACE_TEST_SYSCALLS
TBD - enable a way to actually call the syscalls as we test their
events
+config FTRACE_SORT_STARTUP_TEST
+ bool "Verify compile time sorting of ftrace functions"
+ depends on DYNAMIC_FTRACE
+ depends on BUILDTIME_TABLE_SORT
+ help
+ Sorting of the mcount_loc sections that is used to find the
+ where the ftrace knows where to patch functions for tracing
+ and other callbacks is done at compile time. But if the sort
+ is not done correctly, it will cause non-deterministic failures.
+ When this is set, the sorted sections will be verified that they
+ are in deed sorted and will warn if they are not.
+
+ If unsure, say N
+
config RING_BUFFER_STARTUP_TEST
bool "Ring buffer startup self test"
depends on RING_BUFFER
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 1183c88634aa..af68a67179b4 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -34,7 +34,7 @@ static struct trace_array *blk_tr;
static bool blk_tracer_enabled __read_mostly;
static LIST_HEAD(running_trace_list);
-static __cacheline_aligned_in_smp DEFINE_SPINLOCK(running_trace_lock);
+static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(running_trace_lock);
/* Select an alternative, minimalistic output than the original one */
#define TRACE_BLK_OPT_CLASSIC 0x1
@@ -121,12 +121,12 @@ static void trace_note_tsk(struct task_struct *tsk)
struct blk_trace *bt;
tsk->btrace_seq = blktrace_seq;
- spin_lock_irqsave(&running_trace_lock, flags);
+ raw_spin_lock_irqsave(&running_trace_lock, flags);
list_for_each_entry(bt, &running_trace_list, running_list) {
trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm,
sizeof(tsk->comm), 0);
}
- spin_unlock_irqrestore(&running_trace_lock, flags);
+ raw_spin_unlock_irqrestore(&running_trace_lock, flags);
}
static void trace_note_time(struct blk_trace *bt)
@@ -666,9 +666,9 @@ static int __blk_trace_startstop(struct request_queue *q, int start)
blktrace_seq++;
smp_mb();
bt->trace_state = Blktrace_running;
- spin_lock_irq(&running_trace_lock);
+ raw_spin_lock_irq(&running_trace_lock);
list_add(&bt->running_list, &running_trace_list);
- spin_unlock_irq(&running_trace_lock);
+ raw_spin_unlock_irq(&running_trace_lock);
trace_note_time(bt);
ret = 0;
@@ -676,9 +676,9 @@ static int __blk_trace_startstop(struct request_queue *q, int start)
} else {
if (bt->trace_state == Blktrace_running) {
bt->trace_state = Blktrace_stopped;
- spin_lock_irq(&running_trace_lock);
+ raw_spin_lock_irq(&running_trace_lock);
list_del_init(&bt->running_list);
- spin_unlock_irq(&running_trace_lock);
+ raw_spin_unlock_irq(&running_trace_lock);
relay_flush(bt->rchan);
ret = 0;
}
@@ -1045,7 +1045,7 @@ static void blk_add_trace_rq_remap(void *ignore, struct request *rq, dev_t dev,
}
r.device_from = cpu_to_be32(dev);
- r.device_to = cpu_to_be32(disk_devt(rq->rq_disk));
+ r.device_to = cpu_to_be32(disk_devt(rq->q->disk));
r.sector_from = cpu_to_be64(from);
__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
@@ -1608,9 +1608,9 @@ static int blk_trace_remove_queue(struct request_queue *q)
if (bt->trace_state == Blktrace_running) {
bt->trace_state = Blktrace_stopped;
- spin_lock_irq(&running_trace_lock);
+ raw_spin_lock_irq(&running_trace_lock);
list_del_init(&bt->running_list);
- spin_unlock_irq(&running_trace_lock);
+ raw_spin_unlock_irq(&running_trace_lock);
relay_flush(bt->rchan);
}
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 7396488793ff..21aa30644219 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -345,7 +345,7 @@ static const struct bpf_func_proto bpf_probe_write_user_proto = {
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_ANYTHING,
- .arg2_type = ARG_PTR_TO_MEM,
+ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg3_type = ARG_CONST_SIZE,
};
@@ -394,7 +394,7 @@ static const struct bpf_func_proto bpf_trace_printk_proto = {
.func = bpf_trace_printk,
.gpl_only = true,
.ret_type = RET_INTEGER,
- .arg1_type = ARG_PTR_TO_MEM,
+ .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg2_type = ARG_CONST_SIZE,
};
@@ -450,9 +450,9 @@ static const struct bpf_func_proto bpf_trace_vprintk_proto = {
.func = bpf_trace_vprintk,
.gpl_only = true,
.ret_type = RET_INTEGER,
- .arg1_type = ARG_PTR_TO_MEM,
+ .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg2_type = ARG_CONST_SIZE,
- .arg3_type = ARG_PTR_TO_MEM_OR_NULL,
+ .arg3_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
.arg4_type = ARG_CONST_SIZE_OR_ZERO,
};
@@ -492,9 +492,9 @@ static const struct bpf_func_proto bpf_seq_printf_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_BTF_ID,
.arg1_btf_id = &btf_seq_file_ids[0],
- .arg2_type = ARG_PTR_TO_MEM,
+ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg3_type = ARG_CONST_SIZE,
- .arg4_type = ARG_PTR_TO_MEM_OR_NULL,
+ .arg4_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
@@ -509,7 +509,7 @@ static const struct bpf_func_proto bpf_seq_write_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_BTF_ID,
.arg1_btf_id = &btf_seq_file_ids[0],
- .arg2_type = ARG_PTR_TO_MEM,
+ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
};
@@ -533,7 +533,7 @@ static const struct bpf_func_proto bpf_seq_printf_btf_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_BTF_ID,
.arg1_btf_id = &btf_seq_file_ids[0],
- .arg2_type = ARG_PTR_TO_MEM,
+ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
};
@@ -694,7 +694,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto = {
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
- .arg4_type = ARG_PTR_TO_MEM,
+ .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
@@ -764,7 +764,7 @@ const struct bpf_func_proto bpf_get_current_task_btf_proto = {
.func = bpf_get_current_task_btf,
.gpl_only = true,
.ret_type = RET_PTR_TO_BTF_ID,
- .ret_btf_id = &btf_task_struct_ids[0],
+ .ret_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
};
BPF_CALL_1(bpf_task_pt_regs, struct task_struct *, task)
@@ -779,7 +779,7 @@ const struct bpf_func_proto bpf_task_pt_regs_proto = {
.func = bpf_task_pt_regs,
.gpl_only = true,
.arg1_type = ARG_PTR_TO_BTF_ID,
- .arg1_btf_id = &btf_task_struct_ids[0],
+ .arg1_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
.ret_type = RET_PTR_TO_BTF_ID,
.ret_btf_id = &bpf_task_pt_regs_ids[0],
};
@@ -1004,7 +1004,7 @@ const struct bpf_func_proto bpf_snprintf_btf_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_MEM,
.arg2_type = ARG_CONST_SIZE,
- .arg3_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg4_type = ARG_CONST_SIZE,
.arg5_type = ARG_ANYTHING,
};
@@ -1012,7 +1012,7 @@ const struct bpf_func_proto bpf_snprintf_btf_proto = {
BPF_CALL_1(bpf_get_func_ip_tracing, void *, ctx)
{
/* This helper call is inlined by verifier. */
- return ((u64 *)ctx)[-1];
+ return ((u64 *)ctx)[-2];
}
static const struct bpf_func_proto bpf_get_func_ip_proto_tracing = {
@@ -1091,6 +1091,53 @@ static const struct bpf_func_proto bpf_get_branch_snapshot_proto = {
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
};
+BPF_CALL_3(get_func_arg, void *, ctx, u32, n, u64 *, value)
+{
+ /* This helper call is inlined by verifier. */
+ u64 nr_args = ((u64 *)ctx)[-1];
+
+ if ((u64) n >= nr_args)
+ return -EINVAL;
+ *value = ((u64 *)ctx)[n];
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_get_func_arg_proto = {
+ .func = get_func_arg,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_PTR_TO_LONG,
+};
+
+BPF_CALL_2(get_func_ret, void *, ctx, u64 *, value)
+{
+ /* This helper call is inlined by verifier. */
+ u64 nr_args = ((u64 *)ctx)[-1];
+
+ *value = ((u64 *)ctx)[nr_args];
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_get_func_ret_proto = {
+ .func = get_func_ret,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_LONG,
+};
+
+BPF_CALL_1(get_func_arg_cnt, void *, ctx)
+{
+ /* This helper call is inlined by verifier. */
+ return ((u64 *)ctx)[-1];
+}
+
+static const struct bpf_func_proto bpf_get_func_arg_cnt_proto = {
+ .func = get_func_arg_cnt,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
static const struct bpf_func_proto *
bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
@@ -1111,8 +1158,6 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_ktime_get_ns_proto;
case BPF_FUNC_ktime_get_boot_ns:
return &bpf_ktime_get_boot_ns_proto;
- case BPF_FUNC_ktime_get_coarse_ns:
- return &bpf_ktime_get_coarse_ns_proto;
case BPF_FUNC_tail_call:
return &bpf_tail_call_proto;
case BPF_FUNC_get_current_pid_tgid:
@@ -1208,6 +1253,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_func_ip_proto_tracing;
case BPF_FUNC_get_branch_snapshot:
return &bpf_get_branch_snapshot_proto;
+ case BPF_FUNC_find_vma:
+ return &bpf_find_vma_proto;
case BPF_FUNC_trace_vprintk:
return bpf_get_trace_vprintk_proto();
default:
@@ -1287,7 +1334,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_tp = {
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
- .arg4_type = ARG_PTR_TO_MEM,
+ .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
@@ -1402,9 +1449,6 @@ static const struct bpf_func_proto bpf_perf_prog_read_value_proto = {
BPF_CALL_4(bpf_read_branch_records, struct bpf_perf_event_data_kern *, ctx,
void *, buf, u32, size, u64, flags)
{
-#ifndef CONFIG_X86
- return -ENOENT;
-#else
static const u32 br_entry_size = sizeof(struct perf_branch_entry);
struct perf_branch_stack *br_stack = ctx->data->br_stack;
u32 to_copy;
@@ -1413,7 +1457,7 @@ BPF_CALL_4(bpf_read_branch_records, struct bpf_perf_event_data_kern *, ctx,
return -EINVAL;
if (unlikely(!br_stack))
- return -EINVAL;
+ return -ENOENT;
if (flags & BPF_F_GET_BRANCH_RECORDS_SIZE)
return br_stack->nr * br_entry_size;
@@ -1425,7 +1469,6 @@ BPF_CALL_4(bpf_read_branch_records, struct bpf_perf_event_data_kern *, ctx,
memcpy(buf, br_stack->entries, to_copy);
return to_copy;
-#endif
}
static const struct bpf_func_proto bpf_read_branch_records_proto = {
@@ -1513,7 +1556,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
- .arg4_type = ARG_PTR_TO_MEM,
+ .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
@@ -1567,7 +1610,7 @@ static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
- .arg2_type = ARG_PTR_TO_MEM,
+ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
};
@@ -1633,6 +1676,12 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
NULL;
case BPF_FUNC_d_path:
return &bpf_d_path_proto;
+ case BPF_FUNC_get_func_arg:
+ return bpf_prog_has_trampoline(prog) ? &bpf_get_func_arg_proto : NULL;
+ case BPF_FUNC_get_func_ret:
+ return bpf_prog_has_trampoline(prog) ? &bpf_get_func_ret_proto : NULL;
+ case BPF_FUNC_get_func_arg_cnt:
+ return bpf_prog_has_trampoline(prog) ? &bpf_get_func_arg_cnt_proto : NULL;
default:
fn = raw_tp_prog_func_proto(func_id, prog);
if (!fn && prog->expected_attach_type == BPF_TRACE_ITER)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 30bc880c3849..6163b6f762f7 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -5217,6 +5217,7 @@ int unregister_ftrace_direct(unsigned long ip, unsigned long addr)
{
struct ftrace_direct_func *direct;
struct ftrace_func_entry *entry;
+ struct ftrace_hash *hash;
int ret = -ENODEV;
mutex_lock(&direct_mutex);
@@ -5225,7 +5226,8 @@ int unregister_ftrace_direct(unsigned long ip, unsigned long addr)
if (!entry)
goto out_unlock;
- if (direct_functions->count == 1)
+ hash = direct_ops.func_hash->filter_hash;
+ if (hash->count == 1)
unregister_ftrace_function(&direct_ops);
ret = ftrace_set_filter_ip(&direct_ops, ip, 1, 0);
@@ -5540,6 +5542,10 @@ int unregister_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr)
err = unregister_ftrace_function(ops);
remove_direct_functions_hash(hash, addr);
mutex_unlock(&direct_mutex);
+
+ /* cleanup for possible another register call */
+ ops->func = NULL;
+ ops->trampoline = 0;
return err;
}
EXPORT_SYMBOL_GPL(unregister_ftrace_direct_multi);
@@ -6388,6 +6394,27 @@ static int ftrace_cmp_ips(const void *a, const void *b)
return 0;
}
+#ifdef CONFIG_FTRACE_SORT_STARTUP_TEST
+static void test_is_sorted(unsigned long *start, unsigned long count)
+{
+ int i;
+
+ for (i = 1; i < count; i++) {
+ if (WARN(start[i - 1] > start[i],
+ "[%d] %pS at %lx is not sorted with %pS at %lx\n", i,
+ (void *)start[i - 1], start[i - 1],
+ (void *)start[i], start[i]))
+ break;
+ }
+ if (i == count)
+ pr_info("ftrace section at %px sorted properly\n", start);
+}
+#else
+static void test_is_sorted(unsigned long *start, unsigned long count)
+{
+}
+#endif
+
static int ftrace_process_locs(struct module *mod,
unsigned long *start,
unsigned long *end)
@@ -6406,8 +6433,17 @@ static int ftrace_process_locs(struct module *mod,
if (!count)
return 0;
- sort(start, count, sizeof(*start),
- ftrace_cmp_ips, NULL);
+ /*
+ * Sorting mcount in vmlinux at build time depend on
+ * CONFIG_BUILDTIME_TABLE_SORT, while mcount loc in
+ * modules can not be sorted at build time.
+ */
+ if (!IS_ENABLED(CONFIG_BUILDTIME_TABLE_SORT) || mod) {
+ sort(start, count, sizeof(*start),
+ ftrace_cmp_ips, NULL);
+ } else {
+ test_is_sorted(start, count);
+ }
start_pg = ftrace_allocate_pages(count);
if (!start_pg)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 2699e9e562b1..05dfc7a12d3d 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -5898,16 +5898,13 @@ static __init int test_ringbuffer(void)
rb_data[cpu].buffer = buffer;
rb_data[cpu].cpu = cpu;
rb_data[cpu].cnt = cpu;
- rb_threads[cpu] = kthread_create(rb_test, &rb_data[cpu],
- "rbtester/%d", cpu);
+ rb_threads[cpu] = kthread_run_on_cpu(rb_test, &rb_data[cpu],
+ cpu, "rbtester/%u");
if (WARN_ON(IS_ERR(rb_threads[cpu]))) {
pr_cont("FAILED\n");
ret = PTR_ERR(rb_threads[cpu]);
goto out_free;
}
-
- kthread_bind(rb_threads[cpu], cpu);
- wake_up_process(rb_threads[cpu]);
}
/* Now create the rb hammer! */
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index f9139dc1262c..a569a0cb81ee 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -980,6 +980,8 @@ __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *ev
ring_buffer_write(buffer, event->array[0], &event->array[1]);
/* Release the temp buffer */
this_cpu_dec(trace_buffered_event_cnt);
+ /* ring_buffer_unlock_commit() enables preemption */
+ preempt_enable_notrace();
} else
ring_buffer_unlock_commit(buffer, event);
}
@@ -2601,6 +2603,8 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
trace_flags |= TRACE_FLAG_HARDIRQ;
if (in_serving_softirq())
trace_flags |= TRACE_FLAG_SOFTIRQ;
+ if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
+ trace_flags |= TRACE_FLAG_BH_OFF;
if (tif_need_resched())
trace_flags |= TRACE_FLAG_NEED_RESCHED;
@@ -2745,8 +2749,8 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
*current_rb = tr->array_buffer.buffer;
if (!tr->no_filter_buffering_ref &&
- (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
- (entry = this_cpu_read(trace_buffered_event))) {
+ (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
+ preempt_disable_notrace();
/*
* Filtering is on, so try to use the per cpu buffer first.
* This buffer will simulate a ring_buffer_event,
@@ -2764,33 +2768,38 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
* is still quicker than no copy on match, but having
* to discard out of the ring buffer on a failed match.
*/
- int max_len = PAGE_SIZE - struct_size(entry, array, 1);
+ if ((entry = __this_cpu_read(trace_buffered_event))) {
+ int max_len = PAGE_SIZE - struct_size(entry, array, 1);
- val = this_cpu_inc_return(trace_buffered_event_cnt);
+ val = this_cpu_inc_return(trace_buffered_event_cnt);
- /*
- * Preemption is disabled, but interrupts and NMIs
- * can still come in now. If that happens after
- * the above increment, then it will have to go
- * back to the old method of allocating the event
- * on the ring buffer, and if the filter fails, it
- * will have to call ring_buffer_discard_commit()
- * to remove it.
- *
- * Need to also check the unlikely case that the
- * length is bigger than the temp buffer size.
- * If that happens, then the reserve is pretty much
- * guaranteed to fail, as the ring buffer currently
- * only allows events less than a page. But that may
- * change in the future, so let the ring buffer reserve
- * handle the failure in that case.
- */
- if (val == 1 && likely(len <= max_len)) {
- trace_event_setup(entry, type, trace_ctx);
- entry->array[0] = len;
- return entry;
+ /*
+ * Preemption is disabled, but interrupts and NMIs
+ * can still come in now. If that happens after
+ * the above increment, then it will have to go
+ * back to the old method of allocating the event
+ * on the ring buffer, and if the filter fails, it
+ * will have to call ring_buffer_discard_commit()
+ * to remove it.
+ *
+ * Need to also check the unlikely case that the
+ * length is bigger than the temp buffer size.
+ * If that happens, then the reserve is pretty much
+ * guaranteed to fail, as the ring buffer currently
+ * only allows events less than a page. But that may
+ * change in the future, so let the ring buffer reserve
+ * handle the failure in that case.
+ */
+ if (val == 1 && likely(len <= max_len)) {
+ trace_event_setup(entry, type, trace_ctx);
+ entry->array[0] = len;
+ /* Return with preemption disabled */
+ return entry;
+ }
+ this_cpu_dec(trace_buffered_event_cnt);
}
- this_cpu_dec(trace_buffered_event_cnt);
+ /* __trace_buffer_lock_reserve() disables preemption */
+ preempt_enable_notrace();
}
entry = __trace_buffer_lock_reserve(*current_rb, type, len,
@@ -3207,7 +3216,7 @@ struct trace_buffer_struct {
char buffer[4][TRACE_BUF_SIZE];
};
-static struct trace_buffer_struct *trace_percpu_buffer;
+static struct trace_buffer_struct __percpu *trace_percpu_buffer;
/*
* This allows for lockless recording. If we're nested too deeply, then
@@ -3217,7 +3226,7 @@ static char *get_trace_buf(void)
{
struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
- if (!buffer || buffer->nesting >= 4)
+ if (!trace_percpu_buffer || buffer->nesting >= 4)
return NULL;
buffer->nesting++;
@@ -3236,7 +3245,7 @@ static void put_trace_buf(void)
static int alloc_percpu_trace_buffer(void)
{
- struct trace_buffer_struct *buffers;
+ struct trace_buffer_struct __percpu *buffers;
if (trace_percpu_buffer)
return 0;
@@ -3812,6 +3821,18 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
iter->fmt[i] = '\0';
trace_seq_vprintf(&iter->seq, iter->fmt, ap);
+ /*
+ * If iter->seq is full, the above call no longer guarantees
+ * that ap is in sync with fmt processing, and further calls
+ * to va_arg() can return wrong positional arguments.
+ *
+ * Ensure that ap is no longer used in this case.
+ */
+ if (iter->seq.full) {
+ p = "";
+ break;
+ }
+
if (star)
len = va_arg(ap, int);
@@ -4171,7 +4192,7 @@ unsigned long trace_total_entries(struct trace_array *tr)
static void print_lat_help_header(struct seq_file *m)
{
seq_puts(m, "# _------=> CPU# \n"
- "# / _-----=> irqs-off \n"
+ "# / _-----=> irqs-off/BH-disabled\n"
"# | / _----=> need-resched \n"
"# || / _---=> hardirq/softirq \n"
"# ||| / _--=> preempt-depth \n"
@@ -4212,7 +4233,7 @@ static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file
print_event_info(buf, m);
- seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space);
+ seq_printf(m, "# %.*s _-----=> irqs-off/BH-disabled\n", prec, space);
seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
@@ -4822,6 +4843,12 @@ int tracing_open_generic_tr(struct inode *inode, struct file *filp)
return 0;
}
+static int tracing_mark_open(struct inode *inode, struct file *filp)
+{
+ stream_open(inode, filp);
+ return tracing_open_generic_tr(inode, filp);
+}
+
static int tracing_release(struct inode *inode, struct file *file)
{
struct trace_array *tr = inode->i_private;
@@ -5623,7 +5650,7 @@ static const char readme_msg[] =
"\t - a numeric literal: e.g. ms_per_sec=1000,\n"
"\t - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
"\n"
- "\t hist trigger aritmethic expressions support addition(+), subtraction(-),\n"
+ "\t hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
"\t multiplication(*) and division(/) operators. An operand can be either a\n"
"\t variable reference, field or numeric literal.\n"
"\n"
@@ -6706,12 +6733,9 @@ waitagain:
cnt = PAGE_SIZE - 1;
/* reset all but tr, trace, and overruns */
- memset(&iter->seq, 0,
- sizeof(struct trace_iterator) -
- offsetof(struct trace_iterator, seq));
+ trace_iterator_reset(iter);
cpumask_clear(iter->started);
trace_seq_init(&iter->seq);
- iter->pos = -1;
trace_event_read_lock();
trace_access_lock(iter->cpu_file);
@@ -7100,9 +7124,6 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
if (tt)
event_triggers_post_call(tr->trace_marker_file, tt);
- if (written > 0)
- *fpos += written;
-
return written;
}
@@ -7161,9 +7182,6 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
__buffer_unlock_commit(buffer, event);
- if (written > 0)
- *fpos += written;
-
return written;
}
@@ -7563,16 +7581,14 @@ static const struct file_operations tracing_free_buffer_fops = {
};
static const struct file_operations tracing_mark_fops = {
- .open = tracing_open_generic_tr,
+ .open = tracing_mark_open,
.write = tracing_mark_write,
- .llseek = generic_file_llseek,
.release = tracing_release_generic_tr,
};
static const struct file_operations tracing_mark_raw_fops = {
- .open = tracing_open_generic_tr,
+ .open = tracing_mark_open,
.write = tracing_mark_raw_write,
- .llseek = generic_file_llseek,
.release = tracing_release_generic_tr,
};
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 6b60ab9475ed..d038ddbf1bea 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -83,6 +83,9 @@ enum trace_type {
#undef __dynamic_array
#define __dynamic_array(type, item) type item[];
+#undef __rel_dynamic_array
+#define __rel_dynamic_array(type, item) type item[];
+
#undef F_STRUCT
#define F_STRUCT(args...) args
@@ -1334,10 +1337,12 @@ __trace_event_discard_commit(struct trace_buffer *buffer,
struct ring_buffer_event *event)
{
if (this_cpu_read(trace_buffered_event) == event) {
- /* Simply release the temp buffer */
+ /* Simply release the temp buffer and enable preemption */
this_cpu_dec(trace_buffered_event_cnt);
+ preempt_enable_notrace();
return;
}
+ /* ring_buffer_discard_commit() enables preemption */
ring_buffer_discard_commit(buffer, event);
}
@@ -1366,14 +1371,26 @@ __event_trigger_test_discard(struct trace_event_file *file,
if (eflags & EVENT_FILE_FL_TRIGGER_COND)
*tt = event_triggers_call(file, buffer, entry, event);
- if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
- (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
- !filter_match_preds(file->filter, entry))) {
- __trace_event_discard_commit(buffer, event);
- return true;
- }
+ if (likely(!(file->flags & (EVENT_FILE_FL_SOFT_DISABLED |
+ EVENT_FILE_FL_FILTERED |
+ EVENT_FILE_FL_PID_FILTER))))
+ return false;
+
+ if (file->flags & EVENT_FILE_FL_SOFT_DISABLED)
+ goto discard;
+
+ if (file->flags & EVENT_FILE_FL_FILTERED &&
+ !filter_match_preds(file->filter, entry))
+ goto discard;
+
+ if ((file->flags & EVENT_FILE_FL_PID_FILTER) &&
+ trace_event_ignore_this_pid(file))
+ goto discard;
return false;
+ discard:
+ __trace_event_discard_commit(buffer, event);
+ return true;
}
/**
@@ -1453,6 +1470,7 @@ struct filter_pred {
static inline bool is_string_field(struct ftrace_event_field *field)
{
return field->filter_type == FILTER_DYN_STRING ||
+ field->filter_type == FILTER_RDYN_STRING ||
field->filter_type == FILTER_STATIC_STRING ||
field->filter_type == FILTER_PTR_STRING ||
field->filter_type == FILTER_COMM;
@@ -1560,15 +1578,13 @@ extern int event_enable_trigger_print(struct seq_file *m,
struct event_trigger_data *data);
extern void event_enable_trigger_free(struct event_trigger_ops *ops,
struct event_trigger_data *data);
-extern int event_enable_trigger_func(struct event_command *cmd_ops,
- struct trace_event_file *file,
- char *glob, char *cmd, char *param);
+extern int event_enable_trigger_parse(struct event_command *cmd_ops,
+ struct trace_event_file *file,
+ char *glob, char *cmd, char *param);
extern int event_enable_register_trigger(char *glob,
- struct event_trigger_ops *ops,
struct event_trigger_data *data,
struct trace_event_file *file);
extern void event_enable_unregister_trigger(char *glob,
- struct event_trigger_ops *ops,
struct event_trigger_data *test,
struct trace_event_file *file);
extern void trigger_data_free(struct event_trigger_data *data);
@@ -1594,6 +1610,30 @@ get_named_trigger_data(struct event_trigger_data *data);
extern int register_event_command(struct event_command *cmd);
extern int unregister_event_command(struct event_command *cmd);
extern int register_trigger_hist_enable_disable_cmds(void);
+extern bool event_trigger_check_remove(const char *glob);
+extern bool event_trigger_empty_param(const char *param);
+extern int event_trigger_separate_filter(char *param_and_filter, char **param,
+ char **filter, bool param_required);
+extern struct event_trigger_data *
+event_trigger_alloc(struct event_command *cmd_ops,
+ char *cmd,
+ char *param,
+ void *private_data);
+extern int event_trigger_parse_num(char *trigger,
+ struct event_trigger_data *trigger_data);
+extern int event_trigger_set_filter(struct event_command *cmd_ops,
+ struct trace_event_file *file,
+ char *param,
+ struct event_trigger_data *trigger_data);
+extern void event_trigger_reset_filter(struct event_command *cmd_ops,
+ struct event_trigger_data *trigger_data);
+extern int event_trigger_register(struct event_command *cmd_ops,
+ struct trace_event_file *file,
+ char *glob,
+ char *cmd,
+ char *trigger,
+ struct event_trigger_data *trigger_data,
+ int *n_registered);
/**
* struct event_trigger_ops - callbacks for trace event triggers
@@ -1601,10 +1641,20 @@ extern int register_trigger_hist_enable_disable_cmds(void);
* The methods in this structure provide per-event trigger hooks for
* various trigger operations.
*
+ * The @init and @free methods are used during trigger setup and
+ * teardown, typically called from an event_command's @parse()
+ * function implementation.
+ *
+ * The @print method is used to print the trigger spec.
+ *
+ * The @trigger method is the function that actually implements the
+ * trigger and is called in the context of the triggering event
+ * whenever that event occurs.
+ *
* All the methods below, except for @init() and @free(), must be
* implemented.
*
- * @func: The trigger 'probe' function called when the triggering
+ * @trigger: The trigger 'probe' function called when the triggering
* event occurs. The data passed into this callback is the data
* that was supplied to the event_command @reg() function that
* registered the trigger (see struct event_command) along with
@@ -1633,9 +1683,10 @@ extern int register_trigger_hist_enable_disable_cmds(void);
* (see trace_event_triggers.c).
*/
struct event_trigger_ops {
- void (*func)(struct event_trigger_data *data,
- struct trace_buffer *buffer, void *rec,
- struct ring_buffer_event *rbe);
+ void (*trigger)(struct event_trigger_data *data,
+ struct trace_buffer *buffer,
+ void *rec,
+ struct ring_buffer_event *rbe);
int (*init)(struct event_trigger_ops *ops,
struct event_trigger_data *data);
void (*free)(struct event_trigger_ops *ops,
@@ -1684,7 +1735,7 @@ struct event_trigger_ops {
* All the methods below, except for @set_filter() and @unreg_all(),
* must be implemented.
*
- * @func: The callback function responsible for parsing and
+ * @parse: The callback function responsible for parsing and
* registering the trigger written to the 'trigger' file by the
* user. It allocates the trigger instance and registers it with
* the appropriate trace event. It makes use of the other
@@ -1719,21 +1770,24 @@ struct event_trigger_ops {
*
* @get_trigger_ops: The callback function invoked to retrieve the
* event_trigger_ops implementation associated with the command.
+ * This callback function allows a single event_command to
+ * support multiple trigger implementations via different sets of
+ * event_trigger_ops, depending on the value of the @param
+ * string.
*/
struct event_command {
struct list_head list;
char *name;
enum event_trigger_type trigger_type;
int flags;
- int (*func)(struct event_command *cmd_ops,
- struct trace_event_file *file,
- char *glob, char *cmd, char *params);
+ int (*parse)(struct event_command *cmd_ops,
+ struct trace_event_file *file,
+ char *glob, char *cmd,
+ char *param_and_filter);
int (*reg)(char *glob,
- struct event_trigger_ops *ops,
struct event_trigger_data *data,
struct trace_event_file *file);
void (*unreg)(char *glob,
- struct event_trigger_ops *ops,
struct event_trigger_data *data,
struct trace_event_file *file);
void (*unreg_all)(struct trace_event_file *file);
@@ -1914,14 +1968,7 @@ extern struct trace_iterator *tracepoint_print_iter;
*/
static __always_inline void trace_iterator_reset(struct trace_iterator *iter)
{
- const size_t offset = offsetof(struct trace_iterator, seq);
-
- /*
- * Keep gcc from complaining about overwriting more than just one
- * member in the structure.
- */
- memset((char *)iter + offset, 0, sizeof(struct trace_iterator) - offset);
-
+ memset_startat(iter, 0, seq);
iter->pos = -1;
}
diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c
index 928867f527e7..191db32dec46 100644
--- a/kernel/trace/trace_eprobe.c
+++ b/kernel/trace/trace_eprobe.c
@@ -489,18 +489,12 @@ __eprobe_trace_func(struct eprobe_data *edata, void *rec)
if (trace_trigger_soft_disabled(edata->file))
return;
- fbuffer.trace_ctx = tracing_gen_ctx();
- fbuffer.trace_file = edata->file;
-
dsize = get_eprobe_size(&edata->ep->tp, rec);
- fbuffer.regs = NULL;
-
- fbuffer.event =
- trace_event_buffer_lock_reserve(&fbuffer.buffer, edata->file,
- call->event.type,
- sizeof(*entry) + edata->ep->tp.size + dsize,
- fbuffer.trace_ctx);
- if (!fbuffer.event)
+
+ entry = trace_event_buffer_reserve(&fbuffer, edata->file,
+ sizeof(*entry) + edata->ep->tp.size + dsize);
+
+ if (!entry)
return;
entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
@@ -549,29 +543,29 @@ static void eprobe_trigger_func(struct event_trigger_data *data,
}
static struct event_trigger_ops eprobe_trigger_ops = {
- .func = eprobe_trigger_func,
+ .trigger = eprobe_trigger_func,
.print = eprobe_trigger_print,
.init = eprobe_trigger_init,
.free = eprobe_trigger_free,
};
-static int eprobe_trigger_cmd_func(struct event_command *cmd_ops,
- struct trace_event_file *file,
- char *glob, char *cmd, char *param)
+static int eprobe_trigger_cmd_parse(struct event_command *cmd_ops,
+ struct trace_event_file *file,
+ char *glob, char *cmd, char *param)
{
return -1;
}
-static int eprobe_trigger_reg_func(char *glob, struct event_trigger_ops *ops,
- struct event_trigger_data *data,
- struct trace_event_file *file)
+static int eprobe_trigger_reg_func(char *glob,
+ struct event_trigger_data *data,
+ struct trace_event_file *file)
{
return -1;
}
-static void eprobe_trigger_unreg_func(char *glob, struct event_trigger_ops *ops,
- struct event_trigger_data *data,
- struct trace_event_file *file)
+static void eprobe_trigger_unreg_func(char *glob,
+ struct event_trigger_data *data,
+ struct trace_event_file *file)
{
}
@@ -586,7 +580,7 @@ static struct event_command event_trigger_cmd = {
.name = "eprobe",
.trigger_type = ETT_EVENT_EPROBE,
.flags = EVENT_CMD_FL_NEEDS_REC,
- .func = eprobe_trigger_cmd_func,
+ .parse = eprobe_trigger_cmd_parse,
.reg = eprobe_trigger_reg_func,
.unreg = eprobe_trigger_unreg_func,
.unreg_all = NULL,
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 4021b9a79f93..3147614c1812 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -2678,12 +2678,24 @@ static struct trace_event_file *
trace_create_new_event(struct trace_event_call *call,
struct trace_array *tr)
{
+ struct trace_pid_list *no_pid_list;
+ struct trace_pid_list *pid_list;
struct trace_event_file *file;
+ unsigned int first;
file = kmem_cache_alloc(file_cachep, GFP_TRACE);
if (!file)
return NULL;
+ pid_list = rcu_dereference_protected(tr->filtered_pids,
+ lockdep_is_held(&event_mutex));
+ no_pid_list = rcu_dereference_protected(tr->filtered_no_pids,
+ lockdep_is_held(&event_mutex));
+
+ if (!trace_pid_list_first(pid_list, &first) ||
+ !trace_pid_list_first(no_pid_list, &first))
+ file->flags |= EVENT_FILE_FL_PID_FILTER;
+
file->event_call = call;
file->tr = tr;
atomic_set(&file->sm_ref, 0);
@@ -3449,10 +3461,8 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
entry = trace_create_file("enable", TRACE_MODE_WRITE, d_events,
tr, &ftrace_tr_enable_fops);
- if (!entry) {
- pr_warn("Could not create tracefs 'enable' entry\n");
+ if (!entry)
return -ENOMEM;
- }
/* There are not as crucial, just warn if they are not created */
@@ -3468,17 +3478,13 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
pr_warn("Could not create tracefs 'set_event_notrace_pid' entry\n");
/* ring buffer internal formats */
- entry = trace_create_file("header_page", TRACE_MODE_READ, d_events,
+ trace_create_file("header_page", TRACE_MODE_READ, d_events,
ring_buffer_print_page_header,
&ftrace_show_header_fops);
- if (!entry)
- pr_warn("Could not create tracefs 'header_page' entry\n");
- entry = trace_create_file("header_event", TRACE_MODE_READ, d_events,
+ trace_create_file("header_event", TRACE_MODE_READ, d_events,
ring_buffer_print_entry_header,
&ftrace_show_header_fops);
- if (!entry)
- pr_warn("Could not create tracefs 'header_event' entry\n");
tr->event_dir = d_events;
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index c9124038b140..b458a9afa2c0 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -5,6 +5,7 @@
* Copyright (C) 2009 Tom Zanussi <[email protected]>
*/
+#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/mutex.h>
@@ -654,6 +655,52 @@ DEFINE_EQUALITY_PRED(32);
DEFINE_EQUALITY_PRED(16);
DEFINE_EQUALITY_PRED(8);
+/* user space strings temp buffer */
+#define USTRING_BUF_SIZE 1024
+
+struct ustring_buffer {
+ char buffer[USTRING_BUF_SIZE];
+};
+
+static __percpu struct ustring_buffer *ustring_per_cpu;
+
+static __always_inline char *test_string(char *str)
+{
+ struct ustring_buffer *ubuf;
+ char *kstr;
+
+ if (!ustring_per_cpu)
+ return NULL;
+
+ ubuf = this_cpu_ptr(ustring_per_cpu);
+ kstr = ubuf->buffer;
+
+ /* For safety, do not trust the string pointer */
+ if (!strncpy_from_kernel_nofault(kstr, str, USTRING_BUF_SIZE))
+ return NULL;
+ return kstr;
+}
+
+static __always_inline char *test_ustring(char *str)
+{
+ struct ustring_buffer *ubuf;
+ char __user *ustr;
+ char *kstr;
+
+ if (!ustring_per_cpu)
+ return NULL;
+
+ ubuf = this_cpu_ptr(ustring_per_cpu);
+ kstr = ubuf->buffer;
+
+ /* user space address? */
+ ustr = (char __user *)str;
+ if (!strncpy_from_user_nofault(kstr, ustr, USTRING_BUF_SIZE))
+ return NULL;
+
+ return kstr;
+}
+
/* Filter predicate for fixed sized arrays of characters */
static int filter_pred_string(struct filter_pred *pred, void *event)
{
@@ -667,19 +714,43 @@ static int filter_pred_string(struct filter_pred *pred, void *event)
return match;
}
-/* Filter predicate for char * pointers */
-static int filter_pred_pchar(struct filter_pred *pred, void *event)
+static __always_inline int filter_pchar(struct filter_pred *pred, char *str)
{
- char **addr = (char **)(event + pred->offset);
int cmp, match;
- int len = strlen(*addr) + 1; /* including tailing '\0' */
+ int len;
- cmp = pred->regex.match(*addr, &pred->regex, len);
+ len = strlen(str) + 1; /* including tailing '\0' */
+ cmp = pred->regex.match(str, &pred->regex, len);
match = cmp ^ pred->not;
return match;
}
+/* Filter predicate for char * pointers */
+static int filter_pred_pchar(struct filter_pred *pred, void *event)
+{
+ char **addr = (char **)(event + pred->offset);
+ char *str;
+
+ str = test_string(*addr);
+ if (!str)
+ return 0;
+
+ return filter_pchar(pred, str);
+}
+
+/* Filter predicate for char * pointers in user space*/
+static int filter_pred_pchar_user(struct filter_pred *pred, void *event)
+{
+ char **addr = (char **)(event + pred->offset);
+ char *str;
+
+ str = test_ustring(*addr);
+ if (!str)
+ return 0;
+
+ return filter_pchar(pred, str);
+}
/*
* Filter predicate for dynamic sized arrays of characters.
@@ -706,6 +777,29 @@ static int filter_pred_strloc(struct filter_pred *pred, void *event)
return match;
}
+/*
+ * Filter predicate for relative dynamic sized arrays of characters.
+ * These are implemented through a list of strings at the end
+ * of the entry as same as dynamic string.
+ * The difference is that the relative one records the location offset
+ * from the field itself, not the event entry.
+ */
+static int filter_pred_strrelloc(struct filter_pred *pred, void *event)
+{
+ u32 *item = (u32 *)(event + pred->offset);
+ u32 str_item = *item;
+ int str_loc = str_item & 0xffff;
+ int str_len = str_item >> 16;
+ char *addr = (char *)(&item[1]) + str_loc;
+ int cmp, match;
+
+ cmp = pred->regex.match(addr, &pred->regex, str_len);
+
+ match = cmp ^ pred->not;
+
+ return match;
+}
+
/* Filter predicate for CPUs. */
static int filter_pred_cpu(struct filter_pred *pred, void *event)
{
@@ -756,7 +850,7 @@ static int filter_pred_none(struct filter_pred *pred, void *event)
*
* Note:
* - @str might not be NULL-terminated if it's of type DYN_STRING
- * or STATIC_STRING, unless @len is zero.
+ * RDYN_STRING, or STATIC_STRING, unless @len is zero.
*/
static int regex_match_full(char *str, struct regex *r, int len)
@@ -1083,6 +1177,9 @@ int filter_assign_type(const char *type)
if (strstr(type, "__data_loc") && strstr(type, "char"))
return FILTER_DYN_STRING;
+ if (strstr(type, "__rel_loc") && strstr(type, "char"))
+ return FILTER_RDYN_STRING;
+
if (strchr(type, '[') && strstr(type, "char"))
return FILTER_STATIC_STRING;
@@ -1158,6 +1255,7 @@ static int parse_pred(const char *str, void *data,
struct filter_pred *pred = NULL;
char num_buf[24]; /* Big enough to hold an address */
char *field_name;
+ bool ustring = false;
char q;
u64 val;
int len;
@@ -1192,6 +1290,12 @@ static int parse_pred(const char *str, void *data,
return -EINVAL;
}
+ /* See if the field is a user space string */
+ if ((len = str_has_prefix(str + i, ".ustring"))) {
+ ustring = true;
+ i += len;
+ }
+
while (isspace(str[i]))
i++;
@@ -1318,10 +1422,24 @@ static int parse_pred(const char *str, void *data,
pred->fn = filter_pred_string;
pred->regex.field_len = field->size;
- } else if (field->filter_type == FILTER_DYN_STRING)
+ } else if (field->filter_type == FILTER_DYN_STRING) {
pred->fn = filter_pred_strloc;
- else
- pred->fn = filter_pred_pchar;
+ } else if (field->filter_type == FILTER_RDYN_STRING)
+ pred->fn = filter_pred_strrelloc;
+ else {
+
+ if (!ustring_per_cpu) {
+ /* Once allocated, keep it around for good */
+ ustring_per_cpu = alloc_percpu(struct ustring_buffer);
+ if (!ustring_per_cpu)
+ goto err_mem;
+ }
+
+ if (ustring)
+ pred->fn = filter_pred_pchar_user;
+ else
+ pred->fn = filter_pred_pchar;
+ }
/* go past the last quote */
i++;
@@ -1387,6 +1505,9 @@ static int parse_pred(const char *str, void *data,
err_free:
kfree(pred);
return -EINVAL;
+err_mem:
+ kfree(pred);
+ return -ENOMEM;
}
enum {
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 8a10046c775f..5e6a988a8a51 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -217,6 +217,20 @@ static u64 hist_field_dynstring(struct hist_field *hist_field,
return (u64)(unsigned long)addr;
}
+static u64 hist_field_reldynstring(struct hist_field *hist_field,
+ struct tracing_map_elt *elt,
+ struct trace_buffer *buffer,
+ struct ring_buffer_event *rbe,
+ void *event)
+{
+ u32 *item = event + hist_field->field->offset;
+ u32 str_item = *item;
+ int str_loc = str_item & 0xffff;
+ char *addr = (char *)&item[1] + str_loc;
+
+ return (u64)(unsigned long)addr;
+}
+
static u64 hist_field_pstring(struct hist_field *hist_field,
struct tracing_map_elt *elt,
struct trace_buffer *buffer,
@@ -1956,8 +1970,10 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data,
if (field->filter_type == FILTER_STATIC_STRING) {
hist_field->fn = hist_field_string;
hist_field->size = field->size;
- } else if (field->filter_type == FILTER_DYN_STRING)
+ } else if (field->filter_type == FILTER_DYN_STRING) {
hist_field->fn = hist_field_dynstring;
+ } else if (field->filter_type == FILTER_RDYN_STRING)
+ hist_field->fn = hist_field_reldynstring;
else
hist_field->fn = hist_field_pstring;
} else {
@@ -2576,28 +2592,27 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
/* Split the expression string at the root operator */
if (!sep)
- goto free;
+ return ERR_PTR(-EINVAL);
+
*sep = '\0';
operand1_str = str;
str = sep+1;
/* Binary operator requires both operands */
if (*operand1_str == '\0' || *str == '\0')
- goto free;
+ return ERR_PTR(-EINVAL);
operand_flags = 0;
/* LHS of string is an expression e.g. a+b in a+b+c */
operand1 = parse_expr(hist_data, file, operand1_str, operand_flags, NULL, n_subexprs);
- if (IS_ERR(operand1)) {
- ret = PTR_ERR(operand1);
- operand1 = NULL;
- goto free;
- }
+ if (IS_ERR(operand1))
+ return ERR_CAST(operand1);
+
if (operand1->flags & HIST_FIELD_FL_STRING) {
hist_err(file->tr, HIST_ERR_INVALID_STR_OPERAND, errpos(operand1_str));
ret = -EINVAL;
- goto free;
+ goto free_op1;
}
/* RHS of string is another expression e.g. c in a+b+c */
@@ -2605,13 +2620,12 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
operand2 = parse_expr(hist_data, file, str, operand_flags, NULL, n_subexprs);
if (IS_ERR(operand2)) {
ret = PTR_ERR(operand2);
- operand2 = NULL;
- goto free;
+ goto free_op1;
}
if (operand2->flags & HIST_FIELD_FL_STRING) {
hist_err(file->tr, HIST_ERR_INVALID_STR_OPERAND, errpos(str));
ret = -EINVAL;
- goto free;
+ goto free_operands;
}
switch (field_op) {
@@ -2629,12 +2643,12 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
break;
default:
ret = -EINVAL;
- goto free;
+ goto free_operands;
}
ret = check_expr_operands(file->tr, operand1, operand2, &var1, &var2);
if (ret)
- goto free;
+ goto free_operands;
operand_flags = var1 ? var1->flags : operand1->flags;
operand2_flags = var2 ? var2->flags : operand2->flags;
@@ -2653,12 +2667,13 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
expr = create_hist_field(hist_data, NULL, flags, var_name);
if (!expr) {
ret = -ENOMEM;
- goto free;
+ goto free_operands;
}
operand1->read_once = true;
operand2->read_once = true;
+ /* The operands are now owned and free'd by 'expr' */
expr->operands[0] = operand1;
expr->operands[1] = operand2;
@@ -2669,7 +2684,7 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
if (!divisor) {
hist_err(file->tr, HIST_ERR_DIVISION_BY_ZERO, errpos(str));
ret = -EDOM;
- goto free;
+ goto free_expr;
}
/*
@@ -2709,18 +2724,22 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
expr->type = kstrdup_const(operand1->type, GFP_KERNEL);
if (!expr->type) {
ret = -ENOMEM;
- goto free;
+ goto free_expr;
}
expr->name = expr_str(expr, 0);
}
return expr;
-free:
- destroy_hist_field(operand1, 0);
+
+free_operands:
destroy_hist_field(operand2, 0);
- destroy_hist_field(expr, 0);
+free_op1:
+ destroy_hist_field(operand1, 0);
+ return ERR_PTR(ret);
+free_expr:
+ destroy_hist_field(expr, 0);
return ERR_PTR(ret);
}
@@ -2742,9 +2761,9 @@ static char *find_trigger_filter(struct hist_trigger_data *hist_data,
}
static struct event_command trigger_hist_cmd;
-static int event_hist_trigger_func(struct event_command *cmd_ops,
- struct trace_event_file *file,
- char *glob, char *cmd, char *param);
+static int event_hist_trigger_parse(struct event_command *cmd_ops,
+ struct trace_event_file *file,
+ char *glob, char *cmd, char *param);
static bool compatible_keys(struct hist_trigger_data *target_hist_data,
struct hist_trigger_data *hist_data,
@@ -2947,8 +2966,8 @@ create_field_var_hist(struct hist_trigger_data *target_hist_data,
var_hist->hist_data = hist_data;
/* Create the new histogram with our variable */
- ret = event_hist_trigger_func(&trigger_hist_cmd, file,
- "", "hist", cmd);
+ ret = event_hist_trigger_parse(&trigger_hist_cmd, file,
+ "", "hist", cmd);
if (ret) {
kfree(cmd);
kfree(var_hist->cmd);
@@ -3026,8 +3045,10 @@ static inline void __update_field_vars(struct tracing_map_elt *elt,
if (val->flags & HIST_FIELD_FL_STRING) {
char *str = elt_data->field_var_str[j++];
char *val_str = (char *)(uintptr_t)var_val;
+ unsigned int size;
- strscpy(str, val_str, val->size);
+ size = min(val->size, STR_VAR_LEN_MAX);
+ strscpy(str, val_str, size);
var_val = (u64)(uintptr_t)str;
}
tracing_map_set_var(elt, var_idx, var_val);
@@ -3752,7 +3773,7 @@ static int check_synth_field(struct synth_event *event,
if (strcmp(field->type, hist_field->type) != 0) {
if (field->size != hist_field->size ||
- field->is_signed != hist_field->is_signed)
+ (!field->is_string && field->is_signed != hist_field->is_signed))
return -EINVAL;
}
@@ -4914,6 +4935,7 @@ static void hist_trigger_elt_update(struct hist_trigger_data *hist_data,
if (hist_field->flags & HIST_FIELD_FL_STRING) {
unsigned int str_start, var_str_idx, idx;
char *str, *val_str;
+ unsigned int size;
str_start = hist_data->n_field_var_str +
hist_data->n_save_var_str;
@@ -4922,7 +4944,9 @@ static void hist_trigger_elt_update(struct hist_trigger_data *hist_data,
str = elt_data->field_var_str[idx];
val_str = (char *)(uintptr_t)hist_val;
- strscpy(str, val_str, hist_field->size);
+
+ size = min(hist_field->size, STR_VAR_LEN_MAX);
+ strscpy(str, val_str, size);
hist_val = (u64)(uintptr_t)str;
}
@@ -4953,7 +4977,8 @@ static inline void add_to_key(char *compound_key, void *key,
struct ftrace_event_field *field;
field = key_field->field;
- if (field->filter_type == FILTER_DYN_STRING)
+ if (field->filter_type == FILTER_DYN_STRING ||
+ field->filter_type == FILTER_RDYN_STRING)
size = *(u32 *)(rec + field->offset) >> 16;
else if (field->filter_type == FILTER_STATIC_STRING)
size = field->size;
@@ -5704,8 +5729,8 @@ static void unregister_field_var_hists(struct hist_trigger_data *hist_data)
for (i = 0; i < hist_data->n_field_var_hists; i++) {
file = hist_data->field_var_hists[i]->hist_data->event_file;
cmd = hist_data->field_var_hists[i]->cmd;
- ret = event_hist_trigger_func(&trigger_hist_cmd, file,
- "!hist", "hist", cmd);
+ ret = event_hist_trigger_parse(&trigger_hist_cmd, file,
+ "!hist", "hist", cmd);
WARN_ON_ONCE(ret < 0);
}
}
@@ -5734,7 +5759,7 @@ static void event_hist_trigger_free(struct event_trigger_ops *ops,
}
static struct event_trigger_ops event_hist_trigger_ops = {
- .func = event_hist_trigger,
+ .trigger = event_hist_trigger,
.print = event_hist_trigger_print,
.init = event_hist_trigger_init,
.free = event_hist_trigger_free,
@@ -5768,7 +5793,7 @@ static void event_hist_trigger_named_free(struct event_trigger_ops *ops,
}
static struct event_trigger_ops event_hist_trigger_named_ops = {
- .func = event_hist_trigger,
+ .trigger = event_hist_trigger,
.print = event_hist_trigger_print,
.init = event_hist_trigger_named_init,
.free = event_hist_trigger_named_free,
@@ -5885,7 +5910,7 @@ static bool hist_trigger_match(struct event_trigger_data *data,
return true;
}
-static int hist_register_trigger(char *glob, struct event_trigger_ops *ops,
+static int hist_register_trigger(char *glob,
struct event_trigger_data *data,
struct trace_event_file *file)
{
@@ -6037,7 +6062,7 @@ static bool hist_trigger_check_refs(struct event_trigger_data *data,
return false;
}
-static void hist_unregister_trigger(char *glob, struct event_trigger_ops *ops,
+static void hist_unregister_trigger(char *glob,
struct event_trigger_data *data,
struct trace_event_file *file)
{
@@ -6121,9 +6146,9 @@ static void hist_unreg_all(struct trace_event_file *file)
}
}
-static int event_hist_trigger_func(struct event_command *cmd_ops,
- struct trace_event_file *file,
- char *glob, char *cmd, char *param)
+static int event_hist_trigger_parse(struct event_command *cmd_ops,
+ struct trace_event_file *file,
+ char *glob, char *cmd, char *param)
{
unsigned int hist_trigger_bits = TRACING_MAP_BITS_DEFAULT;
struct event_trigger_data *trigger_data;
@@ -6237,7 +6262,7 @@ static int event_hist_trigger_func(struct event_command *cmd_ops,
goto out_free;
}
- cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file);
+ cmd_ops->unreg(glob+1, trigger_data, file);
se_name = trace_event_name(file->event_call);
se = find_synth_event(se_name);
if (se)
@@ -6246,7 +6271,7 @@ static int event_hist_trigger_func(struct event_command *cmd_ops,
goto out_free;
}
- ret = cmd_ops->reg(glob, trigger_ops, trigger_data, file);
+ ret = cmd_ops->reg(glob, trigger_data, file);
/*
* The above returns on success the # of triggers registered,
* but if it didn't register any it returns zero. Consider no
@@ -6289,7 +6314,7 @@ enable:
return ret;
out_unreg:
- cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file);
+ cmd_ops->unreg(glob+1, trigger_data, file);
out_free:
if (cmd_ops->set_filter)
cmd_ops->set_filter(NULL, trigger_data, NULL);
@@ -6306,7 +6331,7 @@ static struct event_command trigger_hist_cmd = {
.name = "hist",
.trigger_type = ETT_EVENT_HIST,
.flags = EVENT_CMD_FL_NEEDS_REC,
- .func = event_hist_trigger_func,
+ .parse = event_hist_trigger_parse,
.reg = hist_register_trigger,
.unreg = hist_unregister_trigger,
.unreg_all = hist_unreg_all,
@@ -6358,28 +6383,28 @@ hist_enable_count_trigger(struct event_trigger_data *data,
}
static struct event_trigger_ops hist_enable_trigger_ops = {
- .func = hist_enable_trigger,
+ .trigger = hist_enable_trigger,
.print = event_enable_trigger_print,
.init = event_trigger_init,
.free = event_enable_trigger_free,
};
static struct event_trigger_ops hist_enable_count_trigger_ops = {
- .func = hist_enable_count_trigger,
+ .trigger = hist_enable_count_trigger,
.print = event_enable_trigger_print,
.init = event_trigger_init,
.free = event_enable_trigger_free,
};
static struct event_trigger_ops hist_disable_trigger_ops = {
- .func = hist_enable_trigger,
+ .trigger = hist_enable_trigger,
.print = event_enable_trigger_print,
.init = event_trigger_init,
.free = event_enable_trigger_free,
};
static struct event_trigger_ops hist_disable_count_trigger_ops = {
- .func = hist_enable_count_trigger,
+ .trigger = hist_enable_count_trigger,
.print = event_enable_trigger_print,
.init = event_trigger_init,
.free = event_enable_trigger_free,
@@ -6421,7 +6446,7 @@ static void hist_enable_unreg_all(struct trace_event_file *file)
static struct event_command trigger_hist_enable_cmd = {
.name = ENABLE_HIST_STR,
.trigger_type = ETT_HIST_ENABLE,
- .func = event_enable_trigger_func,
+ .parse = event_enable_trigger_parse,
.reg = event_enable_register_trigger,
.unreg = event_enable_unregister_trigger,
.unreg_all = hist_enable_unreg_all,
@@ -6432,7 +6457,7 @@ static struct event_command trigger_hist_enable_cmd = {
static struct event_command trigger_hist_disable_cmd = {
.name = DISABLE_HIST_STR,
.trigger_type = ETT_HIST_ENABLE,
- .func = event_enable_trigger_func,
+ .parse = event_enable_trigger_parse,
.reg = event_enable_register_trigger,
.unreg = event_enable_unregister_trigger,
.unreg_all = hist_enable_unreg_all,
diff --git a/kernel/trace/trace_events_inject.c b/kernel/trace/trace_events_inject.c
index c188045c5f97..d6b4935a78c0 100644
--- a/kernel/trace/trace_events_inject.c
+++ b/kernel/trace/trace_events_inject.c
@@ -168,10 +168,14 @@ static void *trace_alloc_entry(struct trace_event_call *call, int *size)
continue;
if (field->filter_type == FILTER_STATIC_STRING)
continue;
- if (field->filter_type == FILTER_DYN_STRING) {
+ if (field->filter_type == FILTER_DYN_STRING ||
+ field->filter_type == FILTER_RDYN_STRING) {
u32 *str_item;
int str_loc = entry_size & 0xffff;
+ if (field->filter_type == FILTER_RDYN_STRING)
+ str_loc -= field->offset + field->size;
+
str_item = (u32 *)(entry + field->offset);
*str_item = str_loc; /* string length is 0. */
} else {
@@ -214,7 +218,8 @@ static int parse_entry(char *str, struct trace_event_call *call, void **pentry)
if (field->filter_type == FILTER_STATIC_STRING) {
strlcpy(entry + field->offset, addr, field->size);
- } else if (field->filter_type == FILTER_DYN_STRING) {
+ } else if (field->filter_type == FILTER_DYN_STRING ||
+ field->filter_type == FILTER_RDYN_STRING) {
int str_len = strlen(addr) + 1;
int str_loc = entry_size & 0xffff;
u32 *str_item;
@@ -229,6 +234,8 @@ static int parse_entry(char *str, struct trace_event_call *call, void **pentry)
strlcpy(entry + (entry_size - str_len), addr, str_len);
str_item = (u32 *)(entry + field->offset);
+ if (field->filter_type == FILTER_RDYN_STRING)
+ str_loc -= field->offset + field->size;
*str_item = (str_len << 16) | str_loc;
} else {
char **paddr;
diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index 22db3ce95e74..154db74dadbc 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -1237,9 +1237,8 @@ static int __create_synth_event(const char *name, const char *raw_fields)
argv + consumed, &consumed,
&field_version);
if (IS_ERR(field)) {
- argv_free(argv);
ret = PTR_ERR(field);
- goto err;
+ goto err_free_arg;
}
/*
@@ -1262,18 +1261,19 @@ static int __create_synth_event(const char *name, const char *raw_fields)
if (cmd_version > 1 && n_fields_this_loop >= 1) {
synth_err(SYNTH_ERR_INVALID_CMD, errpos(field_str));
ret = -EINVAL;
- goto err;
+ goto err_free_arg;
}
fields[n_fields++] = field;
if (n_fields == SYNTH_FIELDS_MAX) {
synth_err(SYNTH_ERR_TOO_MANY_FIELDS, 0);
ret = -EINVAL;
- goto err;
+ goto err_free_arg;
}
n_fields_this_loop++;
}
+ argv_free(argv);
if (consumed < argc) {
synth_err(SYNTH_ERR_INVALID_CMD, 0);
@@ -1281,7 +1281,6 @@ static int __create_synth_event(const char *name, const char *raw_fields)
goto err;
}
- argv_free(argv);
}
if (n_fields == 0) {
@@ -1307,6 +1306,8 @@ static int __create_synth_event(const char *name, const char *raw_fields)
kfree(saved_fields);
return ret;
+ err_free_arg:
+ argv_free(argv);
err:
for (i = 0; i < n_fields; i++)
free_synth_field(fields[i]);
@@ -1978,7 +1979,7 @@ EXPORT_SYMBOL_GPL(synth_event_add_next_val);
/**
* synth_event_add_val - Add a named field's value to an open synth trace
* @field_name: The name of the synthetic event field value to set
- * @val: The value to set the next field to
+ * @val: The value to set the named field to
* @trace_state: A pointer to object tracking the piecewise trace state
*
* Set the value of the named field in an event that's been opened by
@@ -2053,6 +2054,13 @@ static int create_synth_event(const char *raw_command)
last_cmd_set(raw_command);
+ name = raw_command;
+
+ /* Don't try to process if not our system */
+ if (name[0] != 's' || name[1] != ':')
+ return -ECANCELED;
+ name += 2;
+
p = strpbrk(raw_command, " \t");
if (!p) {
synth_err(SYNTH_ERR_INVALID_CMD, 0);
@@ -2061,12 +2069,6 @@ static int create_synth_event(const char *raw_command)
fields = skip_spaces(p);
- name = raw_command;
-
- if (name[0] != 's' || name[1] != ':')
- return -ECANCELED;
- name += 2;
-
/* This interface accepts group name prefix */
if (strchr(name, '/')) {
len = str_has_prefix(name, SYNTH_SYSTEM "/");
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index 3d5c07239a2a..d00fee705f9c 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -68,7 +68,7 @@ event_triggers_call(struct trace_event_file *file,
if (data->paused)
continue;
if (!rec) {
- data->ops->func(data, buffer, rec, event);
+ data->ops->trigger(data, buffer, rec, event);
continue;
}
filter = rcu_dereference_sched(data->filter);
@@ -78,7 +78,7 @@ event_triggers_call(struct trace_event_file *file,
tt |= data->cmd_ops->trigger_type;
continue;
}
- data->ops->func(data, buffer, rec, event);
+ data->ops->trigger(data, buffer, rec, event);
}
return tt;
}
@@ -106,7 +106,7 @@ event_triggers_post_call(struct trace_event_file *file,
if (data->paused)
continue;
if (data->cmd_ops->trigger_type & tt)
- data->ops->func(data, NULL, NULL, NULL);
+ data->ops->trigger(data, NULL, NULL, NULL);
}
}
EXPORT_SYMBOL_GPL(event_triggers_post_call);
@@ -245,7 +245,7 @@ int trigger_process_regex(struct trace_event_file *file, char *buff)
mutex_lock(&trigger_cmd_mutex);
list_for_each_entry(p, &trigger_commands, list) {
if (strcmp(p->name, command) == 0) {
- ret = p->func(p, file, buff, command, next);
+ ret = p->parse(p, file, buff, command, next);
goto out_unlock;
}
}
@@ -540,7 +540,6 @@ void update_cond_flag(struct trace_event_file *file)
/**
* register_trigger - Generic event_command @reg implementation
* @glob: The raw string used to register the trigger
- * @ops: The trigger ops associated with the trigger
* @data: Trigger-specific data to associate with the trigger
* @file: The trace_event_file associated with the event
*
@@ -551,7 +550,7 @@ void update_cond_flag(struct trace_event_file *file)
*
* Return: 0 on success, errno otherwise
*/
-static int register_trigger(char *glob, struct event_trigger_ops *ops,
+static int register_trigger(char *glob,
struct event_trigger_data *data,
struct trace_event_file *file)
{
@@ -589,7 +588,6 @@ out:
/**
* unregister_trigger - Generic event_command @unreg implementation
* @glob: The raw string used to register the trigger
- * @ops: The trigger ops associated with the trigger
* @test: Trigger-specific data used to find the trigger to remove
* @file: The trace_event_file associated with the event
*
@@ -598,7 +596,7 @@ out:
* Usually used directly as the @unreg method in event command
* implementations.
*/
-static void unregister_trigger(char *glob, struct event_trigger_ops *ops,
+static void unregister_trigger(char *glob,
struct event_trigger_data *test,
struct trace_event_file *file)
{
@@ -621,8 +619,350 @@ static void unregister_trigger(char *glob, struct event_trigger_ops *ops,
data->ops->free(data->ops, data);
}
+/*
+ * Event trigger parsing helper functions.
+ *
+ * These functions help make it easier to write an event trigger
+ * parsing function i.e. the struct event_command.parse() callback
+ * function responsible for parsing and registering a trigger command
+ * written to the 'trigger' file.
+ *
+ * A trigger command (or just 'trigger' for short) takes the form:
+ * [trigger] [if filter]
+ *
+ * The struct event_command.parse() callback (and other struct
+ * event_command functions) refer to several components of a trigger
+ * command. Those same components are referenced by the event trigger
+ * parsing helper functions defined below. These components are:
+ *
+ * cmd - the trigger command name
+ * glob - the trigger command name optionally prefaced with '!'
+ * param_and_filter - text following cmd and ':'
+ * param - text following cmd and ':' and stripped of filter
+ * filter - the optional filter text following (and including) 'if'
+ *
+ * To illustrate the use of these componenents, here are some concrete
+ * examples. For the following triggers:
+ *
+ * echo 'traceon:5 if pid == 0' > trigger
+ * - 'traceon' is both cmd and glob
+ * - '5 if pid == 0' is the param_and_filter
+ * - '5' is the param
+ * - 'if pid == 0' is the filter
+ *
+ * echo 'enable_event:sys:event:n' > trigger
+ * - 'enable_event' is both cmd and glob
+ * - 'sys:event:n' is the param_and_filter
+ * - 'sys:event:n' is the param
+ * - there is no filter
+ *
+ * echo 'hist:keys=pid if prio > 50' > trigger
+ * - 'hist' is both cmd and glob
+ * - 'keys=pid if prio > 50' is the param_and_filter
+ * - 'keys=pid' is the param
+ * - 'if prio > 50' is the filter
+ *
+ * echo '!enable_event:sys:event:n' > trigger
+ * - 'enable_event' the cmd
+ * - '!enable_event' is the glob
+ * - 'sys:event:n' is the param_and_filter
+ * - 'sys:event:n' is the param
+ * - there is no filter
+ *
+ * echo 'traceoff' > trigger
+ * - 'traceoff' is both cmd and glob
+ * - there is no param_and_filter
+ * - there is no param
+ * - there is no filter
+ *
+ * There are a few different categories of event trigger covered by
+ * these helpers:
+ *
+ * - triggers that don't require a parameter e.g. traceon
+ * - triggers that do require a parameter e.g. enable_event and hist
+ * - triggers that though they may not require a param may support an
+ * optional 'n' param (n = number of times the trigger should fire)
+ * e.g.: traceon:5 or enable_event:sys:event:n
+ * - triggers that do not support an 'n' param e.g. hist
+ *
+ * These functions can be used or ignored as necessary - it all
+ * depends on the complexity of the trigger, and the granularity of
+ * the functions supported reflects the fact that some implementations
+ * may need to customize certain aspects of their implementations and
+ * won't need certain functions. For instance, the hist trigger
+ * implementation doesn't use event_trigger_separate_filter() because
+ * it has special requirements for handling the filter.
+ */
+
+/**
+ * event_trigger_check_remove - check whether an event trigger specifies remove
+ * @glob: The trigger command string, with optional remove(!) operator
+ *
+ * The event trigger callback implementations pass in 'glob' as a
+ * parameter. This is the command name either with or without a
+ * remove(!) operator. This function simply parses the glob and
+ * determines whether the command corresponds to a trigger removal or
+ * a trigger addition.
+ *
+ * Return: true if this is a remove command, false otherwise
+ */
+bool event_trigger_check_remove(const char *glob)
+{
+ return (glob && glob[0] == '!') ? true : false;
+}
+
+/**
+ * event_trigger_empty_param - check whether the param is empty
+ * @param: The trigger param string
+ *
+ * The event trigger callback implementations pass in 'param' as a
+ * parameter. This corresponds to the string following the command
+ * name minus the command name. This function can be called by a
+ * callback implementation for any command that requires a param; a
+ * callback that doesn't require a param can ignore it.
+ *
+ * Return: true if this is an empty param, false otherwise
+ */
+bool event_trigger_empty_param(const char *param)
+{
+ return !param;
+}
+
+/**
+ * event_trigger_separate_filter - separate an event trigger from a filter
+ * @param: The param string containing trigger and possibly filter
+ * @trigger: outparam, will be filled with a pointer to the trigger
+ * @filter: outparam, will be filled with a pointer to the filter
+ * @param_required: Specifies whether or not the param string is required
+ *
+ * Given a param string of the form '[trigger] [if filter]', this
+ * function separates the filter from the trigger and returns the
+ * trigger in *trigger and the filter in *filter. Either the *trigger
+ * or the *filter may be set to NULL by this function - if not set to
+ * NULL, they will contain strings corresponding to the trigger and
+ * filter.
+ *
+ * There are two cases that need to be handled with respect to the
+ * passed-in param: either the param is required, or it is not
+ * required. If @param_required is set, and there's no param, it will
+ * return -EINVAL. If @param_required is not set and there's a param
+ * that starts with a number, that corresponds to the case of a
+ * trigger with :n (n = number of times the trigger should fire) and
+ * the parsing continues normally; otherwise the function just returns
+ * and assumes param just contains a filter and there's nothing else
+ * to do.
+ *
+ * Return: 0 on success, errno otherwise
+ */
+int event_trigger_separate_filter(char *param_and_filter, char **param,
+ char **filter, bool param_required)
+{
+ int ret = 0;
+
+ *param = *filter = NULL;
+
+ if (!param_and_filter) {
+ if (param_required)
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * Here we check for an optional param. The only legal
+ * optional param is :n, and if that's the case, continue
+ * below. Otherwise we assume what's left is a filter and
+ * return it as the filter string for the caller to deal with.
+ */
+ if (!param_required && param_and_filter && !isdigit(param_and_filter[0])) {
+ *filter = param_and_filter;
+ goto out;
+ }
+
+ /*
+ * Separate the param from the filter (param [if filter]).
+ * Here we have either an optional :n param or a required
+ * param and an optional filter.
+ */
+ *param = strsep(&param_and_filter, " \t");
+
+ /*
+ * Here we have a filter, though it may be empty.
+ */
+ if (param_and_filter) {
+ *filter = skip_spaces(param_and_filter);
+ if (!**filter)
+ *filter = NULL;
+ }
+out:
+ return ret;
+}
+
+/**
+ * event_trigger_alloc - allocate and init event_trigger_data for a trigger
+ * @cmd_ops: The event_command operations for the trigger
+ * @cmd: The cmd string
+ * @param: The param string
+ * @private_data: User data to associate with the event trigger
+ *
+ * Allocate an event_trigger_data instance and initialize it. The
+ * @cmd_ops are used along with the @cmd and @param to get the
+ * trigger_ops to assign to the event_trigger_data. @private_data can
+ * also be passed in and associated with the event_trigger_data.
+ *
+ * Use event_trigger_free() to free an event_trigger_data object.
+ *
+ * Return: The trigger_data object success, NULL otherwise
+ */
+struct event_trigger_data *event_trigger_alloc(struct event_command *cmd_ops,
+ char *cmd,
+ char *param,
+ void *private_data)
+{
+ struct event_trigger_data *trigger_data;
+ struct event_trigger_ops *trigger_ops;
+
+ trigger_ops = cmd_ops->get_trigger_ops(cmd, param);
+
+ trigger_data = kzalloc(sizeof(*trigger_data), GFP_KERNEL);
+ if (!trigger_data)
+ return NULL;
+
+ trigger_data->count = -1;
+ trigger_data->ops = trigger_ops;
+ trigger_data->cmd_ops = cmd_ops;
+ trigger_data->private_data = private_data;
+
+ INIT_LIST_HEAD(&trigger_data->list);
+ INIT_LIST_HEAD(&trigger_data->named_list);
+ RCU_INIT_POINTER(trigger_data->filter, NULL);
+
+ return trigger_data;
+}
+
+/**
+ * event_trigger_parse_num - parse and return the number param for a trigger
+ * @param: The param string
+ * @trigger_data: The trigger_data for the trigger
+ *
+ * Parse the :n (n = number of times the trigger should fire) param
+ * and set the count variable in the trigger_data to the parsed count.
+ *
+ * Return: 0 on success, errno otherwise
+ */
+int event_trigger_parse_num(char *param,
+ struct event_trigger_data *trigger_data)
+{
+ char *number;
+ int ret = 0;
+
+ if (param) {
+ number = strsep(&param, ":");
+
+ if (!strlen(number))
+ return -EINVAL;
+
+ /*
+ * We use the callback data field (which is a pointer)
+ * as our counter.
+ */
+ ret = kstrtoul(number, 0, &trigger_data->count);
+ }
+
+ return ret;
+}
+
+/**
+ * event_trigger_set_filter - set an event trigger's filter
+ * @cmd_ops: The event_command operations for the trigger
+ * @file: The event file for the trigger's event
+ * @param: The string containing the filter
+ * @trigger_data: The trigger_data for the trigger
+ *
+ * Set the filter for the trigger. If the filter is NULL, just return
+ * without error.
+ *
+ * Return: 0 on success, errno otherwise
+ */
+int event_trigger_set_filter(struct event_command *cmd_ops,
+ struct trace_event_file *file,
+ char *param,
+ struct event_trigger_data *trigger_data)
+{
+ if (param && cmd_ops->set_filter)
+ return cmd_ops->set_filter(param, trigger_data, file);
+
+ return 0;
+}
+
+/**
+ * event_trigger_reset_filter - reset an event trigger's filter
+ * @cmd_ops: The event_command operations for the trigger
+ * @trigger_data: The trigger_data for the trigger
+ *
+ * Reset the filter for the trigger to no filter.
+ */
+void event_trigger_reset_filter(struct event_command *cmd_ops,
+ struct event_trigger_data *trigger_data)
+{
+ if (cmd_ops->set_filter)
+ cmd_ops->set_filter(NULL, trigger_data, NULL);
+}
+
+/**
+ * event_trigger_register - register an event trigger
+ * @cmd_ops: The event_command operations for the trigger
+ * @file: The event file for the trigger's event
+ * @glob: The trigger command string, with optional remove(!) operator
+ * @cmd: The cmd string
+ * @param: The param string
+ * @trigger_data: The trigger_data for the trigger
+ * @n_registered: optional outparam, the number of triggers registered
+ *
+ * Register an event trigger. The @cmd_ops are used to call the
+ * cmd_ops->reg() function which actually does the registration. The
+ * cmd_ops->reg() function returns the number of triggers registered,
+ * which is assigned to n_registered, if n_registered is non-NULL.
+ *
+ * Return: 0 on success, errno otherwise
+ */
+int event_trigger_register(struct event_command *cmd_ops,
+ struct trace_event_file *file,
+ char *glob,
+ char *cmd,
+ char *param,
+ struct event_trigger_data *trigger_data,
+ int *n_registered)
+{
+ int ret;
+
+ if (n_registered)
+ *n_registered = 0;
+
+ ret = cmd_ops->reg(glob, trigger_data, file);
+ /*
+ * The above returns on success the # of functions enabled,
+ * but if it didn't find any functions it returns zero.
+ * Consider no functions a failure too.
+ */
+ if (!ret) {
+ cmd_ops->unreg(glob, trigger_data, file);
+ ret = -ENOENT;
+ } else if (ret > 0) {
+ if (n_registered)
+ *n_registered = ret;
+ /* Just return zero, not the number of enabled functions */
+ ret = 0;
+ }
+
+ return ret;
+}
+
+/*
+ * End event trigger parsing helper functions.
+ */
+
/**
- * event_trigger_callback - Generic event_command @func implementation
+ * event_trigger_parse - Generic event_command @parse implementation
* @cmd_ops: The command ops, used for trigger registration
* @file: The trace_event_file associated with the event
* @glob: The raw string used to register the trigger
@@ -632,15 +972,15 @@ static void unregister_trigger(char *glob, struct event_trigger_ops *ops,
* Common implementation for event command parsing and trigger
* instantiation.
*
- * Usually used directly as the @func method in event command
+ * Usually used directly as the @parse method in event command
* implementations.
*
* Return: 0 on success, errno otherwise
*/
static int
-event_trigger_callback(struct event_command *cmd_ops,
- struct trace_event_file *file,
- char *glob, char *cmd, char *param)
+event_trigger_parse(struct event_command *cmd_ops,
+ struct trace_event_file *file,
+ char *glob, char *cmd, char *param)
{
struct event_trigger_data *trigger_data;
struct event_trigger_ops *trigger_ops;
@@ -673,7 +1013,7 @@ event_trigger_callback(struct event_command *cmd_ops,
INIT_LIST_HEAD(&trigger_data->named_list);
if (glob[0] == '!') {
- cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file);
+ cmd_ops->unreg(glob+1, trigger_data, file);
kfree(trigger_data);
ret = 0;
goto out;
@@ -708,14 +1048,14 @@ event_trigger_callback(struct event_command *cmd_ops,
out_reg:
/* Up the trigger_data count to make sure reg doesn't free it on failure */
event_trigger_init(trigger_ops, trigger_data);
- ret = cmd_ops->reg(glob, trigger_ops, trigger_data, file);
+ ret = cmd_ops->reg(glob, trigger_data, file);
/*
* The above returns on success the # of functions enabled,
* but if it didn't find any functions it returns zero.
* Consider no functions a failure too.
*/
if (!ret) {
- cmd_ops->unreg(glob, trigger_ops, trigger_data, file);
+ cmd_ops->unreg(glob, trigger_data, file);
ret = -ENOENT;
} else if (ret > 0)
ret = 0;
@@ -1023,28 +1363,28 @@ traceoff_trigger_print(struct seq_file *m, struct event_trigger_ops *ops,
}
static struct event_trigger_ops traceon_trigger_ops = {
- .func = traceon_trigger,
+ .trigger = traceon_trigger,
.print = traceon_trigger_print,
.init = event_trigger_init,
.free = event_trigger_free,
};
static struct event_trigger_ops traceon_count_trigger_ops = {
- .func = traceon_count_trigger,
+ .trigger = traceon_count_trigger,
.print = traceon_trigger_print,
.init = event_trigger_init,
.free = event_trigger_free,
};
static struct event_trigger_ops traceoff_trigger_ops = {
- .func = traceoff_trigger,
+ .trigger = traceoff_trigger,
.print = traceoff_trigger_print,
.init = event_trigger_init,
.free = event_trigger_free,
};
static struct event_trigger_ops traceoff_count_trigger_ops = {
- .func = traceoff_count_trigger,
+ .trigger = traceoff_count_trigger,
.print = traceoff_trigger_print,
.init = event_trigger_init,
.free = event_trigger_free,
@@ -1069,7 +1409,7 @@ onoff_get_trigger_ops(char *cmd, char *param)
static struct event_command trigger_traceon_cmd = {
.name = "traceon",
.trigger_type = ETT_TRACE_ONOFF,
- .func = event_trigger_callback,
+ .parse = event_trigger_parse,
.reg = register_trigger,
.unreg = unregister_trigger,
.get_trigger_ops = onoff_get_trigger_ops,
@@ -1080,7 +1420,7 @@ static struct event_command trigger_traceoff_cmd = {
.name = "traceoff",
.trigger_type = ETT_TRACE_ONOFF,
.flags = EVENT_CMD_FL_POST_TRIGGER,
- .func = event_trigger_callback,
+ .parse = event_trigger_parse,
.reg = register_trigger,
.unreg = unregister_trigger,
.get_trigger_ops = onoff_get_trigger_ops,
@@ -1116,14 +1456,14 @@ snapshot_count_trigger(struct event_trigger_data *data,
}
static int
-register_snapshot_trigger(char *glob, struct event_trigger_ops *ops,
+register_snapshot_trigger(char *glob,
struct event_trigger_data *data,
struct trace_event_file *file)
{
if (tracing_alloc_snapshot_instance(file->tr) != 0)
return 0;
- return register_trigger(glob, ops, data, file);
+ return register_trigger(glob, data, file);
}
static int
@@ -1135,14 +1475,14 @@ snapshot_trigger_print(struct seq_file *m, struct event_trigger_ops *ops,
}
static struct event_trigger_ops snapshot_trigger_ops = {
- .func = snapshot_trigger,
+ .trigger = snapshot_trigger,
.print = snapshot_trigger_print,
.init = event_trigger_init,
.free = event_trigger_free,
};
static struct event_trigger_ops snapshot_count_trigger_ops = {
- .func = snapshot_count_trigger,
+ .trigger = snapshot_count_trigger,
.print = snapshot_trigger_print,
.init = event_trigger_init,
.free = event_trigger_free,
@@ -1157,7 +1497,7 @@ snapshot_get_trigger_ops(char *cmd, char *param)
static struct event_command trigger_snapshot_cmd = {
.name = "snapshot",
.trigger_type = ETT_SNAPSHOT,
- .func = event_trigger_callback,
+ .parse = event_trigger_parse,
.reg = register_snapshot_trigger,
.unreg = unregister_trigger,
.get_trigger_ops = snapshot_get_trigger_ops,
@@ -1226,14 +1566,14 @@ stacktrace_trigger_print(struct seq_file *m, struct event_trigger_ops *ops,
}
static struct event_trigger_ops stacktrace_trigger_ops = {
- .func = stacktrace_trigger,
+ .trigger = stacktrace_trigger,
.print = stacktrace_trigger_print,
.init = event_trigger_init,
.free = event_trigger_free,
};
static struct event_trigger_ops stacktrace_count_trigger_ops = {
- .func = stacktrace_count_trigger,
+ .trigger = stacktrace_count_trigger,
.print = stacktrace_trigger_print,
.init = event_trigger_init,
.free = event_trigger_free,
@@ -1249,7 +1589,7 @@ static struct event_command trigger_stacktrace_cmd = {
.name = "stacktrace",
.trigger_type = ETT_STACKTRACE,
.flags = EVENT_CMD_FL_POST_TRIGGER,
- .func = event_trigger_callback,
+ .parse = event_trigger_parse,
.reg = register_trigger,
.unreg = unregister_trigger,
.get_trigger_ops = stacktrace_get_trigger_ops,
@@ -1353,36 +1693,36 @@ void event_enable_trigger_free(struct event_trigger_ops *ops,
}
static struct event_trigger_ops event_enable_trigger_ops = {
- .func = event_enable_trigger,
+ .trigger = event_enable_trigger,
.print = event_enable_trigger_print,
.init = event_trigger_init,
.free = event_enable_trigger_free,
};
static struct event_trigger_ops event_enable_count_trigger_ops = {
- .func = event_enable_count_trigger,
+ .trigger = event_enable_count_trigger,
.print = event_enable_trigger_print,
.init = event_trigger_init,
.free = event_enable_trigger_free,
};
static struct event_trigger_ops event_disable_trigger_ops = {
- .func = event_enable_trigger,
+ .trigger = event_enable_trigger,
.print = event_enable_trigger_print,
.init = event_trigger_init,
.free = event_enable_trigger_free,
};
static struct event_trigger_ops event_disable_count_trigger_ops = {
- .func = event_enable_count_trigger,
+ .trigger = event_enable_count_trigger,
.print = event_enable_trigger_print,
.init = event_trigger_init,
.free = event_enable_trigger_free,
};
-int event_enable_trigger_func(struct event_command *cmd_ops,
- struct trace_event_file *file,
- char *glob, char *cmd, char *param)
+int event_enable_trigger_parse(struct event_command *cmd_ops,
+ struct trace_event_file *file,
+ char *glob, char *cmd, char *param)
{
struct trace_event_file *event_enable_file;
struct enable_trigger_data *enable_data;
@@ -1455,7 +1795,7 @@ int event_enable_trigger_func(struct event_command *cmd_ops,
trigger_data->private_data = enable_data;
if (glob[0] == '!') {
- cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file);
+ cmd_ops->unreg(glob+1, trigger_data, file);
kfree(trigger_data);
kfree(enable_data);
ret = 0;
@@ -1502,7 +1842,7 @@ int event_enable_trigger_func(struct event_command *cmd_ops,
ret = trace_event_enable_disable(event_enable_file, 1, 1);
if (ret < 0)
goto out_put;
- ret = cmd_ops->reg(glob, trigger_ops, trigger_data, file);
+ ret = cmd_ops->reg(glob, trigger_data, file);
/*
* The above returns on success the # of functions enabled,
* but if it didn't find any functions it returns zero.
@@ -1532,7 +1872,6 @@ int event_enable_trigger_func(struct event_command *cmd_ops,
}
int event_enable_register_trigger(char *glob,
- struct event_trigger_ops *ops,
struct event_trigger_data *data,
struct trace_event_file *file)
{
@@ -1574,7 +1913,6 @@ out:
}
void event_enable_unregister_trigger(char *glob,
- struct event_trigger_ops *ops,
struct event_trigger_data *test,
struct trace_event_file *file)
{
@@ -1628,7 +1966,7 @@ event_enable_get_trigger_ops(char *cmd, char *param)
static struct event_command trigger_enable_cmd = {
.name = ENABLE_EVENT_STR,
.trigger_type = ETT_EVENT_ENABLE,
- .func = event_enable_trigger_func,
+ .parse = event_enable_trigger_parse,
.reg = event_enable_register_trigger,
.unreg = event_enable_unregister_trigger,
.get_trigger_ops = event_enable_get_trigger_ops,
@@ -1638,7 +1976,7 @@ static struct event_command trigger_enable_cmd = {
static struct event_command trigger_disable_cmd = {
.name = DISABLE_EVENT_STR,
.trigger_type = ETT_EVENT_ENABLE,
- .func = event_enable_trigger_func,
+ .parse = event_enable_trigger_parse,
.reg = event_enable_register_trigger,
.unreg = event_enable_unregister_trigger,
.get_trigger_ops = event_enable_get_trigger_ops,
diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c
index 56bb7b890578..d440ddd5fd8b 100644
--- a/kernel/trace/trace_hwlat.c
+++ b/kernel/trace/trace_hwlat.c
@@ -491,18 +491,14 @@ static void stop_per_cpu_kthreads(void)
static int start_cpu_kthread(unsigned int cpu)
{
struct task_struct *kthread;
- char comm[24];
- snprintf(comm, 24, "hwlatd/%d", cpu);
-
- kthread = kthread_create_on_cpu(kthread_fn, NULL, cpu, comm);
+ kthread = kthread_run_on_cpu(kthread_fn, NULL, cpu, "hwlatd/%u");
if (IS_ERR(kthread)) {
pr_err(BANNER "could not start sampling thread\n");
return -ENOMEM;
}
per_cpu(hwlat_per_cpu_data, cpu).kthread = kthread;
- wake_up_process(kthread);
return 0;
}
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 33272a7b6912..508f14af4f2c 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -7,6 +7,7 @@
*/
#define pr_fmt(fmt) "trace_kprobe: " fmt
+#include <linux/bpf-cgroup.h>
#include <linux/security.h>
#include <linux/module.h>
#include <linux/uaccess.h>
@@ -327,11 +328,9 @@ static inline int __enable_trace_kprobe(struct trace_kprobe *tk)
static void __disable_trace_kprobe(struct trace_probe *tp)
{
- struct trace_probe *pos;
struct trace_kprobe *tk;
- list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
- tk = container_of(pos, struct trace_kprobe, tp);
+ list_for_each_entry(tk, trace_probe_probe_list(tp), tp.list) {
if (!trace_kprobe_is_registered(tk))
continue;
if (trace_kprobe_is_return(tk))
@@ -348,7 +347,7 @@ static void __disable_trace_kprobe(struct trace_probe *tp)
static int enable_trace_kprobe(struct trace_event_call *call,
struct trace_event_file *file)
{
- struct trace_probe *pos, *tp;
+ struct trace_probe *tp;
struct trace_kprobe *tk;
bool enabled;
int ret = 0;
@@ -369,8 +368,7 @@ static int enable_trace_kprobe(struct trace_event_call *call,
if (enabled)
return 0;
- list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
- tk = container_of(pos, struct trace_kprobe, tp);
+ list_for_each_entry(tk, trace_probe_probe_list(tp), tp.list) {
if (trace_kprobe_has_gone(tk))
continue;
ret = __enable_trace_kprobe(tk);
@@ -559,11 +557,9 @@ static bool trace_kprobe_has_same_kprobe(struct trace_kprobe *orig,
struct trace_kprobe *comp)
{
struct trace_probe_event *tpe = orig->tp.event;
- struct trace_probe *pos;
int i;
- list_for_each_entry(pos, &tpe->probes, list) {
- orig = container_of(pos, struct trace_kprobe, tp);
+ list_for_each_entry(orig, &tpe->probes, tp.list) {
if (strcmp(trace_kprobe_symbol(orig),
trace_kprobe_symbol(comp)) ||
trace_kprobe_offset(orig) != trace_kprobe_offset(comp))
@@ -1175,15 +1171,18 @@ static int probes_profile_seq_show(struct seq_file *m, void *v)
{
struct dyn_event *ev = v;
struct trace_kprobe *tk;
+ unsigned long nmissed;
if (!is_trace_kprobe(ev))
return 0;
tk = to_trace_kprobe(ev);
+ nmissed = trace_kprobe_is_return(tk) ?
+ tk->rp.kp.nmissed + tk->rp.nmissed : tk->rp.kp.nmissed;
seq_printf(m, " %-44s %15lu %15lu\n",
trace_probe_name(&tk->tp),
trace_kprobe_nhit(tk),
- tk->rp.kp.nmissed);
+ nmissed);
return 0;
}
@@ -1383,17 +1382,11 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs,
if (trace_trigger_soft_disabled(trace_file))
return;
- fbuffer.trace_ctx = tracing_gen_ctx();
- fbuffer.trace_file = trace_file;
-
dsize = __get_data_size(&tk->tp, regs);
- fbuffer.event =
- trace_event_buffer_lock_reserve(&fbuffer.buffer, trace_file,
- call->event.type,
- sizeof(*entry) + tk->tp.size + dsize,
- fbuffer.trace_ctx);
- if (!fbuffer.event)
+ entry = trace_event_buffer_reserve(&fbuffer, trace_file,
+ sizeof(*entry) + tk->tp.size + dsize);
+ if (!entry)
return;
fbuffer.regs = regs;
@@ -1430,16 +1423,11 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
if (trace_trigger_soft_disabled(trace_file))
return;
- fbuffer.trace_ctx = tracing_gen_ctx();
- fbuffer.trace_file = trace_file;
-
dsize = __get_data_size(&tk->tp, regs);
- fbuffer.event =
- trace_event_buffer_lock_reserve(&fbuffer.buffer, trace_file,
- call->event.type,
- sizeof(*entry) + tk->tp.size + dsize,
- fbuffer.trace_ctx);
- if (!fbuffer.event)
+
+ entry = trace_event_buffer_reserve(&fbuffer, trace_file,
+ sizeof(*entry) + tk->tp.size + dsize);
+ if (!entry)
return;
fbuffer.regs = regs;
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index 7520d43aed55..870a08da5b48 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -138,8 +138,7 @@ static void osnoise_unregister_instance(struct trace_array *tr)
if (!found)
return;
- synchronize_rcu();
- kfree(inst);
+ kvfree_rcu(inst);
}
/*
@@ -1701,7 +1700,7 @@ static int start_kthread(unsigned int cpu)
snprintf(comm, 24, "osnoise/%d", cpu);
}
- kthread = kthread_create_on_cpu(main, NULL, cpu, comm);
+ kthread = kthread_run_on_cpu(main, NULL, cpu, comm);
if (IS_ERR(kthread)) {
pr_err(BANNER "could not start sampling thread\n");
@@ -1710,7 +1709,6 @@ static int start_kthread(unsigned int cpu)
}
per_cpu(per_cpu_osnoise_var, cpu).kthread = kthread;
- wake_up_process(kthread);
return 0;
}
@@ -2123,6 +2121,13 @@ out_unhook_irq:
return -EINVAL;
}
+static void osnoise_unhook_events(void)
+{
+ unhook_thread_events();
+ unhook_softirq_events();
+ unhook_irq_events();
+}
+
/*
* osnoise_workload_start - start the workload and hook to events
*/
@@ -2155,7 +2160,14 @@ static int osnoise_workload_start(void)
retval = start_per_cpu_kthreads();
if (retval) {
- unhook_irq_events();
+ trace_osnoise_callback_enabled = false;
+ /*
+ * Make sure that ftrace_nmi_enter/exit() see
+ * trace_osnoise_callback_enabled as false before continuing.
+ */
+ barrier();
+
+ osnoise_unhook_events();
return retval;
}
@@ -2186,9 +2198,7 @@ static void osnoise_workload_stop(void)
stop_per_cpu_kthreads();
- unhook_irq_events();
- unhook_softirq_events();
- unhook_thread_events();
+ osnoise_unhook_events();
}
static void osnoise_tracer_start(struct trace_array *tr)
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 3547e7176ff7..8aa493d25c73 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -445,14 +445,18 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
char irqs_off;
int hardirq;
int softirq;
+ int bh_off;
int nmi;
nmi = entry->flags & TRACE_FLAG_NMI;
hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
+ bh_off = entry->flags & TRACE_FLAG_BH_OFF;
irqs_off =
+ (entry->flags & TRACE_FLAG_IRQS_OFF && bh_off) ? 'D' :
(entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
+ bh_off ? 'b' :
(entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' :
'.';
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 3ed2a3f37297..73d90179b51b 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -356,6 +356,8 @@ static int __parse_imm_string(char *str, char **pbuf, int offs)
return -EINVAL;
}
*pbuf = kstrndup(str, len - 1, GFP_KERNEL);
+ if (!*pbuf)
+ return -ENOMEM;
return 0;
}
@@ -1138,8 +1140,7 @@ int trace_probe_remove_file(struct trace_probe *tp,
return -ENOENT;
list_del_rcu(&link->list);
- synchronize_rcu();
- kfree(link);
+ kvfree_rcu(link);
if (list_empty(&tp->event->files))
trace_probe_clear_flag(tp, TP_FLAG_TRACE);
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 8bfcd3b09422..f755bde42fd0 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -323,8 +323,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
trace_ctx = tracing_gen_ctx();
- buffer = tr->array_buffer.buffer;
- event = trace_buffer_lock_reserve(buffer,
+ event = trace_event_buffer_lock_reserve(&buffer, trace_file,
sys_data->enter_event->event.type, size, trace_ctx);
if (!event)
return;
@@ -367,8 +366,7 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
trace_ctx = tracing_gen_ctx();
- buffer = tr->array_buffer.buffer;
- event = trace_buffer_lock_reserve(buffer,
+ event = trace_event_buffer_lock_reserve(&buffer, trace_file,
sys_data->exit_event->event.type, sizeof(*entry),
trace_ctx);
if (!event)
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 0a5c0db3137e..9711589273cd 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -7,6 +7,7 @@
*/
#define pr_fmt(fmt) "trace_uprobe: " fmt
+#include <linux/bpf-cgroup.h>
#include <linux/security.h>
#include <linux/ctype.h>
#include <linux/module.h>
@@ -409,12 +410,10 @@ static bool trace_uprobe_has_same_uprobe(struct trace_uprobe *orig,
struct trace_uprobe *comp)
{
struct trace_probe_event *tpe = orig->tp.event;
- struct trace_probe *pos;
struct inode *comp_inode = d_real_inode(comp->path.dentry);
int i;
- list_for_each_entry(pos, &tpe->probes, list) {
- orig = container_of(pos, struct trace_uprobe, tp);
+ list_for_each_entry(orig, &tpe->probes, tp.list) {
if (comp_inode != d_real_inode(orig->path.dentry) ||
comp->offset != orig->offset)
continue;
@@ -949,8 +948,7 @@ static void __uprobe_trace_func(struct trace_uprobe *tu,
struct trace_event_file *trace_file)
{
struct uprobe_trace_entry_head *entry;
- struct trace_buffer *buffer;
- struct ring_buffer_event *event;
+ struct trace_event_buffer fbuffer;
void *data;
int size, esize;
struct trace_event_call *call = trace_probe_event_call(&tu->tp);
@@ -965,12 +963,10 @@ static void __uprobe_trace_func(struct trace_uprobe *tu,
esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
size = esize + tu->tp.size + dsize;
- event = trace_event_buffer_lock_reserve(&buffer, trace_file,
- call->event.type, size, 0);
- if (!event)
+ entry = trace_event_buffer_reserve(&fbuffer, trace_file, size);
+ if (!entry)
return;
- entry = ring_buffer_event_data(event);
if (is_ret_probe(tu)) {
entry->vaddr[0] = func;
entry->vaddr[1] = instruction_pointer(regs);
@@ -982,7 +978,7 @@ static void __uprobe_trace_func(struct trace_uprobe *tu,
memcpy(data, ucb->buf, tu->tp.size + dsize);
- event_trigger_unlock_commit(trace_file, buffer, event, entry, 0);
+ trace_event_buffer_commit(&fbuffer);
}
/* uprobe handler */
@@ -1075,14 +1071,12 @@ static int trace_uprobe_enable(struct trace_uprobe *tu, filter_func_t filter)
static void __probe_event_disable(struct trace_probe *tp)
{
- struct trace_probe *pos;
struct trace_uprobe *tu;
tu = container_of(tp, struct trace_uprobe, tp);
WARN_ON(!uprobe_filter_is_empty(tu->tp.event->filter));
- list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
- tu = container_of(pos, struct trace_uprobe, tp);
+ list_for_each_entry(tu, trace_probe_probe_list(tp), tp.list) {
if (!tu->inode)
continue;
@@ -1094,7 +1088,7 @@ static void __probe_event_disable(struct trace_probe *tp)
static int probe_event_enable(struct trace_event_call *call,
struct trace_event_file *file, filter_func_t filter)
{
- struct trace_probe *pos, *tp;
+ struct trace_probe *tp;
struct trace_uprobe *tu;
bool enabled;
int ret;
@@ -1129,8 +1123,7 @@ static int probe_event_enable(struct trace_event_call *call,
if (ret)
goto err_flags;
- list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
- tu = container_of(pos, struct trace_uprobe, tp);
+ list_for_each_entry(tu, trace_probe_probe_list(tp), tp.list) {
ret = trace_uprobe_enable(tu, filter);
if (ret) {
__probe_event_disable(tp);
@@ -1275,7 +1268,7 @@ static bool trace_uprobe_filter_add(struct trace_uprobe_filter *filter,
static int uprobe_perf_close(struct trace_event_call *call,
struct perf_event *event)
{
- struct trace_probe *pos, *tp;
+ struct trace_probe *tp;
struct trace_uprobe *tu;
int ret = 0;
@@ -1287,8 +1280,7 @@ static int uprobe_perf_close(struct trace_event_call *call,
if (trace_uprobe_filter_remove(tu->tp.event->filter, event))
return 0;
- list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
- tu = container_of(pos, struct trace_uprobe, tp);
+ list_for_each_entry(tu, trace_probe_probe_list(tp), tp.list) {
ret = uprobe_apply(tu->inode, tu->offset, &tu->consumer, false);
if (ret)
break;
@@ -1300,7 +1292,7 @@ static int uprobe_perf_close(struct trace_event_call *call,
static int uprobe_perf_open(struct trace_event_call *call,
struct perf_event *event)
{
- struct trace_probe *pos, *tp;
+ struct trace_probe *tp;
struct trace_uprobe *tu;
int err = 0;
@@ -1312,7 +1304,7 @@ static int uprobe_perf_open(struct trace_event_call *call,
if (trace_uprobe_filter_add(tu->tp.event->filter, event))
return 0;
- list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
+ list_for_each_entry(tu, trace_probe_probe_list(tp), tp.list) {
err = uprobe_apply(tu->inode, tu->offset, &tu->consumer, true);
if (err) {
uprobe_perf_close(call, event);
@@ -1618,6 +1610,11 @@ create_local_trace_uprobe(char *name, unsigned long offs,
tu->path = path;
tu->ref_ctr_offset = ref_ctr_offset;
tu->filename = kstrdup(name, GFP_KERNEL);
+ if (!tu->filename) {
+ ret = -ENOMEM;
+ goto error;
+ }
+
init_trace_event_call(tu);
ptype = is_ret_probe(tu) ? PROBE_PRINT_RETURN : PROBE_PRINT_NORMAL;
diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c
index 39bb56d2dcbe..9628b5571846 100644
--- a/kernel/trace/tracing_map.c
+++ b/kernel/trace/tracing_map.c
@@ -15,6 +15,7 @@
#include <linux/jhash.h>
#include <linux/slab.h>
#include <linux/sort.h>
+#include <linux/kmemleak.h>
#include "tracing_map.h"
#include "trace.h"
@@ -307,6 +308,7 @@ static void tracing_map_array_free(struct tracing_map_array *a)
for (i = 0; i < a->n_pages; i++) {
if (!a->pages[i])
break;
+ kmemleak_free(a->pages[i]);
free_page((unsigned long)a->pages[i]);
}
@@ -342,6 +344,7 @@ static struct tracing_map_array *tracing_map_array_alloc(unsigned int n_elts,
a->pages[i] = (void *)get_zeroed_page(GFP_KERNEL);
if (!a->pages[i])
goto free;
+ kmemleak_alloc(a->pages[i], PAGE_SIZE, 1, GFP_KERNEL);
}
out:
return a;