Diffstat (limited to 'kernel/trace')
-rw-r--r-- | kernel/trace/Kconfig                |   1
-rw-r--r-- | kernel/trace/bpf_trace.c            | 105
-rw-r--r-- | kernel/trace/ftrace.c               |  22
-rw-r--r-- | kernel/trace/trace.c                |  10
-rw-r--r-- | kernel/trace/trace.h                |  19
-rw-r--r-- | kernel/trace/trace_events_filter.c  |   8
-rw-r--r-- | kernel/trace/trace_events_hist.c    |  13
-rw-r--r-- | kernel/trace/trace_kdb.c            |   6
-rw-r--r-- | kernel/trace/trace_output.c         |   2
-rw-r--r-- | kernel/trace/trace_uprobe.c         |  15
10 files changed, 154 insertions, 47 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 5d965cef6c77..564e5fdb025f 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Architectures that offer an FUNCTION_TRACER implementation should
 #  select HAVE_FUNCTION_TRACER:
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index b496ffdf5f36..1c9a4745e596 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -410,8 +410,6 @@ static const struct bpf_func_proto bpf_perf_event_read_value_proto = {
 	.arg4_type	= ARG_CONST_SIZE,
 };
 
-static DEFINE_PER_CPU(struct perf_sample_data, bpf_trace_sd);
-
 static __always_inline u64
 __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
 			u64 flags, struct perf_sample_data *sd)
@@ -442,24 +440,50 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
 	return perf_event_output(event, sd, regs);
 }
 
+/*
+ * Support executing tracepoints in normal, irq, and nmi context that each call
+ * bpf_perf_event_output
+ */
+struct bpf_trace_sample_data {
+	struct perf_sample_data sds[3];
+};
+
+static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_trace_sds);
+static DEFINE_PER_CPU(int, bpf_trace_nest_level);
 BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
 	   u64, flags, void *, data, u64, size)
 {
-	struct perf_sample_data *sd = this_cpu_ptr(&bpf_trace_sd);
+	struct bpf_trace_sample_data *sds = this_cpu_ptr(&bpf_trace_sds);
+	int nest_level = this_cpu_inc_return(bpf_trace_nest_level);
 	struct perf_raw_record raw = {
 		.frag = {
 			.size = size,
 			.data = data,
 		},
 	};
+	struct perf_sample_data *sd;
+	int err;
 
-	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
-		return -EINVAL;
+	if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(sds->sds))) {
+		err = -EBUSY;
+		goto out;
+	}
+
+	sd = &sds->sds[nest_level - 1];
+
+	if (unlikely(flags & ~(BPF_F_INDEX_MASK))) {
+		err = -EINVAL;
+		goto out;
+	}
 
 	perf_sample_data_init(sd, 0, 0);
 	sd->raw = &raw;
 
-	return __bpf_perf_event_output(regs, map, flags, sd);
+	err = __bpf_perf_event_output(regs, map, flags, sd);
+
+out:
+	this_cpu_dec(bpf_trace_nest_level);
+	return err;
 }
 
 static const struct bpf_func_proto bpf_perf_event_output_proto = {
@@ -822,16 +846,48 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 /*
  * bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp
  * to avoid potential recursive reuse issue when/if tracepoints are added
- * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack
+ * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack.
+ *
+ * Since raw tracepoints run despite bpf_prog_active, support concurrent usage
+ * in normal, irq, and nmi context.
  */
-static DEFINE_PER_CPU(struct pt_regs, bpf_raw_tp_regs);
+struct bpf_raw_tp_regs {
+	struct pt_regs regs[3];
+};
+static DEFINE_PER_CPU(struct bpf_raw_tp_regs, bpf_raw_tp_regs);
+static DEFINE_PER_CPU(int, bpf_raw_tp_nest_level);
+static struct pt_regs *get_bpf_raw_tp_regs(void)
+{
+	struct bpf_raw_tp_regs *tp_regs = this_cpu_ptr(&bpf_raw_tp_regs);
+	int nest_level = this_cpu_inc_return(bpf_raw_tp_nest_level);
+
+	if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(tp_regs->regs))) {
+		this_cpu_dec(bpf_raw_tp_nest_level);
+		return ERR_PTR(-EBUSY);
+	}
+
+	return &tp_regs->regs[nest_level - 1];
+}
+
+static void put_bpf_raw_tp_regs(void)
+{
+	this_cpu_dec(bpf_raw_tp_nest_level);
+}
+
 BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args,
 	   struct bpf_map *, map, u64, flags, void *, data, u64, size)
 {
-	struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
+	struct pt_regs *regs = get_bpf_raw_tp_regs();
+	int ret;
+
+	if (IS_ERR(regs))
+		return PTR_ERR(regs);
 
 	perf_fetch_caller_regs(regs);
-	return ____bpf_perf_event_output(regs, map, flags, data, size);
+	ret = ____bpf_perf_event_output(regs, map, flags, data, size);
+
+	put_bpf_raw_tp_regs();
+	return ret;
 }
 
 static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
@@ -848,12 +904,18 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
 BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
 	   struct bpf_map *, map, u64, flags)
 {
-	struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
+	struct pt_regs *regs = get_bpf_raw_tp_regs();
+	int ret;
+
+	if (IS_ERR(regs))
+		return PTR_ERR(regs);
 
 	perf_fetch_caller_regs(regs);
 	/* similar to bpf_perf_event_output_tp, but pt_regs fetched differently */
-	return bpf_get_stackid((unsigned long) regs, (unsigned long) map,
-			       flags, 0, 0);
+	ret = bpf_get_stackid((unsigned long) regs, (unsigned long) map,
+			      flags, 0, 0);
+	put_bpf_raw_tp_regs();
+	return ret;
 }
 
 static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = {
@@ -868,11 +930,17 @@ static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = {
 BPF_CALL_4(bpf_get_stack_raw_tp, struct bpf_raw_tracepoint_args *, args,
 	   void *, buf, u32, size, u64, flags)
 {
-	struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
+	struct pt_regs *regs = get_bpf_raw_tp_regs();
+	int ret;
+
+	if (IS_ERR(regs))
+		return PTR_ERR(regs);
 
 	perf_fetch_caller_regs(regs);
-	return bpf_get_stack((unsigned long) regs, (unsigned long) buf,
-			     (unsigned long) size, flags, 0);
+	ret = bpf_get_stack((unsigned long) regs, (unsigned long) buf,
+			    (unsigned long) size, flags, 0);
+	put_bpf_raw_tp_regs();
+	return ret;
 }
 
 static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {
@@ -1297,7 +1365,8 @@ int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
 }
 
 #ifdef CONFIG_MODULES
-int bpf_event_notify(struct notifier_block *nb, unsigned long op, void *module)
+static int bpf_event_notify(struct notifier_block *nb, unsigned long op,
+			    void *module)
 {
 	struct bpf_trace_module *btm, *tmp;
 	struct module *mod = module;
@@ -1336,7 +1405,7 @@ static struct notifier_block bpf_module_nb = {
 	.notifier_call = bpf_event_notify,
 };
 
-int __init bpf_event_init(void)
+static int __init bpf_event_init(void)
 {
 	register_module_notifier(&bpf_module_nb);
 	return 0;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index a12aff849c04..38277af44f5c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -34,6 +34,7 @@
 #include <linux/hash.h>
 #include <linux/rcupdate.h>
 #include <linux/kprobes.h>
+#include <linux/memory.h>
 
 #include <trace/events/sched.h>
 
@@ -2610,10 +2611,12 @@ static void ftrace_run_update_code(int command)
 {
 	int ret;
 
+	mutex_lock(&text_mutex);
+
 	ret = ftrace_arch_code_modify_prepare();
 	FTRACE_WARN_ON(ret);
 	if (ret)
-		return;
+		goto out_unlock;
 
 	/*
 	 * By default we use stop_machine() to modify the code.
@@ -2625,6 +2628,9 @@ static void ftrace_run_update_code(int command)
 
 	ret = ftrace_arch_code_modify_post_process();
 	FTRACE_WARN_ON(ret);
+
+out_unlock:
+	mutex_unlock(&text_mutex);
 }
 
 static void ftrace_run_modify_code(struct ftrace_ops *ops, int command,
@@ -2935,14 +2941,13 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs)
 			p = &pg->records[i];
 			p->flags = rec_flags;
 
-#ifndef CC_USING_NOP_MCOUNT
 			/*
 			 * Do the initial record conversion from mcount jump
 			 * to the NOP instructions.
 			 */
-			if (!ftrace_code_disable(mod, p))
+			if (!__is_defined(CC_USING_NOP_MCOUNT) &&
+			    !ftrace_code_disable(mod, p))
 				break;
-#endif
 
 			update_cnt++;
 		}
@@ -4221,10 +4226,13 @@ void free_ftrace_func_mapper(struct ftrace_func_mapper *mapper,
 	struct ftrace_func_entry *entry;
 	struct ftrace_func_map *map;
 	struct hlist_head *hhd;
-	int size = 1 << mapper->hash.size_bits;
-	int i;
+	int size, i;
+
+	if (!mapper)
+		return;
 
 	if (free_func && mapper->hash.count) {
+		size = 1 << mapper->hash.size_bits;
 		for (i = 0; i < size; i++) {
 			hhd = &mapper->hash.buckets[i];
 			hlist_for_each_entry(entry, hhd, hlist) {
@@ -5776,6 +5784,7 @@ void ftrace_module_enable(struct module *mod)
 	struct ftrace_page *pg;
 
 	mutex_lock(&ftrace_lock);
+	mutex_lock(&text_mutex);
 
 	if (ftrace_disabled)
 		goto out_unlock;
@@ -5837,6 +5846,7 @@ void ftrace_module_enable(struct module *mod)
 		ftrace_arch_code_modify_post_process();
 
  out_unlock:
+	mutex_unlock(&text_mutex);
 	mutex_unlock(&ftrace_lock);
 
 	process_cached_mods(mod->name);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 2c92b3d9ea30..83e08b78dbee 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6923,7 +6923,7 @@ struct tracing_log_err {
 
 static DEFINE_MUTEX(tracing_err_log_lock);
 
-struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
+static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
 {
 	struct tracing_log_err *err;
 
@@ -8192,7 +8192,7 @@ static const struct file_operations buffer_percent_fops = {
 	.llseek		= default_llseek,
 };
 
-struct dentry *trace_instance_dir;
+static struct dentry *trace_instance_dir;
 
 static void
 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
@@ -8910,12 +8910,8 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
 
 		cnt++;
 
-		/* reset all but tr, trace, and overruns */
-		memset(&iter.seq, 0,
-		       sizeof(struct trace_iterator) -
-		       offsetof(struct trace_iterator, seq));
+		trace_iterator_reset(&iter);
 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
-		iter.pos = -1;
 
 		if (trace_find_next_entry_inc(&iter) != NULL) {
 			int ret;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 1974ce818ddb..005f08629b8b 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -15,7 +15,6 @@
 #include <linux/trace_seq.h>
 #include <linux/trace_events.h>
 #include <linux/compiler.h>
-#include <linux/trace_seq.h>
 #include <linux/glob.h>
 
 #ifdef CONFIG_FTRACE_SYSCALLS
@@ -1967,4 +1966,22 @@ static inline void tracer_hardirqs_off(unsigned long a0, unsigned long a1) { }
 
 extern struct trace_iterator *tracepoint_print_iter;
 
+/*
+ * Reset the state of the trace_iterator so that it can read consumed data.
+ * Normally, the trace_iterator is used for reading the data when it is not
+ * consumed, and must retain state.
+ */
+static __always_inline void trace_iterator_reset(struct trace_iterator *iter)
+{
+	const size_t offset = offsetof(struct trace_iterator, seq);
+
+	/*
+	 * Keep gcc from complaining about overwriting more than just one
+	 * member in the structure.
+	 */
+	memset((char *)iter + offset, 0, sizeof(struct trace_iterator) - offset);
+
+	iter->pos = -1;
+}
+
 #endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index d3e59312ef40..5079d1db3754 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -428,7 +428,7 @@ predicate_parse(const char *str, int nr_parens, int nr_preds,
 	op_stack = kmalloc_array(nr_parens, sizeof(*op_stack), GFP_KERNEL);
 	if (!op_stack)
 		return ERR_PTR(-ENOMEM);
-	prog_stack = kmalloc_array(nr_preds, sizeof(*prog_stack), GFP_KERNEL);
+	prog_stack = kcalloc(nr_preds, sizeof(*prog_stack), GFP_KERNEL);
 	if (!prog_stack) {
 		parse_error(pe, -ENOMEM, 0);
 		goto out_free;
@@ -579,7 +579,11 @@ predicate_parse(const char *str, int nr_parens, int nr_preds,
 out_free:
 	kfree(op_stack);
 	kfree(inverts);
-	kfree(prog_stack);
+	if (prog_stack) {
+		for (i = 0; prog_stack[i].pred; i++)
+			kfree(prog_stack[i].pred);
+		kfree(prog_stack);
+	}
 	return ERR_PTR(ret);
 }
 
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 7fca3457c705..ca6b0dff60c5 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -59,7 +59,7 @@
 	C(NO_CLOSING_PAREN,	"No closing paren found"),		\
 	C(SUBSYS_NOT_FOUND,	"Missing subsystem"),			\
 	C(INVALID_SUBSYS_EVENT,	"Invalid subsystem or event name"),	\
-	C(INVALID_REF_KEY,	"Using variable references as keys not supported"), \
+	C(INVALID_REF_KEY,	"Using variable references in keys not supported"), \
 	C(VAR_NOT_FOUND,	"Couldn't find variable"),		\
 	C(FIELD_NOT_FOUND,	"Couldn't find field"),
 
@@ -1854,6 +1854,9 @@ static u64 hist_field_var_ref(struct hist_field *hist_field,
 	struct hist_elt_data *elt_data;
 	u64 var_val = 0;
 
+	if (WARN_ON_ONCE(!elt))
+		return var_val;
+
 	elt_data = elt->private_data;
 	var_val = elt_data->var_ref_vals[hist_field->var_ref_idx];
 
@@ -3582,14 +3585,20 @@ static bool cond_snapshot_update(struct trace_array *tr, void *cond_data)
 	struct track_data *track_data = tr->cond_snapshot->cond_data;
 	struct hist_elt_data *elt_data, *track_elt_data;
 	struct snapshot_context *context = cond_data;
+	struct action_data *action;
 	u64 track_val;
 
 	if (!track_data)
 		return false;
 
+	action = track_data->action_data;
+
 	track_val = get_track_val(track_data->hist_data, context->elt,
 				  track_data->action_data);
 
+	if (!action->track_data.check_val(track_data->track_val, track_val))
+		return false;
+
 	track_data->track_val = track_val;
 	memcpy(track_data->key, context->key, track_data->key_len);
 
@@ -4503,7 +4512,7 @@ static int create_key_field(struct hist_trigger_data *hist_data,
 			goto out;
 		}
 
-		if (hist_field->flags & HIST_FIELD_FL_VAR_REF) {
+		if (field_has_hist_vars(hist_field, 0)) {
 			hist_err(tr, HIST_ERR_INVALID_REF_KEY, errpos(field_str));
 			destroy_hist_field(hist_field, 0);
 			ret = -EINVAL;
diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c
index 6c1ae6b752d1..cca65044c14c 100644
--- a/kernel/trace/trace_kdb.c
+++ b/kernel/trace/trace_kdb.c
@@ -37,12 +37,8 @@ static void ftrace_dump_buf(int skip_entries, long cpu_file)
 	if (skip_entries)
 		kdb_printf("(skipping %d entries)\n", skip_entries);
 
-	/* reset all but tr, trace, and overruns */
-	memset(&iter.seq, 0,
-	       sizeof(struct trace_iterator) -
-	       offsetof(struct trace_iterator, seq));
+	trace_iterator_reset(&iter);
 	iter.iter_flags |= TRACE_FILE_LAT_FMT;
-	iter.pos = -1;
 
 	if (cpu_file == RING_BUFFER_ALL_CPUS) {
 		for_each_tracing_cpu(cpu) {
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 54373d93e251..ba751f993c3b 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -1057,7 +1057,7 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter,
 
 	trace_seq_puts(s, "<stack trace>\n");
 
-	for (p = field->caller; p && *p != ULONG_MAX && p < end; p++) {
+	for (p = field->caller; p && p < end && *p != ULONG_MAX; p++) {
 		if (trace_seq_has_overflowed(s))
 			break;
 
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index eb7e06b54741..b55906c77ce0 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -426,8 +426,6 @@ end:
 /*
  * Argument syntax:
  *  - Add uprobe: p|r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS]
- *
- *  - Remove uprobe: -:[GRP/]EVENT
  */
 static int trace_uprobe_create(int argc, const char **argv)
 {
@@ -443,10 +441,17 @@ static int trace_uprobe_create(int argc, const char **argv)
 	ret = 0;
 	ref_ctr_offset = 0;
 
-	/* argc must be >= 1 */
-	if (argv[0][0] == 'r')
+	switch (argv[0][0]) {
+	case 'r':
 		is_return = true;
-	else if (argv[0][0] != 'p' || argc < 2)
+		break;
+	case 'p':
+		break;
+	default:
+		return -ECANCELED;
+	}
+
+	if (argc < 2)
 		return -ECANCELED;
 
 	if (argv[0][1] == ':')
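
The bpf_trace.c hunks above all rely on the same guard: bump a per-CPU nesting counter on entry, hand out scratch slot nest_level - 1 from a three-entry array (one slot each for normal, irq, and nmi context), fail with -EBUSY if nesting goes deeper than that, and decrement on the way out. The sketch below is a hedged userspace illustration of that pattern, not kernel code: struct scratch, emit_event(), MAX_NEST, and the thread-local variables are made-up stand-ins for the per-CPU data and helpers the patch adds.

/*
 * Userspace sketch (not kernel code) of the nesting guard used above for
 * bpf_perf_event_output() and the raw-tracepoint pt_regs.  The kernel uses
 * DEFINE_PER_CPU plus this_cpu_inc_return()/this_cpu_dec(); thread-local
 * variables stand in for those here, and the names below are illustrative.
 */
#include <errno.h>
#include <stdio.h>

#define MAX_NEST 3	/* one slot each for normal, irq, and nmi context */

struct scratch { char buf[64]; };

static _Thread_local struct scratch slots[MAX_NEST];
static _Thread_local int nest_level;

static int emit_event(const char *msg, int depth)
{
	struct scratch *s;
	int err = 0;

	/* claim the scratch slot that belongs to this nesting level */
	if (++nest_level > MAX_NEST) {
		err = -EBUSY;	/* nested deeper than we have slots for */
		goto out;
	}
	s = &slots[nest_level - 1];

	snprintf(s->buf, sizeof(s->buf), "level %d: %s", nest_level, msg);

	/* pretend an interrupt fires while this slot is still in use */
	if (depth > 0)
		emit_event("nested", depth - 1);

	/* our slot was not touched by the nested call */
	printf("%s\n", s->buf);
out:
	nest_level--;
	return err;
}

int main(void)
{
	/* three nested users fit in the three slots; a fourth would see -EBUSY */
	return emit_event("outer", 2) ? 1 : 0;
}

Built with cc -std=c11, it prints the innermost buffer first and the outermost last, showing that a nested caller never clobbers a slot its interrupted caller is still using; a fourth level of nesting would get -EBUSY instead of corrupting live data, which is exactly the trade-off the WARN_ON_ONCE checks above make.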