diff options
Diffstat (limited to 'kernel/trace')
-rw-r--r-- | kernel/trace/blktrace.c | 6 | ||||
-rw-r--r-- | kernel/trace/bpf_trace.c | 197 | ||||
-rw-r--r-- | kernel/trace/ftrace.c | 12 | ||||
-rw-r--r-- | kernel/trace/trace.c | 32 | ||||
-rw-r--r-- | kernel/trace/trace_hwlat.c | 2 | ||||
-rw-r--r-- | kernel/trace/trace_output.c | 2 | ||||
-rw-r--r-- | kernel/trace/trace_uprobe.c | 17 |
7 files changed, 214 insertions, 54 deletions
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index e1c6d79fb4cc..2d6e93ab0478 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -512,8 +512,6 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, dir = debugfs_lookup(buts->name, blk_debugfs_root); if (!dir) bt->dir = dir = debugfs_create_dir(buts->name, blk_debugfs_root); - if (!dir) - goto err; bt->dev = dev; atomic_set(&bt->dropped, 0); @@ -522,12 +520,8 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, ret = -EIO; bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt, &blk_dropped_fops); - if (!bt->dropped_file) - goto err; bt->msg_file = debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops); - if (!bt->msg_file) - goto err; bt->rchan = relay_open("trace", dir, buts->buf_size, buts->buf_nr, &blk_relay_callbacks, bt); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index f92d6ad5e080..ca1255d14576 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -19,6 +19,9 @@ #include "trace_probe.h" #include "trace.h" +#define bpf_event_rcu_dereference(p) \ + rcu_dereference_protected(p, lockdep_is_held(&bpf_event_mutex)) + #ifdef CONFIG_MODULES struct bpf_trace_module { struct module *module; @@ -410,8 +413,6 @@ static const struct bpf_func_proto bpf_perf_event_read_value_proto = { .arg4_type = ARG_CONST_SIZE, }; -static DEFINE_PER_CPU(struct perf_sample_data, bpf_trace_sd); - static __always_inline u64 __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, u64 flags, struct perf_sample_data *sd) @@ -442,24 +443,50 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, return perf_event_output(event, sd, regs); } +/* + * Support executing tracepoints in normal, irq, and nmi context that each call + * bpf_perf_event_output + */ +struct bpf_trace_sample_data { + struct perf_sample_data sds[3]; +}; + +static DEFINE_PER_CPU(struct bpf_trace_sample_data, 
bpf_trace_sds); +static DEFINE_PER_CPU(int, bpf_trace_nest_level); BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map, u64, flags, void *, data, u64, size) { - struct perf_sample_data *sd = this_cpu_ptr(&bpf_trace_sd); + struct bpf_trace_sample_data *sds = this_cpu_ptr(&bpf_trace_sds); + int nest_level = this_cpu_inc_return(bpf_trace_nest_level); struct perf_raw_record raw = { .frag = { .size = size, .data = data, }, }; + struct perf_sample_data *sd; + int err; - if (unlikely(flags & ~(BPF_F_INDEX_MASK))) - return -EINVAL; + if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(sds->sds))) { + err = -EBUSY; + goto out; + } + + sd = &sds->sds[nest_level - 1]; + + if (unlikely(flags & ~(BPF_F_INDEX_MASK))) { + err = -EINVAL; + goto out; + } perf_sample_data_init(sd, 0, 0); sd->raw = &raw; - return __bpf_perf_event_output(regs, map, flags, sd); + err = __bpf_perf_event_output(regs, map, flags, sd); + +out: + this_cpu_dec(bpf_trace_nest_level); + return err; } static const struct bpf_func_proto bpf_perf_event_output_proto = { @@ -567,6 +594,69 @@ static const struct bpf_func_proto bpf_probe_read_str_proto = { .arg3_type = ARG_ANYTHING, }; +struct send_signal_irq_work { + struct irq_work irq_work; + struct task_struct *task; + u32 sig; +}; + +static DEFINE_PER_CPU(struct send_signal_irq_work, send_signal_work); + +static void do_bpf_send_signal(struct irq_work *entry) +{ + struct send_signal_irq_work *work; + + work = container_of(entry, struct send_signal_irq_work, irq_work); + group_send_sig_info(work->sig, SEND_SIG_PRIV, work->task, PIDTYPE_TGID); +} + +BPF_CALL_1(bpf_send_signal, u32, sig) +{ + struct send_signal_irq_work *work = NULL; + + /* Similar to bpf_probe_write_user, task needs to be + * in a sound condition and kernel memory access be + * permitted in order to send signal to the current + * task. 
+ */ + if (unlikely(current->flags & (PF_KTHREAD | PF_EXITING))) + return -EPERM; + if (unlikely(uaccess_kernel())) + return -EPERM; + if (unlikely(!nmi_uaccess_okay())) + return -EPERM; + + if (in_nmi()) { + /* Do an early check on signal validity. Otherwise, + * the error is lost in deferred irq_work. + */ + if (unlikely(!valid_signal(sig))) + return -EINVAL; + + work = this_cpu_ptr(&send_signal_work); + if (work->irq_work.flags & IRQ_WORK_BUSY) + return -EBUSY; + + /* Add the current task, which is the target of sending signal, + * to the irq_work. The current task may change when queued + * irq works get executed. + */ + work->task = current; + work->sig = sig; + irq_work_queue(&work->irq_work); + return 0; + } + + return group_send_sig_info(sig, SEND_SIG_PRIV, current, PIDTYPE_TGID); +} + +static const struct bpf_func_proto bpf_send_signal_proto = { + .func = bpf_send_signal, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, +}; + static const struct bpf_func_proto * tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -617,6 +707,8 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_get_current_cgroup_id: return &bpf_get_current_cgroup_id_proto; #endif + case BPF_FUNC_send_signal: + return &bpf_send_signal_proto; default: return NULL; } @@ -822,16 +914,48 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) /* * bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp * to avoid potential recursive reuse issue when/if tracepoints are added - * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack + * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack. + * + * Since raw tracepoints run despite bpf_prog_active, support concurrent usage + * in normal, irq, and nmi context. 
*/ -static DEFINE_PER_CPU(struct pt_regs, bpf_raw_tp_regs); +struct bpf_raw_tp_regs { + struct pt_regs regs[3]; +}; +static DEFINE_PER_CPU(struct bpf_raw_tp_regs, bpf_raw_tp_regs); +static DEFINE_PER_CPU(int, bpf_raw_tp_nest_level); +static struct pt_regs *get_bpf_raw_tp_regs(void) +{ + struct bpf_raw_tp_regs *tp_regs = this_cpu_ptr(&bpf_raw_tp_regs); + int nest_level = this_cpu_inc_return(bpf_raw_tp_nest_level); + + if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(tp_regs->regs))) { + this_cpu_dec(bpf_raw_tp_nest_level); + return ERR_PTR(-EBUSY); + } + + return &tp_regs->regs[nest_level - 1]; +} + +static void put_bpf_raw_tp_regs(void) +{ + this_cpu_dec(bpf_raw_tp_nest_level); +} + BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args, struct bpf_map *, map, u64, flags, void *, data, u64, size) { - struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs); + struct pt_regs *regs = get_bpf_raw_tp_regs(); + int ret; + + if (IS_ERR(regs)) + return PTR_ERR(regs); perf_fetch_caller_regs(regs); - return ____bpf_perf_event_output(regs, map, flags, data, size); + ret = ____bpf_perf_event_output(regs, map, flags, data, size); + + put_bpf_raw_tp_regs(); + return ret; } static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = { @@ -848,12 +972,18 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = { BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args, struct bpf_map *, map, u64, flags) { - struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs); + struct pt_regs *regs = get_bpf_raw_tp_regs(); + int ret; + + if (IS_ERR(regs)) + return PTR_ERR(regs); perf_fetch_caller_regs(regs); /* similar to bpf_perf_event_output_tp, but pt_regs fetched differently */ - return bpf_get_stackid((unsigned long) regs, (unsigned long) map, - flags, 0, 0); + ret = bpf_get_stackid((unsigned long) regs, (unsigned long) map, + flags, 0, 0); + put_bpf_raw_tp_regs(); + return ret; } static const struct bpf_func_proto 
bpf_get_stackid_proto_raw_tp = { @@ -868,11 +998,17 @@ static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = { BPF_CALL_4(bpf_get_stack_raw_tp, struct bpf_raw_tracepoint_args *, args, void *, buf, u32, size, u64, flags) { - struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs); + struct pt_regs *regs = get_bpf_raw_tp_regs(); + int ret; + + if (IS_ERR(regs)) + return PTR_ERR(regs); perf_fetch_caller_regs(regs); - return bpf_get_stack((unsigned long) regs, (unsigned long) buf, - (unsigned long) size, flags, 0); + ret = bpf_get_stack((unsigned long) regs, (unsigned long) buf, + (unsigned long) size, flags, 0); + put_bpf_raw_tp_regs(); + return ret; } static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = { @@ -1034,7 +1170,7 @@ static DEFINE_MUTEX(bpf_event_mutex); int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog) { - struct bpf_prog_array __rcu *old_array; + struct bpf_prog_array *old_array; struct bpf_prog_array *new_array; int ret = -EEXIST; @@ -1052,7 +1188,7 @@ int perf_event_attach_bpf_prog(struct perf_event *event, if (event->prog) goto unlock; - old_array = event->tp_event->prog_array; + old_array = bpf_event_rcu_dereference(event->tp_event->prog_array); if (old_array && bpf_prog_array_length(old_array) >= BPF_TRACE_MAX_PROGS) { ret = -E2BIG; @@ -1075,7 +1211,7 @@ unlock: void perf_event_detach_bpf_prog(struct perf_event *event) { - struct bpf_prog_array __rcu *old_array; + struct bpf_prog_array *old_array; struct bpf_prog_array *new_array; int ret; @@ -1084,7 +1220,7 @@ void perf_event_detach_bpf_prog(struct perf_event *event) if (!event->prog) goto unlock; - old_array = event->tp_event->prog_array; + old_array = bpf_event_rcu_dereference(event->tp_event->prog_array); ret = bpf_prog_array_copy(old_array, event->prog, NULL, &new_array); if (ret == -ENOENT) goto unlock; @@ -1106,6 +1242,7 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info) { struct perf_event_query_bpf __user *uquery 
= info; struct perf_event_query_bpf query = {}; + struct bpf_prog_array *progs; u32 *ids, prog_cnt, ids_len; int ret; @@ -1130,10 +1267,8 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info) */ mutex_lock(&bpf_event_mutex); - ret = bpf_prog_array_copy_info(event->tp_event->prog_array, - ids, - ids_len, - &prog_cnt); + progs = bpf_event_rcu_dereference(event->tp_event->prog_array); + ret = bpf_prog_array_copy_info(progs, ids, ids_len, &prog_cnt); mutex_unlock(&bpf_event_mutex); if (copy_to_user(&uquery->prog_cnt, &prog_cnt, sizeof(prog_cnt)) || @@ -1296,6 +1431,20 @@ int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, return err; } +static int __init send_signal_irq_work_init(void) +{ + int cpu; + struct send_signal_irq_work *work; + + for_each_possible_cpu(cpu) { + work = per_cpu_ptr(&send_signal_work, cpu); + init_irq_work(&work->irq_work, do_bpf_send_signal); + } + return 0; +} + +subsys_initcall(send_signal_irq_work_init); + #ifdef CONFIG_MODULES static int bpf_event_notify(struct notifier_block *nb, unsigned long op, void *module) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index a12aff849c04..576c41644e77 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2935,14 +2935,13 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs) p = &pg->records[i]; p->flags = rec_flags; -#ifndef CC_USING_NOP_MCOUNT /* * Do the initial record conversion from mcount jump * to the NOP instructions. 
*/ - if (!ftrace_code_disable(mod, p)) + if (!__is_defined(CC_USING_NOP_MCOUNT) && + !ftrace_code_disable(mod, p)) break; -#endif update_cnt++; } @@ -4221,10 +4220,13 @@ void free_ftrace_func_mapper(struct ftrace_func_mapper *mapper, struct ftrace_func_entry *entry; struct ftrace_func_map *map; struct hlist_head *hhd; - int size = 1 << mapper->hash.size_bits; - int i; + int size, i; + + if (!mapper) + return; if (free_func && mapper->hash.count) { + size = 1 << mapper->hash.size_bits; for (i = 0; i < size; i++) { hhd = &mapper->hash.buckets[i]; hlist_for_each_entry(entry, hhd, hlist) { diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 1c80521fd436..c90c687cf950 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6719,11 +6719,13 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, break; } #endif - if (!tr->allocated_snapshot) { + if (tr->allocated_snapshot) + ret = resize_buffer_duplicate_size(&tr->max_buffer, + &tr->trace_buffer, iter->cpu_file); + else ret = tracing_alloc_snapshot_instance(tr); - if (ret < 0) - break; - } + if (ret < 0) + break; local_irq_disable(); /* Now, we're going to swap */ if (iter->cpu_file == RING_BUFFER_ALL_CPUS) @@ -6923,7 +6925,7 @@ struct tracing_log_err { static DEFINE_MUTEX(tracing_err_log_lock); -struct tracing_log_err *get_tracing_log_err(struct trace_array *tr) +static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr) { struct tracing_log_err *err; @@ -7126,12 +7128,24 @@ static ssize_t tracing_err_log_write(struct file *file, return count; } +static int tracing_err_log_release(struct inode *inode, struct file *file) +{ + struct trace_array *tr = inode->i_private; + + trace_array_put(tr); + + if (file->f_mode & FMODE_READ) + seq_release(inode, file); + + return 0; +} + static const struct file_operations tracing_err_log_fops = { .open = tracing_err_log_open, .write = tracing_err_log_write, .read = seq_read, .llseek = seq_lseek, - .release = 
tracing_release_generic_tr, + .release = tracing_err_log_release, }; static int tracing_buffers_open(struct inode *inode, struct file *filp) @@ -8192,7 +8206,7 @@ static const struct file_operations buffer_percent_fops = { .llseek = default_llseek, }; -struct dentry *trace_instance_dir; +static struct dentry *trace_instance_dir; static void init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer); @@ -8604,10 +8618,6 @@ struct dentry *tracing_init_dentry(void) */ tr->dir = debugfs_create_automount("tracing", NULL, trace_automount, NULL); - if (!tr->dir) { - pr_warn_once("Could not create debugfs directory 'tracing'\n"); - return ERR_PTR(-ENOMEM); - } return NULL; } diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index 1e6db9cbe4dc..fa95139445b2 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -277,7 +277,7 @@ static void move_to_next_cpu(void) * of this thread, than stop migrating for the duration * of the current test. */ - if (!cpumask_equal(current_mask, &current->cpus_allowed)) + if (!cpumask_equal(current_mask, current->cpus_ptr)) goto disable; get_online_cpus(); diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 54373d93e251..ba751f993c3b 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -1057,7 +1057,7 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter, trace_seq_puts(s, "<stack trace>\n"); - for (p = field->caller; p && *p != ULONG_MAX && p < end; p++) { + for (p = field->caller; p && p < end && *p != ULONG_MAX; p++) { if (trace_seq_has_overflowed(s)) break; diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index eb7e06b54741..7860e3f59fad 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -426,8 +426,6 @@ end: /* * Argument syntax: * - Add uprobe: p|r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] - * - * - Remove uprobe: -:[GRP/]EVENT */ static int trace_uprobe_create(int argc, const char 
**argv) { @@ -443,10 +441,17 @@ static int trace_uprobe_create(int argc, const char **argv) ret = 0; ref_ctr_offset = 0; - /* argc must be >= 1 */ - if (argv[0][0] == 'r') + switch (argv[0][0]) { + case 'r': is_return = true; - else if (argv[0][0] != 'p' || argc < 2) + break; + case 'p': + break; + default: + return -ECANCELED; + } + + if (argc < 2) return -ECANCELED; if (argv[0][1] == ':') @@ -1331,7 +1336,7 @@ static inline void init_trace_event_call(struct trace_uprobe *tu, call->event.funcs = &uprobe_funcs; call->class->define_fields = uprobe_event_define_fields; - call->flags = TRACE_EVENT_FL_UPROBE; + call->flags = TRACE_EVENT_FL_UPROBE | TRACE_EVENT_FL_CAP_ANY; call->class->reg = trace_uprobe_register; call->data = tu; } |