Diffstat (limited to 'kernel/trace')
-rw-r--r--  kernel/trace/blktrace.c      |   6
-rw-r--r--  kernel/trace/bpf_trace.c     | 197
-rw-r--r--  kernel/trace/ftrace.c        |  12
-rw-r--r--  kernel/trace/trace.c         |  32
-rw-r--r--  kernel/trace/trace_hwlat.c   |   2
-rw-r--r--  kernel/trace/trace_output.c  |   2
-rw-r--r--  kernel/trace/trace_uprobe.c  |  17
7 files changed, 214 insertions, 54 deletions
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index e1c6d79fb4cc..2d6e93ab0478 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -512,8 +512,6 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
dir = debugfs_lookup(buts->name, blk_debugfs_root);
if (!dir)
bt->dir = dir = debugfs_create_dir(buts->name, blk_debugfs_root);
- if (!dir)
- goto err;
bt->dev = dev;
atomic_set(&bt->dropped, 0);
@@ -522,12 +520,8 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
ret = -EIO;
bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt,
&blk_dropped_fops);
- if (!bt->dropped_file)
- goto err;
bt->msg_file = debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops);
- if (!bt->msg_file)
- goto err;
bt->rchan = relay_open("trace", dir, buts->buf_size,
buts->buf_nr, &blk_relay_callbacks, bt);
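
The blktrace hunks drop the error handling that used to follow debugfs_create_dir() and debugfs_create_file(). As in similar cleanups elsewhere in the tree, debugfs creation is treated as best-effort: the caller should not behave differently depending on whether it worked, so there is nothing useful to do with the return value. A minimal sketch of the resulting idiom, assuming a made-up "my_drv" directory, file name, and fops that are not part of this patch:

	#include <linux/debugfs.h>

	static struct dentry *my_dir;

	static void my_drv_debugfs_init(void *priv, const struct file_operations *fops)
	{
		/* No return-value checks: debugfs failure is not fatal and must
		 * not change the driver's behaviour.
		 */
		my_dir = debugfs_create_dir("my_drv", NULL);
		debugfs_create_file("state", 0444, my_dir, priv, fops);
	}
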
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index f92d6ad5e080..ca1255d14576 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -19,6 +19,9 @@
#include "trace_probe.h"
#include "trace.h"
+#define bpf_event_rcu_dereference(p) \
+ rcu_dereference_protected(p, lockdep_is_held(&bpf_event_mutex))
+
#ifdef CONFIG_MODULES
struct bpf_trace_module {
struct module *module;
@@ -410,8 +413,6 @@ static const struct bpf_func_proto bpf_perf_event_read_value_proto = {
.arg4_type = ARG_CONST_SIZE,
};
-static DEFINE_PER_CPU(struct perf_sample_data, bpf_trace_sd);
-
static __always_inline u64
__bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
u64 flags, struct perf_sample_data *sd)
@@ -442,24 +443,50 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
return perf_event_output(event, sd, regs);
}
+/*
+ * Support executing tracepoints in normal, irq, and nmi context that each call
+ * bpf_perf_event_output
+ */
+struct bpf_trace_sample_data {
+ struct perf_sample_data sds[3];
+};
+
+static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_trace_sds);
+static DEFINE_PER_CPU(int, bpf_trace_nest_level);
BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
u64, flags, void *, data, u64, size)
{
- struct perf_sample_data *sd = this_cpu_ptr(&bpf_trace_sd);
+ struct bpf_trace_sample_data *sds = this_cpu_ptr(&bpf_trace_sds);
+ int nest_level = this_cpu_inc_return(bpf_trace_nest_level);
struct perf_raw_record raw = {
.frag = {
.size = size,
.data = data,
},
};
+ struct perf_sample_data *sd;
+ int err;
- if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
- return -EINVAL;
+ if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(sds->sds))) {
+ err = -EBUSY;
+ goto out;
+ }
+
+ sd = &sds->sds[nest_level - 1];
+
+ if (unlikely(flags & ~(BPF_F_INDEX_MASK))) {
+ err = -EINVAL;
+ goto out;
+ }
perf_sample_data_init(sd, 0, 0);
sd->raw = &raw;
- return __bpf_perf_event_output(regs, map, flags, sd);
+ err = __bpf_perf_event_output(regs, map, flags, sd);
+
+out:
+ this_cpu_dec(bpf_trace_nest_level);
+ return err;
}
static const struct bpf_func_proto bpf_perf_event_output_proto = {
@@ -567,6 +594,69 @@ static const struct bpf_func_proto bpf_probe_read_str_proto = {
.arg3_type = ARG_ANYTHING,
};
+struct send_signal_irq_work {
+ struct irq_work irq_work;
+ struct task_struct *task;
+ u32 sig;
+};
+
+static DEFINE_PER_CPU(struct send_signal_irq_work, send_signal_work);
+
+static void do_bpf_send_signal(struct irq_work *entry)
+{
+ struct send_signal_irq_work *work;
+
+ work = container_of(entry, struct send_signal_irq_work, irq_work);
+ group_send_sig_info(work->sig, SEND_SIG_PRIV, work->task, PIDTYPE_TGID);
+}
+
+BPF_CALL_1(bpf_send_signal, u32, sig)
+{
+ struct send_signal_irq_work *work = NULL;
+
+ /* Similar to bpf_probe_write_user, task needs to be
+ * in a sound condition and kernel memory access be
+ * permitted in order to send signal to the current
+ * task.
+ */
+ if (unlikely(current->flags & (PF_KTHREAD | PF_EXITING)))
+ return -EPERM;
+ if (unlikely(uaccess_kernel()))
+ return -EPERM;
+ if (unlikely(!nmi_uaccess_okay()))
+ return -EPERM;
+
+ if (in_nmi()) {
+ /* Do an early check on signal validity. Otherwise,
+ * the error is lost in deferred irq_work.
+ */
+ if (unlikely(!valid_signal(sig)))
+ return -EINVAL;
+
+ work = this_cpu_ptr(&send_signal_work);
+ if (work->irq_work.flags & IRQ_WORK_BUSY)
+ return -EBUSY;
+
+ /* Add the current task, which is the target of sending signal,
+ * to the irq_work. The current task may change when queued
+ * irq works get executed.
+ */
+ work->task = current;
+ work->sig = sig;
+ irq_work_queue(&work->irq_work);
+ return 0;
+ }
+
+ return group_send_sig_info(sig, SEND_SIG_PRIV, current, PIDTYPE_TGID);
+}
+
+static const struct bpf_func_proto bpf_send_signal_proto = {
+ .func = bpf_send_signal,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_ANYTHING,
+};
+
static const struct bpf_func_proto *
tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
@@ -617,6 +707,8 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_get_current_cgroup_id:
return &bpf_get_current_cgroup_id_proto;
#endif
+ case BPF_FUNC_send_signal:
+ return &bpf_send_signal_proto;
default:
return NULL;
}
@@ -822,16 +914,48 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
/*
* bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp
* to avoid potential recursive reuse issue when/if tracepoints are added
- * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack
+ * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack.
+ *
+ * Since raw tracepoints run despite bpf_prog_active, support concurrent usage
+ * in normal, irq, and nmi context.
*/
-static DEFINE_PER_CPU(struct pt_regs, bpf_raw_tp_regs);
+struct bpf_raw_tp_regs {
+ struct pt_regs regs[3];
+};
+static DEFINE_PER_CPU(struct bpf_raw_tp_regs, bpf_raw_tp_regs);
+static DEFINE_PER_CPU(int, bpf_raw_tp_nest_level);
+static struct pt_regs *get_bpf_raw_tp_regs(void)
+{
+ struct bpf_raw_tp_regs *tp_regs = this_cpu_ptr(&bpf_raw_tp_regs);
+ int nest_level = this_cpu_inc_return(bpf_raw_tp_nest_level);
+
+ if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(tp_regs->regs))) {
+ this_cpu_dec(bpf_raw_tp_nest_level);
+ return ERR_PTR(-EBUSY);
+ }
+
+ return &tp_regs->regs[nest_level - 1];
+}
+
+static void put_bpf_raw_tp_regs(void)
+{
+ this_cpu_dec(bpf_raw_tp_nest_level);
+}
+
BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args,
struct bpf_map *, map, u64, flags, void *, data, u64, size)
{
- struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
+ struct pt_regs *regs = get_bpf_raw_tp_regs();
+ int ret;
+
+ if (IS_ERR(regs))
+ return PTR_ERR(regs);
perf_fetch_caller_regs(regs);
- return ____bpf_perf_event_output(regs, map, flags, data, size);
+ ret = ____bpf_perf_event_output(regs, map, flags, data, size);
+
+ put_bpf_raw_tp_regs();
+ return ret;
}
static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
@@ -848,12 +972,18 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
struct bpf_map *, map, u64, flags)
{
- struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
+ struct pt_regs *regs = get_bpf_raw_tp_regs();
+ int ret;
+
+ if (IS_ERR(regs))
+ return PTR_ERR(regs);
perf_fetch_caller_regs(regs);
/* similar to bpf_perf_event_output_tp, but pt_regs fetched differently */
- return bpf_get_stackid((unsigned long) regs, (unsigned long) map,
- flags, 0, 0);
+ ret = bpf_get_stackid((unsigned long) regs, (unsigned long) map,
+ flags, 0, 0);
+ put_bpf_raw_tp_regs();
+ return ret;
}
static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = {
@@ -868,11 +998,17 @@ static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = {
BPF_CALL_4(bpf_get_stack_raw_tp, struct bpf_raw_tracepoint_args *, args,
void *, buf, u32, size, u64, flags)
{
- struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
+ struct pt_regs *regs = get_bpf_raw_tp_regs();
+ int ret;
+
+ if (IS_ERR(regs))
+ return PTR_ERR(regs);
perf_fetch_caller_regs(regs);
- return bpf_get_stack((unsigned long) regs, (unsigned long) buf,
- (unsigned long) size, flags, 0);
+ ret = bpf_get_stack((unsigned long) regs, (unsigned long) buf,
+ (unsigned long) size, flags, 0);
+ put_bpf_raw_tp_regs();
+ return ret;
}
static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {
@@ -1034,7 +1170,7 @@ static DEFINE_MUTEX(bpf_event_mutex);
int perf_event_attach_bpf_prog(struct perf_event *event,
struct bpf_prog *prog)
{
- struct bpf_prog_array __rcu *old_array;
+ struct bpf_prog_array *old_array;
struct bpf_prog_array *new_array;
int ret = -EEXIST;
@@ -1052,7 +1188,7 @@ int perf_event_attach_bpf_prog(struct perf_event *event,
if (event->prog)
goto unlock;
- old_array = event->tp_event->prog_array;
+ old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
if (old_array &&
bpf_prog_array_length(old_array) >= BPF_TRACE_MAX_PROGS) {
ret = -E2BIG;
@@ -1075,7 +1211,7 @@ unlock:
void perf_event_detach_bpf_prog(struct perf_event *event)
{
- struct bpf_prog_array __rcu *old_array;
+ struct bpf_prog_array *old_array;
struct bpf_prog_array *new_array;
int ret;
@@ -1084,7 +1220,7 @@ void perf_event_detach_bpf_prog(struct perf_event *event)
if (!event->prog)
goto unlock;
- old_array = event->tp_event->prog_array;
+ old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
ret = bpf_prog_array_copy(old_array, event->prog, NULL, &new_array);
if (ret == -ENOENT)
goto unlock;
@@ -1106,6 +1242,7 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info)
{
struct perf_event_query_bpf __user *uquery = info;
struct perf_event_query_bpf query = {};
+ struct bpf_prog_array *progs;
u32 *ids, prog_cnt, ids_len;
int ret;
@@ -1130,10 +1267,8 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info)
*/
mutex_lock(&bpf_event_mutex);
- ret = bpf_prog_array_copy_info(event->tp_event->prog_array,
- ids,
- ids_len,
- &prog_cnt);
+ progs = bpf_event_rcu_dereference(event->tp_event->prog_array);
+ ret = bpf_prog_array_copy_info(progs, ids, ids_len, &prog_cnt);
mutex_unlock(&bpf_event_mutex);
if (copy_to_user(&uquery->prog_cnt, &prog_cnt, sizeof(prog_cnt)) ||
@@ -1296,6 +1431,20 @@ int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
return err;
}
+static int __init send_signal_irq_work_init(void)
+{
+ int cpu;
+ struct send_signal_irq_work *work;
+
+ for_each_possible_cpu(cpu) {
+ work = per_cpu_ptr(&send_signal_work, cpu);
+ init_irq_work(&work->irq_work, do_bpf_send_signal);
+ }
+ return 0;
+}
+
+subsys_initcall(send_signal_irq_work_init);
+
#ifdef CONFIG_MODULES
static int bpf_event_notify(struct notifier_block *nb, unsigned long op,
void *module)
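
The bpf_trace.c changes above do two things: they replace the single per-CPU scratch areas with small nest-level-indexed arrays so that bpf_perf_event_output() and the raw-tracepoint helpers can be re-entered from normal, irq, and nmi context, and they add the bpf_send_signal() helper, which defers delivery to irq_work when called from NMI context. A hedged sketch of how a tracing program might use the new helper, assuming a libbpf-style build where bpf/bpf_helpers.h declares bpf_send_signal(); the tracepoint, program name, and signal number are illustrative only:

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	char LICENSE[] SEC("license") = "GPL";

	SEC("tracepoint/syscalls/sys_enter_nanosleep")
	int probe_nanosleep(void *ctx)
	{
		/* Deliver SIGUSR1 (signal 10) to the current task; the helper
		 * returns -EPERM, -EINVAL or -EBUSY in the cases handled above.
		 */
		bpf_send_signal(10);
		return 0;
	}
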
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index a12aff849c04..576c41644e77 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2935,14 +2935,13 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs)
p = &pg->records[i];
p->flags = rec_flags;
-#ifndef CC_USING_NOP_MCOUNT
/*
* Do the initial record conversion from mcount jump
* to the NOP instructions.
*/
- if (!ftrace_code_disable(mod, p))
+ if (!__is_defined(CC_USING_NOP_MCOUNT) &&
+ !ftrace_code_disable(mod, p))
break;
-#endif
update_cnt++;
}
@@ -4221,10 +4220,13 @@ void free_ftrace_func_mapper(struct ftrace_func_mapper *mapper,
struct ftrace_func_entry *entry;
struct ftrace_func_map *map;
struct hlist_head *hhd;
- int size = 1 << mapper->hash.size_bits;
- int i;
+ int size, i;
+
+ if (!mapper)
+ return;
if (free_func && mapper->hash.count) {
+ size = 1 << mapper->hash.size_bits;
for (i = 0; i < size; i++) {
hhd = &mapper->hash.buckets[i];
hlist_for_each_entry(entry, hhd, hlist) {
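
The first ftrace.c hunk replaces an #ifndef CC_USING_NOP_MCOUNT block with a __is_defined() test, so the ftrace_code_disable() call is always parsed and type-checked but compiled out when the macro is set; the second hunk makes free_ftrace_func_mapper() tolerate a NULL mapper. A small sketch of the __is_defined() pattern, with MY_FEATURE and do_extra_work() as made-up placeholders (it relies on the macro being defined to 1, e.g. via -DMY_FEATURE):

	#include <linux/kconfig.h>	/* __is_defined() */

	static int do_extra_work(int v)
	{
		return v + 1;
	}

	static int process(int v)
	{
		/* Both branches stay visible to the compiler; the dead one is
		 * optimized away when MY_FEATURE is defined.
		 */
		if (!__is_defined(MY_FEATURE))
			v = do_extra_work(v);
		return v;
	}
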
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 1c80521fd436..c90c687cf950 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6719,11 +6719,13 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
break;
}
#endif
- if (!tr->allocated_snapshot) {
+ if (tr->allocated_snapshot)
+ ret = resize_buffer_duplicate_size(&tr->max_buffer,
+ &tr->trace_buffer, iter->cpu_file);
+ else
ret = tracing_alloc_snapshot_instance(tr);
- if (ret < 0)
- break;
- }
+ if (ret < 0)
+ break;
local_irq_disable();
/* Now, we're going to swap */
if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
@@ -6923,7 +6925,7 @@ struct tracing_log_err {
static DEFINE_MUTEX(tracing_err_log_lock);
-struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
+static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
{
struct tracing_log_err *err;
@@ -7126,12 +7128,24 @@ static ssize_t tracing_err_log_write(struct file *file,
return count;
}
+static int tracing_err_log_release(struct inode *inode, struct file *file)
+{
+ struct trace_array *tr = inode->i_private;
+
+ trace_array_put(tr);
+
+ if (file->f_mode & FMODE_READ)
+ seq_release(inode, file);
+
+ return 0;
+}
+
static const struct file_operations tracing_err_log_fops = {
.open = tracing_err_log_open,
.write = tracing_err_log_write,
.read = seq_read,
.llseek = seq_lseek,
- .release = tracing_release_generic_tr,
+ .release = tracing_err_log_release,
};
static int tracing_buffers_open(struct inode *inode, struct file *filp)
@@ -8192,7 +8206,7 @@ static const struct file_operations buffer_percent_fops = {
.llseek = default_llseek,
};
-struct dentry *trace_instance_dir;
+static struct dentry *trace_instance_dir;
static void
init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
@@ -8604,10 +8618,6 @@ struct dentry *tracing_init_dentry(void)
*/
tr->dir = debugfs_create_automount("tracing", NULL,
trace_automount, NULL);
- if (!tr->dir) {
- pr_warn_once("Could not create debugfs directory 'tracing'\n");
- return ERR_PTR(-ENOMEM);
- }
return NULL;
}
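
The trace.c changes tighten the snapshot write path (an already-allocated snapshot buffer is now resized to match the trace buffer before the swap), make a couple of symbols static, drop another unneeded debugfs error check, and give tracing_err_log_fops its own release method so that both the trace_array reference and the seq_file taken at open time are dropped. A minimal sketch of that open/release pairing, with my_obj_get()/my_obj_put() and my_seq_ops standing in for whatever the real open path acquires:

	#include <linux/fs.h>
	#include <linux/seq_file.h>

	static const struct seq_operations my_seq_ops;	/* placeholder */
	static void my_obj_get(void *obj) { }		/* placeholder ref ops */
	static void my_obj_put(void *obj) { }

	static int my_open(struct inode *inode, struct file *file)
	{
		my_obj_get(inode->i_private);		/* take a reference */
		return seq_open(file, &my_seq_ops);	/* allocate the seq_file */
	}

	static int my_release(struct inode *inode, struct file *file)
	{
		my_obj_put(inode->i_private);		/* drop the reference */
		return seq_release(inode, file);	/* free the seq_file */
	}
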
diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c
index 1e6db9cbe4dc..fa95139445b2 100644
--- a/kernel/trace/trace_hwlat.c
+++ b/kernel/trace/trace_hwlat.c
@@ -277,7 +277,7 @@ static void move_to_next_cpu(void)
* of this thread, than stop migrating for the duration
* of the current test.
*/
- if (!cpumask_equal(current_mask, &current->cpus_allowed))
+ if (!cpumask_equal(current_mask, current->cpus_ptr))
goto disable;
get_online_cpus();
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 54373d93e251..ba751f993c3b 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -1057,7 +1057,7 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter,
trace_seq_puts(s, "<stack trace>\n");
- for (p = field->caller; p && *p != ULONG_MAX && p < end; p++) {
+ for (p = field->caller; p && p < end && *p != ULONG_MAX; p++) {
if (trace_seq_has_overflowed(s))
break;
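
The trace_output.c fix swaps the order of the loop conditions so the bounds check runs before the sentinel dereference; thanks to &&'s short-circuit evaluation, *p is never read once p has reached end. The same idea in isolation (function and parameter names are illustrative):

	#include <linux/kernel.h>	/* ULONG_MAX */

	static unsigned long sum_until_sentinel(const unsigned long *buf, int len)
	{
		const unsigned long *end = buf + len;
		const unsigned long *p;
		unsigned long sum = 0;

		/* Bound first, dereference second: no read past buf[len - 1]. */
		for (p = buf; p && p < end && *p != ULONG_MAX; p++)
			sum += *p;

		return sum;
	}
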
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index eb7e06b54741..7860e3f59fad 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -426,8 +426,6 @@ end:
/*
* Argument syntax:
* - Add uprobe: p|r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS]
- *
- * - Remove uprobe: -:[GRP/]EVENT
*/
static int trace_uprobe_create(int argc, const char **argv)
{
@@ -443,10 +441,17 @@ static int trace_uprobe_create(int argc, const char **argv)
ret = 0;
ref_ctr_offset = 0;
- /* argc must be >= 1 */
- if (argv[0][0] == 'r')
+ switch (argv[0][0]) {
+ case 'r':
is_return = true;
- else if (argv[0][0] != 'p' || argc < 2)
+ break;
+ case 'p':
+ break;
+ default:
+ return -ECANCELED;
+ }
+
+ if (argc < 2)
return -ECANCELED;
if (argv[0][1] == ':')
@@ -1331,7 +1336,7 @@ static inline void init_trace_event_call(struct trace_uprobe *tu,
call->event.funcs = &uprobe_funcs;
call->class->define_fields = uprobe_event_define_fields;
- call->flags = TRACE_EVENT_FL_UPROBE;
+ call->flags = TRACE_EVENT_FL_UPROBE | TRACE_EVENT_FL_CAP_ANY;
call->class->reg = trace_uprobe_register;
call->data = tu;
}