Diffstat (limited to 'kernel/trace'):

 kernel/trace/bpf_trace.c           | 20
 kernel/trace/ring_buffer.c         | 38
 kernel/trace/trace.c               | 72
 kernel/trace/trace.h               |  2
 kernel/trace/trace_events.c        | 33
 kernel/trace/trace_events_inject.c |  3
 kernel/trace/trace_events_synth.c  |  2
 kernel/trace/trace_events_user.c   | 58
 8 files changed, 177 insertions(+), 51 deletions(-)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index a7264b2c17ad..868008f56fec 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -2853,6 +2853,17 @@ static int get_modules_for_addrs(struct module ***mods, unsigned long *addrs, u3
return arr.mods_cnt;
}
+static int addrs_check_error_injection_list(unsigned long *addrs, u32 cnt)
+{
+ u32 i;
+
+ for (i = 0; i < cnt; i++) {
+ if (!within_error_injection_list(addrs[i]))
+ return -EINVAL;
+ }
+ return 0;
+}
+
int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
struct bpf_kprobe_multi_link *link = NULL;
@@ -2930,6 +2941,11 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
goto error;
}
+ if (prog->kprobe_override && addrs_check_error_injection_list(addrs, cnt)) {
+ err = -EINVAL;
+ goto error;
+ }
+
link = kzalloc(sizeof(*link), GFP_KERNEL);
if (!link) {
err = -ENOMEM;
@@ -3207,8 +3223,10 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
rcu_read_lock();
task = get_pid_task(find_vpid(pid), PIDTYPE_PID);
rcu_read_unlock();
- if (!task)
+ if (!task) {
+ err = -ESRCH;
goto error_path_put;
+ }
}
err = -ENOMEM;
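The bpf_trace.c hunks do two things: bpf_kprobe_multi_link_attach() now rejects a kprobe_override program whose target addresses are not all on the error-injection list, and bpf_uprobe_multi_link_attach() returns -ESRCH instead of falling through with a stale error code when the target pid cannot be resolved. For context, a kernel function opts in to error injection (and thus to override-capable kprobes) as in this minimal sketch, where my_driver_op is a hypothetical example:

#include <linux/error-injection.h>

/* Hypothetical function opting in to error injection; only addresses
 * that pass within_error_injection_list() survive the new
 * addrs_check_error_injection_list() gate when prog->kprobe_override
 * is set. */
noinline int my_driver_op(void)
{
	return 0;
}
ALLOW_ERROR_INJECTION(my_driver_op, ERRNO);
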
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 78502d4c7214..515cafdb18d9 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -354,6 +354,11 @@ static void rb_init_page(struct buffer_data_page *bpage)
local_set(&bpage->commit, 0);
}
+static __always_inline unsigned int rb_page_commit(struct buffer_page *bpage)
+{
+ return local_read(&bpage->page->commit);
+}
+
static void free_buffer_page(struct buffer_page *bpage)
{
free_page((unsigned long)bpage->page);
@@ -1132,6 +1137,9 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
if (full) {
poll_wait(filp, &work->full_waiters, poll_table);
work->full_waiters_pending = true;
+ if (!cpu_buffer->shortest_full ||
+ cpu_buffer->shortest_full > full)
+ cpu_buffer->shortest_full = full;
} else {
poll_wait(filp, &work->waiters, poll_table);
work->waiters_pending = true;
@@ -2003,7 +2011,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages)
* Increment overrun to account for the lost events.
*/
local_add(page_entries, &cpu_buffer->overrun);
- local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
+ local_sub(rb_page_commit(to_remove_page), &cpu_buffer->entries_bytes);
local_inc(&cpu_buffer->pages_lost);
}
@@ -2198,6 +2206,8 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
err = -ENOMEM;
goto out_err;
}
+
+ cond_resched();
}
cpus_read_lock();
@@ -2365,11 +2375,6 @@ rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
cpu_buffer->reader_page->read);
}
-static __always_inline unsigned rb_page_commit(struct buffer_page *bpage)
-{
- return local_read(&bpage->page->commit);
-}
-
static struct ring_buffer_event *
rb_iter_head_event(struct ring_buffer_iter *iter)
{
@@ -2388,6 +2393,11 @@ rb_iter_head_event(struct ring_buffer_iter *iter)
*/
commit = rb_page_commit(iter_head_page);
smp_rmb();
+
+ /* An event needs to be at least 8 bytes in size */
+ if (iter->head > commit - 8)
+ goto reset;
+
event = __rb_page_index(iter_head_page, iter->head);
length = rb_event_length(event);
@@ -2510,7 +2520,7 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
* the counters.
*/
local_add(entries, &cpu_buffer->overrun);
- local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
+ local_sub(rb_page_commit(next_page), &cpu_buffer->entries_bytes);
local_inc(&cpu_buffer->pages_lost);
/*
@@ -2653,9 +2663,6 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
event = __rb_page_index(tail_page, tail);
- /* account for padding bytes */
- local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes);
-
/*
* Save the original length to the meta data.
* This will be used by the reader to add lost event
@@ -2669,7 +2676,8 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
* write counter enough to allow another writer to slip
* in on this page.
* We put in a discarded commit instead, to make sure
- * that this space is not used again.
+ * that this space is not used again, and this space will
+ * not be accounted into 'entries_bytes'.
*
* If we are less than the minimum size, we don't need to
* worry about it.
@@ -2694,6 +2702,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
/* time delta must be non zero */
event->time_delta = 1;
+ /* account for padding bytes */
+ local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes);
+
/* Make sure the padding is visible before the tail_page->write update */
smp_wmb();
@@ -4208,7 +4219,7 @@ u64 ring_buffer_oldest_event_ts(struct trace_buffer *buffer, int cpu)
EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts);
/**
- * ring_buffer_bytes_cpu - get the number of bytes consumed in a cpu buffer
+ * ring_buffer_bytes_cpu - get the number of bytes unconsumed in a cpu buffer
* @buffer: The ring buffer
* @cpu: The per CPU buffer to read from.
*/
@@ -4716,6 +4727,7 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
length = rb_event_length(event);
cpu_buffer->reader_page->read += length;
+ cpu_buffer->read_bytes += length;
}
static void rb_advance_iter(struct ring_buffer_iter *iter)
@@ -5809,7 +5821,7 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
} else {
/* update the entry counter */
cpu_buffer->read += rb_page_entries(reader);
- cpu_buffer->read_bytes += BUF_PAGE_SIZE;
+ cpu_buffer->read_bytes += rb_page_commit(reader);
/* swap the pages */
rb_init_page(bpage);
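The ring_buffer.c hunks are mostly byte-accounting fixes: pages lost to removal or overwrite now subtract only their committed bytes (via rb_page_commit(), hoisted above its first user) instead of a full BUF_PAGE_SIZE, tail padding is accounted where the discarded event is actually written, reader advances bump read_bytes, and ring_buffer_read_page() credits the commit size rather than the page size. The iterator also resets when the head is too close to the commit to hold a minimal 8-byte event, the poll path records shortest_full, and the resize loop gains a cond_resched(). The net effect is that the "bytes" line in the per-CPU stats file can no longer drift negative; a quick userspace check (sketch, assuming tracefs is mounted at /sys/kernel/tracing):

#include <stdio.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/sys/kernel/tracing/per_cpu/cpu0/stats", "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* "bytes:" should never underflow */
	fclose(f);
	return 0;
}
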
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 2b4ded753367..abaaf516fcae 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1772,7 +1772,7 @@ static void trace_create_maxlat_file(struct trace_array *tr,
init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
tr->d_max_latency = trace_create_file("tracing_max_latency",
TRACE_MODE_WRITE,
- d_tracer, &tr->max_latency,
+ d_tracer, tr,
&tracing_max_lat_fops);
}
@@ -1805,7 +1805,7 @@ void latency_fsnotify(struct trace_array *tr)
#define trace_create_maxlat_file(tr, d_tracer) \
trace_create_file("tracing_max_latency", TRACE_MODE_WRITE, \
- d_tracer, &tr->max_latency, &tracing_max_lat_fops)
+ d_tracer, tr, &tracing_max_lat_fops)
#endif
@@ -4973,6 +4973,33 @@ int tracing_open_generic_tr(struct inode *inode, struct file *filp)
return 0;
}
+/*
+ * The private pointer of the inode is the trace_event_file.
+ * Update the tr ref count associated to it.
+ */
+int tracing_open_file_tr(struct inode *inode, struct file *filp)
+{
+ struct trace_event_file *file = inode->i_private;
+ int ret;
+
+ ret = tracing_check_open_get_tr(file->tr);
+ if (ret)
+ return ret;
+
+ filp->private_data = inode->i_private;
+
+ return 0;
+}
+
+int tracing_release_file_tr(struct inode *inode, struct file *filp)
+{
+ struct trace_event_file *file = inode->i_private;
+
+ trace_array_put(file->tr);
+
+ return 0;
+}
+
static int tracing_mark_open(struct inode *inode, struct file *filp)
{
stream_open(inode, filp);
@@ -6691,14 +6718,18 @@ static ssize_t
tracing_max_lat_read(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
- return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
+ struct trace_array *tr = filp->private_data;
+
+ return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
}
static ssize_t
tracing_max_lat_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
- return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
+ struct trace_array *tr = filp->private_data;
+
+ return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
}
#endif
@@ -7752,18 +7783,20 @@ static const struct file_operations tracing_thresh_fops = {
#ifdef CONFIG_TRACER_MAX_TRACE
static const struct file_operations tracing_max_lat_fops = {
- .open = tracing_open_generic,
+ .open = tracing_open_generic_tr,
.read = tracing_max_lat_read,
.write = tracing_max_lat_write,
.llseek = generic_file_llseek,
+ .release = tracing_release_generic_tr,
};
#endif
static const struct file_operations set_tracer_fops = {
- .open = tracing_open_generic,
+ .open = tracing_open_generic_tr,
.read = tracing_set_trace_read,
.write = tracing_set_trace_write,
.llseek = generic_file_llseek,
+ .release = tracing_release_generic_tr,
};
static const struct file_operations tracing_pipe_fops = {
@@ -8956,12 +8989,33 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
return cnt;
}
+static int tracing_open_options(struct inode *inode, struct file *filp)
+{
+ struct trace_option_dentry *topt = inode->i_private;
+ int ret;
+
+ ret = tracing_check_open_get_tr(topt->tr);
+ if (ret)
+ return ret;
+
+ filp->private_data = inode->i_private;
+ return 0;
+}
+
+static int tracing_release_options(struct inode *inode, struct file *file)
+{
+ struct trace_option_dentry *topt = file->private_data;
+
+ trace_array_put(topt->tr);
+ return 0;
+}
static const struct file_operations trace_options_fops = {
- .open = tracing_open_generic,
+ .open = tracing_open_options,
.read = trace_options_read,
.write = trace_options_write,
.llseek = generic_file_llseek,
+ .release = tracing_release_options,
};
/*
@@ -9739,8 +9793,8 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
tr, &tracing_mark_fops);
file = __find_event_file(tr, "ftrace", "print");
- if (file && file->dir)
- trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
+ if (file && file->ef)
+ eventfs_add_file("trigger", TRACE_MODE_WRITE, file->ef,
file, &event_trigger_fops);
tr->trace_marker_file = file;
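The trace.c changes follow one pattern: every tracefs file whose open takes a reference on a trace_array (or on a trace_event_file / trace_option_dentry) gets a matching release that drops it, and tracing_max_latency now passes the trace_array itself as private data so those helpers have something to ref-count. The pairing in miniature (a sketch built from the helpers visible in this patch; the example_* names are illustrative):

static int example_open(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;
	int ret = tracing_check_open_get_tr(tr);	/* takes a tr reference */

	if (ret)
		return ret;

	filp->private_data = tr;
	return 0;
}

static int example_release(struct inode *inode, struct file *filp)
{
	trace_array_put(filp->private_data);	/* drops the open's reference */
	return 0;
}

The final trace.c hunk is part of the eventfs conversion: the "trigger" file for the trace_marker event is now created through eventfs_add_file() on file->ef rather than trace_create_file() on a dentry.
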
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 5669dd1f90d9..77debe53f07c 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -610,6 +610,8 @@ void tracing_reset_all_online_cpus(void);
void tracing_reset_all_online_cpus_unlocked(void);
int tracing_open_generic(struct inode *inode, struct file *filp);
int tracing_open_generic_tr(struct inode *inode, struct file *filp);
+int tracing_open_file_tr(struct inode *inode, struct file *filp);
+int tracing_release_file_tr(struct inode *inode, struct file *filp);
bool tracing_is_disabled(void);
bool tracer_tracing_is_on(struct trace_array *tr);
void tracer_tracing_on(struct trace_array *tr);
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index ed367d713be0..f49d6ddb6342 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -992,19 +992,6 @@ static void remove_subsystem(struct trace_subsystem_dir *dir)
static void remove_event_file_dir(struct trace_event_file *file)
{
- struct dentry *dir = file->dir;
- struct dentry *child;
-
- if (dir) {
- spin_lock(&dir->d_lock); /* probably unneeded */
- list_for_each_entry(child, &dir->d_subdirs, d_child) {
- if (d_really_is_positive(child)) /* probably unneeded */
- d_inode(child)->i_private = NULL;
- }
- spin_unlock(&dir->d_lock);
-
- tracefs_remove(dir);
- }
eventfs_remove(file->ef);
list_del(&file->list);
remove_subsystem(file->system);
@@ -2103,9 +2090,10 @@ static const struct file_operations ftrace_set_event_notrace_pid_fops = {
};
static const struct file_operations ftrace_enable_fops = {
- .open = tracing_open_generic,
+ .open = tracing_open_file_tr,
.read = event_enable_read,
.write = event_enable_write,
+ .release = tracing_release_file_tr,
.llseek = default_llseek,
};
@@ -2122,9 +2110,10 @@ static const struct file_operations ftrace_event_id_fops = {
};
static const struct file_operations ftrace_event_filter_fops = {
- .open = tracing_open_generic,
+ .open = tracing_open_file_tr,
.read = event_filter_read,
.write = event_filter_write,
+ .release = tracing_release_file_tr,
.llseek = default_llseek,
};
@@ -2297,6 +2286,7 @@ event_subsystem_dir(struct trace_array *tr, const char *name,
{
struct event_subsystem *system, *iter;
struct trace_subsystem_dir *dir;
+ struct eventfs_file *ef;
int res;
/* First see if we did not already create this dir */
@@ -2329,13 +2319,14 @@ event_subsystem_dir(struct trace_array *tr, const char *name,
} else
__get_system(system);
- dir->ef = eventfs_add_subsystem_dir(name, parent);
- if (IS_ERR(dir->ef)) {
+ ef = eventfs_add_subsystem_dir(name, parent);
+ if (IS_ERR(ef)) {
pr_warn("Failed to create system directory %s\n", name);
__put_system(system);
goto out_free;
}
+ dir->ef = ef;
dir->tr = tr;
dir->ref_count = 1;
dir->nr_events = 1;
@@ -2415,6 +2406,7 @@ event_create_dir(struct dentry *parent, struct trace_event_file *file)
struct trace_event_call *call = file->event_call;
struct eventfs_file *ef_subsystem = NULL;
struct trace_array *tr = file->tr;
+ struct eventfs_file *ef;
const char *name;
int ret;
@@ -2431,12 +2423,14 @@ event_create_dir(struct dentry *parent, struct trace_event_file *file)
return -ENOMEM;
name = trace_event_name(call);
- file->ef = eventfs_add_dir(name, ef_subsystem);
- if (IS_ERR(file->ef)) {
+ ef = eventfs_add_dir(name, ef_subsystem);
+ if (IS_ERR(ef)) {
pr_warn("Could not create tracefs '%s' directory\n", name);
return -1;
}
+ file->ef = ef;
+
if (call->class->reg && !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
eventfs_add_file("enable", TRACE_MODE_WRITE, file->ef, file,
&ftrace_enable_fops);
@@ -2776,6 +2770,7 @@ void trace_event_eval_update(struct trace_eval_map **map, int len)
update_event_fields(call, map[i]);
}
}
+ cond_resched();
}
up_write(&trace_event_sem);
}
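In trace_events.c, remove_event_file_dir() drops the hand-rolled dentry walk in favor of eventfs_remove(), the enable/filter fops gain the tr ref-counting open/release pair introduced above, and trace_event_eval_update() yields with cond_resched() between events. The directory hunks also stop publishing eventfs results before they are validated, so anything reading dir->ef or file->ef never observes an ERR_PTR; the shape of that fix (sketch, names taken from the hunks):

struct eventfs_file *ef;

/* Before: file->ef = eventfs_add_dir(...), with IS_ERR(file->ef)
 * checked afterwards, leaving a window where the field held an
 * error pointer. */
ef = eventfs_add_dir(name, ef_subsystem);
if (IS_ERR(ef))
	return -1;		/* nothing was published on failure */

file->ef = ef;			/* only a valid pointer is ever visible */
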
diff --git a/kernel/trace/trace_events_inject.c b/kernel/trace/trace_events_inject.c
index abe805d471eb..8650562bdaa9 100644
--- a/kernel/trace/trace_events_inject.c
+++ b/kernel/trace/trace_events_inject.c
@@ -328,7 +328,8 @@ event_inject_read(struct file *file, char __user *buf, size_t size,
}
const struct file_operations event_inject_fops = {
- .open = tracing_open_generic,
+ .open = tracing_open_file_tr,
.read = event_inject_read,
.write = event_inject_write,
+ .release = tracing_release_file_tr,
};
diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index 9897d0bfcab7..14cb275a0bab 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -337,7 +337,7 @@ static void print_synth_event_num_val(struct trace_seq *s,
break;
default:
- trace_seq_printf(s, print_fmt, name, val, space);
+ trace_seq_printf(s, print_fmt, name, val->as_u64, space);
break;
}
}
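The one-line trace_events_synth.c fix matters because print_fmt is a printf-style format string: the default case handed the union pointer itself to the varargs instead of the 64-bit value it points to, so the pointer bits were printed. The same pitfall in a userspace miniature (sketch with a stand-in union):

#include <stdio.h>
#include <inttypes.h>

union num {
	uint64_t as_u64;
	double   as_double;
};

int main(void)
{
	union num v = { .as_u64 = 42 };
	union num *val = &v;

	/* Buggy shape: printf("%llu\n", val) prints the pointer, not 42. */
	printf("val=%" PRIu64 "\n", val->as_u64);	/* fixed shape */
	return 0;
}
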
diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c
index 6f046650e527..b87f41187c6a 100644
--- a/kernel/trace/trace_events_user.c
+++ b/kernel/trace/trace_events_user.c
@@ -127,8 +127,13 @@ struct user_event_enabler {
/* Bit 7 is for freeing status of enablement */
#define ENABLE_VAL_FREEING_BIT 7
-/* Only duplicate the bit value */
-#define ENABLE_VAL_DUP_MASK ENABLE_VAL_BIT_MASK
+/* Bit 8 is for marking 32-bit on 64-bit */
+#define ENABLE_VAL_32_ON_64_BIT 8
+
+#define ENABLE_VAL_COMPAT_MASK (1 << ENABLE_VAL_32_ON_64_BIT)
+
+/* Only duplicate the bit and compat values */
+#define ENABLE_VAL_DUP_MASK (ENABLE_VAL_BIT_MASK | ENABLE_VAL_COMPAT_MASK)
#define ENABLE_BITOPS(e) (&(e)->values)
@@ -174,6 +179,30 @@ struct user_event_validator {
int flags;
};
+static inline void align_addr_bit(unsigned long *addr, int *bit,
+ unsigned long *flags)
+{
+ if (IS_ALIGNED(*addr, sizeof(long))) {
+#ifdef __BIG_ENDIAN
+ /* 32 bit on BE 64 bit requires a 32 bit offset when aligned. */
+ if (test_bit(ENABLE_VAL_32_ON_64_BIT, flags))
+ *bit += 32;
+#endif
+ return;
+ }
+
+ *addr = ALIGN_DOWN(*addr, sizeof(long));
+
+ /*
+ * We only support 32 and 64 bit values. The only time we need
+ * to align is a 32 bit value on a 64 bit kernel, which on LE
+ * is always 32 bits, and on BE requires no change when unaligned.
+ */
+#ifdef __LITTLE_ENDIAN
+ *bit += 32;
+#endif
+}
+
typedef void (*user_event_func_t) (struct user_event *user, struct iov_iter *i,
void *tpdata, bool *faulted);
@@ -482,6 +511,7 @@ static int user_event_enabler_write(struct user_event_mm *mm,
unsigned long *ptr;
struct page *page;
void *kaddr;
+ int bit = ENABLE_BIT(enabler);
int ret;
lockdep_assert_held(&event_mutex);
@@ -497,6 +527,8 @@ static int user_event_enabler_write(struct user_event_mm *mm,
test_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(enabler))))
return -EBUSY;
+ align_addr_bit(&uaddr, &bit, ENABLE_BITOPS(enabler));
+
ret = pin_user_pages_remote(mm->mm, uaddr, 1, FOLL_WRITE | FOLL_NOFAULT,
&page, NULL);
@@ -515,9 +547,9 @@ static int user_event_enabler_write(struct user_event_mm *mm,
/* Update bit atomically, user tracers must be atomic as well */
if (enabler->event && enabler->event->status)
- set_bit(ENABLE_BIT(enabler), ptr);
+ set_bit(bit, ptr);
else
- clear_bit(ENABLE_BIT(enabler), ptr);
+ clear_bit(bit, ptr);
kunmap_local(kaddr);
unpin_user_pages_dirty_lock(&page, 1, true);
@@ -849,6 +881,12 @@ static struct user_event_enabler
enabler->event = user;
enabler->addr = uaddr;
enabler->values = reg->enable_bit;
+
+#if BITS_PER_LONG >= 64
+ if (reg->enable_size == 4)
+ set_bit(ENABLE_VAL_32_ON_64_BIT, ENABLE_BITOPS(enabler));
+#endif
+
retry:
/* Prevents state changes from racing with new enablers */
mutex_lock(&event_mutex);
@@ -2377,7 +2415,8 @@ static long user_unreg_get(struct user_unreg __user *ureg,
}
static int user_event_mm_clear_bit(struct user_event_mm *user_mm,
- unsigned long uaddr, unsigned char bit)
+ unsigned long uaddr, unsigned char bit,
+ unsigned long flags)
{
struct user_event_enabler enabler;
int result;
@@ -2385,7 +2424,7 @@ static int user_event_mm_clear_bit(struct user_event_mm *user_mm,
memset(&enabler, 0, sizeof(enabler));
enabler.addr = uaddr;
- enabler.values = bit;
+ enabler.values = bit | flags;
retry:
/* Prevents state changes from racing with new enablers */
mutex_lock(&event_mutex);
@@ -2415,6 +2454,7 @@ static long user_events_ioctl_unreg(unsigned long uarg)
struct user_event_mm *mm = current->user_event_mm;
struct user_event_enabler *enabler, *next;
struct user_unreg reg;
+ unsigned long flags;
long ret;
ret = user_unreg_get(ureg, &reg);
@@ -2425,6 +2465,7 @@ static long user_events_ioctl_unreg(unsigned long uarg)
if (!mm)
return -ENOENT;
+ flags = 0;
ret = -ENOENT;
/*
@@ -2441,6 +2482,9 @@ static long user_events_ioctl_unreg(unsigned long uarg)
ENABLE_BIT(enabler) == reg.disable_bit) {
set_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(enabler));
+ /* We must keep compat flags for the clear */
+ flags |= enabler->values & ENABLE_VAL_COMPAT_MASK;
+
if (!test_bit(ENABLE_VAL_FAULTING_BIT, ENABLE_BITOPS(enabler)))
user_event_enabler_destroy(enabler, true);
@@ -2454,7 +2498,7 @@ static long user_events_ioctl_unreg(unsigned long uarg)
/* Ensure bit is now cleared for user, regardless of event status */
if (!ret)
ret = user_event_mm_clear_bit(mm, reg.disable_addr,
- reg.disable_bit);
+ reg.disable_bit, flags);
return ret;
}
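The trace_events_user.c changes teach user_events about 32-bit enable words registered from a compat (32-bit on 64-bit) process: a new ENABLE_VAL_32_ON_64_BIT flag is set when enable_size is 4, survives duplication via ENABLE_VAL_DUP_MASK, is carried through unregister, and align_addr_bit() rounds an unaligned address down to a long boundary while shifting the bit into the correct half of the word. A worked little-endian model of that arithmetic (userspace sketch; ALIGN_DOWN stands in for the kernel macro):

#include <stdio.h>
#include <stdint.h>

#define ALIGN_DOWN(x, a)	((x) & ~((uintptr_t)(a) - 1))

int main(void)
{
	uintptr_t addr = 0x1004;	/* 4-byte aligned, not long-aligned */
	int bit = 1;			/* bit within the 32-bit enable word */

	if (addr % sizeof(long)) {
		addr = ALIGN_DOWN(addr, sizeof(long));
		bit += 32;		/* LE: value sits in the upper half */
	}

	printf("addr=0x%lx bit=%d\n", (unsigned long)addr, bit);
	return 0;	/* prints addr=0x1000 bit=33 on 64-bit LE */
}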