diff options
Diffstat (limited to 'kernel/trace')
-rw-r--r-- | kernel/trace/bpf_trace.c | 20 | ||||
-rw-r--r-- | kernel/trace/ring_buffer.c | 38 | ||||
-rw-r--r-- | kernel/trace/trace.c | 72 | ||||
-rw-r--r-- | kernel/trace/trace.h | 2 | ||||
-rw-r--r-- | kernel/trace/trace_events.c | 33 | ||||
-rw-r--r-- | kernel/trace/trace_events_inject.c | 3 | ||||
-rw-r--r-- | kernel/trace/trace_events_synth.c | 2 | ||||
-rw-r--r-- | kernel/trace/trace_events_user.c | 58 |
8 files changed, 177 insertions, 51 deletions
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index a7264b2c17ad..868008f56fec 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -2853,6 +2853,17 @@ static int get_modules_for_addrs(struct module ***mods, unsigned long *addrs, u3 return arr.mods_cnt; } +static int addrs_check_error_injection_list(unsigned long *addrs, u32 cnt) +{ + u32 i; + + for (i = 0; i < cnt; i++) { + if (!within_error_injection_list(addrs[i])) + return -EINVAL; + } + return 0; +} + int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) { struct bpf_kprobe_multi_link *link = NULL; @@ -2930,6 +2941,11 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr goto error; } + if (prog->kprobe_override && addrs_check_error_injection_list(addrs, cnt)) { + err = -EINVAL; + goto error; + } + link = kzalloc(sizeof(*link), GFP_KERNEL); if (!link) { err = -ENOMEM; @@ -3207,8 +3223,10 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr rcu_read_lock(); task = get_pid_task(find_vpid(pid), PIDTYPE_PID); rcu_read_unlock(); - if (!task) + if (!task) { + err = -ESRCH; goto error_path_put; + } } err = -ENOMEM; diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 78502d4c7214..515cafdb18d9 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -354,6 +354,11 @@ static void rb_init_page(struct buffer_data_page *bpage) local_set(&bpage->commit, 0); } +static __always_inline unsigned int rb_page_commit(struct buffer_page *bpage) +{ + return local_read(&bpage->page->commit); +} + static void free_buffer_page(struct buffer_page *bpage) { free_page((unsigned long)bpage->page); @@ -1132,6 +1137,9 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, if (full) { poll_wait(filp, &work->full_waiters, poll_table); work->full_waiters_pending = true; + if (!cpu_buffer->shortest_full || + cpu_buffer->shortest_full > full) + cpu_buffer->shortest_full = full; } else { poll_wait(filp, &work->waiters, poll_table); work->waiters_pending = true; @@ -2003,7 +2011,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages) * Increment overrun to account for the lost events. */ local_add(page_entries, &cpu_buffer->overrun); - local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes); + local_sub(rb_page_commit(to_remove_page), &cpu_buffer->entries_bytes); local_inc(&cpu_buffer->pages_lost); } @@ -2198,6 +2206,8 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size, err = -ENOMEM; goto out_err; } + + cond_resched(); } cpus_read_lock(); @@ -2365,11 +2375,6 @@ rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->reader_page->read); } -static __always_inline unsigned rb_page_commit(struct buffer_page *bpage) -{ - return local_read(&bpage->page->commit); -} - static struct ring_buffer_event * rb_iter_head_event(struct ring_buffer_iter *iter) { @@ -2388,6 +2393,11 @@ rb_iter_head_event(struct ring_buffer_iter *iter) */ commit = rb_page_commit(iter_head_page); smp_rmb(); + + /* An event needs to be at least 8 bytes in size */ + if (iter->head > commit - 8) + goto reset; + event = __rb_page_index(iter_head_page, iter->head); length = rb_event_length(event); @@ -2510,7 +2520,7 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer, * the counters. */ local_add(entries, &cpu_buffer->overrun); - local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes); + local_sub(rb_page_commit(next_page), &cpu_buffer->entries_bytes); local_inc(&cpu_buffer->pages_lost); /* @@ -2653,9 +2663,6 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, event = __rb_page_index(tail_page, tail); - /* account for padding bytes */ - local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes); - /* * Save the original length to the meta data. * This will be used by the reader to add lost event @@ -2669,7 +2676,8 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, * write counter enough to allow another writer to slip * in on this page. * We put in a discarded commit instead, to make sure - * that this space is not used again. + * that this space is not used again, and this space will + * not be accounted into 'entries_bytes'. * * If we are less than the minimum size, we don't need to * worry about it. @@ -2694,6 +2702,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, /* time delta must be non zero */ event->time_delta = 1; + /* account for padding bytes */ + local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes); + /* Make sure the padding is visible before the tail_page->write update */ smp_wmb(); @@ -4208,7 +4219,7 @@ u64 ring_buffer_oldest_event_ts(struct trace_buffer *buffer, int cpu) EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts); /** - * ring_buffer_bytes_cpu - get the number of bytes consumed in a cpu buffer + * ring_buffer_bytes_cpu - get the number of bytes unconsumed in a cpu buffer * @buffer: The ring buffer * @cpu: The per CPU buffer to read from. */ @@ -4716,6 +4727,7 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer) length = rb_event_length(event); cpu_buffer->reader_page->read += length; + cpu_buffer->read_bytes += length; } static void rb_advance_iter(struct ring_buffer_iter *iter) @@ -5809,7 +5821,7 @@ int ring_buffer_read_page(struct trace_buffer *buffer, } else { /* update the entry counter */ cpu_buffer->read += rb_page_entries(reader); - cpu_buffer->read_bytes += BUF_PAGE_SIZE; + cpu_buffer->read_bytes += rb_page_commit(reader); /* swap the pages */ rb_init_page(bpage); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2b4ded753367..abaaf516fcae 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1772,7 +1772,7 @@ static void trace_create_maxlat_file(struct trace_array *tr, init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq); tr->d_max_latency = trace_create_file("tracing_max_latency", TRACE_MODE_WRITE, - d_tracer, &tr->max_latency, + d_tracer, tr, &tracing_max_lat_fops); } @@ -1805,7 +1805,7 @@ void latency_fsnotify(struct trace_array *tr) #define trace_create_maxlat_file(tr, d_tracer) \ trace_create_file("tracing_max_latency", TRACE_MODE_WRITE, \ - d_tracer, &tr->max_latency, &tracing_max_lat_fops) + d_tracer, tr, &tracing_max_lat_fops) #endif @@ -4973,6 +4973,33 @@ int tracing_open_generic_tr(struct inode *inode, struct file *filp) return 0; } +/* + * The private pointer of the inode is the trace_event_file. + * Update the tr ref count associated to it. + */ +int tracing_open_file_tr(struct inode *inode, struct file *filp) +{ + struct trace_event_file *file = inode->i_private; + int ret; + + ret = tracing_check_open_get_tr(file->tr); + if (ret) + return ret; + + filp->private_data = inode->i_private; + + return 0; +} + +int tracing_release_file_tr(struct inode *inode, struct file *filp) +{ + struct trace_event_file *file = inode->i_private; + + trace_array_put(file->tr); + + return 0; +} + static int tracing_mark_open(struct inode *inode, struct file *filp) { stream_open(inode, filp); @@ -6691,14 +6718,18 @@ static ssize_t tracing_max_lat_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos); + struct trace_array *tr = filp->private_data; + + return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos); } static ssize_t tracing_max_lat_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { - return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos); + struct trace_array *tr = filp->private_data; + + return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos); } #endif @@ -7752,18 +7783,20 @@ static const struct file_operations tracing_thresh_fops = { #ifdef CONFIG_TRACER_MAX_TRACE static const struct file_operations tracing_max_lat_fops = { - .open = tracing_open_generic, + .open = tracing_open_generic_tr, .read = tracing_max_lat_read, .write = tracing_max_lat_write, .llseek = generic_file_llseek, + .release = tracing_release_generic_tr, }; #endif static const struct file_operations set_tracer_fops = { - .open = tracing_open_generic, + .open = tracing_open_generic_tr, .read = tracing_set_trace_read, .write = tracing_set_trace_write, .llseek = generic_file_llseek, + .release = tracing_release_generic_tr, }; static const struct file_operations tracing_pipe_fops = { @@ -8956,12 +8989,33 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, return cnt; } +static int tracing_open_options(struct inode *inode, struct file *filp) +{ + struct trace_option_dentry *topt = inode->i_private; + int ret; + + ret = tracing_check_open_get_tr(topt->tr); + if (ret) + return ret; + + filp->private_data = inode->i_private; + return 0; +} + +static int tracing_release_options(struct inode *inode, struct file *file) +{ + struct trace_option_dentry *topt = file->private_data; + + trace_array_put(topt->tr); + return 0; +} static const struct file_operations trace_options_fops = { - .open = tracing_open_generic, + .open = tracing_open_options, .read = trace_options_read, .write = trace_options_write, .llseek = generic_file_llseek, + .release = tracing_release_options, }; /* @@ -9739,8 +9793,8 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) tr, &tracing_mark_fops); file = __find_event_file(tr, "ftrace", "print"); - if (file && file->dir) - trace_create_file("trigger", TRACE_MODE_WRITE, file->dir, + if (file && file->ef) + eventfs_add_file("trigger", TRACE_MODE_WRITE, file->ef, file, &event_trigger_fops); tr->trace_marker_file = file; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 5669dd1f90d9..77debe53f07c 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -610,6 +610,8 @@ void tracing_reset_all_online_cpus(void); void tracing_reset_all_online_cpus_unlocked(void); int tracing_open_generic(struct inode *inode, struct file *filp); int tracing_open_generic_tr(struct inode *inode, struct file *filp); +int tracing_open_file_tr(struct inode *inode, struct file *filp); +int tracing_release_file_tr(struct inode *inode, struct file *filp); bool tracing_is_disabled(void); bool tracer_tracing_is_on(struct trace_array *tr); void tracer_tracing_on(struct trace_array *tr); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index ed367d713be0..f49d6ddb6342 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -992,19 +992,6 @@ static void remove_subsystem(struct trace_subsystem_dir *dir) static void remove_event_file_dir(struct trace_event_file *file) { - struct dentry *dir = file->dir; - struct dentry *child; - - if (dir) { - spin_lock(&dir->d_lock); /* probably unneeded */ - list_for_each_entry(child, &dir->d_subdirs, d_child) { - if (d_really_is_positive(child)) /* probably unneeded */ - d_inode(child)->i_private = NULL; - } - spin_unlock(&dir->d_lock); - - tracefs_remove(dir); - } eventfs_remove(file->ef); list_del(&file->list); remove_subsystem(file->system); @@ -2103,9 +2090,10 @@ static const struct file_operations ftrace_set_event_notrace_pid_fops = { }; static const struct file_operations ftrace_enable_fops = { - .open = tracing_open_generic, + .open = tracing_open_file_tr, .read = event_enable_read, .write = event_enable_write, + .release = tracing_release_file_tr, .llseek = default_llseek, }; @@ -2122,9 +2110,10 @@ static const struct file_operations ftrace_event_id_fops = { }; static const struct file_operations ftrace_event_filter_fops = { - .open = tracing_open_generic, + .open = tracing_open_file_tr, .read = event_filter_read, .write = event_filter_write, + .release = tracing_release_file_tr, .llseek = default_llseek, }; @@ -2297,6 +2286,7 @@ event_subsystem_dir(struct trace_array *tr, const char *name, { struct event_subsystem *system, *iter; struct trace_subsystem_dir *dir; + struct eventfs_file *ef; int res; /* First see if we did not already create this dir */ @@ -2329,13 +2319,14 @@ event_subsystem_dir(struct trace_array *tr, const char *name, } else __get_system(system); - dir->ef = eventfs_add_subsystem_dir(name, parent); - if (IS_ERR(dir->ef)) { + ef = eventfs_add_subsystem_dir(name, parent); + if (IS_ERR(ef)) { pr_warn("Failed to create system directory %s\n", name); __put_system(system); goto out_free; } + dir->ef = ef; dir->tr = tr; dir->ref_count = 1; dir->nr_events = 1; @@ -2415,6 +2406,7 @@ event_create_dir(struct dentry *parent, struct trace_event_file *file) struct trace_event_call *call = file->event_call; struct eventfs_file *ef_subsystem = NULL; struct trace_array *tr = file->tr; + struct eventfs_file *ef; const char *name; int ret; @@ -2431,12 +2423,14 @@ event_create_dir(struct dentry *parent, struct trace_event_file *file) return -ENOMEM; name = trace_event_name(call); - file->ef = eventfs_add_dir(name, ef_subsystem); - if (IS_ERR(file->ef)) { + ef = eventfs_add_dir(name, ef_subsystem); + if (IS_ERR(ef)) { pr_warn("Could not create tracefs '%s' directory\n", name); return -1; } + file->ef = ef; + if (call->class->reg && !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) eventfs_add_file("enable", TRACE_MODE_WRITE, file->ef, file, &ftrace_enable_fops); @@ -2776,6 +2770,7 @@ void trace_event_eval_update(struct trace_eval_map **map, int len) update_event_fields(call, map[i]); } } + cond_resched(); } up_write(&trace_event_sem); } diff --git a/kernel/trace/trace_events_inject.c b/kernel/trace/trace_events_inject.c index abe805d471eb..8650562bdaa9 100644 --- a/kernel/trace/trace_events_inject.c +++ b/kernel/trace/trace_events_inject.c @@ -328,7 +328,8 @@ event_inject_read(struct file *file, char __user *buf, size_t size, } const struct file_operations event_inject_fops = { - .open = tracing_open_generic, + .open = tracing_open_file_tr, .read = event_inject_read, .write = event_inject_write, + .release = tracing_release_file_tr, }; diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 9897d0bfcab7..14cb275a0bab 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -337,7 +337,7 @@ static void print_synth_event_num_val(struct trace_seq *s, break; default: - trace_seq_printf(s, print_fmt, name, val, space); + trace_seq_printf(s, print_fmt, name, val->as_u64, space); break; } } diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c index 6f046650e527..b87f41187c6a 100644 --- a/kernel/trace/trace_events_user.c +++ b/kernel/trace/trace_events_user.c @@ -127,8 +127,13 @@ struct user_event_enabler { /* Bit 7 is for freeing status of enablement */ #define ENABLE_VAL_FREEING_BIT 7 -/* Only duplicate the bit value */ -#define ENABLE_VAL_DUP_MASK ENABLE_VAL_BIT_MASK +/* Bit 8 is for marking 32-bit on 64-bit */ +#define ENABLE_VAL_32_ON_64_BIT 8 + +#define ENABLE_VAL_COMPAT_MASK (1 << ENABLE_VAL_32_ON_64_BIT) + +/* Only duplicate the bit and compat values */ +#define ENABLE_VAL_DUP_MASK (ENABLE_VAL_BIT_MASK | ENABLE_VAL_COMPAT_MASK) #define ENABLE_BITOPS(e) (&(e)->values) @@ -174,6 +179,30 @@ struct user_event_validator { int flags; }; +static inline void align_addr_bit(unsigned long *addr, int *bit, + unsigned long *flags) +{ + if (IS_ALIGNED(*addr, sizeof(long))) { +#ifdef __BIG_ENDIAN + /* 32 bit on BE 64 bit requires a 32 bit offset when aligned. */ + if (test_bit(ENABLE_VAL_32_ON_64_BIT, flags)) + *bit += 32; +#endif + return; + } + + *addr = ALIGN_DOWN(*addr, sizeof(long)); + + /* + * We only support 32 and 64 bit values. The only time we need + * to align is a 32 bit value on a 64 bit kernel, which on LE + * is always 32 bits, and on BE requires no change when unaligned. + */ +#ifdef __LITTLE_ENDIAN + *bit += 32; +#endif +} + typedef void (*user_event_func_t) (struct user_event *user, struct iov_iter *i, void *tpdata, bool *faulted); @@ -482,6 +511,7 @@ static int user_event_enabler_write(struct user_event_mm *mm, unsigned long *ptr; struct page *page; void *kaddr; + int bit = ENABLE_BIT(enabler); int ret; lockdep_assert_held(&event_mutex); @@ -497,6 +527,8 @@ static int user_event_enabler_write(struct user_event_mm *mm, test_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(enabler)))) return -EBUSY; + align_addr_bit(&uaddr, &bit, ENABLE_BITOPS(enabler)); + ret = pin_user_pages_remote(mm->mm, uaddr, 1, FOLL_WRITE | FOLL_NOFAULT, &page, NULL); @@ -515,9 +547,9 @@ static int user_event_enabler_write(struct user_event_mm *mm, /* Update bit atomically, user tracers must be atomic as well */ if (enabler->event && enabler->event->status) - set_bit(ENABLE_BIT(enabler), ptr); + set_bit(bit, ptr); else - clear_bit(ENABLE_BIT(enabler), ptr); + clear_bit(bit, ptr); kunmap_local(kaddr); unpin_user_pages_dirty_lock(&page, 1, true); @@ -849,6 +881,12 @@ static struct user_event_enabler enabler->event = user; enabler->addr = uaddr; enabler->values = reg->enable_bit; + +#if BITS_PER_LONG >= 64 + if (reg->enable_size == 4) + set_bit(ENABLE_VAL_32_ON_64_BIT, ENABLE_BITOPS(enabler)); +#endif + retry: /* Prevents state changes from racing with new enablers */ mutex_lock(&event_mutex); @@ -2377,7 +2415,8 @@ static long user_unreg_get(struct user_unreg __user *ureg, } static int user_event_mm_clear_bit(struct user_event_mm *user_mm, - unsigned long uaddr, unsigned char bit) + unsigned long uaddr, unsigned char bit, + unsigned long flags) { struct user_event_enabler enabler; int result; @@ -2385,7 +2424,7 @@ static int user_event_mm_clear_bit(struct user_event_mm *user_mm, memset(&enabler, 0, sizeof(enabler)); enabler.addr = uaddr; - enabler.values = bit; + enabler.values = bit | flags; retry: /* Prevents state changes from racing with new enablers */ mutex_lock(&event_mutex); @@ -2415,6 +2454,7 @@ static long user_events_ioctl_unreg(unsigned long uarg) struct user_event_mm *mm = current->user_event_mm; struct user_event_enabler *enabler, *next; struct user_unreg reg; + unsigned long flags; long ret; ret = user_unreg_get(ureg, ®); @@ -2425,6 +2465,7 @@ static long user_events_ioctl_unreg(unsigned long uarg) if (!mm) return -ENOENT; + flags = 0; ret = -ENOENT; /* @@ -2441,6 +2482,9 @@ static long user_events_ioctl_unreg(unsigned long uarg) ENABLE_BIT(enabler) == reg.disable_bit) { set_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(enabler)); + /* We must keep compat flags for the clear */ + flags |= enabler->values & ENABLE_VAL_COMPAT_MASK; + if (!test_bit(ENABLE_VAL_FAULTING_BIT, ENABLE_BITOPS(enabler))) user_event_enabler_destroy(enabler, true); @@ -2454,7 +2498,7 @@ static long user_events_ioctl_unreg(unsigned long uarg) /* Ensure bit is now cleared for user, regardless of event status */ if (!ret) ret = user_event_mm_clear_bit(mm, reg.disable_addr, - reg.disable_bit); + reg.disable_bit, flags); return ret; } |