From f91840a32deef5cb1bf73338bc5010f843b01426 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 2 Jun 2017 21:03:52 -0700 Subject: perf, bpf: Add BPF support to all perf_event types Allow BPF_PROG_TYPE_PERF_EVENT program types to attach to all perf_event types, including HW_CACHE, RAW, and dynamic pmu events. Only tracepoint/kprobe events are treated differently; they require the BPF_PROG_TYPE_TRACEPOINT and BPF_PROG_TYPE_KPROBE program types, respectively. Also add support for reading all event counters using the bpf_perf_event_read() helper. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/trace/bpf_trace.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 460a031c77e5..08eb072430b9 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -234,7 +234,8 @@ BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags) unsigned int cpu = smp_processor_id(); u64 index = flags & BPF_F_INDEX_MASK; struct bpf_event_entry *ee; - struct perf_event *event; + u64 value = 0; + int err; if (unlikely(flags & ~(BPF_F_INDEX_MASK))) return -EINVAL; @@ -247,21 +248,14 @@ BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags) if (!ee) return -ENOENT; - event = ee->event; - if (unlikely(event->attr.type != PERF_TYPE_HARDWARE && - event->attr.type != PERF_TYPE_RAW)) - return -EINVAL; - - /* make sure event is local and doesn't have pmu::count */ - if (unlikely(event->oncpu != cpu || event->pmu->count)) - return -EINVAL; - + err = perf_event_read_local(ee->event, &value); /* - * we don't know if the function is run successfully by the - * return value. It can be judged in other places, such as - * eBPF programs. + * this api is ugly since we miss [-22..-2] range of valid + * counter values, but that's uapi */ - return perf_event_read_local(event); + if (err) + return err; + return value; } static const struct bpf_func_proto bpf_perf_event_read_proto = { -- cgit From c3ca46ef719580eb01994fc6032db470fde92d85 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 23 May 2017 15:05:50 +0900 Subject: ftrace/kprobes: selftests: Check kretprobe maxactive is supported Check that the kretprobe maxactive syntax is supported by the kprobe_events interface. So that the test can detect whether the running kernel supports the feature, this also changes the ftrace README to describe it.
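For readers unfamiliar with maxactive, here is a minimal sketch (not part of this patch) of the in-kernel kprobes API that the r[maxactive] event syntax maps onto; the handler body, target symbol, and the value 20 are illustrative:

#include <linux/kprobes.h>

/* Called on return from the probed function. */
static int my_ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	/* e.g. inspect regs_return_value(regs) here */
	return 0;
}

static struct kretprobe my_kretprobe = {
	.kp.symbol_name = "do_sys_open",   /* hypothetical probe target */
	.handler        = my_ret_handler,
	.maxactive      = 20,   /* track at most 20 concurrent returns */
};

/* register_kretprobe(&my_kretprobe) in module init,
 * unregister_kretprobe() on exit.
 */

When more instances of the probed function are in flight than maxactive allows, further returns are silently missed, which is why the selftest below cares that the r[maxactive] syntax is available for tuning.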
Signed-off-by: Masami Hiramatsu Signed-off-by: Shuah Khan --- kernel/trace/trace.c | 3 ++- tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 1122f151466f..dc3f91e70345 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4473,7 +4473,8 @@ static const char readme_msg[] = #endif #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) "\t accepts: event-definitions (one definition per line)\n" - "\t Format: p|r[:[/]] []\n" + "\t Format: p[:[/]] []\n" + "\t r[maxactive][:[/]] []\n" "\t -:[/]\n" #ifdef CONFIG_KPROBE_EVENTS "\t place: [:][+]|\n" diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc index 57abdf1caabf..7ec6f2639ad6 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc @@ -2,6 +2,7 @@ # description: Kretprobe dynamic event with maxactive [ -f kprobe_events ] || exit_unsupported # this is configurable +grep -q 'r\[maxactive\]' README || exit_unsupported # this is older version echo > kprobe_events -- cgit From 20b9d7ac48526ce9a14106241e76e8382d126a60 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 11 Jun 2017 00:50:40 +0200 Subject: bpf: avoid excessive stack usage for perf_sample_data perf_sample_data consumes 386 bytes on stack, reduce excessive stack usage and move it to per cpu buffer. It's allowed due to preemption being disabled for tracing, xdp and tc programs, thus at all times only one program can run on a specific CPU and programs cannot run from interrupt. We similarly also handle bpf_pt_regs. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/trace/bpf_trace.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 08eb072430b9..051d7fca0c09 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -266,14 +266,16 @@ static const struct bpf_func_proto bpf_perf_event_read_proto = { .arg2_type = ARG_ANYTHING, }; +static DEFINE_PER_CPU(struct perf_sample_data, bpf_sd); + static __always_inline u64 __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, u64 flags, struct perf_raw_record *raw) { struct bpf_array *array = container_of(map, struct bpf_array, map); + struct perf_sample_data *sd = this_cpu_ptr(&bpf_sd); unsigned int cpu = smp_processor_id(); u64 index = flags & BPF_F_INDEX_MASK; - struct perf_sample_data sample_data; struct bpf_event_entry *ee; struct perf_event *event; @@ -294,9 +296,9 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, if (unlikely(event->oncpu != cpu)) return -EOPNOTSUPP; - perf_sample_data_init(&sample_data, 0, 0); - sample_data.raw = raw; - perf_event_output(event, &sample_data, regs); + perf_sample_data_init(sd, 0, 0); + sd->raw = raw; + perf_event_output(event, sd, regs); return 0; } -- cgit From 6a5ae63a0cc5d3ac73a96cb412fde66f3a71f98e Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Thu, 8 Jun 2017 19:53:24 -0700 Subject: tracing: Remove unused declaration of trace_stop_cmdline_recording trace_stop_cmdline_recording declaration isn't in use, remove it. 
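Returning to the perf_sample_data patch above, here is a condensed sketch of the per-cpu scratch-buffer pattern it introduces; emit_sample() is a hypothetical wrapper, and the pattern is only safe under that patch's stated precondition that preemption is disabled while such programs run:

#include <linux/percpu.h>
#include <linux/perf_event.h>

/* One static instance per CPU replaces a ~386-byte on-stack object.
 * Safe because at most one program runs on a given CPU at a time and
 * none run from interrupt context.
 */
static DEFINE_PER_CPU(struct perf_sample_data, bpf_sd);

static void emit_sample(struct perf_event *event, struct pt_regs *regs,
			struct perf_raw_record *raw)
{
	struct perf_sample_data *sd = this_cpu_ptr(&bpf_sd);

	perf_sample_data_init(sd, 0, 0);	/* re-init the reused buffer */
	sd->raw = raw;
	perf_event_output(event, sd, regs);
}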
Link: http://lkml.kernel.org/r/20170609025327.9508-2-joelaf@google.com Cc: kernel-team@android.com Cc: Ingo Molnar Signed-off-by: Joel Fernandes Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 1122f151466f..63deff9cdf2c 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1910,8 +1910,6 @@ static void tracing_stop_tr(struct trace_array *tr) raw_spin_unlock_irqrestore(&tr->start_lock, flags); } -void trace_stop_cmdline_recording(void); - static int trace_save_cmdline(struct task_struct *tsk) { unsigned pid, idx; -- cgit From 02fd7f68f5342bc7e8054cb05ea4a07f26d41d12 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Wed, 31 May 2017 16:56:42 -0500 Subject: trace: rename kernel enum section to eval The kernel and its modules have sections containing the enum string to value conversions. Rename this section because we intend to store more than enums in it. Link: http://lkml.kernel.org/r/20170531215653.3240-2-jeremy.linton@arm.com Signed-off-by: Jeremy Linton Signed-off-by: Steven Rostedt (VMware) --- include/asm-generic/vmlinux.lds.h | 6 +++--- include/trace/trace_events.h | 2 +- kernel/module.c | 2 +- kernel/trace/trace.c | 8 ++++---- 4 files changed, 9 insertions(+), 9 deletions(-) (limited to 'kernel/trace') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 314a0b9219c6..800f9f9677a6 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -125,9 +125,9 @@ VMLINUX_SYMBOL(__start_ftrace_events) = .; \ KEEP(*(_ftrace_events)) \ VMLINUX_SYMBOL(__stop_ftrace_events) = .; \ - VMLINUX_SYMBOL(__start_ftrace_enum_maps) = .; \ - KEEP(*(_ftrace_enum_map)) \ - VMLINUX_SYMBOL(__stop_ftrace_enum_maps) = .; + VMLINUX_SYMBOL(__start_ftrace_eval_maps) = .; \ + KEEP(*(_ftrace_eval_map)) \ + VMLINUX_SYMBOL(__stop_ftrace_eval_maps) = .; #else #define FTRACE_EVENTS() #endif diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h index 00f643164ca2..4bdd84023f5b 100644 --- a/include/trace/trace_events.h +++ b/include/trace/trace_events.h @@ -43,7 +43,7 @@ TRACE_MAKE_SYSTEM_STR(); .enum_value = a \ }; \ static struct trace_enum_map __used \ - __attribute__((section("_ftrace_enum_map"))) \ + __attribute__((section("_ftrace_eval_map"))) \ *TRACE_SYSTEM##_##a = &__##TRACE_SYSTEM##_##a /* diff --git a/kernel/module.c b/kernel/module.c index 4a3665f8f837..9ec4713c5eee 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3077,7 +3077,7 @@ static int find_module_sections(struct module *mod, struct load_info *info) mod->trace_events = section_objs(info, "_ftrace_events", sizeof(*mod->trace_events), &mod->num_trace_events); - mod->trace_enums = section_objs(info, "_ftrace_enum_map", + mod->trace_enums = section_objs(info, "_ftrace_eval_map", sizeof(*mod->trace_enums), &mod->num_trace_enums); #endif diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 63deff9cdf2c..acd3eb4d56a0 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7732,15 +7732,15 @@ struct dentry *tracing_init_dentry(void) return NULL; } -extern struct trace_enum_map *__start_ftrace_enum_maps[]; -extern struct trace_enum_map *__stop_ftrace_enum_maps[]; +extern struct trace_enum_map *__start_ftrace_eval_maps[]; +extern struct trace_enum_map *__stop_ftrace_eval_maps[]; static void __init trace_enum_init(void) { int len; - len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps; - 
trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len); + len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps; + trace_insert_enum_map(NULL, __start_ftrace_eval_maps, len); } #ifdef CONFIG_MODULES -- cgit From 00f4b652b6f1dbfd4e1d5419d7f1cc23b1374da8 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Wed, 31 May 2017 16:56:43 -0500 Subject: trace: rename trace_enum_map to trace_eval_map Each enum is loaded into the trace_enum_map, as we are now using this for more than enums rename it. Link: http://lkml.kernel.org/r/20170531215653.3240-3-jeremy.linton@arm.com Signed-off-by: Jeremy Linton Signed-off-by: Steven Rostedt (VMware) --- include/linux/module.h | 2 +- include/linux/tracepoint.h | 6 +++--- include/trace/trace_events.h | 8 ++++---- kernel/trace/trace.c | 24 ++++++++++++------------ kernel/trace/trace.h | 4 ++-- kernel/trace/trace_events.c | 14 +++++++------- 6 files changed, 29 insertions(+), 29 deletions(-) (limited to 'kernel/trace') diff --git a/include/linux/module.h b/include/linux/module.h index 21f56393602f..46b48043d741 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -442,7 +442,7 @@ struct module { #ifdef CONFIG_EVENT_TRACING struct trace_event_call **trace_events; unsigned int num_trace_events; - struct trace_enum_map **trace_enums; + struct trace_eval_map **trace_enums; unsigned int num_trace_enums; #endif #ifdef CONFIG_FTRACE_MCOUNT_RECORD diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index cc48cb2ce209..f7b0f5525e46 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -25,10 +25,10 @@ struct module; struct tracepoint; struct notifier_block; -struct trace_enum_map { +struct trace_eval_map { const char *system; - const char *enum_string; - unsigned long enum_value; + const char *eval_string; + unsigned long eval_value; }; #define TRACEPOINT_DEFAULT_PRIO 10 diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h index 4bdd84023f5b..49cce5fb54ee 100644 --- a/include/trace/trace_events.h +++ b/include/trace/trace_events.h @@ -35,14 +35,14 @@ TRACE_MAKE_SYSTEM_STR(); #undef TRACE_DEFINE_ENUM #define TRACE_DEFINE_ENUM(a) \ - static struct trace_enum_map __used __initdata \ + static struct trace_eval_map __used __initdata \ __##TRACE_SYSTEM##_##a = \ { \ .system = TRACE_SYSTEM_STRING, \ - .enum_string = #a, \ - .enum_value = a \ + .eval_string = #a, \ + .eval_value = a \ }; \ - static struct trace_enum_map __used \ + static struct trace_eval_map __used \ __attribute__((section("_ftrace_eval_map"))) \ *TRACE_SYSTEM##_##a = &__##TRACE_SYSTEM##_##a diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index acd3eb4d56a0..46fac3f63af1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -132,7 +132,7 @@ union trace_enum_map_item; struct trace_enum_map_tail { /* * "end" is first and points to NULL as it must be different - * than "mod" or "enum_string" + * than "mod" or "eval_string" */ union trace_enum_map_item *next; const char *end; /* points to NULL */ @@ -148,7 +148,7 @@ static DEFINE_MUTEX(trace_enum_mutex); * pointer to the next array of saved enum_map items. 
*/ union trace_enum_map_item { - struct trace_enum_map map; + struct trace_eval_map map; struct trace_enum_map_head head; struct trace_enum_map_tail tail; }; @@ -4748,7 +4748,7 @@ static const struct file_operations tracing_saved_cmdlines_size_fops = { static union trace_enum_map_item * update_enum_map(union trace_enum_map_item *ptr) { - if (!ptr->map.enum_string) { + if (!ptr->map.eval_string) { if (ptr->tail.next) { ptr = ptr->tail.next; /* Set ptr to the next real item (skip head) */ @@ -4808,7 +4808,7 @@ static int enum_map_show(struct seq_file *m, void *v) union trace_enum_map_item *ptr = v; seq_printf(m, "%s %ld (%s)\n", - ptr->map.enum_string, ptr->map.enum_value, + ptr->map.eval_string, ptr->map.eval_value, ptr->map.system); return 0; @@ -4844,11 +4844,11 @@ trace_enum_jmp_to_tail(union trace_enum_map_item *ptr) } static void -trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start, +trace_insert_enum_map_file(struct module *mod, struct trace_eval_map **start, int len) { - struct trace_enum_map **stop; - struct trace_enum_map **map; + struct trace_eval_map **stop; + struct trace_eval_map **map; union trace_enum_map_item *map_array; union trace_enum_map_item *ptr; @@ -4902,13 +4902,13 @@ static void trace_create_enum_file(struct dentry *d_tracer) #else /* CONFIG_TRACE_ENUM_MAP_FILE */ static inline void trace_create_enum_file(struct dentry *d_tracer) { } static inline void trace_insert_enum_map_file(struct module *mod, - struct trace_enum_map **start, int len) { } + struct trace_eval_map **start, int len) { } #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */ static void trace_insert_enum_map(struct module *mod, - struct trace_enum_map **start, int len) + struct trace_eval_map **start, int len) { - struct trace_enum_map **map; + struct trace_eval_map **map; if (len <= 0) return; @@ -7732,8 +7732,8 @@ struct dentry *tracing_init_dentry(void) return NULL; } -extern struct trace_enum_map *__start_ftrace_eval_maps[]; -extern struct trace_enum_map *__stop_ftrace_eval_maps[]; +extern struct trace_eval_map *__start_ftrace_eval_maps[]; +extern struct trace_eval_map *__stop_ftrace_eval_maps[]; static void __init trace_enum_init(void) { diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 39fd77330aab..a9667297ae49 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1773,10 +1773,10 @@ static inline const char *get_syscall_name(int syscall) #ifdef CONFIG_EVENT_TRACING void trace_event_init(void); -void trace_event_enum_update(struct trace_enum_map **map, int len); +void trace_event_enum_update(struct trace_eval_map **map, int len); #else static inline void __init trace_event_init(void) { } -static inline void trace_event_enum_update(struct trace_enum_map **map, int len) { } +static inline void trace_event_enum_update(struct trace_eval_map **map, int len) { } #endif extern struct trace_iterator *tracepoint_print_iter; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index e7973e10398c..cf5b9aa4d732 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2067,18 +2067,18 @@ __register_event(struct trace_event_call *call, struct module *mod) return 0; } -static char *enum_replace(char *ptr, struct trace_enum_map *map, int len) +static char *enum_replace(char *ptr, struct trace_eval_map *map, int len) { int rlen; int elen; /* Find the length of the enum value as a string */ - elen = snprintf(ptr, 0, "%ld", map->enum_value); + elen = snprintf(ptr, 0, "%ld", map->eval_value); /* Make sure there's enough room to replace 
the string with the value */ if (len < elen) return NULL; - snprintf(ptr, elen + 1, "%ld", map->enum_value); + snprintf(ptr, elen + 1, "%ld", map->eval_value); /* Get the rest of the string of ptr */ rlen = strlen(ptr + len); @@ -2090,11 +2090,11 @@ static char *enum_replace(char *ptr, struct trace_enum_map *map, int len) } static void update_event_printk(struct trace_event_call *call, - struct trace_enum_map *map) + struct trace_eval_map *map) { char *ptr; int quote = 0; - int len = strlen(map->enum_string); + int len = strlen(map->eval_string); for (ptr = call->print_fmt; *ptr; ptr++) { if (*ptr == '\\') { @@ -2125,7 +2125,7 @@ static void update_event_printk(struct trace_event_call *call, continue; } if (isalpha(*ptr) || *ptr == '_') { - if (strncmp(map->enum_string, ptr, len) == 0 && + if (strncmp(map->eval_string, ptr, len) == 0 && !isalnum(ptr[len]) && ptr[len] != '_') { ptr = enum_replace(ptr, map, len); /* Hmm, enum string smaller than value */ @@ -2165,7 +2165,7 @@ static void update_event_printk(struct trace_event_call *call, } } -void trace_event_enum_update(struct trace_enum_map **map, int len) +void trace_event_eval_update(struct trace_eval_map **map, int len) { struct trace_event_call *call, *p; const char *last_system = NULL; -- cgit From 99be647c5841d570a23b5dfa65bfecada8b6e6b5 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Wed, 31 May 2017 16:56:44 -0500 Subject: trace: rename struct module entry for trace enums Each module has a list of the enums it contributes to the enum map; rename that entry to reflect its use by more than enums. Link: http://lkml.kernel.org/r/20170531215653.3240-4-jeremy.linton@arm.com Signed-off-by: Jeremy Linton Signed-off-by: Steven Rostedt (VMware) --- include/linux/module.h | 4 ++-- kernel/module.c | 6 +++--- kernel/trace/trace.c | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'kernel/trace') diff --git a/include/linux/module.h b/include/linux/module.h index 46b48043d741..8eb9a1e693e5 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -442,8 +442,8 @@ struct module { #ifdef CONFIG_EVENT_TRACING struct trace_event_call **trace_events; unsigned int num_trace_events; - struct trace_eval_map **trace_enums; - unsigned int num_trace_enums; + struct trace_eval_map **trace_evals; + unsigned int num_trace_evals; #endif #ifdef CONFIG_FTRACE_MCOUNT_RECORD unsigned int num_ftrace_callsites; diff --git a/kernel/module.c b/kernel/module.c index 9ec4713c5eee..df1c4a9e7abb 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3077,9 +3077,9 @@ static int find_module_sections(struct module *mod, struct load_info *info) mod->trace_events = section_objs(info, "_ftrace_events", sizeof(*mod->trace_events), &mod->num_trace_events); - mod->trace_enums = section_objs(info, "_ftrace_eval_map", - sizeof(*mod->trace_enums), - &mod->num_trace_enums); + mod->trace_evals = section_objs(info, "_ftrace_eval_map", + sizeof(*mod->trace_evals), + &mod->num_trace_evals); #endif #ifdef CONFIG_TRACING mod->trace_bprintk_fmt_start = section_objs(info, "__trace_printk_fmt", diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 46fac3f63af1..061abd8ba101 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7746,7 +7746,7 @@ static void __init trace_enum_init(void) #ifdef CONFIG_MODULES static void trace_module_add_enums(struct module *mod) { - if (!mod->num_trace_enums) + if (!mod->num_trace_evals) return; /* @@ -7756,7 +7756,7 @@ static void trace_module_add_enums(struct module *mod) if (trace_module_has_bad_taint(mod))
return; - trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums); + trace_insert_enum_map(mod, mod->trace_evals, mod->num_trace_evals); } #ifdef CONFIG_TRACE_ENUM_MAP_FILE @@ -7765,7 +7765,7 @@ static void trace_module_remove_enums(struct module *mod) union trace_enum_map_item *map; union trace_enum_map_item **last = &trace_enum_maps; - if (!mod->num_trace_enums) + if (!mod->num_trace_evals) return; mutex_lock(&trace_enum_mutex); -- cgit From 23bf8cb8dc86c0368d2471ebb4622e7edd38190b Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Wed, 31 May 2017 16:56:45 -0500 Subject: trace: rename trace enum data structures in trace.c The enum map entries can be exported to userspace via a sys enum_map file. Rename those functions and structures to reflect the fact that we are using them for more than enums. Link: http://lkml.kernel.org/r/20170531215653.3240-5-jeremy.linton@arm.com Signed-off-by: Jeremy Linton Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 52 ++++++++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 26 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 061abd8ba101..13c81f4f2bd7 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -122,38 +122,38 @@ int __disable_trace_on_warning; #ifdef CONFIG_TRACE_ENUM_MAP_FILE /* Map of enums to their values, for "enum_map" file */ -struct trace_enum_map_head { +struct trace_eval_map_head { struct module *mod; unsigned long length; }; -union trace_enum_map_item; +union trace_eval_map_item; -struct trace_enum_map_tail { +struct trace_eval_map_tail { /* * "end" is first and points to NULL as it must be different * than "mod" or "eval_string" */ - union trace_enum_map_item *next; + union trace_eval_map_item *next; const char *end; /* points to NULL */ }; static DEFINE_MUTEX(trace_enum_mutex); /* - * The trace_enum_maps are saved in an array with two extra elements, + * The trace_eval_maps are saved in an array with two extra elements, * one at the beginning, and one at the end. The beginning item contains * the count of the saved maps (head.length), and the module they * belong to if not built in (head.mod). The ending item contains a * pointer to the next array of saved enum_map items. */ -union trace_enum_map_item { +union trace_eval_map_item { struct trace_eval_map map; - struct trace_enum_map_head head; - struct trace_enum_map_tail tail; + struct trace_eval_map_head head; + struct trace_eval_map_tail tail; }; -static union trace_enum_map_item *trace_enum_maps; +static union trace_eval_map_item *trace_eval_maps; #endif /* CONFIG_TRACE_ENUM_MAP_FILE */ static int tracing_set_tracer(struct trace_array *tr, const char *buf); @@ -4745,8 +4745,8 @@ static const struct file_operations tracing_saved_cmdlines_size_fops = { }; #ifdef CONFIG_TRACE_ENUM_MAP_FILE -static union trace_enum_map_item * -update_enum_map(union trace_enum_map_item *ptr) +static union trace_eval_map_item * +update_enum_map(union trace_eval_map_item *ptr) { if (!ptr->map.eval_string) { if (ptr->tail.next) { @@ -4761,7 +4761,7 @@ update_enum_map(union trace_enum_map_item *ptr) static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos) { - union trace_enum_map_item *ptr = v; + union trace_eval_map_item *ptr = v; /* * Paranoid! If ptr points to end, we don't want to increment past it. 
@@ -4782,12 +4782,12 @@ static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos) static void *enum_map_start(struct seq_file *m, loff_t *pos) { - union trace_enum_map_item *v; + union trace_eval_map_item *v; loff_t l = 0; mutex_lock(&trace_enum_mutex); - v = trace_enum_maps; + v = trace_eval_maps; if (v) v++; @@ -4805,7 +4805,7 @@ static void enum_map_stop(struct seq_file *m, void *v) static int enum_map_show(struct seq_file *m, void *v) { - union trace_enum_map_item *ptr = v; + union trace_eval_map_item *ptr = v; seq_printf(m, "%s %ld (%s)\n", ptr->map.eval_string, ptr->map.eval_value, @@ -4836,8 +4836,8 @@ static const struct file_operations tracing_enum_map_fops = { .release = seq_release, }; -static inline union trace_enum_map_item * -trace_enum_jmp_to_tail(union trace_enum_map_item *ptr) +static inline union trace_eval_map_item * +trace_enum_jmp_to_tail(union trace_eval_map_item *ptr) { /* Return tail of array given the head */ return ptr + ptr->head.length + 1; @@ -4849,13 +4849,13 @@ trace_insert_enum_map_file(struct module *mod, struct trace_eval_map **start, { struct trace_eval_map **stop; struct trace_eval_map **map; - union trace_enum_map_item *map_array; - union trace_enum_map_item *ptr; + union trace_eval_map_item *map_array; + union trace_eval_map_item *ptr; stop = start + len; /* - * The trace_enum_maps contains the map plus a head and tail item, + * The trace_eval_maps contains the map plus a head and tail item, * where the head holds the module and length of array, and the * tail holds a pointer to the next list. */ @@ -4867,10 +4867,10 @@ trace_insert_enum_map_file(struct module *mod, struct trace_eval_map **start, mutex_lock(&trace_enum_mutex); - if (!trace_enum_maps) - trace_enum_maps = map_array; + if (!trace_eval_maps) + trace_eval_maps = map_array; else { - ptr = trace_enum_maps; + ptr = trace_eval_maps; for (;;) { ptr = trace_enum_jmp_to_tail(ptr); if (!ptr->tail.next) @@ -7762,15 +7762,15 @@ static void trace_module_add_enums(struct module *mod) #ifdef CONFIG_TRACE_ENUM_MAP_FILE static void trace_module_remove_enums(struct module *mod) { - union trace_enum_map_item *map; - union trace_enum_map_item **last = &trace_enum_maps; + union trace_eval_map_item *map; + union trace_eval_map_item **last = &trace_eval_maps; if (!mod->num_trace_evals) return; mutex_lock(&trace_enum_mutex); - map = trace_enum_maps; + map = trace_eval_maps; while (map) { if (map->head.mod == mod) -- cgit From 1793ed939b2a0e18b06467e10d15b66925d75d5f Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Wed, 31 May 2017 16:56:46 -0500 Subject: trace: rename trace_enum_mutex to trace_eval_mutex There is a lock protecting the trace_enum_map, rename it to reflect the use by more than enums. 
Link: http://lkml.kernel.org/r/20170531215653.3240-6-jeremy.linton@arm.com Signed-off-by: Jeremy Linton Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 13c81f4f2bd7..e0279f5dc83f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -138,7 +138,7 @@ struct trace_eval_map_tail { const char *end; /* points to NULL */ }; -static DEFINE_MUTEX(trace_enum_mutex); +static DEFINE_MUTEX(trace_eval_mutex); /* * The trace_eval_maps are saved in an array with two extra elements, @@ -4785,7 +4785,7 @@ static void *enum_map_start(struct seq_file *m, loff_t *pos) union trace_eval_map_item *v; loff_t l = 0; - mutex_lock(&trace_enum_mutex); + mutex_lock(&trace_eval_mutex); v = trace_eval_maps; if (v) @@ -4800,7 +4800,7 @@ static void *enum_map_start(struct seq_file *m, loff_t *pos) static void enum_map_stop(struct seq_file *m, void *v) { - mutex_unlock(&trace_enum_mutex); + mutex_unlock(&trace_eval_mutex); } static int enum_map_show(struct seq_file *m, void *v) @@ -4865,7 +4865,7 @@ trace_insert_enum_map_file(struct module *mod, struct trace_eval_map **start, return; } - mutex_lock(&trace_enum_mutex); + mutex_lock(&trace_eval_mutex); if (!trace_eval_maps) trace_eval_maps = map_array; @@ -4890,7 +4890,7 @@ trace_insert_enum_map_file(struct module *mod, struct trace_eval_map **start, } memset(map_array, 0, sizeof(*map_array)); - mutex_unlock(&trace_enum_mutex); + mutex_unlock(&trace_eval_mutex); } static void trace_create_enum_file(struct dentry *d_tracer) @@ -7768,7 +7768,7 @@ static void trace_module_remove_enums(struct module *mod) if (!mod->num_trace_evals) return; - mutex_lock(&trace_enum_mutex); + mutex_lock(&trace_eval_mutex); map = trace_eval_maps; @@ -7785,7 +7785,7 @@ static void trace_module_remove_enums(struct module *mod) *last = trace_enum_jmp_to_tail(map)->tail.next; kfree(map); out: - mutex_unlock(&trace_enum_mutex); + mutex_unlock(&trace_eval_mutex); } #else static inline void trace_module_remove_enums(struct module *mod) { } -- cgit From 5f60b351a7e37bf243725bcc131708be2c8ea497 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Wed, 31 May 2017 16:56:47 -0500 Subject: trace: rename trace.c enum functions Rename the init and trace_enum_jmp_to_tail() routines to reflect their use by more than enumerated types. 
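To keep the head/map/tail layout that these renames keep referring to concrete, here is a sketch of a walk over the chained blocks, mirroring the traversal in trace_module_remove_enums() above; walk_eval_maps() is an illustrative name, the types are the ones defined earlier in trace.c, and the caller is assumed to hold the mutex:

/* Each saved block is laid out as:
 *   item[0]      head: { .mod, .length = N }
 *   item[1..N]   the N trace_eval_map entries
 *   item[N+1]    tail: { .next = following block or NULL }
 * which is why the jmp_to_tail() helper is ptr + ptr->head.length + 1.
 */
static void walk_eval_maps(union trace_eval_map_item *ptr)
{
	while (ptr) {
		unsigned long i;

		for (i = 0; i < ptr->head.length; i++) {
			/* ptr[i + 1].map holds one string/value pair */
		}

		ptr += ptr->head.length + 1;	/* jump head -> tail */
		ptr = ptr->tail.next;		/* next block, or NULL */
	}
}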
Link: http://lkml.kernel.org/r/20170531215653.3240-7-jeremy.linton@arm.com Signed-off-by: Jeremy Linton Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e0279f5dc83f..d703f429bbd9 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4837,7 +4837,7 @@ static const struct file_operations tracing_enum_map_fops = { }; static inline union trace_eval_map_item * -trace_enum_jmp_to_tail(union trace_eval_map_item *ptr) +trace_eval_jmp_to_tail(union trace_eval_map_item *ptr) { /* Return tail of array given the head */ return ptr + ptr->head.length + 1; @@ -4872,7 +4872,7 @@ trace_insert_enum_map_file(struct module *mod, struct trace_eval_map **start, else { ptr = trace_eval_maps; for (;;) { - ptr = trace_enum_jmp_to_tail(ptr); + ptr = trace_eval_jmp_to_tail(ptr); if (!ptr->tail.next) break; ptr = ptr->tail.next; @@ -7735,7 +7735,7 @@ struct dentry *tracing_init_dentry(void) extern struct trace_eval_map *__start_ftrace_eval_maps[]; extern struct trace_eval_map *__stop_ftrace_eval_maps[]; -static void __init trace_enum_init(void) +static void __init trace_eval_init(void) { int len; @@ -7775,14 +7775,14 @@ static void trace_module_remove_enums(struct module *mod) while (map) { if (map->head.mod == mod) break; - map = trace_enum_jmp_to_tail(map); + map = trace_eval_jmp_to_tail(map); last = &map->tail.next; map = map->tail.next; } if (!map) goto out; - *last = trace_enum_jmp_to_tail(map)->tail.next; + *last = trace_eval_jmp_to_tail(map)->tail.next; kfree(map); out: mutex_unlock(&trace_eval_mutex); @@ -7839,7 +7839,7 @@ static __init int tracer_init_tracefs(void) trace_create_file("saved_cmdlines_size", 0644, d_tracer, NULL, &tracing_saved_cmdlines_size_fops); - trace_enum_init(); + trace_eval_init(); trace_create_enum_file(d_tracer); -- cgit From f57a41434fc51732dd5e35e0e1aa9e607f1a05d6 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Wed, 31 May 2017 16:56:48 -0500 Subject: trace: rename enum_map functions Rename the core trace enum routines to use eval, to reflect their use by more than just enum to value mapping. Link: http://lkml.kernel.org/r/20170531215653.3240-8-jeremy.linton@arm.com Signed-off-by: Jeremy Linton Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 74 ++++++++++++++++++++++----------------------- kernel/trace/trace.h | 4 +-- kernel/trace/trace_events.c | 2 +- 3 files changed, 40 insertions(+), 40 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d703f429bbd9..d830be7f0ba6 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -145,7 +145,7 @@ static DEFINE_MUTEX(trace_eval_mutex); * one at the beginning, and one at the end. The beginning item contains * the count of the saved maps (head.length), and the module they * belong to if not built in (head.mod). The ending item contains a - * pointer to the next array of saved enum_map items. + * pointer to the next array of saved enum_eval/enum_map items. */ union trace_eval_map_item { struct trace_eval_map map; @@ -1141,9 +1141,9 @@ unsigned long nsecs_to_usecs(unsigned long nsecs) /* * TRACE_FLAGS is defined as a tuple matching bit masks with strings. - * It uses C(a, b) where 'a' is the enum name and 'b' is the string that + * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that * matches it. 
By defining "C(a, b) b", TRACE_FLAGS becomes a list - * of strings in the order that the enums were defined. + * of strings in the order that the evals (enum) were defined. */ #undef C #define C(a, b) b @@ -4746,7 +4746,7 @@ static const struct file_operations tracing_saved_cmdlines_size_fops = { #ifdef CONFIG_TRACE_ENUM_MAP_FILE static union trace_eval_map_item * -update_enum_map(union trace_eval_map_item *ptr) +update_eval_map(union trace_eval_map_item *ptr) { if (!ptr->map.eval_string) { if (ptr->tail.next) { @@ -4759,7 +4759,7 @@ update_enum_map(union trace_eval_map_item *ptr) return ptr; } -static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos) +static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos) { union trace_eval_map_item *ptr = v; @@ -4767,7 +4767,7 @@ static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos) * Paranoid! If ptr points to end, we don't want to increment past it. * This really should never happen. */ - ptr = update_enum_map(ptr); + ptr = update_eval_map(ptr); if (WARN_ON_ONCE(!ptr)) return NULL; @@ -4775,12 +4775,12 @@ static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos) (*pos)++; - ptr = update_enum_map(ptr); + ptr = update_eval_map(ptr); return ptr; } -static void *enum_map_start(struct seq_file *m, loff_t *pos) +static void *eval_map_start(struct seq_file *m, loff_t *pos) { union trace_eval_map_item *v; loff_t l = 0; @@ -4792,18 +4792,18 @@ static void *enum_map_start(struct seq_file *m, loff_t *pos) v++; while (v && l < *pos) { - v = enum_map_next(m, v, &l); + v = eval_map_next(m, v, &l); } return v; } -static void enum_map_stop(struct seq_file *m, void *v) +static void eval_map_stop(struct seq_file *m, void *v) { mutex_unlock(&trace_eval_mutex); } -static int enum_map_show(struct seq_file *m, void *v) +static int eval_map_show(struct seq_file *m, void *v) { union trace_eval_map_item *ptr = v; @@ -4814,23 +4814,23 @@ static int enum_map_show(struct seq_file *m, void *v) return 0; } -static const struct seq_operations tracing_enum_map_seq_ops = { - .start = enum_map_start, - .next = enum_map_next, - .stop = enum_map_stop, - .show = enum_map_show, +static const struct seq_operations tracing_eval_map_seq_ops = { + .start = eval_map_start, + .next = eval_map_next, + .stop = eval_map_stop, + .show = eval_map_show, }; -static int tracing_enum_map_open(struct inode *inode, struct file *filp) +static int tracing_eval_map_open(struct inode *inode, struct file *filp) { if (tracing_disabled) return -ENODEV; - return seq_open(filp, &tracing_enum_map_seq_ops); + return seq_open(filp, &tracing_eval_map_seq_ops); } -static const struct file_operations tracing_enum_map_fops = { - .open = tracing_enum_map_open, +static const struct file_operations tracing_eval_map_fops = { + .open = tracing_eval_map_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release, @@ -4844,7 +4844,7 @@ trace_eval_jmp_to_tail(union trace_eval_map_item *ptr) } static void -trace_insert_enum_map_file(struct module *mod, struct trace_eval_map **start, +trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start, int len) { struct trace_eval_map **stop; @@ -4861,7 +4861,7 @@ trace_insert_enum_map_file(struct module *mod, struct trace_eval_map **start, */ map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL); if (!map_array) { - pr_warn("Unable to allocate trace enum mapping\n"); + pr_warn("Unable to allocate trace eval mapping\n"); return; } @@ -4893,19 +4893,19 @@ trace_insert_enum_map_file(struct module *mod, 
struct trace_eval_map **start, mutex_unlock(&trace_eval_mutex); } -static void trace_create_enum_file(struct dentry *d_tracer) +static void trace_create_eval_file(struct dentry *d_tracer) { trace_create_file("enum_map", 0444, d_tracer, - NULL, &tracing_enum_map_fops); + NULL, &tracing_eval_map_fops); } #else /* CONFIG_TRACE_ENUM_MAP_FILE */ -static inline void trace_create_enum_file(struct dentry *d_tracer) { } -static inline void trace_insert_enum_map_file(struct module *mod, +static inline void trace_create_eval_file(struct dentry *d_tracer) { } +static inline void trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start, int len) { } #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */ -static void trace_insert_enum_map(struct module *mod, +static void trace_insert_eval_map(struct module *mod, struct trace_eval_map **start, int len) { struct trace_eval_map **map; @@ -4915,9 +4915,9 @@ static void trace_insert_enum_map(struct module *mod, map = start; - trace_event_enum_update(map, len); + trace_event_eval_update(map, len); - trace_insert_enum_map_file(mod, start, len); + trace_insert_eval_map_file(mod, start, len); } static ssize_t @@ -7740,11 +7740,11 @@ static void __init trace_eval_init(void) int len; len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps; - trace_insert_enum_map(NULL, __start_ftrace_eval_maps, len); + trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len); } #ifdef CONFIG_MODULES -static void trace_module_add_enums(struct module *mod) +static void trace_module_add_evals(struct module *mod) { if (!mod->num_trace_evals) return; @@ -7756,11 +7756,11 @@ static void trace_module_add_enums(struct module *mod) if (trace_module_has_bad_taint(mod)) return; - trace_insert_enum_map(mod, mod->trace_evals, mod->num_trace_evals); + trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals); } #ifdef CONFIG_TRACE_ENUM_MAP_FILE -static void trace_module_remove_enums(struct module *mod) +static void trace_module_remove_evals(struct module *mod) { union trace_eval_map_item *map; union trace_eval_map_item **last = &trace_eval_maps; @@ -7788,7 +7788,7 @@ static void trace_module_remove_enums(struct module *mod) mutex_unlock(&trace_eval_mutex); } #else -static inline void trace_module_remove_enums(struct module *mod) { } +static inline void trace_module_remove_evals(struct module *mod) { } #endif /* CONFIG_TRACE_ENUM_MAP_FILE */ static int trace_module_notify(struct notifier_block *self, @@ -7798,10 +7798,10 @@ static int trace_module_notify(struct notifier_block *self, switch (val) { case MODULE_STATE_COMING: - trace_module_add_enums(mod); + trace_module_add_evals(mod); break; case MODULE_STATE_GOING: - trace_module_remove_enums(mod); + trace_module_remove_evals(mod); break; } @@ -7841,7 +7841,7 @@ static __init int tracer_init_tracefs(void) trace_eval_init(); - trace_create_enum_file(d_tracer); + trace_create_eval_file(d_tracer); #ifdef CONFIG_MODULES register_module_notifier(&trace_module_nb); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index a9667297ae49..69a3ab3ee4f5 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1773,10 +1773,10 @@ static inline const char *get_syscall_name(int syscall) #ifdef CONFIG_EVENT_TRACING void trace_event_init(void); -void trace_event_enum_update(struct trace_eval_map **map, int len); +void trace_event_eval_update(struct trace_eval_map **map, int len); #else static inline void __init trace_event_init(void) { } -static inline void trace_event_enum_update(struct trace_eval_map **map, int len) { } +static 
inline void trace_event_eval_update(struct trace_eval_map **map, int len) { } #endif extern struct trace_iterator *tracepoint_print_iter; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index cf5b9aa4d732..e6897b005947 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2165,7 +2165,7 @@ static void update_event_printk(struct trace_event_call *call, } } -void trace_event_enum_update(struct trace_eval_map **map, int len) +void trace_event_eval_update(struct trace_eval_map **map, int len) { struct trace_event_call *call, *p; const char *last_system = NULL; -- cgit From 67ec0d85955630924b971e04c0954370a74b8706 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Wed, 31 May 2017 16:56:49 -0500 Subject: tracing: Rename enum_replace to eval_replace The enum_replace stanza works as is for sizeof() calls as well as enums. Rename it as well. Link: http://lkml.kernel.org/r/20170531215653.3240-9-jeremy.linton@arm.com Signed-off-by: Jeremy Linton Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index e6897b005947..83dfd0dbbbfe 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2067,12 +2067,12 @@ __register_event(struct trace_event_call *call, struct module *mod) return 0; } -static char *enum_replace(char *ptr, struct trace_eval_map *map, int len) +static char *eval_replace(char *ptr, struct trace_eval_map *map, int len) { int rlen; int elen; - /* Find the length of the enum value as a string */ + /* Find the length of the eval value as a string */ elen = snprintf(ptr, 0, "%ld", map->eval_value); /* Make sure there's enough room to replace the string with the value */ if (len < elen) @@ -2127,14 +2127,14 @@ static void update_event_printk(struct trace_event_call *call, if (isalpha(*ptr) || *ptr == '_') { if (strncmp(map->eval_string, ptr, len) == 0 && !isalnum(ptr[len]) && ptr[len] != '_') { - ptr = enum_replace(ptr, map, len); - /* Hmm, enum string smaller than value */ + ptr = eval_replace(ptr, map, len); + /* enum/sizeof string smaller than value */ if (WARN_ON_ONCE(!ptr)) return; /* - * No need to decrement here, as enum_replace() + * No need to decrement here, as eval_replace() * returns the pointer to the character passed - * the enum, and two enums can not be placed + * the eval, and two evals can not be placed * back to back without something in between. * We can skip that something in between. */ -- cgit From 681bec0367c2606b6154060310a2ffa543175980 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Wed, 31 May 2017 16:56:53 -0500 Subject: tracing: Rename and update the enum_map file The enum_map file is used to display a list of symbol-to-value conversions. As it's now used to resolve sizeof() as well, let's update the name and description.
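Since eval_replace() is easiest to understand by example, here is a userspace model of the splice it performs; eval_replace_model() is a hypothetical name, and the model measures the tail length before writing the value so the two steps stay independent:

#include <stdio.h>
#include <string.h>

/* Overwrite a symbolic name of length len at ptr with its numeric
 * value, then slide the rest of the string left to close the gap.
 * Returns NULL when the rendered value would not fit, as the kernel
 * helper does, otherwise the first character past the value.
 */
static char *eval_replace_model(char *ptr, long value, int len)
{
	char buf[24];
	int elen = snprintf(buf, sizeof(buf), "%ld", value);
	int rlen = strlen(ptr + len);		/* tail after the name */

	if (len < elen)
		return NULL;

	memmove(ptr + elen, ptr + len, rlen + 1);  /* move tail + '\0' */
	memcpy(ptr, buf, elen);			   /* write the digits */
	return ptr + elen;
}

Applied to the buffer "mode: MY_MODE set" with value 3 and len 7, this yields "mode: 3 set", which is the same in-place rewrite that update_event_printk() drives over print_fmt.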
Link: http://lkml.kernel.org/r/20170531215653.3240-13-jeremy.linton@arm.com Signed-off-by: Jeremy Linton Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/Kconfig | 22 +++++++++++----------- kernel/trace/trace.c | 20 ++++++++++---------- 2 files changed, 21 insertions(+), 21 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 7e06f04e98fe..434c840e2d82 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -667,30 +667,30 @@ config RING_BUFFER_STARTUP_TEST If unsure, say N -config TRACE_ENUM_MAP_FILE - bool "Show enum mappings for trace events" +config TRACE_EVAL_MAP_FILE + bool "Show eval mappings for trace events" depends on TRACING help - The "print fmt" of the trace events will show the enum names instead - of their values. This can cause problems for user space tools that - use this string to parse the raw data as user space does not know + The "print fmt" of the trace events will show the enum/sizeof names + instead of their values. This can cause problems for user space tools + that use this string to parse the raw data as user space does not know how to convert the string to its value. To fix this, there's a special macro in the kernel that can be used - to convert the enum into its value. If this macro is used, then the - print fmt strings will have the enums converted to their values. + to convert an enum/sizeof into its value. If this macro is used, then + the print fmt strings will be converted to their values. If something does not get converted properly, this option can be - used to show what enums the kernel tried to convert. + used to show what enums/sizeof the kernel tried to convert. - This option is for debugging the enum conversions. A file is created - in the tracing directory called "enum_map" that will show the enum + This option is for debugging the conversions. A file is created + in the tracing directory called "eval_map" that will show the names matched with their values and what trace event system they belong too. Normally, the mapping of the strings to values will be freed after boot up or module load. With this option, they will not be freed, as - they are needed for the "enum_map" file. Enabling this option will + they are needed for the "eval_map" file. Enabling this option will increase the memory footprint of the running kernel. If unsure, say N diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d830be7f0ba6..19ac2088d10a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -120,8 +120,8 @@ enum ftrace_dump_mode ftrace_dump_on_oops; /* When set, tracing will stop when a WARN*() is hit */ int __disable_trace_on_warning; -#ifdef CONFIG_TRACE_ENUM_MAP_FILE -/* Map of enums to their values, for "enum_map" file */ +#ifdef CONFIG_TRACE_EVAL_MAP_FILE +/* Map of enums to their values, for "eval_map" file */ struct trace_eval_map_head { struct module *mod; unsigned long length; @@ -145,7 +145,7 @@ static DEFINE_MUTEX(trace_eval_mutex); * one at the beginning, and one at the end. The beginning item contains * the count of the saved maps (head.length), and the module they * belong to if not built in (head.mod). The ending item contains a - * pointer to the next array of saved enum_eval/enum_map items. + * pointer to the next array of saved eval_map items. 
*/ union trace_eval_map_item { struct trace_eval_map map; @@ -154,7 +154,7 @@ union trace_eval_map_item { }; static union trace_eval_map_item *trace_eval_maps; -#endif /* CONFIG_TRACE_ENUM_MAP_FILE */ +#endif /* CONFIG_TRACE_EVAL_MAP_FILE */ static int tracing_set_tracer(struct trace_array *tr, const char *buf); @@ -4744,7 +4744,7 @@ static const struct file_operations tracing_saved_cmdlines_size_fops = { .write = tracing_saved_cmdlines_size_write, }; -#ifdef CONFIG_TRACE_ENUM_MAP_FILE +#ifdef CONFIG_TRACE_EVAL_MAP_FILE static union trace_eval_map_item * update_eval_map(union trace_eval_map_item *ptr) { @@ -4895,15 +4895,15 @@ trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start, static void trace_create_eval_file(struct dentry *d_tracer) { - trace_create_file("enum_map", 0444, d_tracer, + trace_create_file("eval_map", 0444, d_tracer, NULL, &tracing_eval_map_fops); } -#else /* CONFIG_TRACE_ENUM_MAP_FILE */ +#else /* CONFIG_TRACE_EVAL_MAP_FILE */ static inline void trace_create_eval_file(struct dentry *d_tracer) { } static inline void trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start, int len) { } -#endif /* !CONFIG_TRACE_ENUM_MAP_FILE */ +#endif /* !CONFIG_TRACE_EVAL_MAP_FILE */ static void trace_insert_eval_map(struct module *mod, struct trace_eval_map **start, int len) @@ -7759,7 +7759,7 @@ static void trace_module_add_evals(struct module *mod) trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals); } -#ifdef CONFIG_TRACE_ENUM_MAP_FILE +#ifdef CONFIG_TRACE_EVAL_MAP_FILE static void trace_module_remove_evals(struct module *mod) { union trace_eval_map_item *map; @@ -7789,7 +7789,7 @@ static void trace_module_remove_evals(struct module *mod) } #else static inline void trace_module_remove_evals(struct module *mod) { } -#endif /* CONFIG_TRACE_ENUM_MAP_FILE */ +#endif /* CONFIG_TRACE_EVAL_MAP_FILE */ static int trace_module_notify(struct notifier_block *self, unsigned long val, void *data) -- cgit From 31fd85816dbe3a714bcc3f67c17c3dd87011f79e Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 13 Jun 2017 15:52:13 -0700 Subject: bpf: permit narrower loads from bpf program context fields Currently, the verifier will reject a program if it contains a narrower load from the bpf context structure. For example: __u8 h = __sk_buff->hash; __u16 p = __sk_buff->protocol; __u32 sample_period = bpf_perf_event_data->sample_period. These are narrower loads of a 4-byte or an 8-byte field. This patch solves the issue by:
. Introduce a new parameter ctx_field_size to carry the field size of a narrower load from the prog type specific *__is_valid_access validator back to the verifier.
. A non-zero ctx_field_size for a memory access indicates that (1) the underlying prog type specific convert_ctx_accesses supports non-whole-field access, and (2) the current insn is a narrower or whole field access.
. In the verifier, for such loads where the load memory size is less than ctx_field_size, the verifier transforms it to a full field load followed by proper masking.
. Currently, __sk_buff and bpf_perf_event_data->sample_period support narrower loads.
. Narrower stores are still not allowed, as typical ctx stores are just normal stores.
Because of this change, some tests in the verifier will fail and these tests are removed. As a bonus, rename some out-of-bound __sk_buff->cb accesses to proper field names and remove two redundant "skb cb oob" tests. Acked-by: Daniel Borkmann Signed-off-by: Yonghong Song Signed-off-by: David S.
Miller --- include/linux/bpf.h | 2 +- include/linux/bpf_verifier.h | 1 + kernel/bpf/verifier.c | 71 ++++++++++++++++------ kernel/trace/bpf_trace.c | 21 +++++-- net/core/filter.c | 56 +++++++++++++----- tools/testing/selftests/bpf/test_verifier.c | 92 ++++------------------------- 6 files changed, 124 insertions(+), 119 deletions(-) (limited to 'kernel/trace') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c32bace66d3d..1bcbf0a71f75 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -157,7 +157,7 @@ struct bpf_verifier_ops { * with 'type' (read or write) is allowed */ bool (*is_valid_access)(int off, int size, enum bpf_access_type type, - enum bpf_reg_type *reg_type); + enum bpf_reg_type *reg_type, int *ctx_field_size); int (*gen_prologue)(struct bpf_insn *insn, bool direct_write, const struct bpf_prog *prog); u32 (*convert_ctx_access)(enum bpf_access_type type, diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index d5093b52b485..189741c0da85 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -73,6 +73,7 @@ struct bpf_insn_aux_data { enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */ }; + int ctx_field_size; /* the ctx field size for load/store insns, maybe 0 */ }; #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 519a6144d3d3..44b97d958fb7 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -758,15 +758,26 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off, } /* check access to 'struct bpf_context' fields */ -static int check_ctx_access(struct bpf_verifier_env *env, int off, int size, +static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size, enum bpf_access_type t, enum bpf_reg_type *reg_type) { + int ctx_field_size = 0; + /* for analyzer ctx accesses are already validated and converted */ if (env->analyzer_ops) return 0; if (env->prog->aux->ops->is_valid_access && - env->prog->aux->ops->is_valid_access(off, size, t, reg_type)) { + env->prog->aux->ops->is_valid_access(off, size, t, reg_type, &ctx_field_size)) { + /* a non zero ctx_field_size indicates: + * . For this field, the prog type specific ctx conversion algorithm + * only supports whole field access. + * . This ctx access is a candiate for later verifier transformation + * to load the whole field and then apply a mask to get correct result. 
+ */ + if (ctx_field_size) + env->insn_aux_data[insn_idx].ctx_field_size = ctx_field_size; + /* remember the offset of last byte accessed in ctx */ if (env->prog->aux->max_ctx_offset < off + size) env->prog->aux->max_ctx_offset = off + size; @@ -868,7 +879,7 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, * if t==write && value_regno==-1, some unknown value is stored into memory * if t==read && value_regno==-1, don't care what we read from memory */ -static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off, +static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno, int off, int bpf_size, enum bpf_access_type t, int value_regno) { @@ -911,7 +922,7 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off, verbose("R%d leaks addr into ctx\n", value_regno); return -EACCES; } - err = check_ctx_access(env, off, size, t, ®_type); + err = check_ctx_access(env, insn_idx, off, size, t, ®_type); if (!err && t == BPF_READ && value_regno >= 0) { mark_reg_unknown_value_and_range(state->regs, value_regno); @@ -972,7 +983,7 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off, return err; } -static int check_xadd(struct bpf_verifier_env *env, struct bpf_insn *insn) +static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn) { struct bpf_reg_state *regs = env->cur_state.regs; int err; @@ -994,13 +1005,13 @@ static int check_xadd(struct bpf_verifier_env *env, struct bpf_insn *insn) return err; /* check whether atomic_add can read the memory */ - err = check_mem_access(env, insn->dst_reg, insn->off, + err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_READ, -1); if (err) return err; /* check whether atomic_add can write into the same memory */ - return check_mem_access(env, insn->dst_reg, insn->off, + return check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE, -1); } @@ -1416,7 +1427,7 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) * is inferred from register state. 
*/ for (i = 0; i < meta.access_size; i++) { - err = check_mem_access(env, meta.regno, i, BPF_B, BPF_WRITE, -1); + err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B, BPF_WRITE, -1); if (err) return err; } @@ -2993,18 +3004,12 @@ static int do_check(struct bpf_verifier_env *env) /* check that memory (src_reg + off) is readable, * the state of dst_reg will be updated by this func */ - err = check_mem_access(env, insn->src_reg, insn->off, + err = check_mem_access(env, insn_idx, insn->src_reg, insn->off, BPF_SIZE(insn->code), BPF_READ, insn->dst_reg); if (err) return err; - if (BPF_SIZE(insn->code) != BPF_W && - BPF_SIZE(insn->code) != BPF_DW) { - insn_idx++; - continue; - } - prev_src_type = &env->insn_aux_data[insn_idx].ptr_type; if (*prev_src_type == NOT_INIT) { @@ -3032,7 +3037,7 @@ static int do_check(struct bpf_verifier_env *env) enum bpf_reg_type *prev_dst_type, dst_reg_type; if (BPF_MODE(insn->code) == BPF_XADD) { - err = check_xadd(env, insn); + err = check_xadd(env, insn_idx, insn); if (err) return err; insn_idx++; @@ -3051,7 +3056,7 @@ static int do_check(struct bpf_verifier_env *env) dst_reg_type = regs[insn->dst_reg].type; /* check that memory (dst_reg + off) is writeable */ - err = check_mem_access(env, insn->dst_reg, insn->off, + err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE, insn->src_reg); if (err) @@ -3080,7 +3085,7 @@ static int do_check(struct bpf_verifier_env *env) return err; /* check that memory (dst_reg + off) is writeable */ - err = check_mem_access(env, insn->dst_reg, insn->off, + err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE, -1); if (err) @@ -3383,7 +3388,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) struct bpf_insn insn_buf[16], *insn; struct bpf_prog *new_prog; enum bpf_access_type type; - int i, cnt, delta = 0; + int i, cnt, off, size, ctx_field_size, is_narrower_load, delta = 0; if (ops->gen_prologue) { cnt = ops->gen_prologue(insn_buf, env->seen_direct_write, @@ -3423,11 +3428,39 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX) continue; + off = insn->off; + size = bpf_size_to_bytes(BPF_SIZE(insn->code)); + ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size; + is_narrower_load = (type == BPF_READ && size < ctx_field_size); + + /* If the read access is a narrower load of the field, + * convert to a 4/8-byte load, to minimum program type specific + * convert_ctx_access changes. If conversion is successful, + * we will apply proper mask to the result. 
+ */ + if (is_narrower_load) { + int size_code = BPF_H; + + if (ctx_field_size == 4) + size_code = BPF_W; + else if (ctx_field_size == 8) + size_code = BPF_DW; + insn->off = off & ~(ctx_field_size - 1); + insn->code = BPF_LDX | BPF_MEM | size_code; + } cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog); if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { verbose("bpf verifier is misconfigured\n"); return -EINVAL; } + if (is_narrower_load) { + if (ctx_field_size <= 4) + insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg, + (1 << size * 8) - 1); + else + insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg, + (1 << size * 8) - 1); + } new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); if (!new_prog) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 051d7fca0c09..9d3ec8253131 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -479,7 +479,7 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func /* bpf+kprobe programs can access fields of 'struct pt_regs' */ static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type, - enum bpf_reg_type *reg_type) + enum bpf_reg_type *reg_type, int *ctx_field_size) { if (off < 0 || off >= sizeof(struct pt_regs)) return false; @@ -562,7 +562,7 @@ static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id) } static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type, - enum bpf_reg_type *reg_type) + enum bpf_reg_type *reg_type, int *ctx_field_size) { if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE) return false; @@ -581,17 +581,26 @@ const struct bpf_verifier_ops tracepoint_prog_ops = { }; static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type, - enum bpf_reg_type *reg_type) + enum bpf_reg_type *reg_type, int *ctx_field_size) { + int sample_period_off; + if (off < 0 || off >= sizeof(struct bpf_perf_event_data)) return false; if (type != BPF_READ) return false; if (off % size != 0) return false; - if (off == offsetof(struct bpf_perf_event_data, sample_period)) { - if (size != sizeof(u64)) - return false; + + /* permit 1, 2, 4 byte narrower and 8 normal read access to sample_period */ + sample_period_off = offsetof(struct bpf_perf_event_data, sample_period); + if (off >= sample_period_off && off < sample_period_off + sizeof(__u64)) { + *ctx_field_size = 8; +#ifdef __LITTLE_ENDIAN + return (off & 0x7) == 0 && size <= 8 && (size & (size - 1)) == 0; +#else + return ((off & 0x7) + size) == 8 && size <= 8 && (size & (size - 1)) == 0; +#endif } else { if (size != sizeof(long)) return false; diff --git a/net/core/filter.c b/net/core/filter.c index a65a3b25e104..60ed6f343a63 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2856,7 +2856,8 @@ lwt_xmit_func_proto(enum bpf_func_id func_id) } } -static bool __is_valid_access(int off, int size) +static bool __is_valid_access(int off, int size, enum bpf_access_type type, + int *ctx_field_size) { if (off < 0 || off >= sizeof(struct __sk_buff)) return false; @@ -2872,9 +2873,27 @@ static bool __is_valid_access(int off, int size) offsetof(struct __sk_buff, cb[4]) + sizeof(__u32)) return false; break; - default: + case offsetof(struct __sk_buff, data) ... + offsetof(struct __sk_buff, data) + sizeof(__u32) - 1: + case offsetof(struct __sk_buff, data_end) ... 
+ offsetof(struct __sk_buff, data_end) + sizeof(__u32) - 1: if (size != sizeof(__u32)) return false; + break; + default: + /* permit narrower load for not cb/data/data_end fields */ + *ctx_field_size = 4; + if (type == BPF_WRITE) { + if (size != sizeof(__u32)) + return false; + } else { + if (size != sizeof(__u32)) +#ifdef __LITTLE_ENDIAN + return (off & 0x3) == 0 && (size == 1 || size == 2); +#else + return (off & 0x3) + size == 4 && (size == 1 || size == 2); +#endif + } } return true; @@ -2882,12 +2901,16 @@ static bool __is_valid_access(int off, int size) static bool sk_filter_is_valid_access(int off, int size, enum bpf_access_type type, - enum bpf_reg_type *reg_type) + enum bpf_reg_type *reg_type, + int *ctx_field_size) { switch (off) { - case offsetof(struct __sk_buff, tc_classid): - case offsetof(struct __sk_buff, data): - case offsetof(struct __sk_buff, data_end): + case offsetof(struct __sk_buff, tc_classid) ... + offsetof(struct __sk_buff, tc_classid) + sizeof(__u32) - 1: + case offsetof(struct __sk_buff, data) ... + offsetof(struct __sk_buff, data) + sizeof(__u32) - 1: + case offsetof(struct __sk_buff, data_end) ... + offsetof(struct __sk_buff, data_end) + sizeof(__u32) - 1: return false; } @@ -2901,15 +2924,17 @@ static bool sk_filter_is_valid_access(int off, int size, } } - return __is_valid_access(off, size); + return __is_valid_access(off, size, type, ctx_field_size); } static bool lwt_is_valid_access(int off, int size, enum bpf_access_type type, - enum bpf_reg_type *reg_type) + enum bpf_reg_type *reg_type, + int *ctx_field_size) { switch (off) { - case offsetof(struct __sk_buff, tc_classid): + case offsetof(struct __sk_buff, tc_classid) ... + offsetof(struct __sk_buff, tc_classid) + sizeof(__u32) - 1: return false; } @@ -2934,12 +2959,13 @@ static bool lwt_is_valid_access(int off, int size, break; } - return __is_valid_access(off, size); + return __is_valid_access(off, size, type, ctx_field_size); } static bool sock_filter_is_valid_access(int off, int size, enum bpf_access_type type, - enum bpf_reg_type *reg_type) + enum bpf_reg_type *reg_type, + int *ctx_field_size) { if (type == BPF_WRITE) { switch (off) { @@ -3002,7 +3028,8 @@ static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write, static bool tc_cls_act_is_valid_access(int off, int size, enum bpf_access_type type, - enum bpf_reg_type *reg_type) + enum bpf_reg_type *reg_type, + int *ctx_field_size) { if (type == BPF_WRITE) { switch (off) { @@ -3027,7 +3054,7 @@ static bool tc_cls_act_is_valid_access(int off, int size, break; } - return __is_valid_access(off, size); + return __is_valid_access(off, size, type, ctx_field_size); } static bool __is_valid_xdp_access(int off, int size) @@ -3044,7 +3071,8 @@ static bool __is_valid_xdp_access(int off, int size) static bool xdp_is_valid_access(int off, int size, enum bpf_access_type type, - enum bpf_reg_type *reg_type) + enum bpf_reg_type *reg_type, + int *ctx_field_size) { if (type == BPF_WRITE) return false; diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 4ee4708b0d60..13341700930c 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1073,44 +1073,22 @@ static struct bpf_test tests[] = { .result = ACCEPT, }, { - "check cb access: byte, oob 1", + "__sk_buff->hash, offset 0, byte store not permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[4]) + 4), + offsetof(struct 
__sk_buff, hash)), BPF_EXIT_INSN(), }, .errstr = "invalid bpf_context access", .result = REJECT, }, { - "check cb access: byte, oob 2", + "__sk_buff->tc_index, offset 3, byte store not permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[0]) - 1), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access", - .result = REJECT, - }, - { - "check cb access: byte, oob 3", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[4]) + 4), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access", - .result = REJECT, - }, - { - "check cb access: byte, oob 4", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[0]) - 1), + offsetof(struct __sk_buff, tc_index) + 3), BPF_EXIT_INSN(), }, .errstr = "invalid bpf_context access", @@ -1188,44 +1166,22 @@ static struct bpf_test tests[] = { .result = REJECT, }, { - "check cb access: half, oob 1", + "check __sk_buff->hash, offset 0, half store not permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[4]) + 4), + offsetof(struct __sk_buff, hash)), BPF_EXIT_INSN(), }, .errstr = "invalid bpf_context access", .result = REJECT, }, { - "check cb access: half, oob 2", + "check __sk_buff->tc_index, offset 2, half store not permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[0]) - 2), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access", - .result = REJECT, - }, - { - "check cb access: half, oob 3", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[4]) + 4), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access", - .result = REJECT, - }, - { - "check cb access: half, oob 4", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[0]) - 2), + offsetof(struct __sk_buff, tc_index) + 2), BPF_EXIT_INSN(), }, .errstr = "invalid bpf_context access", @@ -1366,28 +1322,6 @@ static struct bpf_test tests[] = { }, { "check cb access: double, oob 2", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[4]) + 8), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access", - .result = REJECT, - }, - { - "check cb access: double, oob 3", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[0]) - 8), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access", - .result = REJECT, - }, - { - "check cb access: double, oob 4", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, @@ -1398,22 +1332,22 @@ static struct bpf_test tests[] = { .result = REJECT, }, { - "check cb access: double, oob 5", + "check __sk_buff->ifindex dw store not permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[4]) + 8), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, ifindex)), BPF_EXIT_INSN(), }, .errstr = "invalid bpf_context access", .result = REJECT, }, { - "check cb access: double, oob 6", + "check __sk_buff->ifindex dw load not permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, - 
offsetof(struct __sk_buff, cb[0]) - 8), + offsetof(struct __sk_buff, ifindex)), BPF_EXIT_INSN(), }, .errstr = "invalid bpf_context access", .result = REJECT, -- cgit From feaf1283d11794b9d518fcfd54b6bf8bee1f0b4b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 22 Jun 2017 17:04:55 -0400 Subject: tracing: Show address when function names are not found Currently, when a function is not found in kallsyms, instead of simply showing the function address, it shows nothing at all: # echo ':mod:kvm_intel' > /sys/kernel/tracing/set_ftrace_filter # echo function > /sys/kernel/tracing/current_tracer # qemu -enable-kvm /home/my-qemu-image # rmmod kvm_intel # cat /sys/kernel/tracing/trace qemu-system-x86-2408 [001] d..2 135.013238: <-kvm_arch_hardware_enable qemu-system-x86-2408 [001] .... 135.014574: <-kvm_arch_vm_ioctl qemu-system-x86-2408 [001] .... 135.015420: <-kvm_vm_ioctl_check_extension qemu-system-x86-2408 [001] .... 135.045411: <-__do_cpuid_ent qemu-system-x86-2408 [001] .... 135.045412: <-__do_cpuid_ent qemu-system-x86-2408 [001] .... 135.045412: <-__do_cpuid_ent qemu-system-x86-2408 [001] .... 135.045412: <-__do_cpuid_ent qemu-system-x86-2408 [001] ...1 135.045413: <-__do_cpuid_ent qemu-system-x86-2408 [001] .... 135.045413: <-__do_cpuid_ent When it should show: qemu-system-x86-2408 [001] d..2 135.013238: 0xffffffffa02a39f0 <-kvm_arch_hardware_enable qemu-system-x86-2408 [001] .... 135.014574: 0xffffffffa02a2ba0 <-kvm_arch_vm_ioctl qemu-system-x86-2408 [001] .... 135.015420: 0xffffffffa029e4e0 <-kvm_vm_ioctl_check_extension qemu-system-x86-2408 [001] .... 135.045411: 0xffffffffa02a1380 <-__do_cpuid_ent qemu-system-x86-2408 [001] .... 135.045412: 0xffffffffa029e160 <-__do_cpuid_ent qemu-system-x86-2408 [001] .... 135.045412: 0xffffffffa029e180 <-__do_cpuid_ent qemu-system-x86-2408 [001] .... 135.045412: 0xffffffffa029e520 <-__do_cpuid_ent qemu-system-x86-2408 [001] ...1 135.045413: 0xffffffffa02a13b0 <-__do_cpuid_ent qemu-system-x86-2408 [001] .... 135.045413: 0xffffffffa02a1380 <-__do_cpuid_ent instead.
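The fallback is easiest to see outside the kernel. The following is a minimal C sketch of the same logic, not kernel code: stub_lookup() and print_sym_or_addr() are hypothetical stand-ins for kallsyms_lookup() and seq_print_sym_short(), and the empty-string result models a symbol whose module has been unloaded.

#include <stdio.h>
#include <string.h>

#define KSYM_SYMBOL_LEN 128

/* Stand-in for kallsyms_lookup(): yields an empty name once the
 * module that owned the address has been unloaded. */
static void stub_lookup(unsigned long address, char *str)
{
	(void)address;
	str[0] = '\0';
}

static void print_sym_or_addr(unsigned long address)
{
	char str[KSYM_SYMBOL_LEN];

	stub_lookup(address, str);
	if (strlen(str)) {
		printf("%s", str);	/* name found: print it */
		return;
	}
	/* name not found: fall back to printing the raw address */
	snprintf(str, KSYM_SYMBOL_LEN, "0x%08lx", address);
	printf("%s", str);
}

int main(void)
{
	/* assumes a 64-bit unsigned long for the kernel address */
	print_sym_or_addr(0xffffffffa02a39f0UL);
	printf(" <-kvm_arch_hardware_enable\n");
	return 0;
}

Running this prints the address exactly as the corrected trace output above does, instead of a blank field.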
Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_output.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 08f9bab8089e..01ff99969ca7 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -340,31 +340,41 @@ static inline const char *kretprobed(const char *name) static void seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address) { -#ifdef CONFIG_KALLSYMS char str[KSYM_SYMBOL_LEN]; +#ifdef CONFIG_KALLSYMS const char *name; kallsyms_lookup(address, NULL, NULL, NULL, str); name = kretprobed(str); - trace_seq_printf(s, fmt, name); + if (name && strlen(name)) { + trace_seq_printf(s, fmt, name); + return; + } #endif + snprintf(str, KSYM_SYMBOL_LEN, "0x%08lx", address); + trace_seq_printf(s, fmt, str); } static void seq_print_sym_offset(struct trace_seq *s, const char *fmt, unsigned long address) { -#ifdef CONFIG_KALLSYMS char str[KSYM_SYMBOL_LEN]; +#ifdef CONFIG_KALLSYMS const char *name; sprint_symbol(str, address); name = kretprobed(str); - trace_seq_printf(s, fmt, name); + if (name && strlen(name)) { + trace_seq_printf(s, fmt, name); + return; + } #endif + snprintf(str, KSYM_SYMBOL_LEN, "0x%08lx", address); + trace_seq_printf(s, fmt, str); } #ifndef CONFIG_64BIT -- cgit From 239946314e57711d7da546b67964d0b387a3ee42 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 22 Jun 2017 15:07:39 -0700 Subject: bpf: possibly avoid extra masking for narrower load in verifier Commit 31fd85816dbe ("bpf: permits narrower load from bpf program context fields") permits narrower load for certain ctx fields. The commit however will already generate a masking even if the prog-specific ctx conversion produces the result with narrower size. For example, for __sk_buff->protocol, the ctx conversion loads the data into register with 2-byte load. A narrower 2-byte load should not generate masking. For __sk_buff->vlan_present, the conversion function set the result as either 0 or 1, essentially a byte. The narrower 2-byte or 1-byte load should not generate masking. To avoid unnecessary masking, prog-specific *_is_valid_access now passes converted_op_size back to verifier, which indicates the valid data width after perceived future conversion. Based on this information, verifier is able to avoid unnecessary marking. Since we want more information back from prog-specific *_is_valid_access checking, all of them are packed into one data structure for more clarity. Acked-by: Daniel Borkmann Signed-off-by: Yonghong Song Signed-off-by: David S. Miller --- include/linux/bpf.h | 11 +++++- include/linux/bpf_verifier.h | 3 +- kernel/bpf/verifier.c | 29 ++++++++++---- kernel/trace/bpf_trace.c | 17 +++++--- net/core/filter.c | 92 +++++++++++++++++++++++++------------------- 5 files changed, 97 insertions(+), 55 deletions(-) (limited to 'kernel/trace') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1bcbf0a71f75..deca4e7f2845 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -149,6 +149,15 @@ enum bpf_reg_type { struct bpf_prog; +/* The information passed from prog-specific *_is_valid_access + * back to the verifier. 
+ */ +struct bpf_insn_access_aux { + enum bpf_reg_type reg_type; + int ctx_field_size; + int converted_op_size; +}; + struct bpf_verifier_ops { /* return eBPF function prototype for verification */ const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id); @@ -157,7 +166,7 @@ struct bpf_verifier_ops { * with 'type' (read or write) is allowed */ bool (*is_valid_access)(int off, int size, enum bpf_access_type type, - enum bpf_reg_type *reg_type, int *ctx_field_size); + struct bpf_insn_access_aux *info); int (*gen_prologue)(struct bpf_insn *insn, bool direct_write, const struct bpf_prog *prog); u32 (*convert_ctx_access)(enum bpf_access_type type, diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 189741c0da85..621076f56251 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -73,7 +73,8 @@ struct bpf_insn_aux_data { enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */ }; - int ctx_field_size; /* the ctx field size for load/store insns, maybe 0 */ + int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ + int converted_op_size; /* the valid value width after perceived conversion */ }; #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 44b97d958fb7..74ea96ea391b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -761,22 +761,34 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off, static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size, enum bpf_access_type t, enum bpf_reg_type *reg_type) { - int ctx_field_size = 0; + struct bpf_insn_access_aux info = { .reg_type = *reg_type }; /* for analyzer ctx accesses are already validated and converted */ if (env->analyzer_ops) return 0; if (env->prog->aux->ops->is_valid_access && - env->prog->aux->ops->is_valid_access(off, size, t, reg_type, &ctx_field_size)) { - /* a non zero ctx_field_size indicates: + env->prog->aux->ops->is_valid_access(off, size, t, &info)) { + /* a non zero info.ctx_field_size indicates: * . For this field, the prog type specific ctx conversion algorithm * only supports whole field access. * . This ctx access is a candiate for later verifier transformation * to load the whole field and then apply a mask to get correct result. + * a non zero info.converted_op_size indicates perceived actual converted + * value width in convert_ctx_access. 
*/ - if (ctx_field_size) - env->insn_aux_data[insn_idx].ctx_field_size = ctx_field_size; + if ((info.ctx_field_size && !info.converted_op_size) || + (!info.ctx_field_size && info.converted_op_size)) { + verbose("verifier bug in is_valid_access prog type=%u off=%d size=%d\n", + env->prog->type, off, size); + return -EACCES; + } + + if (info.ctx_field_size) { + env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size; + env->insn_aux_data[insn_idx].converted_op_size = info.converted_op_size; + } + *reg_type = info.reg_type; /* remember the offset of last byte accessed in ctx */ if (env->prog->aux->max_ctx_offset < off + size) @@ -3388,7 +3400,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) struct bpf_insn insn_buf[16], *insn; struct bpf_prog *new_prog; enum bpf_access_type type; - int i, cnt, off, size, ctx_field_size, is_narrower_load, delta = 0; + int i, cnt, off, size, ctx_field_size, converted_op_size, is_narrower_load, delta = 0; if (ops->gen_prologue) { cnt = ops->gen_prologue(insn_buf, env->seen_direct_write, @@ -3431,7 +3443,8 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) off = insn->off; size = bpf_size_to_bytes(BPF_SIZE(insn->code)); ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size; - is_narrower_load = (type == BPF_READ && size < ctx_field_size); + converted_op_size = env->insn_aux_data[i + delta].converted_op_size; + is_narrower_load = type == BPF_READ && size < ctx_field_size; /* If the read access is a narrower load of the field, * convert to a 4/8-byte load, to minimum program type specific @@ -3453,7 +3466,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) verbose("bpf verifier is misconfigured\n"); return -EINVAL; } - if (is_narrower_load) { + if (is_narrower_load && size < converted_op_size) { if (ctx_field_size <= 4) insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg, (1 << size * 8) - 1); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 9d3ec8253131..97c46b440cd6 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -479,7 +479,7 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func /* bpf+kprobe programs can access fields of 'struct pt_regs' */ static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type, - enum bpf_reg_type *reg_type, int *ctx_field_size) + struct bpf_insn_access_aux *info) { if (off < 0 || off >= sizeof(struct pt_regs)) return false; @@ -562,7 +562,7 @@ static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id) } static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type, - enum bpf_reg_type *reg_type, int *ctx_field_size) + struct bpf_insn_access_aux *info) { if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE) return false; @@ -581,7 +581,7 @@ const struct bpf_verifier_ops tracepoint_prog_ops = { }; static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type, - enum bpf_reg_type *reg_type, int *ctx_field_size) + struct bpf_insn_access_aux *info) { int sample_period_off; @@ -595,12 +595,17 @@ static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type /* permit 1, 2, 4 byte narrower and 8 normal read access to sample_period */ sample_period_off = offsetof(struct bpf_perf_event_data, sample_period); if (off >= sample_period_off && off < sample_period_off + sizeof(__u64)) { - *ctx_field_size = 8; + int allowed; + #ifdef __LITTLE_ENDIAN - return (off & 0x7) == 0 && size <= 8 && (size & (size - 
1)) == 0; + allowed = (off & 0x7) == 0 && size <= 8 && (size & (size - 1)) == 0; #else - return ((off & 0x7) + size) == 8 && size <= 8 && (size & (size - 1)) == 0; + allowed = ((off & 0x7) + size) == 8 && size <= 8 && (size & (size - 1)) == 0; #endif + if (!allowed) + return false; + info->ctx_field_size = 8; + info->converted_op_size = 8; } else { if (size != sizeof(long)) return false; diff --git a/net/core/filter.c b/net/core/filter.c index 60ed6f343a63..4b788007415f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2856,8 +2856,37 @@ lwt_xmit_func_proto(enum bpf_func_id func_id) } } +static void __set_access_aux_info(int off, struct bpf_insn_access_aux *info) +{ + info->ctx_field_size = 4; + switch (off) { + case offsetof(struct __sk_buff, pkt_type) ... + offsetof(struct __sk_buff, pkt_type) + sizeof(__u32) - 1: + case offsetof(struct __sk_buff, vlan_present) ... + offsetof(struct __sk_buff, vlan_present) + sizeof(__u32) - 1: + info->converted_op_size = 1; + break; + case offsetof(struct __sk_buff, queue_mapping) ... + offsetof(struct __sk_buff, queue_mapping) + sizeof(__u32) - 1: + case offsetof(struct __sk_buff, protocol) ... + offsetof(struct __sk_buff, protocol) + sizeof(__u32) - 1: + case offsetof(struct __sk_buff, vlan_tci) ... + offsetof(struct __sk_buff, vlan_tci) + sizeof(__u32) - 1: + case offsetof(struct __sk_buff, vlan_proto) ... + offsetof(struct __sk_buff, vlan_proto) + sizeof(__u32) - 1: + case offsetof(struct __sk_buff, tc_index) ... + offsetof(struct __sk_buff, tc_index) + sizeof(__u32) - 1: + case offsetof(struct __sk_buff, tc_classid) ... + offsetof(struct __sk_buff, tc_classid) + sizeof(__u32) - 1: + info->converted_op_size = 2; + break; + default: + info->converted_op_size = 4; + } +} + static bool __is_valid_access(int off, int size, enum bpf_access_type type, - int *ctx_field_size) + struct bpf_insn_access_aux *info) { if (off < 0 || off >= sizeof(struct __sk_buff)) return false; @@ -2875,24 +2904,32 @@ static bool __is_valid_access(int off, int size, enum bpf_access_type type, break; case offsetof(struct __sk_buff, data) ... offsetof(struct __sk_buff, data) + sizeof(__u32) - 1: + if (size != sizeof(__u32)) + return false; + info->reg_type = PTR_TO_PACKET; + break; case offsetof(struct __sk_buff, data_end) ... offsetof(struct __sk_buff, data_end) + sizeof(__u32) - 1: if (size != sizeof(__u32)) return false; + info->reg_type = PTR_TO_PACKET_END; break; default: - /* permit narrower load for not cb/data/data_end fields */ - *ctx_field_size = 4; if (type == BPF_WRITE) { if (size != sizeof(__u32)) return false; } else { - if (size != sizeof(__u32)) + int allowed; + + /* permit narrower load for not cb/data/data_end fields */ #ifdef __LITTLE_ENDIAN - return (off & 0x3) == 0 && (size == 1 || size == 2); + allowed = (off & 0x3) == 0 && size <= 4 && (size & (size - 1)) == 0; #else - return (off & 0x3) + size == 4 && (size == 1 || size == 2); + allowed = (off & 0x3) + size == 4 && size <= 4 && (size & (size - 1)) == 0; #endif + if (!allowed) + return false; + __set_access_aux_info(off, info); } } @@ -2901,8 +2938,7 @@ static bool __is_valid_access(int off, int size, enum bpf_access_type type, static bool sk_filter_is_valid_access(int off, int size, enum bpf_access_type type, - enum bpf_reg_type *reg_type, - int *ctx_field_size) + struct bpf_insn_access_aux *info) { switch (off) { case offsetof(struct __sk_buff, tc_classid) ... 
@@ -2924,13 +2960,12 @@ static bool sk_filter_is_valid_access(int off, int size, } } - return __is_valid_access(off, size, type, ctx_field_size); + return __is_valid_access(off, size, type, info); } static bool lwt_is_valid_access(int off, int size, enum bpf_access_type type, - enum bpf_reg_type *reg_type, - int *ctx_field_size) + struct bpf_insn_access_aux *info) { switch (off) { case offsetof(struct __sk_buff, tc_classid) ... @@ -2950,22 +2985,12 @@ static bool lwt_is_valid_access(int off, int size, } } - switch (off) { - case offsetof(struct __sk_buff, data): - *reg_type = PTR_TO_PACKET; - break; - case offsetof(struct __sk_buff, data_end): - *reg_type = PTR_TO_PACKET_END; - break; - } - - return __is_valid_access(off, size, type, ctx_field_size); + return __is_valid_access(off, size, type, info); } static bool sock_filter_is_valid_access(int off, int size, enum bpf_access_type type, - enum bpf_reg_type *reg_type, - int *ctx_field_size) + struct bpf_insn_access_aux *info) { if (type == BPF_WRITE) { switch (off) { @@ -3028,8 +3053,7 @@ static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write, static bool tc_cls_act_is_valid_access(int off, int size, enum bpf_access_type type, - enum bpf_reg_type *reg_type, - int *ctx_field_size) + struct bpf_insn_access_aux *info) { if (type == BPF_WRITE) { switch (off) { @@ -3045,16 +3069,7 @@ static bool tc_cls_act_is_valid_access(int off, int size, } } - switch (off) { - case offsetof(struct __sk_buff, data): - *reg_type = PTR_TO_PACKET; - break; - case offsetof(struct __sk_buff, data_end): - *reg_type = PTR_TO_PACKET_END; - break; - } - - return __is_valid_access(off, size, type, ctx_field_size); + return __is_valid_access(off, size, type, info); } static bool __is_valid_xdp_access(int off, int size) @@ -3071,18 +3086,17 @@ static bool __is_valid_xdp_access(int off, int size) static bool xdp_is_valid_access(int off, int size, enum bpf_access_type type, - enum bpf_reg_type *reg_type, - int *ctx_field_size) + struct bpf_insn_access_aux *info) { if (type == BPF_WRITE) return false; switch (off) { case offsetof(struct xdp_md, data): - *reg_type = PTR_TO_PACKET; + info->reg_type = PTR_TO_PACKET; break; case offsetof(struct xdp_md, data_end): - *reg_type = PTR_TO_PACKET_END; + info->reg_type = PTR_TO_PACKET_END; break; } -- cgit From 673feb9d76ab3eddde7acfd94b206e321cfc90b9 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 23 Jun 2017 15:26:26 -0400 Subject: ftrace: Add :mod: caching infrastructure to trace_array This is the start of the infrastructure work to allow for tracing module functions before the module is loaded. Currently the following command: # echo :mod:some-mod > set_ftrace_filter will enable tracing of all functions within the module "some-mod" if it is loaded. What we want is that, if the module is not loaded, the line is saved. When the module is loaded, then the "some-mod" will have that line executed on it, so that the functions within it start being traced.
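As a rough userspace model of what this infrastructure stores, the sketch below caches one filter line per not-yet-loaded module. It is not the kernel implementation: struct mod_load and cache_mod_line() are simplified stand-ins for the patch's struct ftrace_mod_load and ftrace_add_mod(), using a plain singly linked list instead of list_head.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* One cached "func:mod:module" line waiting for its module to load. */
struct mod_load {
	struct mod_load *next;
	char *func;
	char *module;
	int enable;
};

static struct mod_load *mod_list;

static int cache_mod_line(const char *func, const char *module, int enable)
{
	struct mod_load *m = calloc(1, sizeof(*m));

	if (!m)
		return -1;
	m->func = strdup(func);
	m->module = strdup(module);
	m->enable = enable;
	if (!m->func || !m->module) {
		free(m->func);
		free(m->module);
		free(m);
		return -1;
	}
	m->next = mod_list;
	mod_list = m;
	return 0;
}

int main(void)
{
	struct mod_load *m;

	/* models "echo :mod:some-mod > set_ftrace_filter" while unloaded */
	cache_mod_line("*", "some-mod", 1);
	for (m = mod_list; m; m = m->next)
		printf("%s:mod:%s\n", m->func, m->module);
	return 0;
}

The later patches in this series walk exactly such a list: one to display the entries in set_ftrace_filter, and one to replay them when the module finally loads.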
Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 142 +++++++++++++++++++++++++++++++++++++++++++++++--- kernel/trace/trace.h | 12 +++++ 2 files changed, 148 insertions(+), 6 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 9e5841dc14b5..1867edec6269 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1293,6 +1293,28 @@ static void ftrace_hash_clear(struct ftrace_hash *hash) FTRACE_WARN_ON(hash->count); } +static void free_ftrace_mod(struct ftrace_mod_load *ftrace_mod) +{ + list_del(&ftrace_mod->list); + kfree(ftrace_mod->module); + kfree(ftrace_mod->func); + kfree(ftrace_mod); +} + +static void clear_ftrace_mod_list(struct list_head *head) +{ + struct ftrace_mod_load *p, *n; + + /* stack tracer isn't supported yet */ + if (!head) + return; + + mutex_lock(&ftrace_lock); + list_for_each_entry_safe(p, n, head, list) + free_ftrace_mod(p); + mutex_unlock(&ftrace_lock); +} + static void free_ftrace_hash(struct ftrace_hash *hash) { if (!hash || hash == EMPTY_HASH) @@ -1346,6 +1368,35 @@ static struct ftrace_hash *alloc_ftrace_hash(int size_bits) return hash; } + +static int ftrace_add_mod(struct trace_array *tr, + const char *func, const char *module, + int enable) +{ + struct ftrace_mod_load *ftrace_mod; + struct list_head *mod_head = enable ? &tr->mod_trace : &tr->mod_notrace; + + ftrace_mod = kzalloc(sizeof(*ftrace_mod), GFP_KERNEL); + if (!ftrace_mod) + return -ENOMEM; + + ftrace_mod->func = kstrdup(func, GFP_KERNEL); + ftrace_mod->module = kstrdup(module, GFP_KERNEL); + ftrace_mod->enable = enable; + + if (!ftrace_mod->func || !ftrace_mod->module) + goto out_free; + + list_add(&ftrace_mod->list, mod_head); + + return 0; + + out_free: + free_ftrace_mod(ftrace_mod); + + return -ENOMEM; +} + static struct ftrace_hash * alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash) { @@ -3457,6 +3508,8 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag, { struct ftrace_iterator *iter; struct ftrace_hash *hash; + struct list_head *mod_head; + struct trace_array *tr = ops->private; int ret = 0; ftrace_ops_init(ops); @@ -3478,18 +3531,23 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag, mutex_lock(&ops->func_hash->regex_lock); - if (flag & FTRACE_ITER_NOTRACE) + if (flag & FTRACE_ITER_NOTRACE) { hash = ops->func_hash->notrace_hash; - else + mod_head = tr ? &tr->mod_trace : NULL; + } else { hash = ops->func_hash->filter_hash; + mod_head = tr ? 
&tr->mod_notrace : NULL; + } if (file->f_mode & FMODE_WRITE) { const int size_bits = FTRACE_HASH_DEFAULT_BITS; - if (file->f_flags & O_TRUNC) + if (file->f_flags & O_TRUNC) { iter->hash = alloc_ftrace_hash(size_bits); - else + clear_ftrace_mod_list(mod_head); + } else { iter->hash = alloc_and_copy_ftrace_hash(size_bits, hash); + } if (!iter->hash) { trace_parser_put(&iter->parser); @@ -3761,6 +3819,68 @@ static int ftrace_hash_move_and_update_ops(struct ftrace_ops *ops, return ret; } +static bool module_exists(const char *module) +{ + /* All modules have the symbol __this_module */ + const char this_mod[] = "__this_module"; + const int modname_size = MAX_PARAM_PREFIX_LEN + sizeof(this_mod) + 1; + char modname[modname_size + 1]; + unsigned long val; + int n; + + n = snprintf(modname, modname_size + 1, "%s:%s", module, this_mod); + + if (n > modname_size) + return false; + + val = module_kallsyms_lookup_name(modname); + return val != 0; +} + +static int cache_mod(struct trace_array *tr, + const char *func, char *module, int enable) +{ + struct ftrace_mod_load *ftrace_mod, *n; + struct list_head *head = enable ? &tr->mod_trace : &tr->mod_notrace; + int ret; + + mutex_lock(&ftrace_lock); + + /* We do not cache inverse filters */ + if (func[0] == '!') { + func++; + ret = -EINVAL; + + /* Look to remove this hash */ + list_for_each_entry_safe(ftrace_mod, n, head, list) { + if (strcmp(ftrace_mod->module, module) != 0) + continue; + + /* no func matches all */ + if (!func || strcmp(func, "*") == 0 || + (ftrace_mod->func && + strcmp(ftrace_mod->func, func) == 0)) { + ret = 0; + free_ftrace_mod(ftrace_mod); + continue; + } + } + goto out; + } + + ret = -EINVAL; + /* We only care about modules that have not been loaded yet */ + if (module_exists(module)) + goto out; + + /* Save this string off, and execute it when the module is loaded */ + ret = ftrace_add_mod(tr, func, module, enable); + out: + mutex_unlock(&ftrace_lock); + + return ret; +} + /* * We register the module command as a template to show others how * to register the a command as well. @@ -3768,10 +3888,16 @@ static int ftrace_hash_move_and_update_ops(struct ftrace_ops *ops, static int ftrace_mod_callback(struct trace_array *tr, struct ftrace_hash *hash, - char *func, char *cmd, char *module, int enable) + char *func_orig, char *cmd, char *module, int enable) { + char *func; int ret; + /* match_records() modifies func, and we need the original */ + func = kstrdup(func_orig, GFP_KERNEL); + if (!func) + return -ENOMEM; + /* * cmd == 'mod' because we only registered this func * for the 'mod' ftrace_func_command. @@ -3780,8 +3906,10 @@ ftrace_mod_callback(struct trace_array *tr, struct ftrace_hash *hash, * parameter. 
*/ ret = match_records(hash, func, strlen(func), module); + kfree(func); + if (!ret) - return -EINVAL; + return cache_mod(tr, func_orig, module, enable); if (ret < 0) return ret; return 0; @@ -5570,6 +5698,8 @@ static void ftrace_update_trampoline(struct ftrace_ops *ops) void ftrace_init_trace_array(struct trace_array *tr) { INIT_LIST_HEAD(&tr->func_probes); + INIT_LIST_HEAD(&tr->mod_trace); + INIT_LIST_HEAD(&tr->mod_notrace); } #else diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 69a3ab3ee4f5..d63550cdbdfa 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -263,7 +263,10 @@ struct trace_array { struct ftrace_ops *ops; struct trace_pid_list __rcu *function_pids; #ifdef CONFIG_DYNAMIC_FTRACE + /* All of these are protected by the ftrace_lock */ struct list_head func_probes; + struct list_head mod_trace; + struct list_head mod_notrace; #endif /* function tracing enabled */ int function_enabled; @@ -761,6 +764,15 @@ enum print_line_t print_trace_line(struct trace_iterator *iter); extern char trace_find_mark(unsigned long long duration); +struct ftrace_hash; + +struct ftrace_mod_load { + struct list_head list; + char *func; + char *module; + int enable; +}; + struct ftrace_hash { unsigned long size_bits; struct hlist_head *buckets; -- cgit From 5985ea8bd5d1b820b909af49fbc2767a990080a6 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 23 Jun 2017 16:05:11 -0400 Subject: ftrace: Have the cached module list show in set_ftrace_filter When writing a module filter into set_ftrace_filter for a module that is not yet loaded, it is cached, and will be executed when the module is loaded (although that is not implemented yet as of this commit). Display the list of cached modules to be traced. Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 3 +- kernel/trace/ftrace.c | 112 +++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 102 insertions(+), 13 deletions(-) (limited to 'kernel/trace') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 1b6992e994e6..9fb9a67dc9d4 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -446,7 +446,8 @@ enum { FTRACE_ITER_PRINTALL = (1 << 2), FTRACE_ITER_DO_PROBES = (1 << 3), FTRACE_ITER_PROBE = (1 << 4), - FTRACE_ITER_ENABLED = (1 << 5), + FTRACE_ITER_MOD = (1 << 5), + FTRACE_ITER_ENABLED = (1 << 6), }; void arch_ftrace_update_code(int command); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 1867edec6269..bfdbce78064b 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3112,6 +3112,7 @@ ftrace_allocate_pages(unsigned long num_to_init) struct ftrace_iterator { loff_t pos; loff_t func_pos; + loff_t mod_pos; struct ftrace_page *pg; struct dyn_ftrace *func; struct ftrace_func_probe *probe; @@ -3119,6 +3120,8 @@ struct ftrace_iterator { struct trace_parser parser; struct ftrace_hash *hash; struct ftrace_ops *ops; + struct trace_array *tr; + struct list_head *mod_list; int pidx; int idx; unsigned flags; @@ -3203,13 +3206,13 @@ static void *t_probe_start(struct seq_file *m, loff_t *pos) if (!(iter->flags & FTRACE_ITER_DO_PROBES)) return NULL; - if (iter->func_pos > *pos) + if (iter->mod_pos > *pos) return NULL; iter->probe = NULL; iter->probe_entry = NULL; iter->pidx = 0; - for (l = 0; l <= (*pos - iter->func_pos); ) { + for (l = 0; l <= (*pos - iter->mod_pos); ) { p = t_probe_next(m, &l); if (!p) break; @@ -3247,6 +3250,82 @@ t_probe_show(struct seq_file *m, struct ftrace_iterator *iter) return 0; } +static void * +t_mod_next(struct
seq_file *m, loff_t *pos) +{ + struct ftrace_iterator *iter = m->private; + struct trace_array *tr = iter->tr; + + (*pos)++; + iter->pos = *pos; + + iter->mod_list = iter->mod_list->next; + + if (iter->mod_list == &tr->mod_trace || + iter->mod_list == &tr->mod_notrace) { + iter->flags &= ~FTRACE_ITER_MOD; + return NULL; + } + + iter->mod_pos = *pos; + + return iter; +} + +static void *t_mod_start(struct seq_file *m, loff_t *pos) +{ + struct ftrace_iterator *iter = m->private; + void *p = NULL; + loff_t l; + + if (iter->func_pos > *pos) + return NULL; + + iter->mod_pos = iter->func_pos; + + /* probes are only available if tr is set */ + if (!iter->tr) + return NULL; + + for (l = 0; l <= (*pos - iter->func_pos); ) { + p = t_mod_next(m, &l); + if (!p) + break; + } + if (!p) { + iter->flags &= ~FTRACE_ITER_MOD; + return t_probe_start(m, pos); + } + + /* Only set this if we have an item */ + iter->flags |= FTRACE_ITER_MOD; + + return iter; +} + +static int +t_mod_show(struct seq_file *m, struct ftrace_iterator *iter) +{ + struct ftrace_mod_load *ftrace_mod; + struct trace_array *tr = iter->tr; + + if (WARN_ON_ONCE(!iter->mod_list) || + iter->mod_list == &tr->mod_trace || + iter->mod_list == &tr->mod_notrace) + return -EIO; + + ftrace_mod = list_entry(iter->mod_list, struct ftrace_mod_load, list); + + if (ftrace_mod->func) + seq_printf(m, "%s", ftrace_mod->func); + else + seq_putc(m, '*'); + + seq_printf(m, ":mod:%s\n", ftrace_mod->module); + + return 0; +} + static void * t_func_next(struct seq_file *m, loff_t *pos) { @@ -3288,7 +3367,7 @@ static void * t_next(struct seq_file *m, void *v, loff_t *pos) { struct ftrace_iterator *iter = m->private; - loff_t l = *pos; /* t_hash_start() must use original pos */ + loff_t l = *pos; /* t_probe_start() must use original pos */ void *ret; if (unlikely(ftrace_disabled)) @@ -3297,16 +3376,19 @@ t_next(struct seq_file *m, void *v, loff_t *pos) if (iter->flags & FTRACE_ITER_PROBE) return t_probe_next(m, pos); + if (iter->flags & FTRACE_ITER_MOD) + return t_mod_next(m, pos); + if (iter->flags & FTRACE_ITER_PRINTALL) { /* next must increment pos, and t_probe_start does not */ (*pos)++; - return t_probe_start(m, &l); + return t_mod_start(m, &l); } ret = t_func_next(m, pos); if (!ret) - return t_probe_start(m, &l); + return t_mod_start(m, &l); return ret; } @@ -3315,7 +3397,7 @@ static void reset_iter_read(struct ftrace_iterator *iter) { iter->pos = 0; iter->func_pos = 0; - iter->flags &= ~(FTRACE_ITER_PRINTALL | FTRACE_ITER_PROBE); + iter->flags &= ~(FTRACE_ITER_PRINTALL | FTRACE_ITER_PROBE | FTRACE_ITER_MOD); } static void *t_start(struct seq_file *m, loff_t *pos) @@ -3344,15 +3426,15 @@ static void *t_start(struct seq_file *m, loff_t *pos) ftrace_hash_empty(iter->hash)) { iter->func_pos = 1; /* Account for the message */ if (*pos > 0) - return t_probe_start(m, pos); + return t_mod_start(m, pos); iter->flags |= FTRACE_ITER_PRINTALL; /* reset in case of seek/pread */ iter->flags &= ~FTRACE_ITER_PROBE; return iter; } - if (iter->flags & FTRACE_ITER_PROBE) - return t_probe_start(m, pos); + if (iter->flags & FTRACE_ITER_MOD) + return t_mod_start(m, pos); /* * Unfortunately, we need to restart at ftrace_pages_start @@ -3368,7 +3450,7 @@ static void *t_start(struct seq_file *m, loff_t *pos) } if (!p) - return t_probe_start(m, pos); + return t_mod_start(m, pos); return iter; } @@ -3402,6 +3484,9 @@ static int t_show(struct seq_file *m, void *v) if (iter->flags & FTRACE_ITER_PROBE) return t_probe_show(m, iter); + if (iter->flags & FTRACE_ITER_MOD) + return 
t_mod_show(m, iter); + if (iter->flags & FTRACE_ITER_PRINTALL) { if (iter->flags & FTRACE_ITER_NOTRACE) seq_puts(m, "#### no functions disabled ####\n"); @@ -3528,17 +3613,20 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag, iter->ops = ops; iter->flags = flag; + iter->tr = tr; mutex_lock(&ops->func_hash->regex_lock); if (flag & FTRACE_ITER_NOTRACE) { hash = ops->func_hash->notrace_hash; - mod_head = tr ? &tr->mod_trace : NULL; + mod_head = tr ? &tr->mod_notrace : NULL; } else { hash = ops->func_hash->filter_hash; - mod_head = tr ? &tr->mod_notrace : NULL; + mod_head = tr ? &tr->mod_trace : NULL; } + iter->mod_list = mod_head; + if (file->f_mode & FMODE_WRITE) { const int size_bits = FTRACE_HASH_DEFAULT_BITS; -- cgit From d7fbf8df7ca0a5c7e85db79f7005f99cb461c525 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 26 Jun 2017 10:57:21 -0400 Subject: ftrace: Implement cached modules tracing on module load If a module is cached in the set_ftrace_filter, and that module is loaded, then enable tracing on that module as if the cached module text was written into set_ftrace_filter just as the module is loaded. # echo ":mod:kvm_intel" > # cat /sys/kernel/tracing/set_ftrace_filter #### all functions enabled #### :mod:kvm_intel # modprobe kvm_intel # cat /sys/kernel/tracing/set_ftrace_filter vmx_get_rflags [kvm_intel] vmx_get_pkru [kvm_intel] vmx_get_interrupt_shadow [kvm_intel] vmx_rdtscp_supported [kvm_intel] vmx_invpcid_supported [kvm_intel] [..] Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index bfdbce78064b..f1ccf8be9df7 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3969,6 +3969,97 @@ static int cache_mod(struct trace_array *tr, return ret; } +static int +ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, + int reset, int enable); + +static void process_mod_list(struct list_head *head, struct ftrace_ops *ops, + char *mod, bool enable) +{ + struct ftrace_mod_load *ftrace_mod, *n; + struct ftrace_hash **orig_hash, *new_hash; + LIST_HEAD(process_mods); + char *func; + int ret; + + mutex_lock(&ops->func_hash->regex_lock); + + if (enable) + orig_hash = &ops->func_hash->filter_hash; + else + orig_hash = &ops->func_hash->notrace_hash; + + new_hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, + *orig_hash); + if (!new_hash) + return; /* Warn? */ + + mutex_lock(&ftrace_lock); + + list_for_each_entry_safe(ftrace_mod, n, head, list) { + + if (strcmp(ftrace_mod->module, mod) != 0) + continue; + + if (ftrace_mod->func) + func = kstrdup(ftrace_mod->func, GFP_KERNEL); + else + func = kstrdup("*", GFP_KERNEL); + + if (!func) /* warn? 
*/ + continue; + + list_del(&ftrace_mod->list); + list_add(&ftrace_mod->list, &process_mods); + + /* Use the newly allocated func, as it may be "*" */ + kfree(ftrace_mod->func); + ftrace_mod->func = func; + } + + mutex_unlock(&ftrace_lock); + + list_for_each_entry_safe(ftrace_mod, n, &process_mods, list) { + + func = ftrace_mod->func; + + /* Grabs ftrace_lock, which is why we have this extra step */ + match_records(new_hash, func, strlen(func), mod); + free_ftrace_mod(ftrace_mod); + } + + mutex_lock(&ftrace_lock); + + ret = ftrace_hash_move_and_update_ops(ops, orig_hash, + new_hash, enable); + mutex_unlock(&ftrace_lock); + + mutex_unlock(&ops->func_hash->regex_lock); + + free_ftrace_hash(new_hash); +} + +static void process_cached_mods(const char *mod_name) +{ + struct trace_array *tr; + char *mod; + + mod = kstrdup(mod_name, GFP_KERNEL); + if (!mod) + return; + + mutex_lock(&trace_types_lock); + list_for_each_entry(tr, &ftrace_trace_arrays, list) { + if (!list_empty(&tr->mod_trace)) + process_mod_list(&tr->mod_trace, tr->ops, mod, true); + if (!list_empty(&tr->mod_notrace)) + process_mod_list(&tr->mod_notrace, tr->ops, mod, false); + } + mutex_unlock(&trace_types_lock); + + kfree(mod); +} + /* * We register the module command as a template to show others how * to register the a command as well. @@ -5682,6 +5773,8 @@ void ftrace_module_enable(struct module *mod) out_unlock: mutex_unlock(&ftrace_lock); + + process_cached_mods(mod->name); } void ftrace_module_init(struct module *mod) -- cgit From 8c08f0d5c6fb10ff93ffb1cbf416f4f1c3a52a80 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 26 Jun 2017 11:47:31 -0400 Subject: ftrace: Have cached module filters be an active filter When a module filter is added to set_ftrace_filter, if the module is not loaded, it is cached. This should be considered an active filter, and function tracing should be filtered by this. That is, if a cached module filter is the only filter set, then no function tracing should be happening, as all the functions available will be filtered out. This makes sense, as the reason to add a cached module filter, is to trace the module when you load it. There shouldn't be any other tracing happening until then. Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 2 ++ kernel/trace/ftrace.c | 20 +++++++++++++++----- kernel/trace/trace.h | 7 ++++++- 3 files changed, 23 insertions(+), 6 deletions(-) (limited to 'kernel/trace') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 9fb9a67dc9d4..5857390ac35a 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -120,6 +120,7 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); * this ops will fail to register or set_filter_ip. * PID - Is affected by set_ftrace_pid (allows filtering on those pids) * RCU - Set when the ops can only be called when RCU is watching. + * TRACE_ARRAY - The ops->private points to a trace_array descriptor. */ enum { FTRACE_OPS_FL_ENABLED = 1 << 0, @@ -138,6 +139,7 @@ enum { FTRACE_OPS_FL_IPMODIFY = 1 << 13, FTRACE_OPS_FL_PID = 1 << 14, FTRACE_OPS_FL_RCU = 1 << 15, + FTRACE_OPS_FL_TRACE_ARRAY = 1 << 16, }; #ifdef CONFIG_DYNAMIC_FTRACE diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index f1ccf8be9df7..914539e3e301 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1410,6 +1410,9 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash) if (!new_hash) return NULL; + if (hash) + new_hash->flags = hash->flags; + /* Empty hash? 
*/ if (ftrace_hash_empty(hash)) return new_hash; @@ -1454,7 +1457,7 @@ __ftrace_hash_move(struct ftrace_hash *src) /* * If the new source is empty, just return the empty_hash. */ - if (!src->count) + if (ftrace_hash_empty(src)) return EMPTY_HASH; /* @@ -1471,6 +1474,8 @@ __ftrace_hash_move(struct ftrace_hash *src) if (!new_hash) return NULL; + new_hash->flags = src->flags; + size = 1 << src->size_bits; for (i = 0; i < size; i++) { hhd = &src->buckets[i]; @@ -1701,7 +1706,7 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops, struct dyn_ftrace *rec; bool update = false; int count = 0; - int all = 0; + int all = false; /* Only update if the ops has been registered */ if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) @@ -1722,7 +1727,7 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops, hash = ops->func_hash->filter_hash; other_hash = ops->func_hash->notrace_hash; if (ftrace_hash_empty(hash)) - all = 1; + all = true; } else { inc = !inc; hash = ops->func_hash->notrace_hash; @@ -4028,6 +4033,9 @@ static void process_mod_list(struct list_head *head, struct ftrace_ops *ops, free_ftrace_mod(ftrace_mod); } + if (enable && list_empty(head)) + new_hash->flags &= ~FTRACE_HASH_FL_MOD; + mutex_lock(&ftrace_lock); ret = ftrace_hash_move_and_update_ops(ops, orig_hash, @@ -5035,9 +5043,11 @@ int ftrace_regex_release(struct inode *inode, struct file *file) if (file->f_mode & FMODE_WRITE) { filter_hash = !!(iter->flags & FTRACE_ITER_FILTER); - if (filter_hash) + if (filter_hash) { orig_hash = &iter->ops->func_hash->filter_hash; - else + if (!list_empty(&iter->tr->mod_trace)) + iter->hash->flags |= FTRACE_HASH_FL_MOD; + } else orig_hash = &iter->ops->func_hash->notrace_hash; mutex_lock(&ftrace_lock); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index d63550cdbdfa..13823951e42b 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -773,10 +773,15 @@ struct ftrace_mod_load { int enable; }; +enum { + FTRACE_HASH_FL_MOD = (1 << 0), +}; + struct ftrace_hash { unsigned long size_bits; struct hlist_head *buckets; unsigned long count; + unsigned long flags; struct rcu_head rcu; }; @@ -785,7 +790,7 @@ ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip); static __always_inline bool ftrace_hash_empty(struct ftrace_hash *hash) { - return !hash || !hash->count; + return !hash || !(hash->count || (hash->flags & FTRACE_HASH_FL_MOD)); } /* Standard output formatting function used for function return traces */ -- cgit From 6a9c981b1e9657ca5866d10aa38b8a4fe1159138 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 27 Jun 2017 11:02:49 -0400 Subject: ftrace: Remove unused function ftrace_arch_read_dyn_info() ftrace_arch_read_dyn_info() was used so that archs could add its own debug information into the dyn_ftrace_total_info in the tracefs file system. That file is for debugging usage of dynamic ftrace. No arch uses that function anymore, so just get rid of it. This also allows for tracing_read_dyn_info() to be cleaned up a bit. 
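With the arch hook gone, the read path reduces to formatting one number into a small on-stack buffer. Below is a userspace sketch of the simplified shape; format_dyn_info() is a hypothetical stand-in for tracing_read_dyn_info() minus the simple_read_from_buffer() copy-out, not the actual kernel function.

#include <stdio.h>

/* Format the counter into a small stack buffer: no static 1K buffer
 * and no mutex are needed once the weak arch hook is removed. */
static int format_dyn_info(long val, char *out, int outlen)
{
	char buf[64];	/* not too big for a shallow stack */
	int r;

	r = snprintf(buf, sizeof(buf) - 1, "%ld", val);
	buf[r++] = '\n';
	return snprintf(out, outlen, "%.*s", r, buf);
}

int main(void)
{
	char out[64];

	format_dyn_info(38693, out, sizeof(out));
	fputs(out, stdout);	/* prints "38693" and a newline */
	return 0;
}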
Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 19ac2088d10a..14318ce92b13 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6737,33 +6737,18 @@ static const struct file_operations tracing_stats_fops = { #ifdef CONFIG_DYNAMIC_FTRACE -int __weak ftrace_arch_read_dyn_info(char *buf, int size) -{ - return 0; -} - static ssize_t tracing_read_dyn_info(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - static char ftrace_dyn_info_buffer[1024]; - static DEFINE_MUTEX(dyn_info_mutex); unsigned long *p = filp->private_data; - char *buf = ftrace_dyn_info_buffer; - int size = ARRAY_SIZE(ftrace_dyn_info_buffer); + char buf[64]; /* Not too big for a shallow stack */ int r; - mutex_lock(&dyn_info_mutex); - r = sprintf(buf, "%ld ", *p); - - r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r); + r = scnprintf(buf, 63, "%ld", *p); buf[r++] = '\n'; - r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); - - mutex_unlock(&dyn_info_mutex); - - return r; + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } static const struct file_operations tracing_dyn_info_fops = { -- cgit From 83dd14933e33a45e9b366c572e15505982b46845 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 27 Jun 2017 11:04:40 -0400 Subject: ftrace: Decrement count for dyn_ftrace_total_info file The dyn_ftrace_total_info file is used to show how many functions have been converted into nops and can be used by ftrace. The problem is that it does not get decremented when functions are removed (init boot code being freed, and modules being freed). That means the number is very inaccurate every time functions are removed from the ftrace tables. Decrement it when functions are removed. Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 914539e3e301..7509ef9810bf 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5705,6 +5705,7 @@ void ftrace_release_mod(struct module *mod) if (pg == ftrace_pages) ftrace_pages = next_to_ftrace_page(last_pg); + ftrace_update_tot_cnt -= pg->index; *last_pg = pg->next; order = get_count_order(pg->size / ENTRIES_PER_PAGE); free_pages((unsigned long)pg->records, order); -- cgit From d914ba37d7145acb9fd3bb23075c2d56e5a44eb6 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Mon, 26 Jun 2017 19:01:55 -0700 Subject: tracing: Add support for recording tgid of tasks In order to support recording of tgid, the following changes are made: * Introduce a new API (tracing_record_taskinfo) to additionally record the tgid along with the task's comm at the same time. This has the benefit of not setting trace_cmdline_save before all the information for a task is saved. * Add a new API tracing_record_taskinfo_sched_switch to record task information for 2 tasks at a time (previous and next) and use it from the sched_switch probe. * Preserve the old API (tracing_record_cmdline) and create it as a wrapper around the new one so that existing callers aren't affected. * Reuse the existing sched_switch and sched_wakeup probes to record tgid information, and add a new option 'record-tgid' to enable recording of tgid. When the record-tgid option isn't enabled to begin with, we take care to make sure that there isn't any memory or runtime overhead.
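The core data structure behind record-tgid is just a pid-indexed array. Here is a simplified userspace model; save_tgid() and find_tgid() are hypothetical stand-ins for the patch's trace_save_tgid() and trace_find_tgid(), and the lazy allocation below is only for the demo, since the kernel allocates the array when the option is set.

#include <stdio.h>
#include <stdlib.h>

#define PID_MAX_DEFAULT 32768

/* Flat map from pid to tgid; 0 means "no tgid recorded". */
static int *tgid_map;

static int save_tgid(int pid, int tgid)
{
	if (!tgid_map)
		tgid_map = calloc(PID_MAX_DEFAULT + 1, sizeof(*tgid_map));
	if (!tgid_map || pid <= 0 || pid > PID_MAX_DEFAULT)
		return 0;
	tgid_map[pid] = tgid;
	return 1;
}

static int find_tgid(int pid)
{
	if (!tgid_map || pid <= 0 || pid > PID_MAX_DEFAULT)
		return 0;
	return tgid_map[pid];
}

int main(void)
{
	save_tgid(2408, 2400);	/* e.g. a thread whose group leader is 2400 */
	printf("tgid(%d) = %d\n", 2408, find_tgid(2408));
	printf("tgid(%d) = %d\n", 9999, find_tgid(9999));	/* 0: not recorded */
	return 0;
}

The PID_MAX_DEFAULT bound mirrors the kernel's allocation of (PID_MAX_DEFAULT + 1) entries, so the map costs memory only once the option is turned on.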
Link: http://lkml.kernel.org/r/20170627020155.5139-1-joelaf@google.com Cc: kernel-team@android.com Cc: Ingo Molnar Tested-by: Michael Sartain Signed-off-by: Joel Fernandes Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_events.h | 13 ++++- kernel/trace/trace.c | 105 ++++++++++++++++++++++++++++++++++---- kernel/trace/trace.h | 7 +++ kernel/trace/trace_events.c | 42 ++++++++++++++- kernel/trace/trace_sched_switch.c | 72 +++++++++++++++++++++----- 5 files changed, 213 insertions(+), 26 deletions(-) (limited to 'kernel/trace') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index a556805eff8a..f73cedfa2e0b 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -151,7 +151,15 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_buffer, int type, unsigned long len, unsigned long flags, int pc); -void tracing_record_cmdline(struct task_struct *tsk); +#define TRACE_RECORD_CMDLINE BIT(0) +#define TRACE_RECORD_TGID BIT(1) + +void tracing_record_taskinfo(struct task_struct *task, int flags); +void tracing_record_taskinfo_sched_switch(struct task_struct *prev, + struct task_struct *next, int flags); + +void tracing_record_cmdline(struct task_struct *task); +void tracing_record_tgid(struct task_struct *task); int trace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...); @@ -290,6 +298,7 @@ struct trace_subsystem_dir; enum { EVENT_FILE_FL_ENABLED_BIT, EVENT_FILE_FL_RECORDED_CMD_BIT, + EVENT_FILE_FL_RECORDED_TGID_BIT, EVENT_FILE_FL_FILTERED_BIT, EVENT_FILE_FL_NO_SET_FILTER_BIT, EVENT_FILE_FL_SOFT_MODE_BIT, @@ -303,6 +312,7 @@ enum { * Event file flags: * ENABLED - The event is enabled * RECORDED_CMD - The comms should be recorded at sched_switch + * RECORDED_TGID - The tgids should be recorded at sched_switch * FILTERED - The event has a filter attached * NO_SET_FILTER - Set when filter has error and is to be ignored * SOFT_MODE - The event is enabled/disabled by SOFT_DISABLED @@ -315,6 +325,7 @@ enum { enum { EVENT_FILE_FL_ENABLED = (1 << EVENT_FILE_FL_ENABLED_BIT), EVENT_FILE_FL_RECORDED_CMD = (1 << EVENT_FILE_FL_RECORDED_CMD_BIT), + EVENT_FILE_FL_RECORDED_TGID = (1 << EVENT_FILE_FL_RECORDED_TGID_BIT), EVENT_FILE_FL_FILTERED = (1 << EVENT_FILE_FL_FILTERED_BIT), EVENT_FILE_FL_NO_SET_FILTER = (1 << EVENT_FILE_FL_NO_SET_FILTER_BIT), EVENT_FILE_FL_SOFT_MODE = (1 << EVENT_FILE_FL_SOFT_MODE_BIT), diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 14318ce92b13..ab9db750dd29 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -87,7 +87,7 @@ dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set) * tracing is active, only save the comm when a trace event * occurred. */ -static DEFINE_PER_CPU(bool, trace_cmdline_save); +static DEFINE_PER_CPU(bool, trace_taskinfo_save); /* * Kill all tracing for good (never come back). 
@@ -790,7 +790,7 @@ EXPORT_SYMBOL_GPL(tracing_on); static __always_inline void __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event) { - __this_cpu_write(trace_cmdline_save, true); + __this_cpu_write(trace_taskinfo_save, true); /* If this is the temp buffer, we need to commit fully */ if (this_cpu_read(trace_buffered_event) == event) { @@ -1709,6 +1709,8 @@ void tracing_reset_all_online_cpus(void) } } +static int *tgid_map; + #define SAVED_CMDLINES_DEFAULT 128 #define NO_CMDLINE_MAP UINT_MAX static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED; @@ -1722,7 +1724,7 @@ struct saved_cmdlines_buffer { static struct saved_cmdlines_buffer *savedcmd; /* temporary disable recording */ -static atomic_t trace_record_cmdline_disabled __read_mostly; +static atomic_t trace_record_taskinfo_disabled __read_mostly; static inline char *get_saved_cmdlines(int idx) { @@ -1990,16 +1992,87 @@ void trace_find_cmdline(int pid, char comm[]) preempt_enable(); } -void tracing_record_cmdline(struct task_struct *tsk) +int trace_find_tgid(int pid) +{ + if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT)) + return 0; + + return tgid_map[pid]; +} + +static int trace_save_tgid(struct task_struct *tsk) { - if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on()) + if (unlikely(!tgid_map || !tsk->pid || tsk->pid > PID_MAX_DEFAULT)) + return 0; + + tgid_map[tsk->pid] = tsk->tgid; + return 1; +} + +static bool tracing_record_taskinfo_skip(int flags) +{ + if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID)))) + return true; + if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on()) + return true; + if (!__this_cpu_read(trace_taskinfo_save)) + return true; + return false; +} + +/** + * tracing_record_taskinfo - record the task info of a task + * + * @task - task to record + * @flags - TRACE_RECORD_CMDLINE for recording comm + * - TRACE_RECORD_TGID for recording tgid + */ +void tracing_record_taskinfo(struct task_struct *task, int flags) +{ + if (tracing_record_taskinfo_skip(flags)) + return; + if ((flags & TRACE_RECORD_CMDLINE) && !trace_save_cmdline(task)) + return; + if ((flags & TRACE_RECORD_TGID) && !trace_save_tgid(task)) return; - if (!__this_cpu_read(trace_cmdline_save)) + __this_cpu_write(trace_taskinfo_save, false); +} + +/** + * tracing_record_taskinfo_sched_switch - record task info for sched_switch + * + * @prev - previous task during sched_switch + * @next - next task during sched_switch + * @flags - TRACE_RECORD_CMDLINE for recording comm + * TRACE_RECORD_TGID for recording tgid + */ +void tracing_record_taskinfo_sched_switch(struct task_struct *prev, + struct task_struct *next, int flags) +{ + if (tracing_record_taskinfo_skip(flags)) return; - if (trace_save_cmdline(tsk)) - __this_cpu_write(trace_cmdline_save, false); + if ((flags & TRACE_RECORD_CMDLINE) && + (!trace_save_cmdline(prev) || !trace_save_cmdline(next))) + return; + + if ((flags & TRACE_RECORD_TGID) && + (!trace_save_tgid(prev) || !trace_save_tgid(next))) + return; + + __this_cpu_write(trace_taskinfo_save, false); +} + +/* Helpers to record a specific task information */ +void tracing_record_cmdline(struct task_struct *task) +{ + tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE); +} + +void tracing_record_tgid(struct task_struct *task) +{ + tracing_record_taskinfo(task, TRACE_RECORD_TGID); } /* @@ -3144,7 +3217,7 @@ static void *s_start(struct seq_file *m, loff_t *pos) #endif if (!iter->snapshot) - atomic_inc(&trace_record_cmdline_disabled); + 
atomic_inc(&trace_record_taskinfo_disabled); if (*pos != iter->pos) { iter->ent = NULL; @@ -3189,7 +3262,7 @@ static void s_stop(struct seq_file *m, void *p) #endif if (!iter->snapshot) - atomic_dec(&trace_record_cmdline_disabled); + atomic_dec(&trace_record_taskinfo_disabled); trace_access_unlock(iter->cpu_file); trace_event_read_unlock(); @@ -4236,6 +4309,18 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled) if (mask == TRACE_ITER_RECORD_CMD) trace_event_enable_cmd_record(enabled); + if (mask == TRACE_ITER_RECORD_TGID) { + if (!tgid_map) + tgid_map = kzalloc((PID_MAX_DEFAULT + 1) * sizeof(*tgid_map), + GFP_KERNEL); + if (!tgid_map) { + tr->trace_flags &= ~TRACE_ITER_RECORD_TGID; + return -ENOMEM; + } + + trace_event_enable_tgid_record(enabled); + } + if (mask == TRACE_ITER_EVENT_FORK) trace_event_follow_fork(tr, enabled); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 13823951e42b..6ade1c55cc3a 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -640,6 +640,9 @@ void set_graph_array(struct trace_array *tr); void tracing_start_cmdline_record(void); void tracing_stop_cmdline_record(void); +void tracing_start_tgid_record(void); +void tracing_stop_tgid_record(void); + int register_tracer(struct tracer *type); int is_tracing_stopped(void); @@ -700,6 +703,7 @@ static inline void __trace_stack(struct trace_array *tr, unsigned long flags, extern u64 ftrace_now(int cpu); extern void trace_find_cmdline(int pid, char comm[]); +extern int trace_find_tgid(int pid); extern void trace_event_follow_fork(struct trace_array *tr, bool enable); #ifdef CONFIG_DYNAMIC_FTRACE @@ -1124,6 +1128,7 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf, C(CONTEXT_INFO, "context-info"), /* Print pid/cpu/time */ \ C(LATENCY_FMT, "latency-format"), \ C(RECORD_CMD, "record-cmd"), \ + C(RECORD_TGID, "record-tgid"), \ C(OVERWRITE, "overwrite"), \ C(STOP_ON_FREE, "disable_on_free"), \ C(IRQ_INFO, "irq-info"), \ @@ -1440,6 +1445,8 @@ struct ftrace_event_field * trace_find_event_field(struct trace_event_call *call, char *name); extern void trace_event_enable_cmd_record(bool enable); +extern void trace_event_enable_tgid_record(bool enable); + extern int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr); extern int event_trace_del_tracer(struct trace_array *tr); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 83dfd0dbbbfe..36132f9280e6 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -343,6 +343,28 @@ void trace_event_enable_cmd_record(bool enable) mutex_unlock(&event_mutex); } +void trace_event_enable_tgid_record(bool enable) +{ + struct trace_event_file *file; + struct trace_array *tr; + + mutex_lock(&event_mutex); + do_for_each_event_file(tr, file) { + if (!(file->flags & EVENT_FILE_FL_ENABLED)) + continue; + + if (enable) { + tracing_start_tgid_record(); + set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags); + } else { + tracing_stop_tgid_record(); + clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, + &file->flags); + } + } while_for_each_event_file(); + mutex_unlock(&event_mutex); +} + static int __ftrace_event_enable_disable(struct trace_event_file *file, int enable, int soft_disable) { @@ -381,6 +403,12 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file, tracing_stop_cmdline_record(); clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags); } + + if (file->flags & EVENT_FILE_FL_RECORDED_TGID) { + tracing_stop_tgid_record(); + 
clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags); + } + call->class->reg(call, TRACE_REG_UNREGISTER, file); } /* If in SOFT_MODE, just set the SOFT_DISABLE_BIT, else clear it */ @@ -407,18 +435,30 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file, } if (!(file->flags & EVENT_FILE_FL_ENABLED)) { + bool cmd = false, tgid = false; /* Keep the event disabled, when going to SOFT_MODE. */ if (soft_disable) set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags); if (tr->trace_flags & TRACE_ITER_RECORD_CMD) { + cmd = true; tracing_start_cmdline_record(); set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags); } + + if (tr->trace_flags & TRACE_ITER_RECORD_TGID) { + tgid = true; + tracing_start_tgid_record(); + set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags); + } + ret = call->class->reg(call, TRACE_REG_REGISTER, file); if (ret) { - tracing_stop_cmdline_record(); + if (cmd) + tracing_stop_cmdline_record(); + if (tgid) + tracing_stop_tgid_record(); pr_info("event trace: Could not enable event " "%s\n", trace_event_name(call)); break; diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 4c896a0101bd..b341c02730be 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -12,27 +12,38 @@ #include "trace.h" -static int sched_ref; +#define RECORD_CMDLINE 1 +#define RECORD_TGID 2 + +static int sched_cmdline_ref; +static int sched_tgid_ref; static DEFINE_MUTEX(sched_register_mutex); static void probe_sched_switch(void *ignore, bool preempt, struct task_struct *prev, struct task_struct *next) { - if (unlikely(!sched_ref)) - return; + int flags; + + flags = (RECORD_TGID * !!sched_tgid_ref) + + (RECORD_CMDLINE * !!sched_cmdline_ref); - tracing_record_cmdline(prev); - tracing_record_cmdline(next); + if (!flags) + return; + tracing_record_taskinfo_sched_switch(prev, next, flags); } static void probe_sched_wakeup(void *ignore, struct task_struct *wakee) { - if (unlikely(!sched_ref)) - return; + int flags; + + flags = (RECORD_TGID * !!sched_tgid_ref) + + (RECORD_CMDLINE * !!sched_cmdline_ref); - tracing_record_cmdline(current); + if (!flags) + return; + tracing_record_taskinfo(current, flags); } static int tracing_sched_register(void) @@ -75,28 +86,61 @@ static void tracing_sched_unregister(void) unregister_trace_sched_wakeup(probe_sched_wakeup, NULL); } -static void tracing_start_sched_switch(void) +static void tracing_start_sched_switch(int ops) { + bool sched_register = (!sched_cmdline_ref && !sched_tgid_ref); mutex_lock(&sched_register_mutex); - if (!(sched_ref++)) + + switch (ops) { + case RECORD_CMDLINE: + sched_cmdline_ref++; + break; + + case RECORD_TGID: + sched_tgid_ref++; + break; + } + + if (sched_register && (sched_cmdline_ref || sched_tgid_ref)) tracing_sched_register(); mutex_unlock(&sched_register_mutex); } -static void tracing_stop_sched_switch(void) +static void tracing_stop_sched_switch(int ops) { mutex_lock(&sched_register_mutex); - if (!(--sched_ref)) + + switch (ops) { + case RECORD_CMDLINE: + sched_cmdline_ref--; + break; + + case RECORD_TGID: + sched_tgid_ref--; + break; + } + + if (!sched_cmdline_ref && !sched_tgid_ref) tracing_sched_unregister(); mutex_unlock(&sched_register_mutex); } void tracing_start_cmdline_record(void) { - tracing_start_sched_switch(); + tracing_start_sched_switch(RECORD_CMDLINE); } void tracing_stop_cmdline_record(void) { - tracing_stop_sched_switch(); + tracing_stop_sched_switch(RECORD_CMDLINE); +} + +void tracing_start_tgid_record(void) +{ + 
tracing_start_sched_switch(RECORD_TGID); +} + +void tracing_stop_tgid_record(void) +{ + tracing_stop_sched_switch(RECORD_TGID); } -- cgit From 441dae8f2f2975c68101a84bc3f528ec95ecf7c3 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Sun, 25 Jun 2017 22:38:43 -0700 Subject: tracing: Add support for display of tgid in trace output Earlier patches introduced ability to record the tgid using the 'record-tgid' option. Here we read the tgid and output it if the option is enabled. Link: http://lkml.kernel.org/r/20170626053844.5746-3-joelaf@google.com Cc: kernel-team@android.com Cc: Ingo Molnar Tested-by: Michael Sartain Signed-off-by: Joel Fernandes Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 36 ++++++++++++++++++++++-------------- kernel/trace/trace_output.c | 9 +++++++++ 2 files changed, 31 insertions(+), 14 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ab9db750dd29..c579dea4a0eb 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3319,23 +3319,29 @@ static void print_event_info(struct trace_buffer *buf, struct seq_file *m) seq_puts(m, "#\n"); } -static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m) +static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m, + unsigned int flags) { + bool tgid = flags & TRACE_ITER_RECORD_TGID; + print_event_info(buf, m); - seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n" - "# | | | | |\n"); + + seq_printf(m, "# TASK-PID CPU# %s TIMESTAMP FUNCTION\n", tgid ? "TGID " : ""); + seq_printf(m, "# | | | %s | |\n", tgid ? " | " : ""); } -static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m) +static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m, + unsigned int flags) { - print_event_info(buf, m); - seq_puts(m, "# _-----=> irqs-off\n" - "# / _----=> need-resched\n" - "# | / _---=> hardirq/softirq\n" - "# || / _--=> preempt-depth\n" - "# ||| / delay\n" - "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n" - "# | | | |||| | |\n"); + bool tgid = flags & TRACE_ITER_RECORD_TGID; + + seq_printf(m, "# %s _-----=> irqs-off\n", tgid ? " " : ""); + seq_printf(m, "# %s / _----=> need-resched\n", tgid ? " " : ""); + seq_printf(m, "# %s| / _---=> hardirq/softirq\n", tgid ? " " : ""); + seq_printf(m, "# %s|| / _--=> preempt-depth\n", tgid ? " " : ""); + seq_printf(m, "# %s||| / delay\n", tgid ? " " : ""); + seq_printf(m, "# TASK-PID CPU#%s|||| TIMESTAMP FUNCTION\n", tgid ? " TGID " : ""); + seq_printf(m, "# | | | %s|||| | |\n", tgid ? 
" | " : ""); } void @@ -3651,9 +3657,11 @@ void trace_default_header(struct seq_file *m) } else { if (!(trace_flags & TRACE_ITER_VERBOSE)) { if (trace_flags & TRACE_ITER_IRQ_INFO) - print_func_help_header_irq(iter->trace_buffer, m); + print_func_help_header_irq(iter->trace_buffer, + m, trace_flags); else - print_func_help_header(iter->trace_buffer, m); + print_func_help_header(iter->trace_buffer, m, + trace_flags); } } } diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 01ff99969ca7..bac629af2285 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -597,6 +597,15 @@ int trace_print_context(struct trace_iterator *iter) trace_seq_printf(s, "%16s-%-5d [%03d] ", comm, entry->pid, iter->cpu); + if (tr->trace_flags & TRACE_ITER_RECORD_TGID) { + unsigned int tgid = trace_find_tgid(entry->pid); + + if (!tgid) + trace_seq_printf(s, "(-----) "); + else + trace_seq_printf(s, "(%5d) ", tgid); + } + if (tr->trace_flags & TRACE_ITER_IRQ_INFO) trace_print_lat_fmt(s, entry); -- cgit From 3b58a3c72f484393c65995a551902945f5a18c70 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 28 Jun 2017 09:09:38 -0400 Subject: ftrace: Unlock hash mutex on failed allocation in process_mod_list() If the new_hash fails to allocate, then unlock the hash mutex on error. Reported-by: Julia Lawall Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 7509ef9810bf..2c79630cd267 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3997,7 +3997,7 @@ static void process_mod_list(struct list_head *head, struct ftrace_ops *ops, new_hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash); if (!new_hash) - return; /* Warn? */ + goto out; /* warn? */ mutex_lock(&ftrace_lock); @@ -4042,6 +4042,7 @@ static void process_mod_list(struct list_head *head, struct ftrace_ops *ops, new_hash, enable); mutex_unlock(&ftrace_lock); + out: mutex_unlock(&ops->func_hash->regex_lock); free_ftrace_hash(new_hash); -- cgit From 4ec78467858739c0119569c0610676aa50dfa8fb Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 28 Jun 2017 11:57:03 -0400 Subject: ftrace: Decrement count for dyn_ftrace_total_info for init functions Init boot up functions may be traced, but they are also freed when the kernel finishes booting. These are removed from the ftrace tables, and the debug variable for dyn_ftrace_total_info needs to reflect that as well. Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 2c79630cd267..e392f750a1cf 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5825,6 +5825,7 @@ void __init ftrace_free_init_mem(void) if (!rec) continue; pg->index--; + ftrace_update_tot_cnt--; if (!pg->index) { *last_pg = pg->next; order = get_count_order(pg->size / ENTRIES_PER_PAGE); -- cgit From f96da09473b52c09125cc9bf7d7d4576ae8229e0 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 2 Jul 2017 02:13:27 +0200 Subject: bpf: simplify narrower ctx access This work tries to make the semantics and code around the narrower ctx access a bit easier to follow. Right now everything is done inside the .is_valid_access(). 
Offset matching is done differently for read/write types, meaning writes don't support narrower access and thus matching only on offsetof(struct foo, bar) is enough, whereas for the read case, which supports narrower access, we must check the whole range from offsetof(struct foo, bar) to offsetof(struct foo, bar) + sizeof(bar) - 1 for each of the cases. For read cases of individual members that don't support narrower access (like packet pointers or the skb->cb[] case, which has its own narrow access logic), we check as usual only offsetof(struct foo, bar), as in the write case. Then, for the case where narrower access is allowed, we also need to set the aux info for the access. Meaning, ctx_field_size and converted_op_size have to be set. First is the original field size, e.g. sizeof(bar) as in the above example, from the user facing ctx, and the latter one is the target size after the actual rewrite happened, thus for the kernel facing ctx. Also here we need the range match, and we need to keep convert_ctx_access() and the converted_op_size from is_valid_access() in sync, as both are not at the same location. We can simplify the code a bit: check_ctx_access() becomes simpler in that we only store ctx_field_size as meta data and later in convert_ctx_accesses() we fetch the target_size right from the location where we do the convert. Should the verifier be misconfigured, we reject BPF_WRITE cases or a target_size that is not provided. For the subsystems, we always work on ranges in is_valid_access() and add small helpers for ranges and narrow access, and convert_ctx_accesses() sets target_size for the relevant instruction. Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Cc: Yonghong Song Signed-off-by: David S. Miller --- include/linux/bpf.h | 9 +- include/linux/filter.h | 47 ++++++++++ kernel/bpf/verifier.c | 78 +++++++--------- kernel/trace/bpf_trace.c | 31 +++--- net/core/filter.c | 239 +++++++++++++++++++++-------------------- 5 files changed, 209 insertions(+), 195 deletions(-) (limited to 'kernel/trace') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5175729270d7..b69e7a5869ff 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -156,9 +156,14 @@ struct bpf_prog; struct bpf_insn_access_aux { enum bpf_reg_type reg_type; int ctx_field_size; - int converted_op_size; }; +static inline void +bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size) +{ + aux->ctx_field_size = size; +} + struct bpf_verifier_ops { /* return eBPF function prototype for verification */ const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id); @@ -173,7 +178,7 @@ struct bpf_verifier_ops { u32 (*convert_ctx_access)(enum bpf_access_type type, const struct bpf_insn *src, struct bpf_insn *dst, - struct bpf_prog *prog); + struct bpf_prog *prog, u32 *target_size); int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); }; diff --git a/include/linux/filter.h b/include/linux/filter.h index 738f8b14f025..f1fc9baa3509 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -337,6 +337,22 @@ struct bpf_prog_aux; bpf_size; \ }) +#define bpf_size_to_bytes(bpf_size) \ +({ \ + int bytes = -EINVAL; \ + \ + if (bpf_size == BPF_B) \ + bytes = sizeof(u8); \ + else if (bpf_size == BPF_H) \ + bytes = sizeof(u16); \ + else if (bpf_size == BPF_W) \ + bytes = sizeof(u32); \ + else if (bpf_size == BPF_DW) \ + bytes = sizeof(u64); \ + \ + bytes; \ +}) + #define BPF_SIZEOF(type) \ ({ \ const int __size = bytes_to_bpf_size(sizeof(type)); \ @@ -351,6 +367,13 @@ struct
bpf_prog_aux; __size; \ }) +#define BPF_LDST_BYTES(insn) \ + ({ \ + const int __size = bpf_size_to_bytes(BPF_SIZE(insn->code)); \ + WARN_ON(__size < 0); \ + __size; \ + }) + #define __BPF_MAP_0(m, v, ...) v #define __BPF_MAP_1(m, v, t, a, ...) m(t, a) #define __BPF_MAP_2(m, v, t, a, ...) m(t, a), __BPF_MAP_1(m, v, __VA_ARGS__) @@ -401,6 +424,18 @@ struct bpf_prog_aux; #define BPF_CALL_4(name, ...) BPF_CALL_x(4, name, __VA_ARGS__) #define BPF_CALL_5(name, ...) BPF_CALL_x(5, name, __VA_ARGS__) +#define bpf_ctx_range(TYPE, MEMBER) \ + offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1 +#define bpf_ctx_range_till(TYPE, MEMBER1, MEMBER2) \ + offsetof(TYPE, MEMBER1) ... offsetofend(TYPE, MEMBER2) - 1 + +#define bpf_target_off(TYPE, MEMBER, SIZE, PTR_SIZE) \ + ({ \ + BUILD_BUG_ON(FIELD_SIZEOF(TYPE, MEMBER) != (SIZE)); \ + *(PTR_SIZE) = (SIZE); \ + offsetof(TYPE, MEMBER); \ + }) + #ifdef CONFIG_COMPAT /* A struct sock_filter is architecture independent. */ struct compat_sock_fprog { @@ -564,6 +599,18 @@ static inline bool bpf_prog_was_classic(const struct bpf_prog *prog) return prog->type == BPF_PROG_TYPE_UNSPEC; } +static inline bool +bpf_ctx_narrow_access_ok(u32 off, u32 size, const u32 size_default) +{ + bool off_ok; +#ifdef __LITTLE_ENDIAN + off_ok = (off & (size_default - 1)) == 0; +#else + off_ok = (off & (size_default - 1)) + size == size_default; +#endif + return off_ok && size <= size_default && (size & (size - 1)) == 0; +} + #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0])) #ifdef CONFIG_ARCH_HAS_SET_MEMORY diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 6ea2adcb233b..6f820a044079 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -546,20 +546,6 @@ static int check_reg_arg(struct bpf_reg_state *regs, u32 regno, return 0; } -static int bpf_size_to_bytes(int bpf_size) -{ - if (bpf_size == BPF_W) - return 4; - else if (bpf_size == BPF_H) - return 2; - else if (bpf_size == BPF_B) - return 1; - else if (bpf_size == BPF_DW) - return 8; - else - return -EINVAL; -} - static bool is_spillable_regtype(enum bpf_reg_type type) { switch (type) { @@ -761,7 +747,9 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off, static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size, enum bpf_access_type t, enum bpf_reg_type *reg_type) { - struct bpf_insn_access_aux info = { .reg_type = *reg_type }; + struct bpf_insn_access_aux info = { + .reg_type = *reg_type, + }; /* for analyzer ctx accesses are already validated and converted */ if (env->analyzer_ops) @@ -769,25 +757,14 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, if (env->prog->aux->ops->is_valid_access && env->prog->aux->ops->is_valid_access(off, size, t, &info)) { - /* a non zero info.ctx_field_size indicates: - * . For this field, the prog type specific ctx conversion algorithm - * only supports whole field access. - * . This ctx access is a candiate for later verifier transformation - * to load the whole field and then apply a mask to get correct result. - * a non zero info.converted_op_size indicates perceived actual converted - * value width in convert_ctx_access. + /* A non zero info.ctx_field_size indicates that this field is a + * candidate for later verifier transformation to load the whole + * field and then apply a mask when accessed with a narrower + * access than actual ctx access size. 
A zero info.ctx_field_size + * will only allow for whole field access and rejects any other + * type of narrower access. */ - if ((info.ctx_field_size && !info.converted_op_size) || - (!info.ctx_field_size && info.converted_op_size)) { - verbose("verifier bug in is_valid_access prog type=%u off=%d size=%d\n", - env->prog->type, off, size); - return -EACCES; - } - - if (info.ctx_field_size) { - env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size; - env->insn_aux_data[insn_idx].converted_op_size = info.converted_op_size; - } + env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size; *reg_type = info.reg_type; /* remember the offset of last byte accessed in ctx */ @@ -3401,11 +3378,13 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of static int convert_ctx_accesses(struct bpf_verifier_env *env) { const struct bpf_verifier_ops *ops = env->prog->aux->ops; + int i, cnt, size, ctx_field_size, delta = 0; const int insn_cnt = env->prog->len; struct bpf_insn insn_buf[16], *insn; struct bpf_prog *new_prog; enum bpf_access_type type; - int i, cnt, off, size, ctx_field_size, converted_op_size, is_narrower_load, delta = 0; + bool is_narrower_load; + u32 target_size; if (ops->gen_prologue) { cnt = ops->gen_prologue(insn_buf, env->seen_direct_write, @@ -3445,39 +3424,50 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX) continue; - off = insn->off; - size = bpf_size_to_bytes(BPF_SIZE(insn->code)); ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size; - converted_op_size = env->insn_aux_data[i + delta].converted_op_size; - is_narrower_load = type == BPF_READ && size < ctx_field_size; + size = BPF_LDST_BYTES(insn); /* If the read access is a narrower load of the field, * convert to a 4/8-byte load, to minimum program type specific * convert_ctx_access changes. If conversion is successful, * we will apply proper mask to the result. 
*/ + is_narrower_load = size < ctx_field_size; if (is_narrower_load) { - int size_code = BPF_H; + u32 off = insn->off; + u8 size_code; + + if (type == BPF_WRITE) { + verbose("bpf verifier narrow ctx access misconfigured\n"); + return -EINVAL; + } + size_code = BPF_H; if (ctx_field_size == 4) size_code = BPF_W; else if (ctx_field_size == 8) size_code = BPF_DW; + insn->off = off & ~(ctx_field_size - 1); insn->code = BPF_LDX | BPF_MEM | size_code; } - cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog); - if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { + + target_size = 0; + cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog, + &target_size); + if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) || + (ctx_field_size && !target_size)) { verbose("bpf verifier is misconfigured\n"); return -EINVAL; } - if (is_narrower_load && size < converted_op_size) { + + if (is_narrower_load && size < target_size) { if (ctx_field_size <= 4) insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg, - (1 << size * 8) - 1); + (1 << size * 8) - 1); else insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg, - (1 << size * 8) - 1); + (1 << size * 8) - 1); } new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 97c46b440cd6..5c6d538dbf43 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -583,7 +583,8 @@ const struct bpf_verifier_ops tracepoint_prog_ops = { static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info) { - int sample_period_off; + const int size_sp = FIELD_SIZEOF(struct bpf_perf_event_data, + sample_period); if (off < 0 || off >= sizeof(struct bpf_perf_event_data)) return false; @@ -592,43 +593,35 @@ static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type if (off % size != 0) return false; - /* permit 1, 2, 4 byte narrower and 8 normal read access to sample_period */ - sample_period_off = offsetof(struct bpf_perf_event_data, sample_period); - if (off >= sample_period_off && off < sample_period_off + sizeof(__u64)) { - int allowed; - -#ifdef __LITTLE_ENDIAN - allowed = (off & 0x7) == 0 && size <= 8 && (size & (size - 1)) == 0; -#else - allowed = ((off & 0x7) + size) == 8 && size <= 8 && (size & (size - 1)) == 0; -#endif - if (!allowed) + switch (off) { + case bpf_ctx_range(struct bpf_perf_event_data, sample_period): + bpf_ctx_record_field_size(info, size_sp); + if (!bpf_ctx_narrow_access_ok(off, size, size_sp)) return false; - info->ctx_field_size = 8; - info->converted_op_size = 8; - } else { + break; + default: if (size != sizeof(long)) return false; } + return true; } static u32 pe_prog_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, - struct bpf_prog *prog) + struct bpf_prog *prog, u32 *target_size) { struct bpf_insn *insn = insn_buf; switch (si->off) { case offsetof(struct bpf_perf_event_data, sample_period): - BUILD_BUG_ON(FIELD_SIZEOF(struct perf_sample_data, period) != sizeof(u64)); - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern, data), si->dst_reg, si->src_reg, offsetof(struct bpf_perf_event_data_kern, data)); *insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg, - offsetof(struct perf_sample_data, period)); + bpf_target_off(struct perf_sample_data, period, 8, + target_size)); break; default: *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern, diff --git a/net/core/filter.c b/net/core/filter.c index 
29620df45b7c..94169572d002 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3088,38 +3088,11 @@ lwt_xmit_func_proto(enum bpf_func_id func_id) } } -static void __set_access_aux_info(int off, struct bpf_insn_access_aux *info) +static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type, + struct bpf_insn_access_aux *info) { - info->ctx_field_size = 4; - switch (off) { - case offsetof(struct __sk_buff, pkt_type) ... - offsetof(struct __sk_buff, pkt_type) + sizeof(__u32) - 1: - case offsetof(struct __sk_buff, vlan_present) ... - offsetof(struct __sk_buff, vlan_present) + sizeof(__u32) - 1: - info->converted_op_size = 1; - break; - case offsetof(struct __sk_buff, queue_mapping) ... - offsetof(struct __sk_buff, queue_mapping) + sizeof(__u32) - 1: - case offsetof(struct __sk_buff, protocol) ... - offsetof(struct __sk_buff, protocol) + sizeof(__u32) - 1: - case offsetof(struct __sk_buff, vlan_tci) ... - offsetof(struct __sk_buff, vlan_tci) + sizeof(__u32) - 1: - case offsetof(struct __sk_buff, vlan_proto) ... - offsetof(struct __sk_buff, vlan_proto) + sizeof(__u32) - 1: - case offsetof(struct __sk_buff, tc_index) ... - offsetof(struct __sk_buff, tc_index) + sizeof(__u32) - 1: - case offsetof(struct __sk_buff, tc_classid) ... - offsetof(struct __sk_buff, tc_classid) + sizeof(__u32) - 1: - info->converted_op_size = 2; - break; - default: - info->converted_op_size = 4; - } -} + const int size_default = sizeof(__u32); -static bool __is_valid_access(int off, int size, enum bpf_access_type type, - struct bpf_insn_access_aux *info) -{ if (off < 0 || off >= sizeof(struct __sk_buff)) return false; @@ -3128,40 +3101,24 @@ static bool __is_valid_access(int off, int size, enum bpf_access_type type, return false; switch (off) { - case offsetof(struct __sk_buff, cb[0]) ... - offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1: - if (off + size > - offsetof(struct __sk_buff, cb[4]) + sizeof(__u32)) + case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): + if (off + size > offsetofend(struct __sk_buff, cb[4])) return false; break; - case offsetof(struct __sk_buff, data) ... - offsetof(struct __sk_buff, data) + sizeof(__u32) - 1: - if (size != sizeof(__u32)) + case bpf_ctx_range(struct __sk_buff, data): + case bpf_ctx_range(struct __sk_buff, data_end): + if (size != size_default) return false; - info->reg_type = PTR_TO_PACKET; - break; - case offsetof(struct __sk_buff, data_end) ... - offsetof(struct __sk_buff, data_end) + sizeof(__u32) - 1: - if (size != sizeof(__u32)) - return false; - info->reg_type = PTR_TO_PACKET_END; break; default: + /* Only narrow read access allowed for now. */ if (type == BPF_WRITE) { - if (size != sizeof(__u32)) + if (size != size_default) return false; } else { - int allowed; - - /* permit narrower load for not cb/data/data_end fields */ -#ifdef __LITTLE_ENDIAN - allowed = (off & 0x3) == 0 && size <= 4 && (size & (size - 1)) == 0; -#else - allowed = (off & 0x3) + size == 4 && size <= 4 && (size & (size - 1)) == 0; -#endif - if (!allowed) + bpf_ctx_record_field_size(info, size_default); + if (!bpf_ctx_narrow_access_ok(off, size, size_default)) return false; - __set_access_aux_info(off, info); } } @@ -3173,26 +3130,22 @@ static bool sk_filter_is_valid_access(int off, int size, struct bpf_insn_access_aux *info) { switch (off) { - case offsetof(struct __sk_buff, tc_classid) ... - offsetof(struct __sk_buff, tc_classid) + sizeof(__u32) - 1: - case offsetof(struct __sk_buff, data) ... 
- offsetof(struct __sk_buff, data) + sizeof(__u32) - 1: - case offsetof(struct __sk_buff, data_end) ... - offsetof(struct __sk_buff, data_end) + sizeof(__u32) - 1: + case bpf_ctx_range(struct __sk_buff, tc_classid): + case bpf_ctx_range(struct __sk_buff, data): + case bpf_ctx_range(struct __sk_buff, data_end): return false; } if (type == BPF_WRITE) { switch (off) { - case offsetof(struct __sk_buff, cb[0]) ... - offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1: + case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): break; default: return false; } } - return __is_valid_access(off, size, type, info); + return bpf_skb_is_valid_access(off, size, type, info); } static bool lwt_is_valid_access(int off, int size, @@ -3200,24 +3153,31 @@ static bool lwt_is_valid_access(int off, int size, struct bpf_insn_access_aux *info) { switch (off) { - case offsetof(struct __sk_buff, tc_classid) ... - offsetof(struct __sk_buff, tc_classid) + sizeof(__u32) - 1: + case bpf_ctx_range(struct __sk_buff, tc_classid): return false; } if (type == BPF_WRITE) { switch (off) { - case offsetof(struct __sk_buff, mark): - case offsetof(struct __sk_buff, priority): - case offsetof(struct __sk_buff, cb[0]) ... - offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1: + case bpf_ctx_range(struct __sk_buff, mark): + case bpf_ctx_range(struct __sk_buff, priority): + case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): break; default: return false; } } - return __is_valid_access(off, size, type, info); + switch (off) { + case bpf_ctx_range(struct __sk_buff, data): + info->reg_type = PTR_TO_PACKET; + break; + case bpf_ctx_range(struct __sk_buff, data_end): + info->reg_type = PTR_TO_PACKET_END; + break; + } + + return bpf_skb_is_valid_access(off, size, type, info); } static bool sock_filter_is_valid_access(int off, int size, @@ -3289,19 +3249,27 @@ static bool tc_cls_act_is_valid_access(int off, int size, { if (type == BPF_WRITE) { switch (off) { - case offsetof(struct __sk_buff, mark): - case offsetof(struct __sk_buff, tc_index): - case offsetof(struct __sk_buff, priority): - case offsetof(struct __sk_buff, cb[0]) ... 
- offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1: - case offsetof(struct __sk_buff, tc_classid): + case bpf_ctx_range(struct __sk_buff, mark): + case bpf_ctx_range(struct __sk_buff, tc_index): + case bpf_ctx_range(struct __sk_buff, priority): + case bpf_ctx_range(struct __sk_buff, tc_classid): + case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): break; default: return false; } } - return __is_valid_access(off, size, type, info); + switch (off) { + case bpf_ctx_range(struct __sk_buff, data): + info->reg_type = PTR_TO_PACKET; + break; + case bpf_ctx_range(struct __sk_buff, data_end): + info->reg_type = PTR_TO_PACKET_END; + break; + } + + return bpf_skb_is_valid_access(off, size, type, info); } static bool __is_valid_xdp_access(int off, int size) @@ -3374,98 +3342,108 @@ static bool sock_ops_is_valid_access(int off, int size, static u32 bpf_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, - struct bpf_prog *prog) + struct bpf_prog *prog, u32 *target_size) { struct bpf_insn *insn = insn_buf; int off; switch (si->off) { case offsetof(struct __sk_buff, len): - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); - *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, - offsetof(struct sk_buff, len)); + bpf_target_off(struct sk_buff, len, 4, + target_size)); break; case offsetof(struct __sk_buff, protocol): - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); - *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, - offsetof(struct sk_buff, protocol)); + bpf_target_off(struct sk_buff, protocol, 2, + target_size)); break; case offsetof(struct __sk_buff, vlan_proto): - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2); - *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, - offsetof(struct sk_buff, vlan_proto)); + bpf_target_off(struct sk_buff, vlan_proto, 2, + target_size)); break; case offsetof(struct __sk_buff, priority): - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, priority) != 4); - if (type == BPF_WRITE) *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, - offsetof(struct sk_buff, priority)); + bpf_target_off(struct sk_buff, priority, 4, + target_size)); else *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, - offsetof(struct sk_buff, priority)); + bpf_target_off(struct sk_buff, priority, 4, + target_size)); break; case offsetof(struct __sk_buff, ingress_ifindex): - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, skb_iif) != 4); - *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, - offsetof(struct sk_buff, skb_iif)); + bpf_target_off(struct sk_buff, skb_iif, 4, + target_size)); break; case offsetof(struct __sk_buff, ifindex): - BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev), si->dst_reg, si->src_reg, offsetof(struct sk_buff, dev)); *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1); *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, - offsetof(struct net_device, ifindex)); + bpf_target_off(struct net_device, ifindex, 4, + target_size)); break; case offsetof(struct __sk_buff, hash): - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); - *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, - offsetof(struct sk_buff, hash)); + bpf_target_off(struct sk_buff, hash, 4, + target_size)); break; case offsetof(struct __sk_buff, mark): - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); - if (type == BPF_WRITE) *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, - offsetof(struct sk_buff, mark)); + 
bpf_target_off(struct sk_buff, mark, 4, + target_size)); else *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, - offsetof(struct sk_buff, mark)); + bpf_target_off(struct sk_buff, mark, 4, + target_size)); break; case offsetof(struct __sk_buff, pkt_type): - return convert_skb_access(SKF_AD_PKTTYPE, si->dst_reg, - si->src_reg, insn); + *target_size = 1; + *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg, + PKT_TYPE_OFFSET()); + *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, PKT_TYPE_MAX); +#ifdef __BIG_ENDIAN_BITFIELD + *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 5); +#endif + break; case offsetof(struct __sk_buff, queue_mapping): - return convert_skb_access(SKF_AD_QUEUE, si->dst_reg, - si->src_reg, insn); + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, + bpf_target_off(struct sk_buff, queue_mapping, 2, + target_size)); + break; case offsetof(struct __sk_buff, vlan_present): - return convert_skb_access(SKF_AD_VLAN_TAG_PRESENT, - si->dst_reg, si->src_reg, insn); - case offsetof(struct __sk_buff, vlan_tci): - return convert_skb_access(SKF_AD_VLAN_TAG, - si->dst_reg, si->src_reg, insn); + BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000); + + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, + bpf_target_off(struct sk_buff, vlan_tci, 2, + target_size)); + if (si->off == offsetof(struct __sk_buff, vlan_tci)) { + *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, + ~VLAN_TAG_PRESENT); + } else { + *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 12); + *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, 1); + } + break; case offsetof(struct __sk_buff, cb[0]) ... - offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1: + offsetofend(struct __sk_buff, cb[4]) - 1: BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20); BUILD_BUG_ON((offsetof(struct sk_buff, cb) + offsetof(struct qdisc_skb_cb, data)) % @@ -3491,6 +3469,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, off -= offsetof(struct __sk_buff, tc_classid); off += offsetof(struct sk_buff, cb); off += offsetof(struct qdisc_skb_cb, tc_classid); + *target_size = 2; if (type == BPF_WRITE) *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg, off); @@ -3516,14 +3495,14 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, case offsetof(struct __sk_buff, tc_index): #ifdef CONFIG_NET_SCHED - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2); - if (type == BPF_WRITE) *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg, - offsetof(struct sk_buff, tc_index)); + bpf_target_off(struct sk_buff, tc_index, 2, + target_size)); else *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, - offsetof(struct sk_buff, tc_index)); + bpf_target_off(struct sk_buff, tc_index, 2, + target_size)); #else if (type == BPF_WRITE) *insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg); @@ -3534,10 +3513,9 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, case offsetof(struct __sk_buff, napi_id): #if defined(CONFIG_NET_RX_BUSY_POLL) - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, napi_id) != 4); - *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, - offsetof(struct sk_buff, napi_id)); + bpf_target_off(struct sk_buff, napi_id, 4, + target_size)); *insn++ = BPF_JMP_IMM(BPF_JGE, si->dst_reg, MIN_NAPI_ID, 1); *insn++ = BPF_MOV64_IMM(si->dst_reg, 0); #else @@ -3552,7 +3530,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, static u32 sock_filter_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, - struct bpf_prog *prog) + struct bpf_prog *prog, u32 *target_size) { 
struct bpf_insn *insn = insn_buf; @@ -3596,22 +3574,22 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type, static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, - struct bpf_prog *prog) + struct bpf_prog *prog, u32 *target_size) { struct bpf_insn *insn = insn_buf; switch (si->off) { case offsetof(struct __sk_buff, ifindex): - BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev), si->dst_reg, si->src_reg, offsetof(struct sk_buff, dev)); *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, - offsetof(struct net_device, ifindex)); + bpf_target_off(struct net_device, ifindex, 4, + target_size)); break; default: - return bpf_convert_ctx_access(type, si, insn_buf, prog); + return bpf_convert_ctx_access(type, si, insn_buf, prog, + target_size); } return insn - insn_buf; @@ -3620,7 +3598,7 @@ static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type, static u32 xdp_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, - struct bpf_prog *prog) + struct bpf_prog *prog, u32 *target_size) { struct bpf_insn *insn = insn_buf; @@ -3643,7 +3621,8 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type, static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, - struct bpf_prog *prog) + struct bpf_prog *prog, + u32 *target_size) { struct bpf_insn *insn = insn_buf; int off; -- cgit From 7bda4b40c5624c3f1c69227f8ebfd46a4b83f2ef Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sun, 2 Jul 2017 02:13:29 +0200 Subject: bpf: extend bpf_trace_printk to support %i Currently, bpf_trace_printk does not support common formatting symbol '%i' however vsprintf does and is what eventually gets called by bpf helper. If users are used to '%i' and currently make use of it, then bpf_trace_printk will just return with error without dumping anything to the trace pipe, so just add support for '%i' to the helper. Signed-off-by: John Fastabend Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/trace/bpf_trace.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 5c6d538dbf43..37385193a608 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -122,8 +122,8 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void) } /* - * limited trace_printk() - * only %d %u %x %ld %lu %lx %lld %llu %llx %p %s conversion specifiers allowed + * Only limited trace_printk() conversion specifiers allowed: + * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %s */ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, u64, arg2, u64, arg3) @@ -198,7 +198,8 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, i++; } - if (fmt[i] != 'd' && fmt[i] != 'u' && fmt[i] != 'x') + if (fmt[i] != 'i' && fmt[i] != 'd' && + fmt[i] != 'u' && fmt[i] != 'x') return -EINVAL; fmt_cnt++; } -- cgit From 69d71879d2cf67a381055f698a1d7def00dc4ed7 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 5 Jul 2017 09:45:43 -0400 Subject: ftrace: Test for NULL iter->tr in regex for stack_trace_filter changes As writing into stack_trace_filter, the iter-tr is not set and is NULL. Check if it is NULL before dereferencing it in ftrace_regex_release(). 
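For reference, the shape of the fixed check in isolation (a condensed sketch of the relevant lines of ftrace_regex_release(), not the complete function):

	if (filter_hash) {
		orig_hash = &iter->ops->func_hash->filter_hash;
		/*
		 * iter->tr is only set when the filter file belongs to a
		 * trace_array instance; writes arriving through
		 * stack_trace_filter leave it NULL, so it must be tested
		 * before the mod_trace list is dereferenced.
		 */
		if (iter->tr && !list_empty(&iter->tr->mod_trace))
			iter->hash->flags |= FTRACE_HASH_FL_MOD;
	} else {
		orig_hash = &iter->ops->func_hash->notrace_hash;
	}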
Fixes: 8c08f0d5c6fb ("ftrace: Have cached module filters be an active filter") Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index f8c18f15b190..2953d558bbee 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5043,7 +5043,7 @@ int ftrace_regex_release(struct inode *inode, struct file *file) if (filter_hash) { orig_hash = &iter->ops->func_hash->filter_hash; - if (!list_empty(&iter->tr->mod_trace)) + if (iter->tr && !list_empty(&iter->tr->mod_trace)) iter->hash->flags |= FTRACE_HASH_FL_MOD; } else orig_hash = &iter->ops->func_hash->notrace_hash; -- cgit From 99c621d704cf1c4eb74c3c42e674edf3df64f92d Mon Sep 17 00:00:00 2001 From: Michael Sartain Date: Wed, 5 Jul 2017 22:07:15 -0600 Subject: tracing: Add saved_tgids file to show cached pid to tgid mappings Export the cached pid / tgid mappings in debugfs tracing saved_tgids file. This allows user apps to translate the pids from a trace to their respective thread group. Example saved_tgids file with pid / tgid values separated by ' ': # cat saved_tgids 1048 1048 1047 1047 7 7 1049 1047 1054 1047 1053 1047 Link: http://lkml.kernel.org/r/20170630004023.064965233@goodmis.org Link: http://lkml.kernel.org/r/20170706040713.unwkumbta5menygi@mikesart-cos Reviewed-by: Joel Fernandes Signed-off-by: Michael Sartain Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 00e2e4169b1e..f079a8ca1117 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4688,6 +4688,76 @@ static const struct file_operations tracing_readme_fops = { .llseek = generic_file_llseek, }; +static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos) +{ + int *ptr = v; + + if (*pos || m->count) + ptr++; + + (*pos)++; + + for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) { + if (trace_find_tgid(*ptr)) + return ptr; + } + + return NULL; +} + +static void *saved_tgids_start(struct seq_file *m, loff_t *pos) +{ + void *v; + loff_t l = 0; + + if (!tgid_map) + return NULL; + + v = &tgid_map[0]; + while (l <= *pos) { + v = saved_tgids_next(m, v, &l); + if (!v) + return NULL; + } + + return v; +} + +static void saved_tgids_stop(struct seq_file *m, void *v) +{ +} + +static int saved_tgids_show(struct seq_file *m, void *v) +{ + int pid = (int *)v - tgid_map; + + seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid)); + return 0; +} + +static const struct seq_operations tracing_saved_tgids_seq_ops = { + .start = saved_tgids_start, + .stop = saved_tgids_stop, + .next = saved_tgids_next, + .show = saved_tgids_show, +}; + +static int tracing_saved_tgids_open(struct inode *inode, struct file *filp) +{ + if (tracing_disabled) + return -ENODEV; + + return seq_open(filp, &tracing_saved_tgids_seq_ops); +} + + +static const struct file_operations tracing_saved_tgids_fops = { + .open = tracing_saved_tgids_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos) { unsigned int *ptr = v; @@ -7920,6 +7990,9 @@ static __init int tracer_init_tracefs(void) trace_create_file("saved_cmdlines_size", 0644, d_tracer, NULL, &tracing_saved_cmdlines_size_fops); + trace_create_file("saved_tgids", 0444, d_tracer, + NULL, &tracing_saved_tgids_fops); 
+ trace_eval_init(); trace_create_eval_file(d_tracer); -- cgit From eaf260ac04d9b4cf9f458d5c97555bfff2da526e Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Thu, 6 Jul 2017 16:00:21 -0700 Subject: tracing: Treat recording comm for idle task as a success Currently we stop recording comm for non-idle tasks when switching from/to idle task since we treat that as a record failure. Fix that by treat recording of comm for idle task as a success. Link: http://lkml.kernel.org/r/20170706230023.17942-1-joelaf@google.com Cc: kernel-team@android.com Cc: Ingo Molnar Reported-by: Michael Sartain Signed-off-by: Joel Fernandes Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f079a8ca1117..6722d86f2af5 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1916,7 +1916,11 @@ static int trace_save_cmdline(struct task_struct *tsk) { unsigned pid, idx; - if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT)) + /* treat recording of idle task as a success */ + if (!tsk->pid) + return 1; + + if (unlikely(tsk->pid > PID_MAX_DEFAULT)) return 0; /* -- cgit From bd45d34d25720a820021c8ea45de5cd607eace64 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Thu, 6 Jul 2017 16:00:22 -0700 Subject: tracing: Treat recording tgid for idle task as a success Currently we stop recording tgid for non-idle tasks when switching from/to idle task since we treat that as a record failure. Fix that by treat recording of tgid for idle task as a success. Link: http://lkml.kernel.org/r/20170706230023.17942-2-joelaf@google.com Cc: kernel-team@android.com Cc: Ingo Molnar Reported-by: Michael Sartain Signed-off-by: Joel Fernandes Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 6722d86f2af5..aee11e3a394f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2006,7 +2006,11 @@ int trace_find_tgid(int pid) static int trace_save_tgid(struct task_struct *tsk) { - if (unlikely(!tgid_map || !tsk->pid || tsk->pid > PID_MAX_DEFAULT)) + /* treat recording of idle task as a success */ + if (!tsk->pid) + return 1; + + if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT)) return 0; tgid_map[tsk->pid] = tsk->tgid; -- cgit From 29b1a8ad7df4528b862a79e3d5fb0936f4d199c7 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Thu, 6 Jul 2017 16:00:23 -0700 Subject: tracing: Attempt to record other information even if some fail In recent patches where we record comm and tgid at the same time, we skip continuing to record if any fail. Fix that by trying to record as many things as we can even if some couldn't be recorded. If any information isn't recorded, then we don't set trace_taskinfo_save as before. 
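The resulting pattern, condensed (helper names as in the hunk below; each helper returns nonzero on success):

	bool done;

	/* Attempt every requested record; one failure must not
	 * short-circuit the others. */
	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
	done &= !(flags & TRACE_RECORD_TGID)    || trace_save_tgid(task);

	/* Clear the per-cpu flag only on full success, so a partial
	 * failure is retried on the next recorded event. */
	if (done)
		__this_cpu_write(trace_taskinfo_save, false);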
Link: http://lkml.kernel.org/r/20170706230023.17942-3-joelaf@google.com Cc: kernel-team@android.com Cc: Ingo Molnar Signed-off-by: Joel Fernandes Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index aee11e3a394f..92af8fd1429b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2037,11 +2037,20 @@ static bool tracing_record_taskinfo_skip(int flags) */ void tracing_record_taskinfo(struct task_struct *task, int flags) { + bool done; + if (tracing_record_taskinfo_skip(flags)) return; - if ((flags & TRACE_RECORD_CMDLINE) && !trace_save_cmdline(task)) - return; - if ((flags & TRACE_RECORD_TGID) && !trace_save_tgid(task)) + + /* + * Record as much task information as possible. If some fail, continue + * to try to record the others. + */ + done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task); + done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task); + + /* If recording any information failed, retry again soon. */ + if (!done) return; __this_cpu_write(trace_taskinfo_save, false); @@ -2058,15 +2067,22 @@ void tracing_record_taskinfo(struct task_struct *task, int flags) void tracing_record_taskinfo_sched_switch(struct task_struct *prev, struct task_struct *next, int flags) { + bool done; + if (tracing_record_taskinfo_skip(flags)) return; - if ((flags & TRACE_RECORD_CMDLINE) && - (!trace_save_cmdline(prev) || !trace_save_cmdline(next))) - return; + /* + * Record as much task information as possible. If some fail, continue + * to try to record the others. + */ + done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev); + done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next); + done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev); + done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next); - if ((flags & TRACE_RECORD_TGID) && - (!trace_save_tgid(prev) || !trace_save_tgid(next))) + /* If recording any information failed, retry again soon. */ + if (!done) return; __this_cpu_write(trace_taskinfo_save, false); -- cgit From fca18a47cf3eb8425ec19c2dfc374f3d04f5219f Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Sat, 8 Jul 2017 00:27:30 +0530 Subject: trace/kprobes: Sanitize derived event names When we derive event names, convert some expected symbols (such as ':' used to specify module:name and '.' present in some symbols) into underscores so that the event name is not rejected. Before this patch: # echo 'p kobject_example:foo_store' > kprobe_events trace_kprobe: Failed to allocate trace_probe.(-22) -sh: write error: Invalid argument After this patch: # echo 'p kobject_example:foo_store' > kprobe_events # cat kprobe_events p:kprobes/p_kobject_example_foo_store_0 kobject_example:foo_store Link: http://lkml.kernel.org/r/66c189e09e71361aba91dd4a5bd146a1b62a7a51.1499453040.git.naveen.n.rao@linux.vnet.ibm.com Acked-by: Masami Hiramatsu Signed-off-by: Naveen N. 
Rao Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index c129fca6ec99..44fd819aa33d 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -598,6 +598,14 @@ static struct notifier_block trace_kprobe_module_nb = { .priority = 1 /* Invoked after kprobe module callback */ }; +/* Convert certain expected symbols into '_' when generating event names */ +static inline void sanitize_event_name(char *name) +{ + while (*name++ != '\0') + if (*name == ':' || *name == '.') + *name = '_'; +} + static int create_trace_kprobe(int argc, char **argv) { /* @@ -740,6 +748,7 @@ static int create_trace_kprobe(int argc, char **argv) else snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p", is_return ? 'r' : 'p', addr); + sanitize_event_name(buf); event = buf; } tk = alloc_trace_kprobe(group, event, addr, symbol, offset, maxactive, -- cgit From b11fb73743fc406204e0749ead18560aeda8b136 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 11 Jul 2017 15:43:24 -0400 Subject: tracing: Fixup trace file header alignment The addition of TGID to the tracing header added a check to see if TGID shoudl be displayed or not, and updated the header accordingly. Unfortunately, it broke the default header. Also add constant strings to use for spacing. This does remove the visibility of the header a bit, but cuts it down from the extended lines much greater than 80 characters. Before this change: # tracer: function # # _-----=> irqs-off # / _----=> need-resched # | / _---=> hardirq/softirq # || / _--=> preempt-depth # ||| / delay # TASK-PID CPU#|||| TIMESTAMP FUNCTION # | | | |||| | | swapper/0-1 [000] .... 0.277830: migration_init <-do_one_initcall swapper/0-1 [002] d... 13.861967: Unknown type 1201 swapper/0-1 [002] d..1 13.861970: Unknown type 1202 After this change: # tracer: function # # _-----=> irqs-off # / _----=> need-resched # | / _---=> hardirq/softirq # || / _--=> preempt-depth # ||| / delay # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | swapper/0-1 [000] .... 0.278245: migration_init <-do_one_initcall swapper/0-1 [003] d... 13.861189: Unknown type 1201 swapper/0-1 [003] d..1 13.861192: Unknown type 1202 Cc: Joel Fernandes Fixes: 441dae8f2f29 ("tracing: Add support for display of tgid in trace output") Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 92af8fd1429b..dabd810a10cd 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3358,14 +3358,23 @@ static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file unsigned int flags) { bool tgid = flags & TRACE_ITER_RECORD_TGID; - - seq_printf(m, "# %s _-----=> irqs-off\n", tgid ? " " : ""); - seq_printf(m, "# %s / _----=> need-resched\n", tgid ? " " : ""); - seq_printf(m, "# %s| / _---=> hardirq/softirq\n", tgid ? " " : ""); - seq_printf(m, "# %s|| / _--=> preempt-depth\n", tgid ? " " : ""); - seq_printf(m, "# %s||| / delay\n", tgid ? " " : ""); - seq_printf(m, "# TASK-PID CPU#%s|||| TIMESTAMP FUNCTION\n", tgid ? " TGID " : ""); - seq_printf(m, "# | | | %s|||| | |\n", tgid ? " | " : ""); + const char tgid_space[] = " "; + const char space[] = " "; + + seq_printf(m, "# %s _-----=> irqs-off\n", + tgid ? 
tgid_space : space); + seq_printf(m, "# %s / _----=> need-resched\n", + tgid ? tgid_space : space); + seq_printf(m, "# %s| / _---=> hardirq/softirq\n", + tgid ? tgid_space : space); + seq_printf(m, "# %s|| / _--=> preempt-depth\n", + tgid ? tgid_space : space); + seq_printf(m, "# %s||| / delay\n", + tgid ? tgid_space : space); + seq_printf(m, "# TASK-PID CPU#%s|||| TIMESTAMP FUNCTION\n", + tgid ? " TGID " : space); + seq_printf(m, "# | | | %s|||| | |\n", + tgid ? " | " : space); } void -- cgit From bbd1d27d863d5c0acee65ecd0c2e34035e1df5ea Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 11 Jul 2017 19:21:04 -0400 Subject: tracing: Do note expose stack_trace_filter without DYNAMIC_FTRACE The "stack_trace_filter" file only makes sense if DYNAMIC_FTRACE is configured in. If it is not, then the user can not filter any functions. Not only that, the open function causes warnings when DYNAMIC_FTRACE is not set. Link: http://lkml.kernel.org/r/20170710110521.600806-1-arnd@arndb.de Reported-by: Arnd Bergmann Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_stack.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index b4a751e8f9d6..a4df67cbc711 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -406,6 +406,8 @@ static const struct file_operations stack_trace_fops = { .release = seq_release, }; +#ifdef CONFIG_DYNAMIC_FTRACE + static int stack_trace_filter_open(struct inode *inode, struct file *file) { @@ -423,6 +425,8 @@ static const struct file_operations stack_trace_filter_fops = { .release = ftrace_regex_release, }; +#endif /* CONFIG_DYNAMIC_FTRACE */ + int stack_trace_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, @@ -477,8 +481,10 @@ static __init int stack_trace_init(void) trace_create_file("stack_trace", 0444, d_tracer, NULL, &stack_trace_fops); +#ifdef CONFIG_DYNAMIC_FTRACE trace_create_file("stack_trace_filter", 0444, d_tracer, &trace_ops, &stack_trace_filter_fops); +#endif if (stack_trace_filter_buf[0]) ftrace_set_early_filter(&trace_ops, stack_trace_filter_buf, 1); -- cgit From 69449bbd65687e8e5fb968a5a0c46089f6af6001 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 10 Jul 2017 10:44:03 +0200 Subject: ftrace: Hide cached module code for !CONFIG_MODULES When modules are disabled, we get a harmless build warning: kernel/trace/ftrace.c:4051:13: error: 'process_cached_mods' defined but not used [-Werror=unused-function] This adds the same #ifdef around the new code that exists around its caller. 
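Reduced to a standalone illustration (not kernel code; CONFIG_MODULES here is just a stand-in macro that may or may not be defined at compile time): guarding both the definition and its only call site with the same #ifdef keeps -Werror=unused-function quiet in either configuration.

	#include <stdio.h>

	#ifdef CONFIG_MODULES
	/* Compiled in only together with its caller below, so the
	 * unused-function warning can never trigger. */
	static void process_cached_mods(const char *mod_name)
	{
		printf("applying cached filters to %s\n", mod_name);
	}
	#endif

	int main(void)
	{
	#ifdef CONFIG_MODULES
		process_cached_mods("ext4");
	#endif
		return 0;
	}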
Link: http://lkml.kernel.org/r/20170710084413.1820568-1-arnd@arndb.de Fixes: d7fbf8df7ca0 ("ftrace: Implement cached modules tracing on module load") Signed-off-by: Arnd Bergmann Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 2953d558bbee..4706f0ed193e 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3978,6 +3978,7 @@ static int ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, int reset, int enable); +#ifdef CONFIG_MODULES static void process_mod_list(struct list_head *head, struct ftrace_ops *ops, char *mod, bool enable) { @@ -4068,6 +4069,7 @@ static void process_cached_mods(const char *mod_name) kfree(mod); } +#endif /* * We register the module command as a template to show others how -- cgit From 44925dfff05fd1a897992d278b15a6b6b55e79a7 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 12 Jul 2017 10:33:40 +0300 Subject: ftrace: Remove an unneeded NULL check "func" can't be NULL and it doesn't make sense to check because we've already derefenced it. Link: http://lkml.kernel.org/r/20170712073340.4enzeojeoupuds5a@mwanda Signed-off-by: Dan Carpenter Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 4706f0ed193e..5fb5b40b3ae8 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3950,7 +3950,7 @@ static int cache_mod(struct trace_array *tr, continue; /* no func matches all */ - if (!func || strcmp(func, "*") == 0 || + if (strcmp(func, "*") == 0 || (ftrace_mod->func && strcmp(ftrace_mod->func, func) == 0)) { ret = 0; -- cgit From 2e028c4fe12907f226b8221815f16c2486ad3aa7 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 12 Jul 2017 10:35:57 +0300 Subject: ftrace: Fix uninitialized variable in match_records() My static checker complains that if "func" is NULL then "clear_filter" is uninitialized. This seems like it could be true, although it's possible something subtle is happening that I haven't seen. kernel/trace/ftrace.c:3844 match_records() error: uninitialized symbol 'clear_filter'. Link: http://lkml.kernel.org/r/20170712073556.h6tkpjcdzjaozozs@mwanda Cc: stable@vger.kernel.org Fixes: f0a3b154bd7 ("ftrace: Clarify code for mod command") Signed-off-by: Dan Carpenter Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 5fb5b40b3ae8..53f6b6401cf0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3816,7 +3816,7 @@ match_records(struct ftrace_hash *hash, char *func, int len, char *mod) int exclude_mod = 0; int found = 0; int ret; - int clear_filter; + int clear_filter = 0; if (func) { func_g.type = filter_parse_regex(func, len, &func_g.search, -- cgit From 848618857d2535176037bdc085f8d012d907071f Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Wed, 12 Jul 2017 19:14:16 -0700 Subject: tracing/ring_buffer: Try harder to allocate ftrace can fail to allocate per-CPU ring buffer on systems with a large number of CPUs coupled while large amounts of cache happening in the page cache. Currently the ring buffer allocation doesn't retry in the VM implementation even if direct-reclaim made some progress but still wasn't able to find a free page. 
On retrying, I see that the allocations almost always succeed. The retry doesn't happen because __GFP_NORETRY is used in the tracer to prevent the case where we might OOM; however, if we drop __GFP_NORETRY, we risk destabilizing the system if the OOM killer is triggered. To prevent this situation, use the __GFP_RETRY_MAYFAIL flag introduced recently [1]. Tested that the following still succeeds without destabilizing a system with 1GB of memory: echo 300000 > /sys/kernel/debug/tracing/buffer_size_kb [1] https://marc.info/?l=linux-mm&m=149820805124906&w=2 Link: http://lkml.kernel.org/r/20170713021416.8897-1-joelaf@google.com Cc: Tim Murray Cc: Ingo Molnar Cc: Andrew Morton Acked-by: Vlastimil Babka Acked-by: Johannes Weiner Acked-by: Michal Hocko Signed-off-by: Joel Fernandes Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 4ae268e687fe..529cc50d7243 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1136,12 +1136,12 @@ static int __rb_allocate_pages(long nr_pages, struct list_head *pages, int cpu) for (i = 0; i < nr_pages; i++) { struct page *page; /* - * __GFP_NORETRY flag makes sure that the allocation fails - * gracefully without invoking oom-killer and the system is - * not destabilized. + * __GFP_RETRY_MAYFAIL flag makes sure that the allocation fails + * gracefully without invoking oom-killer and the system is not + * destabilized. */ bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), - GFP_KERNEL | __GFP_NORETRY, + GFP_KERNEL | __GFP_RETRY_MAYFAIL, cpu_to_node(cpu)); if (!bpage) goto free_pages; @@ -1149,7 +1149,7 @@ static int __rb_allocate_pages(long nr_pages, struct list_head *pages, int cpu) list_add(&bpage->list, pages); page = alloc_pages_node(cpu_to_node(cpu), - GFP_KERNEL | __GFP_NORETRY, 0); + GFP_KERNEL | __GFP_RETRY_MAYFAIL, 0); if (!page) goto free_pages; bpage->page = page_address(page); -- cgit From db9108e054700c96322b0f0028546aa4e643cf0b Mon Sep 17 00:00:00 2001 From: Chunyu Hu Date: Thu, 20 Jul 2017 18:36:09 +0800 Subject: tracing: Fix kmemleak in instance_rmdir Running instance_rmdir hits a kmemleak report, because it forgets to release the memory of tracing_cpumask. With this fix, the warning does not appear any more. unreferenced object 0xffff93a8dfaa7c18 (size 8): comm "mkdir", pid 1436, jiffies 4294763622 (age 9134.308s) hex dump (first 8 bytes): ff ff ff ff ff ff ff ff ........
backtrace: [] kmemleak_alloc+0x4a/0xa0 [] __kmalloc_node+0xf1/0x280 [] alloc_cpumask_var_node+0x23/0x30 [] alloc_cpumask_var+0xe/0x10 [] instance_mkdir+0x90/0x240 [] tracefs_syscall_mkdir+0x40/0x70 [] vfs_mkdir+0x109/0x1b0 [] SyS_mkdir+0xd0/0x100 [] do_syscall_64+0x67/0x150 [] return_from_SYSCALL_64+0x0/0x6a [] 0xffffffffffffffff Link: http://lkml.kernel.org/r/1500546969-12594-1-git-send-email-chuhu@redhat.com Cc: stable@vger.kernel.org Fixes: ccfe9e42e451 ("tracing: Make tracing_cpumask available for all instances") Signed-off-by: Chunyu Hu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2d0ffcc49dba..42b9355033d4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7774,6 +7774,7 @@ static int instance_rmdir(const char *name) } kfree(tr->topts); + free_cpumask_var(tr->tracing_cpumask); kfree(tr->name); kfree(tr); -- cgit From f86f418059b94aa01f9342611a272ca60c583e89 Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Wed, 7 Jun 2017 16:12:51 +0800 Subject: trace: Fix errors caused by incompatible types of RCU variables Variables that are processed by RCU functions should be annotated as __rcu; otherwise sparse will report errors like the one below: "error: incompatible types in comparison expression (different address spaces)" Link: http://lkml.kernel.org/r/1496823171-7758-1-git-send-email-zhang.chunyan@linaro.org Signed-off-by: Chunyan Zhang [ Updated to not be 100% 80 column strict ] Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 6 +++--- include/linux/trace_events.h | 2 +- kernel/trace/ftrace.c | 41 +++++++++++++++++++++++++++-------------- kernel/trace/trace.h | 6 +++--- 4 files changed, 34 insertions(+), 21 deletions(-) (limited to 'kernel/trace') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 5857390ac35a..6383115e9d2c 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -145,8 +145,8 @@ enum { #ifdef CONFIG_DYNAMIC_FTRACE /* The hash used to know what functions callbacks trace */ struct ftrace_ops_hash { - struct ftrace_hash *notrace_hash; - struct ftrace_hash *filter_hash; + struct ftrace_hash __rcu *notrace_hash; + struct ftrace_hash __rcu *filter_hash; struct mutex regex_lock; }; @@ -168,7 +168,7 @@ static inline void ftrace_free_init_mem(void) { } */ struct ftrace_ops { ftrace_func_t func; - struct ftrace_ops *next; + struct ftrace_ops __rcu *next; unsigned long flags; void *private; ftrace_func_t saved_func; diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index f73cedfa2e0b..536c80ff7ad9 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -338,7 +338,7 @@ enum { struct trace_event_file { struct list_head list; struct trace_event_call *event_call; - struct event_filter *filter; + struct event_filter __rcu *filter; struct dentry *dir; struct trace_array *tr; struct trace_subsystem_dir *system; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 53f6b6401cf0..02004ae91860 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -113,7 +113,7 @@ static int ftrace_disabled __read_mostly; static DEFINE_MUTEX(ftrace_lock); -static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end; +static struct ftrace_ops __rcu *ftrace_ops_list __read_mostly = &ftrace_list_end; ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; static struct ftrace_ops global_ops; @@ -169,8 +169,11 @@
int ftrace_nr_registered_ops(void) mutex_lock(&ftrace_lock); - for (ops = ftrace_ops_list; - ops != &ftrace_list_end; ops = ops->next) + for (ops = rcu_dereference_protected(ftrace_ops_list, + lockdep_is_held(&ftrace_lock)); + ops != &ftrace_list_end; + ops = rcu_dereference_protected(ops->next, + lockdep_is_held(&ftrace_lock))) cnt++; mutex_unlock(&ftrace_lock); @@ -275,10 +278,11 @@ static void update_ftrace_function(void) * If there's only one ftrace_ops registered, the ftrace_ops_list * will point to the ops we want. */ - set_function_trace_op = ftrace_ops_list; + set_function_trace_op = rcu_dereference_protected(ftrace_ops_list, + lockdep_is_held(&ftrace_lock)); /* If there's no ftrace_ops registered, just call the stub function */ - if (ftrace_ops_list == &ftrace_list_end) { + if (set_function_trace_op == &ftrace_list_end) { func = ftrace_stub; /* @@ -286,7 +290,8 @@ static void update_ftrace_function(void) * recursion safe and not dynamic and the arch supports passing ops, * then have the mcount trampoline call the function directly. */ - } else if (ftrace_ops_list->next == &ftrace_list_end) { + } else if (rcu_dereference_protected(ftrace_ops_list->next, + lockdep_is_held(&ftrace_lock)) == &ftrace_list_end) { func = ftrace_ops_get_list_func(ftrace_ops_list); } else { @@ -348,9 +353,11 @@ int using_ftrace_ops_list_func(void) return ftrace_trace_function == ftrace_ops_list_func; } -static void add_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops) +static void add_ftrace_ops(struct ftrace_ops __rcu **list, + struct ftrace_ops *ops) { - ops->next = *list; + rcu_assign_pointer(ops->next, *list); + /* * We are entering ops into the list but another * CPU might be walking that list. We need to make sure @@ -360,7 +367,8 @@ static void add_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops) rcu_assign_pointer(*list, ops); } -static int remove_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops) +static int remove_ftrace_ops(struct ftrace_ops __rcu **list, + struct ftrace_ops *ops) { struct ftrace_ops **p; @@ -368,7 +376,10 @@ static int remove_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops) * If we are removing the last function, then simply point * to the ftrace_stub. */ - if (*list == ops && ops->next == &ftrace_list_end) { + if (rcu_dereference_protected(*list, + lockdep_is_held(&ftrace_lock)) == ops && + rcu_dereference_protected(ops->next, + lockdep_is_held(&ftrace_lock)) == &ftrace_list_end) { *list = &ftrace_list_end; return 0; } @@ -1569,8 +1580,8 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs) return 0; #endif - hash.filter_hash = rcu_dereference_raw_notrace(ops->func_hash->filter_hash); - hash.notrace_hash = rcu_dereference_raw_notrace(ops->func_hash->notrace_hash); + rcu_assign_pointer(hash.filter_hash, ops->func_hash->filter_hash); + rcu_assign_pointer(hash.notrace_hash, ops->func_hash->notrace_hash); if (hash_contains_ip(ip, &hash)) ret = 1; @@ -2840,7 +2851,8 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) * If there's no more ops registered with ftrace, run a * sanity check to make sure all rec flags are cleared. 
*/ - if (ftrace_ops_list == &ftrace_list_end) { + if (rcu_dereference_protected(ftrace_ops_list, + lockdep_is_held(&ftrace_lock)) == &ftrace_list_end) { struct ftrace_page *pg; struct dyn_ftrace *rec; @@ -6453,7 +6465,8 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, if (ftrace_enabled) { /* we are starting ftrace again */ - if (ftrace_ops_list != &ftrace_list_end) + if (rcu_dereference_protected(ftrace_ops_list, + lockdep_is_held(&ftrace_lock)) != &ftrace_list_end) update_ftrace_function(); ftrace_startup_sysctl(); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 6ade1c55cc3a..490ba229931d 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1210,9 +1210,9 @@ struct ftrace_event_field { struct event_filter { int n_preds; /* Number assigned */ int a_preds; /* allocated */ - struct filter_pred *preds; - struct filter_pred *root; - char *filter_string; + struct filter_pred __rcu *preds; + struct filter_pred __rcu *root; + char *filter_string; }; struct event_subsystem { -- cgit
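To close, a minimal sketch of the annotation pattern this last patch applies (hypothetical struct and names, not the ftrace code): the __rcu marker tells sparse that the pointer may only be touched through the RCU accessors -- rcu_dereference() under rcu_read_lock(), rcu_dereference_protected() under the update-side lock, and rcu_assign_pointer() for publication.

	#include <linux/rcupdate.h>
	#include <linux/slab.h>
	#include <linux/mutex.h>

	struct cfg {
		int value;
	};

	static struct cfg __rcu *cur_cfg;	/* __rcu keeps sparse happy */
	static DEFINE_MUTEX(cfg_lock);

	int cfg_read_value(void)		/* reader side */
	{
		struct cfg *c;
		int v = 0;

		rcu_read_lock();
		c = rcu_dereference(cur_cfg);
		if (c)
			v = c->value;
		rcu_read_unlock();
		return v;
	}

	void cfg_set_value(int v)		/* updater side */
	{
		struct cfg *newc, *oldc;

		newc = kmalloc(sizeof(*newc), GFP_KERNEL);
		if (!newc)
			return;
		newc->value = v;

		mutex_lock(&cfg_lock);
		oldc = rcu_dereference_protected(cur_cfg,
						 lockdep_is_held(&cfg_lock));
		rcu_assign_pointer(cur_cfg, newc);
		mutex_unlock(&cfg_lock);

		synchronize_rcu();		/* wait out readers, then free */
		kfree(oldc);
	}

Dropping any of the __rcu annotations in this sketch reproduces the sparse error quoted in the commit message, since a plain pointer and an __rcu pointer live in different address spaces as far as sparse is concerned.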