From dd17c8f72993f9461e9c19250e3f155d6d99df22 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 29 Oct 2009 22:34:15 +0900 Subject: percpu: remove per_cpu__ prefix. Now that the return from alloc_percpu is compatible with the address of per-cpu vars, it makes sense to hand around the address of per-cpu variables. To make this sane, we remove the per_cpu__ prefix we created to stop people from accidentally using these vars directly. Now that we have sparse, we can use that (next patch). tj: * Updated to convert stuff which were missed by or added after the original patch. * Kill per_cpu_var() macro. Signed-off-by: Rusty Russell Signed-off-by: Tejun Heo Reviewed-by: Christoph Lameter --- kernel/trace/trace.c | 6 +++--- kernel/trace/trace_functions_graph.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 85a5ed70b5b2..b808177af816 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -91,12 +91,12 @@ DEFINE_PER_CPU(int, ftrace_cpu_disabled); static inline void ftrace_disable_cpu(void) { preempt_disable(); - __this_cpu_inc(per_cpu_var(ftrace_cpu_disabled)); + __this_cpu_inc(ftrace_cpu_disabled); } static inline void ftrace_enable_cpu(void) { - __this_cpu_dec(per_cpu_var(ftrace_cpu_disabled)); + __this_cpu_dec(ftrace_cpu_disabled); preempt_enable(); } @@ -1085,7 +1085,7 @@ trace_function(struct trace_array *tr, struct ftrace_entry *entry; /* If we are reading the ring buffer, don't trace */ - if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled)))) + if (unlikely(__this_cpu_read(ftrace_cpu_disabled))) return; event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry), diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 90a6daa10962..8614e3241ff8 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -176,7 +176,7 @@ static int __trace_graph_entry(struct 
trace_array *tr, struct ring_buffer *buffer = tr->buffer; struct ftrace_graph_ent_entry *entry; - if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled)))) + if (unlikely(__this_cpu_read(ftrace_cpu_disabled))) return 0; event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT, @@ -240,7 +240,7 @@ static void __trace_graph_return(struct trace_array *tr, struct ring_buffer *buffer = tr->buffer; struct ftrace_graph_ret_entry *entry; - if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled)))) + if (unlikely(__this_cpu_read(ftrace_cpu_disabled))) return; event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET, -- cgit From 07b139c8c81b97bbe55c68daf0cbeca8b1c609ca Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 21 Dec 2009 14:27:35 +0800 Subject: perf events: Remove CONFIG_EVENT_PROFILE Quoted from Ingo: | This reminds me - i think we should eliminate CONFIG_EVENT_PROFILE - | it's an unnecessary Kconfig complication. If both PERF_EVENTS and | EVENT_TRACING is enabled we should expose generic tracepoints. | | Nor is it limited to event 'profiling', so it has become a misnomer as | well. 
Signed-off-by: Li Zefan Cc: Frederic Weisbecker Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <4B2F1557.2050705@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 2 +- include/linux/perf_event.h | 2 +- include/linux/syscalls.h | 4 ++-- include/trace/ftrace.h | 12 ++++++------ include/trace/syscall.h | 4 ++-- init/Kconfig | 13 ------------- kernel/perf_event.c | 4 ++-- kernel/trace/Makefile | 4 +++- kernel/trace/trace_events_filter.c | 4 ++-- kernel/trace/trace_kprobe.c | 14 +++++++------- kernel/trace/trace_syscalls.c | 5 ++--- 11 files changed, 28 insertions(+), 40 deletions(-) (limited to 'kernel/trace') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 2233c98d80df..0a09e758c7d3 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -188,7 +188,7 @@ do { \ __trace_printk(ip, fmt, ##args); \ } while (0) -#ifdef CONFIG_EVENT_PROFILE +#ifdef CONFIG_PERF_EVENTS struct perf_event; extern int ftrace_profile_enable(int event_id); extern void ftrace_profile_disable(int event_id); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index a494e7501292..9a1d276db754 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -658,7 +658,7 @@ struct perf_event { perf_overflow_handler_t overflow_handler; -#ifdef CONFIG_EVENT_PROFILE +#ifdef CONFIG_EVENT_TRACING struct event_filter *filter; #endif diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 65793e90d6f6..b7c7fcf7790b 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -99,7 +99,7 @@ struct perf_event_attr; #define __SC_TEST5(t5, a5, ...) __SC_TEST(t5); __SC_TEST4(__VA_ARGS__) #define __SC_TEST6(t6, a6, ...) 
__SC_TEST(t6); __SC_TEST5(__VA_ARGS__) -#ifdef CONFIG_EVENT_PROFILE +#ifdef CONFIG_PERF_EVENTS #define TRACE_SYS_ENTER_PROFILE_INIT(sname) \ .profile_enable = prof_sysenter_enable, \ @@ -113,7 +113,7 @@ struct perf_event_attr; #define TRACE_SYS_ENTER_PROFILE_INIT(sname) #define TRACE_SYS_EXIT_PROFILE(sname) #define TRACE_SYS_EXIT_PROFILE_INIT(sname) -#endif +#endif /* CONFIG_PERF_EVENTS */ #ifdef CONFIG_FTRACE_SYSCALLS #define __SC_STR_ADECL1(t, a) #a diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 73523151a731..2fdd36df41f6 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -498,7 +498,7 @@ static inline int ftrace_get_offsets_##call( \ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -#ifdef CONFIG_EVENT_PROFILE +#ifdef CONFIG_PERF_EVENTS /* * Generate the functions needed for tracepoint perf_event support. @@ -541,7 +541,7 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -#endif +#endif /* CONFIG_PERF_EVENTS */ /* * Stage 4 of the trace events. 
@@ -626,7 +626,7 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ * */ -#ifdef CONFIG_EVENT_PROFILE +#ifdef CONFIG_PERF_EVENTS #define _TRACE_PROFILE_INIT(call) \ .profile_enable = ftrace_profile_enable_##call, \ @@ -634,7 +634,7 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ #else #define _TRACE_PROFILE_INIT(call) -#endif +#endif /* CONFIG_PERF_EVENTS */ #undef __entry #define __entry entry @@ -834,7 +834,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ * } */ -#ifdef CONFIG_EVENT_PROFILE +#ifdef CONFIG_PERF_EVENTS #undef __perf_addr #define __perf_addr(a) __addr = (a) @@ -926,7 +926,7 @@ static void ftrace_profile_##call(proto) \ DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -#endif /* CONFIG_EVENT_PROFILE */ +#endif /* CONFIG_PERF_EVENTS */ #undef _TRACE_PROFILE_INIT diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 961fda3556bb..3d463dcef298 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -49,12 +49,12 @@ ftrace_format_syscall(struct ftrace_event_call *call, struct trace_seq *s); enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags); enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags); #endif -#ifdef CONFIG_EVENT_PROFILE + +#ifdef CONFIG_PERF_EVENTS int prof_sysenter_enable(struct ftrace_event_call *call); void prof_sysenter_disable(struct ftrace_event_call *call); int prof_sysexit_enable(struct ftrace_event_call *call); void prof_sysexit_disable(struct ftrace_event_call *call); - #endif #endif /* _TRACE_SYSCALL_H */ diff --git a/init/Kconfig b/init/Kconfig index a23da9f01803..06dab27c18d9 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -966,19 +966,6 @@ config PERF_EVENTS Say Y if unsure. 
-config EVENT_PROFILE - bool "Tracepoint profiling sources" - depends on PERF_EVENTS && EVENT_TRACING - default y - help - Allow the use of tracepoints as software performance events. - - When this is enabled, you can create perf events based on - tracepoints using PERF_TYPE_TRACEPOINT and the tracepoint ID - found in debugfs://tracing/events/*/*/id. (The -e/--events - option to the perf tool can parse and interpret symbolic - tracepoints, in the subsystem:tracepoint_name format.) - config PERF_COUNTERS bool "Kernel performance counters (old config option)" depends on HAVE_PERF_EVENTS diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 099bd662daa6..5b987b4a98a8 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4177,7 +4177,7 @@ static const struct pmu perf_ops_task_clock = { .read = task_clock_perf_event_read, }; -#ifdef CONFIG_EVENT_PROFILE +#ifdef CONFIG_EVENT_TRACING void perf_tp_event(int event_id, u64 addr, u64 count, void *record, int entry_size) @@ -4282,7 +4282,7 @@ static void perf_event_free_filter(struct perf_event *event) { } -#endif /* CONFIG_EVENT_PROFILE */ +#endif /* CONFIG_EVENT_TRACING */ #ifdef CONFIG_HAVE_HW_BREAKPOINT static void bp_perf_event_destroy(struct perf_event *event) diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index cd9ecd89ec77..d00c6fe23f54 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -51,7 +51,9 @@ endif obj-$(CONFIG_EVENT_TRACING) += trace_events.o obj-$(CONFIG_EVENT_TRACING) += trace_export.o obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o -obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o +ifeq ($(CONFIG_PERF_EVENTS),y) +obj-$(CONFIG_EVENT_TRACING) += trace_event_profile.o +endif obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 50504cb228de..74563d7e102e 100644 --- 
a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1360,7 +1360,7 @@ out_unlock: return err; } -#ifdef CONFIG_EVENT_PROFILE +#ifdef CONFIG_PERF_EVENTS void ftrace_profile_free_filter(struct perf_event *event) { @@ -1428,5 +1428,5 @@ out_unlock: return err; } -#endif /* CONFIG_EVENT_PROFILE */ +#endif /* CONFIG_PERF_EVENTS */ diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 375f81a568dc..75d75dec226a 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1249,7 +1249,7 @@ static int kretprobe_event_show_format(struct ftrace_event_call *call, ", REC->" FIELD_STRING_RETIP); } -#ifdef CONFIG_EVENT_PROFILE +#ifdef CONFIG_PERF_EVENTS /* Kprobe profile handler */ static __kprobes int kprobe_profile_func(struct kprobe *kp, @@ -1407,7 +1407,7 @@ static void probe_profile_disable(struct ftrace_event_call *call) disable_kprobe(&tp->rp.kp); } } -#endif /* CONFIG_EVENT_PROFILE */ +#endif /* CONFIG_PERF_EVENTS */ static __kprobes @@ -1417,10 +1417,10 @@ int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) if (tp->flags & TP_FLAG_TRACE) kprobe_trace_func(kp, regs); -#ifdef CONFIG_EVENT_PROFILE +#ifdef CONFIG_PERF_EVENTS if (tp->flags & TP_FLAG_PROFILE) kprobe_profile_func(kp, regs); -#endif /* CONFIG_EVENT_PROFILE */ +#endif return 0; /* We don't tweek kernel, so just return 0 */ } @@ -1431,10 +1431,10 @@ int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs) if (tp->flags & TP_FLAG_TRACE) kretprobe_trace_func(ri, regs); -#ifdef CONFIG_EVENT_PROFILE +#ifdef CONFIG_PERF_EVENTS if (tp->flags & TP_FLAG_PROFILE) kretprobe_profile_func(ri, regs); -#endif /* CONFIG_EVENT_PROFILE */ +#endif return 0; /* We don't tweek kernel, so just return 0 */ } @@ -1463,7 +1463,7 @@ static int register_probe_event(struct trace_probe *tp) call->regfunc = probe_event_enable; call->unregfunc = probe_event_disable; -#ifdef CONFIG_EVENT_PROFILE +#ifdef CONFIG_PERF_EVENTS call->profile_enable 
= probe_profile_enable; call->profile_disable = probe_profile_disable; #endif diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 75289f372dd2..f694f66d75b0 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -421,7 +421,7 @@ int __init init_ftrace_syscalls(void) } core_initcall(init_ftrace_syscalls); -#ifdef CONFIG_EVENT_PROFILE +#ifdef CONFIG_PERF_EVENTS static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls); static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls); @@ -626,6 +626,5 @@ void prof_sysexit_disable(struct ftrace_event_call *call) mutex_unlock(&syscall_trace_lock); } -#endif - +#endif /* CONFIG_PERF_EVENTS */ -- cgit From 79615760f380ec86cd58204744e774c33fab9211 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 5 Jan 2010 15:34:50 +0900 Subject: local_t: Move local.h include to ringbuffer.c and ring_buffer_benchmark.c ring_buffer*.c are the last users of local.h. Remove the include from module.h and add it to the ring_buffer files. 
Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- include/linux/module.h | 1 - kernel/trace/ring_buffer.c | 1 + kernel/trace/ring_buffer_benchmark.c | 1 + 3 files changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/include/linux/module.h b/include/linux/module.h index 2302f09ea2d9..7e74ae0051cc 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -17,7 +17,6 @@ #include #include -#include #include #include diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 2326b04c95c4..eb6c8988c31a 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -20,6 +20,7 @@ #include #include +#include #include "trace.h" /* diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index b2477caf09c2..df74c7982255 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -8,6 +8,7 @@ #include #include #include +#include struct rb_page { u64 ts; -- cgit From 809826a389040e0ad9d646b587bccc0e34691afd Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 15 Dec 2009 15:39:34 +0800 Subject: tracing: Have __dynamic_array() define a field This is part of a patch set that removes the show_format method in the ftrace event macros. This patch set requires that all fields are added to the ftrace_event_call->fields. This patch changes __dynamic_array() to call trace_define_field() to include fields that use __dynamic_array(). 
Signed-off-by: Lai Jiangshan LKML-Reference: <4B273D36.8090100@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace_export.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index d4fa5dc1ee4e..9978a4f40090 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -175,7 +175,12 @@ ftrace_format_##name(struct ftrace_event_call *unused, \ return ret; #undef __dynamic_array -#define __dynamic_array(type, item) +#define __dynamic_array(type, item) \ + ret = trace_define_field(event_call, #type, #item, \ + offsetof(typeof(field), item), \ + 0, is_signed_type(type), FILTER_OTHER);\ + if (ret) \ + return ret; #undef FTRACE_ENTRY #define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ -- cgit From 509e760cd91c831983097ae174cb6c0b8c6c8e6b Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 15 Dec 2009 15:39:42 +0800 Subject: tracing: Add print_fmt field This is part of a patch set that removes the show_format method in the ftrace event macros. The print_fmt field is added to hold the string that shows the print_fmt in the event format files. This patch only adds the field but it is currently not used. Later patches will use this field to enable us to remove the show_format field and function. 
Signed-off-by: Lai Jiangshan LKML-Reference: <4B273D3E.2000704@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 1 + include/trace/ftrace.h | 28 +++++++++++++++++++++++++++- kernel/trace/trace_export.c | 7 +++++++ 3 files changed, 35 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 2233c98d80df..bd23d8e52f02 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -121,6 +121,7 @@ struct ftrace_event_call { int (*regfunc)(struct ftrace_event_call *); void (*unregfunc)(struct ftrace_event_call *); int id; + const char *print_fmt; int (*raw_init)(struct ftrace_event_call *); int (*show_format)(struct ftrace_event_call *, struct trace_seq *); diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index c6fe03e902ca..3351d85c83a3 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -722,8 +722,20 @@ static struct trace_event ftrace_event_type_##call = { \ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) +#undef __entry +#define __entry REC + +#undef __print_flags +#undef __print_symbolic +#undef __get_dynamic_array +#undef __get_str + +#undef TP_printk +#define TP_printk(fmt, args...) 
"\"" fmt "\", " __stringify(args) + #undef DECLARE_EVENT_CLASS -#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ +static const char print_fmt_##call[] = print; #undef DEFINE_EVENT #define DEFINE_EVENT(template, call, proto, args) \ @@ -737,6 +749,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .raw_init = trace_event_raw_init, \ .regfunc = ftrace_raw_reg_event_##call, \ .unregfunc = ftrace_raw_unreg_event_##call, \ + .print_fmt = print_fmt_##template, \ .show_format = ftrace_format_##template, \ .define_fields = ftrace_define_fields_##template, \ _TRACE_PROFILE_INIT(call) \ @@ -745,6 +758,8 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #undef DEFINE_EVENT_PRINT #define DEFINE_EVENT_PRINT(template, call, proto, args, print) \ \ +static const char print_fmt_##call[] = print; \ + \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) event_##call = { \ @@ -754,6 +769,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .raw_init = trace_event_raw_init, \ .regfunc = ftrace_raw_reg_event_##call, \ .unregfunc = ftrace_raw_unreg_event_##call, \ + .print_fmt = print_fmt_##call, \ .show_format = ftrace_format_##call, \ .define_fields = ftrace_define_fields_##template, \ _TRACE_PROFILE_INIT(call) \ @@ -837,6 +853,16 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #ifdef CONFIG_EVENT_PROFILE +#undef __entry +#define __entry entry + +#undef __get_dynamic_array +#define __get_dynamic_array(field) \ + ((void *)__entry + (__entry->__data_loc_##field & 0xffff)) + +#undef __get_str +#define __get_str(field) (char *)__get_dynamic_array(field) + #undef __perf_addr #define __perf_addr(a) __addr = (a) diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 9978a4f40090..95d14b640a66 100644 --- a/kernel/trace/trace_export.c +++ 
b/kernel/trace/trace_export.c @@ -203,6 +203,9 @@ static int ftrace_raw_init_event(struct ftrace_event_call *call) return 0; } +#undef __entry +#define __entry REC + #undef __field #define __field(type, item) @@ -218,6 +221,9 @@ static int ftrace_raw_init_event(struct ftrace_event_call *call) #undef __dynamic_array #define __dynamic_array(type, item) +#undef F_printk +#define F_printk(fmt, args...) #fmt ", " __stringify(args) + #undef FTRACE_ENTRY #define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \ \ @@ -228,6 +234,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .id = type, \ .system = __stringify(TRACE_SYSTEM), \ .raw_init = ftrace_raw_init_event, \ + .print_fmt = print, \ .show_format = ftrace_format_##call, \ .define_fields = ftrace_define_fields_##call, \ }; \ -- cgit From 50307a45f8515f6244e3b08e6b19824b9fbfe293 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 15 Dec 2009 15:39:45 +0800 Subject: tracing/syscalls: Init print_fmt for syscall events This is part of a patch set that removes the show_format method in the ftrace event macros. Add the print_fmt initialization to the syscall events. The print_fmt is still not used, but will be in the follow up patches. 
Signed-off-by: Lai Jiangshan LKML-Reference: <4B273D41.609@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace_syscalls.c | 68 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 75289f372dd2..1352b0a36fac 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -191,6 +191,67 @@ int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) return trace_seq_putc(s, '\n'); } +static +int __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len) +{ + int i; + int pos = 0; + + /* When len=0, we just calculate the needed length */ +#define LEN_OR_ZERO (len ? len - pos : 0) + + pos += snprintf(buf + pos, LEN_OR_ZERO, "\""); + for (i = 0; i < entry->nb_args; i++) { + pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s", + entry->args[i], sizeof(unsigned long), + i == entry->nb_args - 1 ? 
"" : ", "); + } + pos += snprintf(buf + pos, LEN_OR_ZERO, "\""); + + for (i = 0; i < entry->nb_args; i++) { + pos += snprintf(buf + pos, LEN_OR_ZERO, + ", ((unsigned long)(REC->%s))", entry->args[i]); + } + +#undef LEN_OR_ZERO + + /* return the length of print_fmt */ + return pos; +} + +static int set_syscall_print_fmt(struct ftrace_event_call *call) +{ + char *print_fmt; + int len; + struct syscall_metadata *entry = call->data; + + if (entry->enter_event != call) { + call->print_fmt = "\"0x%lx\", REC->ret"; + return 0; + } + + /* First: called with 0 length to calculate the needed length */ + len = __set_enter_print_fmt(entry, NULL, 0); + + print_fmt = kmalloc(len + 1, GFP_KERNEL); + if (!print_fmt) + return -ENOMEM; + + /* Second: actually write the @print_fmt */ + __set_enter_print_fmt(entry, print_fmt, len + 1); + call->print_fmt = print_fmt; + + return 0; +} + +static void free_syscall_print_fmt(struct ftrace_event_call *call) +{ + struct syscall_metadata *entry = call->data; + + if (entry->enter_event == call) + kfree(call->print_fmt); +} + int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s) { int ret; @@ -386,9 +447,14 @@ int init_syscall_trace(struct ftrace_event_call *call) { int id; + if (set_syscall_print_fmt(call) < 0) + return -ENOMEM; + id = register_ftrace_event(call->event); - if (!id) + if (!id) { + free_syscall_print_fmt(call); return -ENODEV; + } call->id = id; INIT_LIST_HEAD(&call->fields); return 0; -- cgit From a342a0280b981c130e32dbb94dbd3a57959c4d04 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 15 Dec 2009 15:39:49 +0800 Subject: tracing/kprobes: Init print_fmt for kprobe events This is part of a patch set that removes the show_format method in the ftrace event macros. Add the print_fmt initialization to the kprobe events. The print_fmt is still not used, but will be in the follow up patches. 
Signed-off-by: Lai Jiangshan LKML-Reference: <4B273D45.3080100@cn.fujitsu.com> Acked-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- kernel/trace/trace_kprobe.c | 64 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 63 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 6ea90c0e2c96..147491dccead 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1250,6 +1250,62 @@ static int kretprobe_event_show_format(struct ftrace_event_call *call, ", REC->" FIELD_STRING_RETIP); } +static int __set_print_fmt(struct trace_probe *tp, char *buf, int len) +{ + int i; + int pos = 0; + + const char *fmt, *arg; + + if (!probe_is_return(tp)) { + fmt = "(%lx)"; + arg = "REC->" FIELD_STRING_IP; + } else { + fmt = "(%lx <- %lx)"; + arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP; + } + + /* When len=0, we just calculate the needed length */ +#define LEN_OR_ZERO (len ? len - pos : 0) + + pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt); + + for (i = 0; i < tp->nr_args; i++) { + pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%%lx", + tp->args[i].name); + } + + pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg); + + for (i = 0; i < tp->nr_args; i++) { + pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s", + tp->args[i].name); + } + +#undef LEN_OR_ZERO + + /* return the length of print_fmt */ + return pos; +} + +static int set_print_fmt(struct trace_probe *tp) +{ + int len; + char *print_fmt; + + /* First: called with 0 length to calculate the needed length */ + len = __set_print_fmt(tp, NULL, 0); + print_fmt = kmalloc(len + 1, GFP_KERNEL); + if (!print_fmt) + return -ENOMEM; + + /* Second: actually write the @print_fmt */ + __set_print_fmt(tp, print_fmt, len + 1); + tp->call.print_fmt = print_fmt; + + return 0; +} + #ifdef CONFIG_EVENT_PROFILE /* Kprobe profile handler */ @@ -1456,10 +1512,14 @@ static int register_probe_event(struct trace_probe 
*tp) call->show_format = kprobe_event_show_format; call->define_fields = kprobe_event_define_fields; } + if (set_print_fmt(tp) < 0) + return -ENOMEM; call->event = &tp->event; call->id = register_ftrace_event(&tp->event); - if (!call->id) + if (!call->id) { + kfree(call->print_fmt); return -ENODEV; + } call->enabled = 0; call->regfunc = probe_event_enable; call->unregfunc = probe_event_disable; @@ -1472,6 +1532,7 @@ static int register_probe_event(struct trace_probe *tp) ret = trace_add_event_call(call); if (ret) { pr_info("Failed to register kprobe event: %s\n", call->name); + kfree(call->print_fmt); unregister_ftrace_event(&tp->event); } return ret; @@ -1481,6 +1542,7 @@ static void unregister_probe_event(struct trace_probe *tp) { /* tp->event is unregistered in trace_remove_event_call() */ trace_remove_event_call(&tp->call); + kfree(tp->call.print_fmt); } /* Make a debugfs interface for controling probe points */ -- cgit From c7ef3a9004201bca90626db246a19dadd2c29c9b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 28 Dec 2009 21:13:59 -0500 Subject: tracing: Have syscall tracing call its own init function In the clean up of having all events call one specific function, the syscall event init was changed to call this helper function. With the new print_fmt updates, the syscalls need to do special initializations. This patch converts the syscall events to call its own init function again. 
Cc: Lai Jiangshan Cc: Li Zefan Signed-off-by: Steven Rostedt --- include/linux/syscalls.h | 4 ++-- kernel/trace/trace_syscalls.c | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'kernel/trace') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 207466a49f3d..ed353d274a77 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -143,7 +143,7 @@ struct perf_event_attr; .name = "sys_enter"#sname, \ .system = "syscalls", \ .event = &enter_syscall_print_##sname, \ - .raw_init = trace_event_raw_init, \ + .raw_init = init_syscall_trace, \ .show_format = syscall_enter_format, \ .define_fields = syscall_enter_define_fields, \ .regfunc = reg_event_syscall_enter, \ @@ -165,7 +165,7 @@ struct perf_event_attr; .name = "sys_exit"#sname, \ .system = "syscalls", \ .event = &exit_syscall_print_##sname, \ - .raw_init = trace_event_raw_init, \ + .raw_init = init_syscall_trace, \ .show_format = syscall_exit_format, \ .define_fields = syscall_exit_define_fields, \ .regfunc = reg_event_syscall_exit, \ diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 1352b0a36fac..a78e86349ecb 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -450,14 +450,14 @@ int init_syscall_trace(struct ftrace_event_call *call) if (set_syscall_print_fmt(call) < 0) return -ENOMEM; - id = register_ftrace_event(call->event); - if (!id) { + id = trace_event_raw_init(call); + + if (id < 0) { free_syscall_print_fmt(call); - return -ENODEV; + return id; } - call->id = id; - INIT_LIST_HEAD(&call->fields); - return 0; + + return id; } int __init init_ftrace_syscalls(void) -- cgit From 5a65e956220efc2421e21ee56d6153fd5c533a95 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 15 Dec 2009 15:39:53 +0800 Subject: tracing: Use defined fields and print_fmt to print formats The calls ftrace_format_##call() and ftrace_define_fields_##call() are almost duplicate in functionality. 
With the addition of the print_fmt in previous patches, these two functions can be merged into one. The trace_define_field() defines the fields and links them into the struct ftrace_event_call. The previous patches introduced the print_fmt field and this can now be used with the trace_define_field() to create the event format file fields and print_fmt field. The struct ftrace_event_call->fields are used to print the fields The struct ftrace_event_call->print_fmt is used to print the "print fmt: XXXXXXXXXXX" line. Signed-off-by: Lai Jiangshan LKML-Reference: <4B273D49.5000006@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 65 ++++++++++++++++++++++++++++----------------- 1 file changed, 41 insertions(+), 24 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 189b09baf4fb..250ec865d5f5 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -528,33 +528,16 @@ extern char *__bad_type_size(void); #type, "common_" #name, offsetof(typeof(field), name), \ sizeof(field.name), is_signed_type(type) -static int trace_write_header(struct trace_seq *s) -{ - struct trace_entry field; - - /* struct trace_entry */ - return trace_seq_printf(s, - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" - "\n", - FIELD(unsigned short, type), - FIELD(unsigned char, flags), - FIELD(unsigned char, preempt_count), - FIELD(int, pid), - FIELD(int, lock_depth)); -} - static ssize_t event_format_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct ftrace_event_call *call = filp->private_data; + struct ftrace_event_field *field; struct trace_seq *s; + int common_field_count = 5; char *buf; - int r; + int r = 0; if (*ppos) return 
0; @@ -565,14 +548,48 @@ event_format_read(struct file *filp, char __user *ubuf, size_t cnt, trace_seq_init(s); - /* If any of the first writes fail, so will the show_format. */ - trace_seq_printf(s, "name: %s\n", call->name); trace_seq_printf(s, "ID: %d\n", call->id); trace_seq_printf(s, "format:\n"); - trace_write_header(s); - r = call->show_format(call, s); + list_for_each_entry_reverse(field, &call->fields, link) { + /* + * Smartly shows the array type(except dynamic array). + * Normal: + * field:TYPE VAR + * If TYPE := TYPE[LEN], it is shown: + * field:TYPE VAR[LEN] + */ + const char *array_descriptor = strchr(field->type, '['); + + if (!strncmp(field->type, "__data_loc", 10)) + array_descriptor = NULL; + + if (!array_descriptor) { + r = trace_seq_printf(s, "\tfield:%s %s;\toffset:%u;" + "\tsize:%u;\tsigned:%d;\n", + field->type, field->name, field->offset, + field->size, !!field->is_signed); + } else { + r = trace_seq_printf(s, "\tfield:%.*s %s%s;\toffset:%u;" + "\tsize:%u;\tsigned:%d;\n", + (int)(array_descriptor - field->type), + field->type, field->name, + array_descriptor, field->offset, + field->size, !!field->is_signed); + } + + if (--common_field_count == 0) + r = trace_seq_printf(s, "\n"); + + if (!r) + break; + } + + if (r) + r = trace_seq_printf(s, "\nprint fmt: %s\n", + call->print_fmt); + if (!r) { /* * ug! The format output is bigger than a PAGE!! -- cgit From 0fa0edaf32b9a78b9854f1da98d4511a501089b0 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 15 Dec 2009 15:39:57 +0800 Subject: tracing: Remove show_format and related macros from TRACE_EVENT The previous patches added the use of print_fmt string and changes the trace_define_field() function to also create the fields and format output for the event format files. 
text data bss dec hex filename 5857201 1355780 9336808 16549789 fc879d vmlinux 5884589 1351684 9337896 16574169 fce6d9 vmlinux-orig The above shows the size of the vmlinux after this patch set compared to the vmlinux-orig which is before the patch set. This saves us 27k on text, 1k on bss and adds just 4k of data. The total savings are 24k in size. Signed-off-by: Lai Jiangshan LKML-Reference: <4B273D4D.40604@cn.fujitsu.com> Acked-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 2 - include/linux/syscalls.h | 2 - include/trace/ftrace.h | 133 ++---------------------------------------- include/trace/syscall.h | 4 -- kernel/trace/trace_events.c | 12 ---- kernel/trace/trace_export.c | 73 ----------------------- kernel/trace/trace_kprobe.c | 78 ------------------------- kernel/trace/trace_syscalls.c | 66 --------------------- 8 files changed, 6 insertions(+), 364 deletions(-) (limited to 'kernel/trace') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index bd23d8e52f02..84a5629adfd8 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -123,8 +123,6 @@ struct ftrace_event_call { int id; const char *print_fmt; int (*raw_init)(struct ftrace_event_call *); - int (*show_format)(struct ftrace_event_call *, - struct trace_seq *); int (*define_fields)(struct ftrace_event_call *); struct list_head fields; int filter_active; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index ed353d274a77..7b219696ad24 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -144,7 +144,6 @@ struct perf_event_attr; .system = "syscalls", \ .event = &enter_syscall_print_##sname, \ .raw_init = init_syscall_trace, \ - .show_format = syscall_enter_format, \ .define_fields = syscall_enter_define_fields, \ .regfunc = reg_event_syscall_enter, \ .unregfunc = unreg_event_syscall_enter, \ @@ -166,7 +165,6 @@ struct perf_event_attr; .system = "syscalls", \ .event =
&exit_syscall_print_##sname, \ .raw_init = init_syscall_trace, \ - .show_format = syscall_exit_format, \ .define_fields = syscall_exit_define_fields, \ .regfunc = reg_event_syscall_exit, \ .unregfunc = unreg_event_syscall_exit, \ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 3351d85c83a3..df65b99880b1 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -130,130 +130,6 @@ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -/* - * Setup the showing format of trace point. - * - * int - * ftrace_format_##call(struct trace_seq *s) - * { - * struct ftrace_raw_##call field; - * int ret; - * - * ret = trace_seq_printf(s, #type " " #item ";" - * " offset:%u; size:%u;\n", - * offsetof(struct ftrace_raw_##call, item), - * sizeof(field.type)); - * - * } - */ - -#undef TP_STRUCT__entry -#define TP_STRUCT__entry(args...) args - -#undef __field -#define __field(type, item) \ - ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%u;\tsize:%u;\tsigned:%u;\n", \ - (unsigned int)offsetof(typeof(field), item), \ - (unsigned int)sizeof(field.item), \ - (unsigned int)is_signed_type(type)); \ - if (!ret) \ - return 0; - -#undef __field_ext -#define __field_ext(type, item, filter_type) __field(type, item) - -#undef __array -#define __array(type, item, len) \ - ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ - "offset:%u;\tsize:%u;\tsigned:%u;\n", \ - (unsigned int)offsetof(typeof(field), item), \ - (unsigned int)sizeof(field.item), \ - (unsigned int)is_signed_type(type)); \ - if (!ret) \ - return 0; - -#undef __dynamic_array -#define __dynamic_array(type, item, len) \ - ret = trace_seq_printf(s, "\tfield:__data_loc " #type "[] " #item ";\t"\ - "offset:%u;\tsize:%u;\tsigned:%u;\n", \ - (unsigned int)offsetof(typeof(field), \ - __data_loc_##item), \ - (unsigned int)sizeof(field.__data_loc_##item), \ - (unsigned int)is_signed_type(type)); \ - if (!ret) \ - return 0; - -#undef __string -#define __string(item, src) 
__dynamic_array(char, item, -1) - -#undef __entry -#define __entry REC - -#undef __print_symbolic -#undef __get_dynamic_array -#undef __get_str - -#undef TP_printk -#define TP_printk(fmt, args...) "\"%s\", %s\n", fmt, __stringify(args) - -#undef TP_fast_assign -#define TP_fast_assign(args...) args - -#undef TP_perf_assign -#define TP_perf_assign(args...) - -#undef DECLARE_EVENT_CLASS -#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \ -static int \ -ftrace_format_setup_##call(struct ftrace_event_call *unused, \ - struct trace_seq *s) \ -{ \ - struct ftrace_raw_##call field __attribute__((unused)); \ - int ret = 0; \ - \ - tstruct; \ - \ - return ret; \ -} \ - \ -static int \ -ftrace_format_##call(struct ftrace_event_call *unused, \ - struct trace_seq *s) \ -{ \ - int ret = 0; \ - \ - ret = ftrace_format_setup_##call(unused, s); \ - if (!ret) \ - return ret; \ - \ - ret = trace_seq_printf(s, "\nprint fmt: " print); \ - \ - return ret; \ -} - -#undef DEFINE_EVENT -#define DEFINE_EVENT(template, name, proto, args) - -#undef DEFINE_EVENT_PRINT -#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ -static int \ -ftrace_format_##name(struct ftrace_event_call *unused, \ - struct trace_seq *s) \ -{ \ - int ret = 0; \ - \ - ret = ftrace_format_setup_##template(unused, s); \ - if (!ret) \ - return ret; \ - \ - trace_seq_printf(s, "\nprint fmt: " print); \ - \ - return ret; \ -} - -#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) - /* * Stage 3 of the trace events. * @@ -622,7 +498,6 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ * .raw_init = trace_event_raw_init, * .regfunc = ftrace_reg_event_, * .unregfunc = ftrace_unreg_event_, - * .show_format = ftrace_format_, * } * */ @@ -657,6 +532,12 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ #define __assign_str(dst, src) \ strcpy(__get_str(dst), src); +#undef TP_fast_assign +#define TP_fast_assign(args...) 
args + +#undef TP_perf_assign +#define TP_perf_assign(args...) + #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ \ @@ -750,7 +631,6 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .regfunc = ftrace_raw_reg_event_##call, \ .unregfunc = ftrace_raw_unreg_event_##call, \ .print_fmt = print_fmt_##template, \ - .show_format = ftrace_format_##template, \ .define_fields = ftrace_define_fields_##template, \ _TRACE_PROFILE_INIT(call) \ } @@ -770,7 +650,6 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .regfunc = ftrace_raw_reg_event_##call, \ .unregfunc = ftrace_raw_unreg_event_##call, \ .print_fmt = print_fmt_##call, \ - .show_format = ftrace_format_##call, \ .define_fields = ftrace_define_fields_##template, \ _TRACE_PROFILE_INIT(call) \ } diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 961fda3556bb..8cd410254456 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -34,10 +34,6 @@ struct syscall_metadata { extern unsigned long arch_syscall_addr(int nr); extern int init_syscall_trace(struct ftrace_event_call *call); -extern int syscall_enter_format(struct ftrace_event_call *call, - struct trace_seq *s); -extern int syscall_exit_format(struct ftrace_event_call *call, - struct trace_seq *s); extern int syscall_enter_define_fields(struct ftrace_event_call *call); extern int syscall_exit_define_fields(struct ftrace_event_call *call); extern int reg_event_syscall_enter(struct ftrace_event_call *call); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 250ec865d5f5..c2a3077b7353 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -520,14 +520,6 @@ out: return ret; } -extern char *__bad_type_size(void); - -#undef FIELD -#define FIELD(type, name) \ - sizeof(type) != sizeof(field.name) ? 
__bad_type_size() : \ - #type, "common_" #name, offsetof(typeof(field), name), \ - sizeof(field.name), is_signed_type(type) - static ssize_t event_format_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) @@ -965,10 +957,6 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events, filter); } - /* A trace may not want to export its format */ - if (!call->show_format) - return 0; - trace_create_file("format", 0444, call->dir, call, format); diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 95d14b640a66..e091f64ba6ce 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -62,78 +62,6 @@ static void __always_unused ____ftrace_check_##name(void) \ #include "trace_entries.h" - -#undef __field -#define __field(type, item) \ - ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \ - offsetof(typeof(field), item), \ - sizeof(field.item), is_signed_type(type)); \ - if (!ret) \ - return 0; - -#undef __field_desc -#define __field_desc(type, container, item) \ - ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \ - offsetof(typeof(field), container.item), \ - sizeof(field.container.item), \ - is_signed_type(type)); \ - if (!ret) \ - return 0; - -#undef __array -#define __array(type, item, len) \ - ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ - "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \ - offsetof(typeof(field), item), \ - sizeof(field.item), is_signed_type(type)); \ - if (!ret) \ - return 0; - -#undef __array_desc -#define __array_desc(type, container, item, len) \ - ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ - "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \ - offsetof(typeof(field), container.item), \ - sizeof(field.container.item), \ - is_signed_type(type)); \ - if (!ret) \ - return 0; - -#undef __dynamic_array -#define __dynamic_array(type, 
item) \ - ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%zu;\tsize:0;\tsigned:%u;\n", \ - offsetof(typeof(field), item), \ - is_signed_type(type)); \ - if (!ret) \ - return 0; - -#undef F_printk -#define F_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args) - -#undef __entry -#define __entry REC - -#undef FTRACE_ENTRY -#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ -static int \ -ftrace_format_##name(struct ftrace_event_call *unused, \ - struct trace_seq *s) \ -{ \ - struct struct_name field __attribute__((unused)); \ - int ret = 0; \ - \ - tstruct; \ - \ - trace_seq_printf(s, "\nprint fmt: " print); \ - \ - return ret; \ -} - -#include "trace_entries.h" - #undef __field #define __field(type, item) \ ret = trace_define_field(event_call, #type, #item, \ @@ -235,7 +163,6 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .system = __stringify(TRACE_SYSTEM), \ .raw_init = ftrace_raw_init_event, \ .print_fmt = print, \ - .show_format = ftrace_format_##call, \ .define_fields = ftrace_define_fields_##call, \ }; \ diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 147491dccead..c99029916c76 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1174,82 +1174,6 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call) return 0; } -static int __probe_event_show_format(struct trace_seq *s, - struct trace_probe *tp, const char *fmt, - const char *arg) -{ - int i; - - /* Show format */ - if (!trace_seq_printf(s, "\nprint fmt: \"%s", fmt)) - return 0; - - for (i = 0; i < tp->nr_args; i++) - if (!trace_seq_printf(s, " %s=%%lx", tp->args[i].name)) - return 0; - - if (!trace_seq_printf(s, "\", %s", arg)) - return 0; - - for (i = 0; i < tp->nr_args; i++) - if (!trace_seq_printf(s, ", REC->%s", tp->args[i].name)) - return 0; - - return trace_seq_puts(s, "\n"); -} - -#undef SHOW_FIELD -#define SHOW_FIELD(type, item, name) \ - do { \ - ret = 
trace_seq_printf(s, "\tfield:" #type " %s;\t" \ - "offset:%u;\tsize:%u;\tsigned:%d;\n", name,\ - (unsigned int)offsetof(typeof(field), item),\ - (unsigned int)sizeof(type), \ - is_signed_type(type)); \ - if (!ret) \ - return 0; \ - } while (0) - -static int kprobe_event_show_format(struct ftrace_event_call *call, - struct trace_seq *s) -{ - struct kprobe_trace_entry field __attribute__((unused)); - int ret, i; - struct trace_probe *tp = (struct trace_probe *)call->data; - - SHOW_FIELD(unsigned long, ip, FIELD_STRING_IP); - SHOW_FIELD(int, nargs, FIELD_STRING_NARGS); - - /* Show fields */ - for (i = 0; i < tp->nr_args; i++) - SHOW_FIELD(unsigned long, args[i], tp->args[i].name); - trace_seq_puts(s, "\n"); - - return __probe_event_show_format(s, tp, "(%lx)", - "REC->" FIELD_STRING_IP); -} - -static int kretprobe_event_show_format(struct ftrace_event_call *call, - struct trace_seq *s) -{ - struct kretprobe_trace_entry field __attribute__((unused)); - int ret, i; - struct trace_probe *tp = (struct trace_probe *)call->data; - - SHOW_FIELD(unsigned long, func, FIELD_STRING_FUNC); - SHOW_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP); - SHOW_FIELD(int, nargs, FIELD_STRING_NARGS); - - /* Show fields */ - for (i = 0; i < tp->nr_args; i++) - SHOW_FIELD(unsigned long, args[i], tp->args[i].name); - trace_seq_puts(s, "\n"); - - return __probe_event_show_format(s, tp, "(%lx <- %lx)", - "REC->" FIELD_STRING_FUNC - ", REC->" FIELD_STRING_RETIP); -} - static int __set_print_fmt(struct trace_probe *tp, char *buf, int len) { int i; @@ -1504,12 +1428,10 @@ static int register_probe_event(struct trace_probe *tp) if (probe_is_return(tp)) { tp->event.trace = print_kretprobe_event; call->raw_init = probe_event_raw_init; - call->show_format = kretprobe_event_show_format; call->define_fields = kretprobe_event_define_fields; } else { tp->event.trace = print_kprobe_event; call->raw_init = probe_event_raw_init; - call->show_format = kprobe_event_show_format; call->define_fields = 
kprobe_event_define_fields; } if (set_print_fmt(tp) < 0) diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index a78e86349ecb..49cea70fbf6d 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -143,54 +143,6 @@ extern char *__bad_type_size(void); #type, #name, offsetof(typeof(trace), name), \ sizeof(trace.name), is_signed_type(type) -int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) -{ - int i; - int ret; - struct syscall_metadata *entry = call->data; - struct syscall_trace_enter trace; - int offset = offsetof(struct syscall_trace_enter, args); - - ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" - "\tsigned:%u;\n", - SYSCALL_FIELD(int, nr)); - if (!ret) - return 0; - - for (i = 0; i < entry->nb_args; i++) { - ret = trace_seq_printf(s, "\tfield:%s %s;", entry->types[i], - entry->args[i]); - if (!ret) - return 0; - ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;" - "\tsigned:%u;\n", offset, - sizeof(unsigned long), - is_signed_type(unsigned long)); - if (!ret) - return 0; - offset += sizeof(unsigned long); - } - - trace_seq_puts(s, "\nprint fmt: \""); - for (i = 0; i < entry->nb_args; i++) { - ret = trace_seq_printf(s, "%s: 0x%%0%zulx%s", entry->args[i], - sizeof(unsigned long), - i == entry->nb_args - 1 ? 
"" : ", "); - if (!ret) - return 0; - } - trace_seq_putc(s, '"'); - - for (i = 0; i < entry->nb_args; i++) { - ret = trace_seq_printf(s, ", ((unsigned long)(REC->%s))", - entry->args[i]); - if (!ret) - return 0; - } - - return trace_seq_putc(s, '\n'); -} - static int __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len) { @@ -252,24 +204,6 @@ static void free_syscall_print_fmt(struct ftrace_event_call *call) kfree(call->print_fmt); } -int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s) -{ - int ret; - struct syscall_trace_exit trace; - - ret = trace_seq_printf(s, - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" - "\tsigned:%u;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" - "\tsigned:%u;\n", - SYSCALL_FIELD(int, nr), - SYSCALL_FIELD(long, ret)); - if (!ret) - return 0; - - return trace_seq_printf(s, "\nprint fmt: \"0x%%lx\", REC->ret\n"); -} - int syscall_enter_define_fields(struct ftrace_event_call *call) { struct syscall_trace_enter trace; -- cgit From 7e53bd42d14c75192b99674c40fcc359392da59d Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 6 Jan 2010 20:08:50 +0800 Subject: tracing: Consolidate protection of reader access to the ring buffer At the beginning, access to the ring buffer was fully serialized by trace_types_lock. Patch d7350c3f4569 gives more freedom to readers, and patch b04cc6b1f6 adds code to protect trace_pipe and cpu#/trace_pipe. But actually it is not enough, ring buffer readers are not always read-only, they may consume data. This patch makes accesses to trace, trace_pipe, trace_pipe_raw cpu#/trace, cpu#/trace_pipe and cpu#/trace_pipe_raw serialized. And removes tracing_reader_cpumask which is used to protect trace_pipe. Details: Ring buffer serializes readers, but it is low level protection. The validity of the events (which returns by ring_buffer_peek() ..etc) are not protected by ring buffer. 
The content of events may become garbage if we allow another process to consume these events concurrently: A) the page of the consumed events may become a normal page (not reader page) in ring buffer, and this page will be rewritten by the events producer. B) The page of the consumed events may become a page for splice_read, and this page will be returned to system. This patch adds trace_access_lock() and trace_access_unlock() primitives. These primitives allow multi process access to different cpu ring buffers concurrently. These primitives don't distinguish read-only and read-consume access. Multi read-only access is also serialized. And we don't use these primitives when we open files, we only use them when we read files. Signed-off-by: Lai Jiangshan LKML-Reference: <4B447D52.1050602@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 136 ++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 97 insertions(+), 39 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0df1b0f2cb9e..abdd333a0825 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -102,9 +103,6 @@ static inline void ftrace_enable_cpu(void) static cpumask_var_t __read_mostly tracing_buffer_mask; -/* Define which cpu buffers are currently read in trace_pipe */ -static cpumask_var_t tracing_reader_cpumask; - #define for_each_tracing_cpu(cpu) \ for_each_cpu(cpu, tracing_buffer_mask) @@ -243,12 +241,91 @@ static struct tracer *current_trace __read_mostly; /* * trace_types_lock is used to protect the trace_types list. - * This lock is also used to keep user access serialized. - * Accesses from userspace will grab this lock while userspace - * activities happen inside the kernel. 
*/ static DEFINE_MUTEX(trace_types_lock); +/* + * serialize the access of the ring buffer + * + * ring buffer serializes readers, but it is low level protection. + * The validity of the events (which are returned by ring_buffer_peek() etc.) + * is not protected by ring buffer. + * + * The content of events may become garbage if we allow another process to consume + * these events concurrently: + * A) the page of the consumed events may become a normal page + * (not reader page) in ring buffer, and this page will be rewritten + * by events producer. + * B) The page of the consumed events may become a page for splice_read, + * and this page will be returned to system. + * + * These primitives allow multi process access to different cpu ring buffer + * concurrently. + * + * These primitives don't distinguish read-only and read-consume access. + * Multi read-only access is also serialized. + */ + +#ifdef CONFIG_SMP +static DECLARE_RWSEM(all_cpu_access_lock); +static DEFINE_PER_CPU(struct mutex, cpu_access_lock); + +static inline void trace_access_lock(int cpu) +{ + if (cpu == TRACE_PIPE_ALL_CPU) { + /* gain it for accessing the whole ring buffer. */ + down_write(&all_cpu_access_lock); + } else { + /* gain it for accessing a cpu ring buffer. */ + + /* Firstly block other trace_access_lock(TRACE_PIPE_ALL_CPU). */ + down_read(&all_cpu_access_lock); + + /* Secondly block other access to this @cpu ring buffer.
*/ + mutex_lock(&per_cpu(cpu_access_lock, cpu)); + } +} + +static inline void trace_access_unlock(int cpu) +{ + if (cpu == TRACE_PIPE_ALL_CPU) { + up_write(&all_cpu_access_lock); + } else { + mutex_unlock(&per_cpu(cpu_access_lock, cpu)); + up_read(&all_cpu_access_lock); + } +} + +static inline void trace_access_lock_init(void) +{ + int cpu; + + for_each_possible_cpu(cpu) + mutex_init(&per_cpu(cpu_access_lock, cpu)); +} + +#else + +static DEFINE_MUTEX(access_lock); + +static inline void trace_access_lock(int cpu) +{ + (void)cpu; + mutex_lock(&access_lock); +} + +static inline void trace_access_unlock(int cpu) +{ + (void)cpu; + mutex_unlock(&access_lock); +} + +static inline void trace_access_lock_init(void) +{ +} + +#endif + /* trace_wait is a waitqueue for tasks blocked on trace_poll */ static DECLARE_WAIT_QUEUE_HEAD(trace_wait); @@ -1580,12 +1657,6 @@ static void tracing_iter_reset(struct trace_iterator *iter, int cpu) } /* - * No necessary locking here. The worst thing which can - * happen is loosing events consumed at the same time - * by a trace_pipe reader. - * Other than that, we don't risk to crash the ring buffer - * because it serializes the readers. - * * The current tracer is copied to avoid a global locking * all around. 
*/ @@ -1640,12 +1711,16 @@ static void *s_start(struct seq_file *m, loff_t *pos) } trace_event_read_lock(); + trace_access_lock(cpu_file); return p; } static void s_stop(struct seq_file *m, void *p) { + struct trace_iterator *iter = m->private; + atomic_dec(&trace_record_cmdline_disabled); + trace_access_unlock(iter->cpu_file); trace_event_read_unlock(); } @@ -2836,22 +2911,6 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) mutex_lock(&trace_types_lock); - /* We only allow one reader per cpu */ - if (cpu_file == TRACE_PIPE_ALL_CPU) { - if (!cpumask_empty(tracing_reader_cpumask)) { - ret = -EBUSY; - goto out; - } - cpumask_setall(tracing_reader_cpumask); - } else { - if (!cpumask_test_cpu(cpu_file, tracing_reader_cpumask)) - cpumask_set_cpu(cpu_file, tracing_reader_cpumask); - else { - ret = -EBUSY; - goto out; - } - } - /* create a buffer to store the information to pass to userspace */ iter = kzalloc(sizeof(*iter), GFP_KERNEL); if (!iter) { @@ -2907,12 +2966,6 @@ static int tracing_release_pipe(struct inode *inode, struct file *file) mutex_lock(&trace_types_lock); - if (iter->cpu_file == TRACE_PIPE_ALL_CPU) - cpumask_clear(tracing_reader_cpumask); - else - cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask); - - if (iter->trace->pipe_close) iter->trace->pipe_close(iter); @@ -3074,6 +3127,7 @@ waitagain: iter->pos = -1; trace_event_read_lock(); + trace_access_lock(iter->cpu_file); while (find_next_entry_inc(iter) != NULL) { enum print_line_t ret; int len = iter->seq.len; @@ -3090,6 +3144,7 @@ waitagain: if (iter->seq.len >= cnt) break; } + trace_access_unlock(iter->cpu_file); trace_event_read_unlock(); /* Now copy what we have to the user */ @@ -3215,6 +3270,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, } trace_event_read_lock(); + trace_access_lock(iter->cpu_file); /* Fill as many pages as possible. 
*/ for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) { @@ -3238,6 +3294,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, trace_seq_init(&iter->seq); } + trace_access_unlock(iter->cpu_file); trace_event_read_unlock(); mutex_unlock(&iter->mutex); @@ -3539,10 +3596,12 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, info->read = 0; + trace_access_lock(info->cpu); ret = ring_buffer_read_page(info->tr->buffer, &info->spare, count, info->cpu, 0); + trace_access_unlock(info->cpu); if (ret < 0) return 0; @@ -3670,6 +3729,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, len &= PAGE_MASK; } + trace_access_lock(info->cpu); entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) { @@ -3717,6 +3777,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); } + trace_access_unlock(info->cpu); spd.nr_pages = i; /* did we read anything? 
*/ @@ -4153,6 +4214,8 @@ static __init int tracer_init_debugfs(void) struct dentry *d_tracer; int cpu; + trace_access_lock_init(); + d_tracer = tracing_init_dentry(); trace_create_file("tracing_enabled", 0644, d_tracer, @@ -4387,9 +4450,6 @@ __init static int tracer_alloc_buffers(void) if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL)) goto out_free_buffer_mask; - if (!zalloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL)) - goto out_free_tracing_cpumask; - /* To save memory, keep the ring buffer size to its minimum */ if (ring_buffer_expanded) ring_buf_size = trace_buf_size; @@ -4447,8 +4507,6 @@ __init static int tracer_alloc_buffers(void) return 0; out_free_cpumask: - free_cpumask_var(tracing_reader_cpumask); -out_free_tracing_cpumask: free_cpumask_var(tracing_cpumask); out_free_buffer_mask: free_cpumask_var(tracing_buffer_mask); -- cgit From d931369b74b3d6f2044f595af6f3dd074f65d9cf Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 6 Jan 2010 17:27:11 -0500 Subject: tracing: Add stack dump to trace_printk if stacktrace option is set If the ftrace stacktrace option is set, then add the stack dumps to trace_printk. 
Requested-by: Peter Zijlstra Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index abdd333a0825..5314c90bbc83 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1392,8 +1392,10 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) entry->fmt = fmt; memcpy(entry->buf, trace_buf, sizeof(u32) * len); - if (!filter_check_discard(call, entry, buffer, event)) + if (!filter_check_discard(call, entry, buffer, event)) { ring_buffer_unlock_commit(buffer, event); + ftrace_trace_stack(buffer, flags, 6, pc); + } out_unlock: arch_spin_unlock(&trace_buf_lock); @@ -1466,8 +1468,10 @@ int trace_array_vprintk(struct trace_array *tr, memcpy(&entry->buf, trace_buf, len); entry->buf[len] = '\0'; - if (!filter_check_discard(call, entry, buffer, event)) + if (!filter_check_discard(call, entry, buffer, event)) { ring_buffer_unlock_commit(buffer, event); + ftrace_trace_stack(buffer, irq_flags, 6, pc); + } out_unlock: arch_spin_unlock(&trace_buf_lock); -- cgit From 14640106f243a3b29944d7198569090fa6546f2d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 5 Jan 2010 17:46:48 -0500 Subject: tracing/kprobe: Drop function argument access syntax Drop function argument access syntax, because the function arguments depend on not only architecture but also compile-options and function API. And now, we have perf-probe for finding register/memory assigned to each argument. 
Signed-off-by: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: systemtap Cc: DLE Cc: Frederic Weisbecker Cc: Steven Rostedt Cc: Roland McGrath Cc: Oleg Nesterov Cc: Mahesh Salgaonkar Cc: Benjamin Herrenschmidt Cc: Michael Neuling Cc: linuxppc-dev@ozlabs.org LKML-Reference: <20100105224648.19431.52309.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- Documentation/trace/kprobetrace.txt | 21 ++++++++++----------- kernel/trace/trace_kprobe.c | 18 +----------------- 2 files changed, 11 insertions(+), 28 deletions(-) (limited to 'kernel/trace') diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt index c3eff6ff945f..f30978e001f8 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.txt @@ -37,15 +37,12 @@ Synopsis of kprobe_events @SYM[+|-offs] : Fetch memory at SYM +|- offs (SYM should be a data symbol) $stackN : Fetch Nth entry of stack (N >= 0) $stack : Fetch stack address. - $argN : Fetch function argument. (N >= 0)(*) - $retval : Fetch return value.(**) - +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(***) + $retval : Fetch return value.(*) + +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**) NAME=FETCHARG: Set NAME as the argument name of FETCHARG. - (*) aN may not correct on asmlinkaged functions and at the middle of - function body. - (**) only for return probe. - (***) this is useful for fetching a field of data structures. + (*) only for return probe. + (**) this is useful for fetching a field of data structures. Per-Probe Event Filtering @@ -82,11 +79,14 @@ Usage examples To add a probe as a new event, write a new definition to kprobe_events as below. 
- echo p:myprobe do_sys_open dfd=$arg0 filename=$arg1 flags=$arg2 mode=$arg3 > /sys/kernel/debug/tracing/kprobe_events + echo p:myprobe do_sys_open dfd=%ax filename=%dx flags=%cx mode=+4($stack) > /sys/kernel/debug/tracing/kprobe_events This sets a kprobe on the top of do_sys_open() function with recording -1st to 4th arguments as "myprobe" event. As this example shows, users can -choose more familiar names for each arguments. +1st to 4th arguments as "myprobe" event. Note that which register/stack entry is +assigned to each function argument depends on the arch-specific ABI. If you are unsure +of the ABI, please try to use the probe subcommand of perf-tools (you can find it +under tools/perf/). +As this example shows, users can choose more familiar names for each argument. echo r:myretprobe do_sys_open $retval >> /sys/kernel/debug/tracing/kprobe_events @@ -147,4 +147,3 @@ events, you need to enable it. returns from SYMBOL(e.g. "sys_open+0x1b/0x1d <- do_sys_open" means kernel returns from do_sys_open to sys_open+0x1b).
- diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 47f54ab57b68..7ac728ded964 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -91,11 +91,6 @@ static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr) return retval; } -static __kprobes unsigned long fetch_argument(struct pt_regs *regs, void *num) -{ - return regs_get_argument_nth(regs, (unsigned int)((unsigned long)num)); -} - static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs, void *dummy) { @@ -231,9 +226,7 @@ static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff) { int ret = -EINVAL; - if (ff->func == fetch_argument) - ret = snprintf(buf, n, "$arg%lu", (unsigned long)ff->data); - else if (ff->func == fetch_register) { + if (ff->func == fetch_register) { const char *name; name = regs_query_register_name((unsigned int)((long)ff->data)); ret = snprintf(buf, n, "%%%s", name); @@ -489,14 +482,6 @@ static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return) } } else ret = -EINVAL; - } else if (strncmp(arg, "arg", 3) == 0 && isdigit(arg[3])) { - ret = strict_strtoul(arg + 3, 10, ¶m); - if (ret || param > PARAM_MAX_ARGS) - ret = -EINVAL; - else { - ff->func = fetch_argument; - ff->data = (void *)param; - } } else ret = -EINVAL; return ret; @@ -611,7 +596,6 @@ static int create_trace_probe(int argc, char **argv) * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS] * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS] * Fetch args: - * $argN : fetch Nth of function argument. 
(N:0-) * $retval : fetch return value * $stack : fetch stack address * $stackN : fetch Nth of stack (N:0-) -- cgit From 24a53652e361321b09df5040711e69387344ce09 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 14 Jan 2010 08:23:15 +0100 Subject: tracing: Drop the tr check from the graph tracing path Each time we save a function entry from the function graph tracer, we check if the trace array is set, which is wasteful because it is set anyway before we start the tracer. All we need is to ensure we have good read and write orderings. When we set the trace array, we just need to guarantee it to be visible before starting tracing. Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Cc: Lai Jiangshan Cc: Paul E. McKenney LKML-Reference: <1263453795-7496-1-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace_functions_graph.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index b1342c5d37cf..f2252296607c 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -212,9 +212,6 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) int cpu; int pc; - if (unlikely(!tr)) - return 0; - if (!ftrace_trace_task(current)) return 0; @@ -287,11 +284,20 @@ void trace_graph_return(struct ftrace_graph_ret *trace) local_irq_restore(flags); } +void set_graph_array(struct trace_array *tr) +{ + graph_array = tr; + + /* Make graph_array visible before we start tracing */ + + smp_mb(); +} + static int graph_trace_init(struct trace_array *tr) { int ret; - graph_array = tr; + set_graph_array(tr); ret = register_ftrace_graph(&trace_graph_return, &trace_graph_entry); if (ret) @@ -301,11 +307,6 @@ static int graph_trace_init(struct trace_array *tr) return 0; } -void set_graph_array(struct trace_array *tr) -{ - graph_array = tr; -} - static void 
graph_trace_reset(struct trace_array *tr) { tracing_stop_cmdline_record(); -- cgit From 231e36f4d2e63dd770db80b9f5113310c2bcfcfd Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 14 Jan 2010 00:12:12 -0500 Subject: tracing/kprobe: Update kprobe tracing self test for new syntax Update kprobe tracing self test for new syntax (it supports deleting individual probes, and drops $argN support) and behavior change (new probes are disabled in default). This selftest includes the following checks: - Adding function-entry probe and return probe with arguments. - Enabling these probes. - Deleting it individually. Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <20100114051211.7814.29436.stgit@localhost6.localdomain6> Signed-off-by: Ingo Molnar --- kernel/trace/trace_kprobe.c | 55 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 47 insertions(+), 8 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 7ac728ded964..d6266cad6953 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1507,28 +1507,67 @@ static int kprobe_trace_selftest_target(int a1, int a2, int a3, static __init int kprobe_trace_self_tests_init(void) { - int ret; + int ret, warn = 0; int (*target)(int, int, int, int, int, int); + struct trace_probe *tp; target = kprobe_trace_selftest_target; pr_info("Testing kprobe tracing: "); ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target " - "$arg1 $arg2 $arg3 $arg4 $stack $stack0"); - if (WARN_ON_ONCE(ret)) - pr_warning("error enabling function entry\n"); + "$stack $stack0 +0($stack)"); + if (WARN_ON_ONCE(ret)) { + pr_warning("error on probing function entry.\n"); + warn++; + } else { + /* Enable trace point */ + tp = find_probe_event("testprobe", KPROBE_EVENT_SYSTEM); + if (WARN_ON_ONCE(tp == NULL)) { + pr_warning("error on getting new probe.\n"); + warn++; + } else + 
probe_event_enable(&tp->call); + } ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target " "$retval"); - if (WARN_ON_ONCE(ret)) - pr_warning("error enabling function return\n"); + if (WARN_ON_ONCE(ret)) { + pr_warning("error on probing function return.\n"); + warn++; + } else { + /* Enable trace point */ + tp = find_probe_event("testprobe2", KPROBE_EVENT_SYSTEM); + if (WARN_ON_ONCE(tp == NULL)) { + pr_warning("error on getting new probe.\n"); + warn++; + } else + probe_event_enable(&tp->call); + } + + if (warn) + goto end; ret = target(1, 2, 3, 4, 5, 6); - cleanup_all_probes(); + ret = command_trace_probe("-:testprobe"); + if (WARN_ON_ONCE(ret)) { + pr_warning("error on deleting a probe.\n"); + warn++; + } + + ret = command_trace_probe("-:testprobe2"); + if (WARN_ON_ONCE(ret)) { + pr_warning("error on deleting a probe.\n"); + warn++; + } - pr_cont("OK\n"); +end: + cleanup_all_probes(); + if (warn) + pr_cont("NG: Some tests are failed. Please check them.\n"); + else + pr_cont("OK\n"); return 0; } -- cgit From ea2c68a08fedb5053ba312d661e47df9f4d72411 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 13 Jan 2010 19:38:30 +0800 Subject: tracing: Simplify test for function_graph tracing start point In the function graph tracer, a calling function is to be traced only when it is enabled through the set_graph_function file, or when it is nested in an enabled function. Current code uses TSK_TRACE_FL_GRAPH to test whether it is nested or not. Looking at the code, we can get this: (trace->depth > 0) <==> (TSK_TRACE_FL_GRAPH is set) trace->depth is more explicit to tell that it is nested. So we use trace->depth directly and simplify the code. No functionality is changed. TSK_TRACE_FL_GRAPH is not removed yet, it is left for future usage. 
Signed-off-by: Lai Jiangshan Cc: Ingo Molnar Cc: Steven Rostedt LKML-Reference: <4B4DB0B6.7040607@cn.fujitsu.com> Signed-off-by: Frederic Weisbecker --- kernel/trace/trace.h | 2 +- kernel/trace/trace_functions_graph.c | 8 ++------ 2 files changed, 3 insertions(+), 7 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 4df6a77eb196..ce077fbbf552 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -504,7 +504,7 @@ static inline int ftrace_graph_addr(unsigned long addr) { int i; - if (!ftrace_graph_count || test_tsk_trace_graph(current)) + if (!ftrace_graph_count) return 1; for (i = 0; i < ftrace_graph_count; i++) { diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index f2252296607c..616b135c9eb9 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -215,7 +215,8 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) if (!ftrace_trace_task(current)) return 0; - if (!ftrace_graph_addr(trace->func)) + /* trace it when it is-nested-in or is a function enabled. 
*/ + if (!(trace->depth || ftrace_graph_addr(trace->func))) return 0; local_irq_save(flags); @@ -228,9 +229,6 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) } else { ret = 0; } - /* Only do the atomic if it is not already set */ - if (!test_tsk_trace_graph(current)) - set_tsk_trace_graph(current); atomic_dec(&data->disabled); local_irq_restore(flags); @@ -278,8 +276,6 @@ void trace_graph_return(struct ftrace_graph_ret *trace) pc = preempt_count(); __trace_graph_return(tr, trace, flags, pc); } - if (!trace->depth) - clear_tsk_trace_graph(current); atomic_dec(&data->disabled); local_irq_restore(flags); } -- cgit From 430ad5a600a83956749307b13257c464c3826b55 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Thu, 28 Jan 2010 09:32:29 +0800 Subject: perf: Factorize trace events raw sample buffer operations Introduce ftrace_perf_buf_prepare() and ftrace_perf_buf_submit() to gather the common code that operates on raw events sampling buffer. This cleans up redundant code between regular trace events, syscall events and kprobe events. 
Changelog v1->v2: - Rename function name as per Masami and Frederic's suggestion - Add __kprobes for ftrace_perf_buf_prepare() and make ftrace_perf_buf_submit() inline as per Masami's suggestion - Export ftrace_perf_buf_prepare since modules will use it Signed-off-by: Xiao Guangrong Acked-by: Masami Hiramatsu Cc: Ingo Molnar Cc: Steven Rostedt Cc: Paul Mackerras Cc: Jason Baron Cc: Peter Zijlstra LKML-Reference: <4B60E92D.9000808@cn.fujitsu.com> Signed-off-by: Frederic Weisbecker --- include/linux/ftrace_event.h | 18 ++++++-- include/trace/ftrace.h | 48 +++------------------ kernel/trace/trace_event_profile.c | 52 ++++++++++++++++++++--- kernel/trace/trace_kprobe.c | 86 +++++--------------------------------- kernel/trace/trace_syscalls.c | 71 +++++-------------------------- 5 files changed, 88 insertions(+), 187 deletions(-) (limited to 'kernel/trace') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 0a09e758c7d3..cd95919d9ff3 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -5,6 +5,7 @@ #include #include #include +#include struct trace_array; struct tracer; @@ -138,9 +139,6 @@ struct ftrace_event_call { #define FTRACE_MAX_PROFILE_SIZE 2048 -extern char *perf_trace_buf; -extern char *perf_trace_buf_nmi; - #define MAX_FILTER_PRED 32 #define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ @@ -195,6 +193,20 @@ extern void ftrace_profile_disable(int event_id); extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, char *filter_str); extern void ftrace_profile_free_filter(struct perf_event *event); +extern void * +ftrace_perf_buf_prepare(int size, unsigned short type, int *rctxp, + unsigned long *irq_flags); + +static inline void +ftrace_perf_buf_submit(void *raw_data, int size, int rctx, u64 addr, + u64 count, unsigned long irq_flags) +{ + struct trace_entry *entry = raw_data; + + perf_tp_event(entry->type, addr, count, raw_data, size); + 
perf_swevent_put_recursion_context(rctx); + local_irq_restore(irq_flags); +} #endif #endif /* _LINUX_FTRACE_EVENT_H */ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 4a46a60c2077..f2c09e4d656c 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -850,22 +850,12 @@ ftrace_profile_templ_##call(struct ftrace_event_call *event_call, \ proto) \ { \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ - extern int perf_swevent_get_recursion_context(void); \ - extern void perf_swevent_put_recursion_context(int rctx); \ - extern void perf_tp_event(int, u64, u64, void *, int); \ struct ftrace_raw_##call *entry; \ u64 __addr = 0, __count = 1; \ unsigned long irq_flags; \ - struct trace_entry *ent; \ int __entry_size; \ int __data_size; \ - char *trace_buf; \ - char *raw_data; \ - int __cpu; \ int rctx; \ - int pc; \ - \ - pc = preempt_count(); \ \ __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\ @@ -875,42 +865,16 @@ ftrace_profile_templ_##call(struct ftrace_event_call *event_call, \ if (WARN_ONCE(__entry_size > FTRACE_MAX_PROFILE_SIZE, \ "profile buffer not large enough")) \ return; \ - \ - local_irq_save(irq_flags); \ - \ - rctx = perf_swevent_get_recursion_context(); \ - if (rctx < 0) \ - goto end_recursion; \ - \ - __cpu = smp_processor_id(); \ - \ - if (in_nmi()) \ - trace_buf = rcu_dereference(perf_trace_buf_nmi); \ - else \ - trace_buf = rcu_dereference(perf_trace_buf); \ - \ - if (!trace_buf) \ - goto end; \ - \ - raw_data = per_cpu_ptr(trace_buf, __cpu); \ - \ - *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \ - entry = (struct ftrace_raw_##call *)raw_data; \ - ent = &entry->ent; \ - tracing_generic_entry_update(ent, irq_flags, pc); \ - ent->type = event_call->id; \ - \ + entry = (struct ftrace_raw_##call *)ftrace_perf_buf_prepare( \ + __entry_size, event_call->id, &rctx, &irq_flags); \ + if (!entry) \ + return; \ tstruct \ \ { 
assign; } \ \ - perf_tp_event(event_call->id, __addr, __count, entry, \ - __entry_size); \ - \ -end: \ - perf_swevent_put_recursion_context(rctx); \ -end_recursion: \ - local_irq_restore(irq_flags); \ + ftrace_perf_buf_submit(entry, __entry_size, rctx, __addr, \ + __count, irq_flags); \ } #undef DEFINE_EVENT diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index 9e25573242cf..f0d693005075 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c @@ -6,14 +6,12 @@ */ #include +#include #include "trace.h" -char *perf_trace_buf; -EXPORT_SYMBOL_GPL(perf_trace_buf); - -char *perf_trace_buf_nmi; -EXPORT_SYMBOL_GPL(perf_trace_buf_nmi); +static char *perf_trace_buf; +static char *perf_trace_buf_nmi; typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ; @@ -120,3 +118,47 @@ void ftrace_profile_disable(int event_id) } mutex_unlock(&event_mutex); } + +__kprobes void *ftrace_perf_buf_prepare(int size, unsigned short type, + int *rctxp, unsigned long *irq_flags) +{ + struct trace_entry *entry; + char *trace_buf, *raw_data; + int pc, cpu; + + pc = preempt_count(); + + /* Protect the per cpu buffer, begin the rcu read side */ + local_irq_save(*irq_flags); + + *rctxp = perf_swevent_get_recursion_context(); + if (*rctxp < 0) + goto err_recursion; + + cpu = smp_processor_id(); + + if (in_nmi()) + trace_buf = rcu_dereference(perf_trace_buf_nmi); + else + trace_buf = rcu_dereference(perf_trace_buf); + + if (!trace_buf) + goto err; + + raw_data = per_cpu_ptr(trace_buf, cpu); + + /* zero the dead bytes from align to not leak stack to user */ + *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; + + entry = (struct trace_entry *)raw_data; + tracing_generic_entry_update(entry, *irq_flags, pc); + entry->type = type; + + return raw_data; +err: + perf_swevent_put_recursion_context(*rctxp); +err_recursion: + local_irq_restore(*irq_flags); + return NULL; +} +EXPORT_SYMBOL_GPL(ftrace_perf_buf_prepare); diff --git 
a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index d6266cad6953..2e28ee36646f 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1243,14 +1243,10 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); struct ftrace_event_call *call = &tp->call; struct kprobe_trace_entry *entry; - struct trace_entry *ent; - int size, __size, i, pc, __cpu; + int size, __size, i; unsigned long irq_flags; - char *trace_buf; - char *raw_data; int rctx; - pc = preempt_count(); __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); size = ALIGN(__size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); @@ -1258,45 +1254,16 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, "profile buffer not large enough")) return 0; - /* - * Protect the non nmi buffer - * This also protects the rcu read side - */ - local_irq_save(irq_flags); - - rctx = perf_swevent_get_recursion_context(); - if (rctx < 0) - goto end_recursion; - - __cpu = smp_processor_id(); - - if (in_nmi()) - trace_buf = rcu_dereference(perf_trace_buf_nmi); - else - trace_buf = rcu_dereference(perf_trace_buf); - - if (!trace_buf) - goto end; - - raw_data = per_cpu_ptr(trace_buf, __cpu); - - /* Zero dead bytes from alignment to avoid buffer leak to userspace */ - *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; - entry = (struct kprobe_trace_entry *)raw_data; - ent = &entry->ent; + entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags); + if (!entry) + return 0; - tracing_generic_entry_update(ent, irq_flags, pc); - ent->type = call->id; entry->nargs = tp->nr_args; entry->ip = (unsigned long)kp->addr; for (i = 0; i < tp->nr_args; i++) entry->args[i] = call_fetch(&tp->args[i].fetch, regs); - perf_tp_event(call->id, entry->ip, 1, entry, size); -end: - perf_swevent_put_recursion_context(rctx); -end_recursion: - local_irq_restore(irq_flags); + ftrace_perf_buf_submit(entry, size, rctx, entry->ip, 1, 
irq_flags); return 0; } @@ -1308,14 +1275,10 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); struct ftrace_event_call *call = &tp->call; struct kretprobe_trace_entry *entry; - struct trace_entry *ent; - int size, __size, i, pc, __cpu; + int size, __size, i; unsigned long irq_flags; - char *trace_buf; - char *raw_data; int rctx; - pc = preempt_count(); __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); size = ALIGN(__size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); @@ -1323,46 +1286,17 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, "profile buffer not large enough")) return 0; - /* - * Protect the non nmi buffer - * This also protects the rcu read side - */ - local_irq_save(irq_flags); - - rctx = perf_swevent_get_recursion_context(); - if (rctx < 0) - goto end_recursion; - - __cpu = smp_processor_id(); - - if (in_nmi()) - trace_buf = rcu_dereference(perf_trace_buf_nmi); - else - trace_buf = rcu_dereference(perf_trace_buf); - - if (!trace_buf) - goto end; - - raw_data = per_cpu_ptr(trace_buf, __cpu); - - /* Zero dead bytes from alignment to avoid buffer leak to userspace */ - *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; - entry = (struct kretprobe_trace_entry *)raw_data; - ent = &entry->ent; + entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags); + if (!entry) + return 0; - tracing_generic_entry_update(ent, irq_flags, pc); - ent->type = call->id; entry->nargs = tp->nr_args; entry->func = (unsigned long)tp->rp.kp.addr; entry->ret_ip = (unsigned long)ri->ret_addr; for (i = 0; i < tp->nr_args; i++) entry->args[i] = call_fetch(&tp->args[i].fetch, regs); - perf_tp_event(call->id, entry->ret_ip, 1, entry, size); -end: - perf_swevent_put_recursion_context(rctx); -end_recursion: - local_irq_restore(irq_flags); + ftrace_perf_buf_submit(entry, size, rctx, entry->ret_ip, 1, irq_flags); return 0; } diff --git 
a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index f694f66d75b0..4e332b9e449c 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -433,12 +433,9 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) struct syscall_metadata *sys_data; struct syscall_trace_enter *rec; unsigned long flags; - char *trace_buf; - char *raw_data; int syscall_nr; int rctx; int size; - int cpu; syscall_nr = syscall_get_nr(current, regs); if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) @@ -457,37 +454,15 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) "profile buffer not large enough")) return; - /* Protect the per cpu buffer, begin the rcu read side */ - local_irq_save(flags); - - rctx = perf_swevent_get_recursion_context(); - if (rctx < 0) - goto end_recursion; - - cpu = smp_processor_id(); - - trace_buf = rcu_dereference(perf_trace_buf); - - if (!trace_buf) - goto end; - - raw_data = per_cpu_ptr(trace_buf, cpu); - - /* zero the dead bytes from align to not leak stack to user */ - *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; + rec = (struct syscall_trace_enter *)ftrace_perf_buf_prepare(size, + sys_data->enter_event->id, &rctx, &flags); + if (!rec) + return; - rec = (struct syscall_trace_enter *) raw_data; - tracing_generic_entry_update(&rec->ent, 0, 0); - rec->ent.type = sys_data->enter_event->id; rec->nr = syscall_nr; syscall_get_arguments(current, regs, 0, sys_data->nb_args, (unsigned long *)&rec->args); - perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size); - -end: - perf_swevent_put_recursion_context(rctx); -end_recursion: - local_irq_restore(flags); + ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags); } int prof_sysenter_enable(struct ftrace_event_call *call) @@ -531,11 +506,8 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) struct syscall_trace_exit *rec; unsigned long flags; int syscall_nr; - char *trace_buf; - char *raw_data; int rctx; int size; - int cpu; syscall_nr = 
syscall_get_nr(current, regs); if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) @@ -557,38 +529,15 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) "exit event has grown above profile buffer size")) return; - /* Protect the per cpu buffer, begin the rcu read side */ - local_irq_save(flags); - - rctx = perf_swevent_get_recursion_context(); - if (rctx < 0) - goto end_recursion; - - cpu = smp_processor_id(); - - trace_buf = rcu_dereference(perf_trace_buf); - - if (!trace_buf) - goto end; - - raw_data = per_cpu_ptr(trace_buf, cpu); - - /* zero the dead bytes from align to not leak stack to user */ - *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; - - rec = (struct syscall_trace_exit *)raw_data; + rec = (struct syscall_trace_exit *)ftrace_perf_buf_prepare(size, + sys_data->exit_event->id, &rctx, &flags); + if (!rec) + return; - tracing_generic_entry_update(&rec->ent, 0, 0); - rec->ent.type = sys_data->exit_event->id; rec->nr = syscall_nr; rec->ret = syscall_get_return_value(current, regs); - perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size); - -end: - perf_swevent_put_recursion_context(rctx); -end_recursion: - local_irq_restore(flags); + ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags); } int prof_sysexit_enable(struct ftrace_event_call *call) -- cgit From 1e12a4a7a3a78bc9c3aaf3486dde3b8ab1cdf465 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Thu, 28 Jan 2010 09:34:27 +0800 Subject: tracing/kprobe: Cleanup unused return value of tracing functions The return values of the kprobe's tracing functions are meaningless, lets remove these. 
Signed-off-by: Xiao Guangrong Acked-by: Masami Hiramatsu Cc: Steven Rostedt Cc: Ingo Molnar Cc: Paul Mackerras Cc: Jason Baron Cc: Peter Zijlstra LKML-Reference: <4B60E9A3.2040505@cn.fujitsu.com> [fweisbec@gmail: whitespace fixes, drop useless void returns in end of functions] Signed-off-by: Frederic Weisbecker --- kernel/trace/trace_kprobe.c | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 2e28ee36646f..6178abf3637e 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -942,7 +942,7 @@ static const struct file_operations kprobe_profile_ops = { }; /* Kprobe handler */ -static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) +static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) { struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); struct kprobe_trace_entry *entry; @@ -962,7 +962,7 @@ static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) event = trace_current_buffer_lock_reserve(&buffer, call->id, size, irq_flags, pc); if (!event) - return 0; + return; entry = ring_buffer_event_data(event); entry->nargs = tp->nr_args; @@ -972,11 +972,10 @@ static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) if (!filter_current_check_discard(buffer, call, entry, event)) trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); - return 0; } /* Kretprobe handler */ -static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri, +static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri, struct pt_regs *regs) { struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); @@ -995,7 +994,7 @@ static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri, event = trace_current_buffer_lock_reserve(&buffer, call->id, size, irq_flags, pc); if (!event) - return 
0; + return; entry = ring_buffer_event_data(event); entry->nargs = tp->nr_args; @@ -1006,8 +1005,6 @@ static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri, if (!filter_current_check_discard(buffer, call, entry, event)) trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); - - return 0; } /* Event entry printers */ @@ -1237,7 +1234,7 @@ static int kretprobe_event_show_format(struct ftrace_event_call *call, #ifdef CONFIG_PERF_EVENTS /* Kprobe profile handler */ -static __kprobes int kprobe_profile_func(struct kprobe *kp, +static __kprobes void kprobe_profile_func(struct kprobe *kp, struct pt_regs *regs) { struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); @@ -1252,11 +1249,11 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, size -= sizeof(u32); if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, "profile buffer not large enough")) - return 0; + return; entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags); if (!entry) - return 0; + return; entry->nargs = tp->nr_args; entry->ip = (unsigned long)kp->addr; @@ -1264,12 +1261,10 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, entry->args[i] = call_fetch(&tp->args[i].fetch, regs); ftrace_perf_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags); - - return 0; } /* Kretprobe profile handler */ -static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, +static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri, struct pt_regs *regs) { struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); @@ -1284,11 +1279,11 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, size -= sizeof(u32); if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, "profile buffer not large enough")) - return 0; + return; entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags); if (!entry) - return 0; + return; entry->nargs = tp->nr_args; entry->func = (unsigned long)tp->rp.kp.addr; @@ -1297,8 
+1292,6 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, entry->args[i] = call_fetch(&tp->args[i].fetch, regs); ftrace_perf_buf_submit(entry, size, rctx, entry->ret_ip, 1, irq_flags); - - return 0; } static int probe_profile_enable(struct ftrace_event_call *call) -- cgit From 4f48f8b7fd18c44f8478174f9925cc3c059c6ce4 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 2 Feb 2010 15:32:09 +0800 Subject: tracing: Fix circular dead lock in stack trace When we cat /tracing/stack_trace, we may cause circular lock: sys_read() t_start() arch_spin_lock(&max_stack_lock); t_show() seq_printf(), vsnprintf() .... /* they are all trace-able, when they are traced, max_stack_lock may be required again. */ The following script can trigger this circular dead lock very easy: #!/bin/bash echo 1 > /proc/sys/kernel/stack_tracer_enabled mount -t debugfs xxx /mnt > /dev/null 2>&1 ( # make check_stack() zealous to require max_stack_lock for ((; ;)) { echo 1 > /mnt/tracing/stack_max_size } ) & for ((; ;)) { cat /mnt/tracing/stack_trace > /dev/null } To fix this bug, we increase the percpu trace_active before require the lock. 
Reported-by: Li Zefan Signed-off-by: Lai Jiangshan LKML-Reference: <4B67D4F9.9080905@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace_stack.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 678a5120ee30..f4bc9b27de5f 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -157,6 +157,7 @@ stack_max_size_write(struct file *filp, const char __user *ubuf, unsigned long val, flags; char buf[64]; int ret; + int cpu; if (count >= sizeof(buf)) return -EINVAL; @@ -171,9 +172,20 @@ stack_max_size_write(struct file *filp, const char __user *ubuf, return ret; local_irq_save(flags); + + /* + * In case we trace inside arch_spin_lock() or after (NMI), + * we will cause circular lock, so we also need to increase + * the percpu trace_active here. + */ + cpu = smp_processor_id(); + per_cpu(trace_active, cpu)++; + arch_spin_lock(&max_stack_lock); *ptr = val; arch_spin_unlock(&max_stack_lock); + + per_cpu(trace_active, cpu)--; local_irq_restore(flags); return count; @@ -206,7 +218,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos) static void *t_start(struct seq_file *m, loff_t *pos) { + int cpu; + local_irq_disable(); + + cpu = smp_processor_id(); + per_cpu(trace_active, cpu)++; + arch_spin_lock(&max_stack_lock); if (*pos == 0) @@ -217,7 +235,13 @@ static void *t_start(struct seq_file *m, loff_t *pos) static void t_stop(struct seq_file *m, void *p) { + int cpu; + arch_spin_unlock(&max_stack_lock); + + cpu = smp_processor_id(); + per_cpu(trace_active, cpu)--; + local_irq_enable(); } -- cgit From 2cfa19780d61740f65790c5bae363b759d7c96fa Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 2 Feb 2010 16:49:11 -0500 Subject: ftrace/alternatives: Introducing *_text_reserved functions Introducing *_text_reserved functions for checking the text address range is partially reserved or not. 
This patch provides checking routines for x86 smp alternatives and dynamic ftrace. Since both functions modify fixed pieces of kernel text, they should reserve and protect those from other dynamic text modifier, like kprobes. This will also be extended when introducing other subsystems which modify fixed pieces of kernel text. Dynamic text modifiers should avoid those. Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Steven Rostedt Cc: przemyslaw@pawelczyk.it Cc: Frederic Weisbecker Cc: Ananth N Mavinakayanahalli Cc: Jim Keniston Cc: Mathieu Desnoyers Cc: Jason Baron LKML-Reference: <20100202214911.4694.16587.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/alternative.h | 5 +++++ arch/x86/kernel/alternative.c | 16 ++++++++++++++++ include/linux/ftrace.h | 6 ++++++ kernel/trace/ftrace.c | 15 +++++++++++++++ 4 files changed, 42 insertions(+) (limited to 'kernel/trace') diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 69b74a7b877f..ac80b7d70014 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -65,12 +65,17 @@ extern void alternatives_smp_module_add(struct module *mod, char *name, void *text, void *text_end); extern void alternatives_smp_module_del(struct module *mod); extern void alternatives_smp_switch(int smp); +extern int alternatives_text_reserved(void *start, void *end); #else static inline void alternatives_smp_module_add(struct module *mod, char *name, void *locks, void *locks_end, void *text, void *text_end) {} static inline void alternatives_smp_module_del(struct module *mod) {} static inline void alternatives_smp_switch(int smp) {} +static inline int alternatives_text_reserved(void *start, void *end) +{ + return 0; +} #endif /* CONFIG_SMP */ /* alternative assembly primitive: */ diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index de7353c0ce9c..3c13284ff86d 100644 --- 
a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -390,6 +390,22 @@ void alternatives_smp_switch(int smp) mutex_unlock(&smp_alt); } +/* Return 1 if the address range is reserved for smp-alternatives */ +int alternatives_text_reserved(void *start, void *end) +{ + struct smp_alt_module *mod; + u8 **ptr; + + list_for_each_entry(mod, &smp_alt_modules, next) { + if (mod->text > end || mod->text_end < start) + continue; + for (ptr = mod->locks; ptr < mod->locks_end; ptr++) + if (start <= *ptr && end >= *ptr) + return 1; + } + + return 0; +} #endif #ifdef CONFIG_PARAVIRT diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 0b4f97d24d7f..9d127efed43c 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -134,6 +134,8 @@ extern void unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops); extern void unregister_ftrace_function_probe_all(char *glob); +extern int ftrace_text_reserved(void *start, void *end); + enum { FTRACE_FL_FREE = (1 << 0), FTRACE_FL_FAILED = (1 << 1), @@ -250,6 +252,10 @@ static inline int unregister_ftrace_command(char *cmd_name) { return -EINVAL; } +static inline int ftrace_text_reserved(void *start, void *end) +{ + return 0; +} #endif /* CONFIG_DYNAMIC_FTRACE */ /* totally disable ftrace - can not re-enable after this */ diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 1e6640f80454..3d90661a5f40 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1025,6 +1025,21 @@ static void ftrace_bug(int failed, unsigned long ip) } +/* Return 1 if the address range is reserved for ftrace */ +int ftrace_text_reserved(void *start, void *end) +{ + struct dyn_ftrace *rec; + struct ftrace_page *pg; + + do_for_each_ftrace_rec(pg, rec) { + if (rec->ip <= (unsigned long)end && + rec->ip + MCOUNT_INSN_SIZE > (unsigned long)start) + return 1; + } while_for_each_ftrace_rec(); + return 0; +} + + static int __ftrace_replace_code(struct dyn_ftrace *rec, int enable) { -- cgit 
From f24bb999d2b9f2950e5cac5b69bffedf73c24ea4 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 2 Feb 2010 16:49:25 -0500 Subject: ftrace: Remove record freezing Remove record freezing. Because kprobes never puts probe on ftrace's mcount call anymore, it doesn't need ftrace to check whether kprobes on it. Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Steven Rostedt Cc: przemyslaw@pawelczyk.it Cc: Frederic Weisbecker LKML-Reference: <20100202214925.4694.73469.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 1 - kernel/trace/ftrace.c | 39 --------------------------------------- 2 files changed, 40 deletions(-) (limited to 'kernel/trace') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 9d127efed43c..eb054ae95605 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -143,7 +143,6 @@ enum { FTRACE_FL_ENABLED = (1 << 3), FTRACE_FL_NOTRACE = (1 << 4), FTRACE_FL_CONVERTED = (1 << 5), - FTRACE_FL_FROZEN = (1 << 6), }; struct dyn_ftrace { diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 3d90661a5f40..1904797f4a8a 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include @@ -898,36 +897,6 @@ static struct dyn_ftrace *ftrace_free_records; } \ } -#ifdef CONFIG_KPROBES - -static int frozen_record_count; - -static inline void freeze_record(struct dyn_ftrace *rec) -{ - if (!(rec->flags & FTRACE_FL_FROZEN)) { - rec->flags |= FTRACE_FL_FROZEN; - frozen_record_count++; - } -} - -static inline void unfreeze_record(struct dyn_ftrace *rec) -{ - if (rec->flags & FTRACE_FL_FROZEN) { - rec->flags &= ~FTRACE_FL_FROZEN; - frozen_record_count--; - } -} - -static inline int record_frozen(struct dyn_ftrace *rec) -{ - return rec->flags & FTRACE_FL_FROZEN; -} -#else -# define freeze_record(rec) ({ 0; }) -# define unfreeze_record(rec) ({ 0; }) -# define record_frozen(rec) ({ 0; }) -#endif /* 
CONFIG_KPROBES */ - static void ftrace_free_rec(struct dyn_ftrace *rec) { rec->freelist = ftrace_free_records; @@ -1091,14 +1060,6 @@ static void ftrace_replace_code(int enable) !(rec->flags & FTRACE_FL_CONVERTED)) continue; - /* ignore updates to this record's mcount site */ - if (get_kprobe((void *)rec->ip)) { - freeze_record(rec); - continue; - } else { - unfreeze_record(rec); - } - failed = __ftrace_replace_code(rec, enable); if (failed) { rec->flags |= FTRACE_FL_FAILED; -- cgit From ede55c9d78101fef0d8e620940a5163f14b02f29 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 27 Jan 2010 11:25:54 -0500 Subject: tracing: Add correct/incorrect to sort keys for branch annotation output The branch annotation is a bit difficult to see the worst offenders because it only sorts by percentage: correct incorrect % Function File Line ------- --------- - -------- ---- ---- 0 163 100 qdisc_restart sch_generic.c 179 0 163 100 pfifo_fast_dequeue sch_generic.c 447 0 4 100 pskb_trim_rcsum skbuff.h 1689 0 4 100 llc_rcv llc_input.c 170 0 18 100 psmouse_interrupt psmouse-base.c 304 0 3 100 atkbd_interrupt atkbd.c 389 0 5 100 usb_alloc_dev usb.c 437 0 11 100 vsscanf vsprintf.c 1897 0 2 100 IS_ERR err.h 34 0 23 100 __rmqueue_fallback page_alloc.c 865 0 4 100 probe_wakeup_sched_switch trace_sched_wakeup.c 142 0 3 100 move_masked_irq migration.c 11 Adding the incorrect and correct values as sort keys makes this file a bit more informative: correct incorrect % Function File Line ------- --------- - -------- ---- ---- 0 366541 100 audit_syscall_entry auditsc.c 1637 0 366538 100 audit_syscall_exit auditsc.c 1685 0 115839 100 sched_info_switch sched_stats.h 269 0 74567 100 sched_info_queued sched_stats.h 222 0 66578 100 sched_info_dequeued sched_stats.h 177 0 15113 100 trace_workqueue_insertion workqueue.h 38 0 15107 100 trace_workqueue_execution workqueue.h 45 0 3622 100 syscall_trace_leave ptrace.c 1772 0 2750 100 sched_move_task sched.c 10100 0 2750 100 sched_move_task sched.c 
10110 0 1815 100 pre_schedule_rt sched_rt.c 1462 0 837 100 audit_alloc auditsc.c 879 0 814 100 tcp_mss_split_point tcp_output.c 1302 Signed-off-by: Steven Rostedt --- kernel/trace/trace_branch.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index 4a194f08f88c..b9bc4d470177 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -307,8 +307,23 @@ static int annotated_branch_stat_cmp(void *p1, void *p2) return -1; if (percent_a > percent_b) return 1; - else - return 0; + + if (a->incorrect < b->incorrect) + return -1; + if (a->incorrect > b->incorrect) + return 1; + + /* + * Since the above shows worse (incorrect) cases + * first, we continue that by showing best (correct) + * cases last. + */ + if (a->correct > b->correct) + return -1; + if (a->correct < b->correct) + return 1; + + return 0; } static struct tracer_stat annotated_branch_stats = { -- cgit From c7c6b1fe9f942c1a30585ec2210a09dfff238506 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 10 Feb 2010 15:43:04 +0800 Subject: ftrace: Allow to remove a single function from function graph filter I don't see why we can only clear all functions from the filter. 
After patching: # echo sys_open > set_graph_function # echo sys_close >> set_graph_function # cat set_graph_function sys_open sys_close # echo '!sys_close' >> set_graph_function # cat set_graph_function sys_open Signed-off-by: Li Zefan LKML-Reference: <4B726388.2000408@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 51 +++++++++++++++++++++++++++++++-------------------- kernel/trace/trace.h | 3 ++- 2 files changed, 33 insertions(+), 21 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 7968762c8167..43bec993c864 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2426,6 +2426,7 @@ static const struct file_operations ftrace_notrace_fops = { static DEFINE_MUTEX(graph_lock); int ftrace_graph_count; +int ftrace_graph_filter_enabled; unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly; static void * @@ -2448,7 +2449,7 @@ static void *g_start(struct seq_file *m, loff_t *pos) mutex_lock(&graph_lock); /* Nothing, tell g_show to print all functions are enabled */ - if (!ftrace_graph_count && !*pos) + if (!ftrace_graph_filter_enabled && !*pos) return (void *)1; return __g_next(m, pos); @@ -2494,6 +2495,7 @@ ftrace_graph_open(struct inode *inode, struct file *file) mutex_lock(&graph_lock); if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { + ftrace_graph_filter_enabled = 0; ftrace_graph_count = 0; memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs)); } @@ -2519,7 +2521,7 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer) struct dyn_ftrace *rec; struct ftrace_page *pg; int search_len; - int found = 0; + int fail = 1; int type, not; char *search; bool exists; @@ -2530,37 +2532,51 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer) /* decode regex */ type = filter_parse_regex(buffer, strlen(buffer), &search, ¬); - if (not) - return -EINVAL; + if (!not && *idx >= FTRACE_GRAPH_MAX_FUNCS) + return -EBUSY; search_len = 
strlen(search); mutex_lock(&ftrace_lock); do_for_each_ftrace_rec(pg, rec) { - if (*idx >= FTRACE_GRAPH_MAX_FUNCS) - break; - if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE)) continue; if (ftrace_match_record(rec, search, search_len, type)) { - /* ensure it is not already in the array */ + /* if it is in the array */ exists = false; - for (i = 0; i < *idx; i++) + for (i = 0; i < *idx; i++) { if (array[i] == rec->ip) { exists = true; break; } - if (!exists) - array[(*idx)++] = rec->ip; - found = 1; + } + + if (!not) { + fail = 0; + if (!exists) { + array[(*idx)++] = rec->ip; + if (*idx >= FTRACE_GRAPH_MAX_FUNCS) + goto out; + } + } else { + if (exists) { + array[i] = array[--(*idx)]; + array[*idx] = 0; + fail = 0; + } + } } } while_for_each_ftrace_rec(); - +out: mutex_unlock(&ftrace_lock); - return found ? 0 : -EINVAL; + if (fail) + return -EINVAL; + + ftrace_graph_filter_enabled = 1; + return 0; } static ssize_t @@ -2570,16 +2586,11 @@ ftrace_graph_write(struct file *file, const char __user *ubuf, struct trace_parser parser; ssize_t read, ret; - if (!cnt || cnt < 0) + if (!cnt) return 0; mutex_lock(&graph_lock); - if (ftrace_graph_count >= FTRACE_GRAPH_MAX_FUNCS) { - ret = -EBUSY; - goto out_unlock; - } - if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) { ret = -ENOMEM; goto out_unlock; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index ce077fbbf552..b477fce41edf 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -497,6 +497,7 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s); #ifdef CONFIG_DYNAMIC_FTRACE /* TODO: make this variable */ #define FTRACE_GRAPH_MAX_FUNCS 32 +extern int ftrace_graph_filter_enabled; extern int ftrace_graph_count; extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS]; @@ -504,7 +505,7 @@ static inline int ftrace_graph_addr(unsigned long addr) { int i; - if (!ftrace_graph_count) + if (!ftrace_graph_filter_enabled) return 1; for (i = 0; i < ftrace_graph_count; i++) { -- 
cgit From a9bb18f36c8056f0712fb28c52c0f85d98438dfb Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 10 Feb 2010 17:23:47 +0100 Subject: tracing/kprobes: Fix probe parsing Trying to add a probe like: echo p:myprobe 0x10000 > /sys/kernel/debug/tracing/kprobe_events will fail since the wrong pointer is passed to strict_strtoul when trying to convert the address to an unsigned long. Signed-off-by: Heiko Carstens Acked-by: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Steven Rostedt LKML-Reference: <20100210162346.GA6933@osiris.boeblingen.de.ibm.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace_kprobe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 6ea90c0e2c96..50b1b8239806 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -689,7 +689,7 @@ static int create_trace_probe(int argc, char **argv) return -EINVAL; } /* an address specified */ - ret = strict_strtoul(&argv[0][2], 0, (unsigned long *)&addr); + ret = strict_strtoul(&argv[1][0], 0, (unsigned long *)&addr); if (ret) { pr_info("Failed to parse address.\n"); return ret; -- cgit From e7b8e675d9c71b868b66f62f725a948047514719 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Tue, 26 Jan 2010 04:40:03 -0500 Subject: tracing: Unify arch_syscall_addr() implementations Most implementations of arch_syscall_addr() are the same, so create a default version in common code and move the one piece that differs (the syscall table) to asm/syscall.h. New arch ports don't have to waste time copying & pasting this simple function. The s390/sparc versions need to be different, so document why. Signed-off-by: Mike Frysinger Acked-by: David S. 
Miller Acked-by: Paul Mundt Acked-by: Heiko Carstens Cc: Steven Rostedt LKML-Reference: <1264498803-17278-1-git-send-email-vapier@gentoo.org> Signed-off-by: Frederic Weisbecker --- Documentation/trace/ftrace-design.txt | 5 ++--- arch/s390/include/asm/syscall.h | 7 +++++++ arch/s390/kernel/ftrace.c | 10 ---------- arch/sh/include/asm/syscall.h | 2 ++ arch/sh/kernel/ftrace.c | 9 --------- arch/sparc/include/asm/syscall.h | 7 +++++++ arch/sparc/kernel/ftrace.c | 11 ----------- arch/x86/include/asm/syscall.h | 2 ++ arch/x86/kernel/ftrace.c | 10 ---------- include/linux/ftrace.h | 6 ++++++ kernel/trace/trace_syscalls.c | 5 +++++ 11 files changed, 31 insertions(+), 43 deletions(-) (limited to 'kernel/trace') diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.txt index 239f14b2b55a..99df1101d2a5 100644 --- a/Documentation/trace/ftrace-design.txt +++ b/Documentation/trace/ftrace-design.txt @@ -218,11 +218,10 @@ HAVE_SYSCALL_TRACEPOINTS You need very few things to get the syscalls tracing in an arch. +- Support HAVE_ARCH_TRACEHOOK (see arch/Kconfig). - Have a NR_syscalls variable in that provides the number of syscalls supported by the arch. -- Implement arch_syscall_addr() that resolves a syscall address from a - syscall number. -- Support the TIF_SYSCALL_TRACEPOINT thread flags +- Support the TIF_SYSCALL_TRACEPOINT thread flags. - Put the trace_sys_enter() and trace_sys_exit() tracepoints calls from ptrace in the ptrace syscalls tracing path. - Tag this arch as HAVE_SYSCALL_TRACEPOINTS. diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index e0a73d3eb837..8429686951f9 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -15,6 +15,13 @@ #include #include +/* + * The syscall table always contains 32 bit pointers since we know that the + * address of the function to be called is (way) below 4GB. 
So the "int" + * type here is what we want [need] for both 32 bit and 64 bit systems. + */ +extern const unsigned int sys_call_table[]; + static inline long syscall_get_nr(struct task_struct *task, struct pt_regs *regs) { diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 5a82bc68193e..9e69449e77ad 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -200,13 +200,3 @@ out: return parent; } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ - -#ifdef CONFIG_FTRACE_SYSCALLS - -extern unsigned int sys_call_table[]; - -unsigned long __init arch_syscall_addr(int nr) -{ - return (unsigned long)sys_call_table[nr]; -} -#endif diff --git a/arch/sh/include/asm/syscall.h b/arch/sh/include/asm/syscall.h index 6a381429ee9d..aa7777bdc370 100644 --- a/arch/sh/include/asm/syscall.h +++ b/arch/sh/include/asm/syscall.h @@ -1,6 +1,8 @@ #ifndef __ASM_SH_SYSCALL_H #define __ASM_SH_SYSCALL_H +extern const unsigned long sys_call_table[]; + #ifdef CONFIG_SUPERH32 # include "syscall_32.h" #else diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c index a48cdedc73b5..30e13196d35b 100644 --- a/arch/sh/kernel/ftrace.c +++ b/arch/sh/kernel/ftrace.c @@ -399,12 +399,3 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) } } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ - -#ifdef CONFIG_FTRACE_SYSCALLS -extern unsigned long *sys_call_table; - -unsigned long __init arch_syscall_addr(int nr) -{ - return (unsigned long)sys_call_table[nr]; -} -#endif /* CONFIG_FTRACE_SYSCALLS */ diff --git a/arch/sparc/include/asm/syscall.h b/arch/sparc/include/asm/syscall.h index 7486c605e23c..025a02ad2e31 100644 --- a/arch/sparc/include/asm/syscall.h +++ b/arch/sparc/include/asm/syscall.h @@ -5,6 +5,13 @@ #include #include +/* + * The syscall table always contains 32 bit pointers since we know that the + * address of the function to be called is (way) below 4GB. So the "int" + * type here is what we want [need] for both 32 bit and 64 bit systems. 
+ */ +extern const unsigned int sys_call_table[]; + /* The system call number is given by the user in %g1 */ static inline long syscall_get_nr(struct task_struct *task, struct pt_regs *regs) diff --git a/arch/sparc/kernel/ftrace.c b/arch/sparc/kernel/ftrace.c index 29973daa9930..9103a56b39e8 100644 --- a/arch/sparc/kernel/ftrace.c +++ b/arch/sparc/kernel/ftrace.c @@ -91,14 +91,3 @@ int __init ftrace_dyn_arch_init(void *data) return 0; } #endif - -#ifdef CONFIG_FTRACE_SYSCALLS - -extern unsigned int sys_call_table[]; - -unsigned long __init arch_syscall_addr(int nr) -{ - return (unsigned long)sys_call_table[nr]; -} - -#endif diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index 8d33bc5462d1..c4a348f7bd43 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -16,6 +16,8 @@ #include #include +extern const unsigned long sys_call_table[]; + /* * Only the low 32 bits of orig_ax are meaningful, so we return int. * This importantly ignores the high bits on 64-bit, so comparisons diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 309689245431..0d93a941934c 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -484,13 +484,3 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, } } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ - -#ifdef CONFIG_FTRACE_SYSCALLS - -extern unsigned long *sys_call_table; - -unsigned long __init arch_syscall_addr(int nr) -{ - return (unsigned long)(&sys_call_table)[nr]; -} -#endif diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 0b4f97d24d7f..1cbb36f2759c 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -511,4 +511,10 @@ static inline void trace_hw_branch_oops(void) {} #endif /* CONFIG_HW_BRANCH_TRACER */ +#ifdef CONFIG_FTRACE_SYSCALLS + +unsigned long arch_syscall_addr(int nr); + +#endif /* CONFIG_FTRACE_SYSCALLS */ + #endif /* _LINUX_FTRACE_H */ diff --git a/kernel/trace/trace_syscalls.c 
b/kernel/trace/trace_syscalls.c index 49cea70fbf6d..ecf00782b46c 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -394,6 +394,11 @@ int init_syscall_trace(struct ftrace_event_call *call) return id; } +unsigned long __init arch_syscall_addr(int nr) +{ + return (unsigned long)sys_call_table[nr]; +} + int __init init_ftrace_syscalls(void) { struct syscall_metadata *meta; -- cgit From f850c30c8b426ba1688cb63b1a3e534eed03a138 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 10 Feb 2010 17:25:17 +0100 Subject: tracing/kprobes: Make Kconfig dependencies generic KPROBES_EVENT actually depends on the regs and stack access API (b1cf540f) and not on x86. So introduce a new config option which architectures can select if they have the API implemented and switch x86. Signed-off-by: Heiko Carstens Acked-by: Masami Hiramatsu Cc: Ingo Molnar Cc: Martin Schwidefsky LKML-Reference: <20100210162517.GB6933@osiris.boeblingen.de.ibm.com> Signed-off-by: Frederic Weisbecker --- arch/Kconfig | 3 +++ arch/x86/Kconfig | 1 + kernel/trace/Kconfig | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/arch/Kconfig b/arch/Kconfig index 9d055b4f0585..04e3aa77da25 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -121,6 +121,9 @@ config HAVE_DMA_ATTRS config USE_GENERIC_SMP_HELPERS bool +config HAVE_REGS_AND_STACK_ACCESS_API + bool + config HAVE_CLK bool help diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 55298e891571..07baa12929b4 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -45,6 +45,7 @@ config X86 select HAVE_GENERIC_DMA_COHERENT if X86_32 select HAVE_EFFICIENT_UNALIGNED_ACCESS select USER_STACKTRACE_SUPPORT + select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_DMA_API_DEBUG select HAVE_KERNEL_GZIP select HAVE_KERNEL_BZIP2 diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 6c22d8a2f289..40fef552f012 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -451,7 +451,7 @@ config 
BLK_DEV_IO_TRACE config KPROBE_EVENT depends on KPROBES - depends on X86 + depends on HAVE_REGS_AND_STACK_ACCESS_API bool "Enable kprobes-based dynamic events" select TRACING default y -- cgit From 86c38a31aa7f2dd6e74a262710bf8ebf7455acc5 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Wed, 24 Feb 2010 13:59:23 -0500 Subject: tracing: Fix ftrace_event_call alignment for use with gcc 4.5 GCC 4.5 introduces behavior that forces the alignment of structures to use the largest possible value. The default value is 32 bytes, so if some structures are defined with a 4-byte alignment and others aren't declared with an alignment constraint at all - it will align at 32-bytes. For things like the ftrace events, this results in a non-standard array. When initializing the ftrace subsystem, we traverse the _ftrace_events section and call the initialization callback for each event. When the structures are misaligned, we could be treating another part of the structure (or the zeroed out space between them) as a function pointer. This patch forces the alignment for all the ftrace_event_call structures to 4 bytes. Without this patch, the kernel fails to boot very early when built with gcc 4.5. It's trivial to check the alignment of the members of the array, so it might be worthwhile to add something to the build system to do that automatically. Unfortunately, that only covers this case. I've asked one of the gcc developers about adding a warning when this condition is seen. 
Cc: stable@kernel.org Signed-off-by: Jeff Mahoney LKML-Reference: <4B85770B.6010901@suse.com> Signed-off-by: Steven Rostedt --- include/linux/syscalls.h | 6 ++++-- include/trace/ftrace.h | 3 ++- kernel/trace/trace.h | 3 ++- 3 files changed, 8 insertions(+), 4 deletions(-) (limited to 'kernel/trace') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 7b219696ad24..91bd7d78a07d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -132,7 +132,8 @@ struct perf_event_attr; #define SYSCALL_TRACE_ENTER_EVENT(sname) \ static const struct syscall_metadata __syscall_meta_##sname; \ - static struct ftrace_event_call event_enter_##sname; \ + static struct ftrace_event_call \ + __attribute__((__aligned__(4))) event_enter_##sname; \ static struct trace_event enter_syscall_print_##sname = { \ .trace = print_syscall_enter, \ }; \ @@ -153,7 +154,8 @@ struct perf_event_attr; #define SYSCALL_TRACE_EXIT_EVENT(sname) \ static const struct syscall_metadata __syscall_meta_##sname; \ - static struct ftrace_event_call event_exit_##sname; \ + static struct ftrace_event_call \ + __attribute__((__aligned__(4))) event_exit_##sname; \ static struct trace_event exit_syscall_print_##sname = { \ .trace = print_syscall_exit, \ }; \ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 09fd9afc0859..f23a0ca6910a 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -65,7 +65,8 @@ }; #undef DEFINE_EVENT #define DEFINE_EVENT(template, name, proto, args) \ - static struct ftrace_event_call event_##name + static struct ftrace_event_call \ + __attribute__((__aligned__(4))) event_##name #undef DEFINE_EVENT_PRINT #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index b477fce41edf..fd05bcaf91b0 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -792,7 +792,8 @@ extern const char *__stop___trace_bprintk_fmt[]; #undef FTRACE_ENTRY #define FTRACE_ENTRY(call, 
struct_name, id, tstruct, print) \ - extern struct ftrace_event_call event_##call; + extern struct ftrace_event_call \ + __attribute__((__aligned__(4))) event_##call; #undef FTRACE_ENTRY_DUP #define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print) \ FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) -- cgit From 1ab83a89411556c4d5914dcf0d5da190178ae7db Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 24 Feb 2010 15:28:14 +0800 Subject: tracing: Remove CONFIG_TRACE_POWER from kernel config The power tracer has been converted to power trace events. Acked-by: Frederic Weisbecker Signed-off-by: Li Zefan LKML-Reference: <4B84D50E.4070806@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- kernel/trace/Kconfig | 9 --------- 1 file changed, 9 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 6c22d8a2f289..ca2d3a8778b6 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -330,15 +330,6 @@ config BRANCH_TRACER Say N if unsure. -config POWER_TRACER - bool "Trace power consumption behavior" - depends on X86 - select GENERIC_TRACER - help - This tracer helps developers to analyze and optimize the kernel's - power management decisions, specifically the C-state and P-state - behavior. 
- config KSYM_TRACER bool "Trace read and write access on kernel memory locations" depends on HAVE_HW_BREAKPOINT -- cgit From 6574658b3bc7c408581629de5efb809f125cce8c Mon Sep 17 00:00:00 2001 From: Wenji Huang Date: Wed, 24 Feb 2010 15:40:22 +0800 Subject: tracing: Fix typo in prof_sysexit_enable() Signed-off-by: Wenji Huang LKML-Reference: <1266997226-6833-1-git-send-email-wenji.huang@oracle.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace_syscalls.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 49cea70fbf6d..8cdda95da81a 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -603,7 +603,7 @@ int prof_sysexit_enable(struct ftrace_event_call *call) ret = register_trace_sys_exit(prof_syscall_exit); if (ret) { pr_info("event trace: Could not activate" - "syscall entry trace point"); + "syscall exit trace point"); } else { set_bit(num, enabled_prof_exit_syscalls); sys_prof_refcount_exit++; -- cgit From a5efd925115cbc1f90195dca9a25f7b8daa10c37 Mon Sep 17 00:00:00 2001 From: Wenji Huang Date: Wed, 24 Feb 2010 15:40:23 +0800 Subject: tracing: Fix typo of info text in trace_kprobe.c Signed-off-by: Wenji Huang LKML-Reference: <1266997226-6833-2-git-send-email-wenji.huang@oracle.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace_kprobe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index c99029916c76..8d4bd16d6f70 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -651,12 +651,12 @@ static int create_trace_probe(int argc, char **argv) event = strchr(group, '/') + 1; event[-1] = '\0'; if (strlen(group) == 0) { - pr_info("Group name is not specifiled\n"); + pr_info("Group name is not specified\n"); return -EINVAL; } } if (strlen(event) == 0) { - pr_info("Event name is not 
specifiled\n"); + pr_info("Event name is not specified\n"); return -EINVAL; } } -- cgit From c85f3a91f84d5a85f179c2504bb7a39370c82b41 Mon Sep 17 00:00:00 2001 From: Wenji Huang Date: Wed, 24 Feb 2010 15:40:24 +0800 Subject: tracing: Remove unnecessary variable in print_graph_return The "cpu" variable is declared at the start of the function and also within a branch, with the exact same initialization. Remove the local variable of the same name in the branch. Signed-off-by: Wenji Huang LKML-Reference: <1266997226-6833-3-git-send-email-wenji.huang@oracle.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace_functions_graph.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 616b135c9eb9..112561df2a0a 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -855,7 +855,6 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, int i; if (data) { - int cpu = iter->cpu; int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth); /* -- cgit From 7b60997f73865b019e595720185c85285ca3df9a Mon Sep 17 00:00:00 2001 From: Wenji Huang Date: Wed, 24 Feb 2010 15:40:26 +0800 Subject: tracing: Simplify memory recycle of trace_define_field Discard freeing field->type since it is not necessary. 
Reviewed-by: Li Zefan Signed-off-by: Wenji Huang LKML-Reference: <1266997226-6833-5-git-send-email-wenji.huang@oracle.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index c2a3077b7353..3f972ad98d04 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -60,10 +60,8 @@ int trace_define_field(struct ftrace_event_call *call, const char *type, return 0; err: - if (field) { + if (field) kfree(field->name); - kfree(field->type); - } kfree(field); return -ENOMEM; -- cgit From f1c7f517a5dc23bce07efa5ed55e2c074ed9d4ba Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 26 Feb 2010 17:08:16 -0500 Subject: ftrace: Add function names to dangling } in function graph tracer The function graph tracer is currently the most invasive tracer in the ftrace family. It can easily overflow the buffer even with 10megs per CPU. This means that events can often be lost. On start up, or after events are lost, if the function return is recorded but the function enter was lost, all we get to see is the exiting '}'. Here is how a typical trace output starts: [tracing] cat trace # tracer: function_graph # # CPU DURATION FUNCTION CALLS # | | | | | | | 0) + 91.897 us | } 0) ! 567.961 us | } 0) <========== | 0) ! 579.083 us | _raw_spin_lock_irqsave(); 0) 4.694 us | _raw_spin_unlock_irqrestore(); 0) ! 594.862 us | } 0) ! 603.361 us | } 0) ! 613.574 us | } 0) ! 623.554 us | } 0) 3.653 us | fget_light(); 0) | sock_poll() { There are a series of '}' with no matching "func() {". There's no information to what functions these ending brackets belong to. This patch adds a stack on the per cpu structure used in outputting the function graph tracer to keep track of what function was outputted. Then on a function exit event, it checks the depth to see if the function exit has a matching entry event. 
If it does, then it only prints the '}', otherwise it adds the function name after the '}'. This allows function exit events to show what function they belong to at trace output startup, when the entry was lost due to ring buffer overflow, or even after a new task is scheduled in. Here is what the above trace will look like after this patch: [tracing] cat trace # tracer: function_graph # # CPU DURATION FUNCTION CALLS # | | | | | | | 0) + 91.897 us | } (irq_exit) 0) ! 567.961 us | } (smp_apic_timer_interrupt) 0) <========== | 0) ! 579.083 us | _raw_spin_lock_irqsave(); 0) 4.694 us | _raw_spin_unlock_irqrestore(); 0) ! 594.862 us | } (add_wait_queue) 0) ! 603.361 us | } (__pollwait) 0) ! 613.574 us | } (tcp_poll) 0) ! 623.554 us | } (sock_poll) 0) 3.653 us | fget_light(); 0) | sock_poll() { Signed-off-by: Steven Rostedt --- kernel/trace/trace_functions_graph.c | 52 +++++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 9 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 112561df2a0a..e998a824e9db 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -18,6 +18,7 @@ struct fgraph_cpu_data { pid_t last_pid; int depth; int ignore; + unsigned long enter_funcs[FTRACE_RETFUNC_DEPTH]; }; struct fgraph_data { @@ -670,15 +671,21 @@ print_graph_entry_leaf(struct trace_iterator *iter, duration = graph_ret->rettime - graph_ret->calltime; if (data) { + struct fgraph_cpu_data *cpu_data; int cpu = iter->cpu; - int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth); + + cpu_data = per_cpu_ptr(data->cpu_data, cpu); /* * Comments display at + 1 to depth. Since * this is a leaf function, keep the comments * equal to this depth. 
*/ - *depth = call->depth - 1; + cpu_data->depth = call->depth - 1; + + /* No need to keep this function around for this depth */ + if (call->depth < FTRACE_RETFUNC_DEPTH) + cpu_data->enter_funcs[call->depth] = 0; } /* Overhead */ @@ -718,10 +725,15 @@ print_graph_entry_nested(struct trace_iterator *iter, int i; if (data) { + struct fgraph_cpu_data *cpu_data; int cpu = iter->cpu; - int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth); - *depth = call->depth; + cpu_data = per_cpu_ptr(data->cpu_data, cpu); + cpu_data->depth = call->depth; + + /* Save this function pointer to see if the exit matches */ + if (call->depth < FTRACE_RETFUNC_DEPTH) + cpu_data->enter_funcs[call->depth] = call->func; } /* No overhead */ @@ -851,18 +863,28 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, struct fgraph_data *data = iter->private; pid_t pid = ent->pid; int cpu = iter->cpu; + int func_match = 1; int ret; int i; if (data) { - int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth); + struct fgraph_cpu_data *cpu_data; + int cpu = iter->cpu; + + cpu_data = per_cpu_ptr(data->cpu_data, cpu); /* * Comments display at + 1 to depth. This is the * return from a function, we now want the comments * to display at the same level of the bracket. */ - *depth = trace->depth - 1; + cpu_data->depth = trace->depth - 1; + + if (trace->depth < FTRACE_RETFUNC_DEPTH) { + if (cpu_data->enter_funcs[trace->depth] != trace->func) + func_match = 0; + cpu_data->enter_funcs[trace->depth] = 0; + } } if (print_graph_prologue(iter, s, 0, 0)) @@ -887,9 +909,21 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, return TRACE_TYPE_PARTIAL_LINE; } - ret = trace_seq_printf(s, "}\n"); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; + /* + * If the return function does not have a matching entry, + * then the entry was lost. Instead of just printing + * the '}' and letting the user guess what function this + * belongs to, write out the function name. 
+ */ + if (func_match) { + ret = trace_seq_printf(s, "}\n"); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } else { + ret = trace_seq_printf(s, "} (%ps)\n", (void *)trace->func); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } /* Overrun */ if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) { -- cgit From 9a8c28c8311e30ba97499447d5a11662f5aea094 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Sat, 27 Feb 2010 20:35:13 +0300 Subject: blktrace: perform cleanup after setup error Currently, even if the BLKTRACESETUP ioctl has failed, the user must call BLKTRACETEARDOWN to be sure that everything was cleaned up, which is counter-intuitive. Let the setup ioctl perform the necessary cleanup by itself. Signed-off-by: Dmitry Monakhov Signed-off-by: Jens Axboe --- kernel/trace/blktrace.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index d9d6206e0b14..07f945a99430 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -540,9 +540,10 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, if (ret) return ret; - if (copy_to_user(arg, &buts, sizeof(buts))) + if (copy_to_user(arg, &buts, sizeof(buts))) { + blk_trace_remove(q); return -EFAULT; - + } return 0; } EXPORT_SYMBOL_GPL(blk_trace_setup); -- cgit