diff options
Diffstat (limited to 'kernel/trace/trace.h')
-rw-r--r-- | kernel/trace/trace.h | 387 |
1 files changed, 284 insertions, 103 deletions
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 4d3d381bfd95..8b9f4f6e9559 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -9,6 +9,11 @@ #include <linux/mmiotrace.h> #include <linux/ftrace.h> #include <trace/boot.h> +#include <linux/kmemtrace.h> +#include <trace/power.h> + +#include <linux/trace_seq.h> +#include <linux/ftrace_event.h> enum trace_type { __TRACE_FIRST_TYPE = 0, @@ -16,9 +21,9 @@ enum trace_type { TRACE_FN, TRACE_CTX, TRACE_WAKE, - TRACE_CONT, TRACE_STACK, TRACE_PRINT, + TRACE_BPRINT, TRACE_SPECIAL, TRACE_MMIO_RW, TRACE_MMIO_MAP, @@ -29,24 +34,14 @@ enum trace_type { TRACE_GRAPH_ENT, TRACE_USER_STACK, TRACE_HW_BRANCHES, + TRACE_SYSCALL_ENTER, + TRACE_SYSCALL_EXIT, + TRACE_KMEM_ALLOC, + TRACE_KMEM_FREE, TRACE_POWER, + TRACE_BLK, - __TRACE_LAST_TYPE -}; - -/* - * The trace entry - the most basic unit of tracing. This is what - * is printed in the end as a single line in the trace output, such as: - * - * bash-15816 [01] 235.197585: idle_cpu <- irq_enter - */ -struct trace_entry { - unsigned char type; - unsigned char cpu; - unsigned char flags; - unsigned char preempt_count; - int pid; - int tgid; + __TRACE_LAST_TYPE, }; /* @@ -60,13 +55,13 @@ struct ftrace_entry { /* Function call entry */ struct ftrace_graph_ent_entry { - struct trace_entry ent; + struct trace_entry ent; struct ftrace_graph_ent graph_ent; }; /* Function return entry */ struct ftrace_graph_ret_entry { - struct trace_entry ent; + struct trace_entry ent; struct ftrace_graph_ret ret; }; extern struct tracer boot_tracer; @@ -112,12 +107,18 @@ struct userstack_entry { }; /* - * ftrace_printk entry: + * trace_printk entry: */ +struct bprint_entry { + struct trace_entry ent; + unsigned long ip; + const char *fmt; + u32 buf[]; +}; + struct print_entry { struct trace_entry ent; unsigned long ip; - int depth; char buf[]; }; @@ -170,15 +171,51 @@ struct trace_power { struct power_trace state_data; }; +enum kmemtrace_type_id { + KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */ + KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */ + KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */ +}; + +struct kmemtrace_alloc_entry { + struct trace_entry ent; + enum kmemtrace_type_id type_id; + unsigned long call_site; + const void *ptr; + size_t bytes_req; + size_t bytes_alloc; + gfp_t gfp_flags; + int node; +}; + +struct kmemtrace_free_entry { + struct trace_entry ent; + enum kmemtrace_type_id type_id; + unsigned long call_site; + const void *ptr; +}; + +struct syscall_trace_enter { + struct trace_entry ent; + int nr; + unsigned long args[]; +}; + +struct syscall_trace_exit { + struct trace_entry ent; + int nr; + unsigned long ret; +}; + + /* * trace_flag_type is an enumeration that holds different * states when a trace occurs. These are: * IRQS_OFF - interrupts were disabled - * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags + * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags * NEED_RESCED - reschedule is requested * HARDIRQ - inside an interrupt handler * SOFTIRQ - inside a softirq handler - * CONT - multiple entries hold the trace item */ enum trace_flag_type { TRACE_FLAG_IRQS_OFF = 0x01, @@ -186,7 +223,6 @@ enum trace_flag_type { TRACE_FLAG_NEED_RESCHED = 0x04, TRACE_FLAG_HARDIRQ = 0x08, TRACE_FLAG_SOFTIRQ = 0x10, - TRACE_FLAG_CONT = 0x20, }; #define TRACE_BUF_SIZE 1024 @@ -198,6 +234,7 @@ enum trace_flag_type { */ struct trace_array_cpu { atomic_t disabled; + void *buffer_page; /* ring buffer spare */ /* these fields get copied into max-trace: */ unsigned long trace_idx; @@ -215,8 +252,6 @@ struct trace_array_cpu { char comm[TASK_COMM_LEN]; }; -struct trace_iterator; - /* * The trace array - an array of per-CPU trace arrays. This is the * highest level data structure that individual tracers deal with. @@ -262,10 +297,10 @@ extern void __ftrace_bad_type(void); do { \ IF_ASSIGN(var, ent, struct ftrace_entry, TRACE_FN); \ IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \ - IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \ IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \ IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\ IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ + IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT); \ IF_ASSIGN(var, ent, struct special_entry, 0); \ IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \ TRACE_MMIO_RW); \ @@ -279,26 +314,26 @@ extern void __ftrace_bad_type(void); IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \ TRACE_GRAPH_RET); \ IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\ - IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \ + IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \ + IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \ + TRACE_KMEM_ALLOC); \ + IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ + TRACE_KMEM_FREE); \ + IF_ASSIGN(var, ent, struct syscall_trace_enter, \ + TRACE_SYSCALL_ENTER); \ + IF_ASSIGN(var, ent, struct syscall_trace_exit, \ + TRACE_SYSCALL_EXIT); \ __ftrace_bad_type(); \ } while (0) -/* Return values for print_line callback */ -enum print_line_t { - TRACE_TYPE_PARTIAL_LINE = 0, /* Retry after flushing the seq */ - TRACE_TYPE_HANDLED = 1, - TRACE_TYPE_UNHANDLED = 2 /* Relay to other output functions */ -}; - - /* * An option specific to a tracer. This is a boolean value. * The bit is the bit index that sets its value on the * flags value in struct tracer_flags. */ struct tracer_opt { - const char *name; /* Will appear on the trace_options file */ - u32 bit; /* Mask assigned in val field in tracer_flags */ + const char *name; /* Will appear on the trace_options file */ + u32 bit; /* Mask assigned in val field in tracer_flags */ }; /* @@ -307,28 +342,51 @@ struct tracer_opt { */ struct tracer_flags { u32 val; - struct tracer_opt *opts; + struct tracer_opt *opts; }; /* Makes more easy to define a tracer opt */ #define TRACER_OPT(s, b) .name = #s, .bit = b -/* - * A specific tracer, represented by methods that operate on a trace array: + +/** + * struct tracer - a specific tracer and its callbacks to interact with debugfs + * @name: the name chosen to select it on the available_tracers file + * @init: called when one switches to this tracer (echo name > current_tracer) + * @reset: called when one switches to another tracer + * @start: called when tracing is unpaused (echo 1 > tracing_enabled) + * @stop: called when tracing is paused (echo 0 > tracing_enabled) + * @open: called when the trace file is opened + * @pipe_open: called when the trace_pipe file is opened + * @wait_pipe: override how the user waits for traces on trace_pipe + * @close: called when the trace file is released + * @read: override the default read callback on trace_pipe + * @splice_read: override the default splice_read callback on trace_pipe + * @selftest: selftest to run on boot (see trace_selftest.c) + * @print_headers: override the first lines that describe your columns + * @print_line: callback that prints a trace + * @set_flag: signals one of your private flags changed (trace_options file) + * @flags: your private flags */ struct tracer { const char *name; - /* Your tracer should raise a warning if init fails */ int (*init)(struct trace_array *tr); void (*reset)(struct trace_array *tr); void (*start)(struct trace_array *tr); void (*stop)(struct trace_array *tr); void (*open)(struct trace_iterator *iter); void (*pipe_open)(struct trace_iterator *iter); + void (*wait_pipe)(struct trace_iterator *iter); void (*close)(struct trace_iterator *iter); ssize_t (*read)(struct trace_iterator *iter, struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos); + ssize_t (*splice_read)(struct trace_iterator *iter, + struct file *filp, + loff_t *ppos, + struct pipe_inode_info *pipe, + size_t len, + unsigned int flags); #ifdef CONFIG_FTRACE_STARTUP_TEST int (*selftest)(struct tracer *trace, struct trace_array *tr); @@ -339,51 +397,49 @@ struct tracer { int (*set_flag)(u32 old_flags, u32 bit, int set); struct tracer *next; int print_max; - struct tracer_flags *flags; -}; - -struct trace_seq { - unsigned char buffer[PAGE_SIZE]; - unsigned int len; - unsigned int readpos; + struct tracer_flags *flags; + struct tracer_stat *stats; }; -/* - * Trace iterator - used by printout routines who present trace - * results to users and which routines might sleep, etc: - */ -struct trace_iterator { - struct trace_array *tr; - struct tracer *trace; - void *private; - struct ring_buffer_iter *buffer_iter[NR_CPUS]; - - /* The below is zeroed out in pipe_read */ - struct trace_seq seq; - struct trace_entry *ent; - int cpu; - u64 ts; - - unsigned long iter_flags; - loff_t pos; - long idx; - cpumask_var_t started; -}; +#define TRACE_PIPE_ALL_CPU -1 +int tracer_init(struct tracer *t, struct trace_array *tr); int tracing_is_enabled(void); void trace_wake_up(void); void tracing_reset(struct trace_array *tr, int cpu); void tracing_reset_online_cpus(struct trace_array *tr); +void tracing_reset_current(int cpu); +void tracing_reset_current_online_cpus(void); int tracing_open_generic(struct inode *inode, struct file *filp); +struct dentry *trace_create_file(const char *name, + mode_t mode, + struct dentry *parent, + void *data, + const struct file_operations *fops); + struct dentry *tracing_init_dentry(void); void init_tracer_sysprof_debugfs(struct dentry *d_tracer); +struct ring_buffer_event; + +struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, + int type, + unsigned long len, + unsigned long flags, + int pc); +void trace_buffer_unlock_commit(struct trace_array *tr, + struct ring_buffer_event *event, + unsigned long flags, int pc); + struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data); -void tracing_generic_entry_update(struct trace_entry *entry, - unsigned long flags, - int pc); + +struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, + int *ent_cpu, u64 *ent_ts); + +void default_wait_pipe(struct trace_iterator *iter); +void poll_wait_pipe(struct trace_iterator *iter); void ftrace(struct trace_array *tr, struct trace_array_cpu *data, @@ -391,14 +447,11 @@ void ftrace(struct trace_array *tr, unsigned long parent_ip, unsigned long flags, int pc); void tracing_sched_switch_trace(struct trace_array *tr, - struct trace_array_cpu *data, struct task_struct *prev, struct task_struct *next, unsigned long flags, int pc); -void tracing_record_cmdline(struct task_struct *tsk); void tracing_sched_wakeup_trace(struct trace_array *tr, - struct trace_array_cpu *data, struct task_struct *wakee, struct task_struct *cur, unsigned long flags, int pc); @@ -408,14 +461,12 @@ void trace_special(struct trace_array *tr, unsigned long arg2, unsigned long arg3, int pc); void trace_function(struct trace_array *tr, - struct trace_array_cpu *data, unsigned long ip, unsigned long parent_ip, unsigned long flags, int pc); void trace_graph_return(struct ftrace_graph_ret *trace); int trace_graph_entry(struct ftrace_graph_ent *trace); -void trace_hw_branch(struct trace_array *tr, u64 from, u64 to); void tracing_start_cmdline_record(void); void tracing_stop_cmdline_record(void); @@ -434,15 +485,11 @@ void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu); void update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu); -extern cycle_t ftrace_now(int cpu); +void __trace_stack(struct trace_array *tr, + unsigned long flags, + int skip, int pc); -#ifdef CONFIG_FUNCTION_TRACER -void tracing_start_function_trace(void); -void tracing_stop_function_trace(void); -#else -# define tracing_start_function_trace() do { } while (0) -# define tracing_stop_function_trace() do { } while (0) -#endif +extern cycle_t ftrace_now(int cpu); #ifdef CONFIG_CONTEXT_SWITCH_TRACER typedef void @@ -456,10 +503,10 @@ struct tracer_switch_ops { void *private; struct tracer_switch_ops *next; }; - -char *trace_find_cmdline(int pid); #endif /* CONFIG_CONTEXT_SWITCH_TRACER */ +extern void trace_find_cmdline(int pid, char comm[]); + #ifdef CONFIG_DYNAMIC_FTRACE extern unsigned long ftrace_update_tot_cnt; #define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func @@ -469,6 +516,8 @@ extern int DYN_FTRACE_TEST_NAME(void); #ifdef CONFIG_FTRACE_STARTUP_TEST extern int trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr); +extern int trace_selftest_startup_function_graph(struct tracer *trace, + struct trace_array *tr); extern int trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_preemptoff(struct tracer *trace, @@ -485,27 +534,24 @@ extern int trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr); +extern int trace_selftest_startup_hw_branches(struct tracer *trace, + struct trace_array *tr); #endif /* CONFIG_FTRACE_STARTUP_TEST */ extern void *head_page(struct trace_array_cpu *data); -extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...); -extern void trace_seq_print_cont(struct trace_seq *s, - struct trace_iterator *iter); - +extern unsigned long long ns2usecs(cycle_t nsec); extern int -seq_print_ip_sym(struct trace_seq *s, unsigned long ip, - unsigned long sym_flags); -extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, - size_t cnt); -extern long ns2usecs(cycle_t nsec); +trace_vbprintk(unsigned long ip, const char *fmt, va_list args); extern int -trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args); +trace_vprintk(unsigned long ip, const char *fmt, va_list args); extern unsigned long trace_flags; /* Standard output formatting function used for function return traces */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER extern enum print_line_t print_graph_function(struct trace_iterator *iter); +extern enum print_line_t +trace_print_graph_duration(unsigned long long duration, struct trace_seq *s); #ifdef CONFIG_DYNAMIC_FTRACE /* TODO: make this variable */ @@ -537,7 +583,6 @@ static inline int ftrace_graph_addr(unsigned long addr) return 1; } #endif /* CONFIG_DYNAMIC_FTRACE */ - #else /* CONFIG_FUNCTION_GRAPH_TRACER */ static inline enum print_line_t print_graph_function(struct trace_iterator *iter) @@ -548,6 +593,7 @@ print_graph_function(struct trace_iterator *iter) extern struct pid *ftrace_pid_trace; +#ifdef CONFIG_FUNCTION_TRACER static inline int ftrace_trace_task(struct task_struct *task) { if (!ftrace_pid_trace) @@ -555,6 +601,12 @@ static inline int ftrace_trace_task(struct task_struct *task) return test_tsk_trace_trace(task); } +#else +static inline int ftrace_trace_task(struct task_struct *task) +{ + return 1; +} +#endif /* * trace_iterator_flags is an enumeration that defines bit @@ -580,7 +632,12 @@ enum trace_iterator_flags { TRACE_ITER_ANNOTATE = 0x2000, TRACE_ITER_USERSTACKTRACE = 0x4000, TRACE_ITER_SYM_USEROBJ = 0x8000, - TRACE_ITER_PRINTK_MSGONLY = 0x10000 + TRACE_ITER_PRINTK_MSGONLY = 0x10000, + TRACE_ITER_CONTEXT_INFO = 0x20000, /* Print pid/cpu/time */ + TRACE_ITER_LATENCY_FMT = 0x40000, + TRACE_ITER_GLOBAL_CLK = 0x80000, + TRACE_ITER_SLEEP_TIME = 0x100000, + TRACE_ITER_GRAPH_TIME = 0x200000, }; /* @@ -601,12 +658,12 @@ extern struct tracer nop_trace; * preempt_enable (after a disable), a schedule might take place * causing an infinite recursion. * - * To prevent this, we read the need_recshed flag before + * To prevent this, we read the need_resched flag before * disabling preemption. When we want to enable preemption we * check the flag, if it is set, then we call preempt_enable_no_resched. * Otherwise, we call preempt_enable. * - * The rational for doing the above is that if need resched is set + * The rational for doing the above is that if need_resched is set * and we have yet to reschedule, we are either in an atomic location * (where we do not need to check for scheduling) or we are inside * the scheduler and do not want to resched. @@ -627,7 +684,7 @@ static inline int ftrace_preempt_disable(void) * * This is a scheduler safe way to enable preemption and not miss * any preemption checks. The disabled saved the state of preemption. - * If resched is set, then we were either inside an atomic or + * If resched is set, then we are either inside an atomic or * are inside the scheduler (we would have already scheduled * otherwise). In this case, we do not want to call normal * preempt_enable, but preempt_enable_no_resched instead. @@ -664,4 +721,128 @@ static inline void trace_branch_disable(void) } #endif /* CONFIG_BRANCH_TRACER */ +/* set ring buffers to default size if not already done so */ +int tracing_update_buffers(void); + +/* trace event type bit fields, not numeric */ +enum { + TRACE_EVENT_TYPE_PRINTF = 1, + TRACE_EVENT_TYPE_RAW = 2, +}; + +struct ftrace_event_field { + struct list_head link; + char *name; + char *type; + int offset; + int size; + int is_signed; +}; + +struct event_filter { + int n_preds; + struct filter_pred **preds; + char *filter_string; +}; + +struct event_subsystem { + struct list_head list; + const char *name; + struct dentry *entry; + void *filter; +}; + +struct filter_pred; + +typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event, + int val1, int val2); + +struct filter_pred { + filter_pred_fn_t fn; + u64 val; + char str_val[MAX_FILTER_STR_VAL]; + int str_len; + char *field_name; + int offset; + int not; + int op; + int pop_n; +}; + +extern void print_event_filter(struct ftrace_event_call *call, + struct trace_seq *s); +extern int apply_event_filter(struct ftrace_event_call *call, + char *filter_string); +extern int apply_subsystem_event_filter(struct event_subsystem *system, + char *filter_string); +extern void print_subsystem_event_filter(struct event_subsystem *system, + struct trace_seq *s); + +static inline int +filter_check_discard(struct ftrace_event_call *call, void *rec, + struct ring_buffer *buffer, + struct ring_buffer_event *event) +{ + if (unlikely(call->filter_active) && !filter_match_preds(call, rec)) { + ring_buffer_discard_commit(buffer, event); + return 1; + } + + return 0; +} + +#define DEFINE_COMPARISON_PRED(type) \ +static int filter_pred_##type(struct filter_pred *pred, void *event, \ + int val1, int val2) \ +{ \ + type *addr = (type *)(event + pred->offset); \ + type val = (type)pred->val; \ + int match = 0; \ + \ + switch (pred->op) { \ + case OP_LT: \ + match = (*addr < val); \ + break; \ + case OP_LE: \ + match = (*addr <= val); \ + break; \ + case OP_GT: \ + match = (*addr > val); \ + break; \ + case OP_GE: \ + match = (*addr >= val); \ + break; \ + default: \ + break; \ + } \ + \ + return match; \ +} + +#define DEFINE_EQUALITY_PRED(size) \ +static int filter_pred_##size(struct filter_pred *pred, void *event, \ + int val1, int val2) \ +{ \ + u##size *addr = (u##size *)(event + pred->offset); \ + u##size val = (u##size)pred->val; \ + int match; \ + \ + match = (val == *addr) ^ pred->not; \ + \ + return match; \ +} + +extern struct mutex event_mutex; +extern struct list_head ftrace_events; + +extern const char *__start___trace_bprintk_fmt[]; +extern const char *__stop___trace_bprintk_fmt[]; + +#undef TRACE_EVENT_FORMAT +#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ + extern struct ftrace_event_call event_##call; +#undef TRACE_EVENT_FORMAT_NOFILTER +#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, tpfmt) +#include "trace_event_types.h" + #endif /* _LINUX_KERNEL_TRACE_H */ |