author		Linus Torvalds <[email protected]>	2024-11-20 11:34:10 -0800
committer	Linus Torvalds <[email protected]>	2024-11-20 11:34:10 -0800
commit		aad3a0d084513e811233ad48bf234fbfcfcd0a14 (patch)
tree		f7946159d3ab428ceee2dd5f54097b05c334b626 /kernel/trace/trace_functions_graph.c
parent		8f7c8b88bda4988f44e595a760438febf51c92c8 (diff)
parent		36a367b8912a3aac023d9e35c815f7b1e609f4a3 (diff)
Merge tag 'ftrace-v6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace
Pull ftrace updates from Steven Rostedt:

 - Restructure the function graph shadow stack to prepare it for use
   with kretprobes

   With the goal of merging the shadow stack logic of function graph and
   kretprobes, some more restructuring of the function shadow stack is
   required. Move function graph specific fields out of the fgraph
   infrastructure and store them in the new stack variables that can
   pass data from the entry callback to the exit callback (a hedged
   sketch of this entry-to-exit data passing follows the commit list
   below). Hopefully, with this change, the merge of kretprobes to use
   fgraph shadow stacks will be ready by the next merge window.

 - Make the shadow stack 4K instead of using PAGE_SIZE

   Some architectures have very large PAGE_SIZE values, which makes
   using PAGE_SIZE for shadow stacks waste a lot of memory.

 - Give shadow stacks their own kmem cache

   When function graph is started, every task on the system gets a
   shadow stack. In the future, shadow stacks may not be 4K in size.
   Give them their own kmem cache so that allocations stay efficient
   whatever size they become.

 - Initialize the profiler graph ops, as it will be needed for upcoming
   fgraph updates

 - Convert several ftrace and fgraph functions to use guard(mutex)

 - Add more comments and documentation

 - Show the function return address in the function graph tracer

   Add an option to show the caller of a function at each entry of the
   function graph tracer, similar to what the function tracer does.

 - Abstract out ftrace_regs from being used directly like pt_regs

   ftrace_regs was created to store a partial pt_regs. It holds only the
   registers and stack information needed to get to the function
   arguments and return values. On several archs, it is simply a wrapper
   around pt_regs. But some users would access ftrace_regs directly to
   get at the pt_regs, which will not work on all archs. Make
   ftrace_regs an abstract structure that requires all access to its
   fields to go through accessor functions (a hedged sketch of an
   accessor-only callback follows the diffstat below).

 - Show how long function code modifications take

   When code modification for function hooks happens, the time the
   conversion took has always been recorded, but the value was never
   exported. Recently the code was touched due to new ROX modification
   handling that caused a large slowdown in doing the modifications and
   had a significant impact on boot times.

   Expose the timings in the dyn_ftrace_total_info file. This file was
   created a while ago to show information about memory usage and such
   for implementing dynamic function tracing, so it's also an
   appropriate place to store these modification timings. This will make
   it easier to see the impact of code-modification changes on boot-up
   timings.
 - Other clean ups and small fixes

* tag 'ftrace-v6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace: (22 commits)
  ftrace: Show timings of how long nop patching took
  ftrace: Use guard to take ftrace_lock in ftrace_graph_set_hash()
  ftrace: Use guard to take the ftrace_lock in release_probe()
  ftrace: Use guard to lock ftrace_lock in cache_mod()
  ftrace: Use guard for match_records()
  fgraph: Use guard(mutex)(&ftrace_lock) for unregister_ftrace_graph()
  fgraph: Give ret_stack its own kmem cache
  fgraph: Separate size of ret_stack from PAGE_SIZE
  ftrace: Rename ftrace_regs_return_value to ftrace_regs_get_return_value
  selftests/ftrace: Fix check of return value in fgraph-retval.tc test
  ftrace: Use arch_ftrace_regs() for ftrace_regs_*() macros
  ftrace: Consolidate ftrace_regs accessor functions for archs using pt_regs
  ftrace: Make ftrace_regs abstract from direct use
  fgragh: No need to invoke the function call_filter_check_discard()
  fgraph: Simplify return address printing in function graph tracer
  function_graph: Remove unnecessary initialization in ftrace_graph_ret_addr()
  function_graph: Support recording and printing the function return address
  ftrace: Have calltime be saved in the fgraph storage
  ftrace: Use a running sleeptime instead of saving on shadow stack
  fgraph: Use fgraph data to store subtime for profiler
  ...
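As referenced in the first item above, the heart of this diff is fgraph's
entry-to-exit data passing. Here is a minimal, hedged sketch of that pattern.
Only fgraph_reserve_data() and fgraph_retrieve_data() are taken from this
series (as used in the patch below); the callbacks themselves are
hypothetical, not the tracer's actual implementation.

	/*
	 * Sketch: pass per-call data from the fgraph entry callback to the
	 * exit callback via the shadow stack. Assumes <linux/ftrace.h> and
	 * <linux/trace_clock.h>.
	 */
	static int sketch_entry(struct ftrace_graph_ent *trace,
				struct fgraph_ops *gops)
	{
		u64 *calltime;

		/* Reserve shadow-stack storage tied to this function entry */
		calltime = fgraph_reserve_data(gops->idx, sizeof(*calltime));
		if (!calltime)
			return 0;	/* no room: do not trace this call */

		*calltime = trace_clock_local();
		return 1;
	}

	static void sketch_return(struct ftrace_graph_ret *trace,
				  struct fgraph_ops *gops)
	{
		u64 *calltime;
		int size;

		/* Retrieve the storage reserved at entry for this same call */
		calltime = fgraph_retrieve_data(gops->idx, &size);
		if (!calltime)
			return;

		trace_printk("duration: %llu\n", trace->rettime - *calltime);
	}

This is exactly the shape trace_graph_entry()/trace_graph_return() take in
the diff below, with struct fgraph_times as the reserved data.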
Diffstat (limited to 'kernel/trace/trace_functions_graph.c')
-rw-r--r--	kernel/trace/trace_functions_graph.c	264
1 file changed, 220 insertions(+), 44 deletions(-)
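One more hedged sketch before the patch itself: the ftrace_regs abstraction
described above means callbacks must go through accessor functions rather
than treating ftrace_regs as pt_regs. Something like the following, where
the callback body is illustrative and only the accessor names come from
this series:

	/*
	 * Sketch: an ftrace callback that touches ftrace_regs only through
	 * accessors, as required once ftrace_regs is abstract. Assumes
	 * <linux/ftrace.h>. Hypothetical callback.
	 */
	static void sketch_callback(unsigned long ip, unsigned long parent_ip,
				    struct ftrace_ops *op,
				    struct ftrace_regs *fregs)
	{
		unsigned long pc = ftrace_regs_get_instruction_pointer(fregs);
		unsigned long sp = ftrace_regs_get_stack_pointer(fregs);

		/*
		 * Not OK after this series: reading fregs->regs directly,
		 * since ftrace_regs no longer guarantees a pt_regs layout
		 * on all archs.
		 */
		trace_printk("ip=%pS pc=%lx sp=%lx\n", (void *)ip, pc, sp);
	}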
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index a569daaac4c4..03c5a0d300a5 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -31,7 +31,10 @@ struct fgraph_data {
struct fgraph_cpu_data __percpu *cpu_data;
/* Place to preserve last processed entry. */
- struct ftrace_graph_ent_entry ent;
+ union {
+ struct ftrace_graph_ent_entry ent;
+ struct fgraph_retaddr_ent_entry rent;
+ } ent;
struct ftrace_graph_ret_entry ret;
int failed;
int cpu;
@@ -64,6 +67,10 @@ static struct tracer_opt trace_opts[] = {
/* Display function return value in hexadecimal format ? */
{ TRACER_OPT(funcgraph-retval-hex, TRACE_GRAPH_PRINT_RETVAL_HEX) },
#endif
+#ifdef CONFIG_FUNCTION_GRAPH_RETADDR
+ /* Display function return address ? */
+ { TRACER_OPT(funcgraph-retaddr, TRACE_GRAPH_PRINT_RETADDR) },
+#endif
/* Include sleep time (scheduled out) between entry and return */
{ TRACER_OPT(sleep-time, TRACE_GRAPH_SLEEP_TIME) },
@@ -83,6 +90,11 @@ static struct tracer_flags tracer_flags = {
.opts = trace_opts
};
+static bool tracer_flags_is_set(u32 flags)
+{
+ return (tracer_flags.val & flags) == flags;
+}
+
/*
* DURATION column is being also used to display IRQ signs,
* following values are used by print_graph_irq and others
@@ -119,6 +131,38 @@ int __trace_graph_entry(struct trace_array *tr,
return 1;
}
+#ifdef CONFIG_FUNCTION_GRAPH_RETADDR
+int __trace_graph_retaddr_entry(struct trace_array *tr,
+ struct ftrace_graph_ent *trace,
+ unsigned int trace_ctx,
+ unsigned long retaddr)
+{
+ struct ring_buffer_event *event;
+ struct trace_buffer *buffer = tr->array_buffer.buffer;
+ struct fgraph_retaddr_ent_entry *entry;
+
+ event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RETADDR_ENT,
+ sizeof(*entry), trace_ctx);
+ if (!event)
+ return 0;
+ entry = ring_buffer_event_data(event);
+ entry->graph_ent.func = trace->func;
+ entry->graph_ent.depth = trace->depth;
+ entry->graph_ent.retaddr = retaddr;
+ trace_buffer_unlock_commit_nostack(buffer, event);
+
+ return 1;
+}
+#else
+int __trace_graph_retaddr_entry(struct trace_array *tr,
+ struct ftrace_graph_ent *trace,
+ unsigned int trace_ctx,
+ unsigned long retaddr)
+{
+ return 1;
+}
+#endif
+
static inline int ftrace_graph_ignore_irqs(void)
{
if (!ftrace_graph_skip_irqs || trace_recursion_test(TRACE_IRQ_BIT))
@@ -127,12 +171,18 @@ static inline int ftrace_graph_ignore_irqs(void)
return in_hardirq();
}
+struct fgraph_times {
+ unsigned long long calltime;
+ unsigned long long sleeptime; /* may be optional! */
+};
+
int trace_graph_entry(struct ftrace_graph_ent *trace,
struct fgraph_ops *gops)
{
unsigned long *task_var = fgraph_get_task_var(gops);
struct trace_array *tr = gops->private;
struct trace_array_cpu *data;
+ struct fgraph_times *ftimes;
unsigned long flags;
unsigned int trace_ctx;
long disabled;
@@ -167,6 +217,19 @@ int trace_graph_entry(struct ftrace_graph_ent *trace,
if (ftrace_graph_ignore_irqs())
return 0;
+ if (fgraph_sleep_time) {
+ /* Only need to record the calltime */
+ ftimes = fgraph_reserve_data(gops->idx, sizeof(ftimes->calltime));
+ } else {
+ ftimes = fgraph_reserve_data(gops->idx, sizeof(*ftimes));
+ if (ftimes)
+ ftimes->sleeptime = current->ftrace_sleeptime;
+ }
+ if (!ftimes)
+ return 0;
+
+ ftimes->calltime = trace_clock_local();
+
/*
* Stop here if tracing_threshold is set. We only write function return
* events to the ring buffer.
@@ -180,7 +243,13 @@ int trace_graph_entry(struct ftrace_graph_ent *trace,
disabled = atomic_inc_return(&data->disabled);
if (likely(disabled == 1)) {
trace_ctx = tracing_gen_ctx_flags(flags);
- ret = __trace_graph_entry(tr, trace, trace_ctx);
+ if (unlikely(IS_ENABLED(CONFIG_FUNCTION_GRAPH_RETADDR) &&
+ tracer_flags_is_set(TRACE_GRAPH_PRINT_RETADDR))) {
+ unsigned long retaddr = ftrace_graph_top_ret_addr(current);
+
+ ret = __trace_graph_retaddr_entry(tr, trace, trace_ctx, retaddr);
+ } else
+ ret = __trace_graph_entry(tr, trace, trace_ctx);
} else {
ret = 0;
}
@@ -238,15 +307,27 @@ void __trace_graph_return(struct trace_array *tr,
trace_buffer_unlock_commit_nostack(buffer, event);
}
+static void handle_nosleeptime(struct ftrace_graph_ret *trace,
+ struct fgraph_times *ftimes,
+ int size)
+{
+ if (fgraph_sleep_time || size < sizeof(*ftimes))
+ return;
+
+ ftimes->calltime += current->ftrace_sleeptime - ftimes->sleeptime;
+}
+
void trace_graph_return(struct ftrace_graph_ret *trace,
struct fgraph_ops *gops)
{
unsigned long *task_var = fgraph_get_task_var(gops);
struct trace_array *tr = gops->private;
struct trace_array_cpu *data;
+ struct fgraph_times *ftimes;
unsigned long flags;
unsigned int trace_ctx;
long disabled;
+ int size;
int cpu;
ftrace_graph_addr_finish(gops, trace);
@@ -256,6 +337,14 @@ void trace_graph_return(struct ftrace_graph_ret *trace,
return;
}
+ ftimes = fgraph_retrieve_data(gops->idx, &size);
+ if (!ftimes)
+ return;
+
+ handle_nosleeptime(trace, ftimes, size);
+
+ trace->calltime = ftimes->calltime;
+
local_irq_save(flags);
cpu = raw_smp_processor_id();
data = per_cpu_ptr(tr->array_buffer.data, cpu);
@@ -271,6 +360,9 @@ void trace_graph_return(struct ftrace_graph_ret *trace,
static void trace_graph_thresh_return(struct ftrace_graph_ret *trace,
struct fgraph_ops *gops)
{
+ struct fgraph_times *ftimes;
+ int size;
+
ftrace_graph_addr_finish(gops, trace);
if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT)) {
@@ -278,8 +370,16 @@ static void trace_graph_thresh_return(struct ftrace_graph_ret *trace,
return;
}
+ ftimes = fgraph_retrieve_data(gops->idx, &size);
+ if (!ftimes)
+ return;
+
+ handle_nosleeptime(trace, ftimes, size);
+
+ trace->calltime = ftimes->calltime;
+
if (tracing_thresh &&
- (trace->rettime - trace->calltime < tracing_thresh))
+ (trace->rettime - ftimes->calltime < tracing_thresh))
return;
else
trace_graph_return(trace, gops);
@@ -457,7 +557,7 @@ get_return_for_leaf(struct trace_iterator *iter,
* then we just reuse the data from before.
*/
if (data && data->failed) {
- curr = &data->ent;
+ curr = &data->ent.ent;
next = &data->ret;
} else {
@@ -487,7 +587,10 @@ get_return_for_leaf(struct trace_iterator *iter,
* Save current and next entries for later reference
* if the output fails.
*/
- data->ent = *curr;
+ if (unlikely(curr->ent.type == TRACE_GRAPH_RETADDR_ENT))
+ data->ent.rent = *(struct fgraph_retaddr_ent_entry *)curr;
+ else
+ data->ent.ent = *curr;
/*
* If the next event is not a return type, then
* we only care about what type it is. Otherwise we can
@@ -651,52 +754,96 @@ print_graph_duration(struct trace_array *tr, unsigned long long duration,
}
#ifdef CONFIG_FUNCTION_GRAPH_RETVAL
-
#define __TRACE_GRAPH_PRINT_RETVAL TRACE_GRAPH_PRINT_RETVAL
+#else
+#define __TRACE_GRAPH_PRINT_RETVAL 0
+#endif
+
+#ifdef CONFIG_FUNCTION_GRAPH_RETADDR
+#define __TRACE_GRAPH_PRINT_RETADDR TRACE_GRAPH_PRINT_RETADDR
+static void print_graph_retaddr(struct trace_seq *s, struct fgraph_retaddr_ent_entry *entry,
+ u32 trace_flags, bool comment)
+{
+ if (comment)
+ trace_seq_puts(s, " /*");
+
+ trace_seq_puts(s, " <-");
+ seq_print_ip_sym(s, entry->graph_ent.retaddr, trace_flags | TRACE_ITER_SYM_OFFSET);
+
+ if (comment)
+ trace_seq_puts(s, " */");
+}
+#else
+#define __TRACE_GRAPH_PRINT_RETADDR 0
+#define print_graph_retaddr(_seq, _entry, _tflags, _comment) do { } while (0)
+#endif
-static void print_graph_retval(struct trace_seq *s, unsigned long retval,
- bool leaf, void *func, bool hex_format)
+#if defined(CONFIG_FUNCTION_GRAPH_RETVAL) || defined(CONFIG_FUNCTION_GRAPH_RETADDR)
+
+static void print_graph_retval(struct trace_seq *s, struct ftrace_graph_ent_entry *entry,
+ struct ftrace_graph_ret *graph_ret, void *func,
+ u32 opt_flags, u32 trace_flags)
{
unsigned long err_code = 0;
+ unsigned long retval = 0;
+ bool print_retaddr = false;
+ bool print_retval = false;
+ bool hex_format = !!(opt_flags & TRACE_GRAPH_PRINT_RETVAL_HEX);
- if (retval == 0 || hex_format)
- goto done;
+#ifdef CONFIG_FUNCTION_GRAPH_RETVAL
+ retval = graph_ret->retval;
+ print_retval = !!(opt_flags & TRACE_GRAPH_PRINT_RETVAL);
+#endif
- /* Check if the return value matches the negative format */
- if (IS_ENABLED(CONFIG_64BIT) && (retval & BIT(31)) &&
- (((u64)retval) >> 32) == 0) {
- /* sign extension */
- err_code = (unsigned long)(s32)retval;
- } else {
- err_code = retval;
+#ifdef CONFIG_FUNCTION_GRAPH_RETADDR
+ print_retaddr = !!(opt_flags & TRACE_GRAPH_PRINT_RETADDR);
+#endif
+
+ if (print_retval && retval && !hex_format) {
+ /* Check if the return value matches the negative format */
+ if (IS_ENABLED(CONFIG_64BIT) && (retval & BIT(31)) &&
+ (((u64)retval) >> 32) == 0) {
+ err_code = sign_extend64(retval, 31);
+ } else {
+ err_code = retval;
+ }
+
+ if (!IS_ERR_VALUE(err_code))
+ err_code = 0;
}
- if (!IS_ERR_VALUE(err_code))
- err_code = 0;
+ if (entry) {
+ if (entry->ent.type != TRACE_GRAPH_RETADDR_ENT)
+ print_retaddr = false;
-done:
- if (leaf) {
- if (hex_format || (err_code == 0))
- trace_seq_printf(s, "%ps(); /* = 0x%lx */\n",
- func, retval);
+ trace_seq_printf(s, "%ps();", func);
+ if (print_retval || print_retaddr)
+ trace_seq_puts(s, " /*");
else
- trace_seq_printf(s, "%ps(); /* = %ld */\n",
- func, err_code);
+ trace_seq_putc(s, '\n');
} else {
+ print_retaddr = false;
+ trace_seq_printf(s, "} /* %ps", func);
+ }
+
+ if (print_retaddr)
+ print_graph_retaddr(s, (struct fgraph_retaddr_ent_entry *)entry,
+ trace_flags, false);
+
+ if (print_retval) {
if (hex_format || (err_code == 0))
- trace_seq_printf(s, "} /* %ps = 0x%lx */\n",
- func, retval);
+ trace_seq_printf(s, " ret=0x%lx", retval);
else
- trace_seq_printf(s, "} /* %ps = %ld */\n",
- func, err_code);
+ trace_seq_printf(s, " ret=%ld", err_code);
}
+
+ if (!entry || print_retval || print_retaddr)
+ trace_seq_puts(s, " */\n");
}
#else
-#define __TRACE_GRAPH_PRINT_RETVAL 0
-
-#define print_graph_retval(_seq, _retval, _leaf, _func, _format) do {} while (0)
+#define print_graph_retval(_seq, _ent, _ret, _func, _opt_flags, _trace_flags) do {} while (0)
#endif
@@ -748,14 +895,15 @@ print_graph_entry_leaf(struct trace_iterator *iter,
trace_seq_putc(s, ' ');
/*
- * Write out the function return value if the option function-retval is
- * enabled.
+ * Write out the function return value or return address
*/
- if (flags & __TRACE_GRAPH_PRINT_RETVAL)
- print_graph_retval(s, graph_ret->retval, true, (void *)func,
- !!(flags & TRACE_GRAPH_PRINT_RETVAL_HEX));
- else
+ if (flags & (__TRACE_GRAPH_PRINT_RETVAL | __TRACE_GRAPH_PRINT_RETADDR)) {
+ print_graph_retval(s, entry, graph_ret,
+ (void *)graph_ret->func + iter->tr->text_delta,
+ flags, tr->trace_flags);
+ } else {
trace_seq_printf(s, "%ps();\n", (void *)func);
+ }
print_graph_irq(iter, graph_ret->func, TRACE_GRAPH_RET,
cpu, iter->ent->pid, flags);
@@ -796,7 +944,12 @@ print_graph_entry_nested(struct trace_iterator *iter,
func = call->func + iter->tr->text_delta;
- trace_seq_printf(s, "%ps() {\n", (void *)func);
+ trace_seq_printf(s, "%ps() {", (void *)func);
+ if (flags & __TRACE_GRAPH_PRINT_RETADDR &&
+ entry->ent.type == TRACE_GRAPH_RETADDR_ENT)
+ print_graph_retaddr(s, (struct fgraph_retaddr_ent_entry *)entry,
+ tr->trace_flags, true);
+ trace_seq_putc(s, '\n');
if (trace_seq_has_overflowed(s))
return TRACE_TYPE_PARTIAL_LINE;
@@ -1043,11 +1196,10 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
/*
* Always write out the function name and its return value if the
- * function-retval option is enabled.
+ * funcgraph-retval option is enabled.
*/
if (flags & __TRACE_GRAPH_PRINT_RETVAL) {
- print_graph_retval(s, trace->retval, false, (void *)func,
- !!(flags & TRACE_GRAPH_PRINT_RETVAL_HEX));
+ print_graph_retval(s, NULL, trace, (void *)func, flags, tr->trace_flags);
} else {
/*
* If the return function does not have a matching entry,
@@ -1162,7 +1314,7 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags)
* to print out the missing entry which would never go out.
*/
if (data && data->failed) {
- field = &data->ent;
+ field = &data->ent.ent;
iter->cpu = data->cpu;
ret = print_graph_entry(field, s, iter, flags);
if (ret == TRACE_TYPE_HANDLED && iter->cpu != cpu) {
@@ -1186,6 +1338,16 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags)
saved = *field;
return print_graph_entry(&saved, s, iter, flags);
}
+#ifdef CONFIG_FUNCTION_GRAPH_RETADDR
+ case TRACE_GRAPH_RETADDR_ENT: {
+ struct fgraph_retaddr_ent_entry saved;
+ struct fgraph_retaddr_ent_entry *rfield;
+
+ trace_assign_type(rfield, entry);
+ saved = *rfield;
+ return print_graph_entry((struct ftrace_graph_ent_entry *)&saved, s, iter, flags);
+ }
+#endif
case TRACE_GRAPH_RET: {
struct ftrace_graph_ret_entry *field;
trace_assign_type(field, entry);
@@ -1380,6 +1542,13 @@ static struct trace_event graph_trace_entry_event = {
.funcs = &graph_functions,
};
+#ifdef CONFIG_FUNCTION_GRAPH_RETADDR
+static struct trace_event graph_trace_retaddr_entry_event = {
+ .type = TRACE_GRAPH_RETADDR_ENT,
+ .funcs = &graph_functions,
+};
+#endif
+
static struct trace_event graph_trace_ret_event = {
.type = TRACE_GRAPH_RET,
.funcs = &graph_functions
@@ -1466,6 +1635,13 @@ static __init int init_graph_trace(void)
return 1;
}
+#ifdef CONFIG_FUNCTION_GRAPH_RETADDR
+ if (!register_trace_event(&graph_trace_retaddr_entry_event)) {
+ pr_warn("Warning: could not register graph trace retaddr events\n");
+ return 1;
+ }
+#endif
+
if (!register_trace_event(&graph_trace_ret_event)) {
pr_warn("Warning: could not register graph trace events\n");
return 1;