aboutsummaryrefslogtreecommitdiff
path: root/kernel/trace/trace.h
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-07-18 13:36:33 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2024-07-18 13:36:33 -0700
commit70045bfc4cd5fef44ada25fa3367329eba98731a (patch)
treef0c304052fe870c645bfc323bb143e11671d9ff5 /kernel/trace/trace.h
parent2fd4130e53db0958167510eddbca0f09dc858109 (diff)
parentb576d375b536568c85d42c15a189f6b6fdd75b74 (diff)
Merge tag 'ftrace-v6.11' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace
Pull ftrace updates from Steven Rostedt: "Rewrite of function graph tracer to allow multiple users Up until now, the function graph tracer could only have a single user attached to it. If another user tried to attach to the function graph tracer while one was already attached, it would fail. Allowing function graph tracer to have more than one user has been asked for since 2009, but it required a rewrite to the logic to pull it off so it never happened. Until now! There's three systems that trace the return of a function. That is kretprobes, function graph tracer, and BPF. kretprobes and function graph tracing both do it similarly. The difference is that kretprobes uses a shadow stack per callback and function graph tracer creates a shadow stack for all tasks. The function graph tracer method makes it possible to trace the return of all functions. As kretprobes now needs that feature too, allowing it to use function graph tracer was needed. BPF also wants to trace the return of many probes and its method doesn't scale either. Having it use function graph tracer would improve that. By allowing function graph tracer to have multiple users allows both kretprobes and BPF to use function graph tracer in these cases. This will allow kretprobes code to be removed in the future as it's version will no longer be needed. Note, function graph tracer is only limited to 16 simultaneous users, due to shadow stack size and allocated slots" * tag 'ftrace-v6.11' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace: (49 commits) fgraph: Use str_plural() in test_graph_storage_single() function_graph: Add READ_ONCE() when accessing fgraph_array[] ftrace: Add missing kerneldoc parameters to unregister_ftrace_direct() function_graph: Everyone uses HAVE_FUNCTION_GRAPH_RET_ADDR_PTR, remove it function_graph: Fix up ftrace_graph_ret_addr() function_graph: Make fgraph_update_pid_func() a stub for !DYNAMIC_FTRACE function_graph: Rename BYTE_NUMBER to CHAR_NUMBER in selftests fgraph: Remove some unused functions ftrace: Hide one more entry in stack trace when ftrace_pid is enabled function_graph: Do not update pid func if CONFIG_DYNAMIC_FTRACE not enabled function_graph: Make fgraph_do_direct static key static ftrace: Fix prototypes for ftrace_startup/shutdown_subops() ftrace: Assign RCU list variable with rcu_assign_ptr() ftrace: Assign ftrace_list_end to ftrace_ops_list type cast to RCU ftrace: Declare function_trace_op in header to quiet sparse warning ftrace: Add comments to ftrace_hash_move() and friends ftrace: Convert "inc" parameter to bool in ftrace_hash_rec_update_modify() ftrace: Add comments to ftrace_hash_rec_disable/enable() ftrace: Remove "filter_hash" parameter from __ftrace_hash_rec_update() ftrace: Rename dup_hash() and comment it ...
Diffstat (limited to 'kernel/trace/trace.h')
-rw-r--r--kernel/trace/trace.h93
1 files changed, 77 insertions, 16 deletions
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 749a182dab48..8783bebd0562 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -397,6 +397,9 @@ struct trace_array {
struct ftrace_ops *ops;
struct trace_pid_list __rcu *function_pids;
struct trace_pid_list __rcu *function_no_pids;
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ struct fgraph_ops *gops;
+#endif
#ifdef CONFIG_DYNAMIC_FTRACE
/* All of these are protected by the ftrace_lock */
struct list_head func_probes;
@@ -679,9 +682,8 @@ void trace_latency_header(struct seq_file *m);
void trace_default_header(struct seq_file *m);
void print_trace_header(struct seq_file *m, struct trace_iterator *iter);
-void trace_graph_return(struct ftrace_graph_ret *trace);
-int trace_graph_entry(struct ftrace_graph_ent *trace);
-void set_graph_array(struct trace_array *tr);
+void trace_graph_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops);
+int trace_graph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops);
void tracing_start_cmdline_record(void);
void tracing_stop_cmdline_record(void);
@@ -892,12 +894,59 @@ extern int __trace_graph_entry(struct trace_array *tr,
extern void __trace_graph_return(struct trace_array *tr,
struct ftrace_graph_ret *trace,
unsigned int trace_ctx);
+extern void init_array_fgraph_ops(struct trace_array *tr, struct ftrace_ops *ops);
+extern int allocate_fgraph_ops(struct trace_array *tr, struct ftrace_ops *ops);
+extern void free_fgraph_ops(struct trace_array *tr);
+
+enum {
+ TRACE_GRAPH_FL = 1,
+
+ /*
+ * In the very unlikely case that an interrupt came in
+ * at a start of graph tracing, and we want to trace
+ * the function in that interrupt, the depth can be greater
+ * than zero, because of the preempted start of a previous
+ * trace. In an even more unlikely case, depth could be 2
+ * if a softirq interrupted the start of graph tracing,
+ * followed by an interrupt preempting a start of graph
+ * tracing in the softirq, and depth can even be 3
+ * if an NMI came in at the start of an interrupt function
+ * that preempted a softirq start of a function that
+ * preempted normal context!!!! Luckily, it can't be
+ * greater than 3, so the next two bits are a mask
+ * of what the depth is when we set TRACE_GRAPH_FL
+ */
+
+ TRACE_GRAPH_DEPTH_START_BIT,
+ TRACE_GRAPH_DEPTH_END_BIT,
+
+ /*
+ * To implement set_graph_notrace, if this bit is set, we ignore
+ * function graph tracing of called functions, until the return
+ * function is called to clear it.
+ */
+ TRACE_GRAPH_NOTRACE_BIT,
+};
+
+#define TRACE_GRAPH_NOTRACE (1 << TRACE_GRAPH_NOTRACE_BIT)
+
+static inline unsigned long ftrace_graph_depth(unsigned long *task_var)
+{
+ return (*task_var >> TRACE_GRAPH_DEPTH_START_BIT) & 3;
+}
+
+static inline void ftrace_graph_set_depth(unsigned long *task_var, int depth)
+{
+ *task_var &= ~(3 << TRACE_GRAPH_DEPTH_START_BIT);
+ *task_var |= (depth & 3) << TRACE_GRAPH_DEPTH_START_BIT;
+}
#ifdef CONFIG_DYNAMIC_FTRACE
extern struct ftrace_hash __rcu *ftrace_graph_hash;
extern struct ftrace_hash __rcu *ftrace_graph_notrace_hash;
-static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace)
+static inline int
+ftrace_graph_addr(unsigned long *task_var, struct ftrace_graph_ent *trace)
{
unsigned long addr = trace->func;
int ret = 0;
@@ -919,13 +968,12 @@ static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace)
}
if (ftrace_lookup_ip(hash, addr)) {
-
/*
* This needs to be cleared on the return functions
* when the depth is zero.
*/
- trace_recursion_set(TRACE_GRAPH_BIT);
- trace_recursion_set_depth(trace->depth);
+ *task_var |= TRACE_GRAPH_FL;
+ ftrace_graph_set_depth(task_var, trace->depth);
/*
* If no irqs are to be traced, but a set_graph_function
@@ -944,11 +992,14 @@ out:
return ret;
}
-static inline void ftrace_graph_addr_finish(struct ftrace_graph_ret *trace)
+static inline void
+ftrace_graph_addr_finish(struct fgraph_ops *gops, struct ftrace_graph_ret *trace)
{
- if (trace_recursion_test(TRACE_GRAPH_BIT) &&
- trace->depth == trace_recursion_depth())
- trace_recursion_clear(TRACE_GRAPH_BIT);
+ unsigned long *task_var = fgraph_get_task_var(gops);
+
+ if ((*task_var & TRACE_GRAPH_FL) &&
+ trace->depth == ftrace_graph_depth(task_var))
+ *task_var &= ~TRACE_GRAPH_FL;
}
static inline int ftrace_graph_notrace_addr(unsigned long addr)
@@ -974,7 +1025,7 @@ static inline int ftrace_graph_notrace_addr(unsigned long addr)
return ret;
}
#else
-static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace)
+static inline int ftrace_graph_addr(unsigned long *task_var, struct ftrace_graph_ent *trace)
{
return 1;
}
@@ -983,27 +1034,37 @@ static inline int ftrace_graph_notrace_addr(unsigned long addr)
{
return 0;
}
-static inline void ftrace_graph_addr_finish(struct ftrace_graph_ret *trace)
+static inline void ftrace_graph_addr_finish(struct fgraph_ops *gops, struct ftrace_graph_ret *trace)
{ }
#endif /* CONFIG_DYNAMIC_FTRACE */
extern unsigned int fgraph_max_depth;
-static inline bool ftrace_graph_ignore_func(struct ftrace_graph_ent *trace)
+static inline bool
+ftrace_graph_ignore_func(struct fgraph_ops *gops, struct ftrace_graph_ent *trace)
{
+ unsigned long *task_var = fgraph_get_task_var(gops);
+
/* trace it when it is-nested-in or is a function enabled. */
- return !(trace_recursion_test(TRACE_GRAPH_BIT) ||
- ftrace_graph_addr(trace)) ||
+ return !((*task_var & TRACE_GRAPH_FL) ||
+ ftrace_graph_addr(task_var, trace)) ||
(trace->depth < 0) ||
(fgraph_max_depth && trace->depth >= fgraph_max_depth);
}
+void fgraph_init_ops(struct ftrace_ops *dst_ops,
+ struct ftrace_ops *src_ops);
+
#else /* CONFIG_FUNCTION_GRAPH_TRACER */
static inline enum print_line_t
print_graph_function_flags(struct trace_iterator *iter, u32 flags)
{
return TRACE_TYPE_UNHANDLED;
}
+static inline void free_fgraph_ops(struct trace_array *tr) { }
+/* ftrace_ops may not be defined */
+#define init_array_fgraph_ops(tr, ops) do { } while (0)
+#define allocate_fgraph_ops(tr, ops) ({ 0; })
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
extern struct list_head ftrace_pids;