aboutsummaryrefslogtreecommitdiff
path: root/kernel/trace
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/trace')
-rw-r--r--kernel/trace/Kconfig6
-rw-r--r--kernel/trace/bpf_trace.c11
-rw-r--r--kernel/trace/ring_buffer.c62
-rw-r--r--kernel/trace/trace_kprobe.c33
-rw-r--r--kernel/trace/trace_probe.h12
5 files changed, 73 insertions, 51 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index ae3a2d519e50..0b249e2f0c3c 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -355,7 +355,7 @@ config PROFILE_ANNOTATED_BRANCHES
on if you need to profile the system's use of these macros.
config PROFILE_ALL_BRANCHES
- bool "Profile all if conditionals"
+ bool "Profile all if conditionals" if !FORTIFY_SOURCE
select TRACE_BRANCH_PROFILING
help
This tracer profiles all branch conditions. Every if ()
@@ -533,9 +533,7 @@ config FUNCTION_PROFILER
config BPF_KPROBE_OVERRIDE
bool "Enable BPF programs to override a kprobed function"
depends on BPF_EVENTS
- depends on KPROBES_ON_FTRACE
- depends on HAVE_KPROBE_OVERRIDE
- depends on DYNAMIC_FTRACE_WITH_REGS
+ depends on FUNCTION_ERROR_INJECTION
default n
help
Allows BPF to override the execution of a probed function and
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index f6d2327ecb59..f274468cbc45 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -14,7 +14,7 @@
#include <linux/uaccess.h>
#include <linux/ctype.h>
#include <linux/kprobes.h>
-#include <asm/kprobes.h>
+#include <linux/error-injection.h>
#include "trace_probe.h"
#include "trace.h"
@@ -83,9 +83,8 @@ EXPORT_SYMBOL_GPL(trace_call_bpf);
#ifdef CONFIG_BPF_KPROBE_OVERRIDE
BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc)
{
- __this_cpu_write(bpf_kprobe_override, 1);
regs_set_return_value(regs, rc);
- arch_ftrace_kprobe_override_function(regs);
+ override_function_with_return(regs);
return 0;
}
@@ -800,11 +799,11 @@ int perf_event_attach_bpf_prog(struct perf_event *event,
int ret = -EEXIST;
/*
- * Kprobe override only works for ftrace based kprobes, and only if they
- * are on the opt-in list.
+ * Kprobe override only works if they are on the function entry,
+ * and only if they are on the opt-in list.
*/
if (prog->kprobe_override &&
- (!trace_kprobe_ftrace(event->tp_event) ||
+ (!trace_kprobe_on_func_entry(event->tp_event) ||
!trace_kprobe_error_injectable(event->tp_event)))
return -EINVAL;
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 9ab18995ff1e..0cddf60186da 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2534,29 +2534,59 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
* The lock and unlock are done within a preempt disable section.
* The current_context per_cpu variable can only be modified
* by the current task between lock and unlock. But it can
- * be modified more than once via an interrupt. There are four
- * different contexts that we need to consider.
+ * be modified more than once via an interrupt. To pass this
+ * information from the lock to the unlock without having to
+ * access the 'in_interrupt()' functions again (which do show
+ * a bit of overhead in something as critical as function tracing,
+ * we use a bitmask trick.
*
- * Normal context.
- * SoftIRQ context
- * IRQ context
- * NMI context
+ * bit 0 = NMI context
+ * bit 1 = IRQ context
+ * bit 2 = SoftIRQ context
+ * bit 3 = normal context.
*
- * If for some reason the ring buffer starts to recurse, we
- * only allow that to happen at most 4 times (one for each
- * context). If it happens 5 times, then we consider this a
- * recusive loop and do not let it go further.
+ * This works because this is the order of contexts that can
+ * preempt other contexts. A SoftIRQ never preempts an IRQ
+ * context.
+ *
+ * When the context is determined, the corresponding bit is
+ * checked and set (if it was set, then a recursion of that context
+ * happened).
+ *
+ * On unlock, we need to clear this bit. To do so, just subtract
+ * 1 from the current_context and AND it to itself.
+ *
+ * (binary)
+ * 101 - 1 = 100
+ * 101 & 100 = 100 (clearing bit zero)
+ *
+ * 1010 - 1 = 1001
+ * 1010 & 1001 = 1000 (clearing bit 1)
+ *
+ * The least significant bit can be cleared this way, and it
+ * just so happens that it is the same bit corresponding to
+ * the current context.
*/
static __always_inline int
trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
{
- if (cpu_buffer->current_context >= 4)
+ unsigned int val = cpu_buffer->current_context;
+ unsigned long pc = preempt_count();
+ int bit;
+
+ if (!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)))
+ bit = RB_CTX_NORMAL;
+ else
+ bit = pc & NMI_MASK ? RB_CTX_NMI :
+ pc & HARDIRQ_MASK ? RB_CTX_IRQ :
+ pc & SOFTIRQ_OFFSET ? 2 : RB_CTX_SOFTIRQ;
+
+ if (unlikely(val & (1 << bit)))
return 1;
- cpu_buffer->current_context++;
- /* Interrupts must see this update */
- barrier();
+ val |= (1 << bit);
+ cpu_buffer->current_context = val;
return 0;
}
@@ -2564,9 +2594,7 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
static __always_inline void
trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer)
{
- /* Don't let the dec leak out */
- barrier();
- cpu_buffer->current_context--;
+ cpu_buffer->current_context &= cpu_buffer->current_context - 1;
}
/**
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 91f4b57dab82..1fad24acd444 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -21,6 +21,7 @@
#include <linux/module.h>
#include <linux/uaccess.h>
#include <linux/rculist.h>
+#include <linux/error-injection.h>
#include "trace_probe.h"
@@ -42,8 +43,6 @@ struct trace_kprobe {
(offsetof(struct trace_kprobe, tp.args) + \
(sizeof(struct probe_arg) * (n)))
-DEFINE_PER_CPU(int, bpf_kprobe_override);
-
static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk)
{
return tk->rp.handler != NULL;
@@ -88,13 +87,16 @@ static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk)
return nhit;
}
-int trace_kprobe_ftrace(struct trace_event_call *call)
+bool trace_kprobe_on_func_entry(struct trace_event_call *call)
{
struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
- return kprobe_ftrace(&tk->rp.kp);
+
+ return kprobe_on_func_entry(tk->rp.kp.addr,
+ tk->rp.kp.addr ? NULL : tk->rp.kp.symbol_name,
+ tk->rp.kp.addr ? 0 : tk->rp.kp.offset);
}
-int trace_kprobe_error_injectable(struct trace_event_call *call)
+bool trace_kprobe_error_injectable(struct trace_event_call *call)
{
struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
unsigned long addr;
@@ -106,7 +108,7 @@ int trace_kprobe_error_injectable(struct trace_event_call *call)
} else {
addr = (unsigned long)tk->rp.kp.addr;
}
- return within_kprobe_error_injection_list(addr);
+ return within_error_injection_list(addr);
}
static int register_kprobe_event(struct trace_kprobe *tk);
@@ -1202,6 +1204,7 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
int rctx;
if (bpf_prog_array_valid(call)) {
+ unsigned long orig_ip = instruction_pointer(regs);
int ret;
ret = trace_call_bpf(call, regs);
@@ -1209,12 +1212,13 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
/*
* We need to check and see if we modified the pc of the
* pt_regs, and if so clear the kprobe and return 1 so that we
- * don't do the instruction skipping. Also reset our state so
- * we are clean the next pass through.
+ * don't do the single stepping.
+ * The ftrace kprobe handler leaves it up to us to re-enable
+ * preemption here before returning if we've modified the ip.
*/
- if (__this_cpu_read(bpf_kprobe_override)) {
- __this_cpu_write(bpf_kprobe_override, 0);
+ if (orig_ip != instruction_pointer(regs)) {
reset_current_kprobe();
+ preempt_enable_no_resched();
return 1;
}
if (!ret)
@@ -1322,15 +1326,8 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
if (tk->tp.flags & TP_FLAG_TRACE)
kprobe_trace_func(tk, regs);
#ifdef CONFIG_PERF_EVENTS
- if (tk->tp.flags & TP_FLAG_PROFILE) {
+ if (tk->tp.flags & TP_FLAG_PROFILE)
ret = kprobe_perf_func(tk, regs);
- /*
- * The ftrace kprobe handler leaves it up to us to re-enable
- * preemption here before returning if we've modified the ip.
- */
- if (ret)
- preempt_enable_no_resched();
- }
#endif
return ret;
}
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 5e54d748c84c..e101c5bb9eda 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -252,8 +252,8 @@ struct symbol_cache;
unsigned long update_symbol_cache(struct symbol_cache *sc);
void free_symbol_cache(struct symbol_cache *sc);
struct symbol_cache *alloc_symbol_cache(const char *sym, long offset);
-int trace_kprobe_ftrace(struct trace_event_call *call);
-int trace_kprobe_error_injectable(struct trace_event_call *call);
+bool trace_kprobe_on_func_entry(struct trace_event_call *call);
+bool trace_kprobe_error_injectable(struct trace_event_call *call);
#else
/* uprobes do not support symbol fetch methods */
#define fetch_symbol_u8 NULL
@@ -280,14 +280,14 @@ alloc_symbol_cache(const char *sym, long offset)
return NULL;
}
-static inline int trace_kprobe_ftrace(struct trace_event_call *call)
+static inline bool trace_kprobe_on_func_entry(struct trace_event_call *call)
{
- return 0;
+ return false;
}
-static inline int trace_kprobe_error_injectable(struct trace_event_call *call)
+static inline bool trace_kprobe_error_injectable(struct trace_event_call *call)
{
- return 0;
+ return false;
}
#endif /* CONFIG_KPROBE_EVENTS */