diff options
Diffstat (limited to 'kernel/trace')
| -rw-r--r-- | kernel/trace/Kconfig | 2 | ||||
| -rw-r--r-- | kernel/trace/ring_buffer.c | 61 | ||||
| -rw-r--r-- | kernel/trace/trace_events.c | 16 |
3 files changed, 60 insertions, 19 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 904c952ac383..f54dc62b599c 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -355,7 +355,7 @@ config PROFILE_ANNOTATED_BRANCHES on if you need to profile the system's use of these macros. config PROFILE_ALL_BRANCHES - bool "Profile all if conditionals" + bool "Profile all if conditionals" if !FORTIFY_SOURCE select TRACE_BRANCH_PROFILING help This tracer profiles all branch conditions. Every if () diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 9ab18995ff1e..5af2842dea96 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2534,29 +2534,58 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) * The lock and unlock are done within a preempt disable section. * The current_context per_cpu variable can only be modified * by the current task between lock and unlock. But it can - * be modified more than once via an interrupt. There are four - * different contexts that we need to consider. + * be modified more than once via an interrupt. To pass this + * information from the lock to the unlock without having to + * access the 'in_interrupt()' functions again (which do show + * a bit of overhead in something as critical as function tracing, + * we use a bitmask trick. * - * Normal context. - * SoftIRQ context - * IRQ context - * NMI context + * bit 0 = NMI context + * bit 1 = IRQ context + * bit 2 = SoftIRQ context + * bit 3 = normal context. * - * If for some reason the ring buffer starts to recurse, we - * only allow that to happen at most 4 times (one for each - * context). If it happens 5 times, then we consider this a - * recusive loop and do not let it go further. + * This works because this is the order of contexts that can + * preempt other contexts. A SoftIRQ never preempts an IRQ + * context. + * + * When the context is determined, the corresponding bit is + * checked and set (if it was set, then a recursion of that context + * happened). + * + * On unlock, we need to clear this bit. To do so, just subtract + * 1 from the current_context and AND it to itself. + * + * (binary) + * 101 - 1 = 100 + * 101 & 100 = 100 (clearing bit zero) + * + * 1010 - 1 = 1001 + * 1010 & 1001 = 1000 (clearing bit 1) + * + * The least significant bit can be cleared this way, and it + * just so happens that it is the same bit corresponding to + * the current context. */ static __always_inline int trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer) { - if (cpu_buffer->current_context >= 4) + unsigned int val = cpu_buffer->current_context; + unsigned long pc = preempt_count(); + int bit; + + if (!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET))) + bit = RB_CTX_NORMAL; + else + bit = pc & NMI_MASK ? RB_CTX_NMI : + pc & HARDIRQ_MASK ? RB_CTX_IRQ : RB_CTX_SOFTIRQ; + + if (unlikely(val & (1 << bit))) return 1; - cpu_buffer->current_context++; - /* Interrupts must see this update */ - barrier(); + val |= (1 << bit); + cpu_buffer->current_context = val; return 0; } @@ -2564,9 +2593,7 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer) static __always_inline void trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer) { - /* Don't let the dec leak out */ - barrier(); - cpu_buffer->current_context--; + cpu_buffer->current_context &= cpu_buffer->current_context - 1; } /** diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index ec0f9aa4e151..1b87157edbff 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2213,6 +2213,7 @@ void trace_event_eval_update(struct trace_eval_map **map, int len) { struct trace_event_call *call, *p; const char *last_system = NULL; + bool first = false; int last_i; int i; @@ -2220,15 +2221,28 @@ void trace_event_eval_update(struct trace_eval_map **map, int len) list_for_each_entry_safe(call, p, &ftrace_events, list) { /* events are usually grouped together with systems */ if (!last_system || call->class->system != last_system) { + first = true; last_i = 0; last_system = call->class->system; } + /* + * Since calls are grouped by systems, the likelyhood that the + * next call in the iteration belongs to the same system as the + * previous call is high. As an optimization, we skip seaching + * for a map[] that matches the call's system if the last call + * was from the same system. That's what last_i is for. If the + * call has the same system as the previous call, then last_i + * will be the index of the first map[] that has a matching + * system. + */ for (i = last_i; i < len; i++) { if (call->class->system == map[i]->system) { /* Save the first system if need be */ - if (!last_i) + if (first) { last_i = i; + first = false; + } update_event_printk(call, map[i]); } } |