From 4d60e5e36aa6f11b4d9eadc5d2b94128f24870c7 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 20 Sep 2018 16:00:45 +0300 Subject: perf tools: Improve thread_stack__event() for trace begin / end thread_stack__event() is used to create call stacks, by keeping track of calls and returns. Improve the handling of trace begin / end to allow for a trace that ends in a call. Previously, the Intel PT decoder would indicate begin / end by a branch from / to zero. That hides useful information, in particular when a trace ends with a call. Before remedying that, enhance the thread stack so that it does not expect to see the 'return' for a 'call' that ends the trace. Committer notes: Added this: return thread_stack__push(thread->ts, ret_addr, - flags && PERF_IP_FLAG_TRACE_END); + flags & PERF_IP_FLAG_TRACE_END); To fix problem spotted by: debian:9: clang version 3.8.1-24 (tags/RELEASE_381/final) debian:experimental: clang version 6.0.1-6 (tags/RELEASE_601/final) Signed-off-by: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20180920130048.31432-4-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/thread-stack.c | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) (limited to 'tools/perf/util/thread-stack.c') diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c index dd17d6a38d3a..e3f7dfecafa9 100644 --- a/tools/perf/util/thread-stack.c +++ b/tools/perf/util/thread-stack.c @@ -36,6 +36,7 @@ * @branch_count: the branch count when the entry was created * @cp: call path * @no_call: a 'call' was not seen + * @trace_end: a 'call' but trace ended */ struct thread_stack_entry { u64 ret_addr; @@ -44,6 +45,7 @@ struct thread_stack_entry { u64 branch_count; struct call_path *cp; bool no_call; + bool trace_end; }; /** @@ -112,7 +114,8 @@ static struct thread_stack *thread_stack__new(struct thread *thread, return ts; } -static int thread_stack__push(struct thread_stack *ts, u64 ret_addr) +static int thread_stack__push(struct thread_stack *ts, u64 ret_addr, + bool trace_end) { int err = 0; @@ -124,6 +127,7 @@ static int thread_stack__push(struct thread_stack *ts, u64 ret_addr) } } + ts->stack[ts->cnt].trace_end = trace_end; ts->stack[ts->cnt++].ret_addr = ret_addr; return err; @@ -150,6 +154,18 @@ static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr) } } +static void thread_stack__pop_trace_end(struct thread_stack *ts) +{ + size_t i; + + for (i = ts->cnt; i; ) { + if (ts->stack[--i].trace_end) + ts->cnt = i; + else + return; + } +} + static bool thread_stack__in_kernel(struct thread_stack *ts) { if (!ts->cnt) @@ -254,10 +270,19 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip, ret_addr = from_ip + insn_len; if (ret_addr == to_ip) return 0; /* Zero-length calls are excluded */ - return thread_stack__push(thread->ts, ret_addr); - } else if (flags & PERF_IP_FLAG_RETURN) { - if (!from_ip) - return 0; + return thread_stack__push(thread->ts, ret_addr, + flags & PERF_IP_FLAG_TRACE_END); + } else if (flags & PERF_IP_FLAG_TRACE_BEGIN) { + /* + * If the caller did not change the trace number (which would + * have flushed the stack) then try to make sense of the stack. + * Possibly, tracing began after returning to the current + * address, so try to pop that. Also, do not expect a call made + * when the trace ended, to return, so pop that. + */ + thread_stack__pop(thread->ts, to_ip); + thread_stack__pop_trace_end(thread->ts); + } else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) { thread_stack__pop(thread->ts, to_ip); } -- cgit From 2dcde4e152a3e319cc7e76c7c6b8548a3c72310d Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 20 Sep 2018 16:00:46 +0300 Subject: perf tools: Improve thread_stack__process() for trace begin / end thread_stack__process() is used to create call paths for database export. Improve the handling of trace begin / end to allow for a trace that ends in a call. Previously, the Intel PT decoder would indicate begin / end by a branch from / to zero. That hides useful information, in particular when a trace ends with a call. Before remedying that, enhance the thread stack so that it identifies the trace end by the flag instead of by ip == 0. Signed-off-by: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20180920130048.31432-5-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/thread-stack.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'tools/perf/util/thread-stack.c') diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c index e3f7dfecafa9..c091635bf7dc 100644 --- a/tools/perf/util/thread-stack.c +++ b/tools/perf/util/thread-stack.c @@ -357,7 +357,7 @@ void call_return_processor__free(struct call_return_processor *crp) static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr, u64 timestamp, u64 ref, struct call_path *cp, - bool no_call) + bool no_call, bool trace_end) { struct thread_stack_entry *tse; int err; @@ -375,6 +375,7 @@ static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr, tse->branch_count = ts->branch_count; tse->cp = cp; tse->no_call = no_call; + tse->trace_end = trace_end; return 0; } @@ -448,7 +449,7 @@ static int thread_stack__bottom(struct thread *thread, struct thread_stack *ts, return -ENOMEM; return thread_stack__push_cp(thread->ts, ip, sample->time, ref, cp, - true); + true, false); } static int thread_stack__no_call_return(struct thread *thread, @@ -480,7 +481,7 @@ static int thread_stack__no_call_return(struct thread *thread, if (!cp) return -ENOMEM; return thread_stack__push_cp(ts, 0, sample->time, ref, - cp, true); + cp, true, false); } } else if (thread_stack__in_kernel(ts) && sample->ip < ks) { /* Return to userspace, so pop all kernel addresses */ @@ -505,7 +506,7 @@ static int thread_stack__no_call_return(struct thread *thread, return -ENOMEM; err = thread_stack__push_cp(ts, sample->addr, sample->time, ref, cp, - true); + true, false); if (err) return err; @@ -525,7 +526,7 @@ static int thread_stack__trace_begin(struct thread *thread, /* Pop trace end */ tse = &ts->stack[ts->cnt - 1]; - if (tse->cp->sym == NULL && tse->cp->ip == 0) { + if (tse->trace_end) { err = thread_stack__call_return(thread, ts, --ts->cnt, timestamp, ref, false); if (err) @@ -554,7 +555,7 @@ static int thread_stack__trace_end(struct thread_stack *ts, ret_addr = sample->ip + sample->insn_len; return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp, - false); + false, true); } int thread_stack__process(struct thread *thread, struct comm *comm, @@ -604,6 +605,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm, ts->last_time = sample->time; if (sample->flags & PERF_IP_FLAG_CALL) { + bool trace_end = sample->flags & PERF_IP_FLAG_TRACE_END; struct call_path_root *cpr = ts->crp->cpr; struct call_path *cp; u64 ret_addr; @@ -621,7 +623,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm, if (!cp) return -ENOMEM; err = thread_stack__push_cp(ts, ret_addr, sample->time, ref, - cp, false); + cp, false, trace_end); } else if (sample->flags & PERF_IP_FLAG_RETURN) { if (!sample->ip || !sample->addr) return 0; -- cgit From 242483068b4b9ad02f1653819b6e683577681e0e Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 31 Oct 2018 11:10:42 +0200 Subject: perf intel-pt: Insert callchain context into synthesized callchains In the absence of a fallback, callchains must encode also the callchain context. Do that now there is no fallback. Signed-off-by: Adrian Hunter Reviewed-by: Jiri Olsa Cc: Andi Kleen Cc: David S. Miller Cc: Leo Yan Cc: Mathieu Poirier Cc: stable@vger.kernel.org # 4.19 Link: http://lkml.kernel.org/r/100ea2ec-ed14-b56d-d810-e0a6d2f4b069@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt.c | 6 ++++-- tools/perf/util/thread-stack.c | 44 +++++++++++++++++++++++++++++++++--------- tools/perf/util/thread-stack.h | 2 +- 3 files changed, 40 insertions(+), 12 deletions(-) (limited to 'tools/perf/util/thread-stack.c') diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index ffa385a029b3..60732213d16a 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -759,7 +759,8 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, if (pt->synth_opts.callchain) { size_t sz = sizeof(struct ip_callchain); - sz += pt->synth_opts.callchain_sz * sizeof(u64); + /* Add 1 to callchain_sz for callchain context */ + sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64); ptq->chain = zalloc(sz); if (!ptq->chain) goto out_free; @@ -1160,7 +1161,8 @@ static void intel_pt_prep_sample(struct intel_pt *pt, if (pt->synth_opts.callchain) { thread_stack__sample(ptq->thread, ptq->chain, - pt->synth_opts.callchain_sz, sample->ip); + pt->synth_opts.callchain_sz + 1, + sample->ip, pt->kernel_start); sample->callchain = ptq->chain; } diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c index c091635bf7dc..61a4286a74dc 100644 --- a/tools/perf/util/thread-stack.c +++ b/tools/perf/util/thread-stack.c @@ -310,20 +310,46 @@ void thread_stack__free(struct thread *thread) } } +static inline u64 callchain_context(u64 ip, u64 kernel_start) +{ + return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL; +} + void thread_stack__sample(struct thread *thread, struct ip_callchain *chain, - size_t sz, u64 ip) + size_t sz, u64 ip, u64 kernel_start) { - size_t i; + u64 context = callchain_context(ip, kernel_start); + u64 last_context; + size_t i, j; - if (!thread || !thread->ts) - chain->nr = 1; - else - chain->nr = min(sz, thread->ts->cnt + 1); + if (sz < 2) { + chain->nr = 0; + return; + } - chain->ips[0] = ip; + chain->ips[0] = context; + chain->ips[1] = ip; + + if (!thread || !thread->ts) { + chain->nr = 2; + return; + } + + last_context = context; + + for (i = 2, j = 1; i < sz && j <= thread->ts->cnt; i++, j++) { + ip = thread->ts->stack[thread->ts->cnt - j].ret_addr; + context = callchain_context(ip, kernel_start); + if (context != last_context) { + if (i >= sz - 1) + break; + chain->ips[i++] = context; + last_context = context; + } + chain->ips[i] = ip; + } - for (i = 1; i < chain->nr; i++) - chain->ips[i] = thread->ts->stack[thread->ts->cnt - i].ret_addr; + chain->nr = i; } struct call_return_processor * diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h index b7e41c4ebfdd..f97c00a8c251 100644 --- a/tools/perf/util/thread-stack.h +++ b/tools/perf/util/thread-stack.h @@ -84,7 +84,7 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip, u64 to_ip, u16 insn_len, u64 trace_nr); void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr); void thread_stack__sample(struct thread *thread, struct ip_callchain *chain, - size_t sz, u64 ip); + size_t sz, u64 ip, u64 kernel_start); int thread_stack__flush(struct thread *thread); void thread_stack__free(struct thread *thread); size_t thread_stack__depth(struct thread *thread); -- cgit