From 6745d8ea825966b0956c691cf7fccc13debedc39 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 12 Apr 2016 15:26:13 +0200 Subject: perf script: Add stackcollapse.py script Add stackcollapse.py script as an example of parsing call chains, and also of using optparse to access command line options. The flame graph tools include a set of scripts that parse output from various tools (including "perf script"), remove the offsets in the function and collapse each stack to a single line. The website also says "perf report could have a report style [...] that output folded stacks directly, obviating the need for stackcollapse-perf.pl", so here it is. This script is a Python rewrite of stackcollapse-perf.pl, using the perf scripting interface to access the perf data directly from Python. Signed-off-by: Paolo Bonzini Acked-by: Jiri Olsa Cc: Brendan Gregg Link: http://lkml.kernel.org/r/1460467573-22989-1-git-send-email-pbonzini@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/bin/stackcollapse-record | 8 ++ tools/perf/scripts/python/bin/stackcollapse-report | 3 + tools/perf/scripts/python/stackcollapse.py | 127 +++++++++++++++++++++ 3 files changed, 138 insertions(+) create mode 100755 tools/perf/scripts/python/bin/stackcollapse-record create mode 100755 tools/perf/scripts/python/bin/stackcollapse-report create mode 100755 tools/perf/scripts/python/stackcollapse.py (limited to 'tools/perf/scripts/python') diff --git a/tools/perf/scripts/python/bin/stackcollapse-record b/tools/perf/scripts/python/bin/stackcollapse-record new file mode 100755 index 000000000000..9d8f9f0f3a17 --- /dev/null +++ b/tools/perf/scripts/python/bin/stackcollapse-record @@ -0,0 +1,8 @@ +#!/bin/sh + +# +# stackcollapse.py can cover all type of perf samples including +# the tracepoints, so no special record requirements, just record what +# you want to analyze. 
+# +perf record "$@" diff --git a/tools/perf/scripts/python/bin/stackcollapse-report b/tools/perf/scripts/python/bin/stackcollapse-report new file mode 100755 index 000000000000..356b9656393d --- /dev/null +++ b/tools/perf/scripts/python/bin/stackcollapse-report @@ -0,0 +1,3 @@ +#!/bin/sh +# description: produce callgraphs in short form for scripting use +perf script -s "$PERF_EXEC_PATH"/scripts/python/stackcollapse.py -- "$@" diff --git a/tools/perf/scripts/python/stackcollapse.py b/tools/perf/scripts/python/stackcollapse.py new file mode 100755 index 000000000000..a2dfcda41ae6 --- /dev/null +++ b/tools/perf/scripts/python/stackcollapse.py @@ -0,0 +1,127 @@ +#!/usr/bin/perl -w +# +# stackcollapse.py - format perf samples with one line per distinct call stack +# +# This script's output has two space-separated fields. The first is a semicolon +# separated stack including the program name (from the "comm" field) and the +# function names from the call stack. The second is a count: +# +# swapper;start_kernel;rest_init;cpu_idle;default_idle;native_safe_halt 2 +# +# The file is sorted according to the first field. +# +# Input may be created and processed using: +# +# perf record -a -g -F 99 sleep 60 +# perf script report stackcollapse > out.stacks-folded +# +# (perf script record stackcollapse works too). +# +# Written by Paolo Bonzini +# Based on Brendan Gregg's stackcollapse-perf.pl script. 
+ +import os +import sys +from collections import defaultdict +from optparse import OptionParser, make_option + +sys.path.append(os.environ['PERF_EXEC_PATH'] + \ + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') + +from perf_trace_context import * +from Core import * +from EventClass import * + +# command line parsing + +option_list = [ + # formatting options for the bottom entry of the stack + make_option("--include-tid", dest="include_tid", + action="store_true", default=False, + help="include thread id in stack"), + make_option("--include-pid", dest="include_pid", + action="store_true", default=False, + help="include process id in stack"), + make_option("--no-comm", dest="include_comm", + action="store_false", default=True, + help="do not separate stacks according to comm"), + make_option("--tidy-java", dest="tidy_java", + action="store_true", default=False, + help="beautify Java signatures"), + make_option("--kernel", dest="annotate_kernel", + action="store_true", default=False, + help="annotate kernel functions with _[k]") +] + +parser = OptionParser(option_list=option_list) +(opts, args) = parser.parse_args() + +if len(args) != 0: + parser.error("unexpected command line argument") +if opts.include_tid and not opts.include_comm: + parser.error("requesting tid but not comm is invalid") +if opts.include_pid and not opts.include_comm: + parser.error("requesting pid but not comm is invalid") + +# event handlers + +lines = defaultdict(lambda: 0) + +def process_event(param_dict): + def tidy_function_name(sym, dso): + if sym is None: + sym = '[unknown]' + + sym = sym.replace(';', ':') + if opts.tidy_java: + # the original stackcollapse-perf.pl script gives the + # example of converting this: + # Lorg/mozilla/javascript/MemberBox;.<init>(Ljava/lang/reflect/Method;)V + # to this: + # org/mozilla/javascript/MemberBox:.init + sym = sym.replace('<', '') + sym = sym.replace('>', '') + if sym[0] == 'L' and sym.find('/'): + sym = sym[1:] + try: + sym = sym[:sym.index('(')] + 
except ValueError: + pass + + if opts.annotate_kernel and dso == '[kernel.kallsyms]': + return sym + '_[k]' + else: + return sym + + stack = list() + if 'callchain' in param_dict: + for entry in param_dict['callchain']: + entry.setdefault('sym', dict()) + entry['sym'].setdefault('name', None) + entry.setdefault('dso', None) + stack.append(tidy_function_name(entry['sym']['name'], + entry['dso'])) + else: + param_dict.setdefault('symbol', None) + param_dict.setdefault('dso', None) + stack.append(tidy_function_name(param_dict['symbol'], + param_dict['dso'])) + + if opts.include_comm: + comm = param_dict["comm"].replace(' ', '_') + sep = "-" + if opts.include_pid: + comm = comm + sep + str(param_dict['sample']['pid']) + sep = "/" + if opts.include_tid: + comm = comm + sep + str(param_dict['sample']['tid']) + stack.append(comm) + + stack_string = ';'.join(reversed(stack)) + lines[stack_string] = lines[stack_string] + 1 + +def trace_end(): + list = lines.keys() + list.sort() + for stack in list: + print "%s %d" % (stack, lines[stack]) -- cgit From dd4629d46c3121b82e6a552c94cda6dcccfc38c6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 21 Jun 2016 17:33:20 -0300 Subject: perf script stackcollapse: Remove reference to the perl interpreter It is ignored and this is actually a python script, not a perl one. 
Reported-by: Brendan Gregg Cc: Jiri Olsa Cc: Milian Wolff Cc: Namhyung Kim Cc: Paolo Bonzini Link: http://lkml.kernel.org/n/tip-0w4bpbqd79v3sl34jvpr11v0@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/stackcollapse.py | 2 -- 1 file changed, 2 deletions(-) (limited to 'tools/perf/scripts/python') diff --git a/tools/perf/scripts/python/stackcollapse.py b/tools/perf/scripts/python/stackcollapse.py index a2dfcda41ae6..5a605f70ef32 100755 --- a/tools/perf/scripts/python/stackcollapse.py +++ b/tools/perf/scripts/python/stackcollapse.py @@ -1,5 +1,3 @@ -#!/usr/bin/perl -w -# # stackcollapse.py - format perf samples with one line per distinct call stack # # This script's output has two space-separated fields. The first is a semicolon -- cgit From 1db19db7f5ff4ddd3b1b6dd2092a87298ee5bd0b Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 7 Jul 2016 18:01:32 +0200 Subject: net: tracepoint napi:napi_poll add work and budget An important piece of information for the napi_poll tracepoint is the work done (packets processed) by the napi_poll() call. Add both the work done and budget, as they are related. Handle the trace_napi_poll() param change in dropwatch/drop_monitor and in the python perf script netdev-times.py in a backward-compatible way, as python fortunately supports optional parameter handling. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. 
Miller --- include/trace/events/napi.h | 13 +++++++++---- net/core/dev.c | 4 ++-- net/core/drop_monitor.c | 3 ++- net/core/netpoll.c | 2 +- tools/perf/scripts/python/netdev-times.py | 11 +++++++---- 5 files changed, 21 insertions(+), 12 deletions(-) (limited to 'tools/perf/scripts/python') diff --git a/include/trace/events/napi.h b/include/trace/events/napi.h index 8fe1e93f531d..118ed7767639 100644 --- a/include/trace/events/napi.h +++ b/include/trace/events/napi.h @@ -12,22 +12,27 @@ TRACE_EVENT(napi_poll, - TP_PROTO(struct napi_struct *napi), + TP_PROTO(struct napi_struct *napi, int work, int budget), - TP_ARGS(napi), + TP_ARGS(napi, work, budget), TP_STRUCT__entry( __field( struct napi_struct *, napi) + __field( int, work) + __field( int, budget) __string( dev_name, napi->dev ? napi->dev->name : NO_DEV) ), TP_fast_assign( __entry->napi = napi; + __entry->work = work; + __entry->budget = budget; __assign_str(dev_name, napi->dev ? napi->dev->name : NO_DEV); ), - TP_printk("napi poll on napi struct %p for device %s", - __entry->napi, __get_str(dev_name)) + TP_printk("napi poll on napi struct %p for device %s work %d budget %d", + __entry->napi, __get_str(dev_name), + __entry->work, __entry->budget) ); #undef NO_DEV diff --git a/net/core/dev.c b/net/core/dev.c index b92d63bfde7a..7894e406c806 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4972,7 +4972,7 @@ bool sk_busy_loop(struct sock *sk, int nonblock) if (test_bit(NAPI_STATE_SCHED, &napi->state)) { rc = napi->poll(napi, BUSY_POLL_BUDGET); - trace_napi_poll(napi); + trace_napi_poll(napi, rc, BUSY_POLL_BUDGET); if (rc == BUSY_POLL_BUDGET) { napi_complete_done(napi, rc); napi_schedule(napi); @@ -5128,7 +5128,7 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll) work = 0; if (test_bit(NAPI_STATE_SCHED, &n->state)) { work = n->poll(n, weight); - trace_napi_poll(n); + trace_napi_poll(n, work, weight); } WARN_ON_ONCE(work > weight); diff --git a/net/core/drop_monitor.c 
b/net/core/drop_monitor.c index 252e155c837b..d6b3b579560d 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -187,7 +187,8 @@ static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *locatio trace_drop_common(skb, location); } -static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi) +static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi, + int work, int budget) { struct dm_hw_stat_delta *new_stat; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 94acfc89ad97..53599bd0c82d 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -163,7 +163,7 @@ static void poll_one_napi(struct napi_struct *napi) */ work = napi->poll(napi, 0); WARN_ONCE(work, "%pF exceeded budget in poll\n", napi->poll); - trace_napi_poll(napi); + trace_napi_poll(napi, work, 0); clear_bit(NAPI_STATE_NPSVC, &napi->state); } diff --git a/tools/perf/scripts/python/netdev-times.py b/tools/perf/scripts/python/netdev-times.py index 4d21ef2d601d..4c6f09ac7d12 100644 --- a/tools/perf/scripts/python/netdev-times.py +++ b/tools/perf/scripts/python/netdev-times.py @@ -252,9 +252,10 @@ def irq__irq_handler_exit(name, context, cpu, sec, nsec, pid, comm, callchain, i event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, irq, ret) all_event_list.append(event_info) -def napi__napi_poll(name, context, cpu, sec, nsec, pid, comm, callchain, napi, dev_name): +def napi__napi_poll(name, context, cpu, sec, nsec, pid, comm, callchain, napi, + dev_name, work=None, budget=None): event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, - napi, dev_name) + napi, dev_name, work, budget) all_event_list.append(event_info) def net__netif_receive_skb(name, context, cpu, sec, nsec, pid, comm, callchain, skbaddr, @@ -354,11 +355,13 @@ def handle_irq_softirq_exit(event_info): receive_hunk_list.append(rec_data) def handle_napi_poll(event_info): - (name, context, cpu, time, pid, comm, napi, dev_name) = event_info + (name, context, cpu, time, 
pid, comm, napi, dev_name, + work, budget) = event_info if cpu in net_rx_dic.keys(): event_list = net_rx_dic[cpu]['event_list'] rec_data = {'event_name':'napi_poll', - 'dev':dev_name, 'event_t':time} + 'dev':dev_name, 'event_t':time, + 'work':work, 'budget':budget} event_list.append(rec_data) def handle_netif_rx(event_info): -- cgit