diff options
Diffstat (limited to 'tools/perf/builtin-stat.c')
-rw-r--r-- | tools/perf/builtin-stat.c | 440 |
1 files changed, 215 insertions, 225 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 6d3eeac1ea25..e5f6ece65a13 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -32,6 +32,7 @@ * Wu Fengguang <fengguang.wu@intel.com> * Mike Galbraith <efault@gmx.de> * Paul Mackerras <paulus@samba.org> + * Jaswinder Singh Rajput <jaswinder@kernel.org> * * Released under the GPL v2. (and only v2, not any later version) */ @@ -41,11 +42,13 @@ #include "util/util.h" #include "util/parse-options.h" #include "util/parse-events.h" +#include "util/event.h" +#include "util/debug.h" #include <sys/prctl.h> #include <math.h> -static struct perf_counter_attr default_attrs[MAX_COUNTERS] = { +static struct perf_event_attr default_attrs[] = { { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES}, @@ -60,79 +63,101 @@ static struct perf_counter_attr default_attrs[MAX_COUNTERS] = { }; static int system_wide = 0; +static unsigned int nr_cpus = 0; +static int run_idx = 0; + +static int run_count = 1; static int inherit = 1; -static int verbose = 0; +static int scale = 1; +static int target_pid = -1; +static int null_run = 0; static int fd[MAX_NR_CPUS][MAX_COUNTERS]; -static int target_pid = -1; -static int nr_cpus = 0; -static unsigned int page_size; +static int event_scaled[MAX_COUNTERS]; -static int scale = 1; - -static const unsigned int default_count[] = { - 1000000, - 1000000, - 10000, - 10000, - 1000000, - 10000, +struct stats +{ + double n, mean, M2; }; -#define MAX_RUN 100 - -static int run_count = 1; -static int run_idx = 0; - -static u64 event_res[MAX_RUN][MAX_COUNTERS][3]; -static u64 event_scaled[MAX_RUN][MAX_COUNTERS]; - -//static u64 event_hist[MAX_RUN][MAX_COUNTERS][3]; +static void update_stats(struct stats *stats, u64 val) +{ + double delta; + stats->n++; + delta = val - stats->mean; + stats->mean += delta / stats->n; + stats->M2 += delta*(val - stats->mean); +} -static u64 runtime_nsecs[MAX_RUN]; -static u64 walltime_nsecs[MAX_RUN]; -static u64 runtime_cycles[MAX_RUN]; +static double avg_stats(struct stats *stats) +{ + return stats->mean; +} -static u64 event_res_avg[MAX_COUNTERS][3]; -static u64 event_res_noise[MAX_COUNTERS][3]; +/* + * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + * + * (\Sum n_i^2) - ((\Sum n_i)^2)/n + * s^2 = ------------------------------- + * n - 1 + * + * http://en.wikipedia.org/wiki/Stddev + * + * The std dev of the mean is related to the std dev by: + * + * s + * s_mean = ------- + * sqrt(n) + * + */ +static double stddev_stats(struct stats *stats) +{ + double variance = stats->M2 / (stats->n - 1); + double variance_mean = variance / stats->n; -static u64 event_scaled_avg[MAX_COUNTERS]; + return sqrt(variance_mean); +} -static u64 runtime_nsecs_avg; -static u64 runtime_nsecs_noise; +struct stats event_res_stats[MAX_COUNTERS][3]; +struct stats runtime_nsecs_stats; +struct stats walltime_nsecs_stats; +struct stats runtime_cycles_stats; -static u64 walltime_nsecs_avg; -static u64 walltime_nsecs_noise; +#define MATCH_EVENT(t, c, counter) \ + (attrs[counter].type == PERF_TYPE_##t && \ + attrs[counter].config == PERF_COUNT_##c) -static u64 runtime_cycles_avg; -static u64 runtime_cycles_noise; +#define ERR_PERF_OPEN \ +"Error: counter %d, sys_perf_event_open() syscall returned with %d (%s)\n" -static void create_perf_stat_counter(int counter) +static void create_perf_stat_counter(int counter, int pid) { - struct perf_counter_attr *attr = attrs + counter; + struct perf_event_attr *attr = attrs + counter; if (scale) attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING; if (system_wide) { - int cpu; - for (cpu = 0; cpu < nr_cpus; cpu ++) { - fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0); - if (fd[cpu][counter] < 0 && verbose) { - printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[cpu][counter], strerror(errno)); - } - } - } else { - attr->inherit = inherit; - attr->disabled = 1; + unsigned int cpu; - fd[0][counter] = sys_perf_counter_open(attr, 0, -1, -1, 0); - if (fd[0][counter] < 0 && verbose) { - printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[0][counter], strerror(errno)); + for (cpu = 0; cpu < nr_cpus; cpu++) { + fd[cpu][counter] = sys_perf_event_open(attr, -1, cpu, -1, 0); + if (fd[cpu][counter] < 0 && verbose) + fprintf(stderr, ERR_PERF_OPEN, counter, + fd[cpu][counter], strerror(errno)); } + } else { + attr->inherit = inherit; + attr->disabled = 1; + attr->enable_on_exec = 1; + + fd[0][counter] = sys_perf_event_open(attr, pid, -1, -1, 0); + if (fd[0][counter] < 0 && verbose) + fprintf(stderr, ERR_PERF_OPEN, counter, + fd[0][counter], strerror(errno)); } } @@ -141,13 +166,8 @@ static void create_perf_stat_counter(int counter) */ static inline int nsec_counter(int counter) { - if (attrs[counter].type != PERF_TYPE_SOFTWARE) - return 0; - - if (attrs[counter].config == PERF_COUNT_SW_CPU_CLOCK) - return 1; - - if (attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) + if (MATCH_EVENT(SOFTWARE, SW_CPU_CLOCK, counter) || + MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) return 1; return 0; @@ -158,22 +178,22 @@ static inline int nsec_counter(int counter) */ static void read_counter(int counter) { - u64 *count, single_count[3]; - ssize_t res; - int cpu, nv; + u64 count[3], single_count[3]; + unsigned int cpu; + size_t res, nv; int scaled; - - count = event_res[run_idx][counter]; + int i; count[0] = count[1] = count[2] = 0; nv = scale ? 3 : 1; - for (cpu = 0; cpu < nr_cpus; cpu ++) { + for (cpu = 0; cpu < nr_cpus; cpu++) { if (fd[cpu][counter] < 0) continue; res = read(fd[cpu][counter], single_count, nv * sizeof(u64)); assert(res == nv * sizeof(u64)); + close(fd[cpu][counter]); fd[cpu][counter] = -1; @@ -187,63 +207,107 @@ static void read_counter(int counter) scaled = 0; if (scale) { if (count[2] == 0) { - event_scaled[run_idx][counter] = -1; + event_scaled[counter] = -1; count[0] = 0; return; } if (count[2] < count[1]) { - event_scaled[run_idx][counter] = 1; + event_scaled[counter] = 1; count[0] = (unsigned long long) ((double)count[0] * count[1] / count[2] + 0.5); } } + + for (i = 0; i < 3; i++) + update_stats(&event_res_stats[counter][i], count[i]); + + if (verbose) { + fprintf(stderr, "%s: %Ld %Ld %Ld\n", event_name(counter), + count[0], count[1], count[2]); + } + /* * Save the full runtime - to allow normalization during printout: */ - if (attrs[counter].type == PERF_TYPE_SOFTWARE && - attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) - runtime_nsecs[run_idx] = count[0]; - if (attrs[counter].type == PERF_TYPE_HARDWARE && - attrs[counter].config == PERF_COUNT_HW_CPU_CYCLES) - runtime_cycles[run_idx] = count[0]; + if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) + update_stats(&runtime_nsecs_stats, count[0]); + if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter)) + update_stats(&runtime_cycles_stats, count[0]); } -static int run_perf_stat(int argc, const char **argv) +static int run_perf_stat(int argc __used, const char **argv) { unsigned long long t0, t1; int status = 0; int counter; int pid; + int child_ready_pipe[2], go_pipe[2]; + char buf; if (!system_wide) nr_cpus = 1; - for (counter = 0; counter < nr_counters; counter++) - create_perf_stat_counter(counter); - - /* - * Enable counters and exec the command: - */ - t0 = rdclock(); - prctl(PR_TASK_PERF_COUNTERS_ENABLE); + if (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0) { + perror("failed to create pipes"); + exit(1); + } if ((pid = fork()) < 0) perror("failed to fork"); if (!pid) { - if (execvp(argv[0], (char **)argv)) { - perror(argv[0]); - exit(-1); - } + close(child_ready_pipe[0]); + close(go_pipe[1]); + fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); + + /* + * Do a dummy execvp to get the PLT entry resolved, + * so we avoid the resolver overhead on the real + * execvp call. + */ + execvp("", (char **)argv); + + /* + * Tell the parent we're ready to go + */ + close(child_ready_pipe[1]); + + /* + * Wait until the parent tells us to go. + */ + if (read(go_pipe[0], &buf, 1) == -1) + perror("unable to read pipe"); + + execvp(argv[0], (char **)argv); + + perror(argv[0]); + exit(-1); } + /* + * Wait for the child to be ready to exec. + */ + close(child_ready_pipe[1]); + close(go_pipe[0]); + if (read(child_ready_pipe[0], &buf, 1) == -1) + perror("unable to read pipe"); + close(child_ready_pipe[0]); + + for (counter = 0; counter < nr_counters; counter++) + create_perf_stat_counter(counter, pid); + + /* + * Enable counters and exec the command: + */ + t0 = rdclock(); + + close(go_pipe[1]); wait(&status); - prctl(PR_TASK_PERF_COUNTERS_DISABLE); t1 = rdclock(); - walltime_nsecs[run_idx] = t1 - t0; + update_stats(&walltime_nsecs_stats, t1 - t0); for (counter = 0; counter < nr_counters; counter++) read_counter(counter); @@ -251,46 +315,48 @@ static int run_perf_stat(int argc, const char **argv) return WEXITSTATUS(status); } -static void print_noise(u64 *count, u64 *noise) +static void print_noise(int counter, double avg) { - if (run_count > 1) - fprintf(stderr, " ( +- %7.3f%% )", - (double)noise[0]/(count[0]+1)*100.0); + if (run_count == 1) + return; + + fprintf(stderr, " ( +- %7.3f%% )", + 100 * stddev_stats(&event_res_stats[counter][0]) / avg); } -static void nsec_printout(int counter, u64 *count, u64 *noise) +static void nsec_printout(int counter, double avg) { - double msecs = (double)count[0] / 1000000; - - fprintf(stderr, " %14.6f %-20s", msecs, event_name(counter)); + double msecs = avg / 1e6; - if (attrs[counter].type == PERF_TYPE_SOFTWARE && - attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) { + fprintf(stderr, " %14.6f %-24s", msecs, event_name(counter)); - if (walltime_nsecs_avg) - fprintf(stderr, " # %10.3f CPUs ", - (double)count[0] / (double)walltime_nsecs_avg); + if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) { + fprintf(stderr, " # %10.3f CPUs ", + avg / avg_stats(&walltime_nsecs_stats)); } - print_noise(count, noise); } -static void abs_printout(int counter, u64 *count, u64 *noise) +static void abs_printout(int counter, double avg) { - fprintf(stderr, " %14Ld %-20s", count[0], event_name(counter)); + double total, ratio = 0.0; - if (runtime_cycles_avg && - attrs[counter].type == PERF_TYPE_HARDWARE && - attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) { + fprintf(stderr, " %14.0f %-24s", avg, event_name(counter)); - fprintf(stderr, " # %10.3f IPC ", - (double)count[0] / (double)runtime_cycles_avg); + if (MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) { + total = avg_stats(&runtime_cycles_stats); + + if (total) + ratio = avg / total; + + fprintf(stderr, " # %10.3f IPC ", ratio); } else { - if (runtime_nsecs_avg) { - fprintf(stderr, " # %10.3f M/sec", - (double)count[0]/runtime_nsecs_avg*1000.0); - } + total = avg_stats(&runtime_nsecs_stats); + + if (total) + ratio = 1000.0 * avg / total; + + fprintf(stderr, " # %10.3f M/sec", ratio); } - print_noise(count, noise); } /* @@ -298,121 +364,39 @@ static void abs_printout(int counter, u64 *count, u64 *noise) */ static void print_counter(int counter) { - u64 *count, *noise; - int scaled; - - count = event_res_avg[counter]; - noise = event_res_noise[counter]; - scaled = event_scaled_avg[counter]; + double avg = avg_stats(&event_res_stats[counter][0]); + int scaled = event_scaled[counter]; if (scaled == -1) { - fprintf(stderr, " %14s %-20s\n", + fprintf(stderr, " %14s %-24s\n", "<not counted>", event_name(counter)); return; } if (nsec_counter(counter)) - nsec_printout(counter, count, noise); + nsec_printout(counter, avg); else - abs_printout(counter, count, noise); - - if (scaled) - fprintf(stderr, " (scaled from %.2f%%)", - (double) count[2] / count[1] * 100); - - fprintf(stderr, "\n"); -} - -/* - * normalize_noise noise values down to stddev: - */ -static void normalize_noise(u64 *val) -{ - double res; - - res = (double)*val / (run_count * sqrt((double)run_count)); + abs_printout(counter, avg); - *val = (u64)res; -} + print_noise(counter, avg); -static void update_avg(const char *name, int idx, u64 *avg, u64 *val) -{ - *avg += *val; + if (scaled) { + double avg_enabled, avg_running; - if (verbose > 1) - fprintf(stderr, "debug: %20s[%d]: %Ld\n", name, idx, *val); -} -/* - * Calculate the averages and noises: - */ -static void calc_avg(void) -{ - int i, j; - - if (verbose > 1) - fprintf(stderr, "\n"); - - for (i = 0; i < run_count; i++) { - update_avg("runtime", 0, &runtime_nsecs_avg, runtime_nsecs + i); - update_avg("walltime", 0, &walltime_nsecs_avg, walltime_nsecs + i); - update_avg("runtime_cycles", 0, &runtime_cycles_avg, runtime_cycles + i); - - for (j = 0; j < nr_counters; j++) { - update_avg("counter/0", j, - event_res_avg[j]+0, event_res[i][j]+0); - update_avg("counter/1", j, - event_res_avg[j]+1, event_res[i][j]+1); - update_avg("counter/2", j, - event_res_avg[j]+2, event_res[i][j]+2); - update_avg("scaled", j, - event_scaled_avg + j, event_scaled[i]+j); - } - } - runtime_nsecs_avg /= run_count; - walltime_nsecs_avg /= run_count; - runtime_cycles_avg /= run_count; - - for (j = 0; j < nr_counters; j++) { - event_res_avg[j][0] /= run_count; - event_res_avg[j][1] /= run_count; - event_res_avg[j][2] /= run_count; - } + avg_enabled = avg_stats(&event_res_stats[counter][1]); + avg_running = avg_stats(&event_res_stats[counter][2]); - for (i = 0; i < run_count; i++) { - runtime_nsecs_noise += - abs((s64)(runtime_nsecs[i] - runtime_nsecs_avg)); - walltime_nsecs_noise += - abs((s64)(walltime_nsecs[i] - walltime_nsecs_avg)); - runtime_cycles_noise += - abs((s64)(runtime_cycles[i] - runtime_cycles_avg)); - - for (j = 0; j < nr_counters; j++) { - event_res_noise[j][0] += - abs((s64)(event_res[i][j][0] - event_res_avg[j][0])); - event_res_noise[j][1] += - abs((s64)(event_res[i][j][1] - event_res_avg[j][1])); - event_res_noise[j][2] += - abs((s64)(event_res[i][j][2] - event_res_avg[j][2])); - } + fprintf(stderr, " (scaled from %.2f%%)", + 100 * avg_running / avg_enabled); } - normalize_noise(&runtime_nsecs_noise); - normalize_noise(&walltime_nsecs_noise); - normalize_noise(&runtime_cycles_noise); - - for (j = 0; j < nr_counters; j++) { - normalize_noise(&event_res_noise[j][0]); - normalize_noise(&event_res_noise[j][1]); - normalize_noise(&event_res_noise[j][2]); - } + fprintf(stderr, "\n"); } static void print_stat(int argc, const char **argv) { int i, counter; - calc_avg(); - fflush(stdout); fprintf(stderr, "\n"); @@ -429,11 +413,15 @@ static void print_stat(int argc, const char **argv) for (counter = 0; counter < nr_counters; counter++) print_counter(counter); - - fprintf(stderr, "\n"); - fprintf(stderr, " %14.9f seconds time elapsed.\n", - (double)walltime_nsecs_avg/1e9); fprintf(stderr, "\n"); + fprintf(stderr, " %14.9f seconds time elapsed", + avg_stats(&walltime_nsecs_stats)/1e9); + if (run_count > 1) { + fprintf(stderr, " ( +- %7.3f%% )", + 100*stddev_stats(&walltime_nsecs_stats) / + avg_stats(&walltime_nsecs_stats)); + } + fprintf(stderr, "\n\n"); } static volatile int signr = -1; @@ -466,36 +454,38 @@ static const struct option options[] = { OPT_INTEGER('p', "pid", &target_pid, "stat events on existing pid"), OPT_BOOLEAN('a', "all-cpus", &system_wide, - "system-wide collection from all CPUs"), - OPT_BOOLEAN('S', "scale", &scale, - "scale/normalize counters"), + "system-wide collection from all CPUs"), + OPT_BOOLEAN('c', "scale", &scale, + "scale/normalize counters"), OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), OPT_INTEGER('r', "repeat", &run_count, "repeat command and print average + stddev (max: 100)"), + OPT_BOOLEAN('n', "null", &null_run, + "null run - dont start any counters"), OPT_END() }; -int cmd_stat(int argc, const char **argv, const char *prefix) +int cmd_stat(int argc, const char **argv, const char *prefix __used) { int status; - page_size = sysconf(_SC_PAGE_SIZE); - - memcpy(attrs, default_attrs, sizeof(attrs)); - - argc = parse_options(argc, argv, options, stat_usage, 0); + argc = parse_options(argc, argv, options, stat_usage, + PARSE_OPT_STOP_AT_NON_OPTION); if (!argc) usage_with_options(stat_usage, options); - if (run_count <= 0 || run_count > MAX_RUN) + if (run_count <= 0) usage_with_options(stat_usage, options); - if (!nr_counters) - nr_counters = 8; + /* Set attrs and nr_counters if no event is selected and !null_run */ + if (!null_run && !nr_counters) { + memcpy(attrs, default_attrs, sizeof(default_attrs)); + nr_counters = ARRAY_SIZE(default_attrs); + } nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); assert(nr_cpus <= MAX_NR_CPUS); - assert(nr_cpus >= 0); + assert((int)nr_cpus >= 0); /* * We dont want to block the signals - that would cause @@ -511,7 +501,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix) status = 0; for (run_idx = 0; run_idx < run_count; run_idx++) { if (run_count != 1 && verbose) - fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx+1); + fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx + 1); status = run_perf_stat(argc, argv); } |