From 0f82ebc452f921590e216b28eee0b41f5e434a48 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 8 Nov 2011 14:41:57 -0200 Subject: perf evsel: Introduce config attr method Out of the code in 'perf record', so that we can share option parsing, etc. Eventually will be used by 'perf top', but first 'trace' will use it. Cc: David Ahern Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-hzjqsgnte1esk90ytq0ap98v@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'tools/perf/perf.h') diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 914c895510f7..eb6a13881887 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -185,4 +185,24 @@ extern const char perf_version_string[]; void pthread__unblock_sigwinch(void); +struct perf_record_opts { + pid_t target_pid; + pid_t target_tid; + bool call_graph; + bool inherit_stat; + bool no_delay; + bool no_inherit; + bool no_samples; + bool raw_samples; + bool sample_address; + bool sample_time; + bool sample_id_all_avail; + bool system_wide; + unsigned int freq; + unsigned int user_freq; + u64 default_interval; + u64 user_interval; + const char *cpu_list; +}; + #endif -- cgit From 35b9d88ecd8c5fb720ba0dd325262f356d0b03e7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 9 Nov 2011 08:47:15 -0200 Subject: perf evlist: Introduce {prepare,start}_workload refactored from 'perf record' So that we can easily start a workload in other tools. 
Cc: David Ahern Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-zdsksd4aphu0nltg2lpwsw3x@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 81 ++++++-------------------------------- tools/perf/perf.h | 1 + tools/perf/util/evlist.c | 96 +++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/evlist.h | 10 +++++ 4 files changed, 120 insertions(+), 68 deletions(-) (limited to 'tools/perf/perf.h') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index c3ac5415c097..4799195ed246 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -47,11 +47,9 @@ static struct perf_record_opts record_opts = { static unsigned int page_size; static unsigned int mmap_pages = UINT_MAX; static int output; -static int pipe_output = 0; static const char *output_name = NULL; static bool group = false; static int realtime_prio = 0; -static pid_t child_pid = -1; static enum write_mode_t write_mode = WRITE_FORCE; static bool no_buildid = false; static bool no_buildid_cache = false; @@ -144,9 +142,9 @@ static void sig_atexit(void) { int status; - if (child_pid > 0) { + if (evsel_list->workload.pid > 0) { if (!child_finished) - kill(child_pid, SIGTERM); + kill(evsel_list->workload.pid, SIGTERM); wait(&status); if (WIFSIGNALED(status)) @@ -304,7 +302,7 @@ static int process_buildids(void) static void atexit_header(void) { - if (!pipe_output) { + if (!record_opts.pipe_output) { session->header.data_size += bytes_written; if (!no_buildid) @@ -377,9 +375,7 @@ static int __cmd_record(int argc, const char **argv) int flags; int err; unsigned long waking = 0; - int child_ready_pipe[2], go_pipe[2]; const bool forks = argc > 0; - char buf; struct machine *machine; progname = argv[0]; @@ -391,20 +387,15 @@ static int __cmd_record(int argc, const char **argv) signal(SIGINT, sig_handler); signal(SIGUSR1, sig_handler); - if (forks && 
(pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) { - perror("failed to create pipes"); - exit(-1); - } - if (!output_name) { if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode)) - pipe_output = true; + record_opts.pipe_output = true; else output_name = "perf.data"; } if (output_name) { if (!strcmp(output_name, "-")) - pipe_output = true; + record_opts.pipe_output = true; else if (!stat(output_name, &st) && st.st_size) { if (write_mode == WRITE_FORCE) { char oldname[PATH_MAX]; @@ -424,7 +415,7 @@ static int __cmd_record(int argc, const char **argv) else flags |= O_TRUNC; - if (pipe_output) + if (record_opts.pipe_output) output = STDOUT_FILENO; else output = open(output_name, flags, S_IRUSR | S_IWUSR); @@ -470,57 +461,11 @@ static int __cmd_record(int argc, const char **argv) mmap_pages = (512 * 1024) / page_size; if (forks) { - child_pid = fork(); - if (child_pid < 0) { - perror("failed to fork"); - exit(-1); - } - - if (!child_pid) { - if (pipe_output) - dup2(2, 1); - close(child_ready_pipe[0]); - close(go_pipe[1]); - fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); - - /* - * Do a dummy execvp to get the PLT entry resolved, - * so we avoid the resolver overhead on the real - * execvp call. - */ - execvp("", (char **)argv); - - /* - * Tell the parent we're ready to go - */ - close(child_ready_pipe[1]); - - /* - * Wait until the parent tells us to go. 
- */ - if (read(go_pipe[0], &buf, 1) == -1) - perror("unable to read pipe"); - - execvp(argv[0], (char **)argv); - - perror(argv[0]); - kill(getppid(), SIGUSR1); - exit(-1); - } - - if (!record_opts.system_wide && record_opts.target_tid == -1 && record_opts.target_pid == -1) - evsel_list->threads->map[0] = child_pid; - - close(child_ready_pipe[1]); - close(go_pipe[0]); - /* - * wait for child to settle - */ - if (read(child_ready_pipe[0], &buf, 1) == -1) { - perror("unable to read pipe"); - exit(-1); + err = perf_evlist__prepare_workload(evsel_list, &record_opts, argv); + if (err < 0) { + pr_err("Couldn't run the workload!\n"); + goto out_delete_session; } - close(child_ready_pipe[0]); } open_counters(evsel_list); @@ -530,7 +475,7 @@ static int __cmd_record(int argc, const char **argv) */ atexit(atexit_header); - if (pipe_output) { + if (record_opts.pipe_output) { err = perf_header__write_pipe(output); if (err < 0) return err; @@ -543,7 +488,7 @@ static int __cmd_record(int argc, const char **argv) post_processing_offset = lseek(output, 0, SEEK_CUR); - if (pipe_output) { + if (record_opts.pipe_output) { err = perf_session__synthesize_attrs(session, process_synthesized_event); if (err < 0) { @@ -629,7 +574,7 @@ static int __cmd_record(int argc, const char **argv) * Let the child rip */ if (forks) - close(go_pipe[1]); + perf_evlist__start_workload(evsel_list); for (;;) { int hits = samples; diff --git a/tools/perf/perf.h b/tools/perf/perf.h index eb6a13881887..32ee6ca8eabd 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -193,6 +193,7 @@ struct perf_record_opts { bool no_delay; bool no_inherit; bool no_samples; + bool pipe_output; bool raw_samples; bool sample_address; bool sample_time; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index b774341e797f..a472247af191 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -13,6 +13,7 @@ #include "thread_map.h" #include "evlist.h" #include "evsel.h" +#include <unistd.h> #include 
"parse-events.h" @@ -33,6 +34,7 @@ void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus, INIT_HLIST_HEAD(&evlist->heads[i]); INIT_LIST_HEAD(&evlist->entries); perf_evlist__set_maps(evlist, cpus, threads); + evlist->workload.pid = -1; } struct perf_evlist *perf_evlist__new(struct cpu_map *cpus, @@ -674,3 +676,97 @@ out_err: return err; } + +int perf_evlist__prepare_workload(struct perf_evlist *evlist, + struct perf_record_opts *opts, + const char *argv[]) +{ + int child_ready_pipe[2], go_pipe[2]; + char bf; + + if (pipe(child_ready_pipe) < 0) { + perror("failed to create 'ready' pipe"); + return -1; + } + + if (pipe(go_pipe) < 0) { + perror("failed to create 'go' pipe"); + goto out_close_ready_pipe; + } + + evlist->workload.pid = fork(); + if (evlist->workload.pid < 0) { + perror("failed to fork"); + goto out_close_pipes; + } + + if (!evlist->workload.pid) { + if (opts->pipe_output) + dup2(2, 1); + + close(child_ready_pipe[0]); + close(go_pipe[1]); + fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); + + /* + * Do a dummy execvp to get the PLT entry resolved, + * so we avoid the resolver overhead on the real + * execvp call. + */ + execvp("", (char **)argv); + + /* + * Tell the parent we're ready to go + */ + close(child_ready_pipe[1]); + + /* + * Wait until the parent tells us to go. 
+ */ + if (read(go_pipe[0], &bf, 1) == -1) + perror("unable to read pipe"); + + execvp(argv[0], (char **)argv); + + perror(argv[0]); + kill(getppid(), SIGUSR1); + exit(-1); + } + + if (!opts->system_wide && opts->target_tid == -1 && opts->target_pid == -1) + evlist->threads->map[0] = evlist->workload.pid; + + close(child_ready_pipe[1]); + close(go_pipe[0]); + /* + * wait for child to settle + */ + if (read(child_ready_pipe[0], &bf, 1) == -1) { + perror("unable to read pipe"); + goto out_close_pipes; + } + + evlist->workload.cork_fd = go_pipe[1]; + close(child_ready_pipe[0]); + return 0; + +out_close_pipes: + close(go_pipe[0]); + close(go_pipe[1]); +out_close_ready_pipe: + close(child_ready_pipe[0]); + close(child_ready_pipe[1]); + return -1; +} + +int perf_evlist__start_workload(struct perf_evlist *evlist) +{ + if (evlist->workload.cork_fd > 0) { + /* + * Remove the cork, let it rip! + */ + return close(evlist->workload.cork_fd); + } + + return 0; +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 231c06f8286b..07d56b3e6d61 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -6,6 +6,7 @@ #include "../perf.h" #include "event.h" #include "util.h" +#include <unistd.h> struct pollfd; struct thread_map; @@ -22,6 +23,10 @@ struct perf_evlist { int nr_fds; int nr_mmaps; int mmap_len; + struct { + int cork_fd; + pid_t pid; + } workload; bool overwrite; union perf_event event_copy; struct perf_mmap *mmap; @@ -68,6 +73,11 @@ int perf_evlist__open(struct perf_evlist *evlist, bool group); void perf_evlist__config_attrs(struct perf_evlist *evlist, struct perf_record_opts *opts); +int perf_evlist__prepare_workload(struct perf_evlist *evlist, + struct perf_record_opts *opts, + const char *argv[]); +int perf_evlist__start_workload(struct perf_evlist *evlist); + int perf_evlist__alloc_mmap(struct perf_evlist *evlist); int perf_evlist__mmap(struct perf_evlist *evlist, int pages, bool overwrite); void perf_evlist__munmap(struct perf_evlist *evlist); -- 
cgit From 01c2d99bcf6fc7f6ce3fe3d0fb38b124e1f127fc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 9 Nov 2011 09:16:26 -0200 Subject: perf record: Move mmap_pages to perf_record_opts Tools being developed will need this to allow the user to override this value. Cc: David Ahern Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-zydc1yhxfm0z35fuy95bsn1l@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 9 +++++---- tools/perf/perf.h | 1 + 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'tools/perf/perf.h') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 749862d57a83..ffb627d40210 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -35,9 +35,10 @@ enum write_mode_t { WRITE_APPEND }; -static struct perf_record_opts record_opts = { +struct perf_record_opts record_opts = { .target_pid = -1, .target_tid = -1, + .mmap_pages = UINT_MAX, .user_freq = UINT_MAX, .user_interval = ULLONG_MAX, .freq = 1000, @@ -45,7 +46,6 @@ static struct perf_record_opts record_opts = { }; static unsigned int page_size; -static unsigned int mmap_pages = UINT_MAX; static int output; static const char *output_name = NULL; static bool group = false; @@ -272,7 +272,7 @@ try_again: exit(-1); } - if (perf_evlist__mmap(evlist, mmap_pages, false) < 0) + if (perf_evlist__mmap(evlist, record_opts.mmap_pages, false) < 0) die("failed to mmap with %d (%s)\n", errno, strerror(errno)); if (file_new) @@ -647,7 +647,8 @@ const struct option record_options[] = { OPT_BOOLEAN('i', "no-inherit", &record_opts.no_inherit, "child tasks do not inherit counters"), OPT_UINTEGER('F', "freq", &record_opts.user_freq, "profile at this frequency"), - OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"), + OPT_UINTEGER('m', "mmap-pages", &record_opts.mmap_pages, + "number of mmap data pages"), 
OPT_BOOLEAN(0, "group", &group, "put the counters into a counter group"), OPT_BOOLEAN('g', "call-graph", &record_opts.call_graph, diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 32ee6ca8eabd..13c42f34e8b3 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -200,6 +200,7 @@ struct perf_record_opts { bool sample_id_all_avail; bool system_wide; unsigned int freq; + unsigned int mmap_pages; unsigned int user_freq; u64 default_interval; u64 user_interval; -- cgit From ed80f5813fd6ecc6d74250681910a4214f699d4e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 11 Nov 2011 15:12:56 -0200 Subject: perf record: Move 'group' to perf_event_ops Will be used in other tools to share the command line parsing code. Cc: David Ahern Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-8x0yr77r6lrd2t699s499m8n@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 9 ++++----- tools/perf/perf.h | 1 + 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'tools/perf/perf.h') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index ffb627d40210..ba6777a147ca 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -48,7 +48,6 @@ struct perf_record_opts record_opts = { static unsigned int page_size; static int output; static const char *output_name = NULL; -static bool group = false; static int realtime_prio = 0; static enum write_mode_t write_mode = WRITE_FORCE; static bool no_buildid = false; @@ -202,13 +201,13 @@ static void open_counters(struct perf_evlist *evlist) */ bool time_needed = attr->sample_type & PERF_SAMPLE_TIME; - if (group && pos != first) + if (record_opts.group && pos != first) group_fd = first->fd; retry_sample_id: attr->sample_id_all = record_opts.sample_id_all_avail ? 
1 : 0; try_again: - if (perf_evsel__open(pos, evlist->cpus, evlist->threads, group, - group_fd) < 0) { + if (perf_evsel__open(pos, evlist->cpus, evlist->threads, + record_opts.group, group_fd) < 0) { int err = errno; if (err == EPERM || err == EACCES) { @@ -649,7 +648,7 @@ const struct option record_options[] = { OPT_UINTEGER('F', "freq", &record_opts.user_freq, "profile at this frequency"), OPT_UINTEGER('m', "mmap-pages", &record_opts.mmap_pages, "number of mmap data pages"), - OPT_BOOLEAN(0, "group", &group, + OPT_BOOLEAN(0, "group", &record_opts.group, "put the counters into a counter group"), OPT_BOOLEAN('g', "call-graph", &record_opts.call_graph, "do call-graph (stack chain/backtrace) recording"), diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 13c42f34e8b3..ea804f5a8cc2 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -189,6 +189,7 @@ struct perf_record_opts { pid_t target_pid; pid_t target_tid; bool call_graph; + bool group; bool inherit_stat; bool no_delay; bool no_inherit; -- cgit From 3e76ac78b08479e84a3eca3fb1b3066fb8230461 Mon Sep 17 00:00:00 2001 From: Andrew Vagin Date: Tue, 20 Dec 2011 17:32:45 +0300 Subject: perf record: Add ability to record event period The problem is that when SAMPLE_PERIOD is not set, the kernel generates a number of samples in proportion to an event's period. The number of these samples may be too big and the kernel throttles all samples above a defined limit. E.g.: I want to trace when a process sleeps. I created a process which sleeps for 1ms and for 4ms. perf got 100 events in both cases. swapper 0 [000] 1141.371830: sched_stat_sleep: comm=foo pid=1801 delay=1386750 [ns] swapper 0 [000] 1141.369444: sched_stat_sleep: comm=foo pid=1801 delay=4499585 [ns] In the first case the kernel wants to send 4499585 events and in the second case it wants to send 1386750 events. perf report shows that the process sleeps for an equal time in both places. Instead of this we can get only one sample with a period attribute. 
As a result, less data is transferred between kernel and user space and we avoid throttling of samples. The patch "events: Don't divide events if it has field period" added the kernel part of this functionality. Acked-by: Arun Sharma Cc: Arun Sharma Cc: David Ahern Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: devel@openvz.org Link: http://lkml.kernel.org/r/1324391565-1369947-1-git-send-email-avagin@openvz.org Signed-off-by: Andrew Vagin Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 1 + tools/perf/perf.h | 1 + tools/perf/util/evsel.c | 3 +++ 3 files changed, 5 insertions(+) (limited to 'tools/perf/perf.h') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 766fa0a91a32..f8fd14fb62ec 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -700,6 +700,7 @@ const struct option record_options[] = { OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Sample addresses"), OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"), + OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"), OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, "don't sample"), OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache, diff --git a/tools/perf/perf.h b/tools/perf/perf.h index ea804f5a8cc2..64f8bee31ced 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -200,6 +200,7 @@ struct perf_record_opts { bool sample_time; bool sample_id_all_avail; bool system_wide; + bool period; unsigned int freq; unsigned int mmap_pages; unsigned int user_freq; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 4a8c8b02e9cc..60ad0286759e 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -108,6 +108,9 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts) if (opts->system_wide) attr->sample_type |= PERF_SAMPLE_CPU; + if (opts->period) + attr->sample_type |= PERF_SAMPLE_PERIOD; + if 
(opts->sample_id_all_avail && (opts->sample_time || opts->system_wide || !opts->no_inherit || opts->cpu_list)) -- cgit