diff options
Diffstat (limited to 'tools/perf/util/intel-tpebs.c')
-rw-r--r-- | tools/perf/util/intel-tpebs.c | 432 |
1 files changed, 432 insertions, 0 deletions
diff --git a/tools/perf/util/intel-tpebs.c b/tools/perf/util/intel-tpebs.c new file mode 100644 index 000000000000..50a3c3e07160 --- /dev/null +++ b/tools/perf/util/intel-tpebs.c @@ -0,0 +1,432 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * intel_tpebs.c: Intel TPEBS support + */ + + +#include <sys/param.h> +#include <subcmd/run-command.h> +#include <thread.h> +#include "intel-tpebs.h" +#include <linux/list.h> +#include <linux/zalloc.h> +#include <linux/err.h> +#include "sample.h" +#include "debug.h" +#include "evlist.h" +#include "evsel.h" +#include "session.h" +#include "tool.h" +#include "cpumap.h" +#include "metricgroup.h" +#include <sys/stat.h> +#include <sys/file.h> +#include <poll.h> +#include <math.h> + +#define PERF_DATA "-" + +bool tpebs_recording; +static pid_t tpebs_pid = -1; +static size_t tpebs_event_size; +static LIST_HEAD(tpebs_results); +static pthread_t tpebs_reader_thread; +static struct child_process *tpebs_cmd; + +struct tpebs_retire_lat { + struct list_head nd; + /* Event name */ + const char *name; + /* Event name with the TPEBS modifier R */ + const char *tpebs_name; + /* Count of retire_latency values found in sample data */ + size_t count; + /* Sum of all the retire_latency values in sample data */ + int sum; + /* Average of retire_latency, val = sum / count */ + double val; +}; + +static int get_perf_record_args(const char **record_argv, char buf[], + const char *cpumap_buf) +{ + struct tpebs_retire_lat *e; + int i = 0; + + pr_debug("tpebs: Prepare perf record for retire_latency\n"); + + record_argv[i++] = "perf"; + record_argv[i++] = "record"; + record_argv[i++] = "-W"; + record_argv[i++] = "--synth=no"; + record_argv[i++] = buf; + + if (!cpumap_buf) { + pr_err("tpebs: Require cpumap list to run sampling\n"); + return -ECANCELED; + } + /* Use -C when cpumap_buf is not "-1" */ + if (strcmp(cpumap_buf, "-1")) { + record_argv[i++] = "-C"; + record_argv[i++] = cpumap_buf; + } + + list_for_each_entry(e, &tpebs_results, nd) { + record_argv[i++] = "-e"; + record_argv[i++] = e->name; + } + + record_argv[i++] = "-o"; + record_argv[i++] = PERF_DATA; + + return 0; +} + +static int prepare_run_command(const char **argv) +{ + tpebs_cmd = zalloc(sizeof(struct child_process)); + if (!tpebs_cmd) + return -ENOMEM; + tpebs_cmd->argv = argv; + tpebs_cmd->out = -1; + return 0; +} + +static int start_perf_record(int control_fd[], int ack_fd[], + const char *cpumap_buf) +{ + const char **record_argv; + int ret; + char buf[32]; + + scnprintf(buf, sizeof(buf), "--control=fd:%d,%d", control_fd[0], ack_fd[1]); + + record_argv = calloc(12 + 2 * tpebs_event_size, sizeof(char *)); + if (!record_argv) + return -ENOMEM; + + ret = get_perf_record_args(record_argv, buf, cpumap_buf); + if (ret) + goto out; + + ret = prepare_run_command(record_argv); + if (ret) + goto out; + ret = start_command(tpebs_cmd); +out: + free(record_argv); + return ret; +} + +static int process_sample_event(const struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample, + struct evsel *evsel, + struct machine *machine __maybe_unused) +{ + int ret = 0; + const char *evname; + struct tpebs_retire_lat *t; + + evname = evsel__name(evsel); + + /* + * Need to handle per core results? We are assuming average retire + * latency value will be used. Save the number of samples and the sum of + * retire latency value for each event. + */ + list_for_each_entry(t, &tpebs_results, nd) { + if (!strcmp(evname, t->name)) { + t->count += 1; + t->sum += sample->retire_lat; + t->val = (double) t->sum / t->count; + break; + } + } + + return ret; +} + +static int process_feature_event(struct perf_session *session, + union perf_event *event) +{ + if (event->feat.feat_id < HEADER_LAST_FEATURE) + return perf_event__process_feature(session, event); + return 0; +} + +static void *__sample_reader(void *arg) +{ + struct child_process *child = arg; + struct perf_session *session; + struct perf_data data = { + .mode = PERF_DATA_MODE_READ, + .path = PERF_DATA, + .file.fd = child->out, + }; + struct perf_tool tool; + + perf_tool__init(&tool, /*ordered_events=*/false); + tool.sample = process_sample_event; + tool.feature = process_feature_event; + tool.attr = perf_event__process_attr; + + session = perf_session__new(&data, &tool); + if (IS_ERR(session)) + return NULL; + perf_session__process_events(session); + perf_session__delete(session); + + return NULL; +} + +/* + * tpebs_stop - stop the sample data read thread and the perf record process. + */ +static int tpebs_stop(void) +{ + int ret = 0; + + /* Like tpebs_start, we should only run tpebs_end once. */ + if (tpebs_pid != -1) { + kill(tpebs_cmd->pid, SIGTERM); + tpebs_pid = -1; + pthread_join(tpebs_reader_thread, NULL); + close(tpebs_cmd->out); + ret = finish_command(tpebs_cmd); + if (ret == -ERR_RUN_COMMAND_WAITPID_SIGNAL) + ret = 0; + } + return ret; +} + +/* + * tpebs_start - start tpebs execution. + * @evsel_list: retire_latency evsels in this list will be selected and sampled + * to get the average retire_latency value. + * + * This function will be called from evlist level later when evlist__open() is + * called consistently. + */ +int tpebs_start(struct evlist *evsel_list) +{ + int ret = 0; + struct evsel *evsel; + char cpumap_buf[50]; + + /* + * We should only run tpebs_start when tpebs_recording is enabled. + * And we should only run it once with all the required events. + */ + if (tpebs_pid != -1 || !tpebs_recording) + return 0; + + cpu_map__snprint(evsel_list->core.user_requested_cpus, cpumap_buf, sizeof(cpumap_buf)); + /* + * Prepare perf record for sampling event retire_latency before fork and + * prepare workload + */ + evlist__for_each_entry(evsel_list, evsel) { + int i; + char *name; + struct tpebs_retire_lat *new; + + if (!evsel->retire_lat) + continue; + + pr_debug("tpebs: Retire_latency of event %s is required\n", evsel->name); + for (i = strlen(evsel->name) - 1; i > 0; i--) { + if (evsel->name[i] == 'R') + break; + } + if (i <= 0 || evsel->name[i] != 'R') { + ret = -1; + goto err; + } + + name = strdup(evsel->name); + if (!name) { + ret = -ENOMEM; + goto err; + } + name[i] = 'p'; + + new = zalloc(sizeof(*new)); + if (!new) { + ret = -1; + zfree(name); + goto err; + } + new->name = name; + new->tpebs_name = evsel->name; + list_add_tail(&new->nd, &tpebs_results); + tpebs_event_size += 1; + } + + if (tpebs_event_size > 0) { + struct pollfd pollfd = { .events = POLLIN, }; + int control_fd[2], ack_fd[2], len; + char ack_buf[8]; + + /*Create control and ack fd for --control*/ + if (pipe(control_fd) < 0) { + pr_err("tpebs: Failed to create control fifo"); + ret = -1; + goto out; + } + if (pipe(ack_fd) < 0) { + pr_err("tpebs: Failed to create control fifo"); + ret = -1; + goto out; + } + + ret = start_perf_record(control_fd, ack_fd, cpumap_buf); + if (ret) + goto out; + tpebs_pid = tpebs_cmd->pid; + if (pthread_create(&tpebs_reader_thread, NULL, __sample_reader, tpebs_cmd)) { + kill(tpebs_cmd->pid, SIGTERM); + close(tpebs_cmd->out); + pr_err("Could not create thread to process sample data.\n"); + ret = -1; + goto out; + } + /* Wait for perf record initialization.*/ + len = strlen(EVLIST_CTL_CMD_ENABLE_TAG); + ret = write(control_fd[1], EVLIST_CTL_CMD_ENABLE_TAG, len); + if (ret != len) { + pr_err("perf record control write control message failed\n"); + goto out; + } + + /* wait for an ack */ + pollfd.fd = ack_fd[0]; + + /* + * We need this poll to ensure the ack_fd PIPE will not hang + * when perf record failed for any reason. The timeout value + * 3000ms is an empirical selection. + */ + if (!poll(&pollfd, 1, 3000)) { + pr_err("tpebs failed: perf record ack timeout\n"); + ret = -1; + goto out; + } + + if (!(pollfd.revents & POLLIN)) { + pr_err("tpebs failed: did not received an ack\n"); + ret = -1; + goto out; + } + + ret = read(ack_fd[0], ack_buf, sizeof(ack_buf)); + if (ret > 0) + ret = strcmp(ack_buf, EVLIST_CTL_CMD_ACK_TAG); + else { + pr_err("tpebs: perf record control ack failed\n"); + goto out; + } +out: + close(control_fd[0]); + close(control_fd[1]); + close(ack_fd[0]); + close(ack_fd[1]); + } +err: + if (ret) + tpebs_delete(); + return ret; +} + + +int tpebs_set_evsel(struct evsel *evsel, int cpu_map_idx, int thread) +{ + __u64 val; + bool found = false; + struct tpebs_retire_lat *t; + struct perf_counts_values *count; + + /* Non reitre_latency evsel should never enter this function. */ + if (!evsel__is_retire_lat(evsel)) + return -1; + + /* + * Need to stop the forked record to ensure get sampled data from the + * PIPE to process and get non-zero retire_lat value for hybrid. + */ + tpebs_stop(); + count = perf_counts(evsel->counts, cpu_map_idx, thread); + + list_for_each_entry(t, &tpebs_results, nd) { + if (t->tpebs_name == evsel->name || + (evsel->metric_id && !strcmp(t->tpebs_name, evsel->metric_id))) { + found = true; + break; + } + } + + /* Set ena and run to non-zero */ + count->ena = count->run = 1; + count->lost = 0; + + if (!found) { + /* + * Set default value or 0 when retire_latency for this event is + * not found from sampling data (record_tpebs not set or 0 + * sample recorded). + */ + count->val = 0; + return 0; + } + + /* + * Only set retire_latency value to the first CPU and thread. + */ + if (cpu_map_idx == 0 && thread == 0) + val = rint(t->val); + else + val = 0; + + count->val = val; + return 0; +} + +static void tpebs_retire_lat__delete(struct tpebs_retire_lat *r) +{ + zfree(&r->name); + free(r); +} + + +/* + * tpebs_delete - delete tpebs related data and stop the created thread and + * process by calling tpebs_stop(). + * + * This function is called from evlist_delete() and also from builtin-stat + * stat_handle_error(). If tpebs_start() is called from places other then perf + * stat, need to ensure tpebs_delete() is also called to safely free mem and + * close the data read thread and the forked perf record process. + * + * This function is also called in evsel__close() to be symmetric with + * tpebs_start() being called in evsel__open(). We will update this call site + * when move tpebs_start() to evlist level. + */ +void tpebs_delete(void) +{ + struct tpebs_retire_lat *r, *rtmp; + + if (tpebs_pid == -1) + return; + + tpebs_stop(); + + list_for_each_entry_safe(r, rtmp, &tpebs_results, nd) { + list_del_init(&r->nd); + tpebs_retire_lat__delete(r); + } + + if (tpebs_cmd) { + free(tpebs_cmd); + tpebs_cmd = NULL; + } +} |