aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--tools/lib/api/Makefile7
-rw-r--r--tools/lib/api/fd/array.c127
-rw-r--r--tools/lib/api/fd/array.h46
-rw-r--r--tools/perf/Makefile.perf3
-rw-r--r--tools/perf/builtin-kvm.c24
-rw-r--r--tools/perf/builtin-record.c9
-rw-r--r--tools/perf/builtin-top.c4
-rw-r--r--tools/perf/builtin-trace.c7
-rw-r--r--tools/perf/tests/builtin-test.c8
-rw-r--r--tools/perf/tests/fdarray.c174
-rw-r--r--tools/perf/tests/open-syscall-tp-fields.c2
-rw-r--r--tools/perf/tests/perf-record.c2
-rw-r--r--tools/perf/tests/task-exit.c2
-rw-r--r--tools/perf/tests/tests.h2
-rw-r--r--tools/perf/util/evlist.c105
-rw-r--r--tools/perf/util/evlist.h16
-rw-r--r--tools/perf/util/python.c6
17 files changed, 501 insertions, 43 deletions
diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile
index ce00f7ee6455..36c08b1f4afb 100644
--- a/tools/lib/api/Makefile
+++ b/tools/lib/api/Makefile
@@ -10,9 +10,14 @@ LIB_OBJS=
LIB_H += fs/debugfs.h
LIB_H += fs/fs.h
+# See comment below about piggybacking...
+LIB_H += fd/array.h
LIB_OBJS += $(OUTPUT)fs/debugfs.o
LIB_OBJS += $(OUTPUT)fs/fs.o
+# XXX piggybacking here, need to introduce libapikfd, or rename this
+# to plain libapik.a and make it have it all api goodies
+LIB_OBJS += $(OUTPUT)fd/array.o
LIBFILE = libapikfs.a
@@ -29,7 +34,7 @@ $(LIBFILE): $(LIB_OBJS)
$(LIB_OBJS): $(LIB_H)
libapi_dirs:
- $(QUIET_MKDIR)mkdir -p $(OUTPUT)fs/
+ $(QUIET_MKDIR)mkdir -p $(OUTPUT)fd $(OUTPUT)fs
$(OUTPUT)%.o: %.c libapi_dirs
$(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $<
diff --git a/tools/lib/api/fd/array.c b/tools/lib/api/fd/array.c
new file mode 100644
index 000000000000..0e636c4339b8
--- /dev/null
+++ b/tools/lib/api/fd/array.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2014, Red Hat Inc, Arnaldo Carvalho de Melo <[email protected]>
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+#include "array.h"
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+void fdarray__init(struct fdarray *fda, int nr_autogrow)
+{
+ fda->entries = NULL;
+ fda->priv = NULL;
+ fda->nr = fda->nr_alloc = 0;
+ fda->nr_autogrow = nr_autogrow;
+}
+
+int fdarray__grow(struct fdarray *fda, int nr)
+{
+ void *priv;
+ int nr_alloc = fda->nr_alloc + nr;
+ size_t psize = sizeof(fda->priv[0]) * nr_alloc;
+ size_t size = sizeof(struct pollfd) * nr_alloc;
+ struct pollfd *entries = realloc(fda->entries, size);
+
+ if (entries == NULL)
+ return -ENOMEM;
+
+ priv = realloc(fda->priv, psize);
+ if (priv == NULL) {
+ free(entries);
+ return -ENOMEM;
+ }
+
+ fda->nr_alloc = nr_alloc;
+ fda->entries = entries;
+ fda->priv = priv;
+ return 0;
+}
+
+struct fdarray *fdarray__new(int nr_alloc, int nr_autogrow)
+{
+ struct fdarray *fda = calloc(1, sizeof(*fda));
+
+ if (fda != NULL) {
+ if (fdarray__grow(fda, nr_alloc)) {
+ free(fda);
+ fda = NULL;
+ } else {
+ fda->nr_autogrow = nr_autogrow;
+ }
+ }
+
+ return fda;
+}
+
+void fdarray__exit(struct fdarray *fda)
+{
+ free(fda->entries);
+ free(fda->priv);
+ fdarray__init(fda, 0);
+}
+
+void fdarray__delete(struct fdarray *fda)
+{
+ fdarray__exit(fda);
+ free(fda);
+}
+
+int fdarray__add(struct fdarray *fda, int fd, short revents)
+{
+ int pos = fda->nr;
+
+ if (fda->nr == fda->nr_alloc &&
+ fdarray__grow(fda, fda->nr_autogrow) < 0)
+ return -ENOMEM;
+
+ fda->entries[fda->nr].fd = fd;
+ fda->entries[fda->nr].events = revents;
+ fda->nr++;
+ return pos;
+}
+
+int fdarray__filter(struct fdarray *fda, short revents,
+ void (*entry_destructor)(struct fdarray *fda, int fd))
+{
+ int fd, nr = 0;
+
+ if (fda->nr == 0)
+ return 0;
+
+ for (fd = 0; fd < fda->nr; ++fd) {
+ if (fda->entries[fd].revents & revents) {
+ if (entry_destructor)
+ entry_destructor(fda, fd);
+
+ continue;
+ }
+
+ if (fd != nr) {
+ fda->entries[nr] = fda->entries[fd];
+ fda->priv[nr] = fda->priv[fd];
+ }
+
+ ++nr;
+ }
+
+ return fda->nr = nr;
+}
+
+int fdarray__poll(struct fdarray *fda, int timeout)
+{
+ return poll(fda->entries, fda->nr, timeout);
+}
+
+int fdarray__fprintf(struct fdarray *fda, FILE *fp)
+{
+ int fd, printed = fprintf(fp, "%d [ ", fda->nr);
+
+ for (fd = 0; fd < fda->nr; ++fd)
+ printed += fprintf(fp, "%s%d", fd ? ", " : "", fda->entries[fd].fd);
+
+ return printed + fprintf(fp, " ]");
+}
diff --git a/tools/lib/api/fd/array.h b/tools/lib/api/fd/array.h
new file mode 100644
index 000000000000..45db01818f45
--- /dev/null
+++ b/tools/lib/api/fd/array.h
@@ -0,0 +1,46 @@
+#ifndef __API_FD_ARRAY__
+#define __API_FD_ARRAY__
+
+#include <stdio.h>
+
+struct pollfd;
+
+/**
+ * struct fdarray: Array of file descriptors
+ *
+ * @priv: Per array entry priv area, users should access just its contents,
+ * not set it to anything, as it is kept in synch with @entries, being
+ * realloc'ed, * for instance, in fdarray__{grow,filter}.
+ *
+ * I.e. using 'fda->priv[N].idx = * value' where N < fda->nr is ok,
+ * but doing 'fda->priv = malloc(M)' is not allowed.
+ */
+struct fdarray {
+ int nr;
+ int nr_alloc;
+ int nr_autogrow;
+ struct pollfd *entries;
+ union {
+ int idx;
+ } *priv;
+};
+
+void fdarray__init(struct fdarray *fda, int nr_autogrow);
+void fdarray__exit(struct fdarray *fda);
+
+struct fdarray *fdarray__new(int nr_alloc, int nr_autogrow);
+void fdarray__delete(struct fdarray *fda);
+
+int fdarray__add(struct fdarray *fda, int fd, short revents);
+int fdarray__poll(struct fdarray *fda, int timeout);
+int fdarray__filter(struct fdarray *fda, short revents,
+ void (*entry_destructor)(struct fdarray *fda, int fd));
+int fdarray__grow(struct fdarray *fda, int extra);
+int fdarray__fprintf(struct fdarray *fda, FILE *fp);
+
+static inline int fdarray__available_entries(struct fdarray *fda)
+{
+ return fda->nr_alloc - fda->nr;
+}
+
+#endif /* __API_FD_ARRAY__ */
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 171f4e65601b..262916f4a377 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -402,6 +402,7 @@ LIB_OBJS += $(OUTPUT)tests/perf-record.o
LIB_OBJS += $(OUTPUT)tests/rdpmc.o
LIB_OBJS += $(OUTPUT)tests/evsel-roundtrip-name.o
LIB_OBJS += $(OUTPUT)tests/evsel-tp-sched.o
+LIB_OBJS += $(OUTPUT)tests/fdarray.o
LIB_OBJS += $(OUTPUT)tests/pmu.o
LIB_OBJS += $(OUTPUT)tests/hists_common.o
LIB_OBJS += $(OUTPUT)tests/hists_link.o
@@ -769,7 +770,7 @@ $(LIBTRACEEVENT)-clean:
install-traceevent-plugins: $(LIBTRACEEVENT)
$(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) install_plugins
-LIBAPIKFS_SOURCES = $(wildcard $(LIB_PATH)fs/*.[ch])
+LIBAPIKFS_SOURCES = $(wildcard $(LIB_PATH)fs/*.[ch] $(LIB_PATH)fd/*.[ch])
# if subdir is set, we've been called from above so target has been built
# already
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index f5d3ae483110..1e639d6265cc 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -919,15 +919,8 @@ static int kvm_events_live_report(struct perf_kvm_stat *kvm)
signal(SIGINT, sig_handler);
signal(SIGTERM, sig_handler);
- /* copy pollfds -- need to add timerfd and stdin */
- nr_fds = kvm->evlist->nr_fds;
- pollfds = zalloc(sizeof(struct pollfd) * (nr_fds + 2));
- if (!pollfds) {
- err = -ENOMEM;
- goto out;
- }
- memcpy(pollfds, kvm->evlist->pollfd,
- sizeof(struct pollfd) * kvm->evlist->nr_fds);
+ /* use pollfds -- need to add timerfd and stdin */
+ nr_fds = kvm->evlist->pollfd.nr;
/* add timer fd */
if (perf_kvm__timerfd_create(kvm) < 0) {
@@ -935,17 +928,21 @@ static int kvm_events_live_report(struct perf_kvm_stat *kvm)
goto out;
}
- pollfds[nr_fds].fd = kvm->timerfd;
- pollfds[nr_fds].events = POLLIN;
+ if (perf_evlist__add_pollfd(kvm->evlist, kvm->timerfd))
+ goto out;
+
nr_fds++;
- pollfds[nr_fds].fd = fileno(stdin);
- pollfds[nr_fds].events = POLLIN;
+ if (perf_evlist__add_pollfd(kvm->evlist, fileno(stdin)))
+ goto out;
+
nr_stdin = nr_fds;
nr_fds++;
if (fd_set_nonblock(fileno(stdin)) != 0)
goto out;
+ pollfds = kvm->evlist->pollfd.entries;
+
/* everything is good - enable the events and process */
perf_evlist__enable(kvm->evlist);
@@ -979,7 +976,6 @@ out:
close(kvm->timerfd);
tcsetattr(0, TCSAFLUSH, &save);
- free(pollfds);
return err;
}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index a1b040394170..320b198b54dd 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -308,7 +308,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
struct record_opts *opts = &rec->opts;
struct perf_data_file *file = &rec->file;
struct perf_session *session;
- bool disabled = false;
+ bool disabled = false, draining = false;
rec->progname = argv[0];
@@ -457,9 +457,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
}
if (hits == rec->samples) {
- if (done)
+ if (done || draining)
break;
- err = poll(rec->evlist->pollfd, rec->evlist->nr_fds, -1);
+ err = perf_evlist__poll(rec->evlist, -1);
/*
* Propagate error, only if there's any. Ignore positive
* number of returned events and interrupt error.
@@ -467,6 +467,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
if (err > 0 || (err < 0 && errno == EINTR))
err = 0;
waking++;
+
+ if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
+ draining = true;
}
/*
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index e13864be2acb..832fb527ed90 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -964,7 +964,7 @@ static int __cmd_top(struct perf_top *top)
perf_evlist__enable(top->evlist);
/* Wait for a minimal set of events before starting the snapshot */
- poll(top->evlist->pollfd, top->evlist->nr_fds, 100);
+ perf_evlist__poll(top->evlist, 100);
perf_top__mmap_read(top);
@@ -991,7 +991,7 @@ static int __cmd_top(struct perf_top *top)
perf_top__mmap_read(top);
if (hits == top->samples)
- ret = poll(top->evlist->pollfd, top->evlist->nr_fds, 100);
+ ret = perf_evlist__poll(top->evlist, 100);
}
ret = 0;
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index a9e96ff49c7f..fe39dc620179 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2044,6 +2044,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
int err = -1, i;
unsigned long before;
const bool forks = argc > 0;
+ bool draining = false;
char sbuf[STRERR_BUFSIZE];
trace->live = true;
@@ -2171,8 +2172,12 @@ next_event:
if (trace->nr_events == before) {
int timeout = done ? 100 : -1;
- if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
+ if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
+ if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
+ draining = true;
+
goto again;
+ }
} else {
goto again;
}
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 6a4145e5ad2c..ac655b0700e7 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -158,6 +158,14 @@ static struct test {
.func = test__switch_tracking,
},
{
+ .desc = "Filter fds with revents mask in a fdarray",
+ .func = test__fdarray__filter,
+ },
+ {
+ .desc = "Add fd to a fdarray, making it autogrow",
+ .func = test__fdarray__add,
+ },
+ {
.func = NULL,
},
};
diff --git a/tools/perf/tests/fdarray.c b/tools/perf/tests/fdarray.c
new file mode 100644
index 000000000000..d24b837951d4
--- /dev/null
+++ b/tools/perf/tests/fdarray.c
@@ -0,0 +1,174 @@
+#include <api/fd/array.h>
+#include "util/debug.h"
+#include "tests/tests.h"
+
+static void fdarray__init_revents(struct fdarray *fda, short revents)
+{
+ int fd;
+
+ fda->nr = fda->nr_alloc;
+
+ for (fd = 0; fd < fda->nr; ++fd) {
+ fda->entries[fd].fd = fda->nr - fd;
+ fda->entries[fd].revents = revents;
+ }
+}
+
+static int fdarray__fprintf_prefix(struct fdarray *fda, const char *prefix, FILE *fp)
+{
+ int printed = 0;
+
+ if (!verbose)
+ return 0;
+
+ printed += fprintf(fp, "\n%s: ", prefix);
+ return printed + fdarray__fprintf(fda, fp);
+}
+
+int test__fdarray__filter(void)
+{
+ int nr_fds, expected_fd[2], fd, err = TEST_FAIL;
+ struct fdarray *fda = fdarray__new(5, 5);
+
+ if (fda == NULL) {
+ pr_debug("\nfdarray__new() failed!");
+ goto out;
+ }
+
+ fdarray__init_revents(fda, POLLIN);
+ nr_fds = fdarray__filter(fda, POLLHUP, NULL);
+ if (nr_fds != fda->nr_alloc) {
+ pr_debug("\nfdarray__filter()=%d != %d shouldn't have filtered anything",
+ nr_fds, fda->nr_alloc);
+ goto out_delete;
+ }
+
+ fdarray__init_revents(fda, POLLHUP);
+ nr_fds = fdarray__filter(fda, POLLHUP, NULL);
+ if (nr_fds != 0) {
+ pr_debug("\nfdarray__filter()=%d != %d, should have filtered all fds",
+ nr_fds, fda->nr_alloc);
+ goto out_delete;
+ }
+
+ fdarray__init_revents(fda, POLLHUP);
+ fda->entries[2].revents = POLLIN;
+ expected_fd[0] = fda->entries[2].fd;
+
+ pr_debug("\nfiltering all but fda->entries[2]:");
+ fdarray__fprintf_prefix(fda, "before", stderr);
+ nr_fds = fdarray__filter(fda, POLLHUP, NULL);
+ fdarray__fprintf_prefix(fda, " after", stderr);
+ if (nr_fds != 1) {
+ pr_debug("\nfdarray__filter()=%d != 1, should have left just one event", nr_fds);
+ goto out_delete;
+ }
+
+ if (fda->entries[0].fd != expected_fd[0]) {
+ pr_debug("\nfda->entries[0].fd=%d != %d\n",
+ fda->entries[0].fd, expected_fd[0]);
+ goto out_delete;
+ }
+
+ fdarray__init_revents(fda, POLLHUP);
+ fda->entries[0].revents = POLLIN;
+ expected_fd[0] = fda->entries[0].fd;
+ fda->entries[3].revents = POLLIN;
+ expected_fd[1] = fda->entries[3].fd;
+
+ pr_debug("\nfiltering all but (fda->entries[0], fda->entries[3]):");
+ fdarray__fprintf_prefix(fda, "before", stderr);
+ nr_fds = fdarray__filter(fda, POLLHUP, NULL);
+ fdarray__fprintf_prefix(fda, " after", stderr);
+ if (nr_fds != 2) {
+ pr_debug("\nfdarray__filter()=%d != 2, should have left just two events",
+ nr_fds);
+ goto out_delete;
+ }
+
+ for (fd = 0; fd < 2; ++fd) {
+ if (fda->entries[fd].fd != expected_fd[fd]) {
+ pr_debug("\nfda->entries[%d].fd=%d != %d\n", fd,
+ fda->entries[fd].fd, expected_fd[fd]);
+ goto out_delete;
+ }
+ }
+
+ pr_debug("\n");
+
+ err = 0;
+out_delete:
+ fdarray__delete(fda);
+out:
+ return err;
+}
+
+int test__fdarray__add(void)
+{
+ int err = TEST_FAIL;
+ struct fdarray *fda = fdarray__new(2, 2);
+
+ if (fda == NULL) {
+ pr_debug("\nfdarray__new() failed!");
+ goto out;
+ }
+
+#define FDA_CHECK(_idx, _fd, _revents) \
+ if (fda->entries[_idx].fd != _fd) { \
+ pr_debug("\n%d: fda->entries[%d](%d) != %d!", \
+ __LINE__, _idx, fda->entries[1].fd, _fd); \
+ goto out_delete; \
+ } \
+ if (fda->entries[_idx].events != (_revents)) { \
+ pr_debug("\n%d: fda->entries[%d].revents(%d) != %d!", \
+ __LINE__, _idx, fda->entries[_idx].fd, _revents); \
+ goto out_delete; \
+ }
+
+#define FDA_ADD(_idx, _fd, _revents, _nr) \
+ if (fdarray__add(fda, _fd, _revents) < 0) { \
+ pr_debug("\n%d: fdarray__add(fda, %d, %d) failed!", \
+ __LINE__,_fd, _revents); \
+ goto out_delete; \
+ } \
+ if (fda->nr != _nr) { \
+ pr_debug("\n%d: fdarray__add(fda, %d, %d)=%d != %d", \
+ __LINE__,_fd, _revents, fda->nr, _nr); \
+ goto out_delete; \
+ } \
+ FDA_CHECK(_idx, _fd, _revents)
+
+ FDA_ADD(0, 1, POLLIN, 1);
+ FDA_ADD(1, 2, POLLERR, 2);
+
+ fdarray__fprintf_prefix(fda, "before growing array", stderr);
+
+ FDA_ADD(2, 35, POLLHUP, 3);
+
+ if (fda->entries == NULL) {
+ pr_debug("\nfdarray__add(fda, 35, POLLHUP) should have allocated fda->pollfd!");
+ goto out_delete;
+ }
+
+ fdarray__fprintf_prefix(fda, "after 3rd add", stderr);
+
+ FDA_ADD(3, 88, POLLIN | POLLOUT, 4);
+
+ fdarray__fprintf_prefix(fda, "after 4th add", stderr);
+
+ FDA_CHECK(0, 1, POLLIN);
+ FDA_CHECK(1, 2, POLLERR);
+ FDA_CHECK(2, 35, POLLHUP);
+ FDA_CHECK(3, 88, POLLIN | POLLOUT);
+
+#undef FDA_ADD
+#undef FDA_CHECK
+
+ pr_debug("\n");
+
+ err = 0;
+out_delete:
+ fdarray__delete(fda);
+out:
+ return err;
+}
diff --git a/tools/perf/tests/open-syscall-tp-fields.c b/tools/perf/tests/open-syscall-tp-fields.c
index 922bdb627950..127dcae0b760 100644
--- a/tools/perf/tests/open-syscall-tp-fields.c
+++ b/tools/perf/tests/open-syscall-tp-fields.c
@@ -105,7 +105,7 @@ int test__syscall_open_tp_fields(void)
}
if (nr_events == before)
- poll(evlist->pollfd, evlist->nr_fds, 10);
+ perf_evlist__poll(evlist, 10);
if (++nr_polls > 5) {
pr_debug("%s: no events!\n", __func__);
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index 2ce753c1db63..7a228a2a070b 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -268,7 +268,7 @@ int test__PERF_RECORD(void)
* perf_event_attr.wakeup_events, just PERF_EVENT_SAMPLE does.
*/
if (total_events == before && false)
- poll(evlist->pollfd, evlist->nr_fds, -1);
+ perf_evlist__poll(evlist, -1);
sleep(1);
if (++wakeups > 5) {
diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c
index 87522f01c7ad..3a8fedef83bc 100644
--- a/tools/perf/tests/task-exit.c
+++ b/tools/perf/tests/task-exit.c
@@ -105,7 +105,7 @@ retry:
}
if (!exited || !nr_exit) {
- poll(evlist->pollfd, evlist->nr_fds, -1);
+ perf_evlist__poll(evlist, -1);
goto retry;
}
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index be8be10e3957..00e776a87a9c 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -49,6 +49,8 @@ int test__thread_mg_share(void);
int test__hists_output(void);
int test__hists_cumulate(void);
int test__switch_tracking(void);
+int test__fdarray__filter(void);
+int test__fdarray__add(void);
#if defined(__x86_64__) || defined(__i386__) || defined(__arm__)
#ifdef HAVE_DWARF_UNWIND_SUPPORT
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index a3e28b49128a..3cebc9a8d52e 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -25,6 +25,9 @@
#include <linux/bitops.h>
#include <linux/hash.h>
+static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx);
+static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx);
+
#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
@@ -37,6 +40,7 @@ void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
INIT_HLIST_HEAD(&evlist->heads[i]);
INIT_LIST_HEAD(&evlist->entries);
perf_evlist__set_maps(evlist, cpus, threads);
+ fdarray__init(&evlist->pollfd, 64);
evlist->workload.pid = -1;
}
@@ -102,7 +106,7 @@ static void perf_evlist__purge(struct perf_evlist *evlist)
void perf_evlist__exit(struct perf_evlist *evlist)
{
zfree(&evlist->mmap);
- zfree(&evlist->pollfd);
+ fdarray__exit(&evlist->pollfd);
}
void perf_evlist__delete(struct perf_evlist *evlist)
@@ -402,7 +406,7 @@ int perf_evlist__enable_event_idx(struct perf_evlist *evlist,
return perf_evlist__enable_event_thread(evlist, evsel, idx);
}
-static int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
+int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
{
int nr_cpus = cpu_map__nr(evlist->cpus);
int nr_threads = thread_map__nr(evlist->threads);
@@ -416,16 +420,50 @@ static int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
nfds += nr_cpus * nr_threads;
}
- evlist->pollfd = malloc(sizeof(struct pollfd) * nfds);
- return evlist->pollfd != NULL ? 0 : -ENOMEM;
+ if (fdarray__available_entries(&evlist->pollfd) < nfds &&
+ fdarray__grow(&evlist->pollfd, nfds) < 0)
+ return -ENOMEM;
+
+ return 0;
}
-void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd)
+static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, int idx)
{
- fcntl(fd, F_SETFL, O_NONBLOCK);
- evlist->pollfd[evlist->nr_fds].fd = fd;
- evlist->pollfd[evlist->nr_fds].events = POLLIN;
- evlist->nr_fds++;
+ int pos = fdarray__add(&evlist->pollfd, fd, POLLIN | POLLERR | POLLHUP);
+ /*
+ * Save the idx so that when we filter out fds POLLHUP'ed we can
+ * close the associated evlist->mmap[] entry.
+ */
+ if (pos >= 0) {
+ evlist->pollfd.priv[pos].idx = idx;
+
+ fcntl(fd, F_SETFL, O_NONBLOCK);
+ }
+
+ return pos;
+}
+
+int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd)
+{
+ return __perf_evlist__add_pollfd(evlist, fd, -1);
+}
+
+static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd)
+{
+ struct perf_evlist *evlist = container_of(fda, struct perf_evlist, pollfd);
+
+ perf_evlist__mmap_put(evlist, fda->priv[fd].idx);
+}
+
+int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask)
+{
+ return fdarray__filter(&evlist->pollfd, revents_and_mask,
+ perf_evlist__munmap_filtered);
+}
+
+int perf_evlist__poll(struct perf_evlist *evlist, int timeout)
+{
+ return fdarray__poll(&evlist->pollfd, timeout);
}
static void perf_evlist__id_hash(struct perf_evlist *evlist,
@@ -638,14 +676,36 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
return event;
}
+static bool perf_mmap__empty(struct perf_mmap *md)
+{
+ return perf_mmap__read_head(md) != md->prev;
+}
+
+static void perf_evlist__mmap_get(struct perf_evlist *evlist, int idx)
+{
+ ++evlist->mmap[idx].refcnt;
+}
+
+static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx)
+{
+ BUG_ON(evlist->mmap[idx].refcnt == 0);
+
+ if (--evlist->mmap[idx].refcnt == 0)
+ __perf_evlist__munmap(evlist, idx);
+}
+
void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx)
{
+ struct perf_mmap *md = &evlist->mmap[idx];
+
if (!evlist->overwrite) {
- struct perf_mmap *md = &evlist->mmap[idx];
unsigned int old = md->prev;
perf_mmap__write_tail(md, old);
}
+
+ if (md->refcnt == 1 && perf_mmap__empty(md))
+ perf_evlist__mmap_put(evlist, idx);
}
static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx)
@@ -653,6 +713,7 @@ static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx)
if (evlist->mmap[idx].base != NULL) {
munmap(evlist->mmap[idx].base, evlist->mmap_len);
evlist->mmap[idx].base = NULL;
+ evlist->mmap[idx].refcnt = 0;
}
}
@@ -686,6 +747,20 @@ struct mmap_params {
static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx,
struct mmap_params *mp, int fd)
{
+ /*
+ * The last one will be done at perf_evlist__mmap_consume(), so that we
+ * make sure we don't prevent tools from consuming every last event in
+ * the ring buffer.
+ *
+ * I.e. we can get the POLLHUP meaning that the fd doesn't exist
+ * anymore, but the last events for it are still in the ring buffer,
+ * waiting to be consumed.
+ *
+ * Tools can chose to ignore this at their own discretion, but the
+ * evlist layer can't just drop it when filtering events in
+ * perf_evlist__filter_pollfd().
+ */
+ evlist->mmap[idx].refcnt = 2;
evlist->mmap[idx].prev = 0;
evlist->mmap[idx].mask = mp->mask;
evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, mp->prot,
@@ -697,7 +772,6 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx,
return -1;
}
- perf_evlist__add_pollfd(evlist, fd);
return 0;
}
@@ -722,6 +796,13 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
} else {
if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
return -1;
+
+ perf_evlist__mmap_get(evlist, idx);
+ }
+
+ if (__perf_evlist__add_pollfd(evlist, fd, idx) < 0) {
+ perf_evlist__mmap_put(evlist, idx);
+ return -1;
}
if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
@@ -881,7 +962,7 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
if (evlist->mmap == NULL && perf_evlist__alloc_mmap(evlist) < 0)
return -ENOMEM;
- if (evlist->pollfd == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
+ if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
return -ENOMEM;
evlist->overwrite = overwrite;
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 106de53a6a74..bd312b01e876 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -2,6 +2,7 @@
#define __PERF_EVLIST_H 1
#include <linux/list.h>
+#include <api/fd/array.h>
#include <stdio.h>
#include "../perf.h"
#include "event.h"
@@ -17,9 +18,15 @@ struct record_opts;
#define PERF_EVLIST__HLIST_BITS 8
#define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS)
+/**
+ * struct perf_mmap - perf's ring buffer mmap details
+ *
+ * @refcnt - e.g. code using PERF_EVENT_IOC_SET_OUTPUT to share this
+ */
struct perf_mmap {
void *base;
int mask;
+ int refcnt;
unsigned int prev;
char event_copy[PERF_SAMPLE_MAX_SIZE];
};
@@ -29,7 +36,6 @@ struct perf_evlist {
struct hlist_head heads[PERF_EVLIST__HLIST_SIZE];
int nr_entries;
int nr_groups;
- int nr_fds;
int nr_mmaps;
size_t mmap_len;
int id_pos;
@@ -40,8 +46,8 @@ struct perf_evlist {
pid_t pid;
} workload;
bool overwrite;
+ struct fdarray pollfd;
struct perf_mmap *mmap;
- struct pollfd *pollfd;
struct thread_map *threads;
struct cpu_map *cpus;
struct perf_evsel *selected;
@@ -82,7 +88,11 @@ perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist,
void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
int cpu, int thread, u64 id);
-void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd);
+int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd);
+int perf_evlist__alloc_pollfd(struct perf_evlist *evlist);
+int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask);
+
+int perf_evlist__poll(struct perf_evlist *evlist, int timeout);
struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id);
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 12aa9b0d0ba1..3dda85ca50c1 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -736,7 +736,7 @@ static PyObject *pyrf_evlist__poll(struct pyrf_evlist *pevlist,
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", kwlist, &timeout))
return NULL;
- n = poll(evlist->pollfd, evlist->nr_fds, timeout);
+ n = perf_evlist__poll(evlist, timeout);
if (n < 0) {
PyErr_SetFromErrno(PyExc_OSError);
return NULL;
@@ -753,9 +753,9 @@ static PyObject *pyrf_evlist__get_pollfd(struct pyrf_evlist *pevlist,
PyObject *list = PyList_New(0);
int i;
- for (i = 0; i < evlist->nr_fds; ++i) {
+ for (i = 0; i < evlist->pollfd.nr; ++i) {
PyObject *file;
- FILE *fp = fdopen(evlist->pollfd[i].fd, "r");
+ FILE *fp = fdopen(evlist->pollfd.entries[i].fd, "r");
if (fp == NULL)
goto free_list;