aboutsummaryrefslogtreecommitdiff
path: root/tools/perf/util/mmap.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-01-30 11:15:14 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2018-01-30 11:15:14 -0800
commitd8b91dde38f4c43bd0bbbf17a90f735b16aaff2c (patch)
treebd72dabf6e4b23e060fce429c87e60504f69de54 /tools/perf/util/mmap.c
parent5e7481a25e90b661d1dbbba18be3fd3dfe12ec6f (diff)
parente4c1091cb495d9cbec8956d642644a71a1689958 (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar: "Kernel side changes: - Clean up the x86 instruction decoder (Masami Hiramatsu) - Add new uprobes optimization for PUSH instructions on x86 (Yonghong Song) - Add MSR_IA32_THERM_STATUS to the MSR events (Stephane Eranian) - Fix misc bugs, update documentation, plus various cleanups (Jiri Olsa) There's a large number of tooling side improvements: - Intel-PT/BTS improvements (Adrian Hunter) - Numerous 'perf trace' improvements (Arnaldo Carvalho de Melo) - Introduce an errno code to string facility (Hendrik Brueckner) - Various build system improvements (Jiri Olsa) - Add support for CoreSight trace decoding by making the perf tools use the external openCSD (Mathieu Poirier, Tor Jeremiassen) - Add ARM Statistical Profiling Extensions (SPE) support (Kim Phillips) - libtraceevent updates (Steven Rostedt) - Intel vendor event JSON updates (Andi Kleen) - Introduce 'perf report --mmaps' and 'perf report --tasks' to show info present in 'perf.data' (Jiri Olsa, Arnaldo Carvalho de Melo) - Add infrastructure to record first and last sample time to the perf.data file header, so that when processing all samples in a 'perf record' session, such as when doing build-id processing, or when specifically requesting that that info be recorded, use that in 'perf report --time', that also got support for percent slices in addition to absolute ones. I.e. now it is possible to ask for the samples in the 10%-20% time slice of a perf.data file (Jin Yao) - Allow system wide 'perf stat --per-thread', sorting the result (Jin Yao) E.g.: [root@jouet ~]# perf stat --per-thread --metrics IPC ^C Performance counter stats for 'system wide': make-22229 23,012,094,032 inst_retired.any # 0.8 IPC cc1-22419 692,027,497 inst_retired.any # 0.8 IPC gcc-22418 328,231,855 inst_retired.any # 0.9 IPC cc1-22509 220,853,647 inst_retired.any # 0.8 IPC gcc-22486 199,874,810 inst_retired.any # 1.0 IPC as-22466 177,896,365 inst_retired.any # 0.9 IPC cc1-22465 150,732,374 inst_retired.any # 0.8 IPC gcc-22508 112,555,593 inst_retired.any # 0.9 IPC cc1-22487 108,964,079 inst_retired.any # 0.7 IPC qemu-system-x86-2697 21,330,550 inst_retired.any # 0.3 IPC systemd-journal-551 20,642,951 inst_retired.any # 0.4 IPC docker-containe-17651 9,552,892 inst_retired.any # 0.5 IPC dockerd-current-9809 7,528,586 inst_retired.any # 0.5 IPC make-22153 12,504,194,380 inst_retired.any # 0.8 IPC python2-22429 12,081,290,954 inst_retired.any # 0.8 IPC <SNIP> python2-22429 15,026,328,103 cpu_clk_unhalted.thread cc1-22419 826,660,193 cpu_clk_unhalted.thread gcc-22418 365,321,295 cpu_clk_unhalted.thread cc1-22509 279,169,362 cpu_clk_unhalted.thread gcc-22486 210,156,950 cpu_clk_unhalted.thread <SNIP> 5.638075538 seconds time elapsed [root@jouet ~]# - Improve shell auto-completion of perf events (Jin Yao) - 'perf probe' improvements (Masami Hiramatsu) - Improve PMU infrastructure to support amp64's ThunderX2 implementation defined core events (Ganapatrao Kulkarni) - Various annotation related improvements and fixes (Thomas Richter) - Clarify usage of 'overwrite' and 'backward' in the evlist/mmap code, removing the 'overwrite' parameter from several functions as it was always used it as 'false' (Wang Nan) - Fix/improve 'perf record' reverse recording support (Wang Nan) - Improve command line options documentation (Sihyeon Jang) - Optimize sample parsing for ordering events, where we don't need to parse all the PERF_SAMPLE_ bits, just the ones leading to the timestamp needed to reorder events (Jiri Olsa) - Generalize the annotation code to support other source information besides objdump/DWARF obtained ones, starting with python scripts, that will is slated to be merged soon (Jiri Olsa) - ... and a lot more that I failed to list, see the shortlog and changelog for details" * 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (262 commits) perf trace beauty flock: Move to separate object file perf evlist: Remove fcntl.h from evlist.h perf trace beauty futex: Beautify FUTEX_BITSET_MATCH_ANY perf trace: Do not print from time delta for interrupted syscall lines perf trace: Add --print-sample perf bpf: Remove misplaced __maybe_unused attribute MAINTAINERS: Adding entry for CoreSight trace decoding perf tools: Add mechanic to synthesise CoreSight trace packets perf tools: Add full support for CoreSight trace decoding pert tools: Add queue management functionality perf tools: Add functionality to communicate with the openCSD decoder perf tools: Add support for decoding CoreSight trace data perf tools: Add decoder mechanic to support dumping trace data perf tools: Add processing of coresight metadata perf tools: Add initial entry point for decoder CoreSight traces perf tools: Integrating the CoreSight decoding library perf vendor events intel: Update IvyTown files to V20 perf vendor events intel: Update IvyBridge files to V20 perf vendor events intel: Update BroadwellDE events to V7 perf vendor events intel: Update SkylakeX events to V1.06 ...
Diffstat (limited to 'tools/perf/util/mmap.c')
-rw-r--r--tools/perf/util/mmap.c73
1 files changed, 25 insertions, 48 deletions
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 9fe5f9c7d577..05076e683938 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -21,33 +21,13 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map)
}
/* When check_messup is true, 'end' must points to a good entry */
-static union perf_event *perf_mmap__read(struct perf_mmap *map, bool check_messup,
+static union perf_event *perf_mmap__read(struct perf_mmap *map,
u64 start, u64 end, u64 *prev)
{
unsigned char *data = map->base + page_size;
union perf_event *event = NULL;
int diff = end - start;
- if (check_messup) {
- /*
- * If we're further behind than half the buffer, there's a chance
- * the writer will bite our tail and mess up the samples under us.
- *
- * If we somehow ended up ahead of the 'end', we got messed up.
- *
- * In either case, truncate and restart at 'end'.
- */
- if (diff > map->mask / 2 || diff < 0) {
- fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");
-
- /*
- * 'end' points to a known good entry, start there.
- */
- start = end;
- diff = 0;
- }
- }
-
if (diff >= (int)sizeof(event->header)) {
size_t size;
@@ -89,7 +69,7 @@ broken_event:
return event;
}
-union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_messup)
+union perf_event *perf_mmap__read_forward(struct perf_mmap *map)
{
u64 head;
u64 old = map->prev;
@@ -102,7 +82,7 @@ union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_mess
head = perf_mmap__read_head(map);
- return perf_mmap__read(map, check_messup, old, head, &map->prev);
+ return perf_mmap__read(map, old, head, &map->prev);
}
union perf_event *perf_mmap__read_backward(struct perf_mmap *map)
@@ -138,7 +118,7 @@ union perf_event *perf_mmap__read_backward(struct perf_mmap *map)
else
end = head + map->mask + 1;
- return perf_mmap__read(map, false, start, end, &map->prev);
+ return perf_mmap__read(map, start, end, &map->prev);
}
void perf_mmap__read_catchup(struct perf_mmap *map)
@@ -254,18 +234,18 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd)
return 0;
}
-static int backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
+static int overwrite_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
{
struct perf_event_header *pheader;
u64 evt_head = head;
int size = mask + 1;
- pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);
+ pr_debug2("overwrite_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);
pheader = (struct perf_event_header *)(buf + (head & mask));
*start = head;
while (true) {
if (evt_head - head >= (unsigned int)size) {
- pr_debug("Finished reading backward ring buffer: rewind\n");
+ pr_debug("Finished reading overwrite ring buffer: rewind\n");
if (evt_head - head > (unsigned int)size)
evt_head -= pheader->size;
*end = evt_head;
@@ -275,7 +255,7 @@ static int backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64
pheader = (struct perf_event_header *)(buf + (evt_head & mask));
if (pheader->size == 0) {
- pr_debug("Finished reading backward ring buffer: get start\n");
+ pr_debug("Finished reading overwrite ring buffer: get start\n");
*end = evt_head;
return 0;
}
@@ -287,19 +267,7 @@ static int backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64
return -1;
}
-static int rb_find_range(void *data, int mask, u64 head, u64 old,
- u64 *start, u64 *end, bool backward)
-{
- if (!backward) {
- *start = old;
- *end = head;
- return 0;
- }
-
- return backward_rb_find_range(data, mask, head, start, end);
-}
-
-int perf_mmap__push(struct perf_mmap *md, bool overwrite, bool backward,
+int perf_mmap__push(struct perf_mmap *md, bool overwrite,
void *to, int push(void *to, void *buf, size_t size))
{
u64 head = perf_mmap__read_head(md);
@@ -310,19 +278,28 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite, bool backward,
void *buf;
int rc = 0;
- if (rb_find_range(data, md->mask, head, old, &start, &end, backward))
- return -1;
+ start = overwrite ? head : old;
+ end = overwrite ? old : head;
if (start == end)
return 0;
size = end - start;
if (size > (unsigned long)(md->mask) + 1) {
- WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
+ if (!overwrite) {
+ WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
- md->prev = head;
- perf_mmap__consume(md, overwrite || backward);
- return 0;
+ md->prev = head;
+ perf_mmap__consume(md, overwrite);
+ return 0;
+ }
+
+ /*
+ * Backward ring buffer is full. We still have a chance to read
+ * most of data from it.
+ */
+ if (overwrite_rb_find_range(data, md->mask, head, &start, &end))
+ return -1;
}
if ((start & md->mask) + size != (end & md->mask)) {
@@ -346,7 +323,7 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite, bool backward,
}
md->prev = head;
- perf_mmap__consume(md, overwrite || backward);
+ perf_mmap__consume(md, overwrite);
out:
return rc;
}