diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-01-19 14:25:23 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-01-19 14:25:23 -0800 |
commit | 9d64bf433c53cab2f48a3fff7a1f2a696bc5229a (patch) | |
tree | 0535254c177c27ec34adbc153d494cea9a9625a0 /tools/perf/util | |
parent | 57f22c8dab6b266ae36b89b073a4a33dea71e762 (diff) | |
parent | d988c9f511af71a3445b6a4f3a2c67208ff8e480 (diff) |
Merge tag 'perf-tools-for-v6.8-1-2024-01-09' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools
Pull perf tools updates from Arnaldo Carvalho de Melo:
"Add Namhyung Kim as tools/perf/ co-maintainer, we're taking turns
processing patches, switching roles from perf-tools to perf-tools-next
at each Linux release.
Data profiling:
- Associate samples that identify loads and stores with data
structures. This uses events available on Intel, AMD and others and
DWARF info:
# To get memory access samples in kernel for 1 second (on Intel)
$ perf mem record -a -K --ldlat=4 -- sleep 1
# Similar for the AMD (but it requires 6.3+ kernel for BPF filters)
$ perf mem record -a --filter 'mem_op == load || mem_op == store, ip > 0x8000000000000000' -- sleep 1
Then, amongst several modes of post processing, one can do things like:
$ perf report -s type,typeoff --hierarchy --group --stdio
...
#
# Samples: 10K of events 'cpu/mem-loads,ldlat=4/P, cpu/mem-stores/P, dummy:u'
# Event count (approx.): 602758064
#
# Overhead Data Type / Data Type Offset
# ........................... ............................
#
26.09% 3.28% 0.00% long unsigned int
26.09% 3.28% 0.00% long unsigned int +0 (no field)
18.48% 0.73% 0.00% struct page
10.83% 0.02% 0.00% struct page +8 (lru.next)
3.90% 0.28% 0.00% struct page +0 (flags)
3.45% 0.06% 0.00% struct page +24 (mapping)
0.25% 0.28% 0.00% struct page +48 (_mapcount.counter)
0.02% 0.06% 0.00% struct page +32 (index)
0.02% 0.00% 0.00% struct page +52 (_refcount.counter)
0.02% 0.01% 0.00% struct page +56 (memcg_data)
0.00% 0.01% 0.00% struct page +16 (lru.prev)
15.37% 17.54% 0.00% (stack operation)
15.37% 17.54% 0.00% (stack operation) +0 (no field)
11.71% 50.27% 0.00% (unknown)
11.71% 50.27% 0.00% (unknown) +0 (no field)
$ perf annotate --data-type
...
Annotate type: 'struct cfs_rq' in [kernel.kallsyms] (13 samples):
============================================================================
samples offset size field
13 0 640 struct cfs_rq {
2 0 16 struct load_weight load {
2 0 8 unsigned long weight;
0 8 4 u32 inv_weight;
};
0 16 8 unsigned long runnable_weight;
0 24 4 unsigned int nr_running;
1 28 4 unsigned int h_nr_running;
...
$ perf annotate --data-type=page --group
Annotate type: 'struct page' in [kernel.kallsyms] (480 samples):
event[0] = cpu/mem-loads,ldlat=4/P
event[1] = cpu/mem-stores/P
event[2] = dummy:u
===================================================================================
samples offset size field
447 33 0 0 64 struct page {
108 8 0 0 8 long unsigned int flags;
319 13 0 8 40 union {
319 13 0 8 40 struct {
236 2 0 8 16 union {
236 2 0 8 16 struct list_head lru {
236 1 0 8 8 struct list_head* next;
0 1 0 16 8 struct list_head* prev;
};
236 2 0 8 16 struct {
236 1 0 8 8 void* __filler;
0 1 0 16 4 unsigned int mlock_count;
};
236 2 0 8 16 struct list_head buddy_list {
236 1 0 8 8 struct list_head* next;
0 1 0 16 8 struct list_head* prev;
};
236 2 0 8 16 struct list_head pcp_list {
236 1 0 8 8 struct list_head* next;
0 1 0 16 8 struct list_head* prev;
};
};
82 4 0 24 8 struct address_space* mapping;
1 7 0 32 8 union {
1 7 0 32 8 long unsigned int index;
1 7 0 32 8 long unsigned int share;
};
0 0 0 40 8 long unsigned int private;
};
This uses the existing annotate code, calling objdump to do the
disassembly, with improvements to avoid having this take too long,
but longer term a switch to a disassembler library, possibly
reusing code in the kernel will be pursued.
This is the initial implementation, please use it and report
impressions and bugs. Make sure the kernel-debuginfo packages match
the running kernel. The 'perf report' phase for non short perf.data
files may take a while.
There is a great article about it on LWN:
https://lwn.net/Articles/955709/ - "Data-type profiling for perf"
One last test I did while writing this text, on a AMD Ryzen 5950X,
using a distro kernel, while doing a simple 'find /' on an
otherwise idle system resulted in:
# uname -r
6.6.9-100.fc38.x86_64
# perf -vv | grep BPF_
bpf: [ on ] # HAVE_LIBBPF_SUPPORT
bpf_skeletons: [ on ] # HAVE_BPF_SKEL
# rpm -qa | grep kernel-debuginfo
kernel-debuginfo-common-x86_64-6.6.9-100.fc38.x86_64
kernel-debuginfo-6.6.9-100.fc38.x86_64
#
# perf mem record -a --filter 'mem_op == load || mem_op == store, ip > 0x8000000000000000'
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 2.199 MB perf.data (2913 samples) ]
#
# ls -la perf.data
-rw-------. 1 root root 2346486 Jan 9 18:36 perf.data
# perf evlist
ibs_op//
dummy:u
# perf evlist -v
ibs_op//: type: 11, size: 136, config: 0, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ADDR|CPU|PERIOD|IDENTIFIER|DATA_SRC|WEIGHT, read_format: ID, disabled: 1, inherit: 1, freq: 1, sample_id_all: 1
dummy:u: type: 1 (PERF_TYPE_SOFTWARE), size: 136, config: 0x9 (PERF_COUNT_SW_DUMMY), { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|ADDR|CPU|IDENTIFIER|DATA_SRC|WEIGHT, read_format: ID, inherit: 1, exclude_kernel: 1, exclude_hv: 1, mmap: 1, comm: 1, task: 1, mmap_data: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, ksymbol: 1, bpf_event: 1
#
# perf report -s type,typeoff --hierarchy --group --stdio
# Total Lost Samples: 0
#
# Samples: 2K of events 'ibs_op//, dummy:u'
# Event count (approx.): 1904553038
#
# Overhead Data Type / Data Type Offset
# ................... ............................
#
73.70% 0.00% (unknown)
73.70% 0.00% (unknown) +0 (no field)
3.01% 0.00% long unsigned int
3.00% 0.00% long unsigned int +0 (no field)
0.01% 0.00% long unsigned int +2 (no field)
2.73% 0.00% struct task_struct
1.71% 0.00% struct task_struct +52 (on_cpu)
0.38% 0.00% struct task_struct +2104 (rcu_read_unlock_special.b.blocked)
0.23% 0.00% struct task_struct +2100 (rcu_read_lock_nesting)
0.14% 0.00% struct task_struct +2384 ()
0.06% 0.00% struct task_struct +3096 (signal)
0.05% 0.00% struct task_struct +3616 (cgroups)
0.05% 0.00% struct task_struct +2344 (active_mm)
0.02% 0.00% struct task_struct +46 (flags)
0.02% 0.00% struct task_struct +2096 (migration_disabled)
0.01% 0.00% struct task_struct +24 (__state)
0.01% 0.00% struct task_struct +3956 (mm_cid_active)
0.01% 0.00% struct task_struct +1048 (cpus_ptr)
0.01% 0.00% struct task_struct +184 (se.group_node.next)
0.01% 0.00% struct task_struct +20 (thread_info.cpu)
0.00% 0.00% struct task_struct +104 (on_rq)
0.00% 0.00% struct task_struct +2456 (pid)
1.36% 0.00% struct module
0.59% 0.00% struct module +952 (kallsyms)
0.42% 0.00% struct module +0 (state)
0.23% 0.00% struct module +8 (list.next)
0.12% 0.00% struct module +216 (syms)
0.95% 0.00% struct inode
0.41% 0.00% struct inode +40 (i_sb)
0.22% 0.00% struct inode +0 (i_mode)
0.06% 0.00% struct inode +76 (i_rdev)
0.06% 0.00% struct inode +56 (i_security)
<SNIP>
perf top/report:
- Don't ignore job control, allowing control+Z + bg to work.
- Add s390 raw data interpretation for PAI (Processor Activity
Instrumentation) counters.
perf archive:
- Add new option '--all' to pack perf.data with DSOs.
- Add new option '--unpack' to expand tarballs.
Initialization speedups:
- Lazily initialize zstd streams to save memory when not using it.
- Lazily allocate/size mmap event copy.
- Lazy load kernel symbols in 'perf record'.
- Be lazier in allocating lost samples buffer in 'perf record'.
- Don't synthesize BPF events when disabled via the command line
(perf record --no-bpf-event).
Assorted improvements:
- Show note on AMD systems that the :p, :pp, :ppp and :P are all the
same, as IBS (Instruction Based Sampling) is used and it is
inherentely precise, not having levels of precision like in Intel
systems.
- When 'cycles' isn't available, fall back to the "task-clock" event
when not system wide, not to 'cpu-clock'.
- Add --debug-file option to redirect debug output, e.g.:
$ perf --debug-file /tmp/perf.log record -v true
- Shrink 'struct map' to under one cacheline by avoiding function
pointers for selecting if addresses are identity or DSO relative,
and using just a byte for some boolean struct members.
- Resolve the arch specific strerrno just once to use in
perf_env__arch_strerrno().
- Reduce memory for recording PERF_RECORD_LOST_SAMPLES event.
Assorted fixes:
- Fix the default 'perf top' usage on Intel hybrid systems, now it
starts with a browser showing the number of samples for Efficiency
(cpu_atom/cycles/P) and Performance (cpu_core/cycles/P). This
behaviour is similar on ARM64, with its respective set of
big.LITTLE processors.
- Fix segfault on build_mem_topology() error path.
- Fix 'perf mem' error on hybrid related to availability of mem event
in a PMU.
- Fix missing reference count gets (map, maps) in the db-export code.
- Avoid recursively taking env->bpf_progs.lock in the 'perf_env'
code.
- Use the newly introduced maps__for_each_map() to add missing
locking around iteration of 'struct map' entries.
- Parse NOTE segments until the build id is found, don't stop on the
first one, ELF files may have several such NOTE segments.
- Remove 'egrep' usage, its deprecated, use 'grep -E' instead.
- Warn first about missing libelf, not libbpf, that depends on
libelf.
- Use alternative to 'find ... -printf' as this isn't supported in
busybox.
- Address python 3.6 DeprecationWarning for string scapes.
- Fix memory leak in uniq() in libsubcmd.
- Fix man page formatting for 'perf lock'
- Fix some spelling mistakes.
perf tests:
- Fail shell tests that needs some symbol in perf itself if it is
stripped. These tests check if a symbol is resolved, if some hot
function is indeed detected by profiling, etc.
- The 'perf test sigtrap' test is currently failing on PREEMPT_RT,
skip it if sleeping spinlocks are detected (using BTF) and point to
the mailing list discussion about it. This test is also being
skipped on several architectures (powerpc, s390x, arm and aarch64)
due to other pending issues with intruction breakpoints.
- Adjust test case perf record offcpu profiling tests for s390.
- Fix 'Setup struct perf_event_attr' fails on s390 on z/VM guest,
addressing issues caused by the fallback from cycles to task-clock
done in this release.
- Fix mask for VG register in the user-regs test.
- Use shellcheck on 'perf test' shell scripts automatically to make
sure changes don't introduce things it flags as problematic.
- Add option to change objdump binary and allow it to be set via
'perf config'.
- Add basic 'perf script', 'perf list --json" and 'perf diff' tests.
- Basic branch counter support.
- Make DSO tests a suite rather than individual.
- Remove atomics from test_loop to avoid test failures.
- Fix call chain match on powerpc for the record+probe_libc_inet_pton
test.
- Improve Intel hybrid tests.
Vendor event files (JSON):
powerpc:
- Update datasource event name to fix duplicate events on IBM's
Power10.
- Add PVN for HX-C2000 CPU with Power8 Architecture.
Intel:
- Alderlake/rocketlake metric fixes.
- Update emeraldrapids events to v1.02.
- Update icelakex events to v1.23.
- Update sapphirerapids events to v1.17.
- Add skx, clx, icx and spr upi bandwidth metric.
AMD:
- Add Zen 4 memory controller events.
RISC-V:
- Add StarFive Dubhe-80 and Dubhe-90 JSON files.
https://www.starfivetech.com/en/site/cpu-u
- Add T-HEAD C9xx JSON file.
https://github.com/riscv-software-src/opensbi/blob/master/docs/platform/thead-c9xx.md
ARM64:
- Remove UTF-8 characters from cmn.json, that were causing build
failure in some distros.
- Add core PMU events and metrics for Ampere One X.
- Rename Ampere One's BPU_FLUSH_MEM_FAULT to GPC_FLUSH_MEM_FAULT
libperf:
- Rename several perf_cpu_map constructor names to clarify what they
really do.
- Ditto for some other methods, coping with some issues in their
semantics, like perf_cpu_map__empty() ->
perf_cpu_map__has_any_cpu_or_is_empty().
- Document perf_cpu_map__nr()'s behavior
perf stat:
- Exit if parse groups fails.
- Combine the -A/--no-aggr and --no-merge options.
- Fix help message for --metric-no-threshold option.
Hardware tracing:
ARM64 CoreSight:
- Bump minimum OpenCSD version to ensure a bugfix is present.
- Add 'T' itrace option for timestamp trace
- Set start vm addr of exectable file to 0 and don't ignore first
sample on the arm-cs-trace-disasm.py 'perf script'"
* tag 'perf-tools-for-v6.8-1-2024-01-09' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools: (179 commits)
MAINTAINERS: Add Namhyung as tools/perf/ co-maintainer
perf test: test case 'Setup struct perf_event_attr' fails on s390 on z/vm
perf db-export: Fix missing reference count get in call_path_from_sample()
perf tests: Add perf script test
libsubcmd: Fix memory leak in uniq()
perf TUI: Don't ignore job control
perf vendor events intel: Update sapphirerapids events to v1.17
perf vendor events intel: Update icelakex events to v1.23
perf vendor events intel: Update emeraldrapids events to v1.02
perf vendor events intel: Alderlake/rocketlake metric fixes
perf x86 test: Add hybrid test for conflicting legacy/sysfs event
perf x86 test: Update hybrid expectations
perf vendor events amd: Add Zen 4 memory controller events
perf stat: Fix hard coded LL miss units
perf record: Reduce memory for recording PERF_RECORD_LOST_SAMPLES event
perf env: Avoid recursively taking env->bpf_progs.lock
perf annotate: Add --insn-stat option for debugging
perf annotate: Add --type-stat option for debugging
perf annotate: Support event group display
perf annotate: Add --data-type option
...
Diffstat (limited to 'tools/perf/util')
84 files changed, 3325 insertions, 1248 deletions
diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 988473bf907a..8027f450fa3e 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -195,6 +195,8 @@ endif perf-$(CONFIG_DWARF) += probe-finder.o perf-$(CONFIG_DWARF) += dwarf-aux.o perf-$(CONFIG_DWARF) += dwarf-regs.o +perf-$(CONFIG_DWARF) += debuginfo.o +perf-$(CONFIG_DWARF) += annotate-data.o perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind-local.o diff --git a/tools/perf/util/annotate-data.c b/tools/perf/util/annotate-data.c new file mode 100644 index 000000000000..f22b4f18271c --- /dev/null +++ b/tools/perf/util/annotate-data.c @@ -0,0 +1,405 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Convert sample address to data type using DWARF debug info. + * + * Written by Namhyung Kim <namhyung@kernel.org> + */ + +#include <stdio.h> +#include <stdlib.h> +#include <inttypes.h> + +#include "annotate-data.h" +#include "debuginfo.h" +#include "debug.h" +#include "dso.h" +#include "evsel.h" +#include "evlist.h" +#include "map.h" +#include "map_symbol.h" +#include "strbuf.h" +#include "symbol.h" +#include "symbol_conf.h" + +/* + * Compare type name and size to maintain them in a tree. + * I'm not sure if DWARF would have information of a single type in many + * different places (compilation units). If not, it could compare the + * offset of the type entry in the .debug_info section. + */ +static int data_type_cmp(const void *_key, const struct rb_node *node) +{ + const struct annotated_data_type *key = _key; + struct annotated_data_type *type; + + type = rb_entry(node, struct annotated_data_type, node); + + if (key->self.size != type->self.size) + return key->self.size - type->self.size; + return strcmp(key->self.type_name, type->self.type_name); +} + +static bool data_type_less(struct rb_node *node_a, const struct rb_node *node_b) +{ + struct annotated_data_type *a, *b; + + a = rb_entry(node_a, struct annotated_data_type, node); + b = rb_entry(node_b, struct annotated_data_type, node); + + if (a->self.size != b->self.size) + return a->self.size < b->self.size; + return strcmp(a->self.type_name, b->self.type_name) < 0; +} + +/* Recursively add new members for struct/union */ +static int __add_member_cb(Dwarf_Die *die, void *arg) +{ + struct annotated_member *parent = arg; + struct annotated_member *member; + Dwarf_Die member_type, die_mem; + Dwarf_Word size, loc; + Dwarf_Attribute attr; + struct strbuf sb; + int tag; + + if (dwarf_tag(die) != DW_TAG_member) + return DIE_FIND_CB_SIBLING; + + member = zalloc(sizeof(*member)); + if (member == NULL) + return DIE_FIND_CB_END; + + strbuf_init(&sb, 32); + die_get_typename(die, &sb); + + die_get_real_type(die, &member_type); + if (dwarf_aggregate_size(&member_type, &size) < 0) + size = 0; + + if (!dwarf_attr_integrate(die, DW_AT_data_member_location, &attr)) + loc = 0; + else + dwarf_formudata(&attr, &loc); + + member->type_name = strbuf_detach(&sb, NULL); + /* member->var_name can be NULL */ + if (dwarf_diename(die)) + member->var_name = strdup(dwarf_diename(die)); + member->size = size; + member->offset = loc + parent->offset; + INIT_LIST_HEAD(&member->children); + list_add_tail(&member->node, &parent->children); + + tag = dwarf_tag(&member_type); + switch (tag) { + case DW_TAG_structure_type: + case DW_TAG_union_type: + die_find_child(&member_type, __add_member_cb, member, &die_mem); + break; + default: + break; + } + return DIE_FIND_CB_SIBLING; +} + +static void add_member_types(struct annotated_data_type *parent, Dwarf_Die *type) +{ + Dwarf_Die die_mem; + + die_find_child(type, __add_member_cb, &parent->self, &die_mem); +} + +static void delete_members(struct annotated_member *member) +{ + struct annotated_member *child, *tmp; + + list_for_each_entry_safe(child, tmp, &member->children, node) { + list_del(&child->node); + delete_members(child); + free(child->type_name); + free(child->var_name); + free(child); + } +} + +static struct annotated_data_type *dso__findnew_data_type(struct dso *dso, + Dwarf_Die *type_die) +{ + struct annotated_data_type *result = NULL; + struct annotated_data_type key; + struct rb_node *node; + struct strbuf sb; + char *type_name; + Dwarf_Word size; + + strbuf_init(&sb, 32); + if (die_get_typename_from_type(type_die, &sb) < 0) + strbuf_add(&sb, "(unknown type)", 14); + type_name = strbuf_detach(&sb, NULL); + dwarf_aggregate_size(type_die, &size); + + /* Check existing nodes in dso->data_types tree */ + key.self.type_name = type_name; + key.self.size = size; + node = rb_find(&key, &dso->data_types, data_type_cmp); + if (node) { + result = rb_entry(node, struct annotated_data_type, node); + free(type_name); + return result; + } + + /* If not, add a new one */ + result = zalloc(sizeof(*result)); + if (result == NULL) { + free(type_name); + return NULL; + } + + result->self.type_name = type_name; + result->self.size = size; + INIT_LIST_HEAD(&result->self.children); + + if (symbol_conf.annotate_data_member) + add_member_types(result, type_die); + + rb_add(&result->node, &dso->data_types, data_type_less); + return result; +} + +static bool find_cu_die(struct debuginfo *di, u64 pc, Dwarf_Die *cu_die) +{ + Dwarf_Off off, next_off; + size_t header_size; + + if (dwarf_addrdie(di->dbg, pc, cu_die) != NULL) + return cu_die; + + /* + * There are some kernels don't have full aranges and contain only a few + * aranges entries. Fallback to iterate all CU entries in .debug_info + * in case it's missing. + */ + off = 0; + while (dwarf_nextcu(di->dbg, off, &next_off, &header_size, + NULL, NULL, NULL) == 0) { + if (dwarf_offdie(di->dbg, off + header_size, cu_die) && + dwarf_haspc(cu_die, pc)) + return true; + + off = next_off; + } + return false; +} + +/* The type info will be saved in @type_die */ +static int check_variable(Dwarf_Die *var_die, Dwarf_Die *type_die, int offset) +{ + Dwarf_Word size; + + /* Get the type of the variable */ + if (die_get_real_type(var_die, type_die) == NULL) { + pr_debug("variable has no type\n"); + ann_data_stat.no_typeinfo++; + return -1; + } + + /* + * It expects a pointer type for a memory access. + * Convert to a real type it points to. + */ + if (dwarf_tag(type_die) != DW_TAG_pointer_type || + die_get_real_type(type_die, type_die) == NULL) { + pr_debug("no pointer or no type\n"); + ann_data_stat.no_typeinfo++; + return -1; + } + + /* Get the size of the actual type */ + if (dwarf_aggregate_size(type_die, &size) < 0) { + pr_debug("type size is unknown\n"); + ann_data_stat.invalid_size++; + return -1; + } + + /* Minimal sanity check */ + if ((unsigned)offset >= size) { + pr_debug("offset: %d is bigger than size: %" PRIu64 "\n", offset, size); + ann_data_stat.bad_offset++; + return -1; + } + + return 0; +} + +/* The result will be saved in @type_die */ +static int find_data_type_die(struct debuginfo *di, u64 pc, + int reg, int offset, Dwarf_Die *type_die) +{ + Dwarf_Die cu_die, var_die; + Dwarf_Die *scopes = NULL; + int ret = -1; + int i, nr_scopes; + + /* Get a compile_unit for this address */ + if (!find_cu_die(di, pc, &cu_die)) { + pr_debug("cannot find CU for address %" PRIx64 "\n", pc); + ann_data_stat.no_cuinfo++; + return -1; + } + + /* Get a list of nested scopes - i.e. (inlined) functions and blocks. */ + nr_scopes = die_get_scopes(&cu_die, pc, &scopes); + + /* Search from the inner-most scope to the outer */ + for (i = nr_scopes - 1; i >= 0; i--) { + /* Look up variables/parameters in this scope */ + if (!die_find_variable_by_reg(&scopes[i], pc, reg, &var_die)) + continue; + + /* Found a variable, see if it's correct */ + ret = check_variable(&var_die, type_die, offset); + goto out; + } + if (ret < 0) + ann_data_stat.no_var++; + +out: + free(scopes); + return ret; +} + +/** + * find_data_type - Return a data type at the location + * @ms: map and symbol at the location + * @ip: instruction address of the memory access + * @reg: register that holds the base address + * @offset: offset from the base address + * + * This functions searches the debug information of the binary to get the data + * type it accesses. The exact location is expressed by (ip, reg, offset). + * It return %NULL if not found. + */ +struct annotated_data_type *find_data_type(struct map_symbol *ms, u64 ip, + int reg, int offset) +{ + struct annotated_data_type *result = NULL; + struct dso *dso = map__dso(ms->map); + struct debuginfo *di; + Dwarf_Die type_die; + u64 pc; + + di = debuginfo__new(dso->long_name); + if (di == NULL) { + pr_debug("cannot get the debug info\n"); + return NULL; + } + + /* + * IP is a relative instruction address from the start of the map, as + * it can be randomized/relocated, it needs to translate to PC which is + * a file address for DWARF processing. + */ + pc = map__rip_2objdump(ms->map, ip); + if (find_data_type_die(di, pc, reg, offset, &type_die) < 0) + goto out; + + result = dso__findnew_data_type(dso, &type_die); + +out: + debuginfo__delete(di); + return result; +} + +static int alloc_data_type_histograms(struct annotated_data_type *adt, int nr_entries) +{ + int i; + size_t sz = sizeof(struct type_hist); + + sz += sizeof(struct type_hist_entry) * adt->self.size; + + /* Allocate a table of pointers for each event */ + adt->nr_histograms = nr_entries; + adt->histograms = calloc(nr_entries, sizeof(*adt->histograms)); + if (adt->histograms == NULL) + return -ENOMEM; + + /* + * Each histogram is allocated for the whole size of the type. + * TODO: Probably we can move the histogram to members. + */ + for (i = 0; i < nr_entries; i++) { + adt->histograms[i] = zalloc(sz); + if (adt->histograms[i] == NULL) + goto err; + } + return 0; + +err: + while (--i >= 0) + free(adt->histograms[i]); + free(adt->histograms); + return -ENOMEM; +} + +static void delete_data_type_histograms(struct annotated_data_type *adt) +{ + for (int i = 0; i < adt->nr_histograms; i++) + free(adt->histograms[i]); + free(adt->histograms); +} + +void annotated_data_type__tree_delete(struct rb_root *root) +{ + struct annotated_data_type *pos; + + while (!RB_EMPTY_ROOT(root)) { + struct rb_node *node = rb_first(root); + + rb_erase(node, root); + pos = rb_entry(node, struct annotated_data_type, node); + delete_members(&pos->self); + delete_data_type_histograms(pos); + free(pos->self.type_name); + free(pos); + } +} + +/** + * annotated_data_type__update_samples - Update histogram + * @adt: Data type to update + * @evsel: Event to update + * @offset: Offset in the type + * @nr_samples: Number of samples at this offset + * @period: Event count at this offset + * + * This function updates type histogram at @ofs for @evsel. Samples are + * aggregated before calling this function so it can be called with more + * than one samples at a certain offset. + */ +int annotated_data_type__update_samples(struct annotated_data_type *adt, + struct evsel *evsel, int offset, + int nr_samples, u64 period) +{ + struct type_hist *h; + + if (adt == NULL) + return 0; + + if (adt->histograms == NULL) { + int nr = evsel->evlist->core.nr_entries; + + if (alloc_data_type_histograms(adt, nr) < 0) + return -1; + } + + if (offset < 0 || offset >= adt->self.size) + return -1; + + h = adt->histograms[evsel->core.idx]; + + h->nr_samples += nr_samples; + h->addr[offset].nr_samples += nr_samples; + h->period += period; + h->addr[offset].period += period; + return 0; +} diff --git a/tools/perf/util/annotate-data.h b/tools/perf/util/annotate-data.h new file mode 100644 index 000000000000..8e73096c01d1 --- /dev/null +++ b/tools/perf/util/annotate-data.h @@ -0,0 +1,143 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _PERF_ANNOTATE_DATA_H +#define _PERF_ANNOTATE_DATA_H + +#include <errno.h> +#include <linux/compiler.h> +#include <linux/rbtree.h> +#include <linux/types.h> + +struct evsel; +struct map_symbol; + +/** + * struct annotated_member - Type of member field + * @node: List entry in the parent list + * @children: List head for child nodes + * @type_name: Name of the member type + * @var_name: Name of the member variable + * @offset: Offset from the outer data type + * @size: Size of the member field + * + * This represents a member type in a data type. + */ +struct annotated_member { + struct list_head node; + struct list_head children; + char *type_name; + char *var_name; + int offset; + int size; +}; + +/** + * struct type_hist_entry - Histogram entry per offset + * @nr_samples: Number of samples + * @period: Count of event + */ +struct type_hist_entry { + int nr_samples; + u64 period; +}; + +/** + * struct type_hist - Type histogram for each event + * @nr_samples: Total number of samples in this data type + * @period: Total count of the event in this data type + * @offset: Array of histogram entry + */ +struct type_hist { + u64 nr_samples; + u64 period; + struct type_hist_entry addr[]; +}; + +/** + * struct annotated_data_type - Data type to profile + * @node: RB-tree node for dso->type_tree + * @self: Actual type information + * @nr_histogram: Number of histogram entries + * @histograms: An array of pointers to histograms + * + * This represents a data type accessed by samples in the profile data. + */ +struct annotated_data_type { + struct rb_node node; + struct annotated_member self; + int nr_histograms; + struct type_hist **histograms; +}; + +extern struct annotated_data_type unknown_type; + +/** + * struct annotated_data_stat - Debug statistics + * @total: Total number of entry + * @no_sym: No symbol or map found + * @no_insn: Failed to get disasm line + * @no_insn_ops: The instruction has no operands + * @no_mem_ops: The instruction has no memory operands + * @no_reg: Failed to extract a register from the operand + * @no_dbginfo: The binary has no debug information + * @no_cuinfo: Failed to find a compile_unit + * @no_var: Failed to find a matching variable + * @no_typeinfo: Failed to get a type info for the variable + * @invalid_size: Failed to get a size info of the type + * @bad_offset: The access offset is out of the type + */ +struct annotated_data_stat { + int total; + int no_sym; + int no_insn; + int no_insn_ops; + int no_mem_ops; + int no_reg; + int no_dbginfo; + int no_cuinfo; + int no_var; + int no_typeinfo; + int invalid_size; + int bad_offset; +}; +extern struct annotated_data_stat ann_data_stat; + +#ifdef HAVE_DWARF_SUPPORT + +/* Returns data type at the location (ip, reg, offset) */ +struct annotated_data_type *find_data_type(struct map_symbol *ms, u64 ip, + int reg, int offset); + +/* Update type access histogram at the given offset */ +int annotated_data_type__update_samples(struct annotated_data_type *adt, + struct evsel *evsel, int offset, + int nr_samples, u64 period); + +/* Release all data type information in the tree */ +void annotated_data_type__tree_delete(struct rb_root *root); + +#else /* HAVE_DWARF_SUPPORT */ + +static inline struct annotated_data_type * +find_data_type(struct map_symbol *ms __maybe_unused, u64 ip __maybe_unused, + int reg __maybe_unused, int offset __maybe_unused) +{ + return NULL; +} + +static inline int +annotated_data_type__update_samples(struct annotated_data_type *adt __maybe_unused, + struct evsel *evsel __maybe_unused, + int offset __maybe_unused, + int nr_samples __maybe_unused, + u64 period __maybe_unused) +{ + return -1; +} + +static inline void annotated_data_type__tree_delete(struct rb_root *root __maybe_unused) +{ +} + +#endif /* HAVE_DWARF_SUPPORT */ + +#endif /* _PERF_ANNOTATE_DATA_H */ diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 82956adf9963..9b70ab110ce7 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -25,12 +25,14 @@ #include "units.h" #include "debug.h" #include "annotate.h" +#include "annotate-data.h" #include "evsel.h" #include "evlist.h" #include "bpf-event.h" #include "bpf-utils.h" #include "block-range.h" #include "string2.h" +#include "dwarf-regs.h" #include "util/event.h" #include "util/sharded_mutex.h" #include "arch/common.h" @@ -57,6 +59,9 @@ #include <linux/ctype.h> +/* global annotation options */ +struct annotation_options annotate_opts; + static regex_t file_lineno; static struct ins_ops *ins__find(struct arch *arch, const char *name); @@ -85,6 +90,8 @@ struct arch { struct { char comment_char; char skip_functions_char; + char register_char; + char memory_ref_char; } objdump; }; @@ -96,6 +103,10 @@ static struct ins_ops nop_ops; static struct ins_ops lock_ops; static struct ins_ops ret_ops; +/* Data type collection debug statistics */ +struct annotated_data_stat ann_data_stat; +LIST_HEAD(ann_insn_stat); + static int arch__grow_instructions(struct arch *arch) { struct ins *new_instructions; @@ -188,6 +199,8 @@ static struct arch architectures[] = { .insn_suffix = "bwlq", .objdump = { .comment_char = '#', + .register_char = '%', + .memory_ref_char = '(', }, }, { @@ -340,10 +353,10 @@ bool ins__is_call(const struct ins *ins) */ static inline const char *validate_comma(const char *c, struct ins_operands *ops) { - if (ops->raw_comment && c > ops->raw_comment) + if (ops->jump.raw_comment && c > ops->jump.raw_comment) return NULL; - if (ops->raw_func_start && c > ops->raw_func_start) + if (ops->jump.raw_func_start && c > ops->jump.raw_func_start) return NULL; return c; @@ -359,8 +372,8 @@ static int jump__parse(struct arch *arch, struct ins_operands *ops, struct map_s const char *c = strchr(ops->raw, ','); u64 start, end; - ops->raw_comment = strchr(ops->raw, arch->objdump.comment_char); - ops->raw_func_start = strchr(ops->raw, '<'); + ops->jump.raw_comment = strchr(ops->raw, arch->objdump.comment_char); + ops->jump.raw_func_start = strchr(ops->raw, '<'); c = validate_comma(c, ops); @@ -462,7 +475,16 @@ static int jump__scnprintf(struct ins *ins, char *bf, size_t size, ops->target.offset); } +static void jump__delete(struct ins_operands *ops __maybe_unused) +{ + /* + * The ops->jump.raw_comment and ops->jump.raw_func_start belong to the + * raw string, don't free them. + */ +} + static struct ins_ops jump_ops = { + .free = jump__delete, .parse = jump__parse, .scnprintf = jump__scnprintf, }; @@ -557,6 +579,34 @@ static struct ins_ops lock_ops = { .scnprintf = lock__scnprintf, }; +/* + * Check if the operand has more than one registers like x86 SIB addressing: + * 0x1234(%rax, %rbx, 8) + * + * But it doesn't care segment selectors like %gs:0x5678(%rcx), so just check + * the input string after 'memory_ref_char' if exists. + */ +static bool check_multi_regs(struct arch *arch, const char *op) +{ + int count = 0; + + if (arch->objdump.register_char == 0) + return false; + + if (arch->objdump.memory_ref_char) { + op = strchr(op, arch->objdump.memory_ref_char); + if (op == NULL) + return false; + } + + while ((op = strchr(op, arch->objdump.register_char)) != NULL) { + count++; + op++; + } + + return count > 1; +} + static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms __maybe_unused) { char *s = strchr(ops->raw, ','), *target, *comment, prev; @@ -584,6 +634,8 @@ static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_sy if (ops->source.raw == NULL) return -1; + ops->source.multi_regs = check_multi_regs(arch, ops->source.raw); + target = skip_spaces(++s); comment = strchr(s, arch->objdump.comment_char); @@ -604,6 +656,8 @@ static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_sy if (ops->target.raw == NULL) goto out_free_source; + ops->target.multi_regs = check_multi_regs(arch, ops->target.raw); + if (comment == NULL) return 0; @@ -795,6 +849,11 @@ static struct arch *arch__find(const char *name) return bsearch(name, architectures, nmemb, sizeof(struct arch), arch__key_cmp); } +bool arch__is(struct arch *arch, const char *name) +{ + return !strcmp(arch->name, name); +} + static struct annotated_source *annotated_source__new(void) { struct annotated_source *src = zalloc(sizeof(*src)); @@ -810,7 +869,6 @@ static __maybe_unused void annotated_source__delete(struct annotated_source *src if (src == NULL) return; zfree(&src->histograms); - zfree(&src->cycles_hist); free(src); } @@ -845,18 +903,6 @@ static int annotated_source__alloc_histograms(struct annotated_source *src, return src->histograms ? 0 : -1; } -/* The cycles histogram is lazily allocated. */ -static int symbol__alloc_hist_cycles(struct symbol *sym) -{ - struct annotation *notes = symbol__annotation(sym); - const size_t size = symbol__size(sym); - - notes->src->cycles_hist = calloc(size, sizeof(struct cyc_hist)); - if (notes->src->cycles_hist == NULL) - return -1; - return 0; -} - void symbol__annotate_zero_histograms(struct symbol *sym) { struct annotation *notes = symbol__annotation(sym); @@ -865,9 +911,10 @@ void symbol__annotate_zero_histograms(struct symbol *sym) if (notes->src != NULL) { memset(notes->src->histograms, 0, notes->src->nr_histograms * notes->src->sizeof_sym_hist); - if (notes->src->cycles_hist) - memset(notes->src->cycles_hist, 0, - symbol__size(sym) * sizeof(struct cyc_hist)); + } + if (notes->branch && notes->branch->cycles_hist) { + memset(notes->branch->cycles_hist, 0, + symbol__size(sym) * sizeof(struct cyc_hist)); } annotation__unlock(notes); } @@ -958,23 +1005,33 @@ static int __symbol__inc_addr_samples(struct map_symbol *ms, return 0; } +struct annotated_branch *annotation__get_branch(struct annotation *notes) +{ + if (notes == NULL) + return NULL; + + if (notes->branch == NULL) + notes->branch = zalloc(sizeof(*notes->branch)); + + return notes->branch; +} + static struct cyc_hist *symbol__cycles_hist(struct symbol *sym) { struct annotation *notes = symbol__annotation(sym); + struct annotated_branch *branch; - if (notes->src == NULL) { - notes->src = annotated_source__new(); - if (notes->src == NULL) - return NULL; - goto alloc_cycles_hist; - } + branch = annotation__get_branch(notes); + if (branch == NULL) + return NULL; + + if (branch->cycles_hist == NULL) { + const size_t size = symbol__size(sym); - if (!notes->src->cycles_hist) { -alloc_cycles_hist: - symbol__alloc_hist_cycles(sym); + branch->cycles_hist = calloc(size, sizeof(struct cyc_hist)); } - return notes->src->cycles_hist; + return branch->cycles_hist; } struct annotated_source *symbol__hists(struct symbol *sym, int nr_hists) @@ -1077,12 +1134,20 @@ static unsigned annotation__count_insn(struct annotation *notes, u64 start, u64 u64 offset; for (offset = start; offset <= end; offset++) { - if (notes->offsets[offset]) + if (notes->src->offsets[offset]) n_insn++; } return n_insn; } +static void annotated_branch__delete(struct annotated_branch *branch) +{ + if (branch) { + zfree(&branch->cycles_hist); + free(branch); + } +} + static void annotation__count_and_fill(struct annotation *notes, u64 start, u64 end, struct cyc_hist *ch) { unsigned n_insn; @@ -1091,6 +1156,7 @@ static void annotation__count_and_fill(struct annotation *notes, u64 start, u64 n_insn = annotation__count_insn(notes, start, end); if (n_insn && ch->num && ch->cycles) { + struct annotated_branch *branch; float ipc = n_insn / ((double)ch->cycles / (double)ch->num); /* Hide data when there are too many overlaps. */ @@ -1098,54 +1164,76 @@ static void annotation__count_and_fill(struct annotation *notes, u64 start, u64 return; for (offset = start; offset <= end; offset++) { - struct annotation_line *al = notes->offsets[offset]; + struct annotation_line *al = notes->src->offsets[offset]; - if (al && al->ipc == 0.0) { - al->ipc = ipc; + if (al && al->cycles && al->cycles->ipc == 0.0) { + al->cycles->ipc = ipc; cover_insn++; } } - if (cover_insn) { - notes->hit_cycles += ch->cycles; - notes->hit_insn += n_insn * ch->num; - notes->cover_insn += cover_insn; + branch = annotation__get_branch(notes); + if (cover_insn && branch) { + branch->hit_cycles += ch->cycles; + branch->hit_insn += n_insn * ch->num; + branch->cover_insn += cover_insn; } } } -void annotation__compute_ipc(struct annotation *notes, size_t size) +static int annotation__compute_ipc(struct annotation *notes, size_t size) { + int err = 0; s64 offset; - if (!notes->src || !notes->src->cycles_hist) - return; + if (!notes->branch || !notes->branch->cycles_hist) + return 0; - notes->total_insn = annotation__count_insn(notes, 0, size - 1); - notes->hit_cycles = 0; - notes->hit_insn = 0; - notes->cover_insn = 0; + notes->branch->total_insn = annotation__count_insn(notes, 0, size - 1); + notes->branch->hit_cycles = 0; + notes->branch->hit_insn = 0; + notes->branch->cover_insn = 0; annotation__lock(notes); for (offset = size - 1; offset >= 0; --offset) { struct cyc_hist *ch; - ch = ¬es->src->cycles_hist[offset]; + ch = ¬es->branch->cycles_hist[offset]; if (ch && ch->cycles) { struct annotation_line *al; + al = notes->src->offsets[offset]; + if (al && al->cycles == NULL) { + al->cycles = zalloc(sizeof(*al->cycles)); + if (al->cycles == NULL) { + err = ENOMEM; + break; + } + } if (ch->have_start) annotation__count_and_fill(notes, ch->start, offset, ch); - al = notes->offsets[offset]; if (al && ch->num_aggr) { - al->cycles = ch->cycles_aggr / ch->num_aggr; - al->cycles_max = ch->cycles_max; - al->cycles_min = ch->cycles_min; + al->cycles->avg = ch->cycles_aggr / ch->num_aggr; + al->cycles->max = ch->cycles_max; + al->cycles->min = ch->cycles_min; + } + } + } + + if (err) { + while (++offset < (s64)size) { + struct cyc_hist *ch = ¬es->branch->cycles_hist[offset]; + + if (ch && ch->cycles) { + struct annotation_line *al = notes->src->offsets[offset]; + if (al) + zfree(&al->cycles); } - notes->have_cycles = true; } } + annotation__unlock(notes); + return 0; } int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample, @@ -1225,6 +1313,7 @@ static void annotation_line__exit(struct annotation_line *al) { zfree_srcline(&al->path); zfree(&al->line); + zfree(&al->cycles); } static size_t disasm_line_size(int nr) @@ -1299,6 +1388,7 @@ int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool r void annotation__exit(struct annotation *notes) { annotated_source__delete(notes->src); + annotated_branch__delete(notes->branch); } static struct sharded_mutex *sharded_mutex; @@ -1817,7 +1907,6 @@ static int symbol__disassemble_bpf(struct symbol *sym, struct annotate_args *args) { struct annotation *notes = symbol__annotation(sym); - struct annotation_options *opts = args->options; struct bpf_prog_linfo *prog_linfo = NULL; struct bpf_prog_info_node *info_node; int len = sym->end - sym->start; @@ -1927,7 +2016,7 @@ static int symbol__disassemble_bpf(struct symbol *sym, prev_buf_size = buf_size; fflush(s); - if (!opts->hide_src_code && srcline) { + if (!annotate_opts.hide_src_code && srcline) { args->offset = -1; args->line = strdup(srcline); args->line_nr = 0; @@ -2050,7 +2139,7 @@ static char *expand_tabs(char *line, char **storage, size_t *storage_len) static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) { - struct annotation_options *opts = args->options; + struct annotation_options *opts = &annotate_opts; struct map *map = args->ms.map; struct dso *dso = map__dso(map); char *command; @@ -2113,12 +2202,13 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) err = asprintf(&command, "%s %s%s --start-address=0x%016" PRIx64 " --stop-address=0x%016" PRIx64 - " -l -d %s %s %s %c%s%c %s%s -C \"$1\"", + " %s -d %s %s %s %c%s%c %s%s -C \"$1\"", opts->objdump_path ?: "objdump", opts->disassembler_style ? "-M " : "", opts->disassembler_style ?: "", map__rip_2objdump(map, sym->start), map__rip_2objdump(map, sym->end), + opts->show_linenr ? "-l" : "", opts->show_asm_raw ? "" : "--no-show-raw-insn", opts->annotate_src ? "-S" : "", opts->prefix ? "--prefix " : "", @@ -2299,15 +2389,8 @@ void symbol__calc_percent(struct symbol *sym, struct evsel *evsel) annotation__calc_percent(notes, evsel, symbol__size(sym)); } -int symbol__annotate(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *options, struct arch **parch) +static int evsel__get_arch(struct evsel *evsel, struct arch **parch) { - struct symbol *sym = ms->sym; - struct annotation *notes = symbol__annotation(sym); - struct annotate_args args = { - .evsel = evsel, - .options = options, - }; struct perf_env *env = evsel__env(evsel); const char *arch_name = perf_env__arch(env); struct arch *arch; @@ -2316,25 +2399,45 @@ int symbol__annotate(struct map_symbol *ms, struct evsel *evsel, if (!arch_name) return errno; - args.arch = arch = arch__find(arch_name); + *parch = arch = arch__find(arch_name); if (arch == NULL) { pr_err("%s: unsupported arch %s\n", __func__, arch_name); return ENOTSUP; } - if (parch) - *parch = arch; - if (arch->init) { err = arch->init(arch, env ? env->cpuid : NULL); if (err) { - pr_err("%s: failed to initialize %s arch priv area\n", __func__, arch->name); + pr_err("%s: failed to initialize %s arch priv area\n", + __func__, arch->name); return err; } } + return 0; +} +int symbol__annotate(struct map_symbol *ms, struct evsel *evsel, + struct arch **parch) +{ + struct symbol *sym = ms->sym; + struct annotation *notes = symbol__annotation(sym); + struct annotate_args args = { + .evsel = evsel, + .options = &annotate_opts, + }; + struct arch *arch = NULL; + int err; + + err = evsel__get_arch(evsel, &arch); + if (err < 0) + return err; + + if (parch) + *parch = arch; + + args.arch = arch; args.ms = *ms; - if (notes->options && notes->options->full_addr) + if (annotate_opts.full_addr) notes->start = map__objdump_2mem(ms->map, ms->sym->start); else notes->start = map__rip_2objdump(ms->map, ms->sym->start); @@ -2342,12 +2445,12 @@ int symbol__annotate(struct map_symbol *ms, struct evsel *evsel, return symbol__disassemble(sym, &args); } -static void insert_source_line(struct rb_root *root, struct annotation_line *al, - struct annotation_options *opts) +static void insert_source_line(struct rb_root *root, struct annotation_line *al) { struct annotation_line *iter; struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; + unsigned int percent_type = annotate_opts.percent_type; int i, ret; while (*p != NULL) { @@ -2358,7 +2461,7 @@ static void insert_source_line(struct rb_root *root, struct annotation_line *al, if (ret == 0) { for (i = 0; i < al->data_nr; i++) { iter->data[i].percent_sum += annotation_data__percent(&al->data[i], - opts->percent_type); + percent_type); } return; } @@ -2371,7 +2474,7 @@ static void insert_source_line(struct rb_root *root, struct annotation_line *al, for (i = 0; i < al->data_nr; i++) { al->data[i].percent_sum = annotation_data__percent(&al->data[i], - opts->percent_type); + percent_type); } rb_link_node(&al->rb_node, parent, p); @@ -2493,8 +2596,7 @@ static int annotated_source__addr_fmt_width(struct list_head *lines, u64 start) return 0; } -int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *opts) +int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel) { struct map *map = ms->map; struct symbol *sym = ms->sym; @@ -2505,6 +2607,7 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel, struct annotation *notes = symbol__annotation(sym); struct sym_hist *h = annotation__histogram(notes, evsel->core.idx); struct annotation_line *pos, *queue = NULL; + struct annotation_options *opts = &annotate_opts; u64 start = map__rip_2objdump(map, sym->start); int printed = 2, queue_len = 0, addr_fmt_width; int more = 0; @@ -2633,8 +2736,7 @@ static void FILE__write_graph(void *fp, int graph) fputs(s, fp); } -static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp, - struct annotation_options *opts) +static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp) { struct annotation *notes = symbol__annotation(sym); struct annotation_write_ops wops = { @@ -2649,9 +2751,9 @@ static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp, struct annotation_line *al; list_for_each_entry(al, ¬es->src->source, node) { - if (annotation_line__filter(al, notes)) + if (annotation_line__filter(al)) continue; - annotation_line__write(al, notes, &wops, opts); + annotation_line__write(al, notes, &wops); fputc('\n', fp); wops.first_line = false; } @@ -2659,8 +2761,7 @@ static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp, return 0; } -int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *opts) +int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel) { const char *ev_name = evsel__name(evsel); char buf[1024]; @@ -2682,7 +2783,7 @@ int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel, fprintf(fp, "%s() %s\nEvent: %s\n\n", ms->sym->name, map__dso(ms->map)->long_name, ev_name); - symbol__annotate_fprintf2(ms->sym, fp, opts); + symbol__annotate_fprintf2(ms->sym, fp); fclose(fp); err = 0; @@ -2769,7 +2870,7 @@ void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym) return; for (offset = 0; offset < size; ++offset) { - struct annotation_line *al = notes->offsets[offset]; + struct annotation_line *al = notes->src->offsets[offset]; struct disasm_line *dl; dl = disasm_line(al); @@ -2777,7 +2878,7 @@ void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym) if (!disasm_line__is_valid_local_jump(dl, sym)) continue; - al = notes->offsets[dl->ops.target.offset]; + al = notes->src->offsets[dl->ops.target.offset]; /* * FIXME: Oops, no jump target? Buggy disassembler? Or do we @@ -2794,19 +2895,20 @@ void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym) void annotation__set_offsets(struct annotation *notes, s64 size) { struct annotation_line *al; + struct annotated_source *src = notes->src; - notes->max_line_len = 0; - notes->nr_entries = 0; - notes->nr_asm_entries = 0; + src->max_line_len = 0; + src->nr_entries = 0; + src->nr_asm_entries = 0; - list_for_each_entry(al, ¬es->src->source, node) { + list_for_each_entry(al, &src->source, node) { size_t line_len = strlen(al->line); - if (notes->max_line_len < line_len) - notes->max_line_len = line_len; - al->idx = notes->nr_entries++; + if (src->max_line_len < line_len) + src->max_line_len = line_len; + al->idx = src->nr_entries++; if (al->offset != -1) { - al->idx_asm = notes->nr_asm_entries++; + al->idx_asm = src->nr_asm_entries++; /* * FIXME: short term bandaid to cope with assembly * routines that comes with labels in the same column @@ -2815,7 +2917,7 @@ void annotation__set_offsets(struct annotation *notes, s64 size) * E.g. copy_user_generic_unrolled */ if (al->offset < size) - notes->offsets[al->offset] = al; + notes->src->offsets[al->offset] = al; } else al->idx_asm = -1; } @@ -2858,24 +2960,24 @@ void annotation__init_column_widths(struct annotation *notes, struct symbol *sym void annotation__update_column_widths(struct annotation *notes) { - if (notes->options->use_offset) + if (annotate_opts.use_offset) notes->widths.target = notes->widths.min_addr; - else if (notes->options->full_addr) + else if (annotate_opts.full_addr) notes->widths.target = BITS_PER_LONG / 4; else notes->widths.target = notes->widths.max_addr; notes->widths.addr = notes->widths.target; - if (notes->options->show_nr_jumps) + if (annotate_opts.show_nr_jumps) notes->widths.addr += notes->widths.jumps + 1; } void annotation__toggle_full_addr(struct annotation *notes, struct map_symbol *ms) { - notes->options->full_addr = !notes->options->full_addr; + annotate_opts.full_addr = !annotate_opts.full_addr; - if (notes->options->full_addr) + if (annotate_opts.full_addr) notes->start = map__objdump_2mem(ms->map, ms->sym->start); else notes->start = map__rip_2objdump(ms->map, ms->sym->start); @@ -2884,8 +2986,7 @@ void annotation__toggle_full_addr(struct annotation *notes, struct map_symbol *m } static void annotation__calc_lines(struct annotation *notes, struct map *map, - struct rb_root *root, - struct annotation_options *opts) + struct rb_root *root) { struct annotation_line *al; struct rb_root tmp_root = RB_ROOT; @@ -2898,7 +2999,7 @@ static void annotation__calc_lines(struct annotation *notes, struct map *map, double percent; percent = annotation_data__percent(&al->data[i], - opts->percent_type); + annotate_opts.percent_type); if (percent > percent_max) percent_max = percent; @@ -2909,22 +3010,20 @@ static void annotation__calc_lines(struct annotation *notes, struct map *map, al->path = get_srcline(map__dso(map), notes->start + al->offset, NULL, false, true, notes->start + al->offset); - insert_source_line(&tmp_root, al, opts); + insert_source_line(&tmp_root, al); } resort_source_line(root, &tmp_root); } -static void symbol__calc_lines(struct map_symbol *ms, struct rb_root *root, - struct annotation_options *opts) +static void symbol__calc_lines(struct map_symbol *ms, struct rb_root *root) { struct annotation *notes = symbol__annotation(ms->sym); - annotation__calc_lines(notes, ms->map, root, opts); + annotation__calc_lines(notes, ms->map, root); } -int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *opts) +int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel) { struct dso *dso = map__dso(ms->map); struct symbol *sym = ms->sym; @@ -2933,7 +3032,7 @@ int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel, char buf[1024]; int err; - err = symbol__annotate2(ms, evsel, opts, NULL); + err = symbol__annotate2(ms, evsel, NULL); if (err) { char msg[BUFSIZ]; @@ -2943,31 +3042,31 @@ int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel, return -1; } - if (opts->print_lines) { - srcline_full_filename = opts->full_path; - symbol__calc_lines(ms, &source_line, opts); + if (annotate_opts.print_lines) { + srcline_full_filename = annotate_opts.full_path; + symbol__calc_lines(ms, &source_line); print_summary(&source_line, dso->long_name); } hists__scnprintf_title(hists, buf, sizeof(buf)); fprintf(stdout, "%s, [percent: %s]\n%s() %s\n", - buf, percent_type_str(opts->percent_type), sym->name, dso->long_name); - symbol__annotate_fprintf2(sym, stdout, opts); + buf, percent_type_str(annotate_opts.percent_type), sym->name, + dso->long_name); + symbol__annotate_fprintf2(sym, stdout); annotated_source__purge(symbol__annotation(sym)->src); return 0; } -int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *opts) +int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel) { struct dso *dso = map__dso(ms->map); struct symbol *sym = ms->sym; struct rb_root source_line = RB_ROOT; int err; - err = symbol__annotate(ms, evsel, opts, NULL); + err = symbol__annotate(ms, evsel, NULL); if (err) { char msg[BUFSIZ]; @@ -2979,13 +3078,13 @@ int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel, symbol__calc_percent(sym, evsel); - if (opts->print_lines) { - srcline_full_filename = opts->full_path; - symbol__calc_lines(ms, &source_line, opts); + if (annotate_opts.print_lines) { + srcline_full_filename = annotate_opts.full_path; + symbol__calc_lines(ms, &source_line); print_summary(&source_line, dso->long_name); } - symbol__annotate_printf(ms, evsel, opts); + symbol__annotate_printf(ms, evsel); annotated_source__purge(symbol__annotation(sym)->src); @@ -3046,19 +3145,20 @@ call_like: obj__printf(obj, " "); } - disasm_line__scnprintf(dl, bf, size, !notes->options->use_offset, notes->widths.max_ins_name); + disasm_line__scnprintf(dl, bf, size, !annotate_opts.use_offset, notes->widths.max_ins_name); } static void ipc_coverage_string(char *bf, int size, struct annotation *notes) { double ipc = 0.0, coverage = 0.0; + struct annotated_branch *branch = annotation__get_branch(notes); - if (notes->hit_cycles) - ipc = notes->hit_insn / ((double)notes->hit_cycles); + if (branch && branch->hit_cycles) + ipc = branch->hit_insn / ((double)branch->hit_cycles); - if (notes->total_insn) { - coverage = notes->cover_insn * 100.0 / - ((double)notes->total_insn); + if (branch && branch->total_insn) { + coverage = branch->cover_insn * 100.0 / + ((double)branch->total_insn); } scnprintf(bf, size, "(Average IPC: %.2f, IPC Coverage: %.1f%%)", @@ -3083,8 +3183,8 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati int printed; if (first_line && (al->offset == -1 || percent_max == 0.0)) { - if (notes->have_cycles) { - if (al->ipc == 0.0 && al->cycles == 0) + if (notes->branch && al->cycles) { + if (al->cycles->ipc == 0.0 && al->cycles->avg == 0) show_title = true; } else show_title = true; @@ -3120,18 +3220,18 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati } } - if (notes->have_cycles) { - if (al->ipc) - obj__printf(obj, "%*.2f ", ANNOTATION__IPC_WIDTH - 1, al->ipc); + if (notes->branch) { + if (al->cycles && al->cycles->ipc) + obj__printf(obj, "%*.2f ", ANNOTATION__IPC_WIDTH - 1, al->cycles->ipc); else if (!show_title) obj__printf(obj, "%*s", ANNOTATION__IPC_WIDTH, " "); else obj__printf(obj, "%*s ", ANNOTATION__IPC_WIDTH - 1, "IPC"); - if (!notes->options->show_minmax_cycle) { - if (al->cycles) + if (!annotate_opts.show_minmax_cycle) { + if (al->cycles && al->cycles->avg) obj__printf(obj, "%*" PRIu64 " ", - ANNOTATION__CYCLES_WIDTH - 1, al->cycles); + ANNOTATION__CYCLES_WIDTH - 1, al->cycles->avg); else if (!show_title) obj__printf(obj, "%*s", ANNOTATION__CYCLES_WIDTH, " "); @@ -3145,8 +3245,8 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati scnprintf(str, sizeof(str), "%" PRIu64 "(%" PRIu64 "/%" PRIu64 ")", - al->cycles, al->cycles_min, - al->cycles_max); + al->cycles->avg, al->cycles->min, + al->cycles->max); obj__printf(obj, "%*s ", ANNOTATION__MINMAX_CYCLES_WIDTH - 1, @@ -3172,7 +3272,7 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati if (!*al->line) obj__printf(obj, "%-*s", width - pcnt_width - cycles_width, " "); else if (al->offset == -1) { - if (al->line_nr && notes->options->show_linenr) + if (al->line_nr && annotate_opts.show_linenr) printed = scnprintf(bf, sizeof(bf), "%-*d ", notes->widths.addr + 1, al->line_nr); else printed = scnprintf(bf, sizeof(bf), "%-*s ", notes->widths.addr, " "); @@ -3182,15 +3282,15 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati u64 addr = al->offset; int color = -1; - if (!notes->options->use_offset) + if (!annotate_opts.use_offset) addr += notes->start; - if (!notes->options->use_offset) { + if (!annotate_opts.use_offset) { printed = scnprintf(bf, sizeof(bf), "%" PRIx64 ": ", addr); } else { if (al->jump_sources && - notes->options->offset_level >= ANNOTATION__OFFSET_JUMP_TARGETS) { - if (notes->options->show_nr_jumps) { + annotate_opts.offset_level >= ANNOTATION__OFFSET_JUMP_TARGETS) { + if (annotate_opts.show_nr_jumps) { int prev; printed = scnprintf(bf, sizeof(bf), "%*d ", notes->widths.jumps, @@ -3204,9 +3304,9 @@ print_addr: printed = scnprintf(bf, sizeof(bf), "%*" PRIx64 ": ", notes->widths.target, addr); } else if (ins__is_call(&disasm_line(al)->ins) && - notes->options->offset_level >= ANNOTATION__OFFSET_CALL) { + annotate_opts.offset_level >= ANNOTATION__OFFSET_CALL) { goto print_addr; - } else if (notes->options->offset_level == ANNOTATION__MAX_OFFSET_LEVEL) { + } else if (annotate_opts.offset_level == ANNOTATION__MAX_OFFSET_LEVEL) { goto print_addr; } else { printed = scnprintf(bf, sizeof(bf), "%-*s ", @@ -3228,43 +3328,44 @@ print_addr: } void annotation_line__write(struct annotation_line *al, struct annotation *notes, - struct annotation_write_ops *wops, - struct annotation_options *opts) + struct annotation_write_ops *wops) { __annotation_line__write(al, notes, wops->first_line, wops->current_entry, wops->change_color, wops->width, wops->obj, - opts->percent_type, + annotate_opts.percent_type, wops->set_color, wops->set_percent_color, wops->set_jumps_percent_color, wops->printf, wops->write_graph); } int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *options, struct arch **parch) + struct arch **parch) { struct symbol *sym = ms->sym; struct annotation *notes = symbol__annotation(sym); size_t size = symbol__size(sym); int nr_pcnt = 1, err; - notes->offsets = zalloc(size * sizeof(struct annotation_line *)); - if (notes->offsets == NULL) + notes->src->offsets = zalloc(size * sizeof(struct annotation_line *)); + if (notes->src->offsets == NULL) return ENOMEM; if (evsel__is_group_event(evsel)) nr_pcnt = evsel->core.nr_members; - err = symbol__annotate(ms, evsel, options, parch); + err = symbol__annotate(ms, evsel, parch); if (err) goto out_free_offsets; - notes->options = options; - symbol__calc_percent(sym, evsel); annotation__set_offsets(notes, size); annotation__mark_jump_targets(notes, sym); - annotation__compute_ipc(notes, size); + + err = annotation__compute_ipc(notes, size); + if (err) + goto out_free_offsets; + annotation__init_column_widths(notes, sym); notes->nr_events = nr_pcnt; @@ -3274,7 +3375,7 @@ int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel, return 0; out_free_offsets: - zfree(¬es->offsets); + zfree(¬es->src->offsets); return err; } @@ -3337,8 +3438,10 @@ static int annotation__config(const char *var, const char *value, void *data) return 0; } -void annotation_options__init(struct annotation_options *opt) +void annotation_options__init(void) { + struct annotation_options *opt = &annotate_opts; + memset(opt, 0, sizeof(*opt)); /* Default values. */ @@ -3349,16 +3452,15 @@ void annotation_options__init(struct annotation_options *opt) opt->percent_type = PERCENT_PERIOD_LOCAL; } - -void annotation_options__exit(struct annotation_options *opt) +void annotation_options__exit(void) { - zfree(&opt->disassembler_style); - zfree(&opt->objdump_path); + zfree(&annotate_opts.disassembler_style); + zfree(&annotate_opts.objdump_path); } -void annotation_config__init(struct annotation_options *opt) +void annotation_config__init(void) { - perf_config(annotation__config, opt); + perf_config(annotation__config, &annotate_opts); } static unsigned int parse_percent_type(char *str1, char *str2) @@ -3382,10 +3484,9 @@ static unsigned int parse_percent_type(char *str1, char *str2) return type; } -int annotate_parse_percent_type(const struct option *opt, const char *_str, +int annotate_parse_percent_type(const struct option *opt __maybe_unused, const char *_str, int unset __maybe_unused) { - struct annotation_options *opts = opt->value; unsigned int type; char *str1, *str2; int err = -1; @@ -3404,7 +3505,7 @@ int annotate_parse_percent_type(const struct option *opt, const char *_str, if (type == (unsigned int) -1) type = parse_percent_type(str2, str1); if (type != (unsigned int) -1) { - opts->percent_type = type; + annotate_opts.percent_type = type; err = 0; } @@ -3413,11 +3514,267 @@ out: return err; } -int annotate_check_args(struct annotation_options *args) +int annotate_check_args(void) { + struct annotation_options *args = &annotate_opts; + if (args->prefix_strip && !args->prefix) { pr_err("--prefix-strip requires --prefix\n"); return -1; } return 0; } + +/* + * Get register number and access offset from the given instruction. + * It assumes AT&T x86 asm format like OFFSET(REG). Maybe it needs + * to revisit the format when it handles different architecture. + * Fills @reg and @offset when return 0. + */ +static int extract_reg_offset(struct arch *arch, const char *str, + struct annotated_op_loc *op_loc) +{ + char *p; + char *regname; + + if (arch->objdump.register_char == 0) + return -1; + + /* + * It should start from offset, but it's possible to skip 0 + * in the asm. So 0(%rax) should be same as (%rax). + * + * However, it also start with a segment select register like + * %gs:0x18(%rbx). In that case it should skip the part. + */ + if (*str == arch->objdump.register_char) { + while (*str && !isdigit(*str) && + *str != arch->objdump.memory_ref_char) + str++; + } + + op_loc->offset = strtol(str, &p, 0); + + p = strchr(p, arch->objdump.register_char); + if (p == NULL) + return -1; + + regname = strdup(p); + if (regname == NULL) + return -1; + + op_loc->reg = get_dwarf_regnum(regname, 0); + free(regname); + return 0; +} + +/** + * annotate_get_insn_location - Get location of instruction + * @arch: the architecture info + * @dl: the target instruction + * @loc: a buffer to save the data + * + * Get detailed location info (register and offset) in the instruction. + * It needs both source and target operand and whether it accesses a + * memory location. The offset field is meaningful only when the + * corresponding mem flag is set. + * + * Some examples on x86: + * + * mov (%rax), %rcx # src_reg = rax, src_mem = 1, src_offset = 0 + * # dst_reg = rcx, dst_mem = 0 + * + * mov 0x18, %r8 # src_reg = -1, dst_reg = r8 + */ +int annotate_get_insn_location(struct arch *arch, struct disasm_line *dl, + struct annotated_insn_loc *loc) +{ + struct ins_operands *ops; + struct annotated_op_loc *op_loc; + int i; + + if (!strcmp(dl->ins.name, "lock")) + ops = dl->ops.locked.ops; + else + ops = &dl->ops; + + if (ops == NULL) + return -1; + + memset(loc, 0, sizeof(*loc)); + + for_each_insn_op_loc(loc, i, op_loc) { + const char *insn_str = ops->source.raw; + + if (i == INSN_OP_TARGET) + insn_str = ops->target.raw; + + /* Invalidate the register by default */ + op_loc->reg = -1; + + if (insn_str == NULL) + continue; + + if (strchr(insn_str, arch->objdump.memory_ref_char)) { + op_loc->mem_ref = true; + extract_reg_offset(arch, insn_str, op_loc); + } else { + char *s = strdup(insn_str); + + if (s) { + op_loc->reg = get_dwarf_regnum(s, 0); + free(s); + } + } + } + + return 0; +} + +static void symbol__ensure_annotate(struct map_symbol *ms, struct evsel *evsel) +{ + struct disasm_line *dl, *tmp_dl; + struct annotation *notes; + + notes = symbol__annotation(ms->sym); + if (!list_empty(¬es->src->source)) + return; + + if (symbol__annotate(ms, evsel, NULL) < 0) + return; + + /* remove non-insn disasm lines for simplicity */ + list_for_each_entry_safe(dl, tmp_dl, ¬es->src->source, al.node) { + if (dl->al.offset == -1) { + list_del(&dl->al.node); + free(dl); + } + } +} + +static struct disasm_line *find_disasm_line(struct symbol *sym, u64 ip) +{ + struct disasm_line *dl; + struct annotation *notes; + + notes = symbol__annotation(sym); + + list_for_each_entry(dl, ¬es->src->source, al.node) { + if (sym->start + dl->al.offset == ip) + return dl; + } + return NULL; +} + +static struct annotated_item_stat *annotate_data_stat(struct list_head *head, + const char *name) +{ + struct annotated_item_stat *istat; + + list_for_each_entry(istat, head, list) { + if (!strcmp(istat->name, name)) + return istat; + } + + istat = zalloc(sizeof(*istat)); + if (istat == NULL) + return NULL; + + istat->name = strdup(name); + if (istat->name == NULL) { + free(istat); + return NULL; + } + + list_add_tail(&istat->list, head); + return istat; +} + +/** + * hist_entry__get_data_type - find data type for given hist entry + * @he: hist entry + * + * This function first annotates the instruction at @he->ip and extracts + * register and offset info from it. Then it searches the DWARF debug + * info to get a variable and type information using the address, register, + * and offset. + */ +struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he) +{ + struct map_symbol *ms = &he->ms; + struct evsel *evsel = hists_to_evsel(he->hists); + struct arch *arch; + struct disasm_line *dl; + struct annotated_insn_loc loc; + struct annotated_op_loc *op_loc; + struct annotated_data_type *mem_type; + struct annotated_item_stat *istat; + u64 ip = he->ip; + int i; + + ann_data_stat.total++; + + if (ms->map == NULL || ms->sym == NULL) { + ann_data_stat.no_sym++; + return NULL; + } + + if (!symbol_conf.init_annotation) { + ann_data_stat.no_sym++; + return NULL; + } + + if (evsel__get_arch(evsel, &arch) < 0) { + ann_data_stat.no_insn++; + return NULL; + } + + /* Make sure it runs objdump to get disasm of the function */ + symbol__ensure_annotate(ms, evsel); + + /* + * Get a disasm to extract the location from the insn. + * This is too slow... + */ + dl = find_disasm_line(ms->sym, ip); + if (dl == NULL) { + ann_data_stat.no_insn++; + return NULL; + } + + istat = annotate_data_stat(&ann_insn_stat, dl->ins.name); + if (istat == NULL) { + ann_data_stat.no_insn++; + return NULL; + } + + if (annotate_get_insn_location(arch, dl, &loc) < 0) { + ann_data_stat.no_insn_ops++; + istat->bad++; + return NULL; + } + + for_each_insn_op_loc(&loc, i, op_loc) { + if (!op_loc->mem_ref) + continue; + + mem_type = find_data_type(ms, ip, op_loc->reg, op_loc->offset); + if (mem_type) + istat->good++; + else + istat->bad++; + + if (symbol_conf.annotate_data_sample) { + annotated_data_type__update_samples(mem_type, evsel, + op_loc->offset, + he->stat.nr_events, + he->stat.period); + } + he->mem_type_off = op_loc->offset; + return mem_type; + } + + ann_data_stat.no_mem_ops++; + istat->bad++; + return NULL; +} diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 962780559176..dba50762c6e8 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -23,6 +23,7 @@ struct option; struct perf_sample; struct evsel; struct symbol; +struct annotated_data_type; struct ins { const char *name; @@ -31,8 +32,6 @@ struct ins { struct ins_operands { char *raw; - char *raw_comment; - char *raw_func_start; struct { char *raw; char *name; @@ -41,22 +40,30 @@ struct ins_operands { s64 offset; bool offset_avail; bool outside; + bool multi_regs; } target; union { struct { char *raw; char *name; u64 addr; + bool multi_regs; } source; struct { struct ins ins; struct ins_operands *ops; } locked; + struct { + char *raw_comment; + char *raw_func_start; + } jump; }; }; struct arch; +bool arch__is(struct arch *arch, const char *name); + struct ins_ops { void (*free)(struct ins_operands *ops); int (*parse)(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms); @@ -101,6 +108,8 @@ struct annotation_options { unsigned int percent_type; }; +extern struct annotation_options annotate_opts; + enum { ANNOTATION__OFFSET_JUMP_TARGETS = 1, ANNOTATION__OFFSET_CALL, @@ -130,6 +139,13 @@ struct annotation_data { struct sym_hist_entry he; }; +struct cycles_info { + float ipc; + u64 avg; + u64 max; + u64 min; +}; + struct annotation_line { struct list_head node; struct rb_node rb_node; @@ -137,12 +153,9 @@ struct annotation_line { char *line; int line_nr; char *fileloc; - int jump_sources; - float ipc; - u64 cycles; - u64 cycles_max; - u64 cycles_min; char *path; + struct cycles_info *cycles; + int jump_sources; u32 idx; int idx_asm; int data_nr; @@ -214,8 +227,7 @@ struct annotation_write_ops { }; void annotation_line__write(struct annotation_line *al, struct annotation *notes, - struct annotation_write_ops *ops, - struct annotation_options *opts); + struct annotation_write_ops *ops); int __annotation__scnprintf_samples_period(struct annotation *notes, char *bf, size_t size, @@ -264,27 +276,29 @@ struct cyc_hist { * returns. */ struct annotated_source { - struct list_head source; - int nr_histograms; - size_t sizeof_sym_hist; - struct cyc_hist *cycles_hist; - struct sym_hist *histograms; + struct list_head source; + size_t sizeof_sym_hist; + struct sym_hist *histograms; + struct annotation_line **offsets; + int nr_histograms; + int nr_entries; + int nr_asm_entries; + u16 max_line_len; }; -struct LOCKABLE annotation { - u64 max_coverage; - u64 start; +struct annotated_branch { u64 hit_cycles; u64 hit_insn; unsigned int total_insn; unsigned int cover_insn; - struct annotation_options *options; - struct annotation_line **offsets; + struct cyc_hist *cycles_hist; + u64 max_coverage; +}; + +struct LOCKABLE annotation { + u64 start; int nr_events; int max_jump_sources; - int nr_entries; - int nr_asm_entries; - u16 max_line_len; struct { u8 addr; u8 jumps; @@ -293,8 +307,8 @@ struct LOCKABLE annotation { u8 max_addr; u8 max_ins_name; } widths; - bool have_cycles; struct annotated_source *src; + struct annotated_branch *branch; }; static inline void annotation__init(struct annotation *notes __maybe_unused) @@ -308,10 +322,10 @@ bool annotation__trylock(struct annotation *notes) EXCLUSIVE_TRYLOCK_FUNCTION(tr static inline int annotation__cycles_width(struct annotation *notes) { - if (notes->have_cycles && notes->options->show_minmax_cycle) + if (notes->branch && annotate_opts.show_minmax_cycle) return ANNOTATION__IPC_WIDTH + ANNOTATION__MINMAX_CYCLES_WIDTH; - return notes->have_cycles ? ANNOTATION__IPC_WIDTH + ANNOTATION__CYCLES_WIDTH : 0; + return notes->branch ? ANNOTATION__IPC_WIDTH + ANNOTATION__CYCLES_WIDTH : 0; } static inline int annotation__pcnt_width(struct annotation *notes) @@ -319,13 +333,12 @@ static inline int annotation__pcnt_width(struct annotation *notes) return (symbol_conf.show_total_period ? 12 : 7) * notes->nr_events; } -static inline bool annotation_line__filter(struct annotation_line *al, struct annotation *notes) +static inline bool annotation_line__filter(struct annotation_line *al) { - return notes->options->hide_src_code && al->offset == -1; + return annotate_opts.hide_src_code && al->offset == -1; } void annotation__set_offsets(struct annotation *notes, s64 size); -void annotation__compute_ipc(struct annotation *notes, size_t size); void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym); void annotation__update_column_widths(struct annotation *notes); void annotation__init_column_widths(struct annotation *notes, struct symbol *sym); @@ -349,6 +362,8 @@ static inline struct annotation *symbol__annotation(struct symbol *sym) int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample, struct evsel *evsel); +struct annotated_branch *annotation__get_branch(struct annotation *notes); + int addr_map_symbol__account_cycles(struct addr_map_symbol *ams, struct addr_map_symbol *start, unsigned cycles); @@ -361,11 +376,9 @@ void symbol__annotate_zero_histograms(struct symbol *sym); int symbol__annotate(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *options, struct arch **parch); int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *options, struct arch **parch); enum symbol_disassemble_errno { @@ -392,43 +405,86 @@ enum symbol_disassemble_errno { int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, size_t buflen); -int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *options); +int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel); void symbol__annotate_zero_histogram(struct symbol *sym, int evidx); void symbol__annotate_decay_histogram(struct symbol *sym, int evidx); void annotated_source__purge(struct annotated_source *as); -int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *opts); +int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel); bool ui__has_annotation(void); -int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel, struct annotation_options *opts); +int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel); -int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel, struct annotation_options *opts); +int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel); #ifdef HAVE_SLANG_SUPPORT int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, - struct hist_browser_timer *hbt, - struct annotation_options *opts); + struct hist_browser_timer *hbt); #else static inline int symbol__tui_annotate(struct map_symbol *ms __maybe_unused, struct evsel *evsel __maybe_unused, - struct hist_browser_timer *hbt __maybe_unused, - struct annotation_options *opts __maybe_unused) + struct hist_browser_timer *hbt __maybe_unused) { return 0; } #endif -void annotation_options__init(struct annotation_options *opt); -void annotation_options__exit(struct annotation_options *opt); +void annotation_options__init(void); +void annotation_options__exit(void); -void annotation_config__init(struct annotation_options *opt); +void annotation_config__init(void); int annotate_parse_percent_type(const struct option *opt, const char *_str, int unset); -int annotate_check_args(struct annotation_options *args); +int annotate_check_args(void); + +/** + * struct annotated_op_loc - Location info of instruction operand + * @reg: Register in the operand + * @offset: Memory access offset in the operand + * @mem_ref: Whether the operand accesses memory + */ +struct annotated_op_loc { + int reg; + int offset; + bool mem_ref; +}; + +enum annotated_insn_ops { + INSN_OP_SOURCE = 0, + INSN_OP_TARGET = 1, + + INSN_OP_MAX, +}; + +/** + * struct annotated_insn_loc - Location info of instruction + * @ops: Array of location info for source and target operands + */ +struct annotated_insn_loc { + struct annotated_op_loc ops[INSN_OP_MAX]; +}; + +#define for_each_insn_op_loc(insn_loc, i, op_loc) \ + for (i = INSN_OP_SOURCE, op_loc = &(insn_loc)->ops[i]; \ + i < INSN_OP_MAX; \ + i++, op_loc++) + +/* Get detailed location info in the instruction */ +int annotate_get_insn_location(struct arch *arch, struct disasm_line *dl, + struct annotated_insn_loc *loc); + +/* Returns a data type from the sample instruction (if any) */ +struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he); + +struct annotated_item_stat { + struct list_head list; + char *name; + int good; + int bad; +}; +extern struct list_head ann_insn_stat; #endif /* __PERF_ANNOTATE_H */ diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index a0368202a746..3684e6009b63 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -174,7 +174,7 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, struct evlist *evlist, struct evsel *evsel, int idx) { - bool per_cpu = !perf_cpu_map__empty(evlist->core.user_requested_cpus); + bool per_cpu = !perf_cpu_map__has_any_cpu_or_is_empty(evlist->core.user_requested_cpus); mp->mmap_needed = evsel->needs_auxtrace_mmap; @@ -648,7 +648,7 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr, static int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx) { - bool per_cpu_mmaps = !perf_cpu_map__empty(evlist->core.user_requested_cpus); + bool per_cpu_mmaps = !perf_cpu_map__has_any_cpu_or_is_empty(evlist->core.user_requested_cpus); if (per_cpu_mmaps) { struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->core.all_cpus, idx); @@ -1638,6 +1638,9 @@ int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts, case 'Z': synth_opts->timeless_decoding = true; break; + case 'T': + synth_opts->use_timestamp = true; + break; case ' ': case ',': break; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 29eb82dff574..55702215a82d 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -99,6 +99,7 @@ enum itrace_period_type { * @remote_access: whether to synthesize remote access events * @mem: whether to synthesize memory events * @timeless_decoding: prefer "timeless" decoding i.e. ignore timestamps + * @use_timestamp: use the timestamp trace as kernel time * @vm_time_correlation: perform VM Time Correlation * @vm_tm_corr_dry_run: VM Time Correlation dry-run * @vm_tm_corr_args: VM Time Correlation implementation-specific arguments @@ -146,6 +147,7 @@ struct itrace_synth_opts { bool remote_access; bool mem; bool timeless_decoding; + bool use_timestamp; bool vm_time_correlation; bool vm_tm_corr_dry_run; char *vm_tm_corr_args; @@ -678,6 +680,7 @@ bool auxtrace__evsel_is_auxtrace(struct perf_session *session, " q: quicker (less detailed) decoding\n" \ " A: approximate IPC\n" \ " Z: prefer to ignore timestamps (so-called \"timeless\" decoding)\n" \ +" T: use the timestamp trace as kernel time\n" \ " PERIOD[ns|us|ms|i|t]: specify period to sample stream\n" \ " concatenate multiple options. Default is iybxwpe or cewp\n" diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c index 591fc1edd385..dec910989701 100644 --- a/tools/perf/util/block-info.c +++ b/tools/perf/util/block-info.c @@ -129,9 +129,9 @@ int block_info__process_sym(struct hist_entry *he, struct block_hist *bh, al.sym = he->ms.sym; notes = symbol__annotation(he->ms.sym); - if (!notes || !notes->src || !notes->src->cycles_hist) + if (!notes || !notes->branch || !notes->branch->cycles_hist) return 0; - ch = notes->src->cycles_hist; + ch = notes->branch->cycles_hist; for (unsigned int i = 0; i < symbol__size(he->ms.sym); i++) { if (ch[i].num_aggr) { struct block_info *bi; @@ -464,8 +464,7 @@ void block_info__free_report(struct block_report *reps, int nr_reps) } int report__browse_block_hists(struct block_hist *bh, float min_percent, - struct evsel *evsel, struct perf_env *env, - struct annotation_options *annotation_opts) + struct evsel *evsel, struct perf_env *env) { int ret; @@ -477,8 +476,7 @@ int report__browse_block_hists(struct block_hist *bh, float min_percent, return 0; case 1: symbol_conf.report_individual_block = true; - ret = block_hists_tui_browse(bh, evsel, min_percent, - env, annotation_opts); + ret = block_hists_tui_browse(bh, evsel, min_percent, env); return ret; default: return -1; diff --git a/tools/perf/util/block-info.h b/tools/perf/util/block-info.h index 42e9dcc4cf0a..96f53e89795e 100644 --- a/tools/perf/util/block-info.h +++ b/tools/perf/util/block-info.h @@ -78,8 +78,7 @@ struct block_report *block_info__create_report(struct evlist *evlist, void block_info__free_report(struct block_report *reps, int nr_reps); int report__browse_block_hists(struct block_hist *bh, float min_percent, - struct evsel *evsel, struct perf_env *env, - struct annotation_options *annotation_opts); + struct evsel *evsel, struct perf_env *env); float block_info__total_cycles_percent(struct hist_entry *he); diff --git a/tools/perf/util/block-range.c b/tools/perf/util/block-range.c index 680e92774d0c..15c42196c24c 100644 --- a/tools/perf/util/block-range.c +++ b/tools/perf/util/block-range.c @@ -311,6 +311,7 @@ done: double block_range__coverage(struct block_range *br) { struct symbol *sym; + struct annotated_branch *branch; if (!br) { if (block_ranges.blocks) @@ -323,5 +324,9 @@ double block_range__coverage(struct block_range *br) if (!sym) return -1; - return (double)br->coverage / symbol__annotation(sym)->max_coverage; + branch = symbol__annotation(sym)->branch; + if (!branch) + return -1; + + return (double)br->coverage / branch->max_coverage; } diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 38fcf3ba5749..3573e0b7ef3e 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -386,6 +386,9 @@ int perf_event__synthesize_bpf_events(struct perf_session *session, int err; int fd; + if (opts->no_bpf_event) + return 0; + event = malloc(sizeof(event->bpf) + KSYM_NAME_LEN + machine->id_hdr_size); if (!event) return -1; @@ -542,9 +545,9 @@ int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env) return evlist__add_sb_event(evlist, &attr, bpf_event__sb_cb, env); } -void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, - struct perf_env *env, - FILE *fp) +void __bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, + struct perf_env *env, + FILE *fp) { __u32 *prog_lens = (__u32 *)(uintptr_t)(info->jited_func_lens); __u64 *prog_addrs = (__u64 *)(uintptr_t)(info->jited_ksyms); @@ -560,7 +563,7 @@ void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, if (info->btf_id) { struct btf_node *node; - node = perf_env__find_btf(env, info->btf_id); + node = __perf_env__find_btf(env, info->btf_id); if (node) btf = btf__new((__u8 *)(node->data), node->data_size); diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h index 1bcbd4fb6c66..e2f0420905f5 100644 --- a/tools/perf/util/bpf-event.h +++ b/tools/perf/util/bpf-event.h @@ -33,9 +33,9 @@ struct btf_node { int machine__process_bpf(struct machine *machine, union perf_event *event, struct perf_sample *sample); int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env); -void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, - struct perf_env *env, - FILE *fp); +void __bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, + struct perf_env *env, + FILE *fp); #else static inline int machine__process_bpf(struct machine *machine __maybe_unused, union perf_event *event __maybe_unused, @@ -50,9 +50,9 @@ static inline int evlist__add_bpf_sb_event(struct evlist *evlist __maybe_unused, return 0; } -static inline void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info __maybe_unused, - struct perf_env *env __maybe_unused, - FILE *fp __maybe_unused) +static inline void __bpf_event__print_bpf_prog_info(struct bpf_prog_info *info __maybe_unused, + struct perf_env *env __maybe_unused, + FILE *fp __maybe_unused) { } diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index 7f9b0e46e008..7a8af60e0f51 100644 --- a/tools/perf/util/bpf_counter.c +++ b/tools/perf/util/bpf_counter.c @@ -455,7 +455,7 @@ static int bperf__load(struct evsel *evsel, struct target *target) return -1; if (!all_cpu_map) { - all_cpu_map = perf_cpu_map__new(NULL); + all_cpu_map = perf_cpu_map__new_online_cpus(); if (!all_cpu_map) return -1; } diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c index f1716c089c99..31ff19afc20c 100644 --- a/tools/perf/util/bpf_lock_contention.c +++ b/tools/perf/util/bpf_lock_contention.c @@ -318,7 +318,7 @@ int lock_contention_read(struct lock_contention *con) } /* make sure it loads the kernel map */ - map__load(maps__first(machine->kmaps)->map); + maps__load_first(machine->kmaps); prev_key = NULL; while (!bpf_map_get_next_key(fd, prev_key, &key)) { diff --git a/tools/perf/util/compress.h b/tools/perf/util/compress.h index 0cd3369af2a4..b29109cd3609 100644 --- a/tools/perf/util/compress.h +++ b/tools/perf/util/compress.h @@ -3,6 +3,8 @@ #define PERF_COMPRESS_H #include <stdbool.h> +#include <stddef.h> +#include <sys/types.h> #ifdef HAVE_ZSTD_SUPPORT #include <zstd.h> #endif @@ -21,6 +23,7 @@ struct zstd_data { #ifdef HAVE_ZSTD_SUPPORT ZSTD_CStream *cstream; ZSTD_DStream *dstream; + int comp_level; #endif }; @@ -29,7 +32,7 @@ struct zstd_data { int zstd_init(struct zstd_data *data, int level); int zstd_fini(struct zstd_data *data); -size_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size, +ssize_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size, void *src, size_t src_size, size_t max_record_size, size_t process_header(void *record, size_t increment)); @@ -48,7 +51,7 @@ static inline int zstd_fini(struct zstd_data *data __maybe_unused) } static inline -size_t zstd_compress_stream_to_records(struct zstd_data *data __maybe_unused, +ssize_t zstd_compress_stream_to_records(struct zstd_data *data __maybe_unused, void *dst __maybe_unused, size_t dst_size __maybe_unused, void *src __maybe_unused, size_t src_size __maybe_unused, size_t max_record_size __maybe_unused, diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 0e090e8bc334..0581ee0fa5f2 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -672,7 +672,7 @@ struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */ static struct perf_cpu_map *online; if (!online) - online = perf_cpu_map__new(NULL); /* from /sys/devices/system/cpu/online */ + online = perf_cpu_map__new_online_cpus(); /* from /sys/devices/system/cpu/online */ return online; } diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c index 81cfc85f4668..8bbeb2dc76fd 100644 --- a/tools/perf/util/cputopo.c +++ b/tools/perf/util/cputopo.c @@ -267,7 +267,7 @@ struct cpu_topology *cpu_topology__new(void) ncpus = cpu__max_present_cpu().cpu; /* build online CPU map */ - map = perf_cpu_map__new(NULL); + map = perf_cpu_map__new_online_cpus(); if (map == NULL) { pr_debug("failed to get system cpumap\n"); return NULL; diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index a9873d14c632..d65d7485886c 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -3346,12 +3346,27 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event, etm->metadata = metadata; etm->auxtrace_type = auxtrace_info->type; - /* Use virtual timestamps if all ETMs report ts_source = 1 */ - etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu); + if (etm->synth_opts.use_timestamp) + /* + * Prior to Armv8.4, Arm CPUs don't support FEAT_TRF feature, + * therefore the decoder cannot know if the timestamp trace is + * same with the kernel time. + * + * If a user has knowledge for the working platform and can + * specify itrace option 'T' to tell decoder to forcely use the + * traced timestamp as the kernel time. + */ + etm->has_virtual_ts = true; + else + /* Use virtual timestamps if all ETMs report ts_source = 1 */ + etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu); if (!etm->has_virtual_ts) ui__warning("Virtual timestamps are not enabled, or not supported by the traced system.\n" - "The time field of the samples will not be set accurately.\n\n"); + "The time field of the samples will not be set accurately.\n" + "For Arm CPUs prior to Armv8.4 or without support FEAT_TRF,\n" + "you can specify the itrace option 'T' for timestamp decoding\n" + "if the Coresight timestamp on the platform is same with the kernel time.\n\n"); etm->auxtrace.process_event = cs_etm__process_event; etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event; diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index b9fb71ab7a73..106429155c2e 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -253,8 +253,8 @@ static struct call_path *call_path_from_sample(struct db_export *dbe, */ addr_location__init(&al); al.sym = node->ms.sym; - al.map = node->ms.map; - al.maps = thread__maps(thread); + al.map = map__get(node->ms.map); + al.maps = maps__get(thread__maps(thread)); al.addr = node->ip; if (al.map && !al.sym) diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 88378c4c5dd9..e282b4ceb4d2 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -38,12 +38,21 @@ bool dump_trace = false, quiet = false; int debug_ordered_events; static int redirect_to_stderr; int debug_data_convert; -static FILE *debug_file; +static FILE *_debug_file; bool debug_display_time; +FILE *debug_file(void) +{ + if (!_debug_file) { + pr_warning_once("debug_file not set"); + debug_set_file(stderr); + } + return _debug_file; +} + void debug_set_file(FILE *file) { - debug_file = file; + _debug_file = file; } void debug_set_display_time(bool set) @@ -78,8 +87,8 @@ int veprintf(int level, int var, const char *fmt, va_list args) if (use_browser >= 1 && !redirect_to_stderr) { ui_helpline__vshow(fmt, args); } else { - ret = fprintf_time(debug_file); - ret += vfprintf(debug_file, fmt, args); + ret = fprintf_time(debug_file()); + ret += vfprintf(debug_file(), fmt, args); } } @@ -107,9 +116,8 @@ static int veprintf_time(u64 t, const char *fmt, va_list args) nsecs -= secs * NSEC_PER_SEC; usecs = nsecs / NSEC_PER_USEC; - ret = fprintf(stderr, "[%13" PRIu64 ".%06" PRIu64 "] ", - secs, usecs); - ret += vfprintf(stderr, fmt, args); + ret = fprintf(debug_file(), "[%13" PRIu64 ".%06" PRIu64 "] ", secs, usecs); + ret += vfprintf(debug_file(), fmt, args); return ret; } diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index f99468a7f681..de8870980d44 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -77,6 +77,7 @@ int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __printf(4, 5) int veprintf(int level, int var, const char *fmt, va_list args); int perf_debug_option(const char *str); +FILE *debug_file(void); void debug_set_file(FILE *file); void debug_set_display_time(bool set); void perf_debug_setup(void); diff --git a/tools/perf/util/debuginfo.c b/tools/perf/util/debuginfo.c new file mode 100644 index 000000000000..19acf4775d35 --- /dev/null +++ b/tools/perf/util/debuginfo.c @@ -0,0 +1,205 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * DWARF debug information handling code. Copied from probe-finder.c. + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + */ + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <linux/zalloc.h> + +#include "build-id.h" +#include "dso.h" +#include "debug.h" +#include "debuginfo.h" +#include "symbol.h" + +#ifdef HAVE_DEBUGINFOD_SUPPORT +#include <elfutils/debuginfod.h> +#endif + +/* Dwarf FL wrappers */ +static char *debuginfo_path; /* Currently dummy */ + +static const Dwfl_Callbacks offline_callbacks = { + .find_debuginfo = dwfl_standard_find_debuginfo, + .debuginfo_path = &debuginfo_path, + + .section_address = dwfl_offline_section_address, + + /* We use this table for core files too. */ + .find_elf = dwfl_build_id_find_elf, +}; + +/* Get a Dwarf from offline image */ +static int debuginfo__init_offline_dwarf(struct debuginfo *dbg, + const char *path) +{ + GElf_Addr dummy; + int fd; + + fd = open(path, O_RDONLY); + if (fd < 0) + return fd; + + dbg->dwfl = dwfl_begin(&offline_callbacks); + if (!dbg->dwfl) + goto error; + + dwfl_report_begin(dbg->dwfl); + dbg->mod = dwfl_report_offline(dbg->dwfl, "", "", fd); + if (!dbg->mod) + goto error; + + dbg->dbg = dwfl_module_getdwarf(dbg->mod, &dbg->bias); + if (!dbg->dbg) + goto error; + + dwfl_module_build_id(dbg->mod, &dbg->build_id, &dummy); + + dwfl_report_end(dbg->dwfl, NULL, NULL); + + return 0; +error: + if (dbg->dwfl) + dwfl_end(dbg->dwfl); + else + close(fd); + memset(dbg, 0, sizeof(*dbg)); + + return -ENOENT; +} + +static struct debuginfo *__debuginfo__new(const char *path) +{ + struct debuginfo *dbg = zalloc(sizeof(*dbg)); + if (!dbg) + return NULL; + + if (debuginfo__init_offline_dwarf(dbg, path) < 0) + zfree(&dbg); + if (dbg) + pr_debug("Open Debuginfo file: %s\n", path); + return dbg; +} + +enum dso_binary_type distro_dwarf_types[] = { + DSO_BINARY_TYPE__FEDORA_DEBUGINFO, + DSO_BINARY_TYPE__UBUNTU_DEBUGINFO, + DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO, + DSO_BINARY_TYPE__BUILDID_DEBUGINFO, + DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO, + DSO_BINARY_TYPE__NOT_FOUND, +}; + +struct debuginfo *debuginfo__new(const char *path) +{ + enum dso_binary_type *type; + char buf[PATH_MAX], nil = '\0'; + struct dso *dso; + struct debuginfo *dinfo = NULL; + struct build_id bid; + + /* Try to open distro debuginfo files */ + dso = dso__new(path); + if (!dso) + goto out; + + /* Set the build id for DSO_BINARY_TYPE__BUILDID_DEBUGINFO */ + if (is_regular_file(path) && filename__read_build_id(path, &bid) > 0) + dso__set_build_id(dso, &bid); + + for (type = distro_dwarf_types; + !dinfo && *type != DSO_BINARY_TYPE__NOT_FOUND; + type++) { + if (dso__read_binary_type_filename(dso, *type, &nil, + buf, PATH_MAX) < 0) + continue; + dinfo = __debuginfo__new(buf); + } + dso__put(dso); + +out: + /* if failed to open all distro debuginfo, open given binary */ + return dinfo ? : __debuginfo__new(path); +} + +void debuginfo__delete(struct debuginfo *dbg) +{ + if (dbg) { + if (dbg->dwfl) + dwfl_end(dbg->dwfl); + free(dbg); + } +} + +/* For the kernel module, we need a special code to get a DIE */ +int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs, + bool adjust_offset) +{ + int n, i; + Elf32_Word shndx; + Elf_Scn *scn; + Elf *elf; + GElf_Shdr mem, *shdr; + const char *p; + + elf = dwfl_module_getelf(dbg->mod, &dbg->bias); + if (!elf) + return -EINVAL; + + /* Get the number of relocations */ + n = dwfl_module_relocations(dbg->mod); + if (n < 0) + return -ENOENT; + /* Search the relocation related .text section */ + for (i = 0; i < n; i++) { + p = dwfl_module_relocation_info(dbg->mod, i, &shndx); + if (strcmp(p, ".text") == 0) { + /* OK, get the section header */ + scn = elf_getscn(elf, shndx); + if (!scn) + return -ENOENT; + shdr = gelf_getshdr(scn, &mem); + if (!shdr) + return -ENOENT; + *offs = shdr->sh_addr; + if (adjust_offset) + *offs -= shdr->sh_offset; + } + } + return 0; +} + +#ifdef HAVE_DEBUGINFOD_SUPPORT +int get_source_from_debuginfod(const char *raw_path, + const char *sbuild_id, char **new_path) +{ + debuginfod_client *c = debuginfod_begin(); + const char *p = raw_path; + int fd; + + if (!c) + return -ENOMEM; + + fd = debuginfod_find_source(c, (const unsigned char *)sbuild_id, + 0, p, new_path); + pr_debug("Search %s from debuginfod -> %d\n", p, fd); + if (fd >= 0) + close(fd); + debuginfod_end(c); + if (fd < 0) { + pr_debug("Failed to find %s in debuginfod (%s)\n", + raw_path, sbuild_id); + return -ENOENT; + } + pr_debug("Got a source %s\n", *new_path); + + return 0; +} +#endif /* HAVE_DEBUGINFOD_SUPPORT */ diff --git a/tools/perf/util/debuginfo.h b/tools/perf/util/debuginfo.h new file mode 100644 index 000000000000..4d65b8c605fc --- /dev/null +++ b/tools/perf/util/debuginfo.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _PERF_DEBUGINFO_H +#define _PERF_DEBUGINFO_H + +#include <errno.h> +#include <linux/compiler.h> + +#ifdef HAVE_DWARF_SUPPORT + +#include "dwarf-aux.h" + +/* debug information structure */ +struct debuginfo { + Dwarf *dbg; + Dwfl_Module *mod; + Dwfl *dwfl; + Dwarf_Addr bias; + const unsigned char *build_id; +}; + +/* This also tries to open distro debuginfo */ +struct debuginfo *debuginfo__new(const char *path); +void debuginfo__delete(struct debuginfo *dbg); + +int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs, + bool adjust_offset); + +#else /* HAVE_DWARF_SUPPORT */ + +/* dummy debug information structure */ +struct debuginfo { +}; + +static inline struct debuginfo *debuginfo__new(const char *path __maybe_unused) +{ + return NULL; +} + +static inline void debuginfo__delete(struct debuginfo *dbg __maybe_unused) +{ +} + +static inline int debuginfo__get_text_offset(struct debuginfo *dbg __maybe_unused, + Dwarf_Addr *offs __maybe_unused, + bool adjust_offset __maybe_unused) +{ + return -EINVAL; +} + +#endif /* HAVE_DWARF_SUPPORT */ + +#ifdef HAVE_DEBUGINFOD_SUPPORT +int get_source_from_debuginfod(const char *raw_path, const char *sbuild_id, + char **new_path); +#else /* HAVE_DEBUGINFOD_SUPPORT */ +static inline int get_source_from_debuginfod(const char *raw_path __maybe_unused, + const char *sbuild_id __maybe_unused, + char **new_path __maybe_unused) +{ + return -ENOTSUP; +} +#endif /* HAVE_DEBUGINFOD_SUPPORT */ + +#endif /* _PERF_DEBUGINFO_H */ diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 1f629b6fb7cf..22fd5fa806ed 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -31,6 +31,7 @@ #include "debug.h" #include "string2.h" #include "vdso.h" +#include "annotate-data.h" static const char * const debuglink_paths[] = { "%.0s%s", @@ -1327,6 +1328,7 @@ struct dso *dso__new_id(const char *name, struct dso_id *id) dso->data.cache = RB_ROOT; dso->inlined_nodes = RB_ROOT_CACHED; dso->srclines = RB_ROOT_CACHED; + dso->data_types = RB_ROOT; dso->data.fd = -1; dso->data.status = DSO_DATA_STATUS_UNKNOWN; dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND; @@ -1370,6 +1372,8 @@ void dso__delete(struct dso *dso) symbols__delete(&dso->symbols); dso->symbol_names_len = 0; zfree(&dso->symbol_names); + annotated_data_type__tree_delete(&dso->data_types); + if (dso->short_name_allocated) { zfree((char **)&dso->short_name); dso->short_name_allocated = false; diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 3759de8c2267..ce9f3849a773 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -154,6 +154,8 @@ struct dso { size_t symbol_names_len; struct rb_root_cached inlined_nodes; struct rb_root_cached srclines; + struct rb_root data_types; + struct { u64 addr; struct symbol *symbol; diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 2941d88f2199..7aa5fee0da19 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -1051,32 +1051,28 @@ Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name, } /** - * die_get_typename - Get the name of given variable DIE - * @vr_die: a variable DIE + * die_get_typename_from_type - Get the name of given type DIE + * @type_die: a type DIE * @buf: a strbuf for result type name * - * Get the name of @vr_die and stores it to @buf. Return 0 if succeeded. + * Get the name of @type_die and stores it to @buf. Return 0 if succeeded. * and Return -ENOENT if failed to find type name. * Note that the result will stores typedef name if possible, and stores * "*(function_type)" if the type is a function pointer. */ -int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf) +int die_get_typename_from_type(Dwarf_Die *type_die, struct strbuf *buf) { - Dwarf_Die type; int tag, ret; const char *tmp = ""; - if (__die_get_real_type(vr_die, &type) == NULL) - return -ENOENT; - - tag = dwarf_tag(&type); + tag = dwarf_tag(type_die); if (tag == DW_TAG_array_type || tag == DW_TAG_pointer_type) tmp = "*"; else if (tag == DW_TAG_subroutine_type) { /* Function pointer */ return strbuf_add(buf, "(function_type)", 15); } else { - const char *name = dwarf_diename(&type); + const char *name = dwarf_diename(type_die); if (tag == DW_TAG_union_type) tmp = "union "; @@ -1089,8 +1085,35 @@ int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf) /* Write a base name */ return strbuf_addf(buf, "%s%s", tmp, name ?: ""); } - ret = die_get_typename(&type, buf); - return ret ? ret : strbuf_addstr(buf, tmp); + ret = die_get_typename(type_die, buf); + if (ret < 0) { + /* void pointer has no type attribute */ + if (tag == DW_TAG_pointer_type && ret == -ENOENT) + return strbuf_addf(buf, "void*"); + + return ret; + } + return strbuf_addstr(buf, tmp); +} + +/** + * die_get_typename - Get the name of given variable DIE + * @vr_die: a variable DIE + * @buf: a strbuf for result type name + * + * Get the name of @vr_die and stores it to @buf. Return 0 if succeeded. + * and Return -ENOENT if failed to find type name. + * Note that the result will stores typedef name if possible, and stores + * "*(function_type)" if the type is a function pointer. + */ +int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf) +{ + Dwarf_Die type; + + if (__die_get_real_type(vr_die, &type) == NULL) + return -ENOENT; + + return die_get_typename_from_type(&type, buf); } /** @@ -1238,12 +1261,151 @@ int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf) out: return ret; } -#else -int die_get_var_range(Dwarf_Die *sp_die __maybe_unused, - Dwarf_Die *vr_die __maybe_unused, - struct strbuf *buf __maybe_unused) + +/* Interval parameters for __die_find_var_reg_cb() */ +struct find_var_data { + /* Target instruction address */ + Dwarf_Addr pc; + /* Target memory address (for global data) */ + Dwarf_Addr addr; + /* Target register */ + unsigned reg; + /* Access offset, set for global data */ + int offset; +}; + +/* Max number of registers DW_OP_regN supports */ +#define DWARF_OP_DIRECT_REGS 32 + +/* Only checks direct child DIEs in the given scope. */ +static int __die_find_var_reg_cb(Dwarf_Die *die_mem, void *arg) +{ + struct find_var_data *data = arg; + int tag = dwarf_tag(die_mem); + ptrdiff_t off = 0; + Dwarf_Attribute attr; + Dwarf_Addr base, start, end; + Dwarf_Op *ops; + size_t nops; + + if (tag != DW_TAG_variable && tag != DW_TAG_formal_parameter) + return DIE_FIND_CB_SIBLING; + + if (dwarf_attr(die_mem, DW_AT_location, &attr) == NULL) + return DIE_FIND_CB_SIBLING; + + while ((off = dwarf_getlocations(&attr, off, &base, &start, &end, &ops, &nops)) > 0) { + /* Assuming the location list is sorted by address */ + if (end < data->pc) + continue; + if (start > data->pc) + break; + + /* Only match with a simple case */ + if (data->reg < DWARF_OP_DIRECT_REGS) { + if (ops->atom == (DW_OP_reg0 + data->reg) && nops == 1) + return DIE_FIND_CB_END; + } else { + if (ops->atom == DW_OP_regx && ops->number == data->reg && + nops == 1) + return DIE_FIND_CB_END; + } + } + return DIE_FIND_CB_SIBLING; +} + +/** + * die_find_variable_by_reg - Find a variable saved in a register + * @sc_die: a scope DIE + * @pc: the program address to find + * @reg: the register number to find + * @die_mem: a buffer to save the resulting DIE + * + * Find the variable DIE accessed by the given register. + */ +Dwarf_Die *die_find_variable_by_reg(Dwarf_Die *sc_die, Dwarf_Addr pc, int reg, + Dwarf_Die *die_mem) +{ + struct find_var_data data = { + .pc = pc, + .reg = reg, + }; + return die_find_child(sc_die, __die_find_var_reg_cb, &data, die_mem); +} + +/* Only checks direct child DIEs in the given scope */ +static int __die_find_var_addr_cb(Dwarf_Die *die_mem, void *arg) +{ + struct find_var_data *data = arg; + int tag = dwarf_tag(die_mem); + ptrdiff_t off = 0; + Dwarf_Attribute attr; + Dwarf_Addr base, start, end; + Dwarf_Word size; + Dwarf_Die type_die; + Dwarf_Op *ops; + size_t nops; + + if (tag != DW_TAG_variable) + return DIE_FIND_CB_SIBLING; + + if (dwarf_attr(die_mem, DW_AT_location, &attr) == NULL) + return DIE_FIND_CB_SIBLING; + + while ((off = dwarf_getlocations(&attr, off, &base, &start, &end, &ops, &nops)) > 0) { + if (ops->atom != DW_OP_addr) + continue; + + if (data->addr < ops->number) + continue; + + if (data->addr == ops->number) { + /* Update offset relative to the start of the variable */ + data->offset = 0; + return DIE_FIND_CB_END; + } + + if (die_get_real_type(die_mem, &type_die) == NULL) + continue; + + if (dwarf_aggregate_size(&type_die, &size) < 0) + continue; + + if (data->addr >= ops->number + size) + continue; + + /* Update offset relative to the start of the variable */ + data->offset = data->addr - ops->number; + return DIE_FIND_CB_END; + } + return DIE_FIND_CB_SIBLING; +} + +/** + * die_find_variable_by_addr - Find variable located at given address + * @sc_die: a scope DIE + * @pc: the program address to find + * @addr: the data address to find + * @die_mem: a buffer to save the resulting DIE + * @offset: the offset in the resulting type + * + * Find the variable DIE located at the given address (in PC-relative mode). + * This is usually for global variables. + */ +Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die, Dwarf_Addr pc, + Dwarf_Addr addr, Dwarf_Die *die_mem, + int *offset) { - return -ENOTSUP; + struct find_var_data data = { + .pc = pc, + .addr = addr, + }; + Dwarf_Die *result; + + result = die_find_child(sc_die, __die_find_var_addr_cb, &data, die_mem); + if (result) + *offset = data.offset; + return result; } #endif @@ -1425,3 +1587,56 @@ void die_skip_prologue(Dwarf_Die *sp_die, Dwarf_Die *cu_die, *entrypc = postprologue_addr; } + +/* Internal parameters for __die_find_scope_cb() */ +struct find_scope_data { + /* Target instruction address */ + Dwarf_Addr pc; + /* Number of scopes found [output] */ + int nr; + /* Array of scopes found, 0 for the outermost one. [output] */ + Dwarf_Die *scopes; +}; + +static int __die_find_scope_cb(Dwarf_Die *die_mem, void *arg) +{ + struct find_scope_data *data = arg; + + if (dwarf_haspc(die_mem, data->pc)) { + Dwarf_Die *tmp; + + tmp = realloc(data->scopes, (data->nr + 1) * sizeof(*tmp)); + if (tmp == NULL) + return DIE_FIND_CB_END; + + memcpy(tmp + data->nr, die_mem, sizeof(*die_mem)); + data->scopes = tmp; + data->nr++; + return DIE_FIND_CB_CHILD; + } + return DIE_FIND_CB_SIBLING; +} + +/** + * die_get_scopes - Return a list of scopes including the address + * @cu_die: a compile unit DIE + * @pc: the address to find + * @scopes: the array of DIEs for scopes (result) + * + * This function does the same as the dwarf_getscopes() but doesn't follow + * the origins of inlined functions. It returns the number of scopes saved + * in the @scopes argument. The outer scope will be saved first (index 0) and + * the last one is the innermost scope at the @pc. + */ +int die_get_scopes(Dwarf_Die *cu_die, Dwarf_Addr pc, Dwarf_Die **scopes) +{ + struct find_scope_data data = { + .pc = pc, + }; + Dwarf_Die die_mem; + + die_find_child(cu_die, __die_find_scope_cb, &data, &die_mem); + + *scopes = data.scopes; + return data.nr; +} diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index 7ec8bc1083bb..4e64caac6df8 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h @@ -116,12 +116,14 @@ Dwarf_Die *die_find_variable_at(Dwarf_Die *sp_die, const char *name, Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name, Dwarf_Die *die_mem); +/* Get the name of given type DIE */ +int die_get_typename_from_type(Dwarf_Die *type_die, struct strbuf *buf); + /* Get the name of given variable DIE */ int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf); /* Get the name and type of given variable DIE, stored as "type\tname" */ int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf); -int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf); /* Check if target program is compiled with optimization */ bool die_is_optimized_target(Dwarf_Die *cu_die); @@ -130,4 +132,49 @@ bool die_is_optimized_target(Dwarf_Die *cu_die); void die_skip_prologue(Dwarf_Die *sp_die, Dwarf_Die *cu_die, Dwarf_Addr *entrypc); -#endif +/* Get the list of including scopes */ +int die_get_scopes(Dwarf_Die *cu_die, Dwarf_Addr pc, Dwarf_Die **scopes); + +#ifdef HAVE_DWARF_GETLOCATIONS_SUPPORT + +/* Get byte offset range of given variable DIE */ +int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf); + +/* Find a variable saved in the 'reg' at given address */ +Dwarf_Die *die_find_variable_by_reg(Dwarf_Die *sc_die, Dwarf_Addr pc, int reg, + Dwarf_Die *die_mem); + +/* Find a (global) variable located in the 'addr' */ +Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die, Dwarf_Addr pc, + Dwarf_Addr addr, Dwarf_Die *die_mem, + int *offset); + +#else /* HAVE_DWARF_GETLOCATIONS_SUPPORT */ + +static inline int die_get_var_range(Dwarf_Die *sp_die __maybe_unused, + Dwarf_Die *vr_die __maybe_unused, + struct strbuf *buf __maybe_unused) +{ + return -ENOTSUP; +} + +static inline Dwarf_Die *die_find_variable_by_reg(Dwarf_Die *sc_die __maybe_unused, + Dwarf_Addr pc __maybe_unused, + int reg __maybe_unused, + Dwarf_Die *die_mem __maybe_unused) +{ + return NULL; +} + +static inline Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die __maybe_unused, + Dwarf_Addr pc __maybe_unused, + Dwarf_Addr addr __maybe_unused, + Dwarf_Die *die_mem __maybe_unused, + int *offset __maybe_unused) +{ + return NULL; +} + +#endif /* HAVE_DWARF_GETLOCATIONS_SUPPORT */ + +#endif /* _DWARF_AUX_H */ diff --git a/tools/perf/util/dwarf-regs.c b/tools/perf/util/dwarf-regs.c index 69cfaa5953bf..5b7f86c0063f 100644 --- a/tools/perf/util/dwarf-regs.c +++ b/tools/perf/util/dwarf-regs.c @@ -5,9 +5,12 @@ * Written by: Masami Hiramatsu <mhiramat@kernel.org> */ +#include <stdlib.h> +#include <string.h> #include <debug.h> #include <dwarf-regs.h> #include <elf.h> +#include <errno.h> #include <linux/kernel.h> #ifndef EM_AARCH64 @@ -68,3 +71,34 @@ const char *get_dwarf_regstr(unsigned int n, unsigned int machine) } return NULL; } + +__weak int get_arch_regnum(const char *name __maybe_unused) +{ + return -ENOTSUP; +} + +/* Return DWARF register number from architecture register name */ +int get_dwarf_regnum(const char *name, unsigned int machine) +{ + char *regname = strdup(name); + int reg = -1; + char *p; + + if (regname == NULL) + return -EINVAL; + + /* For convenience, remove trailing characters */ + p = strpbrk(regname, " ,)"); + if (p) + *p = '\0'; + + switch (machine) { + case EM_NONE: /* Generic arch - use host arch */ + reg = get_arch_regnum(regname); + break; + default: + pr_err("ELF MACHINE %x is not supported.\n", machine); + } + free(regname); + return reg; +} diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 44140b7f596a..a459374d0a1a 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -3,6 +3,7 @@ #include "debug.h" #include "env.h" #include "util/header.h" +#include "linux/compiler.h" #include <linux/ctype.h> #include <linux/zalloc.h> #include "cgroup.h" @@ -12,6 +13,7 @@ #include <string.h> #include "pmus.h" #include "strbuf.h" +#include "trace/beauty/beauty.h" struct perf_env perf_env; @@ -23,12 +25,18 @@ struct perf_env perf_env; void perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node) { + down_write(&env->bpf_progs.lock); + __perf_env__insert_bpf_prog_info(env, info_node); + up_write(&env->bpf_progs.lock); +} + +void __perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node) +{ __u32 prog_id = info_node->info_linear->info.id; struct bpf_prog_info_node *node; struct rb_node *parent = NULL; struct rb_node **p; - down_write(&env->bpf_progs.lock); p = &env->bpf_progs.infos.rb_node; while (*p != NULL) { @@ -40,15 +48,13 @@ void perf_env__insert_bpf_prog_info(struct perf_env *env, p = &(*p)->rb_right; } else { pr_debug("duplicated bpf prog info %u\n", prog_id); - goto out; + return; } } rb_link_node(&info_node->rb_node, parent, p); rb_insert_color(&info_node->rb_node, &env->bpf_progs.infos); env->bpf_progs.infos_cnt++; -out: - up_write(&env->bpf_progs.lock); } struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env, @@ -78,13 +84,21 @@ out: bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node) { + bool ret; + + down_write(&env->bpf_progs.lock); + ret = __perf_env__insert_btf(env, btf_node); + up_write(&env->bpf_progs.lock); + return ret; +} + +bool __perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node) +{ struct rb_node *parent = NULL; __u32 btf_id = btf_node->id; struct btf_node *node; struct rb_node **p; - bool ret = true; - down_write(&env->bpf_progs.lock); p = &env->bpf_progs.btfs.rb_node; while (*p != NULL) { @@ -96,25 +110,31 @@ bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node) p = &(*p)->rb_right; } else { pr_debug("duplicated btf %u\n", btf_id); - ret = false; - goto out; + return false; } } rb_link_node(&btf_node->rb_node, parent, p); rb_insert_color(&btf_node->rb_node, &env->bpf_progs.btfs); env->bpf_progs.btfs_cnt++; -out: - up_write(&env->bpf_progs.lock); - return ret; + return true; } struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id) { + struct btf_node *res; + + down_read(&env->bpf_progs.lock); + res = __perf_env__find_btf(env, btf_id); + up_read(&env->bpf_progs.lock); + return res; +} + +struct btf_node *__perf_env__find_btf(struct perf_env *env, __u32 btf_id) +{ struct btf_node *node = NULL; struct rb_node *n; - down_read(&env->bpf_progs.lock); n = env->bpf_progs.btfs.rb_node; while (n) { @@ -124,13 +144,9 @@ struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id) else if (btf_id > node->id) n = n->rb_right; else - goto out; + return node; } - node = NULL; - -out: - up_read(&env->bpf_progs.lock); - return node; + return NULL; } /* purge data in bpf_progs.infos tree */ @@ -453,6 +469,18 @@ const char *perf_env__arch(struct perf_env *env) return normalize_arch(arch_name); } +const char *perf_env__arch_strerrno(struct perf_env *env __maybe_unused, int err __maybe_unused) +{ +#if defined(HAVE_SYSCALL_TABLE_SUPPORT) && defined(HAVE_LIBTRACEEVENT) + if (env->arch_strerrno == NULL) + env->arch_strerrno = arch_syscalls__strerrno_function(perf_env__arch(env)); + + return env->arch_strerrno ? env->arch_strerrno(err) : "no arch specific strerrno function"; +#else + return "!(HAVE_SYSCALL_TABLE_SUPPORT && HAVE_LIBTRACEEVENT)"; +#endif +} + const char *perf_env__cpuid(struct perf_env *env) { int status; @@ -531,6 +559,24 @@ int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu) return cpu.cpu >= 0 && cpu.cpu < env->nr_numa_map ? env->numa_map[cpu.cpu] : -1; } +bool perf_env__has_pmu_mapping(struct perf_env *env, const char *pmu_name) +{ + char *pmu_mapping = env->pmu_mappings, *colon; + + for (int i = 0; i < env->nr_pmu_mappings; ++i) { + if (strtoul(pmu_mapping, &colon, 0) == ULONG_MAX || *colon != ':') + goto out_error; + + pmu_mapping = colon + 1; + if (strcmp(pmu_mapping, pmu_name) == 0) + return true; + + pmu_mapping += strlen(pmu_mapping) + 1; + } +out_error: + return false; +} + char *perf_env__find_pmu_cap(struct perf_env *env, const char *pmu_name, const char *cap) { diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 4566c51f2fd9..7c527e65c186 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -46,10 +46,17 @@ struct hybrid_node { struct pmu_caps { int nr_caps; unsigned int max_branches; + unsigned int br_cntr_nr; + unsigned int br_cntr_width; + char **caps; char *pmu_name; }; +typedef const char *(arch_syscalls__strerrno_t)(int err); + +arch_syscalls__strerrno_t *arch_syscalls__strerrno_function(const char *arch); + struct perf_env { char *hostname; char *os_release; @@ -62,6 +69,8 @@ struct perf_env { unsigned long long total_mem; unsigned int msr_pmu_type; unsigned int max_branches; + unsigned int br_cntr_nr; + unsigned int br_cntr_width; int kernel_is_64_bit; int nr_cmdline; @@ -130,6 +139,7 @@ struct perf_env { */ bool enabled; } clock; + arch_syscalls__strerrno_t *arch_strerrno; }; enum perf_compress_type { @@ -159,19 +169,26 @@ int perf_env__read_cpu_topology_map(struct perf_env *env); void cpu_cache_level__free(struct cpu_cache_level *cache); const char *perf_env__arch(struct perf_env *env); +const char *perf_env__arch_strerrno(struct perf_env *env, int err); const char *perf_env__cpuid(struct perf_env *env); const char *perf_env__raw_arch(struct perf_env *env); int perf_env__nr_cpus_avail(struct perf_env *env); void perf_env__init(struct perf_env *env); +void __perf_env__insert_bpf_prog_info(struct perf_env *env, + struct bpf_prog_info_node *info_node); void perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node); struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env, __u32 prog_id); bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node); +bool __perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node); struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id); +struct btf_node *__perf_env__find_btf(struct perf_env *env, __u32 btf_id); int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu); char *perf_env__find_pmu_cap(struct perf_env *env, const char *pmu_name, const char *cap); + +bool perf_env__has_pmu_mapping(struct perf_env *env, const char *pmu_name); #endif /* __PERF_ENV_H */ diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 923c0fb15122..68f45e9e63b6 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -617,13 +617,13 @@ struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr, if (cpumode == PERF_RECORD_MISC_KERNEL && perf_host) { al->level = 'k'; maps = machine__kernel_maps(machine); - load_map = true; + load_map = !symbol_conf.lazy_load_kernel_maps; } else if (cpumode == PERF_RECORD_MISC_USER && perf_host) { al->level = '.'; } else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL && perf_guest) { al->level = 'g'; maps = machine__kernel_maps(machine); - load_map = true; + load_map = !symbol_conf.lazy_load_kernel_maps; } else if (cpumode == PERF_RECORD_MISC_GUEST_USER && perf_guest) { al->level = 'u'; } else { diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index e36da58522ef..95f25e9fb994 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1056,7 +1056,7 @@ int evlist__create_maps(struct evlist *evlist, struct target *target) return -1; if (target__uses_dummy_map(target)) - cpus = perf_cpu_map__dummy_new(); + cpus = perf_cpu_map__new_any_cpu(); else cpus = perf_cpu_map__new(target->cpu_list); @@ -1352,7 +1352,7 @@ static int evlist__create_syswide_maps(struct evlist *evlist) * error, and we may not want to do that fallback to a * default cpu identity map :-\ */ - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); if (!cpus) goto out; @@ -2518,3 +2518,33 @@ void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_lis } perf_cpu_map__put(user_requested_cpus); } + +void evlist__uniquify_name(struct evlist *evlist) +{ + char *new_name, empty_attributes[2] = ":", *attributes; + struct evsel *pos; + + if (perf_pmus__num_core_pmus() == 1) + return; + + evlist__for_each_entry(evlist, pos) { + if (!evsel__is_hybrid(pos)) + continue; + + if (strchr(pos->name, '/')) + continue; + + attributes = strchr(pos->name, ':'); + if (attributes) + *attributes = '\0'; + else + attributes = empty_attributes; + + if (asprintf(&new_name, "%s/%s/%s", pos->pmu_name, pos->name, attributes + 1)) { + free(pos->name); + pos->name = new_name; + } else { + *attributes = ':'; + } + } +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 98e7ddb2bd30..cb91dc9117a2 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -442,5 +442,6 @@ struct evsel *evlist__find_evsel(struct evlist *evlist, int idx); int evlist__scnprintf_evsels(struct evlist *evlist, size_t size, char *bf); void evlist__check_mem_load_aux(struct evlist *evlist); void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_list); +void evlist__uniquify_name(struct evlist *evlist); #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 72a5dfc38d38..6d7c9c58a9bc 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1801,7 +1801,7 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus, if (cpus == NULL) { if (empty_cpu_map == NULL) { - empty_cpu_map = perf_cpu_map__dummy_new(); + empty_cpu_map = perf_cpu_map__new_any_cpu(); if (empty_cpu_map == NULL) return -ENOMEM; } @@ -1832,6 +1832,8 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus, static void evsel__disable_missing_features(struct evsel *evsel) { + if (perf_missing_features.branch_counters) + evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_COUNTERS; if (perf_missing_features.read_lost) evsel->core.attr.read_format &= ~PERF_FORMAT_LOST; if (perf_missing_features.weight_struct) { @@ -1885,7 +1887,12 @@ bool evsel__detect_missing_features(struct evsel *evsel) * Must probe features in the order they were added to the * perf_event_attr interface. */ - if (!perf_missing_features.read_lost && + if (!perf_missing_features.branch_counters && + (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS)) { + perf_missing_features.branch_counters = true; + pr_debug2("switching off branch counters support\n"); + return true; + } else if (!perf_missing_features.read_lost && (evsel->core.attr.read_format & PERF_FORMAT_LOST)) { perf_missing_features.read_lost = true; pr_debug2("switching off PERF_FORMAT_LOST support\n"); @@ -2318,6 +2325,22 @@ u64 evsel__bitfield_swap_branch_flags(u64 value) return new_val; } +static inline bool evsel__has_branch_counters(const struct evsel *evsel) +{ + struct evsel *cur, *leader = evsel__leader(evsel); + + /* The branch counters feature only supports group */ + if (!leader || !evsel->evlist) + return false; + + evlist__for_each_entry(evsel->evlist, cur) { + if ((leader == evsel__leader(cur)) && + (cur->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS)) + return true; + } + return false; +} + int evsel__parse_sample(struct evsel *evsel, union perf_event *event, struct perf_sample *data) { @@ -2551,6 +2574,16 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, OVERFLOW_CHECK(array, sz, max_size); array = (void *)array + sz; + + if (evsel__has_branch_counters(evsel)) { + OVERFLOW_CHECK_u64(array); + + data->branch_stack_cntr = (u64 *)array; + sz = data->branch_stack->nr * sizeof(u64); + + OVERFLOW_CHECK(array, sz, max_size); + array = (void *)array + sz; + } } if (type & PERF_SAMPLE_REGS_USER) { @@ -2820,7 +2853,8 @@ u64 evsel__intval_common(struct evsel *evsel, struct perf_sample *sample, const #endif -bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize) +bool evsel__fallback(struct evsel *evsel, struct target *target, int err, + char *msg, size_t msgsize) { int paranoid; @@ -2828,18 +2862,19 @@ bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize) evsel->core.attr.type == PERF_TYPE_HARDWARE && evsel->core.attr.config == PERF_COUNT_HW_CPU_CYCLES) { /* - * If it's cycles then fall back to hrtimer based - * cpu-clock-tick sw counter, which is always available even if - * no PMU support. + * If it's cycles then fall back to hrtimer based cpu-clock sw + * counter, which is always available even if no PMU support. * * PPC returns ENXIO until 2.6.37 (behavior changed with commit * b0a873e). */ - scnprintf(msg, msgsize, "%s", -"The cycles event is not supported, trying to fall back to cpu-clock-ticks"); - evsel->core.attr.type = PERF_TYPE_SOFTWARE; - evsel->core.attr.config = PERF_COUNT_SW_CPU_CLOCK; + evsel->core.attr.config = target__has_cpu(target) + ? PERF_COUNT_SW_CPU_CLOCK + : PERF_COUNT_SW_TASK_CLOCK; + scnprintf(msg, msgsize, + "The cycles event is not supported, trying to fall back to %s", + target__has_cpu(target) ? "cpu-clock" : "task-clock"); zfree(&evsel->name); return true; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index d791316a1792..efbb6e848287 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -191,6 +191,7 @@ struct perf_missing_features { bool code_page_size; bool weight_struct; bool read_lost; + bool branch_counters; }; extern struct perf_missing_features perf_missing_features; @@ -459,7 +460,8 @@ static inline bool evsel__is_clock(const struct evsel *evsel) evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK); } -bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize); +bool evsel__fallback(struct evsel *evsel, struct target *target, int err, + char *msg, size_t msgsize); int evsel__open_strerror(struct evsel *evsel, struct target *target, int err, char *msg, size_t size); diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c index fefc72066c4e..ac17a3cb59dc 100644 --- a/tools/perf/util/genelf.c +++ b/tools/perf/util/genelf.c @@ -293,9 +293,9 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym, */ phdr = elf_newphdr(e, 1); phdr[0].p_type = PT_LOAD; - phdr[0].p_offset = 0; - phdr[0].p_vaddr = 0; - phdr[0].p_paddr = 0; + phdr[0].p_offset = GEN_ELF_TEXT_OFFSET; + phdr[0].p_vaddr = GEN_ELF_TEXT_OFFSET; + phdr[0].p_paddr = GEN_ELF_TEXT_OFFSET; phdr[0].p_filesz = csize; phdr[0].p_memsz = csize; phdr[0].p_flags = PF_X | PF_R; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index e86b9439ffee..3fe28edc3d01 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1444,7 +1444,9 @@ static int build_mem_topology(struct memory_node **nodesp, u64 *cntp) nodes = new_nodes; size += 4; } - ret = memory_node__read(&nodes[cnt++], idx); + ret = memory_node__read(&nodes[cnt], idx); + if (!ret) + cnt += 1; } out: closedir(dir); @@ -1847,8 +1849,8 @@ static void print_bpf_prog_info(struct feat_fd *ff, FILE *fp) node = rb_entry(next, struct bpf_prog_info_node, rb_node); next = rb_next(&node->rb_node); - bpf_event__print_bpf_prog_info(&node->info_linear->info, - env, fp); + __bpf_event__print_bpf_prog_info(&node->info_linear->info, + env, fp); } up_read(&env->bpf_progs.lock); @@ -2145,6 +2147,14 @@ static void print_pmu_caps(struct feat_fd *ff, FILE *fp) __print_pmu_caps(fp, pmu_caps->nr_caps, pmu_caps->caps, pmu_caps->pmu_name); } + + if (strcmp(perf_env__arch(&ff->ph->env), "x86") == 0 && + perf_env__has_pmu_mapping(&ff->ph->env, "ibs_op")) { + char *max_precise = perf_env__find_pmu_cap(&ff->ph->env, "cpu", "max_precise"); + + if (max_precise != NULL && atoi(max_precise) == 0) + fprintf(fp, "# AMD systems uses ibs_op// PMU for some precise events, e.g.: cycles:p, see the 'perf list' man page for further details.\n"); + } } static void print_pmu_mappings(struct feat_fd *ff, FILE *fp) @@ -3178,7 +3188,7 @@ static int process_bpf_prog_info(struct feat_fd *ff, void *data __maybe_unused) /* after reading from file, translate offset to address */ bpil_offs_to_addr(info_linear); info_node->info_linear = info_linear; - perf_env__insert_bpf_prog_info(env, info_node); + __perf_env__insert_bpf_prog_info(env, info_node); } up_write(&env->bpf_progs.lock); @@ -3225,7 +3235,7 @@ static int process_bpf_btf(struct feat_fd *ff, void *data __maybe_unused) if (__do_read(ff, node->data, data_size)) goto out; - perf_env__insert_btf(env, node); + __perf_env__insert_btf(env, node); node = NULL; } @@ -3259,7 +3269,9 @@ static int process_compressed(struct feat_fd *ff, } static int __process_pmu_caps(struct feat_fd *ff, int *nr_caps, - char ***caps, unsigned int *max_branches) + char ***caps, unsigned int *max_branches, + unsigned int *br_cntr_nr, + unsigned int *br_cntr_width) { char *name, *value, *ptr; u32 nr_pmu_caps, i; @@ -3294,6 +3306,12 @@ static int __process_pmu_caps(struct feat_fd *ff, int *nr_caps, if (!strcmp(name, "branches")) *max_branches = atoi(value); + if (!strcmp(name, "branch_counter_nr")) + *br_cntr_nr = atoi(value); + + if (!strcmp(name, "branch_counter_width")) + *br_cntr_width = atoi(value); + free(value); free(name); } @@ -3318,7 +3336,9 @@ static int process_cpu_pmu_caps(struct feat_fd *ff, { int ret = __process_pmu_caps(ff, &ff->ph->env.nr_cpu_pmu_caps, &ff->ph->env.cpu_pmu_caps, - &ff->ph->env.max_branches); + &ff->ph->env.max_branches, + &ff->ph->env.br_cntr_nr, + &ff->ph->env.br_cntr_width); if (!ret && !ff->ph->env.cpu_pmu_caps) pr_debug("cpu pmu capabilities not available\n"); @@ -3347,7 +3367,9 @@ static int process_pmu_caps(struct feat_fd *ff, void *data __maybe_unused) for (i = 0; i < nr_pmu; i++) { ret = __process_pmu_caps(ff, &pmu_caps[i].nr_caps, &pmu_caps[i].caps, - &pmu_caps[i].max_branches); + &pmu_caps[i].max_branches, + &pmu_caps[i].br_cntr_nr, + &pmu_caps[i].br_cntr_width); if (ret) goto err; @@ -4369,9 +4391,10 @@ size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp) ret += fprintf(fp, "... "); map = cpu_map__new_data(&ev->cpus.cpus); - if (map) + if (map) { ret += cpu_map__fprintf(map, fp); - else + perf_cpu_map__put(map); + } else ret += fprintf(fp, "failed to get cpus\n"); break; default: diff --git a/tools/perf/util/hisi-ptt.c b/tools/perf/util/hisi-ptt.c index 43bd1ca62d58..52d0ce302ca0 100644 --- a/tools/perf/util/hisi-ptt.c +++ b/tools/perf/util/hisi-ptt.c @@ -123,6 +123,7 @@ static int hisi_ptt_process_auxtrace_event(struct perf_session *session, if (dump_trace) hisi_ptt_dump_event(ptt, data, size); + free(data); return 0; } diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index afc9f1c7f4dc..4a0aea0c9e00 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -82,6 +82,9 @@ enum hist_column { HISTC_ADDR_TO, HISTC_ADDR, HISTC_SIMD, + HISTC_TYPE, + HISTC_TYPE_OFFSET, + HISTC_SYMBOL_OFFSET, HISTC_NR_COLS, /* Last entry */ }; @@ -457,7 +460,6 @@ struct hist_browser_timer { int refresh; }; -struct annotation_options; struct res_sample; enum rstype { @@ -473,16 +475,13 @@ struct block_hist; void attr_to_script(char *buf, struct perf_event_attr *attr); int map_symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, - struct hist_browser_timer *hbt, - struct annotation_options *annotation_opts); + struct hist_browser_timer *hbt); int hist_entry__tui_annotate(struct hist_entry *he, struct evsel *evsel, - struct hist_browser_timer *hbt, - struct annotation_options *annotation_opts); + struct hist_browser_timer *hbt); int evlist__tui_browse_hists(struct evlist *evlist, const char *help, struct hist_browser_timer *hbt, - float min_pcnt, struct perf_env *env, bool warn_lost_event, - struct annotation_options *annotation_options); + float min_pcnt, struct perf_env *env, bool warn_lost_event); int script_browse(const char *script_opt, struct evsel *evsel); @@ -492,8 +491,7 @@ int res_sample_browse(struct res_sample *res_samples, int num_res, void res_sample_init(void); int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel, - float min_percent, struct perf_env *env, - struct annotation_options *annotation_opts); + float min_percent, struct perf_env *env); #else static inline int evlist__tui_browse_hists(struct evlist *evlist __maybe_unused, @@ -501,23 +499,20 @@ int evlist__tui_browse_hists(struct evlist *evlist __maybe_unused, struct hist_browser_timer *hbt __maybe_unused, float min_pcnt __maybe_unused, struct perf_env *env __maybe_unused, - bool warn_lost_event __maybe_unused, - struct annotation_options *annotation_options __maybe_unused) + bool warn_lost_event __maybe_unused) { return 0; } static inline int map_symbol__tui_annotate(struct map_symbol *ms __maybe_unused, struct evsel *evsel __maybe_unused, - struct hist_browser_timer *hbt __maybe_unused, - struct annotation_options *annotation_options __maybe_unused) + struct hist_browser_timer *hbt __maybe_unused) { return 0; } static inline int hist_entry__tui_annotate(struct hist_entry *he __maybe_unused, struct evsel *evsel __maybe_unused, - struct hist_browser_timer *hbt __maybe_unused, - struct annotation_options *annotation_opts __maybe_unused) + struct hist_browser_timer *hbt __maybe_unused) { return 0; } @@ -541,8 +536,7 @@ static inline void res_sample_init(void) {} static inline int block_hists_tui_browse(struct block_hist *bh __maybe_unused, struct evsel *evsel __maybe_unused, float min_percent __maybe_unused, - struct perf_env *env __maybe_unused, - struct annotation_options *annotation_opts __maybe_unused) + struct perf_env *env __maybe_unused) { return 0; } diff --git a/tools/perf/util/include/dwarf-regs.h b/tools/perf/util/include/dwarf-regs.h index 7d99a084e82d..01fb25a1150a 100644 --- a/tools/perf/util/include/dwarf-regs.h +++ b/tools/perf/util/include/dwarf-regs.h @@ -2,6 +2,9 @@ #ifndef _PERF_DWARF_REGS_H_ #define _PERF_DWARF_REGS_H_ +#define DWARF_REG_PC 0xd3af9c /* random number */ +#define DWARF_REG_FB 0xd3affb /* random number */ + #ifdef HAVE_DWARF_SUPPORT const char *get_arch_regstr(unsigned int n); /* @@ -10,6 +13,22 @@ const char *get_arch_regstr(unsigned int n); * machine: ELF machine signature (EM_*) */ const char *get_dwarf_regstr(unsigned int n, unsigned int machine); + +int get_arch_regnum(const char *name); +/* + * get_dwarf_regnum - Returns DWARF regnum from register name + * name: architecture register name + * machine: ELF machine signature (EM_*) + */ +int get_dwarf_regnum(const char *name, unsigned int machine); + +#else /* HAVE_DWARF_SUPPORT */ + +static inline int get_dwarf_regnum(const char *name __maybe_unused, + unsigned int machine __maybe_unused) +{ + return -1; +} #endif #ifdef HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 90c750150b19..b397a769006f 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -453,7 +453,7 @@ static struct thread *findnew_guest_code(struct machine *machine, * Guest code can be found in hypervisor process at the same address * so copy host maps. */ - err = maps__clone(thread, thread__maps(host_thread)); + err = maps__copy_from(thread__maps(thread), thread__maps(host_thread)); thread__put(host_thread); if (err) goto out_err; @@ -1285,33 +1285,46 @@ static u64 find_entry_trampoline(struct dso *dso) #define X86_64_CPU_ENTRY_AREA_SIZE 0x2c000 #define X86_64_ENTRY_TRAMPOLINE 0x6000 +struct machine__map_x86_64_entry_trampolines_args { + struct maps *kmaps; + bool found; +}; + +static int machine__map_x86_64_entry_trampolines_cb(struct map *map, void *data) +{ + struct machine__map_x86_64_entry_trampolines_args *args = data; + struct map *dest_map; + struct kmap *kmap = __map__kmap(map); + + if (!kmap || !is_entry_trampoline(kmap->name)) + return 0; + + dest_map = maps__find(args->kmaps, map__pgoff(map)); + if (dest_map != map) + map__set_pgoff(map, map__map_ip(dest_map, map__pgoff(map))); + + args->found = true; + return 0; +} + /* Map x86_64 PTI entry trampolines */ int machine__map_x86_64_entry_trampolines(struct machine *machine, struct dso *kernel) { - struct maps *kmaps = machine__kernel_maps(machine); + struct machine__map_x86_64_entry_trampolines_args args = { + .kmaps = machine__kernel_maps(machine), + .found = false, + }; int nr_cpus_avail, cpu; - bool found = false; - struct map_rb_node *rb_node; u64 pgoff; /* * In the vmlinux case, pgoff is a virtual address which must now be * mapped to a vmlinux offset. */ - maps__for_each_entry(kmaps, rb_node) { - struct map *dest_map, *map = rb_node->map; - struct kmap *kmap = __map__kmap(map); - - if (!kmap || !is_entry_trampoline(kmap->name)) - continue; + maps__for_each_map(args.kmaps, machine__map_x86_64_entry_trampolines_cb, &args); - dest_map = maps__find(kmaps, map__pgoff(map)); - if (dest_map != map) - map__set_pgoff(map, map__map_ip(dest_map, map__pgoff(map))); - found = true; - } - if (found || machine->trampolines_mapped) + if (args.found || machine->trampolines_mapped) return 0; pgoff = find_entry_trampoline(kernel); @@ -1359,8 +1372,7 @@ __machine__create_kernel_maps(struct machine *machine, struct dso *kernel) if (machine->vmlinux_map == NULL) return -ENOMEM; - map__set_map_ip(machine->vmlinux_map, identity__map_ip); - map__set_unmap_ip(machine->vmlinux_map, identity__map_ip); + map__set_mapping_type(machine->vmlinux_map, MAPPING_TYPE__IDENTITY); return maps__insert(machine__kernel_maps(machine), machine->vmlinux_map); } @@ -1750,12 +1762,11 @@ int machine__create_kernel_maps(struct machine *machine) if (end == ~0ULL) { /* update end address of the kernel map using adjacent module address */ - struct map_rb_node *rb_node = maps__find_node(machine__kernel_maps(machine), - machine__kernel_map(machine)); - struct map_rb_node *next = map_rb_node__next(rb_node); + struct map *next = maps__find_next_entry(machine__kernel_maps(machine), + machine__kernel_map(machine)); if (next) - machine__set_kernel_mmap(machine, start, map__start(next->map)); + machine__set_kernel_mmap(machine, start, map__start(next)); } out_put: @@ -2157,9 +2168,13 @@ int machine__process_exit_event(struct machine *machine, union perf_event *event if (dump_trace) perf_event__fprintf_task(event, stdout); - if (thread != NULL) - thread__put(thread); - + if (thread != NULL) { + if (symbol_conf.keep_exited_threads) + thread__set_exited(thread, /*exited=*/true); + else + machine__remove_thread(machine, thread); + } + thread__put(thread); return 0; } @@ -3395,16 +3410,8 @@ int machine__for_each_dso(struct machine *machine, machine__dso_t fn, void *priv int machine__for_each_kernel_map(struct machine *machine, machine__map_t fn, void *priv) { struct maps *maps = machine__kernel_maps(machine); - struct map_rb_node *pos; - int err = 0; - maps__for_each_entry(maps, pos) { - err = fn(pos->map, priv); - if (err != 0) { - break; - } - } - return err; + return maps__for_each_map(maps, fn, priv); } bool machine__is_lock_function(struct machine *machine, u64 addr) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index f64b83004421..54c67cb7ecef 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -109,8 +109,7 @@ void map__init(struct map *map, u64 start, u64 end, u64 pgoff, struct dso *dso) map__set_pgoff(map, pgoff); map__set_reloc(map, 0); map__set_dso(map, dso__get(dso)); - map__set_map_ip(map, map__dso_map_ip); - map__set_unmap_ip(map, map__dso_unmap_ip); + map__set_mapping_type(map, MAPPING_TYPE__DSO); map__set_erange_warned(map, false); refcount_set(map__refcnt(map), 1); } @@ -172,7 +171,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, map__init(result, start, start + len, pgoff, dso); if (anon || no_dso) { - map->map_ip = map->unmap_ip = identity__map_ip; + map->mapping_type = MAPPING_TYPE__IDENTITY; /* * Set memory without DSO as loaded. All map__find_* @@ -630,18 +629,3 @@ struct maps *map__kmaps(struct map *map) } return kmap->kmaps; } - -u64 map__dso_map_ip(const struct map *map, u64 ip) -{ - return ip - map__start(map) + map__pgoff(map); -} - -u64 map__dso_unmap_ip(const struct map *map, u64 ip) -{ - return ip + map__start(map) - map__pgoff(map); -} - -u64 identity__map_ip(const struct map *map __maybe_unused, u64 ip) -{ - return ip; -} diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 1b53d53adc86..49756716cb13 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -16,23 +16,25 @@ struct dso; struct maps; struct machine; +enum mapping_type { + /* map__map_ip/map__unmap_ip are given as offsets in the DSO. */ + MAPPING_TYPE__DSO, + /* map__map_ip/map__unmap_ip are just the given ip value. */ + MAPPING_TYPE__IDENTITY, +}; + DECLARE_RC_STRUCT(map) { u64 start; u64 end; - bool erange_warned:1; - bool priv:1; - u32 prot; u64 pgoff; u64 reloc; - - /* ip -> dso rip */ - u64 (*map_ip)(const struct map *, u64); - /* dso rip -> ip */ - u64 (*unmap_ip)(const struct map *, u64); - struct dso *dso; refcount_t refcnt; + u32 prot; u32 flags; + enum mapping_type mapping_type:8; + bool erange_warned; + bool priv; }; struct kmap; @@ -41,38 +43,11 @@ struct kmap *__map__kmap(struct map *map); struct kmap *map__kmap(struct map *map); struct maps *map__kmaps(struct map *map); -/* ip -> dso rip */ -u64 map__dso_map_ip(const struct map *map, u64 ip); -/* dso rip -> ip */ -u64 map__dso_unmap_ip(const struct map *map, u64 ip); -/* Returns ip */ -u64 identity__map_ip(const struct map *map __maybe_unused, u64 ip); - static inline struct dso *map__dso(const struct map *map) { return RC_CHK_ACCESS(map)->dso; } -static inline u64 map__map_ip(const struct map *map, u64 ip) -{ - return RC_CHK_ACCESS(map)->map_ip(map, ip); -} - -static inline u64 map__unmap_ip(const struct map *map, u64 ip) -{ - return RC_CHK_ACCESS(map)->unmap_ip(map, ip); -} - -static inline void *map__map_ip_ptr(struct map *map) -{ - return RC_CHK_ACCESS(map)->map_ip; -} - -static inline void* map__unmap_ip_ptr(struct map *map) -{ - return RC_CHK_ACCESS(map)->unmap_ip; -} - static inline u64 map__start(const struct map *map) { return RC_CHK_ACCESS(map)->start; @@ -123,6 +98,34 @@ static inline size_t map__size(const struct map *map) return map__end(map) - map__start(map); } +/* ip -> dso rip */ +static inline u64 map__dso_map_ip(const struct map *map, u64 ip) +{ + return ip - map__start(map) + map__pgoff(map); +} + +/* dso rip -> ip */ +static inline u64 map__dso_unmap_ip(const struct map *map, u64 rip) +{ + return rip + map__start(map) - map__pgoff(map); +} + +static inline u64 map__map_ip(const struct map *map, u64 ip_or_rip) +{ + if ((RC_CHK_ACCESS(map)->mapping_type) == MAPPING_TYPE__DSO) + return map__dso_map_ip(map, ip_or_rip); + else + return ip_or_rip; +} + +static inline u64 map__unmap_ip(const struct map *map, u64 ip_or_rip) +{ + if ((RC_CHK_ACCESS(map)->mapping_type) == MAPPING_TYPE__DSO) + return map__dso_unmap_ip(map, ip_or_rip); + else + return ip_or_rip; +} + /* rip/ip <-> addr suitable for passing to `objdump --start-address=` */ u64 map__rip_2objdump(struct map *map, u64 rip); @@ -294,13 +297,13 @@ static inline void map__set_dso(struct map *map, struct dso *dso) RC_CHK_ACCESS(map)->dso = dso; } -static inline void map__set_map_ip(struct map *map, u64 (*map_ip)(const struct map *map, u64 ip)) +static inline void map__set_mapping_type(struct map *map, enum mapping_type type) { - RC_CHK_ACCESS(map)->map_ip = map_ip; + RC_CHK_ACCESS(map)->mapping_type = type; } -static inline void map__set_unmap_ip(struct map *map, u64 (*unmap_ip)(const struct map *map, u64 rip)) +static inline enum mapping_type map__mapping_type(struct map *map) { - RC_CHK_ACCESS(map)->unmap_ip = unmap_ip; + return RC_CHK_ACCESS(map)->mapping_type; } #endif /* __PERF_MAP_H */ diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c index 233438c95b53..0334fc18d9c6 100644 --- a/tools/perf/util/maps.c +++ b/tools/perf/util/maps.c @@ -10,6 +10,68 @@ #include "ui/ui.h" #include "unwind.h" +struct map_rb_node { + struct rb_node rb_node; + struct map *map; +}; + +#define maps__for_each_entry(maps, map) \ + for (map = maps__first(maps); map; map = map_rb_node__next(map)) + +#define maps__for_each_entry_safe(maps, map, next) \ + for (map = maps__first(maps), next = map_rb_node__next(map); map; \ + map = next, next = map_rb_node__next(map)) + +static struct rb_root *maps__entries(struct maps *maps) +{ + return &RC_CHK_ACCESS(maps)->entries; +} + +static struct rw_semaphore *maps__lock(struct maps *maps) +{ + return &RC_CHK_ACCESS(maps)->lock; +} + +static struct map **maps__maps_by_name(struct maps *maps) +{ + return RC_CHK_ACCESS(maps)->maps_by_name; +} + +static struct map_rb_node *maps__first(struct maps *maps) +{ + struct rb_node *first = rb_first(maps__entries(maps)); + + if (first) + return rb_entry(first, struct map_rb_node, rb_node); + return NULL; +} + +static struct map_rb_node *map_rb_node__next(struct map_rb_node *node) +{ + struct rb_node *next; + + if (!node) + return NULL; + + next = rb_next(&node->rb_node); + + if (!next) + return NULL; + + return rb_entry(next, struct map_rb_node, rb_node); +} + +static struct map_rb_node *maps__find_node(struct maps *maps, struct map *map) +{ + struct map_rb_node *rb_node; + + maps__for_each_entry(maps, rb_node) { + if (rb_node->RC_CHK_ACCESS(map) == RC_CHK_ACCESS(map)) + return rb_node; + } + return NULL; +} + static void maps__init(struct maps *maps, struct machine *machine) { refcount_set(maps__refcnt(maps), 1); @@ -196,6 +258,41 @@ void maps__put(struct maps *maps) RC_CHK_PUT(maps); } +int maps__for_each_map(struct maps *maps, int (*cb)(struct map *map, void *data), void *data) +{ + struct map_rb_node *pos; + int ret = 0; + + down_read(maps__lock(maps)); + maps__for_each_entry(maps, pos) { + ret = cb(pos->map, data); + if (ret) + break; + } + up_read(maps__lock(maps)); + return ret; +} + +void maps__remove_maps(struct maps *maps, bool (*cb)(struct map *map, void *data), void *data) +{ + struct map_rb_node *pos, *next; + unsigned int start_nr_maps; + + down_write(maps__lock(maps)); + + start_nr_maps = maps__nr_maps(maps); + maps__for_each_entry_safe(maps, pos, next) { + if (cb(pos->map, data)) { + __maps__remove(maps, pos); + --RC_CHK_ACCESS(maps)->nr_maps; + } + } + if (maps__maps_by_name(maps) && start_nr_maps != maps__nr_maps(maps)) + __maps__free_maps_by_name(maps); + + up_write(maps__lock(maps)); +} + struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp) { struct map *map = maps__find(maps, addr); @@ -210,31 +307,40 @@ struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp) return NULL; } -struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp) -{ +struct maps__find_symbol_by_name_args { + struct map **mapp; + const char *name; struct symbol *sym; - struct map_rb_node *pos; +}; - down_read(maps__lock(maps)); +static int maps__find_symbol_by_name_cb(struct map *map, void *data) +{ + struct maps__find_symbol_by_name_args *args = data; - maps__for_each_entry(maps, pos) { - sym = map__find_symbol_by_name(pos->map, name); + args->sym = map__find_symbol_by_name(map, args->name); + if (!args->sym) + return 0; - if (sym == NULL) - continue; - if (!map__contains_symbol(pos->map, sym)) { - sym = NULL; - continue; - } - if (mapp != NULL) - *mapp = pos->map; - goto out; + if (!map__contains_symbol(map, args->sym)) { + args->sym = NULL; + return 0; } - sym = NULL; -out: - up_read(maps__lock(maps)); - return sym; + if (args->mapp != NULL) + *args->mapp = map__get(map); + return 1; +} + +struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp) +{ + struct maps__find_symbol_by_name_args args = { + .mapp = mapp, + .name = name, + .sym = NULL, + }; + + maps__for_each_map(maps, maps__find_symbol_by_name_cb, &args); + return args.sym; } int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams) @@ -253,41 +359,46 @@ int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams) return ams->ms.sym ? 0 : -1; } -size_t maps__fprintf(struct maps *maps, FILE *fp) -{ - size_t printed = 0; - struct map_rb_node *pos; +struct maps__fprintf_args { + FILE *fp; + size_t printed; +}; - down_read(maps__lock(maps)); +static int maps__fprintf_cb(struct map *map, void *data) +{ + struct maps__fprintf_args *args = data; - maps__for_each_entry(maps, pos) { - printed += fprintf(fp, "Map:"); - printed += map__fprintf(pos->map, fp); - if (verbose > 2) { - printed += dso__fprintf(map__dso(pos->map), fp); - printed += fprintf(fp, "--\n"); - } + args->printed += fprintf(args->fp, "Map:"); + args->printed += map__fprintf(map, args->fp); + if (verbose > 2) { + args->printed += dso__fprintf(map__dso(map), args->fp); + args->printed += fprintf(args->fp, "--\n"); } + return 0; +} - up_read(maps__lock(maps)); +size_t maps__fprintf(struct maps *maps, FILE *fp) +{ + struct maps__fprintf_args args = { + .fp = fp, + .printed = 0, + }; + + maps__for_each_map(maps, maps__fprintf_cb, &args); - return printed; + return args.printed; } -int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp) +/* + * Find first map where end > map->start. + * Same as find_vma() in kernel. + */ +static struct rb_node *first_ending_after(struct maps *maps, const struct map *map) { struct rb_root *root; struct rb_node *next, *first; - int err = 0; - - down_write(maps__lock(maps)); root = maps__entries(maps); - - /* - * Find first map where end > map->start. - * Same as find_vma() in kernel. - */ next = root->rb_node; first = NULL; while (next) { @@ -301,8 +412,23 @@ int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp) } else next = next->rb_right; } + return first; +} - next = first; +/* + * Adds new to maps, if new overlaps existing entries then the existing maps are + * adjusted or removed so that new fits without overlapping any entries. + */ +int maps__fixup_overlap_and_insert(struct maps *maps, struct map *new) +{ + + struct rb_node *next; + int err = 0; + FILE *fp = debug_file(); + + down_write(maps__lock(maps)); + + next = first_ending_after(maps, new); while (next && !err) { struct map_rb_node *pos = rb_entry(next, struct map_rb_node, rb_node); next = rb_next(&pos->rb_node); @@ -311,27 +437,27 @@ int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp) * Stop if current map starts after map->end. * Maps are ordered by start: next will not overlap for sure. */ - if (map__start(pos->map) >= map__end(map)) + if (map__start(pos->map) >= map__end(new)) break; if (verbose >= 2) { if (use_browser) { pr_debug("overlapping maps in %s (disable tui for more info)\n", - map__dso(map)->name); + map__dso(new)->name); } else { - fputs("overlapping maps:\n", fp); - map__fprintf(map, fp); + pr_debug("overlapping maps:\n"); + map__fprintf(new, fp); map__fprintf(pos->map, fp); } } - rb_erase_init(&pos->rb_node, root); + rb_erase_init(&pos->rb_node, maps__entries(maps)); /* * Now check if we need to create new maps for areas not * overlapped by the new map: */ - if (map__start(map) > map__start(pos->map)) { + if (map__start(new) > map__start(pos->map)) { struct map *before = map__clone(pos->map); if (before == NULL) { @@ -339,7 +465,7 @@ int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp) goto put_map; } - map__set_end(before, map__start(map)); + map__set_end(before, map__start(new)); err = __maps__insert(maps, before); if (err) { map__put(before); @@ -351,7 +477,7 @@ int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp) map__put(before); } - if (map__end(map) < map__end(pos->map)) { + if (map__end(new) < map__end(pos->map)) { struct map *after = map__clone(pos->map); if (after == NULL) { @@ -359,10 +485,10 @@ int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp) goto put_map; } - map__set_start(after, map__end(map)); - map__add_pgoff(after, map__end(map) - map__start(pos->map)); - assert(map__map_ip(pos->map, map__end(map)) == - map__map_ip(after, map__end(map))); + map__set_start(after, map__end(new)); + map__add_pgoff(after, map__end(new) - map__start(pos->map)); + assert(map__map_ip(pos->map, map__end(new)) == + map__map_ip(after, map__end(new))); err = __maps__insert(maps, after); if (err) { map__put(after); @@ -376,16 +502,14 @@ put_map: map__put(pos->map); free(pos); } + /* Add the map. */ + err = __maps__insert(maps, new); up_write(maps__lock(maps)); return err; } -/* - * XXX This should not really _copy_ te maps, but refcount them. - */ -int maps__clone(struct thread *thread, struct maps *parent) +int maps__copy_from(struct maps *maps, struct maps *parent) { - struct maps *maps = thread__maps(thread); int err; struct map_rb_node *rb_node; @@ -416,17 +540,6 @@ out_unlock: return err; } -struct map_rb_node *maps__find_node(struct maps *maps, struct map *map) -{ - struct map_rb_node *rb_node; - - maps__for_each_entry(maps, rb_node) { - if (rb_node->RC_CHK_ACCESS(map) == RC_CHK_ACCESS(map)) - return rb_node; - } - return NULL; -} - struct map *maps__find(struct maps *maps, u64 ip) { struct rb_node *p; @@ -452,26 +565,275 @@ out: return m ? m->map : NULL; } -struct map_rb_node *maps__first(struct maps *maps) +static int map__strcmp(const void *a, const void *b) { - struct rb_node *first = rb_first(maps__entries(maps)); + const struct map *map_a = *(const struct map **)a; + const struct map *map_b = *(const struct map **)b; + const struct dso *dso_a = map__dso(map_a); + const struct dso *dso_b = map__dso(map_b); + int ret = strcmp(dso_a->short_name, dso_b->short_name); - if (first) - return rb_entry(first, struct map_rb_node, rb_node); - return NULL; + if (ret == 0 && map_a != map_b) { + /* + * Ensure distinct but name equal maps have an order in part to + * aid reference counting. + */ + ret = (int)map__start(map_a) - (int)map__start(map_b); + if (ret == 0) + ret = (int)((intptr_t)map_a - (intptr_t)map_b); + } + + return ret; } -struct map_rb_node *map_rb_node__next(struct map_rb_node *node) +static int map__strcmp_name(const void *name, const void *b) { - struct rb_node *next; + const struct dso *dso = map__dso(*(const struct map **)b); - if (!node) - return NULL; + return strcmp(name, dso->short_name); +} - next = rb_next(&node->rb_node); +void __maps__sort_by_name(struct maps *maps) +{ + qsort(maps__maps_by_name(maps), maps__nr_maps(maps), sizeof(struct map *), map__strcmp); +} - if (!next) +static int map__groups__sort_by_name_from_rbtree(struct maps *maps) +{ + struct map_rb_node *rb_node; + struct map **maps_by_name = realloc(maps__maps_by_name(maps), + maps__nr_maps(maps) * sizeof(struct map *)); + int i = 0; + + if (maps_by_name == NULL) + return -1; + + up_read(maps__lock(maps)); + down_write(maps__lock(maps)); + + RC_CHK_ACCESS(maps)->maps_by_name = maps_by_name; + RC_CHK_ACCESS(maps)->nr_maps_allocated = maps__nr_maps(maps); + + maps__for_each_entry(maps, rb_node) + maps_by_name[i++] = map__get(rb_node->map); + + __maps__sort_by_name(maps); + + up_write(maps__lock(maps)); + down_read(maps__lock(maps)); + + return 0; +} + +static struct map *__maps__find_by_name(struct maps *maps, const char *name) +{ + struct map **mapp; + + if (maps__maps_by_name(maps) == NULL && + map__groups__sort_by_name_from_rbtree(maps)) return NULL; - return rb_entry(next, struct map_rb_node, rb_node); + mapp = bsearch(name, maps__maps_by_name(maps), maps__nr_maps(maps), + sizeof(*mapp), map__strcmp_name); + if (mapp) + return *mapp; + return NULL; +} + +struct map *maps__find_by_name(struct maps *maps, const char *name) +{ + struct map_rb_node *rb_node; + struct map *map; + + down_read(maps__lock(maps)); + + + if (RC_CHK_ACCESS(maps)->last_search_by_name) { + const struct dso *dso = map__dso(RC_CHK_ACCESS(maps)->last_search_by_name); + + if (strcmp(dso->short_name, name) == 0) { + map = RC_CHK_ACCESS(maps)->last_search_by_name; + goto out_unlock; + } + } + /* + * If we have maps->maps_by_name, then the name isn't in the rbtree, + * as maps->maps_by_name mirrors the rbtree when lookups by name are + * made. + */ + map = __maps__find_by_name(maps, name); + if (map || maps__maps_by_name(maps) != NULL) + goto out_unlock; + + /* Fallback to traversing the rbtree... */ + maps__for_each_entry(maps, rb_node) { + struct dso *dso; + + map = rb_node->map; + dso = map__dso(map); + if (strcmp(dso->short_name, name) == 0) { + RC_CHK_ACCESS(maps)->last_search_by_name = map; + goto out_unlock; + } + } + map = NULL; + +out_unlock: + up_read(maps__lock(maps)); + return map; +} + +struct map *maps__find_next_entry(struct maps *maps, struct map *map) +{ + struct map_rb_node *rb_node = maps__find_node(maps, map); + struct map_rb_node *next = map_rb_node__next(rb_node); + + if (next) + return next->map; + + return NULL; +} + +void maps__fixup_end(struct maps *maps) +{ + struct map_rb_node *prev = NULL, *curr; + + down_write(maps__lock(maps)); + + maps__for_each_entry(maps, curr) { + if (prev && (!map__end(prev->map) || map__end(prev->map) > map__start(curr->map))) + map__set_end(prev->map, map__start(curr->map)); + + prev = curr; + } + + /* + * We still haven't the actual symbols, so guess the + * last map final address. + */ + if (curr && !map__end(curr->map)) + map__set_end(curr->map, ~0ULL); + + up_write(maps__lock(maps)); +} + +/* + * Merges map into maps by splitting the new map within the existing map + * regions. + */ +int maps__merge_in(struct maps *kmaps, struct map *new_map) +{ + struct map_rb_node *rb_node; + struct rb_node *first; + bool overlaps; + LIST_HEAD(merged); + int err = 0; + + down_read(maps__lock(kmaps)); + first = first_ending_after(kmaps, new_map); + rb_node = first ? rb_entry(first, struct map_rb_node, rb_node) : NULL; + overlaps = rb_node && map__start(rb_node->map) < map__end(new_map); + up_read(maps__lock(kmaps)); + + if (!overlaps) + return maps__insert(kmaps, new_map); + + maps__for_each_entry(kmaps, rb_node) { + struct map *old_map = rb_node->map; + + /* no overload with this one */ + if (map__end(new_map) < map__start(old_map) || + map__start(new_map) >= map__end(old_map)) + continue; + + if (map__start(new_map) < map__start(old_map)) { + /* + * |new...... + * |old.... + */ + if (map__end(new_map) < map__end(old_map)) { + /* + * |new......| -> |new..| + * |old....| -> |old....| + */ + map__set_end(new_map, map__start(old_map)); + } else { + /* + * |new.............| -> |new..| |new..| + * |old....| -> |old....| + */ + struct map_list_node *m = map_list_node__new(); + + if (!m) { + err = -ENOMEM; + goto out; + } + + m->map = map__clone(new_map); + if (!m->map) { + free(m); + err = -ENOMEM; + goto out; + } + + map__set_end(m->map, map__start(old_map)); + list_add_tail(&m->node, &merged); + map__add_pgoff(new_map, map__end(old_map) - map__start(new_map)); + map__set_start(new_map, map__end(old_map)); + } + } else { + /* + * |new...... + * |old.... + */ + if (map__end(new_map) < map__end(old_map)) { + /* + * |new..| -> x + * |old.........| -> |old.........| + */ + map__put(new_map); + new_map = NULL; + break; + } else { + /* + * |new......| -> |new...| + * |old....| -> |old....| + */ + map__add_pgoff(new_map, map__end(old_map) - map__start(new_map)); + map__set_start(new_map, map__end(old_map)); + } + } + } + +out: + while (!list_empty(&merged)) { + struct map_list_node *old_node; + + old_node = list_entry(merged.next, struct map_list_node, node); + list_del_init(&old_node->node); + if (!err) + err = maps__insert(kmaps, old_node->map); + map__put(old_node->map); + free(old_node); + } + + if (new_map) { + if (!err) + err = maps__insert(kmaps, new_map); + map__put(new_map); + } + return err; +} + +void maps__load_first(struct maps *maps) +{ + struct map_rb_node *first; + + down_read(maps__lock(maps)); + + first = maps__first(maps); + if (first) + map__load(first->map); + + up_read(maps__lock(maps)); } diff --git a/tools/perf/util/maps.h b/tools/perf/util/maps.h index 83144e0645ed..d836d04c9402 100644 --- a/tools/perf/util/maps.h +++ b/tools/perf/util/maps.h @@ -14,24 +14,18 @@ struct ref_reloc_sym; struct machine; struct map; struct maps; -struct thread; -struct map_rb_node { - struct rb_node rb_node; +struct map_list_node { + struct list_head node; struct map *map; }; -struct map_rb_node *maps__first(struct maps *maps); -struct map_rb_node *map_rb_node__next(struct map_rb_node *node); -struct map_rb_node *maps__find_node(struct maps *maps, struct map *map); -struct map *maps__find(struct maps *maps, u64 addr); - -#define maps__for_each_entry(maps, map) \ - for (map = maps__first(maps); map; map = map_rb_node__next(map)) +static inline struct map_list_node *map_list_node__new(void) +{ + return malloc(sizeof(struct map_list_node)); +} -#define maps__for_each_entry_safe(maps, map, next) \ - for (map = maps__first(maps), next = map_rb_node__next(map); map; \ - map = next, next = map_rb_node__next(map)) +struct map *maps__find(struct maps *maps, u64 addr); DECLARE_RC_STRUCT(maps) { struct rb_root entries; @@ -58,7 +52,7 @@ struct kmap { struct maps *maps__new(struct machine *machine); bool maps__empty(struct maps *maps); -int maps__clone(struct thread *thread, struct maps *parent); +int maps__copy_from(struct maps *maps, struct maps *parent); struct maps *maps__get(struct maps *maps); void maps__put(struct maps *maps); @@ -71,26 +65,16 @@ static inline void __maps__zput(struct maps **map) #define maps__zput(map) __maps__zput(&map) -static inline struct rb_root *maps__entries(struct maps *maps) -{ - return &RC_CHK_ACCESS(maps)->entries; -} +/* Iterate over map calling cb for each entry. */ +int maps__for_each_map(struct maps *maps, int (*cb)(struct map *map, void *data), void *data); +/* Iterate over map removing an entry if cb returns true. */ +void maps__remove_maps(struct maps *maps, bool (*cb)(struct map *map, void *data), void *data); static inline struct machine *maps__machine(struct maps *maps) { return RC_CHK_ACCESS(maps)->machine; } -static inline struct rw_semaphore *maps__lock(struct maps *maps) -{ - return &RC_CHK_ACCESS(maps)->lock; -} - -static inline struct map **maps__maps_by_name(struct maps *maps) -{ - return RC_CHK_ACCESS(maps)->maps_by_name; -} - static inline unsigned int maps__nr_maps(const struct maps *maps) { return RC_CHK_ACCESS(maps)->nr_maps; @@ -125,12 +109,18 @@ struct addr_map_symbol; int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams); -int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp); +int maps__fixup_overlap_and_insert(struct maps *maps, struct map *new); struct map *maps__find_by_name(struct maps *maps, const char *name); +struct map *maps__find_next_entry(struct maps *maps, struct map *map); + int maps__merge_in(struct maps *kmaps, struct map *new_map); void __maps__sort_by_name(struct maps *maps); +void maps__fixup_end(struct maps *maps); + +void maps__load_first(struct maps *maps); + #endif // __PERF_MAPS_H diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 954b235e12e5..3a2e3687878c 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -100,11 +100,14 @@ int perf_mem_events__parse(const char *str) return -1; } -static bool perf_mem_event__supported(const char *mnt, char *sysfs_name) +static bool perf_mem_event__supported(const char *mnt, struct perf_pmu *pmu, + struct perf_mem_event *e) { + char sysfs_name[100]; char path[PATH_MAX]; struct stat st; + scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name, pmu->name); scnprintf(path, PATH_MAX, "%s/devices/%s", mnt, sysfs_name); return !stat(path, &st); } @@ -120,7 +123,6 @@ int perf_mem_events__init(void) for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { struct perf_mem_event *e = perf_mem_events__ptr(j); - char sysfs_name[100]; struct perf_pmu *pmu = NULL; /* @@ -136,12 +138,12 @@ int perf_mem_events__init(void) * of core PMU. */ while ((pmu = perf_pmus__scan(pmu)) != NULL) { - scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name, pmu->name); - e->supported |= perf_mem_event__supported(mnt, sysfs_name); + e->supported |= perf_mem_event__supported(mnt, pmu, e); + if (e->supported) { + found = true; + break; + } } - - if (e->supported) - found = true; } return found ? 0 : -ENOENT; @@ -167,13 +169,10 @@ static void perf_mem_events__print_unsupport_hybrid(struct perf_mem_event *e, int idx) { const char *mnt = sysfs__mount(); - char sysfs_name[100]; struct perf_pmu *pmu = NULL; while ((pmu = perf_pmus__scan(pmu)) != NULL) { - scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name, - pmu->name); - if (!perf_mem_event__supported(mnt, sysfs_name)) { + if (!perf_mem_event__supported(mnt, pmu, e)) { pr_err("failed: event '%s' not supported\n", perf_mem_events__name(idx, pmu->name)); } @@ -183,6 +182,7 @@ static void perf_mem_events__print_unsupport_hybrid(struct perf_mem_event *e, int perf_mem_events__record_args(const char **rec_argv, int *argv_nr, char **rec_tmp, int *tmp_nr) { + const char *mnt = sysfs__mount(); int i = *argv_nr, k = 0; struct perf_mem_event *e; @@ -211,6 +211,9 @@ int perf_mem_events__record_args(const char **rec_argv, int *argv_nr, while ((pmu = perf_pmus__scan(pmu)) != NULL) { const char *s = perf_mem_events__name(j, pmu->name); + if (!perf_mem_event__supported(mnt, pmu, e)) + continue; + rec_argv[i++] = "-e"; if (s) { char *copy = strdup(s); diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 49093b21ee2d..122ee198a86e 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -295,15 +295,14 @@ int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, struct perf_cpu map->core.flush = mp->flush; - map->comp_level = mp->comp_level; #ifndef PYTHON_PERF - if (zstd_init(&map->zstd_data, map->comp_level)) { + if (zstd_init(&map->zstd_data, mp->comp_level)) { pr_debug2("failed to init mmap compressor, error %d\n", errno); return -1; } #endif - if (map->comp_level && !perf_mmap__aio_enabled(map)) { + if (mp->comp_level && !perf_mmap__aio_enabled(map)) { map->data = mmap(NULL, mmap__mmap_len(map), PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); if (map->data == MAP_FAILED) { diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index f944c3cd5efa..0df6e1621c7e 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -39,7 +39,6 @@ struct mmap { #endif struct mmap_cpu_mask affinity_mask; void *data; - int comp_level; struct perf_data_file *file; struct zstd_data zstd_data; }; diff --git a/tools/perf/util/parse-branch-options.c b/tools/perf/util/parse-branch-options.c index fd67d204d720..f7f7aff3d85a 100644 --- a/tools/perf/util/parse-branch-options.c +++ b/tools/perf/util/parse-branch-options.c @@ -36,6 +36,7 @@ static const struct branch_mode branch_modes[] = { BRANCH_OPT("stack", PERF_SAMPLE_BRANCH_CALL_STACK), BRANCH_OPT("hw_index", PERF_SAMPLE_BRANCH_HW_INDEX), BRANCH_OPT("priv", PERF_SAMPLE_BRANCH_PRIV_SAVE), + BRANCH_OPT("counter", PERF_SAMPLE_BRANCH_COUNTERS), BRANCH_END }; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index aa2f5c6fc7fc..66eabcea4242 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -976,7 +976,7 @@ static int config_term_pmu(struct perf_event_attr *attr, struct parse_events_error *err) { if (term->type_term == PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE) { - const struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type); + struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type); if (!pmu) { char *err_str; @@ -986,15 +986,23 @@ static int config_term_pmu(struct perf_event_attr *attr, err_str, /*help=*/NULL); return -EINVAL; } - if (perf_pmu__supports_legacy_cache(pmu)) { + /* + * Rewrite the PMU event to a legacy cache one unless the PMU + * doesn't support legacy cache events or the event is present + * within the PMU. + */ + if (perf_pmu__supports_legacy_cache(pmu) && + !perf_pmu__have_event(pmu, term->config)) { attr->type = PERF_TYPE_HW_CACHE; return parse_events__decode_legacy_cache(term->config, pmu->type, &attr->config); - } else + } else { term->type_term = PARSE_EVENTS__TERM_TYPE_USER; + term->no_value = true; + } } if (term->type_term == PARSE_EVENTS__TERM_TYPE_HARDWARE) { - const struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type); + struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type); if (!pmu) { char *err_str; @@ -1004,10 +1012,19 @@ static int config_term_pmu(struct perf_event_attr *attr, err_str, /*help=*/NULL); return -EINVAL; } - attr->type = PERF_TYPE_HARDWARE; - attr->config = term->val.num; - if (perf_pmus__supports_extended_type()) - attr->config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT; + /* + * If the PMU has a sysfs or json event prefer it over + * legacy. ARM requires this. + */ + if (perf_pmu__have_event(pmu, term->config)) { + term->type_term = PARSE_EVENTS__TERM_TYPE_USER; + term->no_value = true; + } else { + attr->type = PERF_TYPE_HARDWARE; + attr->config = term->val.num; + if (perf_pmus__supports_extended_type()) + attr->config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT; + } return 0; } if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER || @@ -1381,6 +1398,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, YYLTYPE *loc = loc_; LIST_HEAD(config_terms); struct parse_events_terms parsed_terms; + bool alias_rewrote_terms = false; pmu = parse_state->fake_pmu ?: perf_pmus__find(name); @@ -1433,7 +1451,15 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, return evsel ? 0 : -ENOMEM; } - if (!parse_state->fake_pmu && perf_pmu__check_alias(pmu, &parsed_terms, &info, err)) { + /* Configure attr/terms with a known PMU, this will set hardcoded terms. */ + if (config_attr(&attr, &parsed_terms, parse_state->error, config_term_pmu)) { + parse_events_terms__exit(&parsed_terms); + return -EINVAL; + } + + /* Look for event names in the terms and rewrite into format based terms. */ + if (!parse_state->fake_pmu && perf_pmu__check_alias(pmu, &parsed_terms, + &info, &alias_rewrote_terms, err)) { parse_events_terms__exit(&parsed_terms); return -EINVAL; } @@ -1447,11 +1473,9 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, strbuf_release(&sb); } - /* - * Configure hardcoded terms first, no need to check - * return value when called with fail == 0 ;) - */ - if (config_attr(&attr, &parsed_terms, parse_state->error, config_term_pmu)) { + /* Configure attr/terms again if an alias was expanded. */ + if (alias_rewrote_terms && + config_attr(&attr, &parsed_terms, parse_state->error, config_term_pmu)) { parse_events_terms__exit(&parsed_terms); return -EINVAL; } diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c index e1e2d701599c..1de3b69cdf4a 100644 --- a/tools/perf/util/perf_api_probe.c +++ b/tools/perf/util/perf_api_probe.c @@ -64,7 +64,7 @@ static bool perf_probe_api(setup_probe_fn_t fn) struct perf_cpu cpu; int ret, i = 0; - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); if (!cpus) return false; cpu = perf_cpu_map__cpu(cpus, 0); @@ -140,7 +140,7 @@ bool perf_can_record_cpu_wide(void) struct perf_cpu cpu; int fd; - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); if (!cpus) return false; diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c index 2247991451f3..8f04d3b7f3ec 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -55,6 +55,7 @@ static void __p_branch_sample_type(char *buf, size_t size, u64 value) bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP), bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES), bit_name(TYPE_SAVE), bit_name(HW_INDEX), bit_name(PRIV_SAVE), + bit_name(COUNTERS), { .name = NULL, } }; #undef bit_name diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index d3c9aa4326be..3c9609944a2f 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1494,12 +1494,14 @@ static int check_info_data(struct perf_pmu *pmu, * defined for the alias */ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_terms, - struct perf_pmu_info *info, struct parse_events_error *err) + struct perf_pmu_info *info, bool *rewrote_terms, + struct parse_events_error *err) { struct parse_events_term *term, *h; struct perf_pmu_alias *alias; int ret; + *rewrote_terms = false; info->per_pkg = false; /* @@ -1521,7 +1523,7 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_ NULL); return ret; } - + *rewrote_terms = true; ret = check_info_data(pmu, alias, info, err, term->err_term); if (ret) return ret; @@ -1615,6 +1617,8 @@ bool perf_pmu__auto_merge_stats(const struct perf_pmu *pmu) bool perf_pmu__have_event(struct perf_pmu *pmu, const char *name) { + if (!name) + return false; if (perf_pmu__find_alias(pmu, name, /*load=*/ true) != NULL) return true; if (pmu->cpu_aliases_added || !pmu->events_table) diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index d2895d415f08..424c3fee0949 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -201,7 +201,8 @@ int perf_pmu__config_terms(const struct perf_pmu *pmu, __u64 perf_pmu__format_bits(struct perf_pmu *pmu, const char *name); int perf_pmu__format_type(struct perf_pmu *pmu, const char *name); int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_terms, - struct perf_pmu_info *info, struct parse_events_error *err); + struct perf_pmu_info *info, bool *rewrote_terms, + struct parse_events_error *err); int perf_pmu__find_event(struct perf_pmu *pmu, const char *event, void *state, pmu_event_callback cb); int perf_pmu__format_parse(struct perf_pmu *pmu, int dirfd, bool eager_load); diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 1a5b7fa459b2..a1a796043691 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -149,10 +149,32 @@ static int kernel_get_symbol_address_by_name(const char *name, u64 *addr, return 0; } +struct kernel_get_module_map_cb_args { + const char *module; + struct map *result; +}; + +static int kernel_get_module_map_cb(struct map *map, void *data) +{ + struct kernel_get_module_map_cb_args *args = data; + struct dso *dso = map__dso(map); + const char *short_name = dso->short_name; /* short_name is "[module]" */ + u16 short_name_len = dso->short_name_len; + + if (strncmp(short_name + 1, args->module, short_name_len - 2) == 0 && + args->module[short_name_len - 2] == '\0') { + args->result = map__get(map); + return 1; + } + return 0; +} + static struct map *kernel_get_module_map(const char *module) { - struct maps *maps = machine__kernel_maps(host_machine); - struct map_rb_node *pos; + struct kernel_get_module_map_cb_args args = { + .module = module, + .result = NULL, + }; /* A file path -- this is an offline module */ if (module && strchr(module, '/')) @@ -164,19 +186,9 @@ static struct map *kernel_get_module_map(const char *module) return map__get(map); } - maps__for_each_entry(maps, pos) { - /* short_name is "[module]" */ - struct dso *dso = map__dso(pos->map); - const char *short_name = dso->short_name; - u16 short_name_len = dso->short_name_len; + maps__for_each_map(machine__kernel_maps(host_machine), kernel_get_module_map_cb, &args); - if (strncmp(short_name + 1, module, - short_name_len - 2) == 0 && - module[short_name_len - 2] == '\0') { - return map__get(pos->map); - } - } - return NULL; + return args.result; } struct map *get_target_map(const char *target, struct nsinfo *nsi, bool user) diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index f171360b0ef4..c8923375e30d 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -23,6 +23,7 @@ #include "event.h" #include "dso.h" #include "debug.h" +#include "debuginfo.h" #include "intlist.h" #include "strbuf.h" #include "strlist.h" @@ -31,128 +32,9 @@ #include "probe-file.h" #include "string2.h" -#ifdef HAVE_DEBUGINFOD_SUPPORT -#include <elfutils/debuginfod.h> -#endif - /* Kprobe tracer basic type is up to u64 */ #define MAX_BASIC_TYPE_BITS 64 -/* Dwarf FL wrappers */ -static char *debuginfo_path; /* Currently dummy */ - -static const Dwfl_Callbacks offline_callbacks = { - .find_debuginfo = dwfl_standard_find_debuginfo, - .debuginfo_path = &debuginfo_path, - - .section_address = dwfl_offline_section_address, - - /* We use this table for core files too. */ - .find_elf = dwfl_build_id_find_elf, -}; - -/* Get a Dwarf from offline image */ -static int debuginfo__init_offline_dwarf(struct debuginfo *dbg, - const char *path) -{ - GElf_Addr dummy; - int fd; - - fd = open(path, O_RDONLY); - if (fd < 0) - return fd; - - dbg->dwfl = dwfl_begin(&offline_callbacks); - if (!dbg->dwfl) - goto error; - - dwfl_report_begin(dbg->dwfl); - dbg->mod = dwfl_report_offline(dbg->dwfl, "", "", fd); - if (!dbg->mod) - goto error; - - dbg->dbg = dwfl_module_getdwarf(dbg->mod, &dbg->bias); - if (!dbg->dbg) - goto error; - - dwfl_module_build_id(dbg->mod, &dbg->build_id, &dummy); - - dwfl_report_end(dbg->dwfl, NULL, NULL); - - return 0; -error: - if (dbg->dwfl) - dwfl_end(dbg->dwfl); - else - close(fd); - memset(dbg, 0, sizeof(*dbg)); - - return -ENOENT; -} - -static struct debuginfo *__debuginfo__new(const char *path) -{ - struct debuginfo *dbg = zalloc(sizeof(*dbg)); - if (!dbg) - return NULL; - - if (debuginfo__init_offline_dwarf(dbg, path) < 0) - zfree(&dbg); - if (dbg) - pr_debug("Open Debuginfo file: %s\n", path); - return dbg; -} - -enum dso_binary_type distro_dwarf_types[] = { - DSO_BINARY_TYPE__FEDORA_DEBUGINFO, - DSO_BINARY_TYPE__UBUNTU_DEBUGINFO, - DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO, - DSO_BINARY_TYPE__BUILDID_DEBUGINFO, - DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO, - DSO_BINARY_TYPE__NOT_FOUND, -}; - -struct debuginfo *debuginfo__new(const char *path) -{ - enum dso_binary_type *type; - char buf[PATH_MAX], nil = '\0'; - struct dso *dso; - struct debuginfo *dinfo = NULL; - struct build_id bid; - - /* Try to open distro debuginfo files */ - dso = dso__new(path); - if (!dso) - goto out; - - /* Set the build id for DSO_BINARY_TYPE__BUILDID_DEBUGINFO */ - if (is_regular_file(path) && filename__read_build_id(path, &bid) > 0) - dso__set_build_id(dso, &bid); - - for (type = distro_dwarf_types; - !dinfo && *type != DSO_BINARY_TYPE__NOT_FOUND; - type++) { - if (dso__read_binary_type_filename(dso, *type, &nil, - buf, PATH_MAX) < 0) - continue; - dinfo = __debuginfo__new(buf); - } - dso__put(dso); - -out: - /* if failed to open all distro debuginfo, open given binary */ - return dinfo ? : __debuginfo__new(path); -} - -void debuginfo__delete(struct debuginfo *dbg) -{ - if (dbg) { - if (dbg->dwfl) - dwfl_end(dbg->dwfl); - free(dbg); - } -} - /* * Probe finder related functions */ @@ -722,7 +604,7 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf) ret = dwarf_getlocation_addr(&fb_attr, pf->addr, &pf->fb_ops, &nops, 1); if (ret <= 0 || nops == 0) { pf->fb_ops = NULL; -#if _ELFUTILS_PREREQ(0, 142) +#ifdef HAVE_DWARF_CFI_SUPPORT } else if (nops == 1 && pf->fb_ops[0].atom == DW_OP_call_frame_cfa && (pf->cfi_eh != NULL || pf->cfi_dbg != NULL)) { if ((dwarf_cfi_addrframe(pf->cfi_eh, pf->addr, &frame) != 0 && @@ -733,7 +615,7 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf) free(frame); return -ENOENT; } -#endif +#endif /* HAVE_DWARF_CFI_SUPPORT */ } /* Call finder's callback handler */ @@ -1258,7 +1140,7 @@ static int debuginfo__find_probes(struct debuginfo *dbg, pf->machine = ehdr.e_machine; -#if _ELFUTILS_PREREQ(0, 142) +#ifdef HAVE_DWARF_CFI_SUPPORT do { GElf_Shdr shdr; @@ -1268,7 +1150,7 @@ static int debuginfo__find_probes(struct debuginfo *dbg, pf->cfi_dbg = dwarf_getcfi(dbg->dbg); } while (0); -#endif +#endif /* HAVE_DWARF_CFI_SUPPORT */ ret = debuginfo__find_probe_location(dbg, pf); return ret; @@ -1677,44 +1559,6 @@ int debuginfo__find_available_vars_at(struct debuginfo *dbg, return (ret < 0) ? ret : af.nvls; } -/* For the kernel module, we need a special code to get a DIE */ -int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs, - bool adjust_offset) -{ - int n, i; - Elf32_Word shndx; - Elf_Scn *scn; - Elf *elf; - GElf_Shdr mem, *shdr; - const char *p; - - elf = dwfl_module_getelf(dbg->mod, &dbg->bias); - if (!elf) - return -EINVAL; - - /* Get the number of relocations */ - n = dwfl_module_relocations(dbg->mod); - if (n < 0) - return -ENOENT; - /* Search the relocation related .text section */ - for (i = 0; i < n; i++) { - p = dwfl_module_relocation_info(dbg->mod, i, &shndx); - if (strcmp(p, ".text") == 0) { - /* OK, get the section header */ - scn = elf_getscn(elf, shndx); - if (!scn) - return -ENOENT; - shdr = gelf_getshdr(scn, &mem); - if (!shdr) - return -ENOENT; - *offs = shdr->sh_addr; - if (adjust_offset) - *offs -= shdr->sh_offset; - } - } - return 0; -} - /* Reverse search */ int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr, struct perf_probe_point *ppt) @@ -2009,41 +1853,6 @@ found: return (ret < 0) ? ret : lf.found; } -#ifdef HAVE_DEBUGINFOD_SUPPORT -/* debuginfod doesn't require the comp_dir but buildid is required */ -static int get_source_from_debuginfod(const char *raw_path, - const char *sbuild_id, char **new_path) -{ - debuginfod_client *c = debuginfod_begin(); - const char *p = raw_path; - int fd; - - if (!c) - return -ENOMEM; - - fd = debuginfod_find_source(c, (const unsigned char *)sbuild_id, - 0, p, new_path); - pr_debug("Search %s from debuginfod -> %d\n", p, fd); - if (fd >= 0) - close(fd); - debuginfod_end(c); - if (fd < 0) { - pr_debug("Failed to find %s in debuginfod (%s)\n", - raw_path, sbuild_id); - return -ENOENT; - } - pr_debug("Got a source %s\n", *new_path); - - return 0; -} -#else -static inline int get_source_from_debuginfod(const char *raw_path __maybe_unused, - const char *sbuild_id __maybe_unused, - char **new_path __maybe_unused) -{ - return -ENOTSUP; -} -#endif /* * Find a src file from a DWARF tag path. Prepend optional source path prefix * and chop off leading directories that do not exist. Result is passed back as diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index 8bc1c80d3c1c..3add5ff516e1 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -24,21 +24,7 @@ static inline int is_c_varname(const char *name) #ifdef HAVE_DWARF_SUPPORT #include "dwarf-aux.h" - -/* TODO: export debuginfo data structure even if no dwarf support */ - -/* debug information structure */ -struct debuginfo { - Dwarf *dbg; - Dwfl_Module *mod; - Dwfl *dwfl; - Dwarf_Addr bias; - const unsigned char *build_id; -}; - -/* This also tries to open distro debuginfo */ -struct debuginfo *debuginfo__new(const char *path); -void debuginfo__delete(struct debuginfo *dbg); +#include "debuginfo.h" /* Find probe_trace_events specified by perf_probe_event from debuginfo */ int debuginfo__find_trace_events(struct debuginfo *dbg, @@ -49,9 +35,6 @@ int debuginfo__find_trace_events(struct debuginfo *dbg, int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr, struct perf_probe_point *ppt); -int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs, - bool adjust_offset); - /* Find a line range */ int debuginfo__find_line_range(struct debuginfo *dbg, struct line_range *lr); diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 9eb5c6a08999..87e817b3cf7e 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -237,8 +237,8 @@ bool evlist__can_select_event(struct evlist *evlist, const char *str) evsel = evlist__last(temp_evlist); - if (!evlist || perf_cpu_map__empty(evlist->core.user_requested_cpus)) { - struct perf_cpu_map *cpus = perf_cpu_map__new(NULL); + if (!evlist || perf_cpu_map__has_any_cpu_or_is_empty(evlist->core.user_requested_cpus)) { + struct perf_cpu_map *cpus = perf_cpu_map__new_online_cpus(); if (cpus) cpu = perf_cpu_map__cpu(cpus, 0); diff --git a/tools/perf/util/s390-cpumcf-kernel.h b/tools/perf/util/s390-cpumcf-kernel.h index f55ca07f3ca1..74b36644e384 100644 --- a/tools/perf/util/s390-cpumcf-kernel.h +++ b/tools/perf/util/s390-cpumcf-kernel.h @@ -12,6 +12,8 @@ #define S390_CPUMCF_DIAG_DEF 0xfeef /* Counter diagnostic entry ID */ #define PERF_EVENT_CPUM_CF_DIAG 0xBC000 /* Event: Counter sets */ #define PERF_EVENT_CPUM_SF_DIAG 0xBD000 /* Event: Combined-sampling */ +#define PERF_EVENT_PAI_CRYPTO_ALL 0x1000 /* Event: CRYPTO_ALL */ +#define PERF_EVENT_PAI_NNPA_ALL 0x1800 /* Event: NNPA_ALL */ struct cf_ctrset_entry { /* CPU-M CF counter set entry (8 byte) */ unsigned int def:16; /* 0-15 Data Entry Format */ diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c index 115b16edb451..53383e97ec9d 100644 --- a/tools/perf/util/s390-sample-raw.c +++ b/tools/perf/util/s390-sample-raw.c @@ -51,8 +51,6 @@ static bool s390_cpumcfdg_testctr(struct perf_sample *sample) struct cf_trailer_entry *te; struct cf_ctrset_entry *cep, ce; - if (!len) - return false; while (offset < len) { cep = (struct cf_ctrset_entry *)(buf + offset); ce.def = be16_to_cpu(cep->def); @@ -125,6 +123,9 @@ static int get_counterset_start(int setnr) return 128; case CPUMF_CTR_SET_MT_DIAG: /* Diagnostic counter set */ return 448; + case PERF_EVENT_PAI_NNPA_ALL: /* PAI NNPA counter set */ + case PERF_EVENT_PAI_CRYPTO_ALL: /* PAI CRYPTO counter set */ + return setnr; default: return -1; } @@ -212,27 +213,120 @@ static void s390_cpumcfdg_dump(struct perf_pmu *pmu, struct perf_sample *sample) } } +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpacked" +#pragma GCC diagnostic ignored "-Wattributes" +/* + * Check for consistency of PAI_CRYPTO/PAI_NNPA raw data. + */ +struct pai_data { /* Event number and value */ + u16 event_nr; + u64 event_val; +} __packed; + +#pragma GCC diagnostic pop + +/* + * Test for valid raw data. At least one PAI event should be in the raw + * data section. + */ +static bool s390_pai_all_test(struct perf_sample *sample) +{ + size_t len = sample->raw_size; + + if (len < 0xa) + return false; + return true; +} + +static void s390_pai_all_dump(struct evsel *evsel, struct perf_sample *sample) +{ + size_t len = sample->raw_size, offset = 0; + unsigned char *p = sample->raw_data; + const char *color = PERF_COLOR_BLUE; + struct pai_data pai_data; + char *ev_name; + + while (offset < len) { + memcpy(&pai_data.event_nr, p, sizeof(pai_data.event_nr)); + pai_data.event_nr = be16_to_cpu(pai_data.event_nr); + p += sizeof(pai_data.event_nr); + offset += sizeof(pai_data.event_nr); + + memcpy(&pai_data.event_val, p, sizeof(pai_data.event_val)); + pai_data.event_val = be64_to_cpu(pai_data.event_val); + p += sizeof(pai_data.event_val); + offset += sizeof(pai_data.event_val); + + ev_name = get_counter_name(evsel->core.attr.config, + pai_data.event_nr, evsel->pmu); + color_fprintf(stdout, color, "\tCounter:%03d %s Value:%#018lx\n", + pai_data.event_nr, ev_name ?: "<unknown>", + pai_data.event_val); + free(ev_name); + + if (offset + 0xa > len) + break; + } + color_fprintf(stdout, color, "\n"); +} + /* S390 specific trace event function. Check for PERF_RECORD_SAMPLE events - * and if the event was triggered by a counter set diagnostic event display - * its raw data. + * and if the event was triggered by a + * - counter set diagnostic event + * - processor activity assist (PAI) crypto counter event + * - processor activity assist (PAI) neural network processor assist (NNPA) + * counter event + * display its raw data. * The function is only invoked when the dump flag -D is set. + * + * Function evlist__s390_sample_raw() is defined as call back after it has + * been verified that the perf.data file was created on s390 platform. */ -void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event, struct perf_sample *sample) +void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event, + struct perf_sample *sample) { + const char *pai_name; struct evsel *evsel; if (event->header.type != PERF_RECORD_SAMPLE) return; evsel = evlist__event2evsel(evlist, event); - if (evsel == NULL || - evsel->core.attr.config != PERF_EVENT_CPUM_CF_DIAG) + if (!evsel) + return; + + /* Check for raw data in sample */ + if (!sample->raw_size || !sample->raw_data) return; /* Display raw data on screen */ - if (!s390_cpumcfdg_testctr(sample)) { - pr_err("Invalid counter set data encountered\n"); + if (evsel->core.attr.config == PERF_EVENT_CPUM_CF_DIAG) { + if (!evsel->pmu) + evsel->pmu = perf_pmus__find("cpum_cf"); + if (!s390_cpumcfdg_testctr(sample)) + pr_err("Invalid counter set data encountered\n"); + else + s390_cpumcfdg_dump(evsel->pmu, sample); + return; + } + + switch (evsel->core.attr.config) { + case PERF_EVENT_PAI_NNPA_ALL: + pai_name = "NNPA_ALL"; + break; + case PERF_EVENT_PAI_CRYPTO_ALL: + pai_name = "CRYPTO_ALL"; + break; + default: return; } - s390_cpumcfdg_dump(evsel->pmu, sample); + + if (!s390_pai_all_test(sample)) { + pr_err("Invalid %s raw data encountered\n", pai_name); + } else { + if (!evsel->pmu) + evsel->pmu = perf_pmus__find_by_type(evsel->core.attr.type); + s390_pai_all_dump(evsel, sample); + } } diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h index c92ad0f51ecd..70b2c3135555 100644 --- a/tools/perf/util/sample.h +++ b/tools/perf/util/sample.h @@ -113,6 +113,7 @@ struct perf_sample { void *raw_data; struct ip_callchain *callchain; struct branch_stack *branch_stack; + u64 *branch_stack_cntr; struct regs_dump user_regs; struct regs_dump intr_regs; struct stack_dump user_stack; diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index 603091317bed..b072ac5d3bc2 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -490,6 +490,9 @@ static int perl_start_script(const char *script, int argc, const char **argv, scripting_context->session = session; command_line = malloc((argc + 2) * sizeof(const char *)); + if (!command_line) + return -ENOMEM; + command_line[0] = ""; command_line[1] = script; for (i = 2; i < argc + 2; i++) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 94312741443a..860e1837ba96 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -353,6 +353,8 @@ static PyObject *get_field_numeric_entry(struct tep_event *event, if (is_array) { list = PyList_New(field->arraylen); + if (!list) + Py_FatalError("couldn't create Python list"); item_size = field->size / field->arraylen; n_items = field->arraylen; } else { @@ -754,7 +756,7 @@ static void regs_map(struct regs_dump *regs, uint64_t mask, const char *arch, ch } } -static void set_regs_in_dict(PyObject *dict, +static int set_regs_in_dict(PyObject *dict, struct perf_sample *sample, struct evsel *evsel) { @@ -770,6 +772,8 @@ static void set_regs_in_dict(PyObject *dict, */ int size = __sw_hweight64(attr->sample_regs_intr) * 28; char *bf = malloc(size); + if (!bf) + return -1; regs_map(&sample->intr_regs, attr->sample_regs_intr, arch, bf, size); @@ -781,6 +785,8 @@ static void set_regs_in_dict(PyObject *dict, pydict_set_item_string_decref(dict, "uregs", _PyUnicode_FromString(bf)); free(bf); + + return 0; } static void set_sym_in_dict(PyObject *dict, struct addr_location *al, @@ -920,7 +926,8 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, PyLong_FromUnsignedLongLong(sample->cyc_cnt)); } - set_regs_in_dict(dict, sample, evsel); + if (set_regs_in_dict(dict, sample, evsel)) + Py_FatalError("Failed to setting regs in dict"); return dict; } @@ -1918,12 +1925,18 @@ static int python_start_script(const char *script, int argc, const char **argv, scripting_context->session = session; #if PY_MAJOR_VERSION < 3 command_line = malloc((argc + 1) * sizeof(const char *)); + if (!command_line) + return -1; + command_line[0] = script; for (i = 1; i < argc + 1; i++) command_line[i] = argv[i - 1]; PyImport_AppendInittab(name, initperf_trace_context); #else command_line = malloc((argc + 1) * sizeof(wchar_t *)); + if (!command_line) + return -1; + command_line[0] = Py_DecodeLocale(script, NULL); for (i = 1; i < argc + 1; i++) command_line[i] = Py_DecodeLocale(argv[i - 1], NULL); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 1e9aa8ed15b6..199d3e8df315 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -115,6 +115,11 @@ static int perf_session__open(struct perf_session *session, int repipe_fd) return -1; } + if (perf_header__has_feat(&session->header, HEADER_AUXTRACE)) { + /* Auxiliary events may reference exited threads, hold onto dead ones. */ + symbol_conf.keep_exited_threads = true; + } + if (perf_data__is_pipe(data)) return 0; @@ -1150,9 +1155,13 @@ static void callchain__printf(struct evsel *evsel, i, callchain->ips[i]); } -static void branch_stack__printf(struct perf_sample *sample, bool callstack) +static void branch_stack__printf(struct perf_sample *sample, + struct evsel *evsel) { struct branch_entry *entries = perf_sample__branch_entries(sample); + bool callstack = evsel__has_branch_callstack(evsel); + u64 *branch_stack_cntr = sample->branch_stack_cntr; + struct perf_env *env = evsel__env(evsel); uint64_t i; if (!callstack) { @@ -1194,6 +1203,13 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack) } } } + + if (branch_stack_cntr) { + printf("... branch stack counters: nr:%" PRIu64 " (counter width: %u max counter nr:%u)\n", + sample->branch_stack->nr, env->br_cntr_width, env->br_cntr_nr); + for (i = 0; i < sample->branch_stack->nr; i++) + printf("..... %2"PRIu64": %016" PRIx64 "\n", i, branch_stack_cntr[i]); + } } static void regs_dump__printf(u64 mask, u64 *regs, const char *arch) @@ -1355,7 +1371,7 @@ static void dump_sample(struct evsel *evsel, union perf_event *event, callchain__printf(evsel, sample); if (evsel__has_br_stack(evsel)) - branch_stack__printf(sample, evsel__has_branch_callstack(evsel)); + branch_stack__printf(sample, evsel); if (sample_type & PERF_SAMPLE_REGS_USER) regs_user__printf(sample, arch); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 80e4f6132740..30254eb63709 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -24,6 +24,7 @@ #include "strbuf.h" #include "mem-events.h" #include "annotate.h" +#include "annotate-data.h" #include "event.h" #include "time-utils.h" #include "cgroup.h" @@ -418,6 +419,52 @@ struct sort_entry sort_sym = { .se_width_idx = HISTC_SYMBOL, }; +/* --sort symoff */ + +static int64_t +sort__symoff_cmp(struct hist_entry *left, struct hist_entry *right) +{ + int64_t ret; + + ret = sort__sym_cmp(left, right); + if (ret) + return ret; + + return left->ip - right->ip; +} + +static int64_t +sort__symoff_sort(struct hist_entry *left, struct hist_entry *right) +{ + int64_t ret; + + ret = sort__sym_sort(left, right); + if (ret) + return ret; + + return left->ip - right->ip; +} + +static int +hist_entry__symoff_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) +{ + struct symbol *sym = he->ms.sym; + + if (sym == NULL) + return repsep_snprintf(bf, size, "[%c] %-#.*llx", he->level, width - 4, he->ip); + + return repsep_snprintf(bf, size, "[%c] %s+0x%llx", he->level, sym->name, he->ip - sym->start); +} + +struct sort_entry sort_sym_offset = { + .se_header = "Symbol Offset", + .se_cmp = sort__symoff_cmp, + .se_sort = sort__symoff_sort, + .se_snprintf = hist_entry__symoff_snprintf, + .se_filter = hist_entry__sym_filter, + .se_width_idx = HISTC_SYMBOL_OFFSET, +}; + /* --sort srcline */ char *hist_entry__srcline(struct hist_entry *he) @@ -583,21 +630,21 @@ static int hist_entry__sym_ipc_snprintf(struct hist_entry *he, char *bf, { struct symbol *sym = he->ms.sym; - struct annotation *notes; + struct annotated_branch *branch; double ipc = 0.0, coverage = 0.0; char tmp[64]; if (!sym) return repsep_snprintf(bf, size, "%-*s", width, "-"); - notes = symbol__annotation(sym); + branch = symbol__annotation(sym)->branch; - if (notes->hit_cycles) - ipc = notes->hit_insn / ((double)notes->hit_cycles); + if (branch && branch->hit_cycles) + ipc = branch->hit_insn / ((double)branch->hit_cycles); - if (notes->total_insn) { - coverage = notes->cover_insn * 100.0 / - ((double)notes->total_insn); + if (branch && branch->total_insn) { + coverage = branch->cover_insn * 100.0 / + ((double)branch->total_insn); } snprintf(tmp, sizeof(tmp), "%-5.2f [%5.1f%%]", ipc, coverage); @@ -2094,7 +2141,7 @@ struct sort_entry sort_dso_size = { .se_width_idx = HISTC_DSO_SIZE, }; -/* --sort dso_size */ +/* --sort addr */ static int64_t sort__addr_cmp(struct hist_entry *left, struct hist_entry *right) @@ -2131,6 +2178,152 @@ struct sort_entry sort_addr = { .se_width_idx = HISTC_ADDR, }; +/* --sort type */ + +struct annotated_data_type unknown_type = { + .self = { + .type_name = (char *)"(unknown)", + .children = LIST_HEAD_INIT(unknown_type.self.children), + }, +}; + +static int64_t +sort__type_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return sort__addr_cmp(left, right); +} + +static void sort__type_init(struct hist_entry *he) +{ + if (he->mem_type) + return; + + he->mem_type = hist_entry__get_data_type(he); + if (he->mem_type == NULL) { + he->mem_type = &unknown_type; + he->mem_type_off = 0; + } +} + +static int64_t +sort__type_collapse(struct hist_entry *left, struct hist_entry *right) +{ + struct annotated_data_type *left_type = left->mem_type; + struct annotated_data_type *right_type = right->mem_type; + + if (!left_type) { + sort__type_init(left); + left_type = left->mem_type; + } + + if (!right_type) { + sort__type_init(right); + right_type = right->mem_type; + } + + return strcmp(left_type->self.type_name, right_type->self.type_name); +} + +static int64_t +sort__type_sort(struct hist_entry *left, struct hist_entry *right) +{ + return sort__type_collapse(left, right); +} + +static int hist_entry__type_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + return repsep_snprintf(bf, size, "%-*s", width, he->mem_type->self.type_name); +} + +struct sort_entry sort_type = { + .se_header = "Data Type", + .se_cmp = sort__type_cmp, + .se_collapse = sort__type_collapse, + .se_sort = sort__type_sort, + .se_init = sort__type_init, + .se_snprintf = hist_entry__type_snprintf, + .se_width_idx = HISTC_TYPE, +}; + +/* --sort typeoff */ + +static int64_t +sort__typeoff_sort(struct hist_entry *left, struct hist_entry *right) +{ + struct annotated_data_type *left_type = left->mem_type; + struct annotated_data_type *right_type = right->mem_type; + int64_t ret; + + if (!left_type) { + sort__type_init(left); + left_type = left->mem_type; + } + + if (!right_type) { + sort__type_init(right); + right_type = right->mem_type; + } + + ret = strcmp(left_type->self.type_name, right_type->self.type_name); + if (ret) + return ret; + return left->mem_type_off - right->mem_type_off; +} + +static void fill_member_name(char *buf, size_t sz, struct annotated_member *m, + int offset, bool first) +{ + struct annotated_member *child; + + if (list_empty(&m->children)) + return; + + list_for_each_entry(child, &m->children, node) { + if (child->offset <= offset && offset < child->offset + child->size) { + int len = 0; + + /* It can have anonymous struct/union members */ + if (child->var_name) { + len = scnprintf(buf, sz, "%s%s", + first ? "" : ".", child->var_name); + first = false; + } + + fill_member_name(buf + len, sz - len, child, offset, first); + return; + } + } +} + +static int hist_entry__typeoff_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width __maybe_unused) +{ + struct annotated_data_type *he_type = he->mem_type; + char buf[4096]; + + buf[0] = '\0'; + if (list_empty(&he_type->self.children)) + snprintf(buf, sizeof(buf), "no field"); + else + fill_member_name(buf, sizeof(buf), &he_type->self, + he->mem_type_off, true); + buf[4095] = '\0'; + + return repsep_snprintf(bf, size, "%s %+d (%s)", he_type->self.type_name, + he->mem_type_off, buf); +} + +struct sort_entry sort_type_offset = { + .se_header = "Data Type Offset", + .se_cmp = sort__type_cmp, + .se_collapse = sort__typeoff_sort, + .se_sort = sort__typeoff_sort, + .se_init = sort__type_init, + .se_snprintf = hist_entry__typeoff_snprintf, + .se_width_idx = HISTC_TYPE_OFFSET, +}; + struct sort_dimension { const char *name; @@ -2185,7 +2378,10 @@ static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_ADDR, "addr", sort_addr), DIM(SORT_LOCAL_RETIRE_LAT, "local_retire_lat", sort_local_p_stage_cyc), DIM(SORT_GLOBAL_RETIRE_LAT, "retire_lat", sort_global_p_stage_cyc), - DIM(SORT_SIMD, "simd", sort_simd) + DIM(SORT_SIMD, "simd", sort_simd), + DIM(SORT_ANNOTATE_DATA_TYPE, "type", sort_type), + DIM(SORT_ANNOTATE_DATA_TYPE_OFFSET, "typeoff", sort_type_offset), + DIM(SORT_SYM_OFFSET, "symoff", sort_sym_offset), }; #undef DIM @@ -3205,6 +3401,8 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok, list->thread = 1; } else if (sd->entry == &sort_comm) { list->comm = 1; + } else if (sd->entry == &sort_type_offset) { + symbol_conf.annotate_data_member = true; } return __sort_dimension__add(sd, list, level); diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index ecfb7f1359d5..6f6b4189a389 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -15,6 +15,7 @@ struct option; struct thread; +struct annotated_data_type; extern regex_t parent_regex; extern const char *sort_order; @@ -34,6 +35,7 @@ extern struct sort_entry sort_dso_to; extern struct sort_entry sort_sym_from; extern struct sort_entry sort_sym_to; extern struct sort_entry sort_srcline; +extern struct sort_entry sort_type; extern const char default_mem_sort_order[]; extern bool chk_double_cl; @@ -111,6 +113,7 @@ struct hist_entry { u64 p_stage_cyc; u8 cpumode; u8 depth; + int mem_type_off; struct simd_flags simd_flags; /* We are added by hists__add_dummy_entry. */ @@ -154,6 +157,7 @@ struct hist_entry { struct perf_hpp_list *hpp_list; struct hist_entry *parent_he; struct hist_entry_ops *ops; + struct annotated_data_type *mem_type; union { /* this is for hierarchical entry structure */ struct { @@ -243,6 +247,9 @@ enum sort_type { SORT_LOCAL_RETIRE_LAT, SORT_GLOBAL_RETIRE_LAT, SORT_SIMD, + SORT_ANNOTATE_DATA_TYPE, + SORT_ANNOTATE_DATA_TYPE_OFFSET, + SORT_SYM_OFFSET, /* branch stack specific sort keys */ __SORT_BRANCH_STACK, diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index afe6db8e7bf4..8c61f8627ebc 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -898,7 +898,7 @@ static bool hybrid_uniquify(struct evsel *evsel, struct perf_stat_config *config static void uniquify_counter(struct perf_stat_config *config, struct evsel *counter) { - if (config->no_merge || hybrid_uniquify(counter, config)) + if (config->aggr_mode == AGGR_NONE || hybrid_uniquify(counter, config)) uniquify_event_name(counter); } diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 1c5c3eeba4cf..e31426167852 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -264,7 +264,7 @@ static void print_ll_miss(struct perf_stat_config *config, static const double color_ratios[3] = {20.0, 10.0, 5.0}; print_ratio(config, evsel, aggr_idx, misses, out, STAT_LL_CACHE, color_ratios, - "of all L1-icache accesses"); + "of all LL-cache accesses"); } static void print_dtlb_miss(struct perf_stat_config *config, diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index ec3506042217..b0bcf92f0f9c 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -315,7 +315,7 @@ static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals, if (!counter->per_pkg) return 0; - if (perf_cpu_map__empty(cpus)) + if (perf_cpu_map__has_any_cpu_or_is_empty(cpus)) return 0; if (!mask) { @@ -592,7 +592,7 @@ void perf_stat_merge_counters(struct perf_stat_config *config, struct evlist *ev { struct evsel *evsel; - if (config->no_merge) + if (config->aggr_mode == AGGR_NONE) return; evlist__for_each_entry(evlist, evsel) diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 325d0fad1842..4357ba114822 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -76,7 +76,6 @@ struct perf_stat_config { bool null_run; bool ru_display; bool big_num; - bool no_merge; bool hybrid_merge; bool walltime_run_table; bool all_kernel; diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 9e7eeaf616b8..4b934ed3bfd1 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1392,8 +1392,7 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, map__set_start(map, shdr->sh_addr + ref_reloc(kmap)); map__set_end(map, map__start(map) + shdr->sh_size); map__set_pgoff(map, shdr->sh_offset); - map__set_map_ip(map, map__dso_map_ip); - map__set_unmap_ip(map, map__dso_unmap_ip); + map__set_mapping_type(map, MAPPING_TYPE__DSO); /* Ensure maps are correctly ordered */ if (kmaps) { int err; @@ -1455,8 +1454,7 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, map__set_end(curr_map, map__start(curr_map) + shdr->sh_size); map__set_pgoff(curr_map, shdr->sh_offset); } else { - map__set_map_ip(curr_map, identity__map_ip); - map__set_unmap_ip(curr_map, identity__map_ip); + map__set_mapping_type(curr_map, MAPPING_TYPE__IDENTITY); } curr_dso->symtab_type = dso->symtab_type; if (maps__insert(kmaps, curr_map)) diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index a81a14769bd1..1da8b713509c 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -159,9 +159,10 @@ int filename__read_build_id(const char *filename, struct build_id *bid) goto out_free; ret = read_build_id(buf, buf_size, bid, need_swap); - if (ret == 0) + if (ret == 0) { ret = bid->size; - break; + break; + } } } else { Elf64_Ehdr ehdr; @@ -210,9 +211,10 @@ int filename__read_build_id(const char *filename, struct build_id *bid) goto out_free; ret = read_build_id(buf, buf_size, bid, need_swap); - if (ret == 0) + if (ret == 0) { ret = bid->size; - break; + break; + } } } out_free: diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 82cc74b9358e..be212ba157dc 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -48,11 +48,6 @@ static bool symbol__is_idle(const char *name); int vmlinux_path__nr_entries; char **vmlinux_path; -struct map_list_node { - struct list_head node; - struct map *map; -}; - struct symbol_conf symbol_conf = { .nanosecs = false, .use_modules = true, @@ -90,11 +85,6 @@ static enum dso_binary_type binary_type_symtab[] = { #define DSO_BINARY_TYPE__SYMTAB_CNT ARRAY_SIZE(binary_type_symtab) -static struct map_list_node *map_list_node__new(void) -{ - return malloc(sizeof(struct map_list_node)); -} - static bool symbol_type__filter(char symbol_type) { symbol_type = toupper(symbol_type); @@ -270,29 +260,6 @@ void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms) curr->end = roundup(curr->start, 4096) + 4096; } -void maps__fixup_end(struct maps *maps) -{ - struct map_rb_node *prev = NULL, *curr; - - down_write(maps__lock(maps)); - - maps__for_each_entry(maps, curr) { - if (prev != NULL && !map__end(prev->map)) - map__set_end(prev->map, map__start(curr->map)); - - prev = curr; - } - - /* - * We still haven't the actual symbols, so guess the - * last map final address. - */ - if (curr && !map__end(curr->map)) - map__set_end(curr->map, ~0ULL); - - up_write(maps__lock(maps)); -} - struct symbol *symbol__new(u64 start, u64 len, u8 binding, u8 type, const char *name) { size_t namelen = strlen(name) + 1; @@ -956,8 +923,7 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta, return -1; } - map__set_map_ip(curr_map, identity__map_ip); - map__set_unmap_ip(curr_map, identity__map_ip); + map__set_mapping_type(curr_map, MAPPING_TYPE__IDENTITY); if (maps__insert(kmaps, curr_map)) { dso__put(ndso); return -1; @@ -1148,33 +1114,35 @@ out_delete_from: return ret; } +static int do_validate_kcore_modules_cb(struct map *old_map, void *data) +{ + struct rb_root *modules = data; + struct module_info *mi; + struct dso *dso; + + if (!__map__is_kmodule(old_map)) + return 0; + + dso = map__dso(old_map); + /* Module must be in memory at the same address */ + mi = find_module(dso->short_name, modules); + if (!mi || mi->start != map__start(old_map)) + return -EINVAL; + + return 0; +} + static int do_validate_kcore_modules(const char *filename, struct maps *kmaps) { struct rb_root modules = RB_ROOT; - struct map_rb_node *old_node; int err; err = read_proc_modules(filename, &modules); if (err) return err; - maps__for_each_entry(kmaps, old_node) { - struct map *old_map = old_node->map; - struct module_info *mi; - struct dso *dso; + err = maps__for_each_map(kmaps, do_validate_kcore_modules_cb, &modules); - if (!__map__is_kmodule(old_map)) { - continue; - } - dso = map__dso(old_map); - /* Module must be in memory at the same address */ - mi = find_module(dso->short_name, &modules); - if (!mi || mi->start != map__start(old_map)) { - err = -EINVAL; - goto out; - } - } -out: delete_modules(&modules); return err; } @@ -1271,101 +1239,15 @@ static int kcore_mapfn(u64 start, u64 len, u64 pgoff, void *data) return 0; } -/* - * Merges map into maps by splitting the new map within the existing map - * regions. - */ -int maps__merge_in(struct maps *kmaps, struct map *new_map) +static bool remove_old_maps(struct map *map, void *data) { - struct map_rb_node *rb_node; - LIST_HEAD(merged); - int err = 0; - - maps__for_each_entry(kmaps, rb_node) { - struct map *old_map = rb_node->map; - - /* no overload with this one */ - if (map__end(new_map) < map__start(old_map) || - map__start(new_map) >= map__end(old_map)) - continue; - - if (map__start(new_map) < map__start(old_map)) { - /* - * |new...... - * |old.... - */ - if (map__end(new_map) < map__end(old_map)) { - /* - * |new......| -> |new..| - * |old....| -> |old....| - */ - map__set_end(new_map, map__start(old_map)); - } else { - /* - * |new.............| -> |new..| |new..| - * |old....| -> |old....| - */ - struct map_list_node *m = map_list_node__new(); - - if (!m) { - err = -ENOMEM; - goto out; - } - - m->map = map__clone(new_map); - if (!m->map) { - free(m); - err = -ENOMEM; - goto out; - } - - map__set_end(m->map, map__start(old_map)); - list_add_tail(&m->node, &merged); - map__add_pgoff(new_map, map__end(old_map) - map__start(new_map)); - map__set_start(new_map, map__end(old_map)); - } - } else { - /* - * |new...... - * |old.... - */ - if (map__end(new_map) < map__end(old_map)) { - /* - * |new..| -> x - * |old.........| -> |old.........| - */ - map__put(new_map); - new_map = NULL; - break; - } else { - /* - * |new......| -> |new...| - * |old....| -> |old....| - */ - map__add_pgoff(new_map, map__end(old_map) - map__start(new_map)); - map__set_start(new_map, map__end(old_map)); - } - } - } - -out: - while (!list_empty(&merged)) { - struct map_list_node *old_node; - - old_node = list_entry(merged.next, struct map_list_node, node); - list_del_init(&old_node->node); - if (!err) - err = maps__insert(kmaps, old_node->map); - map__put(old_node->map); - free(old_node); - } + const struct map *map_to_save = data; - if (new_map) { - if (!err) - err = maps__insert(kmaps, new_map); - map__put(new_map); - } - return err; + /* + * We need to preserve eBPF maps even if they are covered by kcore, + * because we need to access eBPF dso for source data. + */ + return !RC_CHK_EQUAL(map, map_to_save) && !__map__is_bpf_prog(map); } static int dso__load_kcore(struct dso *dso, struct map *map, @@ -1374,7 +1256,6 @@ static int dso__load_kcore(struct dso *dso, struct map *map, struct maps *kmaps = map__kmaps(map); struct kcore_mapfn_data md; struct map *replacement_map = NULL; - struct map_rb_node *old_node, *next; struct machine *machine; bool is_64_bit; int err, fd; @@ -1421,17 +1302,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map, } /* Remove old maps */ - maps__for_each_entry_safe(kmaps, old_node, next) { - struct map *old_map = old_node->map; - - /* - * We need to preserve eBPF maps even if they are - * covered by kcore, because we need to access - * eBPF dso for source data. - */ - if (old_map != map && !__map__is_bpf_prog(old_map)) - maps__remove(kmaps, old_map); - } + maps__remove_maps(kmaps, remove_old_maps, map); machine->trampolines_mapped = false; /* Find the kernel map using the '_stext' symbol */ @@ -1475,8 +1346,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map, map__set_start(map, map__start(new_map)); map__set_end(map, map__end(new_map)); map__set_pgoff(map, map__pgoff(new_map)); - map__set_map_ip(map, map__map_ip_ptr(new_map)); - map__set_unmap_ip(map, map__unmap_ip_ptr(new_map)); + map__set_mapping_type(map, map__mapping_type(new_map)); /* Ensure maps are correctly ordered */ map_ref = map__get(map); maps__remove(kmaps, map_ref); @@ -2067,124 +1937,6 @@ out: return ret; } -static int map__strcmp(const void *a, const void *b) -{ - const struct map *map_a = *(const struct map **)a; - const struct map *map_b = *(const struct map **)b; - const struct dso *dso_a = map__dso(map_a); - const struct dso *dso_b = map__dso(map_b); - int ret = strcmp(dso_a->short_name, dso_b->short_name); - - if (ret == 0 && map_a != map_b) { - /* - * Ensure distinct but name equal maps have an order in part to - * aid reference counting. - */ - ret = (int)map__start(map_a) - (int)map__start(map_b); - if (ret == 0) - ret = (int)((intptr_t)map_a - (intptr_t)map_b); - } - - return ret; -} - -static int map__strcmp_name(const void *name, const void *b) -{ - const struct dso *dso = map__dso(*(const struct map **)b); - - return strcmp(name, dso->short_name); -} - -void __maps__sort_by_name(struct maps *maps) -{ - qsort(maps__maps_by_name(maps), maps__nr_maps(maps), sizeof(struct map *), map__strcmp); -} - -static int map__groups__sort_by_name_from_rbtree(struct maps *maps) -{ - struct map_rb_node *rb_node; - struct map **maps_by_name = realloc(maps__maps_by_name(maps), - maps__nr_maps(maps) * sizeof(struct map *)); - int i = 0; - - if (maps_by_name == NULL) - return -1; - - up_read(maps__lock(maps)); - down_write(maps__lock(maps)); - - RC_CHK_ACCESS(maps)->maps_by_name = maps_by_name; - RC_CHK_ACCESS(maps)->nr_maps_allocated = maps__nr_maps(maps); - - maps__for_each_entry(maps, rb_node) - maps_by_name[i++] = map__get(rb_node->map); - - __maps__sort_by_name(maps); - - up_write(maps__lock(maps)); - down_read(maps__lock(maps)); - - return 0; -} - -static struct map *__maps__find_by_name(struct maps *maps, const char *name) -{ - struct map **mapp; - - if (maps__maps_by_name(maps) == NULL && - map__groups__sort_by_name_from_rbtree(maps)) - return NULL; - - mapp = bsearch(name, maps__maps_by_name(maps), maps__nr_maps(maps), - sizeof(*mapp), map__strcmp_name); - if (mapp) - return *mapp; - return NULL; -} - -struct map *maps__find_by_name(struct maps *maps, const char *name) -{ - struct map_rb_node *rb_node; - struct map *map; - - down_read(maps__lock(maps)); - - - if (RC_CHK_ACCESS(maps)->last_search_by_name) { - const struct dso *dso = map__dso(RC_CHK_ACCESS(maps)->last_search_by_name); - - if (strcmp(dso->short_name, name) == 0) { - map = RC_CHK_ACCESS(maps)->last_search_by_name; - goto out_unlock; - } - } - /* - * If we have maps->maps_by_name, then the name isn't in the rbtree, - * as maps->maps_by_name mirrors the rbtree when lookups by name are - * made. - */ - map = __maps__find_by_name(maps, name); - if (map || maps__maps_by_name(maps) != NULL) - goto out_unlock; - - /* Fallback to traversing the rbtree... */ - maps__for_each_entry(maps, rb_node) { - struct dso *dso; - - map = rb_node->map; - dso = map__dso(map); - if (strcmp(dso->short_name, name) == 0) { - RC_CHK_ACCESS(maps)->last_search_by_name = map; - goto out_unlock; - } - } - map = NULL; - -out_unlock: - up_read(maps__lock(maps)); - return map; -} - int dso__load_vmlinux(struct dso *dso, struct map *map, const char *vmlinux, bool vmlinux_allocated) { diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index af87c46b3f89..071837ddce2a 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -189,7 +189,6 @@ void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym, void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym); void symbols__fixup_duplicate(struct rb_root_cached *symbols); void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms); -void maps__fixup_end(struct maps *maps); typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data); int file__read_maps(int fd, bool exe, mapfn_t mapfn, void *data, diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h index 0b589570d1d0..c114bbceef40 100644 --- a/tools/perf/util/symbol_conf.h +++ b/tools/perf/util/symbol_conf.h @@ -42,7 +42,11 @@ struct symbol_conf { inline_name, disable_add2line_warn, buildid_mmap2, - guest_code; + guest_code, + lazy_load_kernel_maps, + keep_exited_threads, + annotate_data_member, + annotate_data_sample; const char *vmlinux_name, *kallsyms_name, *source_prefix, diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index a0579c7d7b9e..3712186353fb 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -665,18 +665,74 @@ int perf_event__synthesize_cgroups(struct perf_tool *tool __maybe_unused, } #endif +struct perf_event__synthesize_modules_maps_cb_args { + struct perf_tool *tool; + perf_event__handler_t process; + struct machine *machine; + union perf_event *event; +}; + +static int perf_event__synthesize_modules_maps_cb(struct map *map, void *data) +{ + struct perf_event__synthesize_modules_maps_cb_args *args = data; + union perf_event *event = args->event; + struct dso *dso; + size_t size; + + if (!__map__is_kmodule(map)) + return 0; + + dso = map__dso(map); + if (symbol_conf.buildid_mmap2) { + size = PERF_ALIGN(dso->long_name_len + 1, sizeof(u64)); + event->mmap2.header.type = PERF_RECORD_MMAP2; + event->mmap2.header.size = (sizeof(event->mmap2) - + (sizeof(event->mmap2.filename) - size)); + memset(event->mmap2.filename + size, 0, args->machine->id_hdr_size); + event->mmap2.header.size += args->machine->id_hdr_size; + event->mmap2.start = map__start(map); + event->mmap2.len = map__size(map); + event->mmap2.pid = args->machine->pid; + + memcpy(event->mmap2.filename, dso->long_name, dso->long_name_len + 1); + + perf_record_mmap2__read_build_id(&event->mmap2, args->machine, false); + } else { + size = PERF_ALIGN(dso->long_name_len + 1, sizeof(u64)); + event->mmap.header.type = PERF_RECORD_MMAP; + event->mmap.header.size = (sizeof(event->mmap) - + (sizeof(event->mmap.filename) - size)); + memset(event->mmap.filename + size, 0, args->machine->id_hdr_size); + event->mmap.header.size += args->machine->id_hdr_size; + event->mmap.start = map__start(map); + event->mmap.len = map__size(map); + event->mmap.pid = args->machine->pid; + + memcpy(event->mmap.filename, dso->long_name, dso->long_name_len + 1); + } + + if (perf_tool__process_synth_event(args->tool, event, args->machine, args->process) != 0) + return -1; + + return 0; +} + int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine) { - int rc = 0; - struct map_rb_node *pos; + int rc; struct maps *maps = machine__kernel_maps(machine); - union perf_event *event; - size_t size = symbol_conf.buildid_mmap2 ? - sizeof(event->mmap2) : sizeof(event->mmap); + struct perf_event__synthesize_modules_maps_cb_args args = { + .tool = tool, + .process = process, + .machine = machine, + }; + size_t size = symbol_conf.buildid_mmap2 + ? sizeof(args.event->mmap2) + : sizeof(args.event->mmap); - event = zalloc(size + machine->id_hdr_size); - if (event == NULL) { + args.event = zalloc(size + machine->id_hdr_size); + if (args.event == NULL) { pr_debug("Not enough memory synthesizing mmap event " "for kernel modules\n"); return -1; @@ -687,53 +743,13 @@ int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t * __perf_event_mmap */ if (machine__is_host(machine)) - event->header.misc = PERF_RECORD_MISC_KERNEL; + args.event->header.misc = PERF_RECORD_MISC_KERNEL; else - event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; - - maps__for_each_entry(maps, pos) { - struct map *map = pos->map; - struct dso *dso; + args.event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; - if (!__map__is_kmodule(map)) - continue; + rc = maps__for_each_map(maps, perf_event__synthesize_modules_maps_cb, &args); - dso = map__dso(map); - if (symbol_conf.buildid_mmap2) { - size = PERF_ALIGN(dso->long_name_len + 1, sizeof(u64)); - event->mmap2.header.type = PERF_RECORD_MMAP2; - event->mmap2.header.size = (sizeof(event->mmap2) - - (sizeof(event->mmap2.filename) - size)); - memset(event->mmap2.filename + size, 0, machine->id_hdr_size); - event->mmap2.header.size += machine->id_hdr_size; - event->mmap2.start = map__start(map); - event->mmap2.len = map__size(map); - event->mmap2.pid = machine->pid; - - memcpy(event->mmap2.filename, dso->long_name, dso->long_name_len + 1); - - perf_record_mmap2__read_build_id(&event->mmap2, machine, false); - } else { - size = PERF_ALIGN(dso->long_name_len + 1, sizeof(u64)); - event->mmap.header.type = PERF_RECORD_MMAP; - event->mmap.header.size = (sizeof(event->mmap) - - (sizeof(event->mmap.filename) - size)); - memset(event->mmap.filename + size, 0, machine->id_hdr_size); - event->mmap.header.size += machine->id_hdr_size; - event->mmap.start = map__start(map); - event->mmap.len = map__size(map); - event->mmap.pid = machine->pid; - - memcpy(event->mmap.filename, dso->long_name, dso->long_name_len + 1); - } - - if (perf_tool__process_synth_event(tool, event, machine, process) != 0) { - rc = -1; - break; - } - } - - free(event); + free(args.event); return rc; } diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index fe5e6991ae4b..89c47a5098e2 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -345,38 +345,36 @@ int thread__insert_map(struct thread *thread, struct map *map) if (ret) return ret; - maps__fixup_overlappings(thread__maps(thread), map, stderr); - return maps__insert(thread__maps(thread), map); + return maps__fixup_overlap_and_insert(thread__maps(thread), map); } -static int __thread__prepare_access(struct thread *thread) +struct thread__prepare_access_maps_cb_args { + int err; + struct maps *maps; +}; + +static int thread__prepare_access_maps_cb(struct map *map, void *data) { bool initialized = false; - int err = 0; - struct maps *maps = thread__maps(thread); - struct map_rb_node *rb_node; - - down_read(maps__lock(maps)); - - maps__for_each_entry(maps, rb_node) { - err = unwind__prepare_access(thread__maps(thread), rb_node->map, &initialized); - if (err || initialized) - break; - } + struct thread__prepare_access_maps_cb_args *args = data; - up_read(maps__lock(maps)); + args->err = unwind__prepare_access(args->maps, map, &initialized); - return err; + return (args->err || initialized) ? 1 : 0; } static int thread__prepare_access(struct thread *thread) { - int err = 0; + struct thread__prepare_access_maps_cb_args args = { + .err = 0, + }; - if (dwarf_callchain_users) - err = __thread__prepare_access(thread); + if (dwarf_callchain_users) { + args.maps = thread__maps(thread); + maps__for_each_map(thread__maps(thread), thread__prepare_access_maps_cb, &args); + } - return err; + return args.err; } static int thread__clone_maps(struct thread *thread, struct thread *parent, bool do_maps_clone) @@ -385,14 +383,14 @@ static int thread__clone_maps(struct thread *thread, struct thread *parent, bool if (thread__pid(thread) == thread__pid(parent)) return thread__prepare_access(thread); - if (thread__maps(thread) == thread__maps(parent)) { + if (RC_CHK_EQUAL(thread__maps(thread), thread__maps(parent))) { pr_debug("broken map groups on thread %d/%d parent %d/%d\n", thread__pid(thread), thread__tid(thread), thread__pid(parent), thread__tid(parent)); return 0; } /* But this one is new process, copy maps. */ - return do_maps_clone ? maps__clone(thread, thread__maps(parent)) : 0; + return do_maps_clone ? maps__copy_from(thread__maps(thread), thread__maps(parent)) : 0; } int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone) diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index e79225a0ea46..0df775b5c110 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -36,13 +36,22 @@ struct thread_rb_node { }; DECLARE_RC_STRUCT(thread) { + /** @maps: mmaps associated with this thread. */ struct maps *maps; pid_t pid_; /* Not all tools update this */ + /** @tid: thread ID number unique to a machine. */ pid_t tid; + /** @ppid: parent process of the process this thread belongs to. */ pid_t ppid; int cpu; int guest_cpu; /* For QEMU thread */ refcount_t refcnt; + /** + * @exited: Has the thread had an exit event. Such threads are usually + * removed from the machine's threads but some events/tools require + * access to dead threads. + */ + bool exited; bool comm_set; int comm_len; struct list_head namespaces_list; @@ -189,6 +198,11 @@ static inline refcount_t *thread__refcnt(struct thread *thread) return &RC_CHK_ACCESS(thread)->refcnt; } +static inline void thread__set_exited(struct thread *thread, bool exited) +{ + RC_CHK_ACCESS(thread)->exited = exited; +} + static inline bool thread__comm_set(const struct thread *thread) { return RC_CHK_ACCESS(thread)->comm_set; diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c index be7157de0451..4db3d1bd686c 100644 --- a/tools/perf/util/top.c +++ b/tools/perf/util/top.c @@ -28,6 +28,7 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) struct record_opts *opts = &top->record_opts; struct target *target = &opts->target; size_t ret = 0; + int nr_cpus; if (top->samples) { samples_per_sec = top->samples / top->delay_secs; @@ -93,19 +94,17 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) else ret += SNPRINTF(bf + ret, size - ret, " (all"); + nr_cpus = perf_cpu_map__nr(top->evlist->core.user_requested_cpus); if (target->cpu_list) ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)", - perf_cpu_map__nr(top->evlist->core.user_requested_cpus) > 1 - ? "s" : "", + nr_cpus > 1 ? "s" : "", target->cpu_list); else { if (target->tid) ret += SNPRINTF(bf + ret, size - ret, ")"); else ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)", - perf_cpu_map__nr(top->evlist->core.user_requested_cpus), - perf_cpu_map__nr(top->evlist->core.user_requested_cpus) > 1 - ? "s" : ""); + nr_cpus, nr_cpus > 1 ? "s" : ""); } perf_top__reset_sample_counters(top); diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index a8b0d79bd96c..4c5588dbb131 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -21,7 +21,6 @@ struct perf_top { struct perf_tool tool; struct evlist *evlist, *sb_evlist; struct record_opts record_opts; - struct annotation_options annotation_opts; struct evswitch evswitch; /* * Symbols will be added here in perf_event__process_sample and will diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index 8554db3fc0d7..6013335a8dae 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -46,6 +46,7 @@ static int __report_module(struct addr_location *al, u64 ip, { Dwfl_Module *mod; struct dso *dso = NULL; + Dwarf_Addr base; /* * Some callers will use al->sym, so we can't just use the * cheaper thread__find_map() here. @@ -58,13 +59,25 @@ static int __report_module(struct addr_location *al, u64 ip, if (!dso) return 0; + /* + * The generated JIT DSO files only map the code segment without + * ELF headers. Since JIT codes used to be packed in a memory + * segment, calculating the base address using pgoff falls into + * a different code in another DSO. So just use the map->start + * directly to pick the correct one. + */ + if (!strncmp(dso->long_name, "/tmp/jitted-", 12)) + base = map__start(al->map); + else + base = map__start(al->map) - map__pgoff(al->map); + mod = dwfl_addrmodule(ui->dwfl, ip); if (mod) { Dwarf_Addr s; dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL); - if (s != map__start(al->map) - map__pgoff(al->map)) - mod = 0; + if (s != base) + mod = NULL; } if (!mod) { @@ -72,14 +85,14 @@ static int __report_module(struct addr_location *al, u64 ip, __symbol__join_symfs(filename, sizeof(filename), dso->long_name); mod = dwfl_report_elf(ui->dwfl, dso->short_name, filename, -1, - map__start(al->map) - map__pgoff(al->map), false); + base, false); } if (!mod) { char filename[PATH_MAX]; if (dso__build_id_filename(dso, filename, sizeof(filename), false)) mod = dwfl_report_elf(ui->dwfl, dso->short_name, filename, -1, - map__start(al->map) - map__pgoff(al->map), false); + base, false); } if (mod) { diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index c0641882fd2f..dac536e28360 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -302,12 +302,31 @@ static int unwind_spec_ehframe(struct dso *dso, struct machine *machine, return 0; } +struct read_unwind_spec_eh_frame_maps_cb_args { + struct dso *dso; + u64 base_addr; +}; + +static int read_unwind_spec_eh_frame_maps_cb(struct map *map, void *data) +{ + + struct read_unwind_spec_eh_frame_maps_cb_args *args = data; + + if (map__dso(map) == args->dso && map__start(map) - map__pgoff(map) < args->base_addr) + args->base_addr = map__start(map) - map__pgoff(map); + + return 0; +} + + static int read_unwind_spec_eh_frame(struct dso *dso, struct unwind_info *ui, u64 *table_data, u64 *segbase, u64 *fde_count) { - struct map_rb_node *map_node; - u64 base_addr = UINT64_MAX; + struct read_unwind_spec_eh_frame_maps_cb_args args = { + .dso = dso, + .base_addr = UINT64_MAX, + }; int ret, fd; if (dso->data.eh_frame_hdr_offset == 0) { @@ -325,16 +344,11 @@ static int read_unwind_spec_eh_frame(struct dso *dso, struct unwind_info *ui, return -EINVAL; } - maps__for_each_entry(thread__maps(ui->thread), map_node) { - struct map *map = map_node->map; - u64 start = map__start(map); + maps__for_each_map(thread__maps(ui->thread), read_unwind_spec_eh_frame_maps_cb, &args); - if (map__dso(map) == dso && start < base_addr) - base_addr = start; - } - base_addr -= dso->data.elf_base_addr; + args.base_addr -= dso->data.elf_base_addr; /* Address of .eh_frame_hdr */ - *segbase = base_addr + dso->data.eh_frame_hdr_addr; + *segbase = args.base_addr + dso->data.eh_frame_hdr_addr; ret = unwind_spec_ehframe(dso, ui->machine, dso->data.eh_frame_hdr_offset, table_data, fde_count); if (ret) diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c index ae3eee69b659..df8963796187 100644 --- a/tools/perf/util/vdso.c +++ b/tools/perf/util/vdso.c @@ -140,23 +140,34 @@ static struct dso *__machine__addnew_vdso(struct machine *machine, const char *s return dso; } +struct machine__thread_dso_type_maps_cb_args { + struct machine *machine; + enum dso_type dso_type; +}; + +static int machine__thread_dso_type_maps_cb(struct map *map, void *data) +{ + struct machine__thread_dso_type_maps_cb_args *args = data; + struct dso *dso = map__dso(map); + + if (!dso || dso->long_name[0] != '/') + return 0; + + args->dso_type = dso__type(dso, args->machine); + return (args->dso_type != DSO__TYPE_UNKNOWN) ? 1 : 0; +} + static enum dso_type machine__thread_dso_type(struct machine *machine, struct thread *thread) { - enum dso_type dso_type = DSO__TYPE_UNKNOWN; - struct map_rb_node *rb_node; - - maps__for_each_entry(thread__maps(thread), rb_node) { - struct dso *dso = map__dso(rb_node->map); + struct machine__thread_dso_type_maps_cb_args args = { + .machine = machine, + .dso_type = DSO__TYPE_UNKNOWN, + }; - if (!dso || dso->long_name[0] != '/') - continue; - dso_type = dso__type(dso, machine); - if (dso_type != DSO__TYPE_UNKNOWN) - break; - } + maps__for_each_map(thread__maps(thread), machine__thread_dso_type_maps_cb, &args); - return dso_type; + return args.dso_type; } #if BITS_PER_LONG == 64 diff --git a/tools/perf/util/zstd.c b/tools/perf/util/zstd.c index 48dd2b018c47..57027e0ac7b6 100644 --- a/tools/perf/util/zstd.c +++ b/tools/perf/util/zstd.c @@ -7,35 +7,9 @@ int zstd_init(struct zstd_data *data, int level) { - size_t ret; - - data->dstream = ZSTD_createDStream(); - if (data->dstream == NULL) { - pr_err("Couldn't create decompression stream.\n"); - return -1; - } - - ret = ZSTD_initDStream(data->dstream); - if (ZSTD_isError(ret)) { - pr_err("Failed to initialize decompression stream: %s\n", ZSTD_getErrorName(ret)); - return -1; - } - - if (!level) - return 0; - - data->cstream = ZSTD_createCStream(); - if (data->cstream == NULL) { - pr_err("Couldn't create compression stream.\n"); - return -1; - } - - ret = ZSTD_initCStream(data->cstream, level); - if (ZSTD_isError(ret)) { - pr_err("Failed to initialize compression stream: %s\n", ZSTD_getErrorName(ret)); - return -1; - } - + data->comp_level = level; + data->dstream = NULL; + data->cstream = NULL; return 0; } @@ -54,7 +28,7 @@ int zstd_fini(struct zstd_data *data) return 0; } -size_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size, +ssize_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size, void *src, size_t src_size, size_t max_record_size, size_t process_header(void *record, size_t increment)) { @@ -63,6 +37,21 @@ size_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t ZSTD_outBuffer output; void *record; + if (!data->cstream) { + data->cstream = ZSTD_createCStream(); + if (data->cstream == NULL) { + pr_err("Couldn't create compression stream.\n"); + return -1; + } + + ret = ZSTD_initCStream(data->cstream, data->comp_level); + if (ZSTD_isError(ret)) { + pr_err("Failed to initialize compression stream: %s\n", + ZSTD_getErrorName(ret)); + return -1; + } + } + while (input.pos < input.size) { record = dst; size = process_header(record, 0); @@ -96,6 +85,20 @@ size_t zstd_decompress_stream(struct zstd_data *data, void *src, size_t src_size ZSTD_inBuffer input = { src, src_size, 0 }; ZSTD_outBuffer output = { dst, dst_size, 0 }; + if (!data->dstream) { + data->dstream = ZSTD_createDStream(); + if (data->dstream == NULL) { + pr_err("Couldn't create decompression stream.\n"); + return 0; + } + + ret = ZSTD_initDStream(data->dstream); + if (ZSTD_isError(ret)) { + pr_err("Failed to initialize decompression stream: %s\n", + ZSTD_getErrorName(ret)); + return 0; + } + } while (input.pos < input.size) { ret = ZSTD_decompressStream(data->dstream, &output, &input); if (ZSTD_isError(ret)) { |