diff options
Diffstat (limited to 'tools/perf')
-rw-r--r-- | tools/perf/builtin-trace.c | 2 | ||||
-rwxr-xr-x | tools/perf/scripts/python/arm-cs-trace-disasm.py | 34 | ||||
-rw-r--r-- | tools/perf/tests/perf-time-to-tsc.c | 27 | ||||
-rw-r--r-- | tools/perf/util/bpf-loader.c | 222 | ||||
-rw-r--r-- | tools/perf/util/bpf-utils.c | 5 | ||||
-rw-r--r-- | tools/perf/util/bpf_off_cpu.c | 7 | ||||
-rw-r--r-- | tools/perf/util/bpf_skel/off_cpu.bpf.c | 20 | ||||
-rw-r--r-- | tools/perf/util/evsel.c | 9 | ||||
-rw-r--r-- | tools/perf/util/off_cpu.h | 9 | ||||
-rw-r--r-- | tools/perf/util/symbol-elf.c | 56 | ||||
-rw-r--r-- | tools/perf/util/synthetic-events.c | 9 | ||||
-rw-r--r-- | tools/perf/util/unwind-libunwind-local.c | 2 |
12 files changed, 310 insertions, 92 deletions
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 897fc504918b..f075cf37a65e 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -4280,6 +4280,7 @@ static int trace__replay(struct trace *trace) goto out; evsel = evlist__find_tracepoint_by_name(session->evlist, "raw_syscalls:sys_enter"); + trace->syscalls.events.sys_enter = evsel; /* older kernels have syscalls tp versus raw_syscalls */ if (evsel == NULL) evsel = evlist__find_tracepoint_by_name(session->evlist, "syscalls:sys_enter"); @@ -4292,6 +4293,7 @@ static int trace__replay(struct trace *trace) } evsel = evlist__find_tracepoint_by_name(session->evlist, "raw_syscalls:sys_exit"); + trace->syscalls.events.sys_exit = evsel; if (evsel == NULL) evsel = evlist__find_tracepoint_by_name(session->evlist, "syscalls:sys_exit"); if (evsel && diff --git a/tools/perf/scripts/python/arm-cs-trace-disasm.py b/tools/perf/scripts/python/arm-cs-trace-disasm.py index 5f57d9829956..4339692a8d0b 100755 --- a/tools/perf/scripts/python/arm-cs-trace-disasm.py +++ b/tools/perf/scripts/python/arm-cs-trace-disasm.py @@ -61,7 +61,7 @@ def get_optional(perf_dict, field): def get_offset(perf_dict, field): if field in perf_dict: - return f"+0x{perf_dict[field]:x}" + return "+%#x" % perf_dict[field] return "" def get_dso_file_path(dso_name, dso_build_id): @@ -76,7 +76,7 @@ def get_dso_file_path(dso_name, dso_build_id): else: append = "/elf" - dso_path = f"{os.environ['PERF_BUILDID_DIR']}/{dso_name}/{dso_build_id}{append}" + dso_path = os.environ['PERF_BUILDID_DIR'] + "/" + dso_name + "/" + dso_build_id + append; # Replace duplicate slash chars to single slash char dso_path = dso_path.replace('//', '/', 1) return dso_path @@ -94,8 +94,8 @@ def read_disam(dso_fname, dso_start, start_addr, stop_addr): start_addr = start_addr - dso_start; stop_addr = stop_addr - dso_start; disasm = [ options.objdump_name, "-d", "-z", - f"--start-address=0x{start_addr:x}", - f"--stop-address=0x{stop_addr:x}" ] + "--start-address="+format(start_addr,"#x"), + "--stop-address="+format(stop_addr,"#x") ] disasm += [ dso_fname ] disasm_output = check_output(disasm).decode('utf-8').split('\n') disasm_cache[addr_range] = disasm_output @@ -109,12 +109,14 @@ def print_disam(dso_fname, dso_start, start_addr, stop_addr): m = disasm_re.search(line) if m is None: continue - print(f"\t{line}") + print("\t" + line) def print_sample(sample): - print(f"Sample = {{ cpu: {sample['cpu']:04} addr: 0x{sample['addr']:016x} " \ - f"phys_addr: 0x{sample['phys_addr']:016x} ip: 0x{sample['ip']:016x} " \ - f"pid: {sample['pid']} tid: {sample['tid']} period: {sample['period']} time: {sample['time']} }}") + print("Sample = { cpu: %04d addr: 0x%016x phys_addr: 0x%016x ip: 0x%016x " \ + "pid: %d tid: %d period: %d time: %d }" % \ + (sample['cpu'], sample['addr'], sample['phys_addr'], \ + sample['ip'], sample['pid'], sample['tid'], \ + sample['period'], sample['time'])) def trace_begin(): print('ARM CoreSight Trace Data Assembler Dump') @@ -131,7 +133,7 @@ def common_start_str(comm, sample): cpu = sample["cpu"] pid = sample["pid"] tid = sample["tid"] - return f"{comm:>16} {pid:>5}/{tid:<5} [{cpu:04}] {sec:9}.{ns:09} " + return "%16s %5u/%-5u [%04u] %9u.%09u " % (comm, pid, tid, cpu, sec, ns) # This code is copied from intel-pt-events.py for printing source code # line and symbols. @@ -171,7 +173,7 @@ def print_srccode(comm, param_dict, sample, symbol, dso): glb_line_number = line_number glb_source_file_name = source_file_name - print(f"{start_str}{src_str}") + print(start_str, src_str) def process_event(param_dict): global cache_size @@ -188,7 +190,7 @@ def process_event(param_dict): symbol = get_optional(param_dict, "symbol") if (options.verbose == True): - print(f"Event type: {name}") + print("Event type: %s" % name) print_sample(sample) # If cannot find dso so cannot dump assembler, bail out @@ -197,7 +199,7 @@ def process_event(param_dict): # Validate dso start and end addresses if ((dso_start == '[unknown]') or (dso_end == '[unknown]')): - print(f"Failed to find valid dso map for dso {dso}") + print("Failed to find valid dso map for dso %s" % dso) return if (name[0:12] == "instructions"): @@ -244,15 +246,15 @@ def process_event(param_dict): # Handle CS_ETM_TRACE_ON packet if start_addr=0 and stop_addr=4 if (start_addr == 0 and stop_addr == 4): - print(f"CPU{cpu}: CS_ETM_TRACE_ON packet is inserted") + print("CPU%d: CS_ETM_TRACE_ON packet is inserted" % cpu) return if (start_addr < int(dso_start) or start_addr > int(dso_end)): - print(f"Start address 0x{start_addr:x} is out of range [ 0x{dso_start:x} .. 0x{dso_end:x} ] for dso {dso}") + print("Start address 0x%x is out of range [ 0x%x .. 0x%x ] for dso %s" % (start_addr, int(dso_start), int(dso_end), dso)) return if (stop_addr < int(dso_start) or stop_addr > int(dso_end)): - print(f"Stop address 0x{stop_addr:x} is out of range [ 0x{dso_start:x} .. 0x{dso_end:x} ] for dso {dso}") + print("Stop address 0x%x is out of range [ 0x%x .. 0x%x ] for dso %s" % (stop_addr, int(dso_start), int(dso_end), dso)) return if (options.objdump_name != None): @@ -267,6 +269,6 @@ def process_event(param_dict): if path.exists(dso_fname): print_disam(dso_fname, dso_vm_start, start_addr, stop_addr) else: - print(f"Failed to find dso {dso} for address range [ 0x{start_addr:x} .. 0x{stop_addr:x} ]") + print("Failed to find dso %s for address range [ 0x%x .. 0x%x ]" % (dso, start_addr, stop_addr)) print_srccode(comm, param_dict, sample, symbol, dso) diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c index 4ad0dfbc8b21..7c7d20fc503a 100644 --- a/tools/perf/tests/perf-time-to-tsc.c +++ b/tools/perf/tests/perf-time-to-tsc.c @@ -20,8 +20,6 @@ #include "tsc.h" #include "mmap.h" #include "tests.h" -#include "pmu.h" -#include "pmu-hybrid.h" /* * Except x86_64/i386 and Arm64, other archs don't support TSC in perf. Just @@ -106,28 +104,21 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su evlist__config(evlist, &opts, NULL); - evsel = evlist__first(evlist); - - evsel->core.attr.comm = 1; - evsel->core.attr.disabled = 1; - evsel->core.attr.enable_on_exec = 0; - - /* - * For hybrid "cycles:u", it creates two events. - * Init the second evsel here. - */ - if (perf_pmu__has_hybrid() && perf_pmu__hybrid_mounted("cpu_atom")) { - evsel = evsel__next(evsel); + /* For hybrid "cycles:u", it creates two events */ + evlist__for_each_entry(evlist, evsel) { evsel->core.attr.comm = 1; evsel->core.attr.disabled = 1; evsel->core.attr.enable_on_exec = 0; } - if (evlist__open(evlist) == -ENOENT) { - err = TEST_SKIP; + ret = evlist__open(evlist); + if (ret < 0) { + if (ret == -ENOENT) + err = TEST_SKIP; + else + pr_debug("evlist__open() failed\n"); goto out_err; } - CHECK__(evlist__open(evlist)); CHECK__(evlist__mmap(evlist, UINT_MAX)); @@ -167,10 +158,12 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su goto next_event; if (strcmp(event->comm.comm, comm1) == 0) { + CHECK_NOT_NULL__(evsel = evlist__event2evsel(evlist, event)); CHECK__(evsel__parse_sample(evsel, event, &sample)); comm1_time = sample.time; } if (strcmp(event->comm.comm, comm2) == 0) { + CHECK_NOT_NULL__(evsel = evlist__event2evsel(evlist, event)); CHECK__(evsel__parse_sample(evsel, event, &sample)); comm2_time = sample.time; } diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index f8ad581ea247..d2c9b09ddb48 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -9,6 +9,7 @@ #include <linux/bpf.h> #include <bpf/libbpf.h> #include <bpf/bpf.h> +#include <linux/filter.h> #include <linux/err.h> #include <linux/kernel.h> #include <linux/string.h> @@ -49,6 +50,7 @@ struct bpf_prog_priv { struct bpf_insn *insns_buf; int nr_types; int *type_mapping; + int *prologue_fds; }; struct bpf_perf_object { @@ -56,6 +58,11 @@ struct bpf_perf_object { struct bpf_object *obj; }; +struct bpf_preproc_result { + struct bpf_insn *new_insn_ptr; + int new_insn_cnt; +}; + static LIST_HEAD(bpf_objects_list); static struct hashmap *bpf_program_hash; static struct hashmap *bpf_map_hash; @@ -63,20 +70,16 @@ static struct hashmap *bpf_map_hash; static struct bpf_perf_object * bpf_perf_object__next(struct bpf_perf_object *prev) { - struct bpf_perf_object *next; - - if (!prev) - next = list_first_entry(&bpf_objects_list, - struct bpf_perf_object, - list); - else - next = list_next_entry(prev, list); + if (!prev) { + if (list_empty(&bpf_objects_list)) + return NULL; - /* Empty list is noticed here so don't need checking on entry. */ - if (&next->list == &bpf_objects_list) + return list_first_entry(&bpf_objects_list, struct bpf_perf_object, list); + } + if (list_is_last(&prev->list, &bpf_objects_list)) return NULL; - return next; + return list_next_entry(prev, list); } #define bpf_perf_object__for_each(perf_obj, tmp) \ @@ -86,6 +89,7 @@ bpf_perf_object__next(struct bpf_perf_object *prev) (perf_obj) = (tmp), (tmp) = bpf_perf_object__next(tmp)) static bool libbpf_initialized; +static int libbpf_sec_handler; static int bpf_perf_object__add(struct bpf_object *obj) { @@ -99,12 +103,76 @@ static int bpf_perf_object__add(struct bpf_object *obj) return perf_obj ? 0 : -ENOMEM; } +static void *program_priv(const struct bpf_program *prog) +{ + void *priv; + + if (IS_ERR_OR_NULL(bpf_program_hash)) + return NULL; + if (!hashmap__find(bpf_program_hash, prog, &priv)) + return NULL; + return priv; +} + +static struct bpf_insn prologue_init_insn[] = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), +}; + +static int libbpf_prog_prepare_load_fn(struct bpf_program *prog, + struct bpf_prog_load_opts *opts __maybe_unused, + long cookie __maybe_unused) +{ + size_t init_size_cnt = ARRAY_SIZE(prologue_init_insn); + size_t orig_insn_cnt, insn_cnt, init_size, orig_size; + struct bpf_prog_priv *priv = program_priv(prog); + const struct bpf_insn *orig_insn; + struct bpf_insn *insn; + + if (IS_ERR_OR_NULL(priv)) { + pr_debug("bpf: failed to get private field\n"); + return -BPF_LOADER_ERRNO__INTERNAL; + } + + if (!priv->need_prologue) + return 0; + + /* prepend initialization code to program instructions */ + orig_insn = bpf_program__insns(prog); + orig_insn_cnt = bpf_program__insn_cnt(prog); + init_size = init_size_cnt * sizeof(*insn); + orig_size = orig_insn_cnt * sizeof(*insn); + + insn_cnt = orig_insn_cnt + init_size_cnt; + insn = malloc(insn_cnt * sizeof(*insn)); + if (!insn) + return -ENOMEM; + + memcpy(insn, prologue_init_insn, init_size); + memcpy((char *) insn + init_size, orig_insn, orig_size); + bpf_program__set_insns(prog, insn, insn_cnt); + return 0; +} + static int libbpf_init(void) { + LIBBPF_OPTS(libbpf_prog_handler_opts, handler_opts, + .prog_prepare_load_fn = libbpf_prog_prepare_load_fn, + ); + if (libbpf_initialized) return 0; libbpf_set_print(libbpf_perf_print); + libbpf_sec_handler = libbpf_register_prog_handler(NULL, BPF_PROG_TYPE_KPROBE, + 0, &handler_opts); + if (libbpf_sec_handler < 0) { + pr_debug("bpf: failed to register libbpf section handler: %d\n", + libbpf_sec_handler); + return -BPF_LOADER_ERRNO__INTERNAL; + } libbpf_initialized = true; return 0; } @@ -188,14 +256,31 @@ struct bpf_object *bpf__prepare_load(const char *filename, bool source) return obj; } +static void close_prologue_programs(struct bpf_prog_priv *priv) +{ + struct perf_probe_event *pev; + int i, fd; + + if (!priv->need_prologue) + return; + pev = &priv->pev; + for (i = 0; i < pev->ntevs; i++) { + fd = priv->prologue_fds[i]; + if (fd != -1) + close(fd); + } +} + static void clear_prog_priv(const struct bpf_program *prog __maybe_unused, void *_priv) { struct bpf_prog_priv *priv = _priv; + close_prologue_programs(priv); cleanup_perf_probe_events(&priv->pev, 1); zfree(&priv->insns_buf); + zfree(&priv->prologue_fds); zfree(&priv->type_mapping); zfree(&priv->sys_name); zfree(&priv->evt_name); @@ -243,17 +328,6 @@ static bool ptr_equal(const void *key1, const void *key2, return key1 == key2; } -static void *program_priv(const struct bpf_program *prog) -{ - void *priv; - - if (IS_ERR_OR_NULL(bpf_program_hash)) - return NULL; - if (!hashmap__find(bpf_program_hash, prog, &priv)) - return NULL; - return priv; -} - static int program_set_priv(struct bpf_program *prog, void *priv) { void *old_priv; @@ -558,8 +632,8 @@ static int bpf__prepare_probe(void) static int preproc_gen_prologue(struct bpf_program *prog, int n, - struct bpf_insn *orig_insns, int orig_insns_cnt, - struct bpf_prog_prep_result *res) + const struct bpf_insn *orig_insns, int orig_insns_cnt, + struct bpf_preproc_result *res) { struct bpf_prog_priv *priv = program_priv(prog); struct probe_trace_event *tev; @@ -607,7 +681,6 @@ preproc_gen_prologue(struct bpf_program *prog, int n, res->new_insn_ptr = buf; res->new_insn_cnt = prologue_cnt + orig_insns_cnt; - res->pfd = NULL; return 0; errout: @@ -715,7 +788,7 @@ static int hook_load_preprocessor(struct bpf_program *prog) struct bpf_prog_priv *priv = program_priv(prog); struct perf_probe_event *pev; bool need_prologue = false; - int err, i; + int i; if (IS_ERR_OR_NULL(priv)) { pr_debug("Internal error when hook preprocessor\n"); @@ -753,6 +826,13 @@ static int hook_load_preprocessor(struct bpf_program *prog) return -ENOMEM; } + priv->prologue_fds = malloc(sizeof(int) * pev->ntevs); + if (!priv->prologue_fds) { + pr_debug("Not enough memory: alloc prologue fds failed\n"); + return -ENOMEM; + } + memset(priv->prologue_fds, -1, sizeof(int) * pev->ntevs); + priv->type_mapping = malloc(sizeof(int) * pev->ntevs); if (!priv->type_mapping) { pr_debug("Not enough memory: alloc type_mapping failed\n"); @@ -761,13 +841,7 @@ static int hook_load_preprocessor(struct bpf_program *prog) memset(priv->type_mapping, -1, sizeof(int) * pev->ntevs); - err = map_prologue(pev, priv->type_mapping, &priv->nr_types); - if (err) - return err; - - err = bpf_program__set_prep(prog, priv->nr_types, - preproc_gen_prologue); - return err; + return map_prologue(pev, priv->type_mapping, &priv->nr_types); } int bpf__probe(struct bpf_object *obj) @@ -874,6 +948,77 @@ int bpf__unprobe(struct bpf_object *obj) return ret; } +static int bpf_object__load_prologue(struct bpf_object *obj) +{ + int init_cnt = ARRAY_SIZE(prologue_init_insn); + const struct bpf_insn *orig_insns; + struct bpf_preproc_result res; + struct perf_probe_event *pev; + struct bpf_program *prog; + int orig_insns_cnt; + + bpf_object__for_each_program(prog, obj) { + struct bpf_prog_priv *priv = program_priv(prog); + int err, i, fd; + + if (IS_ERR_OR_NULL(priv)) { + pr_debug("bpf: failed to get private field\n"); + return -BPF_LOADER_ERRNO__INTERNAL; + } + + if (!priv->need_prologue) + continue; + + /* + * For each program that needs prologue we do following: + * + * - take its current instructions and use them + * to generate the new code with prologue + * - load new instructions with bpf_prog_load + * and keep the fd in prologue_fds + * - new fd will be used in bpf__foreach_event + * to connect this program with perf evsel + */ + orig_insns = bpf_program__insns(prog); + orig_insns_cnt = bpf_program__insn_cnt(prog); + + pev = &priv->pev; + for (i = 0; i < pev->ntevs; i++) { + /* + * Skipping artificall prologue_init_insn instructions + * (init_cnt), so the prologue can be generated instead + * of them. + */ + err = preproc_gen_prologue(prog, i, + orig_insns + init_cnt, + orig_insns_cnt - init_cnt, + &res); + if (err) + return err; + + fd = bpf_prog_load(bpf_program__get_type(prog), + bpf_program__name(prog), "GPL", + res.new_insn_ptr, + res.new_insn_cnt, NULL); + if (fd < 0) { + char bf[128]; + + libbpf_strerror(-errno, bf, sizeof(bf)); + pr_debug("bpf: load objects with prologue failed: err=%d: (%s)\n", + -errno, bf); + return -errno; + } + priv->prologue_fds[i] = fd; + } + /* + * We no longer need the original program, + * we can unload it. + */ + bpf_program__unload(prog); + } + return 0; +} + int bpf__load(struct bpf_object *obj) { int err; @@ -885,7 +1030,7 @@ int bpf__load(struct bpf_object *obj) pr_debug("bpf: load objects failed: err=%d: (%s)\n", err, bf); return err; } - return 0; + return bpf_object__load_prologue(obj); } int bpf__foreach_event(struct bpf_object *obj, @@ -920,13 +1065,10 @@ int bpf__foreach_event(struct bpf_object *obj, for (i = 0; i < pev->ntevs; i++) { tev = &pev->tevs[i]; - if (priv->need_prologue) { - int type = priv->type_mapping[i]; - - fd = bpf_program__nth_fd(prog, type); - } else { + if (priv->need_prologue) + fd = priv->prologue_fds[i]; + else fd = bpf_program__fd(prog); - } if (fd < 0) { pr_debug("bpf: failed to get file descriptor\n"); diff --git a/tools/perf/util/bpf-utils.c b/tools/perf/util/bpf-utils.c index e271e05e51bc..80b1d2b3729b 100644 --- a/tools/perf/util/bpf-utils.c +++ b/tools/perf/util/bpf-utils.c @@ -149,11 +149,10 @@ get_bpf_prog_info_linear(int fd, __u64 arrays) count = bpf_prog_info_read_offset_u32(&info, desc->count_offset); size = bpf_prog_info_read_offset_u32(&info, desc->size_offset); - data_len += count * size; + data_len += roundup(count * size, sizeof(__u64)); } /* step 3: allocate continuous memory */ - data_len = roundup(data_len, sizeof(__u64)); info_linear = malloc(sizeof(struct perf_bpil) + data_len); if (!info_linear) return ERR_PTR(-ENOMEM); @@ -180,7 +179,7 @@ get_bpf_prog_info_linear(int fd, __u64 arrays) bpf_prog_info_set_offset_u64(&info_linear->info, desc->array_offset, ptr_to_u64(ptr)); - ptr += count * size; + ptr += roundup(count * size, sizeof(__u64)); } /* step 5: call syscall again to get required arrays */ diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c index b73e84a02264..f289b7713598 100644 --- a/tools/perf/util/bpf_off_cpu.c +++ b/tools/perf/util/bpf_off_cpu.c @@ -265,6 +265,12 @@ int off_cpu_write(struct perf_session *session) sample_type = evsel->core.attr.sample_type; + if (sample_type & ~OFFCPU_SAMPLE_TYPES) { + pr_err("not supported sample type: %llx\n", + (unsigned long long)sample_type); + return -1; + } + if (sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER)) { if (evsel->core.id) sid = evsel->core.id[0]; @@ -319,7 +325,6 @@ int off_cpu_write(struct perf_session *session) } if (sample_type & PERF_SAMPLE_CGROUP) data.array[n++] = key.cgroup_id; - /* TODO: handle more sample types */ size = n * sizeof(u64); data.hdr.size = size; diff --git a/tools/perf/util/bpf_skel/off_cpu.bpf.c b/tools/perf/util/bpf_skel/off_cpu.bpf.c index 792ae2847080..cc6d7fd55118 100644 --- a/tools/perf/util/bpf_skel/off_cpu.bpf.c +++ b/tools/perf/util/bpf_skel/off_cpu.bpf.c @@ -71,6 +71,11 @@ struct { __uint(max_entries, 1); } cgroup_filter SEC(".maps"); +/* new kernel task_struct definition */ +struct task_struct___new { + long __state; +} __attribute__((preserve_access_index)); + /* old kernel task_struct definition */ struct task_struct___old { long state; @@ -93,14 +98,17 @@ const volatile bool uses_cgroup_v1 = false; */ static inline int get_task_state(struct task_struct *t) { - if (bpf_core_field_exists(t->__state)) - return BPF_CORE_READ(t, __state); + /* recast pointer to capture new type for compiler */ + struct task_struct___new *t_new = (void *)t; - /* recast pointer to capture task_struct___old type for compiler */ - struct task_struct___old *t_old = (void *)t; + if (bpf_core_field_exists(t_new->__state)) { + return BPF_CORE_READ(t_new, __state); + } else { + /* recast pointer to capture old type for compiler */ + struct task_struct___old *t_old = (void *)t; - /* now use old "state" name of the field */ - return BPF_CORE_READ(t_old, state); + return BPF_CORE_READ(t_old, state); + } } static inline __u64 get_cgroup_id(struct task_struct *t) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index ce499c5da8d7..094b0a9c0bc0 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -48,6 +48,7 @@ #include "util.h" #include "hashmap.h" #include "pmu-hybrid.h" +#include "off_cpu.h" #include "../perf-sys.h" #include "util/parse-branch-options.h" #include <internal/xyarray.h> @@ -1102,6 +1103,11 @@ static void evsel__set_default_freq_period(struct record_opts *opts, } } +static bool evsel__is_offcpu_event(struct evsel *evsel) +{ + return evsel__is_bpf_output(evsel) && !strcmp(evsel->name, OFFCPU_EVENT); +} + /* * The enable_on_exec/disabled value strategy: * @@ -1366,6 +1372,9 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, */ if (evsel__is_dummy_event(evsel)) evsel__reset_sample_bit(evsel, BRANCH_STACK); + + if (evsel__is_offcpu_event(evsel)) + evsel->core.attr.sample_type &= OFFCPU_SAMPLE_TYPES; } int evsel__set_filter(struct evsel *evsel, const char *filter) diff --git a/tools/perf/util/off_cpu.h b/tools/perf/util/off_cpu.h index 548008f74d42..2dd67c60f211 100644 --- a/tools/perf/util/off_cpu.h +++ b/tools/perf/util/off_cpu.h @@ -1,6 +1,8 @@ #ifndef PERF_UTIL_OFF_CPU_H #define PERF_UTIL_OFF_CPU_H +#include <linux/perf_event.h> + struct evlist; struct target; struct perf_session; @@ -8,6 +10,13 @@ struct record_opts; #define OFFCPU_EVENT "offcpu-time" +#define OFFCPU_SAMPLE_TYPES (PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_IP | \ + PERF_SAMPLE_TID | PERF_SAMPLE_TIME | \ + PERF_SAMPLE_ID | PERF_SAMPLE_CPU | \ + PERF_SAMPLE_PERIOD | PERF_SAMPLE_CALLCHAIN | \ + PERF_SAMPLE_CGROUP) + + #ifdef HAVE_BPF_SKEL int off_cpu_prepare(struct evlist *evlist, struct target *target, struct record_opts *opts); diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index ecd377938eea..b3be5b1d9dbb 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -233,6 +233,33 @@ Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, return NULL; } +static int elf_read_program_header(Elf *elf, u64 vaddr, GElf_Phdr *phdr) +{ + size_t i, phdrnum; + u64 sz; + + if (elf_getphdrnum(elf, &phdrnum)) + return -1; + + for (i = 0; i < phdrnum; i++) { + if (gelf_getphdr(elf, i, phdr) == NULL) + return -1; + + if (phdr->p_type != PT_LOAD) + continue; + + sz = max(phdr->p_memsz, phdr->p_filesz); + if (!sz) + continue; + + if (vaddr >= phdr->p_vaddr && (vaddr < phdr->p_vaddr + sz)) + return 0; + } + + /* Not found any valid program header */ + return -1; +} + static bool want_demangle(bool is_kernel_sym) { return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle; @@ -1209,6 +1236,7 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss, sym.st_value); used_opd = true; } + /* * When loading symbols in a data mapping, ABS symbols (which * has a value of SHN_ABS in its st_shndx) failed at @@ -1227,6 +1255,17 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss, gelf_getshdr(sec, &shdr); + /* + * If the attribute bit SHF_ALLOC is not set, the section + * doesn't occupy memory during process execution. + * E.g. ".gnu.warning.*" section is used by linker to generate + * warnings when calling deprecated functions, the symbols in + * the section aren't loaded to memory during process execution, + * so skip them. + */ + if (!(shdr.sh_flags & SHF_ALLOC)) + continue; + secstrs = secstrs_sym; /* @@ -1262,11 +1301,20 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss, goto out_elf_end; } else if ((used_opd && runtime_ss->adjust_symbols) || (!used_opd && syms_ss->adjust_symbols)) { + GElf_Phdr phdr; + + if (elf_read_program_header(syms_ss->elf, + (u64)sym.st_value, &phdr)) { + pr_warning("%s: failed to find program header for " + "symbol: %s st_value: %#" PRIx64 "\n", + __func__, elf_name, (u64)sym.st_value); + continue; + } pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " " - "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__, - (u64)sym.st_value, (u64)shdr.sh_addr, - (u64)shdr.sh_offset); - sym.st_value -= shdr.sh_addr - shdr.sh_offset; + "p_vaddr: %#" PRIx64 " p_offset: %#" PRIx64 "\n", + __func__, (u64)sym.st_value, (u64)phdr.p_vaddr, + (u64)phdr.p_offset); + sym.st_value -= phdr.p_vaddr - phdr.p_offset; } demangled = demangle_sym(dso, kmodule, elf_name); diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 27acdc5e5723..84d17bd4efae 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -754,7 +754,7 @@ static int __event__synthesize_thread(union perf_event *comm_event, snprintf(filename, sizeof(filename), "%s/proc/%d/task", machine->root_dir, pid); - n = scandir(filename, &dirent, filter_task, alphasort); + n = scandir(filename, &dirent, filter_task, NULL); if (n < 0) return n; @@ -767,11 +767,12 @@ static int __event__synthesize_thread(union perf_event *comm_event, if (*end) continue; - rc = -1; + /* some threads may exit just after scan, ignore it */ if (perf_event__prepare_comm(comm_event, pid, _pid, machine, &tgid, &ppid, &kernel_thread) != 0) - break; + continue; + rc = -1; if (perf_event__synthesize_fork(tool, fork_event, _pid, tgid, ppid, process, machine) < 0) break; @@ -987,7 +988,7 @@ int perf_event__synthesize_threads(struct perf_tool *tool, return 0; snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir); - n = scandir(proc_path, &dirent, filter_task, alphasort); + n = scandir(proc_path, &dirent, filter_task, NULL); if (n < 0) return err; diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index 6e5b8cce47bf..81b6bd6e1536 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -197,7 +197,7 @@ out_err: #ifndef NO_LIBUNWIND_DEBUG_FRAME static u64 elf_section_offset(int fd, const char *name) { - u64 address, offset; + u64 address, offset = 0; if (elf_section_address_and_offset(fd, name, &address, &offset)) return 0; |