diff options
Diffstat (limited to 'tools/perf/util')
28 files changed, 914 insertions, 341 deletions
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 28b233c3dcbe..49ff825f745c 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -187,6 +187,9 @@ bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2) static int call__parse(struct arch *arch, struct ins_operands *ops, struct map *map) { char *endptr, *tok, *name; + struct addr_map_symbol target = { + .map = map, + }; ops->target.addr = strtoull(ops->raw, &endptr, 16); @@ -208,28 +211,29 @@ static int call__parse(struct arch *arch, struct ins_operands *ops, struct map * ops->target.name = strdup(name); *tok = '>'; - return ops->target.name == NULL ? -1 : 0; + if (ops->target.name == NULL) + return -1; +find_target: + target.addr = map__objdump_2mem(map, ops->target.addr); -indirect_call: - tok = strchr(endptr, '*'); - if (tok == NULL) { - struct symbol *sym = map__find_symbol(map, map->map_ip(map, ops->target.addr)); - if (sym != NULL) - ops->target.name = strdup(sym->name); - else - ops->target.addr = 0; - return 0; - } + if (map_groups__find_ams(&target) == 0 && + map__rip_2objdump(target.map, map->map_ip(target.map, target.addr)) == ops->target.addr) + ops->target.sym = target.sym; - ops->target.addr = strtoull(tok + 1, NULL, 16); return 0; + +indirect_call: + tok = strchr(endptr, '*'); + if (tok != NULL) + ops->target.addr = strtoull(tok + 1, NULL, 16); + goto find_target; } static int call__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops) { - if (ops->target.name) - return scnprintf(bf, size, "%-6s %s", ins->name, ops->target.name); + if (ops->target.sym) + return scnprintf(bf, size, "%-6s %s", ins->name, ops->target.sym->name); if (ops->target.addr == 0) return ins__raw_scnprintf(ins, bf, size, ops); @@ -1283,8 +1287,8 @@ static int symbol__parse_objdump_line(struct symbol *sym, FILE *file, dl->ops.target.offset_avail = true; } - /* kcore has no symbols, so add the call target name */ - if (dl->ins.ops && ins__is_call(&dl->ins) && !dl->ops.target.name) { + /* kcore has no symbols, so add the call target symbol */ + if (dl->ins.ops && ins__is_call(&dl->ins) && !dl->ops.target.sym) { struct addr_map_symbol target = { .map = map, .addr = dl->ops.target.addr, @@ -1292,7 +1296,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, FILE *file, if (!map_groups__find_ams(&target) && target.sym->start == target.al_addr) - dl->ops.target.name = strdup(target.sym->name); + dl->ops.target.sym = target.sym; } annotation_line__add(&dl->al, ¬es->src->source); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index ce427445671f..7e914e834101 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -24,6 +24,7 @@ struct ins_operands { struct { char *raw; char *name; + struct symbol *sym; u64 addr; s64 offset; bool offset_avail; diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 7f8553630c4d..537eadd81914 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -316,7 +316,6 @@ static int machine__write_buildid_table(struct machine *machine, struct feat_fd *fd) { int err = 0; - char nm[PATH_MAX]; struct dso *pos; u16 kmisc = PERF_RECORD_MISC_KERNEL, umisc = PERF_RECORD_MISC_USER; @@ -338,9 +337,8 @@ static int machine__write_buildid_table(struct machine *machine, name = pos->short_name; name_len = pos->short_name_len; } else if (dso__is_kcore(pos)) { - machine__mmap_name(machine, nm, sizeof(nm)); - name = nm; - name_len = strlen(nm); + name = machine->mmap_name; + name_len = strlen(name); } else { name = pos->long_name; name_len = pos->long_name_len; @@ -813,12 +811,10 @@ static int dso__cache_build_id(struct dso *dso, struct machine *machine) bool is_kallsyms = dso__is_kallsyms(dso); bool is_vdso = dso__is_vdso(dso); const char *name = dso->long_name; - char nm[PATH_MAX]; if (dso__is_kcore(dso)) { is_kallsyms = true; - machine__mmap_name(machine, nm, sizeof(nm)); - name = nm; + name = machine->mmap_name; } return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id), name, dso->nsinfo, is_kallsyms, is_vdso); diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index 984f69144f87..5dd9b5ea314d 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -157,9 +157,11 @@ int parse_cgroups(const struct option *opt __maybe_unused, const char *str, int unset __maybe_unused) { struct perf_evlist *evlist = *(struct perf_evlist **)opt->value; + struct perf_evsel *counter; + struct cgroup_sel *cgrp = NULL; const char *p, *e, *eos = str + strlen(str); char *s; - int ret; + int ret, i; if (list_empty(&evlist->entries)) { fprintf(stderr, "must define events before cgroups\n"); @@ -188,5 +190,18 @@ int parse_cgroups(const struct option *opt __maybe_unused, const char *str, break; str = p+1; } + /* for the case one cgroup combine to multiple events */ + i = 0; + if (nr_cgroups == 1) { + evlist__for_each_entry(evlist, counter) { + if (i == 0) + cgrp = counter->cgrp; + else { + counter->cgrp = cgrp; + refcount_inc(&cgrp->refcnt); + } + i++; + } + } return 0; } diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 1fb01849f1c7..640af88331b4 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -78,6 +78,8 @@ int cs_etm_decoder__reset(struct cs_etm_decoder *decoder) { ocsd_datapath_resp_t dp_ret; + decoder->prev_return = OCSD_RESP_CONT; + dp_ret = ocsd_dt_process_data(decoder->dcd_tree, OCSD_OP_RESET, 0, 0, NULL, NULL); if (OCSD_DATA_RESP_IS_FATAL(dp_ret)) @@ -253,16 +255,16 @@ static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder) decoder->packet_count = 0; for (i = 0; i < MAX_BUFFER; i++) { decoder->packet_buffer[i].start_addr = 0xdeadbeefdeadbeefUL; - decoder->packet_buffer[i].end_addr = 0xdeadbeefdeadbeefUL; - decoder->packet_buffer[i].exc = false; - decoder->packet_buffer[i].exc_ret = false; - decoder->packet_buffer[i].cpu = INT_MIN; + decoder->packet_buffer[i].end_addr = 0xdeadbeefdeadbeefUL; + decoder->packet_buffer[i].last_instr_taken_branch = false; + decoder->packet_buffer[i].exc = false; + decoder->packet_buffer[i].exc_ret = false; + decoder->packet_buffer[i].cpu = INT_MIN; } } static ocsd_datapath_resp_t cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, - const ocsd_generic_trace_elem *elem, const u8 trace_chan_id, enum cs_etm_sample_type sample_type) { @@ -278,18 +280,16 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, return OCSD_RESP_FATAL_SYS_ERR; et = decoder->tail; + et = (et + 1) & (MAX_BUFFER - 1); + decoder->tail = et; + decoder->packet_count++; + decoder->packet_buffer[et].sample_type = sample_type; - decoder->packet_buffer[et].start_addr = elem->st_addr; - decoder->packet_buffer[et].end_addr = elem->en_addr; decoder->packet_buffer[et].exc = false; decoder->packet_buffer[et].exc_ret = false; decoder->packet_buffer[et].cpu = *((int *)inode->priv); - - /* Wrap around if need be */ - et = (et + 1) & (MAX_BUFFER - 1); - - decoder->tail = et; - decoder->packet_count++; + decoder->packet_buffer[et].start_addr = 0xdeadbeefdeadbeefUL; + decoder->packet_buffer[et].end_addr = 0xdeadbeefdeadbeefUL; if (decoder->packet_count == MAX_BUFFER - 1) return OCSD_RESP_WAIT; @@ -297,6 +297,47 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, return OCSD_RESP_CONT; } +static ocsd_datapath_resp_t +cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder, + const ocsd_generic_trace_elem *elem, + const uint8_t trace_chan_id) +{ + int ret = 0; + struct cs_etm_packet *packet; + + ret = cs_etm_decoder__buffer_packet(decoder, trace_chan_id, + CS_ETM_RANGE); + if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT) + return ret; + + packet = &decoder->packet_buffer[decoder->tail]; + + packet->start_addr = elem->st_addr; + packet->end_addr = elem->en_addr; + switch (elem->last_i_type) { + case OCSD_INSTR_BR: + case OCSD_INSTR_BR_INDIRECT: + packet->last_instr_taken_branch = elem->last_instr_exec; + break; + case OCSD_INSTR_ISB: + case OCSD_INSTR_DSB_DMB: + case OCSD_INSTR_OTHER: + default: + packet->last_instr_taken_branch = false; + break; + } + + return ret; +} + +static ocsd_datapath_resp_t +cs_etm_decoder__buffer_trace_on(struct cs_etm_decoder *decoder, + const uint8_t trace_chan_id) +{ + return cs_etm_decoder__buffer_packet(decoder, trace_chan_id, + CS_ETM_TRACE_ON); +} + static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( const void *context, const ocsd_trc_index_t indx __maybe_unused, @@ -313,12 +354,13 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( decoder->trace_on = false; break; case OCSD_GEN_TRC_ELEM_TRACE_ON: + resp = cs_etm_decoder__buffer_trace_on(decoder, + trace_chan_id); decoder->trace_on = true; break; case OCSD_GEN_TRC_ELEM_INSTR_RANGE: - resp = cs_etm_decoder__buffer_packet(decoder, elem, - trace_chan_id, - CS_ETM_RANGE); + resp = cs_etm_decoder__buffer_range(decoder, elem, + trace_chan_id); break; case OCSD_GEN_TRC_ELEM_EXCEPTION: decoder->packet_buffer[decoder->tail].exc = true; diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h index 3d2e6205d186..743f5f444304 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h @@ -24,12 +24,14 @@ struct cs_etm_buffer { enum cs_etm_sample_type { CS_ETM_RANGE = 1 << 0, + CS_ETM_TRACE_ON = 1 << 1, }; struct cs_etm_packet { enum cs_etm_sample_type sample_type; u64 start_addr; u64 end_addr; + u8 last_instr_taken_branch; u8 exc; u8 exc_ret; int cpu; diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index b9f0a53dfa65..1b0d422373be 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -32,6 +32,14 @@ #define MAX_TIMESTAMP (~0ULL) +/* + * A64 instructions are always 4 bytes + * + * Only A64 is supported, so can use this constant for converting between + * addresses and instruction counts, calculting offsets etc + */ +#define A64_INSTR_SIZE 4 + struct cs_etm_auxtrace { struct auxtrace auxtrace; struct auxtrace_queues queues; @@ -45,11 +53,15 @@ struct cs_etm_auxtrace { u8 snapshot_mode; u8 data_queued; u8 sample_branches; + u8 sample_instructions; int num_cpu; u32 auxtrace_type; u64 branches_sample_type; u64 branches_id; + u64 instructions_sample_type; + u64 instructions_sample_period; + u64 instructions_id; u64 **metadata; u64 kernel_start; unsigned int pmu_type; @@ -68,6 +80,12 @@ struct cs_etm_queue { u64 time; u64 timestamp; u64 offset; + u64 period_instructions; + struct branch_stack *last_branch; + struct branch_stack *last_branch_rb; + size_t last_branch_pos; + struct cs_etm_packet *prev_packet; + struct cs_etm_packet *packet; }; static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); @@ -174,6 +192,16 @@ static void cs_etm__free_queue(void *priv) { struct cs_etm_queue *etmq = priv; + if (!etmq) + return; + + thread__zput(etmq->thread); + cs_etm_decoder__free(etmq->decoder); + zfree(&etmq->event_buf); + zfree(&etmq->last_branch); + zfree(&etmq->last_branch_rb); + zfree(&etmq->prev_packet); + zfree(&etmq->packet); free(etmq); } @@ -270,11 +298,35 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, struct cs_etm_decoder_params d_params; struct cs_etm_trace_params *t_params; struct cs_etm_queue *etmq; + size_t szp = sizeof(struct cs_etm_packet); etmq = zalloc(sizeof(*etmq)); if (!etmq) return NULL; + etmq->packet = zalloc(szp); + if (!etmq->packet) + goto out_free; + + if (etm->synth_opts.last_branch || etm->sample_branches) { + etmq->prev_packet = zalloc(szp); + if (!etmq->prev_packet) + goto out_free; + } + + if (etm->synth_opts.last_branch) { + size_t sz = sizeof(struct branch_stack); + + sz += etm->synth_opts.last_branch_sz * + sizeof(struct branch_entry); + etmq->last_branch = zalloc(sz); + if (!etmq->last_branch) + goto out_free; + etmq->last_branch_rb = zalloc(sz); + if (!etmq->last_branch_rb) + goto out_free; + } + etmq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); if (!etmq->event_buf) goto out_free; @@ -329,6 +381,7 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, goto out_free_decoder; etmq->offset = 0; + etmq->period_instructions = 0; return etmq; @@ -336,6 +389,10 @@ out_free_decoder: cs_etm_decoder__free(etmq->decoder); out_free: zfree(&etmq->event_buf); + zfree(&etmq->last_branch); + zfree(&etmq->last_branch_rb); + zfree(&etmq->prev_packet); + zfree(&etmq->packet); free(etmq); return NULL; @@ -389,6 +446,129 @@ static int cs_etm__update_queues(struct cs_etm_auxtrace *etm) return 0; } +static inline void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq) +{ + struct branch_stack *bs_src = etmq->last_branch_rb; + struct branch_stack *bs_dst = etmq->last_branch; + size_t nr = 0; + + /* + * Set the number of records before early exit: ->nr is used to + * determine how many branches to copy from ->entries. + */ + bs_dst->nr = bs_src->nr; + + /* + * Early exit when there is nothing to copy. + */ + if (!bs_src->nr) + return; + + /* + * As bs_src->entries is a circular buffer, we need to copy from it in + * two steps. First, copy the branches from the most recently inserted + * branch ->last_branch_pos until the end of bs_src->entries buffer. + */ + nr = etmq->etm->synth_opts.last_branch_sz - etmq->last_branch_pos; + memcpy(&bs_dst->entries[0], + &bs_src->entries[etmq->last_branch_pos], + sizeof(struct branch_entry) * nr); + + /* + * If we wrapped around at least once, the branches from the beginning + * of the bs_src->entries buffer and until the ->last_branch_pos element + * are older valid branches: copy them over. The total number of + * branches copied over will be equal to the number of branches asked by + * the user in last_branch_sz. + */ + if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) { + memcpy(&bs_dst->entries[nr], + &bs_src->entries[0], + sizeof(struct branch_entry) * etmq->last_branch_pos); + } +} + +static inline void cs_etm__reset_last_branch_rb(struct cs_etm_queue *etmq) +{ + etmq->last_branch_pos = 0; + etmq->last_branch_rb->nr = 0; +} + +static inline u64 cs_etm__last_executed_instr(struct cs_etm_packet *packet) +{ + /* + * The packet records the execution range with an exclusive end address + * + * A64 instructions are constant size, so the last executed + * instruction is A64_INSTR_SIZE before the end address + * Will need to do instruction level decode for T32 instructions as + * they can be variable size (not yet supported). + */ + return packet->end_addr - A64_INSTR_SIZE; +} + +static inline u64 cs_etm__instr_count(const struct cs_etm_packet *packet) +{ + /* + * Only A64 instructions are currently supported, so can get + * instruction count by dividing. + * Will need to do instruction level decode for T32 instructions as + * they can be variable size (not yet supported). + */ + return (packet->end_addr - packet->start_addr) / A64_INSTR_SIZE; +} + +static inline u64 cs_etm__instr_addr(const struct cs_etm_packet *packet, + u64 offset) +{ + /* + * Only A64 instructions are currently supported, so can get + * instruction address by muliplying. + * Will need to do instruction level decode for T32 instructions as + * they can be variable size (not yet supported). + */ + return packet->start_addr + offset * A64_INSTR_SIZE; +} + +static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq) +{ + struct branch_stack *bs = etmq->last_branch_rb; + struct branch_entry *be; + + /* + * The branches are recorded in a circular buffer in reverse + * chronological order: we start recording from the last element of the + * buffer down. After writing the first element of the stack, move the + * insert position back to the end of the buffer. + */ + if (!etmq->last_branch_pos) + etmq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz; + + etmq->last_branch_pos -= 1; + + be = &bs->entries[etmq->last_branch_pos]; + be->from = cs_etm__last_executed_instr(etmq->prev_packet); + be->to = etmq->packet->start_addr; + /* No support for mispredict */ + be->flags.mispred = 0; + be->flags.predicted = 1; + + /* + * Increment bs->nr until reaching the number of last branches asked by + * the user on the command line. + */ + if (bs->nr < etmq->etm->synth_opts.last_branch_sz) + bs->nr += 1; +} + +static int cs_etm__inject_event(union perf_event *event, + struct perf_sample *sample, u64 type) +{ + event->header.size = perf_event__sample_event_size(sample, type, 0); + return perf_event__synthesize_sample(event, type, 0, sample); +} + + static int cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq) { @@ -453,35 +633,105 @@ static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm, } } +static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, + u64 addr, u64 period) +{ + int ret = 0; + struct cs_etm_auxtrace *etm = etmq->etm; + union perf_event *event = etmq->event_buf; + struct perf_sample sample = {.ip = 0,}; + + event->sample.header.type = PERF_RECORD_SAMPLE; + event->sample.header.misc = PERF_RECORD_MISC_USER; + event->sample.header.size = sizeof(struct perf_event_header); + + sample.ip = addr; + sample.pid = etmq->pid; + sample.tid = etmq->tid; + sample.id = etmq->etm->instructions_id; + sample.stream_id = etmq->etm->instructions_id; + sample.period = period; + sample.cpu = etmq->packet->cpu; + sample.flags = 0; + sample.insn_len = 1; + sample.cpumode = event->header.misc; + + if (etm->synth_opts.last_branch) { + cs_etm__copy_last_branch_rb(etmq); + sample.branch_stack = etmq->last_branch; + } + + if (etm->synth_opts.inject) { + ret = cs_etm__inject_event(event, &sample, + etm->instructions_sample_type); + if (ret) + return ret; + } + + ret = perf_session__deliver_synth_event(etm->session, event, &sample); + + if (ret) + pr_err( + "CS ETM Trace: failed to deliver instruction event, error %d\n", + ret); + + if (etm->synth_opts.last_branch) + cs_etm__reset_last_branch_rb(etmq); + + return ret; +} + /* * The cs etm packet encodes an instruction range between a branch target * and the next taken branch. Generate sample accordingly. */ -static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, - struct cs_etm_packet *packet) +static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq) { int ret = 0; struct cs_etm_auxtrace *etm = etmq->etm; struct perf_sample sample = {.ip = 0,}; union perf_event *event = etmq->event_buf; - u64 start_addr = packet->start_addr; - u64 end_addr = packet->end_addr; + struct dummy_branch_stack { + u64 nr; + struct branch_entry entries; + } dummy_bs; event->sample.header.type = PERF_RECORD_SAMPLE; event->sample.header.misc = PERF_RECORD_MISC_USER; event->sample.header.size = sizeof(struct perf_event_header); - sample.ip = start_addr; + sample.ip = cs_etm__last_executed_instr(etmq->prev_packet); sample.pid = etmq->pid; sample.tid = etmq->tid; - sample.addr = end_addr; + sample.addr = etmq->packet->start_addr; sample.id = etmq->etm->branches_id; sample.stream_id = etmq->etm->branches_id; sample.period = 1; - sample.cpu = packet->cpu; + sample.cpu = etmq->packet->cpu; sample.flags = 0; sample.cpumode = PERF_RECORD_MISC_USER; + /* + * perf report cannot handle events without a branch stack + */ + if (etm->synth_opts.last_branch) { + dummy_bs = (struct dummy_branch_stack){ + .nr = 1, + .entries = { + .from = sample.ip, + .to = sample.addr, + }, + }; + sample.branch_stack = (struct branch_stack *)&dummy_bs; + } + + if (etm->synth_opts.inject) { + ret = cs_etm__inject_event(event, &sample, + etm->branches_sample_type); + if (ret) + return ret; + } + ret = perf_session__deliver_synth_event(etm->session, event, &sample); if (ret) @@ -578,6 +828,24 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, etm->sample_branches = true; etm->branches_sample_type = attr.sample_type; etm->branches_id = id; + id += 1; + attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR; + } + + if (etm->synth_opts.last_branch) + attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; + + if (etm->synth_opts.instructions) { + attr.config = PERF_COUNT_HW_INSTRUCTIONS; + attr.sample_period = etm->synth_opts.period; + etm->instructions_sample_period = attr.sample_period; + err = cs_etm__synth_event(session, &attr, id); + if (err) + return err; + etm->sample_instructions = true; + etm->instructions_sample_type = attr.sample_type; + etm->instructions_id = id; + id += 1; } return 0; @@ -585,25 +853,108 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, static int cs_etm__sample(struct cs_etm_queue *etmq) { + struct cs_etm_auxtrace *etm = etmq->etm; + struct cs_etm_packet *tmp; int ret; - struct cs_etm_packet packet; + u64 instrs_executed; - while (1) { - ret = cs_etm_decoder__get_packet(etmq->decoder, &packet); - if (ret <= 0) + instrs_executed = cs_etm__instr_count(etmq->packet); + etmq->period_instructions += instrs_executed; + + /* + * Record a branch when the last instruction in + * PREV_PACKET is a branch. + */ + if (etm->synth_opts.last_branch && + etmq->prev_packet && + etmq->prev_packet->sample_type == CS_ETM_RANGE && + etmq->prev_packet->last_instr_taken_branch) + cs_etm__update_last_branch_rb(etmq); + + if (etm->sample_instructions && + etmq->period_instructions >= etm->instructions_sample_period) { + /* + * Emit instruction sample periodically + * TODO: allow period to be defined in cycles and clock time + */ + + /* Get number of instructions executed after the sample point */ + u64 instrs_over = etmq->period_instructions - + etm->instructions_sample_period; + + /* + * Calculate the address of the sampled instruction (-1 as + * sample is reported as though instruction has just been + * executed, but PC has not advanced to next instruction) + */ + u64 offset = (instrs_executed - instrs_over - 1); + u64 addr = cs_etm__instr_addr(etmq->packet, offset); + + ret = cs_etm__synth_instruction_sample( + etmq, addr, etm->instructions_sample_period); + if (ret) + return ret; + + /* Carry remaining instructions into next sample period */ + etmq->period_instructions = instrs_over; + } + + if (etm->sample_branches && + etmq->prev_packet && + etmq->prev_packet->sample_type == CS_ETM_RANGE && + etmq->prev_packet->last_instr_taken_branch) { + ret = cs_etm__synth_branch_sample(etmq); + if (ret) return ret; + } + if (etm->sample_branches || etm->synth_opts.last_branch) { /* - * If the packet contains an instruction range, generate an - * instruction sequence event. + * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for + * the next incoming packet. */ - if (packet.sample_type & CS_ETM_RANGE) - cs_etm__synth_branch_sample(etmq, &packet); + tmp = etmq->packet; + etmq->packet = etmq->prev_packet; + etmq->prev_packet = tmp; } return 0; } +static int cs_etm__flush(struct cs_etm_queue *etmq) +{ + int err = 0; + struct cs_etm_packet *tmp; + + if (etmq->etm->synth_opts.last_branch && + etmq->prev_packet && + etmq->prev_packet->sample_type == CS_ETM_RANGE) { + /* + * Generate a last branch event for the branches left in the + * circular buffer at the end of the trace. + * + * Use the address of the end of the last reported execution + * range + */ + u64 addr = cs_etm__last_executed_instr(etmq->prev_packet); + + err = cs_etm__synth_instruction_sample( + etmq, addr, + etmq->period_instructions); + etmq->period_instructions = 0; + + /* + * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for + * the next incoming packet. + */ + tmp = etmq->packet; + etmq->packet = etmq->prev_packet; + etmq->prev_packet = tmp; + } + + return err; +} + static int cs_etm__run_decoder(struct cs_etm_queue *etmq) { struct cs_etm_auxtrace *etm = etmq->etm; @@ -615,45 +966,72 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq) etm->kernel_start = machine__kernel_start(etm->machine); /* Go through each buffer in the queue and decode them one by one */ -more: - buffer_used = 0; - memset(&buffer, 0, sizeof(buffer)); - err = cs_etm__get_trace(&buffer, etmq); - if (err <= 0) - return err; - /* - * We cannot assume consecutive blocks in the data file are contiguous, - * reset the decoder to force re-sync. - */ - err = cs_etm_decoder__reset(etmq->decoder); - if (err != 0) - return err; - - /* Run trace decoder until buffer consumed or end of trace */ - do { - processed = 0; - - err = cs_etm_decoder__process_data_block( - etmq->decoder, - etmq->offset, - &buffer.buf[buffer_used], - buffer.len - buffer_used, - &processed); - - if (err) + while (1) { + buffer_used = 0; + memset(&buffer, 0, sizeof(buffer)); + err = cs_etm__get_trace(&buffer, etmq); + if (err <= 0) return err; - - etmq->offset += processed; - buffer_used += processed; - /* - * Nothing to do with an error condition, let's hope the next - * chunk will be better. + * We cannot assume consecutive blocks in the data file are + * contiguous, reset the decoder to force re-sync. */ - err = cs_etm__sample(etmq); - } while (buffer.len > buffer_used); + err = cs_etm_decoder__reset(etmq->decoder); + if (err != 0) + return err; + + /* Run trace decoder until buffer consumed or end of trace */ + do { + processed = 0; + err = cs_etm_decoder__process_data_block( + etmq->decoder, + etmq->offset, + &buffer.buf[buffer_used], + buffer.len - buffer_used, + &processed); + if (err) + return err; + + etmq->offset += processed; + buffer_used += processed; + + /* Process each packet in this chunk */ + while (1) { + err = cs_etm_decoder__get_packet(etmq->decoder, + etmq->packet); + if (err <= 0) + /* + * Stop processing this chunk on + * end of data or error + */ + break; + + switch (etmq->packet->sample_type) { + case CS_ETM_RANGE: + /* + * If the packet contains an instruction + * range, generate instruction sequence + * events. + */ + cs_etm__sample(etmq); + break; + case CS_ETM_TRACE_ON: + /* + * Discontinuity in trace, flush + * previous branch stack + */ + cs_etm__flush(etmq); + break; + default: + break; + } + } + } while (buffer.len > buffer_used); -goto more; + if (err == 0) + /* Flush any remaining branch stack entries */ + err = cs_etm__flush(etmq); + } return err; } diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 44e603c27944..f0a6cbd033cc 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -894,8 +894,6 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, struct machine *machine) { size_t size; - const char *mmap_name; - char name_buff[PATH_MAX]; struct map *map = machine__kernel_map(machine); struct kmap *kmap; int err; @@ -918,7 +916,6 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, return -1; } - mmap_name = machine__mmap_name(machine, name_buff, sizeof(name_buff)); if (machine__is_host(machine)) { /* * kernel uses PERF_RECORD_MISC_USER for user space maps, @@ -931,7 +928,7 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, kmap = map__kmap(map); size = snprintf(event->mmap.filename, sizeof(event->mmap.filename), - "%s%s", mmap_name, kmap->ref_reloc_sym->name) + 1; + "%s%s", machine->mmap_name, kmap->ref_reloc_sym->name) + 1; size = PERF_ALIGN(size, sizeof(u64)); event->mmap.header.type = PERF_RECORD_MMAP; event->mmap.header.size = (sizeof(event->mmap) - @@ -1591,17 +1588,6 @@ int machine__resolve(struct machine *machine, struct addr_location *al, return -1; dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid); - /* - * Have we already created the kernel maps for this machine? - * - * This should have happened earlier, when we processed the kernel MMAP - * events, but for older perf.data files there was no such thing, so do - * it now. - */ - if (sample->cpumode == PERF_RECORD_MISC_KERNEL && - machine__kernel_map(machine) == NULL) - machine__create_kernel_maps(machine); - thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, al); dump_printf(" ...... dso: %s\n", al->map ? al->map->dso->long_name : diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index e5fc14e53c05..41a4666f1519 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -702,29 +702,6 @@ static int perf_evlist__resume(struct perf_evlist *evlist) return perf_evlist__set_paused(evlist, false); } -union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int idx) -{ - struct perf_mmap *md = &evlist->mmap[idx]; - - /* - * Check messup is required for forward overwritable ring buffer: - * memory pointed by md->prev can be overwritten in this case. - * No need for read-write ring buffer: kernel stop outputting when - * it hit md->prev (perf_mmap__consume()). - */ - return perf_mmap__read_forward(md); -} - -union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) -{ - return perf_evlist__mmap_read_forward(evlist, idx); -} - -void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx) -{ - perf_mmap__consume(&evlist->mmap[idx], false); -} - static void perf_evlist__munmap_nofree(struct perf_evlist *evlist) { int i; @@ -761,7 +738,7 @@ static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist) map[i].fd = -1; /* * When the perf_mmap() call is made we grab one refcount, plus - * one extra to let perf_evlist__mmap_consume() get the last + * one extra to let perf_mmap__consume() get the last * events after all real references (perf_mmap__get()) are * dropped. * @@ -1086,11 +1063,30 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages) int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) { + bool all_threads = (target->per_thread && target->system_wide); struct cpu_map *cpus; struct thread_map *threads; + /* + * If specify '-a' and '--per-thread' to perf record, perf record + * will override '--per-thread'. target->per_thread = false and + * target->system_wide = true. + * + * If specify '--per-thread' only to perf record, + * target->per_thread = true and target->system_wide = false. + * + * So target->per_thread && target->system_wide is false. + * For perf record, thread_map__new_str doesn't call + * thread_map__new_all_cpus. That will keep perf record's + * current behavior. + * + * For perf stat, it allows the case that target->per_thread and + * target->system_wide are all true. It means to collect system-wide + * per-thread data. thread_map__new_str will call + * thread_map__new_all_cpus to enumerate all threads. + */ threads = thread_map__new_str(target->pid, target->tid, target->uid, - target->per_thread); + all_threads); if (!threads) return -1; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 336b838e6957..6c41b2f78713 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -129,10 +129,6 @@ struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id); void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist, enum bkw_mmap_state state); -union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx); - -union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, - int idx); void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx); int perf_evlist__open(struct perf_evlist *evlist); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index ef351688b797..b56e1c2ddaee 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1915,6 +1915,9 @@ try_fallback: goto fallback_missing_features; } out_close: + if (err) + threads->err_thread = thread; + do { while (--thread >= 0) { close(FD(evsel, cpu, thread)); diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index f28aaaa3a440..942bdec6d70d 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -174,4 +174,5 @@ int write_padded(struct feat_fd *fd, const void *bf, int get_cpuid(char *buffer, size_t sz); char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused); +int strcmp_cpuid_str(const char *s1, const char *s2); #endif /* __PERF_HEADER_H */ diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index b6140950301e..44a8456cea10 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -879,7 +879,7 @@ iter_prepare_cumulative_entry(struct hist_entry_iter *iter, * cumulated only one time to prevent entries more than 100% * overhead. */ - he_cache = malloc(sizeof(*he_cache) * (iter->max_stack + 1)); + he_cache = malloc(sizeof(*he_cache) * (callchain_cursor.nr + 1)); if (he_cache == NULL) return -ENOMEM; @@ -1045,8 +1045,6 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, if (err) return err; - iter->max_stack = max_stack_depth; - err = iter->ops->prepare_entry(iter, al); if (err) goto out; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 02721b579746..e869cad4d89f 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -107,7 +107,6 @@ struct hist_entry_iter { int curr; bool hide_unresolved; - int max_stack; struct perf_evsel *evsel; struct perf_sample *sample; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index b05a67464c03..12b7427444a3 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -48,8 +48,31 @@ static void machine__threads_init(struct machine *machine) } } +static int machine__set_mmap_name(struct machine *machine) +{ + if (machine__is_host(machine)) { + if (symbol_conf.vmlinux_name) + machine->mmap_name = strdup(symbol_conf.vmlinux_name); + else + machine->mmap_name = strdup("[kernel.kallsyms]"); + } else if (machine__is_default_guest(machine)) { + if (symbol_conf.default_guest_vmlinux_name) + machine->mmap_name = strdup(symbol_conf.default_guest_vmlinux_name); + else + machine->mmap_name = strdup("[guest.kernel.kallsyms]"); + } else { + if (asprintf(&machine->mmap_name, "[guest.kernel.kallsyms.%d]", + machine->pid) < 0) + machine->mmap_name = NULL; + } + + return machine->mmap_name ? 0 : -ENOMEM; +} + int machine__init(struct machine *machine, const char *root_dir, pid_t pid) { + int err = -ENOMEM; + memset(machine, 0, sizeof(*machine)); map_groups__init(&machine->kmaps, machine); RB_CLEAR_NODE(&machine->rb_node); @@ -73,13 +96,16 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) if (machine->root_dir == NULL) return -ENOMEM; + if (machine__set_mmap_name(machine)) + goto out; + if (pid != HOST_KERNEL_ID) { struct thread *thread = machine__findnew_thread(machine, -1, pid); char comm[64]; if (thread == NULL) - return -ENOMEM; + goto out; snprintf(comm, sizeof(comm), "[guest/%d]", pid); thread__set_comm(thread, comm, 0); @@ -87,7 +113,13 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) } machine->current_tid = NULL; + err = 0; +out: + if (err) { + zfree(&machine->root_dir); + zfree(&machine->mmap_name); + } return 0; } @@ -119,7 +151,7 @@ struct machine *machine__new_kallsyms(void) * ask for not using the kcore parsing code, once this one is fixed * to create a map per module. */ - if (machine && __machine__load_kallsyms(machine, "/proc/kallsyms", MAP__FUNCTION, true) <= 0) { + if (machine && machine__load_kallsyms(machine, "/proc/kallsyms", MAP__FUNCTION) <= 0) { machine__delete(machine); machine = NULL; } @@ -180,6 +212,7 @@ void machine__exit(struct machine *machine) dsos__exit(&machine->dsos); machine__exit_vdso(machine); zfree(&machine->root_dir); + zfree(&machine->mmap_name); zfree(&machine->current_tid); for (i = 0; i < THREADS__TABLE_SIZE; i++) { @@ -322,20 +355,6 @@ void machines__process_guests(struct machines *machines, } } -char *machine__mmap_name(struct machine *machine, char *bf, size_t size) -{ - if (machine__is_host(machine)) - snprintf(bf, size, "[%s]", "kernel.kallsyms"); - else if (machine__is_default_guest(machine)) - snprintf(bf, size, "[%s]", "guest.kernel.kallsyms"); - else { - snprintf(bf, size, "[%s.%d]", "guest.kernel.kallsyms", - machine->pid); - } - - return bf; -} - void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size) { struct rb_node *node; @@ -771,25 +790,13 @@ size_t machine__fprintf(struct machine *machine, FILE *fp) static struct dso *machine__get_kernel(struct machine *machine) { - const char *vmlinux_name = NULL; + const char *vmlinux_name = machine->mmap_name; struct dso *kernel; if (machine__is_host(machine)) { - vmlinux_name = symbol_conf.vmlinux_name; - if (!vmlinux_name) - vmlinux_name = DSO__NAME_KALLSYMS; - kernel = machine__findnew_kernel(machine, vmlinux_name, "[kernel]", DSO_TYPE_KERNEL); } else { - char bf[PATH_MAX]; - - if (machine__is_default_guest(machine)) - vmlinux_name = symbol_conf.default_guest_vmlinux_name; - if (!vmlinux_name) - vmlinux_name = machine__mmap_name(machine, bf, - sizeof(bf)); - kernel = machine__findnew_kernel(machine, vmlinux_name, "[guest.kernel]", DSO_TYPE_GUEST_KERNEL); @@ -849,13 +856,10 @@ static int machine__get_running_kernel_start(struct machine *machine, return 0; } -int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel) +static int +__machine__create_kernel_maps(struct machine *machine, struct dso *kernel) { int type; - u64 start = 0; - - if (machine__get_running_kernel_start(machine, NULL, &start)) - return -1; /* In case of renewal the kernel map, destroy previous one */ machine__destroy_kernel_maps(machine); @@ -864,7 +868,7 @@ int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel) struct kmap *kmap; struct map *map; - machine->vmlinux_maps[type] = map__new2(start, kernel, type); + machine->vmlinux_maps[type] = map__new2(0, kernel, type); if (machine->vmlinux_maps[type] == NULL) return -1; @@ -987,11 +991,11 @@ int machines__create_kernel_maps(struct machines *machines, pid_t pid) return machine__create_kernel_maps(machine); } -int __machine__load_kallsyms(struct machine *machine, const char *filename, - enum map_type type, bool no_kcore) +int machine__load_kallsyms(struct machine *machine, const char *filename, + enum map_type type) { struct map *map = machine__kernel_map(machine); - int ret = __dso__load_kallsyms(map->dso, filename, map, no_kcore); + int ret = __dso__load_kallsyms(map->dso, filename, map, true); if (ret > 0) { dso__set_loaded(map->dso, type); @@ -1006,12 +1010,6 @@ int __machine__load_kallsyms(struct machine *machine, const char *filename, return ret; } -int machine__load_kallsyms(struct machine *machine, const char *filename, - enum map_type type) -{ - return __machine__load_kallsyms(machine, filename, type, false); -} - int machine__load_vmlinux_path(struct machine *machine, enum map_type type) { struct map *map = machine__kernel_map(machine); @@ -1215,6 +1213,24 @@ static int machine__create_modules(struct machine *machine) return 0; } +static void machine__set_kernel_mmap(struct machine *machine, + u64 start, u64 end) +{ + int i; + + for (i = 0; i < MAP__NR_TYPES; i++) { + machine->vmlinux_maps[i]->start = start; + machine->vmlinux_maps[i]->end = end; + + /* + * Be a bit paranoid here, some perf.data file came with + * a zero sized synthesized MMAP event for the kernel. + */ + if (start == 0 && end == 0) + machine->vmlinux_maps[i]->end = ~0ULL; + } +} + int machine__create_kernel_maps(struct machine *machine) { struct dso *kernel = machine__get_kernel(machine); @@ -1239,40 +1255,22 @@ int machine__create_kernel_maps(struct machine *machine) "continuing anyway...\n", machine->pid); } - /* - * Now that we have all the maps created, just set the ->end of them: - */ - map_groups__fixup_end(&machine->kmaps); - if (!machine__get_running_kernel_start(machine, &name, &addr)) { if (name && maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, name, addr)) { machine__destroy_kernel_maps(machine); return -1; } + machine__set_kernel_mmap(machine, addr, 0); } + /* + * Now that we have all the maps created, just set the ->end of them: + */ + map_groups__fixup_end(&machine->kmaps); return 0; } -static void machine__set_kernel_mmap_len(struct machine *machine, - union perf_event *event) -{ - int i; - - for (i = 0; i < MAP__NR_TYPES; i++) { - machine->vmlinux_maps[i]->start = event->mmap.start; - machine->vmlinux_maps[i]->end = (event->mmap.start + - event->mmap.len); - /* - * Be a bit paranoid here, some perf.data file came with - * a zero sized synthesized MMAP event for the kernel. - */ - if (machine->vmlinux_maps[i]->end == 0) - machine->vmlinux_maps[i]->end = ~0ULL; - } -} - static bool machine__uses_kcore(struct machine *machine) { struct dso *dso; @@ -1289,7 +1287,6 @@ static int machine__process_kernel_mmap_event(struct machine *machine, union perf_event *event) { struct map *map; - char kmmap_prefix[PATH_MAX]; enum dso_kernel_type kernel_type; bool is_kernel_mmap; @@ -1297,15 +1294,14 @@ static int machine__process_kernel_mmap_event(struct machine *machine, if (machine__uses_kcore(machine)) return 0; - machine__mmap_name(machine, kmmap_prefix, sizeof(kmmap_prefix)); if (machine__is_host(machine)) kernel_type = DSO_TYPE_KERNEL; else kernel_type = DSO_TYPE_GUEST_KERNEL; is_kernel_mmap = memcmp(event->mmap.filename, - kmmap_prefix, - strlen(kmmap_prefix) - 1) == 0; + machine->mmap_name, + strlen(machine->mmap_name) - 1) == 0; if (event->mmap.filename[0] == '/' || (!is_kernel_mmap && event->mmap.filename[0] == '[')) { map = machine__findnew_module_map(machine, event->mmap.start, @@ -1316,7 +1312,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine, map->end = map->start + event->mmap.len; } else if (is_kernel_mmap) { const char *symbol_name = (event->mmap.filename + - strlen(kmmap_prefix)); + strlen(machine->mmap_name)); /* * Should be there already, from the build-id table in * the header. @@ -1357,7 +1353,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine, up_read(&machine->dsos.lock); if (kernel == NULL) - kernel = machine__findnew_dso(machine, kmmap_prefix); + kernel = machine__findnew_dso(machine, machine->mmap_name); if (kernel == NULL) goto out_problem; @@ -1370,7 +1366,8 @@ static int machine__process_kernel_mmap_event(struct machine *machine, if (strstr(kernel->long_name, "vmlinux")) dso__set_short_name(kernel, "[kernel.vmlinux]", false); - machine__set_kernel_mmap_len(machine, event); + machine__set_kernel_mmap(machine, event->mmap.start, + event->mmap.start + event->mmap.len); /* * Avoid using a zero address (kptr_restrict) for the ref reloc diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 5ce860b64c74..66cc200ef86f 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -43,6 +43,7 @@ struct machine { bool comm_exec; bool kptr_restrict_warned; char *root_dir; + char *mmap_name; struct threads threads[THREADS__TABLE_SIZE]; struct vdso_info *vdso_info; struct perf_env *env; @@ -142,8 +143,6 @@ struct machine *machines__find(struct machines *machines, pid_t pid); struct machine *machines__findnew(struct machines *machines, pid_t pid); void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size); -char *machine__mmap_name(struct machine *machine, char *bf, size_t size); - void machines__set_comm_exec(struct machines *machines, bool comm_exec); struct machine *machine__new_host(void); @@ -226,8 +225,6 @@ struct map *machine__findnew_module_map(struct machine *machine, u64 start, const char *filename); int arch__fix_module_text_start(u64 *start, const char *name); -int __machine__load_kallsyms(struct machine *machine, const char *filename, - enum map_type type, bool no_kcore); int machine__load_kallsyms(struct machine *machine, const char *filename, enum map_type type); int machine__load_vmlinux_path(struct machine *machine, enum map_type type); @@ -239,7 +236,6 @@ size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp, bool (skip)(struct dso *dso, int parm), int parm); void machine__destroy_kernel_maps(struct machine *machine); -int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel); int machine__create_kernel_maps(struct machine *machine); int machines__create_kernel_maps(struct machines *machines, pid_t pid); diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 91531a7c8fbf..4f27c464ce0b 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -64,25 +64,6 @@ static union perf_event *perf_mmap__read(struct perf_mmap *map, } /* - * legacy interface for mmap read. - * Don't use it. Use perf_mmap__read_event(). - */ -union perf_event *perf_mmap__read_forward(struct perf_mmap *map) -{ - u64 head; - - /* - * Check if event was unmapped due to a POLLHUP/POLLERR. - */ - if (!refcount_read(&map->refcnt)) - return NULL; - - head = perf_mmap__read_head(map); - - return perf_mmap__read(map, &map->prev, head); -} - -/* * Read event from ring buffer one by one. * Return one event for each call. * @@ -191,7 +172,7 @@ void perf_mmap__munmap(struct perf_mmap *map) int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd) { /* - * The last one will be done at perf_evlist__mmap_consume(), so that we + * The last one will be done at perf_mmap__consume(), so that we * make sure we don't prevent tools from consuming every last event in * the ring buffer. * diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 57e38fdf0b34..1111d5bf15ca 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -576,6 +576,34 @@ char * __weak get_cpuid_str(struct perf_pmu *pmu __maybe_unused) return NULL; } +/* Return zero when the cpuid from the mapfile.csv matches the + * cpuid string generated on this platform. + * Otherwise return non-zero. + */ +int __weak strcmp_cpuid_str(const char *mapcpuid, const char *cpuid) +{ + regex_t re; + regmatch_t pmatch[1]; + int match; + + if (regcomp(&re, mapcpuid, REG_EXTENDED) != 0) { + /* Warn unable to generate match particular string. */ + pr_info("Invalid regular expression %s\n", mapcpuid); + return 1; + } + + match = !regexec(&re, cpuid, 1, pmatch, 0); + regfree(&re); + if (match) { + size_t match_len = (pmatch[0].rm_eo - pmatch[0].rm_so); + + /* Verify the entire string matched. */ + if (match_len == strlen(cpuid)) + return 0; + } + return 1; +} + static char *perf_pmu__getcpuid(struct perf_pmu *pmu) { char *cpuid; @@ -610,31 +638,14 @@ struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu) i = 0; for (;;) { - regex_t re; - regmatch_t pmatch[1]; - int match; - map = &pmu_events_map[i++]; if (!map->table) { map = NULL; break; } - if (regcomp(&re, map->cpuid, REG_EXTENDED) != 0) { - /* Warn unable to generate match particular string. */ - pr_info("Invalid regular expression %s\n", map->cpuid); + if (!strcmp_cpuid_str(map->cpuid, cpuid)) break; - } - - match = !regexec(&re, cpuid, 1, pmatch, 0); - regfree(&re); - if (match) { - size_t match_len = (pmatch[0].rm_eo - pmatch[0].rm_so); - - /* Verify the entire string matched. */ - if (match_len == strlen(cpuid)) - break; - } } free(cpuid); return map; diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index b1e999bd21ef..35fb5ef7d290 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -12,6 +12,30 @@ #include "print_binary.h" #include "thread_map.h" +#if PY_MAJOR_VERSION < 3 +#define _PyUnicode_FromString(arg) \ + PyString_FromString(arg) +#define _PyUnicode_AsString(arg) \ + PyString_AsString(arg) +#define _PyUnicode_FromFormat(...) \ + PyString_FromFormat(__VA_ARGS__) +#define _PyLong_FromLong(arg) \ + PyInt_FromLong(arg) + +#else + +#define _PyUnicode_FromString(arg) \ + PyUnicode_FromString(arg) +#define _PyUnicode_FromFormat(...) \ + PyUnicode_FromFormat(__VA_ARGS__) +#define _PyLong_FromLong(arg) \ + PyLong_FromLong(arg) +#endif + +#ifndef Py_TYPE +#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) +#endif + /* * Provide these two so that we don't have to link against callchain.c and * start dragging hist.c, etc. @@ -49,7 +73,11 @@ int eprintf(int level, int var, const char *fmt, ...) # define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size, #endif +#if PY_MAJOR_VERSION < 3 PyMODINIT_FUNC initperf(void); +#else +PyMODINIT_FUNC PyInit_perf(void); +#endif #define member_def(type, member, ptype, help) \ { #member, ptype, \ @@ -107,7 +135,7 @@ static PyObject *pyrf_mmap_event__repr(struct pyrf_event *pevent) pevent->event.mmap.pgoff, pevent->event.mmap.filename) < 0) { ret = PyErr_NoMemory(); } else { - ret = PyString_FromString(s); + ret = _PyUnicode_FromString(s); free(s); } return ret; @@ -138,7 +166,7 @@ static PyMemberDef pyrf_task_event__members[] = { static PyObject *pyrf_task_event__repr(struct pyrf_event *pevent) { - return PyString_FromFormat("{ type: %s, pid: %u, ppid: %u, tid: %u, " + return _PyUnicode_FromFormat("{ type: %s, pid: %u, ppid: %u, tid: %u, " "ptid: %u, time: %" PRIu64 "}", pevent->event.header.type == PERF_RECORD_FORK ? "fork" : "exit", pevent->event.fork.pid, @@ -171,7 +199,7 @@ static PyMemberDef pyrf_comm_event__members[] = { static PyObject *pyrf_comm_event__repr(struct pyrf_event *pevent) { - return PyString_FromFormat("{ type: comm, pid: %u, tid: %u, comm: %s }", + return _PyUnicode_FromFormat("{ type: comm, pid: %u, tid: %u, comm: %s }", pevent->event.comm.pid, pevent->event.comm.tid, pevent->event.comm.comm); @@ -202,7 +230,7 @@ static PyObject *pyrf_throttle_event__repr(struct pyrf_event *pevent) { struct throttle_event *te = (struct throttle_event *)(&pevent->event.header + 1); - return PyString_FromFormat("{ type: %sthrottle, time: %" PRIu64 ", id: %" PRIu64 + return _PyUnicode_FromFormat("{ type: %sthrottle, time: %" PRIu64 ", id: %" PRIu64 ", stream_id: %" PRIu64 " }", pevent->event.header.type == PERF_RECORD_THROTTLE ? "" : "un", te->time, te->id, te->stream_id); @@ -237,7 +265,7 @@ static PyObject *pyrf_lost_event__repr(struct pyrf_event *pevent) pevent->event.lost.id, pevent->event.lost.lost) < 0) { ret = PyErr_NoMemory(); } else { - ret = PyString_FromString(s); + ret = _PyUnicode_FromString(s); free(s); } return ret; @@ -264,7 +292,7 @@ static PyMemberDef pyrf_read_event__members[] = { static PyObject *pyrf_read_event__repr(struct pyrf_event *pevent) { - return PyString_FromFormat("{ type: read, pid: %u, tid: %u }", + return _PyUnicode_FromFormat("{ type: read, pid: %u, tid: %u }", pevent->event.read.pid, pevent->event.read.tid); /* @@ -299,7 +327,7 @@ static PyObject *pyrf_sample_event__repr(struct pyrf_event *pevent) if (asprintf(&s, "{ type: sample }") < 0) { ret = PyErr_NoMemory(); } else { - ret = PyString_FromString(s); + ret = _PyUnicode_FromString(s); free(s); } return ret; @@ -330,7 +358,7 @@ tracepoint_field(struct pyrf_event *pe, struct format_field *field) } if (field->flags & FIELD_IS_STRING && is_printable_array(data + offset, len)) { - ret = PyString_FromString((char *)data + offset); + ret = _PyUnicode_FromString((char *)data + offset); } else { ret = PyByteArray_FromStringAndSize((const char *) data + offset, len); field->flags &= ~FIELD_IS_STRING; @@ -352,7 +380,7 @@ tracepoint_field(struct pyrf_event *pe, struct format_field *field) static PyObject* get_tracepoint_field(struct pyrf_event *pevent, PyObject *attr_name) { - const char *str = PyString_AsString(PyObject_Str(attr_name)); + const char *str = _PyUnicode_AsString(PyObject_Str(attr_name)); struct perf_evsel *evsel = pevent->evsel; struct format_field *field; @@ -416,7 +444,7 @@ static PyObject *pyrf_context_switch_event__repr(struct pyrf_event *pevent) !!(pevent->event.header.misc & PERF_RECORD_MISC_SWITCH_OUT)) < 0) { ret = PyErr_NoMemory(); } else { - ret = PyString_FromString(s); + ret = _PyUnicode_FromString(s); free(s); } return ret; @@ -528,7 +556,7 @@ static int pyrf_cpu_map__init(struct pyrf_cpu_map *pcpus, static void pyrf_cpu_map__delete(struct pyrf_cpu_map *pcpus) { cpu_map__put(pcpus->cpus); - pcpus->ob_type->tp_free((PyObject*)pcpus); + Py_TYPE(pcpus)->tp_free((PyObject*)pcpus); } static Py_ssize_t pyrf_cpu_map__length(PyObject *obj) @@ -597,7 +625,7 @@ static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads, static void pyrf_thread_map__delete(struct pyrf_thread_map *pthreads) { thread_map__put(pthreads->threads); - pthreads->ob_type->tp_free((PyObject*)pthreads); + Py_TYPE(pthreads)->tp_free((PyObject*)pthreads); } static Py_ssize_t pyrf_thread_map__length(PyObject *obj) @@ -759,7 +787,7 @@ static int pyrf_evsel__init(struct pyrf_evsel *pevsel, static void pyrf_evsel__delete(struct pyrf_evsel *pevsel) { perf_evsel__exit(&pevsel->evsel); - pevsel->ob_type->tp_free((PyObject*)pevsel); + Py_TYPE(pevsel)->tp_free((PyObject*)pevsel); } static PyObject *pyrf_evsel__open(struct pyrf_evsel *pevsel, @@ -850,7 +878,7 @@ static int pyrf_evlist__init(struct pyrf_evlist *pevlist, static void pyrf_evlist__delete(struct pyrf_evlist *pevlist) { perf_evlist__exit(&pevlist->evlist); - pevlist->ob_type->tp_free((PyObject*)pevlist); + Py_TYPE(pevlist)->tp_free((PyObject*)pevlist); } static PyObject *pyrf_evlist__mmap(struct pyrf_evlist *pevlist, @@ -902,12 +930,16 @@ static PyObject *pyrf_evlist__get_pollfd(struct pyrf_evlist *pevlist, for (i = 0; i < evlist->pollfd.nr; ++i) { PyObject *file; +#if PY_MAJOR_VERSION < 3 FILE *fp = fdopen(evlist->pollfd.entries[i].fd, "r"); if (fp == NULL) goto free_list; file = PyFile_FromFile(fp, "perf", "r", NULL); +#else + file = PyFile_FromFd(evlist->pollfd.entries[i].fd, "perf", "r", -1, NULL, NULL, NULL, 1); +#endif if (file == NULL) goto free_list; @@ -951,13 +983,19 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, union perf_event *event; int sample_id_all = 1, cpu; static char *kwlist[] = { "cpu", "sample_id_all", NULL }; + struct perf_mmap *md; + u64 end, start; int err; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|i", kwlist, &cpu, &sample_id_all)) return NULL; - event = perf_evlist__mmap_read(evlist, cpu); + md = &evlist->mmap[cpu]; + if (perf_mmap__read_init(md, false, &start, &end) < 0) + goto end; + + event = perf_mmap__read_event(md, false, &start, end); if (event != NULL) { PyObject *pyevent = pyrf_event__new(event); struct pyrf_event *pevent = (struct pyrf_event *)pyevent; @@ -975,14 +1013,14 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, err = perf_evsel__parse_sample(evsel, event, &pevent->sample); /* Consume the even only after we parsed it out. */ - perf_evlist__mmap_consume(evlist, cpu); + perf_mmap__consume(md, false); if (err) return PyErr_Format(PyExc_OSError, "perf: can't parse sample, err=%d", err); return pyevent; } - +end: Py_INCREF(Py_None); return Py_None; } @@ -1194,9 +1232,9 @@ static PyObject *pyrf__tracepoint(struct pyrf_evsel *pevsel, tp_format = trace_event__tp_format(sys, name); if (IS_ERR(tp_format)) - return PyInt_FromLong(-1); + return _PyLong_FromLong(-1); - return PyInt_FromLong(tp_format->id); + return _PyLong_FromLong(tp_format->id); } static PyMethodDef perf__methods[] = { @@ -1209,11 +1247,31 @@ static PyMethodDef perf__methods[] = { { .ml_name = NULL, } }; +#if PY_MAJOR_VERSION < 3 PyMODINIT_FUNC initperf(void) +#else +PyMODINIT_FUNC PyInit_perf(void) +#endif { PyObject *obj; int i; - PyObject *dict, *module = Py_InitModule("perf", perf__methods); + PyObject *dict; +#if PY_MAJOR_VERSION < 3 + PyObject *module = Py_InitModule("perf", perf__methods); +#else + static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "perf", /* m_name */ + "", /* m_doc */ + -1, /* m_size */ + perf__methods, /* m_methods */ + NULL, /* m_reload */ + NULL, /* m_traverse */ + NULL, /* m_clear */ + NULL, /* m_free */ + }; + PyObject *module = PyModule_Create(&moduledef); +#endif if (module == NULL || pyrf_event__setup_types() < 0 || @@ -1221,7 +1279,11 @@ PyMODINIT_FUNC initperf(void) pyrf_evsel__setup_types() < 0 || pyrf_thread_map__setup_types() < 0 || pyrf_cpu_map__setup_types() < 0) +#if PY_MAJOR_VERSION < 3 return; +#else + return module; +#endif /* The page_size is placed in util object. */ page_size = sysconf(_SC_PAGE_SIZE); @@ -1270,7 +1332,7 @@ PyMODINIT_FUNC initperf(void) goto error; for (i = 0; perf__constants[i].name != NULL; i++) { - obj = PyInt_FromLong(perf__constants[i].value); + obj = _PyLong_FromLong(perf__constants[i].value); if (obj == NULL) goto error; PyDict_SetItemString(dict, perf__constants[i].name, obj); @@ -1280,6 +1342,9 @@ PyMODINIT_FUNC initperf(void) error: if (PyErr_Occurred()) PyErr_SetString(PyExc_ImportError, "perf: Init failed!"); +#if PY_MAJOR_VERSION >= 3 + return module; +#endif } /* diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 6f09e4962dad..9cfc7bf16531 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -5,6 +5,7 @@ #include "parse-events.h" #include <errno.h> #include <api/fs/fs.h> +#include <subcmd/parse-options.h> #include "util.h" #include "cloexec.h" @@ -219,11 +220,21 @@ static int record_opts__config_freq(struct record_opts *opts) * User specified frequency is over current maximum. */ if (user_freq && (max_rate < opts->freq)) { - pr_err("Maximum frequency rate (%u) reached.\n" - "Please use -F freq option with lower value or consider\n" - "tweaking /proc/sys/kernel/perf_event_max_sample_rate.\n", - max_rate); - return -1; + if (opts->strict_freq) { + pr_err("error: Maximum frequency rate (%'u Hz) exceeded.\n" + " Please use -F freq option with a lower value or consider\n" + " tweaking /proc/sys/kernel/perf_event_max_sample_rate.\n", + max_rate); + return -1; + } else { + pr_warning("warning: Maximum frequency rate (%'u Hz) exceeded, throttling from %'u Hz to %'u Hz.\n" + " The limit can be raised via /proc/sys/kernel/perf_event_max_sample_rate.\n" + " The kernel will lower it when perf's interrupts take too long.\n" + " Use --strict-freq to disable this throttling, refusing to record.\n", + max_rate, opts->freq, max_rate); + + opts->freq = max_rate; + } } /* @@ -291,3 +302,25 @@ out_delete: perf_evlist__delete(temp_evlist); return ret; } + +int record__parse_freq(const struct option *opt, const char *str, int unset __maybe_unused) +{ + unsigned int freq; + struct record_opts *opts = opt->value; + + if (!str) + return -EINVAL; + + if (strcasecmp(str, "max") == 0) { + if (get_max_rate(&freq)) { + pr_err("couldn't read /proc/sys/kernel/perf_event_max_sample_rate\n"); + return -1; + } + pr_info("info: Using a maximum frequency rate of %'d Hz\n", freq); + } else { + freq = atoi(str); + } + + opts->user_freq = freq; + return 0; +} diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index ea070883c593..10dd5fce082b 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -49,7 +49,37 @@ #include "print_binary.h" #include "stat.h" +#if PY_MAJOR_VERSION < 3 +#define _PyUnicode_FromString(arg) \ + PyString_FromString(arg) +#define _PyUnicode_FromStringAndSize(arg1, arg2) \ + PyString_FromStringAndSize((arg1), (arg2)) +#define _PyBytes_FromStringAndSize(arg1, arg2) \ + PyString_FromStringAndSize((arg1), (arg2)) +#define _PyLong_FromLong(arg) \ + PyInt_FromLong(arg) +#define _PyLong_AsLong(arg) \ + PyInt_AsLong(arg) +#define _PyCapsule_New(arg1, arg2, arg3) \ + PyCObject_FromVoidPtr((arg1), (arg2)) + PyMODINIT_FUNC initperf_trace_context(void); +#else +#define _PyUnicode_FromString(arg) \ + PyUnicode_FromString(arg) +#define _PyUnicode_FromStringAndSize(arg1, arg2) \ + PyUnicode_FromStringAndSize((arg1), (arg2)) +#define _PyBytes_FromStringAndSize(arg1, arg2) \ + PyBytes_FromStringAndSize((arg1), (arg2)) +#define _PyLong_FromLong(arg) \ + PyLong_FromLong(arg) +#define _PyLong_AsLong(arg) \ + PyLong_AsLong(arg) +#define _PyCapsule_New(arg1, arg2, arg3) \ + PyCapsule_New((arg1), (arg2), (arg3)) + +PyMODINIT_FUNC PyInit_perf_trace_context(void); +#endif #define TRACE_EVENT_TYPE_MAX \ ((1 << (sizeof(unsigned short) * 8)) - 1) @@ -135,7 +165,7 @@ static int get_argument_count(PyObject *handler) PyObject *arg_count_obj = PyObject_GetAttrString(code_obj, "co_argcount"); if (arg_count_obj) { - arg_count = (int) PyInt_AsLong(arg_count_obj); + arg_count = (int) _PyLong_AsLong(arg_count_obj); Py_DECREF(arg_count_obj); } Py_DECREF(code_obj); @@ -182,10 +212,10 @@ static void define_value(enum print_arg_type field_type, value = eval_flag(field_value); - PyTuple_SetItem(t, n++, PyString_FromString(ev_name)); - PyTuple_SetItem(t, n++, PyString_FromString(field_name)); - PyTuple_SetItem(t, n++, PyInt_FromLong(value)); - PyTuple_SetItem(t, n++, PyString_FromString(field_str)); + PyTuple_SetItem(t, n++, _PyUnicode_FromString(ev_name)); + PyTuple_SetItem(t, n++, _PyUnicode_FromString(field_name)); + PyTuple_SetItem(t, n++, _PyLong_FromLong(value)); + PyTuple_SetItem(t, n++, _PyUnicode_FromString(field_str)); try_call_object(handler_name, t); @@ -223,10 +253,10 @@ static void define_field(enum print_arg_type field_type, if (!t) Py_FatalError("couldn't create Python tuple"); - PyTuple_SetItem(t, n++, PyString_FromString(ev_name)); - PyTuple_SetItem(t, n++, PyString_FromString(field_name)); + PyTuple_SetItem(t, n++, _PyUnicode_FromString(ev_name)); + PyTuple_SetItem(t, n++, _PyUnicode_FromString(field_name)); if (field_type == PRINT_FLAGS) - PyTuple_SetItem(t, n++, PyString_FromString(delim)); + PyTuple_SetItem(t, n++, _PyUnicode_FromString(delim)); try_call_object(handler_name, t); @@ -325,12 +355,12 @@ static PyObject *get_field_numeric_entry(struct event_format *event, if (field->flags & FIELD_IS_SIGNED) { if ((long long)val >= LONG_MIN && (long long)val <= LONG_MAX) - obj = PyInt_FromLong(val); + obj = _PyLong_FromLong(val); else obj = PyLong_FromLongLong(val); } else { if (val <= LONG_MAX) - obj = PyInt_FromLong(val); + obj = _PyLong_FromLong(val); else obj = PyLong_FromUnsignedLongLong(val); } @@ -389,9 +419,9 @@ static PyObject *python_process_callchain(struct perf_sample *sample, pydict_set_item_string_decref(pysym, "end", PyLong_FromUnsignedLongLong(node->sym->end)); pydict_set_item_string_decref(pysym, "binding", - PyInt_FromLong(node->sym->binding)); + _PyLong_FromLong(node->sym->binding)); pydict_set_item_string_decref(pysym, "name", - PyString_FromStringAndSize(node->sym->name, + _PyUnicode_FromStringAndSize(node->sym->name, node->sym->namelen)); pydict_set_item_string_decref(pyelem, "sym", pysym); } @@ -406,7 +436,7 @@ static PyObject *python_process_callchain(struct perf_sample *sample, dsoname = map->dso->name; } pydict_set_item_string_decref(pyelem, "dso", - PyString_FromString(dsoname)); + _PyUnicode_FromString(dsoname)); } callchain_cursor_advance(&callchain_cursor); @@ -483,16 +513,16 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, if (!dict_sample) Py_FatalError("couldn't create Python dictionary"); - pydict_set_item_string_decref(dict, "ev_name", PyString_FromString(perf_evsel__name(evsel))); - pydict_set_item_string_decref(dict, "attr", PyString_FromStringAndSize( + pydict_set_item_string_decref(dict, "ev_name", _PyUnicode_FromString(perf_evsel__name(evsel))); + pydict_set_item_string_decref(dict, "attr", _PyUnicode_FromStringAndSize( (const char *)&evsel->attr, sizeof(evsel->attr))); pydict_set_item_string_decref(dict_sample, "pid", - PyInt_FromLong(sample->pid)); + _PyLong_FromLong(sample->pid)); pydict_set_item_string_decref(dict_sample, "tid", - PyInt_FromLong(sample->tid)); + _PyLong_FromLong(sample->tid)); pydict_set_item_string_decref(dict_sample, "cpu", - PyInt_FromLong(sample->cpu)); + _PyLong_FromLong(sample->cpu)); pydict_set_item_string_decref(dict_sample, "ip", PyLong_FromUnsignedLongLong(sample->ip)); pydict_set_item_string_decref(dict_sample, "time", @@ -504,17 +534,17 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, set_sample_read_in_dict(dict_sample, sample, evsel); pydict_set_item_string_decref(dict, "sample", dict_sample); - pydict_set_item_string_decref(dict, "raw_buf", PyString_FromStringAndSize( + pydict_set_item_string_decref(dict, "raw_buf", _PyBytes_FromStringAndSize( (const char *)sample->raw_data, sample->raw_size)); pydict_set_item_string_decref(dict, "comm", - PyString_FromString(thread__comm_str(al->thread))); + _PyUnicode_FromString(thread__comm_str(al->thread))); if (al->map) { pydict_set_item_string_decref(dict, "dso", - PyString_FromString(al->map->dso->name)); + _PyUnicode_FromString(al->map->dso->name)); } if (al->sym) { pydict_set_item_string_decref(dict, "symbol", - PyString_FromString(al->sym->name)); + _PyUnicode_FromString(al->sym->name)); } pydict_set_item_string_decref(dict, "callchain", callchain); @@ -574,9 +604,9 @@ static void python_process_tracepoint(struct perf_sample *sample, scripting_context->event_data = data; scripting_context->pevent = evsel->tp_format->pevent; - context = PyCObject_FromVoidPtr(scripting_context, NULL); + context = _PyCapsule_New(scripting_context, NULL, NULL); - PyTuple_SetItem(t, n++, PyString_FromString(handler_name)); + PyTuple_SetItem(t, n++, _PyUnicode_FromString(handler_name)); PyTuple_SetItem(t, n++, context); /* ip unwinding */ @@ -585,18 +615,18 @@ static void python_process_tracepoint(struct perf_sample *sample, Py_INCREF(callchain); if (!dict) { - PyTuple_SetItem(t, n++, PyInt_FromLong(cpu)); - PyTuple_SetItem(t, n++, PyInt_FromLong(s)); - PyTuple_SetItem(t, n++, PyInt_FromLong(ns)); - PyTuple_SetItem(t, n++, PyInt_FromLong(pid)); - PyTuple_SetItem(t, n++, PyString_FromString(comm)); + PyTuple_SetItem(t, n++, _PyLong_FromLong(cpu)); + PyTuple_SetItem(t, n++, _PyLong_FromLong(s)); + PyTuple_SetItem(t, n++, _PyLong_FromLong(ns)); + PyTuple_SetItem(t, n++, _PyLong_FromLong(pid)); + PyTuple_SetItem(t, n++, _PyUnicode_FromString(comm)); PyTuple_SetItem(t, n++, callchain); } else { - pydict_set_item_string_decref(dict, "common_cpu", PyInt_FromLong(cpu)); - pydict_set_item_string_decref(dict, "common_s", PyInt_FromLong(s)); - pydict_set_item_string_decref(dict, "common_ns", PyInt_FromLong(ns)); - pydict_set_item_string_decref(dict, "common_pid", PyInt_FromLong(pid)); - pydict_set_item_string_decref(dict, "common_comm", PyString_FromString(comm)); + pydict_set_item_string_decref(dict, "common_cpu", _PyLong_FromLong(cpu)); + pydict_set_item_string_decref(dict, "common_s", _PyLong_FromLong(s)); + pydict_set_item_string_decref(dict, "common_ns", _PyLong_FromLong(ns)); + pydict_set_item_string_decref(dict, "common_pid", _PyLong_FromLong(pid)); + pydict_set_item_string_decref(dict, "common_comm", _PyUnicode_FromString(comm)); pydict_set_item_string_decref(dict, "common_callchain", callchain); } for (field = event->format.fields; field; field = field->next) { @@ -615,7 +645,7 @@ static void python_process_tracepoint(struct perf_sample *sample, } if (field->flags & FIELD_IS_STRING && is_printable_array(data + offset, len)) { - obj = PyString_FromString((char *) data + offset); + obj = _PyUnicode_FromString((char *) data + offset); } else { obj = PyByteArray_FromStringAndSize((const char *) data + offset, len); field->flags &= ~FIELD_IS_STRING; @@ -668,7 +698,7 @@ static PyObject *tuple_new(unsigned int sz) static int tuple_set_u64(PyObject *t, unsigned int pos, u64 val) { #if BITS_PER_LONG == 64 - return PyTuple_SetItem(t, pos, PyInt_FromLong(val)); + return PyTuple_SetItem(t, pos, _PyLong_FromLong(val)); #endif #if BITS_PER_LONG == 32 return PyTuple_SetItem(t, pos, PyLong_FromLongLong(val)); @@ -677,12 +707,12 @@ static int tuple_set_u64(PyObject *t, unsigned int pos, u64 val) static int tuple_set_s32(PyObject *t, unsigned int pos, s32 val) { - return PyTuple_SetItem(t, pos, PyInt_FromLong(val)); + return PyTuple_SetItem(t, pos, _PyLong_FromLong(val)); } static int tuple_set_string(PyObject *t, unsigned int pos, const char *s) { - return PyTuple_SetItem(t, pos, PyString_FromString(s)); + return PyTuple_SetItem(t, pos, _PyUnicode_FromString(s)); } static int python_export_evsel(struct db_export *dbe, struct perf_evsel *evsel) @@ -1029,8 +1059,8 @@ process_stat(struct perf_evsel *counter, int cpu, int thread, u64 tstamp, return; } - PyTuple_SetItem(t, n++, PyInt_FromLong(cpu)); - PyTuple_SetItem(t, n++, PyInt_FromLong(thread)); + PyTuple_SetItem(t, n++, _PyLong_FromLong(cpu)); + PyTuple_SetItem(t, n++, _PyLong_FromLong(thread)); tuple_set_u64(t, n++, tstamp); tuple_set_u64(t, n++, count->val); @@ -1212,27 +1242,58 @@ static void set_table_handlers(struct tables *tables) SET_TABLE_HANDLER(call_return); } +#if PY_MAJOR_VERSION < 3 +static void _free_command_line(const char **command_line, int num) +{ + free(command_line); +} +#else +static void _free_command_line(wchar_t **command_line, int num) +{ + int i; + for (i = 0; i < num; i++) + PyMem_RawFree(command_line[i]); + free(command_line); +} +#endif + + /* * Start trace script */ static int python_start_script(const char *script, int argc, const char **argv) { struct tables *tables = &tables_global; +#if PY_MAJOR_VERSION < 3 const char **command_line; +#else + wchar_t **command_line; +#endif char buf[PATH_MAX]; int i, err = 0; FILE *fp; +#if PY_MAJOR_VERSION < 3 command_line = malloc((argc + 1) * sizeof(const char *)); command_line[0] = script; for (i = 1; i < argc + 1; i++) command_line[i] = argv[i - 1]; +#else + command_line = malloc((argc + 1) * sizeof(wchar_t *)); + command_line[0] = Py_DecodeLocale(script, NULL); + for (i = 1; i < argc + 1; i++) + command_line[i] = Py_DecodeLocale(argv[i - 1], NULL); +#endif Py_Initialize(); +#if PY_MAJOR_VERSION < 3 initperf_trace_context(); - PySys_SetArgv(argc + 1, (char **)command_line); +#else + PyInit_perf_trace_context(); + PySys_SetArgv(argc + 1, command_line); +#endif fp = fopen(script, "r"); if (!fp) { @@ -1262,12 +1323,12 @@ static int python_start_script(const char *script, int argc, const char **argv) goto error; } - free(command_line); + _free_command_line(command_line, argc + 1); return err; error: Py_Finalize(); - free(command_line); + _free_command_line(command_line, argc + 1); return err; } diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index af415febbc46..6891635b50c3 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -1,4 +1,4 @@ -#!/usr/bin/python2 +#!/usr/bin/python from os import getenv @@ -35,11 +35,11 @@ build_tmp = getenv('PYTHON_EXTBUILD_TMP') libtraceevent = getenv('LIBTRACEEVENT') libapikfs = getenv('LIBAPI') -ext_sources = [f.strip() for f in file('util/python-ext-sources') +ext_sources = [f.strip() for f in open('util/python-ext-sources') if len(f.strip()) > 0 and f[0] != '#'] # use full paths with source files -ext_sources = map(lambda x: '%s/%s' % (src_perf, x) , ext_sources) +ext_sources = list(map(lambda x: '%s/%s' % (src_perf, x) , ext_sources)) perf = Extension('perf', sources = ext_sources, diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 2da4d0456a03..e8514f651865 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -111,17 +111,20 @@ struct sort_entry sort_thread = { /* --sort comm */ +/* + * We can't use pointer comparison in functions below, + * because it gives different results based on pointer + * values, which could break some sorting assumptions. + */ static int64_t sort__comm_cmp(struct hist_entry *left, struct hist_entry *right) { - /* Compare the addr that should be unique among comm */ return strcmp(comm__str(right->comm), comm__str(left->comm)); } static int64_t sort__comm_collapse(struct hist_entry *left, struct hist_entry *right) { - /* Compare the addr that should be unique among comm */ return strcmp(comm__str(right->comm), comm__str(left->comm)); } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index dbc6f7134f61..2f44e386a0e8 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -90,6 +90,8 @@ struct perf_stat_config { bool scale; FILE *output; unsigned int interval; + unsigned int timeout; + int times; struct runtime_stat *stats; int stats_num; }; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index cc065d4bfafc..a1a312d99f30 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1582,7 +1582,7 @@ int dso__load(struct dso *dso, struct map *map) bool next_slot = false; bool is_reg; bool nsexit; - int sirc; + int sirc = -1; enum dso_binary_type symtab_type = binary_type_symtab[i]; @@ -1600,16 +1600,14 @@ int dso__load(struct dso *dso, struct map *map) nsinfo__mountns_exit(&nsc); is_reg = is_regular_file(name); - sirc = symsrc__init(ss, dso, name, symtab_type); + if (is_reg) + sirc = symsrc__init(ss, dso, name, symtab_type); if (nsexit) nsinfo__mountns_enter(dso->nsinfo, &nsc); - if (!is_reg || sirc < 0) { - if (sirc >= 0) - symsrc__destroy(ss); + if (!is_reg || sirc < 0) continue; - } if (!syms_ss && symsrc__has_symtab(ss)) { syms_ss = ss; @@ -1960,8 +1958,7 @@ static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map) pr_debug("Using %s for symbols\n", kallsyms_filename); if (err > 0 && !dso__is_kcore(dso)) { dso->binary_type = DSO_BINARY_TYPE__GUEST_KALLSYMS; - machine__mmap_name(machine, path, sizeof(path)); - dso__set_long_name(dso, strdup(path), true); + dso__set_long_name(dso, machine->mmap_name, false); map__fixup_start(map); map__fixup_end(map); } diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c index 303bdb84ab5a..895122d638dd 100644 --- a/tools/perf/util/syscalltbl.c +++ b/tools/perf/util/syscalltbl.c @@ -30,6 +30,14 @@ static const char **syscalltbl_native = syscalltbl_x86_64; #include <asm/syscalls_64.c> const int syscalltbl_native_max_id = SYSCALLTBL_S390_64_MAX_ID; static const char **syscalltbl_native = syscalltbl_s390_64; +#elif defined(__powerpc64__) +#include <asm/syscalls_64.c> +const int syscalltbl_native_max_id = SYSCALLTBL_POWERPC_64_MAX_ID; +static const char **syscalltbl_native = syscalltbl_powerpc_64; +#elif defined(__powerpc__) +#include <asm/syscalls_32.c> +const int syscalltbl_native_max_id = SYSCALLTBL_POWERPC_32_MAX_ID; +static const char **syscalltbl_native = syscalltbl_powerpc_32; #endif struct syscall { diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index 3e1038f6491c..5d467d8ae9ab 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -32,6 +32,7 @@ static void thread_map__reset(struct thread_map *map, int start, int nr) size_t size = (nr - start) * sizeof(map->map[0]); memset(&map->map[start], 0, size); + map->err_thread = -1; } static struct thread_map *thread_map__realloc(struct thread_map *map, int nr) @@ -323,7 +324,7 @@ out_free_threads: } struct thread_map *thread_map__new_str(const char *pid, const char *tid, - uid_t uid, bool per_thread) + uid_t uid, bool all_threads) { if (pid) return thread_map__new_by_pid_str(pid); @@ -331,7 +332,7 @@ struct thread_map *thread_map__new_str(const char *pid, const char *tid, if (!tid && uid != UINT_MAX) return thread_map__new_by_uid(uid); - if (per_thread) + if (all_threads) return thread_map__new_all_cpus(); return thread_map__new_by_tid_str(tid); diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index 0a806b99e73c..2f689c90a8c6 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -14,6 +14,7 @@ struct thread_map_data { struct thread_map { refcount_t refcnt; int nr; + int err_thread; struct thread_map_data map[]; }; @@ -31,7 +32,7 @@ struct thread_map *thread_map__get(struct thread_map *map); void thread_map__put(struct thread_map *map); struct thread_map *thread_map__new_str(const char *pid, - const char *tid, uid_t uid, bool per_thread); + const char *tid, uid_t uid, bool all_threads); struct thread_map *thread_map__new_by_tid_str(const char *tid_str); |