diff options
92 files changed, 4714 insertions, 861 deletions
diff --git a/arch/powerpc/include/asm/uprobes.h b/arch/powerpc/include/asm/uprobes.h index 75c6ecdb8f37..7422a999a39a 100644 --- a/arch/powerpc/include/asm/uprobes.h +++ b/arch/powerpc/include/asm/uprobes.h @@ -36,9 +36,8 @@ typedef ppc_opcode_t uprobe_opcode_t; struct arch_uprobe { union { - u8 insn[MAX_UINSN_BYTES]; - u8 ixol[MAX_UINSN_BYTES]; - u32 ainsn; + u32 insn; + u32 ixol; }; }; diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c index 59f419b935f2..003b20964ea0 100644 --- a/arch/powerpc/kernel/uprobes.c +++ b/arch/powerpc/kernel/uprobes.c @@ -186,7 +186,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) * emulate_step() returns 1 if the insn was successfully emulated. * For all other cases, we need to single-step in hardware. */ - ret = emulate_step(regs, auprobe->ainsn); + ret = emulate_step(regs, auprobe->insn); if (ret > 0) return true; diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 47b56a7e99cb..6359506a19ee 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -36,7 +36,7 @@ obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd_iommu.o endif obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o -obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o +obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o perf_event_intel_rapl.o endif diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c new file mode 100644 index 000000000000..bf8e4a736d48 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c @@ -0,0 +1,661 @@ +/* + * perf_event_intel_rapl.c: support Intel RAPL energy consumption counters + * Copyright (C) 2013 Google, Inc., Stephane Eranian + * + * Intel RAPL interface is specified in the IA-32 Manual Vol3b + * section 14.7.1 (September 2013) + * + * RAPL provides more controls than just reporting energy consumption + * however here we only expose the 3 energy consumption free running + * counters (pp0, pkg, dram). + * + * Each of those counters increments in a power unit defined by the + * RAPL_POWER_UNIT MSR. On SandyBridge, this unit is 1/(2^16) Joules + * but it can vary. + * + * Counter to rapl events mappings: + * + * pp0 counter: consumption of all physical cores (power plane 0) + * event: rapl_energy_cores + * perf code: 0x1 + * + * pkg counter: consumption of the whole processor package + * event: rapl_energy_pkg + * perf code: 0x2 + * + * dram counter: consumption of the dram domain (servers only) + * event: rapl_energy_dram + * perf code: 0x3 + * + * We manage those counters as free running (read-only). They may be + * use simultaneously by other tools, such as turbostat. + * + * The events only support system-wide mode counting. There is no + * sampling support because it does not make sense and is not + * supported by the RAPL hardware. + * + * Because we want to avoid floating-point operations in the kernel, + * the events are all reported in fixed point arithmetic (32.32). + * Tools must adjust the counts to convert them to Watts using + * the duration of the measurement. Tools may use a function such as + * ldexp(raw_count, -32); + */ +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/perf_event.h> +#include <asm/cpu_device_id.h> +#include "perf_event.h" + +/* + * RAPL energy status counters + */ +#define RAPL_IDX_PP0_NRG_STAT 0 /* all cores */ +#define INTEL_RAPL_PP0 0x1 /* pseudo-encoding */ +#define RAPL_IDX_PKG_NRG_STAT 1 /* entire package */ +#define INTEL_RAPL_PKG 0x2 /* pseudo-encoding */ +#define RAPL_IDX_RAM_NRG_STAT 2 /* DRAM */ +#define INTEL_RAPL_RAM 0x3 /* pseudo-encoding */ + +/* Clients have PP0, PKG */ +#define RAPL_IDX_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\ + 1<<RAPL_IDX_PKG_NRG_STAT) + +/* Servers have PP0, PKG, RAM */ +#define RAPL_IDX_SRV (1<<RAPL_IDX_PP0_NRG_STAT|\ + 1<<RAPL_IDX_PKG_NRG_STAT|\ + 1<<RAPL_IDX_RAM_NRG_STAT) + +/* + * event code: LSB 8 bits, passed in attr->config + * any other bit is reserved + */ +#define RAPL_EVENT_MASK 0xFFULL + +#define DEFINE_RAPL_FORMAT_ATTR(_var, _name, _format) \ +static ssize_t __rapl_##_var##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + char *page) \ +{ \ + BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ + return sprintf(page, _format "\n"); \ +} \ +static struct kobj_attribute format_attr_##_var = \ + __ATTR(_name, 0444, __rapl_##_var##_show, NULL) + +#define RAPL_EVENT_DESC(_name, _config) \ +{ \ + .attr = __ATTR(_name, 0444, rapl_event_show, NULL), \ + .config = _config, \ +} + +#define RAPL_CNTR_WIDTH 32 /* 32-bit rapl counters */ + +struct rapl_pmu { + spinlock_t lock; + int hw_unit; /* 1/2^hw_unit Joule */ + int n_active; /* number of active events */ + struct list_head active_list; + struct pmu *pmu; /* pointer to rapl_pmu_class */ + ktime_t timer_interval; /* in ktime_t unit */ + struct hrtimer hrtimer; +}; + +static struct pmu rapl_pmu_class; +static cpumask_t rapl_cpu_mask; +static int rapl_cntr_mask; + +static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu); +static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free); + +static inline u64 rapl_read_counter(struct perf_event *event) +{ + u64 raw; + rdmsrl(event->hw.event_base, raw); + return raw; +} + +static inline u64 rapl_scale(u64 v) +{ + /* + * scale delta to smallest unit (1/2^32) + * users must then scale back: count * 1/(1e9*2^32) to get Joules + * or use ldexp(count, -32). + * Watts = Joules/Time delta + */ + return v << (32 - __get_cpu_var(rapl_pmu)->hw_unit); +} + +static u64 rapl_event_update(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 prev_raw_count, new_raw_count; + s64 delta, sdelta; + int shift = RAPL_CNTR_WIDTH; + +again: + prev_raw_count = local64_read(&hwc->prev_count); + rdmsrl(event->hw.event_base, new_raw_count); + + if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count) { + cpu_relax(); + goto again; + } + + /* + * Now we have the new raw value and have updated the prev + * timestamp already. We can now calculate the elapsed delta + * (event-)time and add that to the generic event. + * + * Careful, not all hw sign-extends above the physical width + * of the count. + */ + delta = (new_raw_count << shift) - (prev_raw_count << shift); + delta >>= shift; + + sdelta = rapl_scale(delta); + + local64_add(sdelta, &event->count); + + return new_raw_count; +} + +static void rapl_start_hrtimer(struct rapl_pmu *pmu) +{ + __hrtimer_start_range_ns(&pmu->hrtimer, + pmu->timer_interval, 0, + HRTIMER_MODE_REL_PINNED, 0); +} + +static void rapl_stop_hrtimer(struct rapl_pmu *pmu) +{ + hrtimer_cancel(&pmu->hrtimer); +} + +static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer) +{ + struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu); + struct perf_event *event; + unsigned long flags; + + if (!pmu->n_active) + return HRTIMER_NORESTART; + + spin_lock_irqsave(&pmu->lock, flags); + + list_for_each_entry(event, &pmu->active_list, active_entry) { + rapl_event_update(event); + } + + spin_unlock_irqrestore(&pmu->lock, flags); + + hrtimer_forward_now(hrtimer, pmu->timer_interval); + + return HRTIMER_RESTART; +} + +static void rapl_hrtimer_init(struct rapl_pmu *pmu) +{ + struct hrtimer *hr = &pmu->hrtimer; + + hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hr->function = rapl_hrtimer_handle; +} + +static void __rapl_pmu_event_start(struct rapl_pmu *pmu, + struct perf_event *event) +{ + if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) + return; + + event->hw.state = 0; + + list_add_tail(&event->active_entry, &pmu->active_list); + + local64_set(&event->hw.prev_count, rapl_read_counter(event)); + + pmu->n_active++; + if (pmu->n_active == 1) + rapl_start_hrtimer(pmu); +} + +static void rapl_pmu_event_start(struct perf_event *event, int mode) +{ + struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu); + unsigned long flags; + + spin_lock_irqsave(&pmu->lock, flags); + __rapl_pmu_event_start(pmu, event); + spin_unlock_irqrestore(&pmu->lock, flags); +} + +static void rapl_pmu_event_stop(struct perf_event *event, int mode) +{ + struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu); + struct hw_perf_event *hwc = &event->hw; + unsigned long flags; + + spin_lock_irqsave(&pmu->lock, flags); + + /* mark event as deactivated and stopped */ + if (!(hwc->state & PERF_HES_STOPPED)) { + WARN_ON_ONCE(pmu->n_active <= 0); + pmu->n_active--; + if (pmu->n_active == 0) + rapl_stop_hrtimer(pmu); + + list_del(&event->active_entry); + + WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); + hwc->state |= PERF_HES_STOPPED; + } + + /* check if update of sw counter is necessary */ + if ((mode & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + /* + * Drain the remaining delta count out of a event + * that we are disabling: + */ + rapl_event_update(event); + hwc->state |= PERF_HES_UPTODATE; + } + + spin_unlock_irqrestore(&pmu->lock, flags); +} + +static int rapl_pmu_event_add(struct perf_event *event, int mode) +{ + struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu); + struct hw_perf_event *hwc = &event->hw; + unsigned long flags; + + spin_lock_irqsave(&pmu->lock, flags); + + hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + + if (mode & PERF_EF_START) + __rapl_pmu_event_start(pmu, event); + + spin_unlock_irqrestore(&pmu->lock, flags); + + return 0; +} + +static void rapl_pmu_event_del(struct perf_event *event, int flags) +{ + rapl_pmu_event_stop(event, PERF_EF_UPDATE); +} + +static int rapl_pmu_event_init(struct perf_event *event) +{ + u64 cfg = event->attr.config & RAPL_EVENT_MASK; + int bit, msr, ret = 0; + + /* only look at RAPL events */ + if (event->attr.type != rapl_pmu_class.type) + return -ENOENT; + + /* check only supported bits are set */ + if (event->attr.config & ~RAPL_EVENT_MASK) + return -EINVAL; + + /* + * check event is known (determines counter) + */ + switch (cfg) { + case INTEL_RAPL_PP0: + bit = RAPL_IDX_PP0_NRG_STAT; + msr = MSR_PP0_ENERGY_STATUS; + break; + case INTEL_RAPL_PKG: + bit = RAPL_IDX_PKG_NRG_STAT; + msr = MSR_PKG_ENERGY_STATUS; + break; + case INTEL_RAPL_RAM: + bit = RAPL_IDX_RAM_NRG_STAT; + msr = MSR_DRAM_ENERGY_STATUS; + break; + default: + return -EINVAL; + } + /* check event supported */ + if (!(rapl_cntr_mask & (1 << bit))) + return -EINVAL; + + /* unsupported modes and filters */ + if (event->attr.exclude_user || + event->attr.exclude_kernel || + event->attr.exclude_hv || + event->attr.exclude_idle || + event->attr.exclude_host || + event->attr.exclude_guest || + event->attr.sample_period) /* no sampling */ + return -EINVAL; + + /* must be done before validate_group */ + event->hw.event_base = msr; + event->hw.config = cfg; + event->hw.idx = bit; + + return ret; +} + +static void rapl_pmu_event_read(struct perf_event *event) +{ + rapl_event_update(event); +} + +static ssize_t rapl_get_attr_cpumask(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &rapl_cpu_mask); + + buf[n++] = '\n'; + buf[n] = '\0'; + return n; +} + +static DEVICE_ATTR(cpumask, S_IRUGO, rapl_get_attr_cpumask, NULL); + +static struct attribute *rapl_pmu_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static struct attribute_group rapl_pmu_attr_group = { + .attrs = rapl_pmu_attrs, +}; + +EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01"); +EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02"); +EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03"); + +EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules"); +EVENT_ATTR_STR(energy-pkg.unit , rapl_pkg_unit, "Joules"); +EVENT_ATTR_STR(energy-ram.unit , rapl_ram_unit, "Joules"); + +/* + * we compute in 0.23 nJ increments regardless of MSR + */ +EVENT_ATTR_STR(energy-cores.scale, rapl_cores_scale, "2.3283064365386962890625e-10"); +EVENT_ATTR_STR(energy-pkg.scale, rapl_pkg_scale, "2.3283064365386962890625e-10"); +EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, "2.3283064365386962890625e-10"); + +static struct attribute *rapl_events_srv_attr[] = { + EVENT_PTR(rapl_cores), + EVENT_PTR(rapl_pkg), + EVENT_PTR(rapl_ram), + + EVENT_PTR(rapl_cores_unit), + EVENT_PTR(rapl_pkg_unit), + EVENT_PTR(rapl_ram_unit), + + EVENT_PTR(rapl_cores_scale), + EVENT_PTR(rapl_pkg_scale), + EVENT_PTR(rapl_ram_scale), + NULL, +}; + +static struct attribute *rapl_events_cln_attr[] = { + EVENT_PTR(rapl_cores), + EVENT_PTR(rapl_pkg), + + EVENT_PTR(rapl_cores_unit), + EVENT_PTR(rapl_pkg_unit), + + EVENT_PTR(rapl_cores_scale), + EVENT_PTR(rapl_pkg_scale), + NULL, +}; + +static struct attribute_group rapl_pmu_events_group = { + .name = "events", + .attrs = NULL, /* patched at runtime */ +}; + +DEFINE_RAPL_FORMAT_ATTR(event, event, "config:0-7"); +static struct attribute *rapl_formats_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group rapl_pmu_format_group = { + .name = "format", + .attrs = rapl_formats_attr, +}; + +const struct attribute_group *rapl_attr_groups[] = { + &rapl_pmu_attr_group, + &rapl_pmu_format_group, + &rapl_pmu_events_group, + NULL, +}; + +static struct pmu rapl_pmu_class = { + .attr_groups = rapl_attr_groups, + .task_ctx_nr = perf_invalid_context, /* system-wide only */ + .event_init = rapl_pmu_event_init, + .add = rapl_pmu_event_add, /* must have */ + .del = rapl_pmu_event_del, /* must have */ + .start = rapl_pmu_event_start, + .stop = rapl_pmu_event_stop, + .read = rapl_pmu_event_read, +}; + +static void rapl_cpu_exit(int cpu) +{ + struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu); + int i, phys_id = topology_physical_package_id(cpu); + int target = -1; + + /* find a new cpu on same package */ + for_each_online_cpu(i) { + if (i == cpu) + continue; + if (phys_id == topology_physical_package_id(i)) { + target = i; + break; + } + } + /* + * clear cpu from cpumask + * if was set in cpumask and still some cpu on package, + * then move to new cpu + */ + if (cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask) && target >= 0) + cpumask_set_cpu(target, &rapl_cpu_mask); + + WARN_ON(cpumask_empty(&rapl_cpu_mask)); + /* + * migrate events and context to new cpu + */ + if (target >= 0) + perf_pmu_migrate_context(pmu->pmu, cpu, target); + + /* cancel overflow polling timer for CPU */ + rapl_stop_hrtimer(pmu); +} + +static void rapl_cpu_init(int cpu) +{ + int i, phys_id = topology_physical_package_id(cpu); + + /* check if phys_is is already covered */ + for_each_cpu(i, &rapl_cpu_mask) { + if (phys_id == topology_physical_package_id(i)) + return; + } + /* was not found, so add it */ + cpumask_set_cpu(cpu, &rapl_cpu_mask); +} + +static int rapl_cpu_prepare(int cpu) +{ + struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu); + int phys_id = topology_physical_package_id(cpu); + u64 ms; + + if (pmu) + return 0; + + if (phys_id < 0) + return -1; + + pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu)); + if (!pmu) + return -1; + + spin_lock_init(&pmu->lock); + + INIT_LIST_HEAD(&pmu->active_list); + + /* + * grab power unit as: 1/2^unit Joules + * + * we cache in local PMU instance + */ + rdmsrl(MSR_RAPL_POWER_UNIT, pmu->hw_unit); + pmu->hw_unit = (pmu->hw_unit >> 8) & 0x1FULL; + pmu->pmu = &rapl_pmu_class; + + /* + * use reference of 200W for scaling the timeout + * to avoid missing counter overflows. + * 200W = 200 Joules/sec + * divide interval by 2 to avoid lockstep (2 * 100) + * if hw unit is 32, then we use 2 ms 1/200/2 + */ + if (pmu->hw_unit < 32) + ms = (1000 / (2 * 100)) * (1ULL << (32 - pmu->hw_unit - 1)); + else + ms = 2; + + pmu->timer_interval = ms_to_ktime(ms); + + rapl_hrtimer_init(pmu); + + /* set RAPL pmu for this cpu for now */ + per_cpu(rapl_pmu, cpu) = pmu; + per_cpu(rapl_pmu_to_free, cpu) = NULL; + + return 0; +} + +static void rapl_cpu_kfree(int cpu) +{ + struct rapl_pmu *pmu = per_cpu(rapl_pmu_to_free, cpu); + + kfree(pmu); + + per_cpu(rapl_pmu_to_free, cpu) = NULL; +} + +static int rapl_cpu_dying(int cpu) +{ + struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu); + + if (!pmu) + return 0; + + per_cpu(rapl_pmu, cpu) = NULL; + + per_cpu(rapl_pmu_to_free, cpu) = pmu; + + return 0; +} + +static int rapl_cpu_notifier(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (long)hcpu; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_UP_PREPARE: + rapl_cpu_prepare(cpu); + break; + case CPU_STARTING: + rapl_cpu_init(cpu); + break; + case CPU_UP_CANCELED: + case CPU_DYING: + rapl_cpu_dying(cpu); + break; + case CPU_ONLINE: + case CPU_DEAD: + rapl_cpu_kfree(cpu); + break; + case CPU_DOWN_PREPARE: + rapl_cpu_exit(cpu); + break; + default: + break; + } + + return NOTIFY_OK; +} + +static const struct x86_cpu_id rapl_cpu_match[] = { + [0] = { .vendor = X86_VENDOR_INTEL, .family = 6 }, + [1] = {}, +}; + +static int __init rapl_pmu_init(void) +{ + struct rapl_pmu *pmu; + int cpu, ret; + + /* + * check for Intel processor family 6 + */ + if (!x86_match_cpu(rapl_cpu_match)) + return 0; + + /* check supported CPU */ + switch (boot_cpu_data.x86_model) { + case 42: /* Sandy Bridge */ + case 58: /* Ivy Bridge */ + case 60: /* Haswell */ + rapl_cntr_mask = RAPL_IDX_CLN; + rapl_pmu_events_group.attrs = rapl_events_cln_attr; + break; + case 45: /* Sandy Bridge-EP */ + case 62: /* IvyTown */ + rapl_cntr_mask = RAPL_IDX_SRV; + rapl_pmu_events_group.attrs = rapl_events_srv_attr; + break; + + default: + /* unsupported */ + return 0; + } + get_online_cpus(); + + for_each_online_cpu(cpu) { + rapl_cpu_prepare(cpu); + rapl_cpu_init(cpu); + } + + perf_cpu_notifier(rapl_cpu_notifier); + + ret = perf_pmu_register(&rapl_pmu_class, "power", -1); + if (WARN_ON(ret)) { + pr_info("RAPL PMU detected, registration failed (%d), RAPL PMU disabled\n", ret); + put_online_cpus(); + return -1; + } + + pmu = __get_cpu_var(rapl_pmu); + + pr_info("RAPL PMU detected, hw unit 2^-%d Joules," + " API unit is 2^-32 Joules," + " %d fixed counters" + " %llu ms ovfl timer\n", + pmu->hw_unit, + hweight32(rapl_cntr_mask), + ktime_to_ms(pmu->timer_interval)); + + put_online_cpus(); + + return 0; +} +device_initcall(rapl_pmu_init); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2e069d1288df..8f4a70f2eca8 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -319,7 +319,10 @@ struct perf_event { */ struct list_head migrate_entry; - struct hlist_node hlist_entry; + union { + struct hlist_node hlist_entry; + struct list_head active_entry; + }; int nr_siblings; int group_flags; struct perf_event *group_leader; diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 319eae70fe84..e32251e00e62 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -26,16 +26,13 @@ #include <linux/errno.h> #include <linux/rbtree.h> +#include <linux/types.h> struct vm_area_struct; struct mm_struct; struct inode; struct notifier_block; -#ifdef CONFIG_ARCH_SUPPORTS_UPROBES -# include <asm/uprobes.h> -#endif - #define UPROBE_HANDLER_REMOVE 1 #define UPROBE_HANDLER_MASK 1 @@ -60,6 +57,8 @@ struct uprobe_consumer { }; #ifdef CONFIG_UPROBES +#include <asm/uprobes.h> + enum uprobe_task_state { UTASK_RUNNING, UTASK_SSTEP, @@ -72,35 +71,28 @@ enum uprobe_task_state { */ struct uprobe_task { enum uprobe_task_state state; - struct arch_uprobe_task autask; - struct return_instance *return_instances; - unsigned int depth; - struct uprobe *active_uprobe; + union { + struct { + struct arch_uprobe_task autask; + unsigned long vaddr; + }; + struct { + struct callback_head dup_xol_work; + unsigned long dup_xol_addr; + }; + }; + + struct uprobe *active_uprobe; unsigned long xol_vaddr; - unsigned long vaddr; -}; -/* - * On a breakpoint hit, thread contests for a slot. It frees the - * slot after singlestep. Currently a fixed number of slots are - * allocated. - */ -struct xol_area { - wait_queue_head_t wq; /* if all slots are busy */ - atomic_t slot_count; /* number of in-use slots */ - unsigned long *bitmap; /* 0 = free slot */ - struct page *page; - - /* - * We keep the vma's vm_start rather than a pointer to the vma - * itself. The probed process or a naughty kernel module could make - * the vma go away, and we must handle that reasonably gracefully. - */ - unsigned long vaddr; /* Page(s) of instruction slots */ + struct return_instance *return_instances; + unsigned int depth; }; +struct xol_area; + struct uprobes_state { struct xol_area *xol_area; }; @@ -109,6 +101,7 @@ extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsign extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); extern bool __weak is_swbp_insn(uprobe_opcode_t *insn); extern bool __weak is_trap_insn(uprobe_opcode_t *insn); +extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs); extern int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t); extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool); @@ -120,7 +113,6 @@ extern void uprobe_end_dup_mmap(void); extern void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm); extern void uprobe_free_utask(struct task_struct *t); extern void uprobe_copy_process(struct task_struct *t, unsigned long flags); -extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs); extern int uprobe_post_sstep_notifier(struct pt_regs *regs); extern int uprobe_pre_sstep_notifier(struct pt_regs *regs); extern void uprobe_notify_resume(struct pt_regs *regs); @@ -176,10 +168,6 @@ static inline bool uprobe_deny_signal(void) { return false; } -static inline unsigned long uprobe_get_swbp_addr(struct pt_regs *regs) -{ - return 0; -} static inline void uprobe_free_utask(struct task_struct *t) { } diff --git a/kernel/events/core.c b/kernel/events/core.c index 72348dc192c1..403b781daafb 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6655,6 +6655,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, INIT_LIST_HEAD(&event->event_entry); INIT_LIST_HEAD(&event->sibling_list); INIT_LIST_HEAD(&event->rb_entry); + INIT_LIST_HEAD(&event->active_entry); init_waitqueue_head(&event->waitq); init_irq_work(&event->pending, perf_pending_event); diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index e8b168af135b..146a5792b1d2 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -61,19 +61,20 @@ again: * * kernel user * - * READ ->data_tail READ ->data_head - * smp_mb() (A) smp_rmb() (C) - * WRITE $data READ $data - * smp_wmb() (B) smp_mb() (D) - * STORE ->data_head WRITE ->data_tail + * if (LOAD ->data_tail) { LOAD ->data_head + * (A) smp_rmb() (C) + * STORE $data LOAD $data + * smp_wmb() (B) smp_mb() (D) + * STORE ->data_head STORE ->data_tail + * } * * Where A pairs with D, and B pairs with C. * - * I don't think A needs to be a full barrier because we won't in fact - * write data until we see the store from userspace. So we simply don't - * issue the data WRITE until we observe it. Be conservative for now. + * In our case (A) is a control dependency that separates the load of + * the ->data_tail and the stores of $data. In case ->data_tail + * indicates there is no room in the buffer to store $data we do not. * - * OTOH, D needs to be a full barrier since it separates the data READ + * D needs to be a full barrier since it separates the data READ * from the tail WRITE. * * For B a WMB is sufficient since it separates two WRITEs, and for C @@ -81,7 +82,7 @@ again: * * See perf_output_begin(). */ - smp_wmb(); + smp_wmb(); /* B, matches C */ rb->user_page->data_head = head; /* @@ -144,17 +145,26 @@ int perf_output_begin(struct perf_output_handle *handle, if (!rb->overwrite && unlikely(CIRC_SPACE(head, tail, perf_data_size(rb)) < size)) goto fail; + + /* + * The above forms a control dependency barrier separating the + * @tail load above from the data stores below. Since the @tail + * load is required to compute the branch to fail below. + * + * A, matches D; the full memory barrier userspace SHOULD issue + * after reading the data and before storing the new tail + * position. + * + * See perf_output_put_handle(). + */ + head += size; } while (local_cmpxchg(&rb->head, offset, head) != offset); /* - * Separate the userpage->tail read from the data stores below. - * Matches the MB userspace SHOULD issue after reading the data - * and before storing the new tail position. - * - * See perf_output_put_handle(). + * We rely on the implied barrier() by local_cmpxchg() to ensure + * none of the data stores below can be lifted up by the compiler. */ - smp_mb(); if (unlikely(head - local_read(&rb->wakeup) > rb->watermark)) local_add(rb->watermark, &rb->wakeup); diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 24b7d6ca871b..b886a5e7d4ff 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -73,6 +73,17 @@ struct uprobe { struct inode *inode; /* Also hold a ref to inode */ loff_t offset; unsigned long flags; + + /* + * The generic code assumes that it has two members of unknown type + * owned by the arch-specific code: + * + * insn - copy_insn() saves the original instruction here for + * arch_uprobe_analyze_insn(). + * + * ixol - potentially modified instruction to execute out of + * line, copied to xol_area by xol_get_insn_slot(). + */ struct arch_uprobe arch; }; @@ -86,6 +97,29 @@ struct return_instance { }; /* + * Execute out of line area: anonymous executable mapping installed + * by the probed task to execute the copy of the original instruction + * mangled by set_swbp(). + * + * On a breakpoint hit, thread contests for a slot. It frees the + * slot after singlestep. Currently a fixed number of slots are + * allocated. + */ +struct xol_area { + wait_queue_head_t wq; /* if all slots are busy */ + atomic_t slot_count; /* number of in-use slots */ + unsigned long *bitmap; /* 0 = free slot */ + struct page *page; + + /* + * We keep the vma's vm_start rather than a pointer to the vma + * itself. The probed process or a naughty kernel module could make + * the vma go away, and we must handle that reasonably gracefully. + */ + unsigned long vaddr; /* Page(s) of instruction slots */ +}; + +/* * valid_vma: Verify if the specified vma is an executable vma * Relax restrictions while unregistering: vm_flags might have * changed after breakpoint was inserted. @@ -330,7 +364,7 @@ int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned int __weak set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr) { - return uprobe_write_opcode(mm, vaddr, *(uprobe_opcode_t *)auprobe->insn); + return uprobe_write_opcode(mm, vaddr, *(uprobe_opcode_t *)&auprobe->insn); } static int match_uprobe(struct uprobe *l, struct uprobe *r) @@ -529,8 +563,8 @@ static int copy_insn(struct uprobe *uprobe, struct file *filp) { struct address_space *mapping = uprobe->inode->i_mapping; loff_t offs = uprobe->offset; - void *insn = uprobe->arch.insn; - int size = MAX_UINSN_BYTES; + void *insn = &uprobe->arch.insn; + int size = sizeof(uprobe->arch.insn); int len, err = -EIO; /* Copy only available bytes, -EIO if nothing was read */ @@ -569,7 +603,7 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file, goto out; ret = -ENOTSUPP; - if (is_trap_insn((uprobe_opcode_t *)uprobe->arch.insn)) + if (is_trap_insn((uprobe_opcode_t *)&uprobe->arch.insn)) goto out; ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr); @@ -1264,7 +1298,7 @@ static unsigned long xol_get_insn_slot(struct uprobe *uprobe) /* Initialize the slot */ copy_to_page(area->page, xol_vaddr, - uprobe->arch.ixol, sizeof(uprobe->arch.ixol)); + &uprobe->arch.ixol, sizeof(uprobe->arch.ixol)); /* * We probably need flush_icache_user_range() but it needs vma. * This should work on supported architectures too. @@ -1403,12 +1437,10 @@ static void uprobe_warn(struct task_struct *t, const char *msg) static void dup_xol_work(struct callback_head *work) { - kfree(work); - if (current->flags & PF_EXITING) return; - if (!__create_xol_area(current->utask->vaddr)) + if (!__create_xol_area(current->utask->dup_xol_addr)) uprobe_warn(current, "dup xol area"); } @@ -1419,7 +1451,6 @@ void uprobe_copy_process(struct task_struct *t, unsigned long flags) { struct uprobe_task *utask = current->utask; struct mm_struct *mm = current->mm; - struct callback_head *work; struct xol_area *area; t->utask = NULL; @@ -1441,14 +1472,9 @@ void uprobe_copy_process(struct task_struct *t, unsigned long flags) if (mm == t->mm) return; - /* TODO: move it into the union in uprobe_task */ - work = kmalloc(sizeof(*work), GFP_KERNEL); - if (!work) - return uprobe_warn(t, "dup xol area"); - - t->utask->vaddr = area->vaddr; - init_task_work(work, dup_xol_work); - task_work_add(t, work, true); + t->utask->dup_xol_addr = area->vaddr; + init_task_work(&t->utask->dup_xol_work, dup_xol_work); + task_work_add(t, &t->utask->dup_xol_work, true); } /* diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile index fc1502098595..0d9cbb426b44 100644 --- a/tools/lib/traceevent/Makefile +++ b/tools/lib/traceevent/Makefile @@ -43,6 +43,30 @@ man_dir_SQ = '$(subst ','\'',$(man_dir))' export man_dir man_dir_SQ INSTALL export DESTDIR DESTDIR_SQ +set_plugin_dir := 1 + +# Set plugin_dir to preffered global plugin location +# If we install under $HOME directory we go under +# $(HOME)/.traceevent/plugins +# +# We dont set PLUGIN_DIR in case we install under $HOME +# directory, because by default the code looks under: +# $(HOME)/.traceevent/plugins by default. +# +ifeq ($(plugin_dir),) +ifeq ($(prefix),$(HOME)) +override plugin_dir = $(HOME)/.traceevent/plugins +set_plugin_dir := 0 +else +override plugin_dir = $(prefix)/lib/traceevent/plugins +endif +endif + +ifeq ($(set_plugin_dir),1) +PLUGIN_DIR = -DPLUGIN_DIR="$(DESTDIR)/$(plugin_dir)" +PLUGIN_DIR_SQ = '$(subst ','\'',$(PLUGIN_DIR))' +endif + # copy a bit from Linux kbuild ifeq ("$(origin V)", "command line") @@ -96,6 +120,7 @@ export prefix bindir src obj # Shell quotes bindir_SQ = $(subst ','\'',$(bindir)) bindir_relative_SQ = $(subst ','\'',$(bindir_relative)) +plugin_dir_SQ = $(subst ','\'',$(plugin_dir)) LIB_FILE = libtraceevent.a libtraceevent.so @@ -138,10 +163,10 @@ else print_app_build = echo ' BUILD '$(OBJ); print_fpic_compile = echo ' CC FPIC '$(OBJ); print_shared_lib_compile = echo ' BUILD SHARED LIB '$(OBJ); - print_plugin_obj_compile = echo ' BUILD PLUGIN OBJ '$(OBJ); - print_plugin_build = echo ' BUILD PLUGIN '$(OBJ); + print_plugin_obj_compile = echo ' CC FPIC '$(OBJ); + print_plugin_build = echo ' BUILD PLUGIN '$(OBJ); print_static_lib_build = echo ' BUILD STATIC LIB '$(OBJ); - print_install = echo ' INSTALL '$1' to $(DESTDIR_SQ)$2'; + print_install = echo ' INSTALL '$1; endif do_fpic_compile = \ @@ -180,12 +205,29 @@ $(obj)/%.o: $(src)/%.c %.o: $(src)/%.c $(Q)$(call do_compile) -PEVENT_LIB_OBJS = event-parse.o trace-seq.o parse-filter.o parse-utils.o +PEVENT_LIB_OBJS = event-parse.o +PEVENT_LIB_OBJS += event-plugin.o +PEVENT_LIB_OBJS += trace-seq.o +PEVENT_LIB_OBJS += parse-filter.o +PEVENT_LIB_OBJS += parse-utils.o PEVENT_LIB_OBJS += kbuffer-parse.o -ALL_OBJS = $(PEVENT_LIB_OBJS) +PLUGIN_OBJS = plugin_jbd2.o +PLUGIN_OBJS += plugin_hrtimer.o +PLUGIN_OBJS += plugin_kmem.o +PLUGIN_OBJS += plugin_kvm.o +PLUGIN_OBJS += plugin_mac80211.o +PLUGIN_OBJS += plugin_sched_switch.o +PLUGIN_OBJS += plugin_function.o +PLUGIN_OBJS += plugin_xen.o +PLUGIN_OBJS += plugin_scsi.o +PLUGIN_OBJS += plugin_cfg80211.o + +PLUGINS := $(PLUGIN_OBJS:.o=.so) -CMD_TARGETS = $(LIB_FILE) +ALL_OBJS = $(PEVENT_LIB_OBJS) $(PLUGIN_OBJS) + +CMD_TARGETS = $(LIB_FILE) $(PLUGINS) TARGETS = $(CMD_TARGETS) @@ -200,9 +242,17 @@ libtraceevent.so: $(PEVENT_LIB_OBJS) libtraceevent.a: $(PEVENT_LIB_OBJS) $(Q)$(do_build_static_lib) +plugins: $(PLUGINS) + $(PEVENT_LIB_OBJS): %.o: $(src)/%.c TRACEEVENT-CFLAGS $(Q)$(do_fpic_compile) +$(PLUGIN_OBJS): %.o : $(src)/%.c + $(Q)$(do_compile_plugin_obj) + +$(PLUGINS): %.so: %.o + $(Q)$(do_plugin_build) + define make_version.h (echo '/* This file is automatically generated. Do not modify. */'; \ echo \#define VERSION_CODE $(shell \ @@ -290,9 +340,16 @@ define do_install $(INSTALL) $1 '$(DESTDIR_SQ)$2' endef -install_lib: all_cmd +install_lib: all_cmd install_plugins $(Q)$(call do_install,$(LIB_FILE),$(bindir_SQ)) +PLUGINS_INSTALL = $(subst .so,.install,$(PLUGINS)) + +$(PLUGINS_INSTALL): %.install : %.so force + $(Q)$(call do_install,$<,$(plugin_dir_SQ)) + +install_plugins: $(PLUGINS_INSTALL) + install: install_lib clean: diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 217c82ee3665..22566c271275 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -2710,7 +2710,6 @@ process_func_handler(struct event_format *event, struct pevent_function_handler struct print_arg *farg; enum event_type type; char *token; - const char *test; int i; arg->type = PRINT_FUNC; @@ -2727,15 +2726,19 @@ process_func_handler(struct event_format *event, struct pevent_function_handler } type = process_arg(event, farg, &token); - if (i < (func->nr_args - 1)) - test = ","; - else - test = ")"; - - if (test_type_token(type, token, EVENT_DELIM, test)) { - free_arg(farg); - free_token(token); - return EVENT_ERROR; + if (i < (func->nr_args - 1)) { + if (type != EVENT_DELIM || strcmp(token, ",") != 0) { + warning("Error: function '%s()' expects %d arguments but event %s only uses %d", + func->name, func->nr_args, + event->name, i + 1); + goto err; + } + } else { + if (type != EVENT_DELIM || strcmp(token, ")") != 0) { + warning("Error: function '%s()' only expects %d arguments but event %s has more", + func->name, func->nr_args, event->name); + goto err; + } } *next_arg = farg; @@ -2747,6 +2750,11 @@ process_func_handler(struct event_format *event, struct pevent_function_handler *tok = token; return type; + +err: + free_arg(farg); + free_token(token); + return EVENT_ERROR; } static enum event_type @@ -4099,6 +4107,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event unsigned long long val; struct func_map *func; const char *saveptr; + struct trace_seq p; char *bprint_fmt = NULL; char format[32]; int show_func; @@ -4306,8 +4315,12 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event format[len] = 0; if (!len_as_arg) len_arg = -1; - print_str_arg(s, data, size, event, + /* Use helper trace_seq */ + trace_seq_init(&p); + print_str_arg(&p, data, size, event, format, len_arg, arg); + trace_seq_terminate(&p); + trace_seq_puts(s, p.buffer); arg = arg->next; break; default: @@ -5116,8 +5129,38 @@ enum pevent_errno __pevent_parse_format(struct event_format **eventp, return ret; } +static enum pevent_errno +__pevent_parse_event(struct pevent *pevent, + struct event_format **eventp, + const char *buf, unsigned long size, + const char *sys) +{ + int ret = __pevent_parse_format(eventp, pevent, buf, size, sys); + struct event_format *event = *eventp; + + if (event == NULL) + return ret; + + if (pevent && add_event(pevent, event)) { + ret = PEVENT_ERRNO__MEM_ALLOC_FAILED; + goto event_add_failed; + } + +#define PRINT_ARGS 0 + if (PRINT_ARGS && event->print_fmt.args) + print_args(event->print_fmt.args); + + return 0; + +event_add_failed: + pevent_free_format(event); + return ret; +} + /** * pevent_parse_format - parse the event format + * @pevent: the handle to the pevent + * @eventp: returned format * @buf: the buffer storing the event format string * @size: the size of @buf * @sys: the system the event belongs to @@ -5129,10 +5172,12 @@ enum pevent_errno __pevent_parse_format(struct event_format **eventp, * * /sys/kernel/debug/tracing/events/.../.../format */ -enum pevent_errno pevent_parse_format(struct event_format **eventp, const char *buf, +enum pevent_errno pevent_parse_format(struct pevent *pevent, + struct event_format **eventp, + const char *buf, unsigned long size, const char *sys) { - return __pevent_parse_format(eventp, NULL, buf, size, sys); + return __pevent_parse_event(pevent, eventp, buf, size, sys); } /** @@ -5153,25 +5198,7 @@ enum pevent_errno pevent_parse_event(struct pevent *pevent, const char *buf, unsigned long size, const char *sys) { struct event_format *event = NULL; - int ret = __pevent_parse_format(&event, pevent, buf, size, sys); - - if (event == NULL) - return ret; - - if (add_event(pevent, event)) { - ret = PEVENT_ERRNO__MEM_ALLOC_FAILED; - goto event_add_failed; - } - -#define PRINT_ARGS 0 - if (PRINT_ARGS && event->print_fmt.args) - print_args(event->print_fmt.args); - - return 0; - -event_add_failed: - pevent_free_format(event); - return ret; + return __pevent_parse_event(pevent, &event, buf, size, sys); } #undef _PE diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index 8d73d2594f65..6e23f197175f 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -23,6 +23,7 @@ #include <stdbool.h> #include <stdarg.h> #include <regex.h> +#include <string.h> #ifndef __maybe_unused #define __maybe_unused __attribute__((unused)) @@ -377,6 +378,11 @@ enum pevent_errno { }; #undef _PE +struct plugin_list; + +struct plugin_list *traceevent_load_plugins(struct pevent *pevent); +void traceevent_unload_plugins(struct plugin_list *plugin_list); + struct cmdline; struct cmdline_list; struct func_map; @@ -522,6 +528,15 @@ __data2host8(struct pevent *pevent, unsigned long long data) __data2host8(pevent, __val); \ }) +static inline int traceevent_host_bigendian(void) +{ + unsigned char str[] = { 0x1, 0x2, 0x3, 0x4 }; + unsigned int val; + + memcpy(&val, str, 4); + return val == 0x01020304; +} + /* taken from kernel/trace/trace.h */ enum trace_flag_type { TRACE_FLAG_IRQS_OFF = 0x01, @@ -547,7 +562,9 @@ int pevent_parse_header_page(struct pevent *pevent, char *buf, unsigned long siz enum pevent_errno pevent_parse_event(struct pevent *pevent, const char *buf, unsigned long size, const char *sys); -enum pevent_errno pevent_parse_format(struct event_format **eventp, const char *buf, +enum pevent_errno pevent_parse_format(struct pevent *pevent, + struct event_format **eventp, + const char *buf, unsigned long size, const char *sys); void pevent_free_format(struct event_format *event); @@ -843,7 +860,7 @@ int pevent_event_filtered(struct event_filter *filter, void pevent_filter_reset(struct event_filter *filter); -void pevent_filter_clear_trivial(struct event_filter *filter, +int pevent_filter_clear_trivial(struct event_filter *filter, enum filter_trivial_type type); void pevent_filter_free(struct event_filter *filter); diff --git a/tools/lib/traceevent/event-plugin.c b/tools/lib/traceevent/event-plugin.c new file mode 100644 index 000000000000..125f5676bcb5 --- /dev/null +++ b/tools/lib/traceevent/event-plugin.c @@ -0,0 +1,215 @@ +/* + * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <[email protected]> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + +#include <string.h> +#include <dlfcn.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <dirent.h> +#include "event-parse.h" +#include "event-utils.h" + +#define LOCAL_PLUGIN_DIR ".traceevent/plugins" + +struct plugin_list { + struct plugin_list *next; + char *name; + void *handle; +}; + +static void +load_plugin(struct pevent *pevent, const char *path, + const char *file, void *data) +{ + struct plugin_list **plugin_list = data; + pevent_plugin_load_func func; + struct plugin_list *list; + const char *alias; + char *plugin; + void *handle; + + plugin = malloc(strlen(path) + strlen(file) + 2); + if (!plugin) { + warning("could not allocate plugin memory\n"); + return; + } + + strcpy(plugin, path); + strcat(plugin, "/"); + strcat(plugin, file); + + handle = dlopen(plugin, RTLD_NOW | RTLD_GLOBAL); + if (!handle) { + warning("could not load plugin '%s'\n%s\n", + plugin, dlerror()); + goto out_free; + } + + alias = dlsym(handle, PEVENT_PLUGIN_ALIAS_NAME); + if (!alias) + alias = file; + + func = dlsym(handle, PEVENT_PLUGIN_LOADER_NAME); + if (!func) { + warning("could not find func '%s' in plugin '%s'\n%s\n", + PEVENT_PLUGIN_LOADER_NAME, plugin, dlerror()); + goto out_free; + } + + list = malloc(sizeof(*list)); + if (!list) { + warning("could not allocate plugin memory\n"); + goto out_free; + } + + list->next = *plugin_list; + list->handle = handle; + list->name = plugin; + *plugin_list = list; + + pr_stat("registering plugin: %s", plugin); + func(pevent); + return; + + out_free: + free(plugin); +} + +static void +load_plugins_dir(struct pevent *pevent, const char *suffix, + const char *path, + void (*load_plugin)(struct pevent *pevent, + const char *path, + const char *name, + void *data), + void *data) +{ + struct dirent *dent; + struct stat st; + DIR *dir; + int ret; + + ret = stat(path, &st); + if (ret < 0) + return; + + if (!S_ISDIR(st.st_mode)) + return; + + dir = opendir(path); + if (!dir) + return; + + while ((dent = readdir(dir))) { + const char *name = dent->d_name; + + if (strcmp(name, ".") == 0 || + strcmp(name, "..") == 0) + continue; + + /* Only load plugins that end in suffix */ + if (strcmp(name + (strlen(name) - strlen(suffix)), suffix) != 0) + continue; + + load_plugin(pevent, path, name, data); + } + + closedir(dir); +} + +static void +load_plugins(struct pevent *pevent, const char *suffix, + void (*load_plugin)(struct pevent *pevent, + const char *path, + const char *name, + void *data), + void *data) +{ + char *home; + char *path; + char *envdir; + + /* + * If a system plugin directory was defined, + * check that first. + */ +#ifdef PLUGIN_DIR + load_plugins_dir(pevent, suffix, PLUGIN_DIR, load_plugin, data); +#endif + + /* + * Next let the environment-set plugin directory + * override the system defaults. + */ + envdir = getenv("TRACEEVENT_PLUGIN_DIR"); + if (envdir) + load_plugins_dir(pevent, suffix, envdir, load_plugin, data); + + /* + * Now let the home directory override the environment + * or system defaults. + */ + home = getenv("HOME"); + if (!home) + return; + + path = malloc(strlen(home) + strlen(LOCAL_PLUGIN_DIR) + 2); + if (!path) { + warning("could not allocate plugin memory\n"); + return; + } + + strcpy(path, home); + strcat(path, "/"); + strcat(path, LOCAL_PLUGIN_DIR); + + load_plugins_dir(pevent, suffix, path, load_plugin, data); + + free(path); +} + +struct plugin_list* +traceevent_load_plugins(struct pevent *pevent) +{ + struct plugin_list *list = NULL; + + load_plugins(pevent, ".so", load_plugin, &list); + return list; +} + +void +traceevent_unload_plugins(struct plugin_list *plugin_list) +{ + pevent_plugin_unload_func func; + struct plugin_list *list; + + while (plugin_list) { + list = plugin_list; + plugin_list = list->next; + func = dlsym(list->handle, PEVENT_PLUGIN_UNLOADER_NAME); + if (func) + func(); + dlclose(list->handle); + free(list->name); + free(list); + } +} diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index 2500e75583fc..ab402fb2dcf7 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -182,7 +182,10 @@ struct event_filter *pevent_filter_alloc(struct pevent *pevent) { struct event_filter *filter; - filter = malloc_or_die(sizeof(*filter)); + filter = malloc(sizeof(*filter)); + if (filter == NULL) + return NULL; + memset(filter, 0, sizeof(*filter)); filter->pevent = pevent; pevent_ref(pevent); @@ -242,15 +245,19 @@ static void free_arg(struct filter_arg *arg) free(arg); } -static void add_event(struct event_list **events, +static int add_event(struct event_list **events, struct event_format *event) { struct event_list *list; - list = malloc_or_die(sizeof(*list)); + list = malloc(sizeof(*list)); + if (list == NULL) + return -1; + list->next = *events; *events = list; list->event = event; + return 0; } static int event_match(struct event_format *event, @@ -273,6 +280,7 @@ find_event(struct pevent *pevent, struct event_list **events, regex_t ereg; regex_t sreg; int match = 0; + int fail = 0; char *reg; int ret; int i; @@ -307,7 +315,10 @@ find_event(struct pevent *pevent, struct event_list **events, event = pevent->events[i]; if (event_match(event, sys_name ? &sreg : NULL, &ereg)) { match = 1; - add_event(events, event); + if (add_event(events, event) < 0) { + fail = 1; + break; + } } } @@ -317,6 +328,8 @@ find_event(struct pevent *pevent, struct event_list **events, if (!match) return -1; + if (fail) + return -2; return 0; } @@ -349,8 +362,11 @@ create_arg_item(struct event_format *event, const char *token, arg->value.type = type == EVENT_DQUOTE ? FILTER_STRING : FILTER_CHAR; arg->value.str = strdup(token); - if (!arg->value.str) - die("malloc string"); + if (!arg->value.str) { + free_arg(arg); + show_error(error_str, "failed to allocate string filter arg"); + return NULL; + } break; case EVENT_ITEM: /* if it is a number, then convert it */ @@ -1210,7 +1226,13 @@ int pevent_filter_add_filter_str(struct event_filter *filter, else len = strlen(filter_str); - this_event = malloc_or_die(len + 1); + this_event = malloc(len + 1); + if (this_event == NULL) { + show_error(error_str, "Memory allocation failure"); + /* This can only happen when events is NULL, but still */ + free_events(events); + return -1; + } memcpy(this_event, filter_str, len); this_event[len] = 0; @@ -1482,8 +1504,10 @@ int pevent_update_trivial(struct event_filter *dest, struct event_filter *source * @type: remove only true, false, or both * * Removes filters that only contain a TRUE or FALES boolean arg. + * + * Returns 0 on success and -1 if there was a problem. */ -void pevent_filter_clear_trivial(struct event_filter *filter, +int pevent_filter_clear_trivial(struct event_filter *filter, enum filter_trivial_type type) { struct filter_type *filter_type; @@ -1492,13 +1516,15 @@ void pevent_filter_clear_trivial(struct event_filter *filter, int i; if (!filter->filters) - return; + return 0; /* * Two steps, first get all ids with trivial filters. * then remove those ids. */ for (i = 0; i < filter->filters; i++) { + int *new_ids; + filter_type = &filter->event_filters[i]; if (filter_type->filter->type != FILTER_ARG_BOOLEAN) continue; @@ -1513,19 +1539,24 @@ void pevent_filter_clear_trivial(struct event_filter *filter, break; } - ids = realloc(ids, sizeof(*ids) * (count + 1)); - if (!ids) - die("Can't allocate ids"); + new_ids = realloc(ids, sizeof(*ids) * (count + 1)); + if (!new_ids) { + free(ids); + return -1; + } + + ids = new_ids; ids[count++] = filter_type->event_id; } if (!count) - return; + return 0; for (i = 0; i < count; i++) pevent_filter_remove_event(filter, ids[i]); free(ids); + return 0; } /** diff --git a/tools/lib/traceevent/plugin_cfg80211.c b/tools/lib/traceevent/plugin_cfg80211.c new file mode 100644 index 000000000000..dcab8e873c21 --- /dev/null +++ b/tools/lib/traceevent/plugin_cfg80211.c @@ -0,0 +1,24 @@ +#include <stdio.h> +#include <string.h> +#include <inttypes.h> +#include <endian.h> +#include "event-parse.h" + +static unsigned long long +process___le16_to_cpup(struct trace_seq *s, + unsigned long long *args) +{ + uint16_t *val = (uint16_t *) args[0]; + return val ? (long long) le16toh(*val) : 0; +} + +int PEVENT_PLUGIN_LOADER(struct pevent *pevent) +{ + pevent_register_print_function(pevent, + process___le16_to_cpup, + PEVENT_FUNC_ARG_INT, + "__le16_to_cpup", + PEVENT_FUNC_ARG_PTR, + PEVENT_FUNC_ARG_VOID); + return 0; +} diff --git a/tools/lib/traceevent/plugin_function.c b/tools/lib/traceevent/plugin_function.c new file mode 100644 index 000000000000..aad92ad5e96f --- /dev/null +++ b/tools/lib/traceevent/plugin_function.c @@ -0,0 +1,160 @@ +/* + * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <[email protected]> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "event-parse.h" +#include "event-utils.h" + +static struct func_stack { + int size; + char **stack; +} *fstack; + +static int cpus = -1; + +#define STK_BLK 10 + +static void add_child(struct func_stack *stack, const char *child, int pos) +{ + int i; + + if (!child) + return; + + if (pos < stack->size) + free(stack->stack[pos]); + else { + char **ptr; + + ptr = realloc(stack->stack, sizeof(char *) * + (stack->size + STK_BLK)); + if (!ptr) { + warning("could not allocate plugin memory\n"); + return; + } + + stack->stack = ptr; + + for (i = stack->size; i < stack->size + STK_BLK; i++) + stack->stack[i] = NULL; + stack->size += STK_BLK; + } + + stack->stack[pos] = strdup(child); +} + +static int add_and_get_index(const char *parent, const char *child, int cpu) +{ + int i; + + if (cpu < 0) + return 0; + + if (cpu > cpus) { + struct func_stack *ptr; + + ptr = realloc(fstack, sizeof(*fstack) * (cpu + 1)); + if (!ptr) { + warning("could not allocate plugin memory\n"); + return 0; + } + + fstack = ptr; + + /* Account for holes in the cpu count */ + for (i = cpus + 1; i <= cpu; i++) + memset(&fstack[i], 0, sizeof(fstack[i])); + cpus = cpu; + } + + for (i = 0; i < fstack[cpu].size && fstack[cpu].stack[i]; i++) { + if (strcmp(parent, fstack[cpu].stack[i]) == 0) { + add_child(&fstack[cpu], child, i+1); + return i; + } + } + + /* Not found */ + add_child(&fstack[cpu], parent, 0); + add_child(&fstack[cpu], child, 1); + return 0; +} + +static int function_handler(struct trace_seq *s, struct pevent_record *record, + struct event_format *event, void *context) +{ + struct pevent *pevent = event->pevent; + unsigned long long function; + unsigned long long pfunction; + const char *func; + const char *parent; + int index; + + if (pevent_get_field_val(s, event, "ip", record, &function, 1)) + return trace_seq_putc(s, '!'); + + func = pevent_find_function(pevent, function); + + if (pevent_get_field_val(s, event, "parent_ip", record, &pfunction, 1)) + return trace_seq_putc(s, '!'); + + parent = pevent_find_function(pevent, pfunction); + + index = add_and_get_index(parent, func, record->cpu); + + trace_seq_printf(s, "%*s", index*3, ""); + + if (func) + trace_seq_printf(s, "%s", func); + else + trace_seq_printf(s, "0x%llx", function); + + trace_seq_printf(s, " <-- "); + if (parent) + trace_seq_printf(s, "%s", parent); + else + trace_seq_printf(s, "0x%llx", pfunction); + + return 0; +} + +int PEVENT_PLUGIN_LOADER(struct pevent *pevent) +{ + pevent_register_event_handler(pevent, -1, "ftrace", "function", + function_handler, NULL); + return 0; +} + +void PEVENT_PLUGIN_UNLOADER(void) +{ + int i, x; + + for (i = 0; i <= cpus; i++) { + for (x = 0; x < fstack[i].size && fstack[i].stack[x]; x++) + free(fstack[i].stack[x]); + free(fstack[i].stack); + } + + free(fstack); + fstack = NULL; + cpus = -1; +} diff --git a/tools/lib/traceevent/plugin_hrtimer.c b/tools/lib/traceevent/plugin_hrtimer.c new file mode 100644 index 000000000000..0b0ebf30aa44 --- /dev/null +++ b/tools/lib/traceevent/plugin_hrtimer.c @@ -0,0 +1,78 @@ +/* + * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <[email protected]> + * Copyright (C) 2009 Johannes Berg <[email protected]> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "event-parse.h" + +static int timer_expire_handler(struct trace_seq *s, + struct pevent_record *record, + struct event_format *event, void *context) +{ + trace_seq_printf(s, "hrtimer="); + + if (pevent_print_num_field(s, "0x%llx", event, "timer", + record, 0) == -1) + pevent_print_num_field(s, "0x%llx", event, "hrtimer", + record, 1); + + trace_seq_printf(s, " now="); + + pevent_print_num_field(s, "%llu", event, "now", record, 1); + + pevent_print_func_field(s, " function=%s", event, "function", + record, 0); + return 0; +} + +static int timer_start_handler(struct trace_seq *s, + struct pevent_record *record, + struct event_format *event, void *context) +{ + trace_seq_printf(s, "hrtimer="); + + if (pevent_print_num_field(s, "0x%llx", event, "timer", + record, 0) == -1) + pevent_print_num_field(s, "0x%llx", event, "hrtimer", + record, 1); + + pevent_print_func_field(s, " function=%s", event, "function", + record, 0); + + trace_seq_printf(s, " expires="); + pevent_print_num_field(s, "%llu", event, "expires", record, 1); + + trace_seq_printf(s, " softexpires="); + pevent_print_num_field(s, "%llu", event, "softexpires", record, 1); + return 0; +} + +int PEVENT_PLUGIN_LOADER(struct pevent *pevent) +{ + pevent_register_event_handler(pevent, -1, + "timer", "hrtimer_expire_entry", + timer_expire_handler, NULL); + + pevent_register_event_handler(pevent, -1, "timer", "hrtimer_start", + timer_start_handler, NULL); + return 0; +} diff --git a/tools/lib/traceevent/plugin_jbd2.c b/tools/lib/traceevent/plugin_jbd2.c new file mode 100644 index 000000000000..2f93f81f0bac --- /dev/null +++ b/tools/lib/traceevent/plugin_jbd2.c @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <[email protected]> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "event-parse.h" + +#define MINORBITS 20 +#define MINORMASK ((1U << MINORBITS) - 1) + +#define MAJOR(dev) ((unsigned int) ((dev) >> MINORBITS)) +#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK)) + +static unsigned long long +process_jbd2_dev_to_name(struct trace_seq *s, + unsigned long long *args) +{ + unsigned int dev = args[0]; + + trace_seq_printf(s, "%d:%d", MAJOR(dev), MINOR(dev)); + return 0; +} + +static unsigned long long +process_jiffies_to_msecs(struct trace_seq *s, + unsigned long long *args) +{ + unsigned long long jiffies = args[0]; + + trace_seq_printf(s, "%lld", jiffies); + return jiffies; +} + +int PEVENT_PLUGIN_LOADER(struct pevent *pevent) +{ + pevent_register_print_function(pevent, + process_jbd2_dev_to_name, + PEVENT_FUNC_ARG_STRING, + "jbd2_dev_to_name", + PEVENT_FUNC_ARG_INT, + PEVENT_FUNC_ARG_VOID); + + pevent_register_print_function(pevent, + process_jiffies_to_msecs, + PEVENT_FUNC_ARG_LONG, + "jiffies_to_msecs", + PEVENT_FUNC_ARG_LONG, + PEVENT_FUNC_ARG_VOID); + return 0; +} diff --git a/tools/lib/traceevent/plugin_kmem.c b/tools/lib/traceevent/plugin_kmem.c new file mode 100644 index 000000000000..7115c8037ea8 --- /dev/null +++ b/tools/lib/traceevent/plugin_kmem.c @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <[email protected]> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "event-parse.h" + +static int call_site_handler(struct trace_seq *s, struct pevent_record *record, + struct event_format *event, void *context) +{ + struct format_field *field; + unsigned long long val, addr; + void *data = record->data; + const char *func; + + field = pevent_find_field(event, "call_site"); + if (!field) + return 1; + + if (pevent_read_number_field(field, data, &val)) + return 1; + + func = pevent_find_function(event->pevent, val); + if (!func) + return 1; + + addr = pevent_find_function_address(event->pevent, val); + + trace_seq_printf(s, "(%s+0x%x) ", func, (int)(val - addr)); + return 1; +} + +int PEVENT_PLUGIN_LOADER(struct pevent *pevent) +{ + pevent_register_event_handler(pevent, -1, "kmem", "kfree", + call_site_handler, NULL); + + pevent_register_event_handler(pevent, -1, "kmem", "kmalloc", + call_site_handler, NULL); + + pevent_register_event_handler(pevent, -1, "kmem", "kmalloc_node", + call_site_handler, NULL); + + pevent_register_event_handler(pevent, -1, "kmem", "kmem_cache_alloc", + call_site_handler, NULL); + + pevent_register_event_handler(pevent, -1, "kmem", + "kmem_cache_alloc_node", + call_site_handler, NULL); + + pevent_register_event_handler(pevent, -1, "kmem", "kmem_cache_free", + call_site_handler, NULL); + return 0; +} diff --git a/tools/lib/traceevent/plugin_kvm.c b/tools/lib/traceevent/plugin_kvm.c new file mode 100644 index 000000000000..a0e282c6b967 --- /dev/null +++ b/tools/lib/traceevent/plugin_kvm.c @@ -0,0 +1,436 @@ +/* + * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <[email protected]> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> + +#include "event-parse.h" + +#ifdef HAVE_UDIS86 + +#include <udis86.h> + +static ud_t ud; + +static void init_disassembler(void) +{ + ud_init(&ud); + ud_set_syntax(&ud, UD_SYN_ATT); +} + +static const char *disassemble(unsigned char *insn, int len, uint64_t rip, + int cr0_pe, int eflags_vm, + int cs_d, int cs_l) +{ + int mode; + + if (!cr0_pe) + mode = 16; + else if (eflags_vm) + mode = 16; + else if (cs_l) + mode = 64; + else if (cs_d) + mode = 32; + else + mode = 16; + + ud_set_pc(&ud, rip); + ud_set_mode(&ud, mode); + ud_set_input_buffer(&ud, insn, len); + ud_disassemble(&ud); + return ud_insn_asm(&ud); +} + +#else + +static void init_disassembler(void) +{ +} + +static const char *disassemble(unsigned char *insn, int len, uint64_t rip, + int cr0_pe, int eflags_vm, + int cs_d, int cs_l) +{ + static char out[15*3+1]; + int i; + + for (i = 0; i < len; ++i) + sprintf(out + i * 3, "%02x ", insn[i]); + out[len*3-1] = '\0'; + return out; +} + +#endif + + +#define VMX_EXIT_REASONS \ + _ER(EXCEPTION_NMI, 0) \ + _ER(EXTERNAL_INTERRUPT, 1) \ + _ER(TRIPLE_FAULT, 2) \ + _ER(PENDING_INTERRUPT, 7) \ + _ER(NMI_WINDOW, 8) \ + _ER(TASK_SWITCH, 9) \ + _ER(CPUID, 10) \ + _ER(HLT, 12) \ + _ER(INVD, 13) \ + _ER(INVLPG, 14) \ + _ER(RDPMC, 15) \ + _ER(RDTSC, 16) \ + _ER(VMCALL, 18) \ + _ER(VMCLEAR, 19) \ + _ER(VMLAUNCH, 20) \ + _ER(VMPTRLD, 21) \ + _ER(VMPTRST, 22) \ + _ER(VMREAD, 23) \ + _ER(VMRESUME, 24) \ + _ER(VMWRITE, 25) \ + _ER(VMOFF, 26) \ + _ER(VMON, 27) \ + _ER(CR_ACCESS, 28) \ + _ER(DR_ACCESS, 29) \ + _ER(IO_INSTRUCTION, 30) \ + _ER(MSR_READ, 31) \ + _ER(MSR_WRITE, 32) \ + _ER(MWAIT_INSTRUCTION, 36) \ + _ER(MONITOR_INSTRUCTION, 39) \ + _ER(PAUSE_INSTRUCTION, 40) \ + _ER(MCE_DURING_VMENTRY, 41) \ + _ER(TPR_BELOW_THRESHOLD, 43) \ + _ER(APIC_ACCESS, 44) \ + _ER(EOI_INDUCED, 45) \ + _ER(EPT_VIOLATION, 48) \ + _ER(EPT_MISCONFIG, 49) \ + _ER(INVEPT, 50) \ + _ER(PREEMPTION_TIMER, 52) \ + _ER(WBINVD, 54) \ + _ER(XSETBV, 55) \ + _ER(APIC_WRITE, 56) \ + _ER(INVPCID, 58) + +#define SVM_EXIT_REASONS \ + _ER(EXIT_READ_CR0, 0x000) \ + _ER(EXIT_READ_CR3, 0x003) \ + _ER(EXIT_READ_CR4, 0x004) \ + _ER(EXIT_READ_CR8, 0x008) \ + _ER(EXIT_WRITE_CR0, 0x010) \ + _ER(EXIT_WRITE_CR3, 0x013) \ + _ER(EXIT_WRITE_CR4, 0x014) \ + _ER(EXIT_WRITE_CR8, 0x018) \ + _ER(EXIT_READ_DR0, 0x020) \ + _ER(EXIT_READ_DR1, 0x021) \ + _ER(EXIT_READ_DR2, 0x022) \ + _ER(EXIT_READ_DR3, 0x023) \ + _ER(EXIT_READ_DR4, 0x024) \ + _ER(EXIT_READ_DR5, 0x025) \ + _ER(EXIT_READ_DR6, 0x026) \ + _ER(EXIT_READ_DR7, 0x027) \ + _ER(EXIT_WRITE_DR0, 0x030) \ + _ER(EXIT_WRITE_DR1, 0x031) \ + _ER(EXIT_WRITE_DR2, 0x032) \ + _ER(EXIT_WRITE_DR3, 0x033) \ + _ER(EXIT_WRITE_DR4, 0x034) \ + _ER(EXIT_WRITE_DR5, 0x035) \ + _ER(EXIT_WRITE_DR6, 0x036) \ + _ER(EXIT_WRITE_DR7, 0x037) \ + _ER(EXIT_EXCP_BASE, 0x040) \ + _ER(EXIT_INTR, 0x060) \ + _ER(EXIT_NMI, 0x061) \ + _ER(EXIT_SMI, 0x062) \ + _ER(EXIT_INIT, 0x063) \ + _ER(EXIT_VINTR, 0x064) \ + _ER(EXIT_CR0_SEL_WRITE, 0x065) \ + _ER(EXIT_IDTR_READ, 0x066) \ + _ER(EXIT_GDTR_READ, 0x067) \ + _ER(EXIT_LDTR_READ, 0x068) \ + _ER(EXIT_TR_READ, 0x069) \ + _ER(EXIT_IDTR_WRITE, 0x06a) \ + _ER(EXIT_GDTR_WRITE, 0x06b) \ + _ER(EXIT_LDTR_WRITE, 0x06c) \ + _ER(EXIT_TR_WRITE, 0x06d) \ + _ER(EXIT_RDTSC, 0x06e) \ + _ER(EXIT_RDPMC, 0x06f) \ + _ER(EXIT_PUSHF, 0x070) \ + _ER(EXIT_POPF, 0x071) \ + _ER(EXIT_CPUID, 0x072) \ + _ER(EXIT_RSM, 0x073) \ + _ER(EXIT_IRET, 0x074) \ + _ER(EXIT_SWINT, 0x075) \ + _ER(EXIT_INVD, 0x076) \ + _ER(EXIT_PAUSE, 0x077) \ + _ER(EXIT_HLT, 0x078) \ + _ER(EXIT_INVLPG, 0x079) \ + _ER(EXIT_INVLPGA, 0x07a) \ + _ER(EXIT_IOIO, 0x07b) \ + _ER(EXIT_MSR, 0x07c) \ + _ER(EXIT_TASK_SWITCH, 0x07d) \ + _ER(EXIT_FERR_FREEZE, 0x07e) \ + _ER(EXIT_SHUTDOWN, 0x07f) \ + _ER(EXIT_VMRUN, 0x080) \ + _ER(EXIT_VMMCALL, 0x081) \ + _ER(EXIT_VMLOAD, 0x082) \ + _ER(EXIT_VMSAVE, 0x083) \ + _ER(EXIT_STGI, 0x084) \ + _ER(EXIT_CLGI, 0x085) \ + _ER(EXIT_SKINIT, 0x086) \ + _ER(EXIT_RDTSCP, 0x087) \ + _ER(EXIT_ICEBP, 0x088) \ + _ER(EXIT_WBINVD, 0x089) \ + _ER(EXIT_MONITOR, 0x08a) \ + _ER(EXIT_MWAIT, 0x08b) \ + _ER(EXIT_MWAIT_COND, 0x08c) \ + _ER(EXIT_NPF, 0x400) \ + _ER(EXIT_ERR, -1) + +#define _ER(reason, val) { #reason, val }, +struct str_values { + const char *str; + int val; +}; + +static struct str_values vmx_exit_reasons[] = { + VMX_EXIT_REASONS + { NULL, -1} +}; + +static struct str_values svm_exit_reasons[] = { + SVM_EXIT_REASONS + { NULL, -1} +}; + +static struct isa_exit_reasons { + unsigned isa; + struct str_values *strings; +} isa_exit_reasons[] = { + { .isa = 1, .strings = vmx_exit_reasons }, + { .isa = 2, .strings = svm_exit_reasons }, + { } +}; + +static const char *find_exit_reason(unsigned isa, int val) +{ + struct str_values *strings = NULL; + int i; + + for (i = 0; isa_exit_reasons[i].strings; ++i) + if (isa_exit_reasons[i].isa == isa) { + strings = isa_exit_reasons[i].strings; + break; + } + if (!strings) + return "UNKNOWN-ISA"; + for (i = 0; strings[i].val >= 0; i++) + if (strings[i].val == val) + break; + if (strings[i].str) + return strings[i].str; + return "UNKNOWN"; +} + +static int kvm_exit_handler(struct trace_seq *s, struct pevent_record *record, + struct event_format *event, void *context) +{ + unsigned long long isa; + unsigned long long val; + unsigned long long info1 = 0, info2 = 0; + + if (pevent_get_field_val(s, event, "exit_reason", record, &val, 1) < 0) + return -1; + + if (pevent_get_field_val(s, event, "isa", record, &isa, 0) < 0) + isa = 1; + + trace_seq_printf(s, "reason %s", find_exit_reason(isa, val)); + + pevent_print_num_field(s, " rip 0x%lx", event, "guest_rip", record, 1); + + if (pevent_get_field_val(s, event, "info1", record, &info1, 0) >= 0 + && pevent_get_field_val(s, event, "info2", record, &info2, 0) >= 0) + trace_seq_printf(s, " info %llx %llx", info1, info2); + + return 0; +} + +#define KVM_EMUL_INSN_F_CR0_PE (1 << 0) +#define KVM_EMUL_INSN_F_EFL_VM (1 << 1) +#define KVM_EMUL_INSN_F_CS_D (1 << 2) +#define KVM_EMUL_INSN_F_CS_L (1 << 3) + +static int kvm_emulate_insn_handler(struct trace_seq *s, + struct pevent_record *record, + struct event_format *event, void *context) +{ + unsigned long long rip, csbase, len, flags, failed; + int llen; + uint8_t *insn; + const char *disasm; + + if (pevent_get_field_val(s, event, "rip", record, &rip, 1) < 0) + return -1; + + if (pevent_get_field_val(s, event, "csbase", record, &csbase, 1) < 0) + return -1; + + if (pevent_get_field_val(s, event, "len", record, &len, 1) < 0) + return -1; + + if (pevent_get_field_val(s, event, "flags", record, &flags, 1) < 0) + return -1; + + if (pevent_get_field_val(s, event, "failed", record, &failed, 1) < 0) + return -1; + + insn = pevent_get_field_raw(s, event, "insn", record, &llen, 1); + if (!insn) + return -1; + + disasm = disassemble(insn, len, rip, + flags & KVM_EMUL_INSN_F_CR0_PE, + flags & KVM_EMUL_INSN_F_EFL_VM, + flags & KVM_EMUL_INSN_F_CS_D, + flags & KVM_EMUL_INSN_F_CS_L); + + trace_seq_printf(s, "%llx:%llx: %s%s", csbase, rip, disasm, + failed ? " FAIL" : ""); + return 0; +} + +union kvm_mmu_page_role { + unsigned word; + struct { + unsigned glevels:4; + unsigned level:4; + unsigned quadrant:2; + unsigned pad_for_nice_hex_output:6; + unsigned direct:1; + unsigned access:3; + unsigned invalid:1; + unsigned cr4_pge:1; + unsigned nxe:1; + }; +}; + +static int kvm_mmu_print_role(struct trace_seq *s, struct pevent_record *record, + struct event_format *event, void *context) +{ + unsigned long long val; + static const char *access_str[] = { + "---", "--x", "w--", "w-x", "-u-", "-ux", "wu-", "wux" + }; + union kvm_mmu_page_role role; + + if (pevent_get_field_val(s, event, "role", record, &val, 1) < 0) + return -1; + + role.word = (int)val; + + /* + * We can only use the structure if file is of the same + * endianess. + */ + if (pevent_is_file_bigendian(event->pevent) == + pevent_is_host_bigendian(event->pevent)) { + + trace_seq_printf(s, "%u/%u q%u%s %s%s %spge %snxe", + role.level, + role.glevels, + role.quadrant, + role.direct ? " direct" : "", + access_str[role.access], + role.invalid ? " invalid" : "", + role.cr4_pge ? "" : "!", + role.nxe ? "" : "!"); + } else + trace_seq_printf(s, "WORD: %08x", role.word); + + pevent_print_num_field(s, " root %u ", event, + "root_count", record, 1); + + if (pevent_get_field_val(s, event, "unsync", record, &val, 1) < 0) + return -1; + + trace_seq_printf(s, "%s%c", val ? "unsync" : "sync", 0); + return 0; +} + +static int kvm_mmu_get_page_handler(struct trace_seq *s, + struct pevent_record *record, + struct event_format *event, void *context) +{ + unsigned long long val; + + if (pevent_get_field_val(s, event, "created", record, &val, 1) < 0) + return -1; + + trace_seq_printf(s, "%s ", val ? "new" : "existing"); + + if (pevent_get_field_val(s, event, "gfn", record, &val, 1) < 0) + return -1; + + trace_seq_printf(s, "sp gfn %llx ", val); + return kvm_mmu_print_role(s, record, event, context); +} + +#define PT_WRITABLE_SHIFT 1 +#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT) + +static unsigned long long +process_is_writable_pte(struct trace_seq *s, unsigned long long *args) +{ + unsigned long pte = args[0]; + return pte & PT_WRITABLE_MASK; +} + +int PEVENT_PLUGIN_LOADER(struct pevent *pevent) +{ + init_disassembler(); + + pevent_register_event_handler(pevent, -1, "kvm", "kvm_exit", + kvm_exit_handler, NULL); + + pevent_register_event_handler(pevent, -1, "kvm", "kvm_emulate_insn", + kvm_emulate_insn_handler, NULL); + + pevent_register_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_get_page", + kvm_mmu_get_page_handler, NULL); + + pevent_register_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_sync_page", + kvm_mmu_print_role, NULL); + + pevent_register_event_handler(pevent, -1, + "kvmmmu", "kvm_mmu_unsync_page", + kvm_mmu_print_role, NULL); + + pevent_register_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_zap_page", + kvm_mmu_print_role, NULL); + + pevent_register_event_handler(pevent, -1, "kvmmmu", + "kvm_mmu_prepare_zap_page", kvm_mmu_print_role, + NULL); + + pevent_register_print_function(pevent, + process_is_writable_pte, + PEVENT_FUNC_ARG_INT, + "is_writable_pte", + PEVENT_FUNC_ARG_LONG, + PEVENT_FUNC_ARG_VOID); + return 0; +} diff --git a/tools/lib/traceevent/plugin_mac80211.c b/tools/lib/traceevent/plugin_mac80211.c new file mode 100644 index 000000000000..558a3b91c046 --- /dev/null +++ b/tools/lib/traceevent/plugin_mac80211.c @@ -0,0 +1,95 @@ +/* + * Copyright (C) 2009 Johannes Berg <[email protected]> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "event-parse.h" + +#define INDENT 65 + +static void print_string(struct trace_seq *s, struct event_format *event, + const char *name, const void *data) +{ + struct format_field *f = pevent_find_field(event, name); + int offset; + int length; + + if (!f) { + trace_seq_printf(s, "NOTFOUND:%s", name); + return; + } + + offset = f->offset; + length = f->size; + + if (!strncmp(f->type, "__data_loc", 10)) { + unsigned long long v; + if (pevent_read_number_field(f, data, &v)) { + trace_seq_printf(s, "invalid_data_loc"); + return; + } + offset = v & 0xffff; + length = v >> 16; + } + + trace_seq_printf(s, "%.*s", length, (char *)data + offset); +} + +#define SF(fn) pevent_print_num_field(s, fn ":%d", event, fn, record, 0) +#define SFX(fn) pevent_print_num_field(s, fn ":%#x", event, fn, record, 0) +#define SP() trace_seq_putc(s, ' ') + +static int drv_bss_info_changed(struct trace_seq *s, + struct pevent_record *record, + struct event_format *event, void *context) +{ + void *data = record->data; + + print_string(s, event, "wiphy_name", data); + trace_seq_printf(s, " vif:"); + print_string(s, event, "vif_name", data); + pevent_print_num_field(s, "(%d)", event, "vif_type", record, 1); + + trace_seq_printf(s, "\n%*s", INDENT, ""); + SF("assoc"); SP(); + SF("aid"); SP(); + SF("cts"); SP(); + SF("shortpre"); SP(); + SF("shortslot"); SP(); + SF("dtimper"); SP(); + trace_seq_printf(s, "\n%*s", INDENT, ""); + SF("bcnint"); SP(); + SFX("assoc_cap"); SP(); + SFX("basic_rates"); SP(); + SF("enable_beacon"); + trace_seq_printf(s, "\n%*s", INDENT, ""); + SF("ht_operation_mode"); + + return 0; +} + +int PEVENT_PLUGIN_LOADER(struct pevent *pevent) +{ + pevent_register_event_handler(pevent, -1, "mac80211", + "drv_bss_info_changed", + drv_bss_info_changed, NULL); + return 0; +} diff --git a/tools/lib/traceevent/plugin_sched_switch.c b/tools/lib/traceevent/plugin_sched_switch.c new file mode 100644 index 000000000000..fea3724aa24f --- /dev/null +++ b/tools/lib/traceevent/plugin_sched_switch.c @@ -0,0 +1,148 @@ +/* + * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <[email protected]> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "event-parse.h" + +static void write_state(struct trace_seq *s, int val) +{ + const char states[] = "SDTtZXxW"; + int found = 0; + int i; + + for (i = 0; i < (sizeof(states) - 1); i++) { + if (!(val & (1 << i))) + continue; + + if (found) + trace_seq_putc(s, '|'); + + found = 1; + trace_seq_putc(s, states[i]); + } + + if (!found) + trace_seq_putc(s, 'R'); +} + +static void write_and_save_comm(struct format_field *field, + struct pevent_record *record, + struct trace_seq *s, int pid) +{ + const char *comm; + int len; + + comm = (char *)(record->data + field->offset); + len = s->len; + trace_seq_printf(s, "%.*s", + field->size, comm); + + /* make sure the comm has a \0 at the end. */ + trace_seq_terminate(s); + comm = &s->buffer[len]; + + /* Help out the comm to ids. This will handle dups */ + pevent_register_comm(field->event->pevent, comm, pid); +} + +static int sched_wakeup_handler(struct trace_seq *s, + struct pevent_record *record, + struct event_format *event, void *context) +{ + struct format_field *field; + unsigned long long val; + + if (pevent_get_field_val(s, event, "pid", record, &val, 1)) + return trace_seq_putc(s, '!'); + + field = pevent_find_any_field(event, "comm"); + if (field) { + write_and_save_comm(field, record, s, val); + trace_seq_putc(s, ':'); + } + trace_seq_printf(s, "%lld", val); + + if (pevent_get_field_val(s, event, "prio", record, &val, 0) == 0) + trace_seq_printf(s, " [%lld]", val); + + if (pevent_get_field_val(s, event, "success", record, &val, 1) == 0) + trace_seq_printf(s, " success=%lld", val); + + if (pevent_get_field_val(s, event, "target_cpu", record, &val, 0) == 0) + trace_seq_printf(s, " CPU:%03llu", val); + + return 0; +} + +static int sched_switch_handler(struct trace_seq *s, + struct pevent_record *record, + struct event_format *event, void *context) +{ + struct format_field *field; + unsigned long long val; + + if (pevent_get_field_val(s, event, "prev_pid", record, &val, 1)) + return trace_seq_putc(s, '!'); + + field = pevent_find_any_field(event, "prev_comm"); + if (field) { + write_and_save_comm(field, record, s, val); + trace_seq_putc(s, ':'); + } + trace_seq_printf(s, "%lld ", val); + + if (pevent_get_field_val(s, event, "prev_prio", record, &val, 0) == 0) + trace_seq_printf(s, "[%lld] ", val); + + if (pevent_get_field_val(s, event, "prev_state", record, &val, 0) == 0) + write_state(s, val); + + trace_seq_puts(s, " ==> "); + + if (pevent_get_field_val(s, event, "next_pid", record, &val, 1)) + return trace_seq_putc(s, '!'); + + field = pevent_find_any_field(event, "next_comm"); + if (field) { + write_and_save_comm(field, record, s, val); + trace_seq_putc(s, ':'); + } + trace_seq_printf(s, "%lld", val); + + if (pevent_get_field_val(s, event, "next_prio", record, &val, 0) == 0) + trace_seq_printf(s, " [%lld]", val); + + return 0; +} + +int PEVENT_PLUGIN_LOADER(struct pevent *pevent) +{ + pevent_register_event_handler(pevent, -1, "sched", "sched_switch", + sched_switch_handler, NULL); + + pevent_register_event_handler(pevent, -1, "sched", "sched_wakeup", + sched_wakeup_handler, NULL); + + pevent_register_event_handler(pevent, -1, "sched", "sched_wakeup_new", + sched_wakeup_handler, NULL); + return 0; +} diff --git a/tools/lib/traceevent/plugin_scsi.c b/tools/lib/traceevent/plugin_scsi.c new file mode 100644 index 000000000000..6fb8e3e3fcad --- /dev/null +++ b/tools/lib/traceevent/plugin_scsi.c @@ -0,0 +1,423 @@ +#include <stdio.h> +#include <string.h> +#include <inttypes.h> +#include "event-parse.h" + +typedef unsigned long sector_t; +typedef uint64_t u64; +typedef unsigned int u32; + +/* + * SCSI opcodes + */ +#define TEST_UNIT_READY 0x00 +#define REZERO_UNIT 0x01 +#define REQUEST_SENSE 0x03 +#define FORMAT_UNIT 0x04 +#define READ_BLOCK_LIMITS 0x05 +#define REASSIGN_BLOCKS 0x07 +#define INITIALIZE_ELEMENT_STATUS 0x07 +#define READ_6 0x08 +#define WRITE_6 0x0a +#define SEEK_6 0x0b +#define READ_REVERSE 0x0f +#define WRITE_FILEMARKS 0x10 +#define SPACE 0x11 +#define INQUIRY 0x12 +#define RECOVER_BUFFERED_DATA 0x14 +#define MODE_SELECT 0x15 +#define RESERVE 0x16 +#define RELEASE 0x17 +#define COPY 0x18 +#define ERASE 0x19 +#define MODE_SENSE 0x1a +#define START_STOP 0x1b +#define RECEIVE_DIAGNOSTIC 0x1c +#define SEND_DIAGNOSTIC 0x1d +#define ALLOW_MEDIUM_REMOVAL 0x1e + +#define READ_FORMAT_CAPACITIES 0x23 +#define SET_WINDOW 0x24 +#define READ_CAPACITY 0x25 +#define READ_10 0x28 +#define WRITE_10 0x2a +#define SEEK_10 0x2b +#define POSITION_TO_ELEMENT 0x2b +#define WRITE_VERIFY 0x2e +#define VERIFY 0x2f +#define SEARCH_HIGH 0x30 +#define SEARCH_EQUAL 0x31 +#define SEARCH_LOW 0x32 +#define SET_LIMITS 0x33 +#define PRE_FETCH 0x34 +#define READ_POSITION 0x34 +#define SYNCHRONIZE_CACHE 0x35 +#define LOCK_UNLOCK_CACHE 0x36 +#define READ_DEFECT_DATA 0x37 +#define MEDIUM_SCAN 0x38 +#define COMPARE 0x39 +#define COPY_VERIFY 0x3a +#define WRITE_BUFFER 0x3b +#define READ_BUFFER 0x3c +#define UPDATE_BLOCK 0x3d +#define READ_LONG 0x3e +#define WRITE_LONG 0x3f +#define CHANGE_DEFINITION 0x40 +#define WRITE_SAME 0x41 +#define UNMAP 0x42 +#define READ_TOC 0x43 +#define READ_HEADER 0x44 +#define GET_EVENT_STATUS_NOTIFICATION 0x4a +#define LOG_SELECT 0x4c +#define LOG_SENSE 0x4d +#define XDWRITEREAD_10 0x53 +#define MODE_SELECT_10 0x55 +#define RESERVE_10 0x56 +#define RELEASE_10 0x57 +#define MODE_SENSE_10 0x5a +#define PERSISTENT_RESERVE_IN 0x5e +#define PERSISTENT_RESERVE_OUT 0x5f +#define VARIABLE_LENGTH_CMD 0x7f +#define REPORT_LUNS 0xa0 +#define SECURITY_PROTOCOL_IN 0xa2 +#define MAINTENANCE_IN 0xa3 +#define MAINTENANCE_OUT 0xa4 +#define MOVE_MEDIUM 0xa5 +#define EXCHANGE_MEDIUM 0xa6 +#define READ_12 0xa8 +#define WRITE_12 0xaa +#define READ_MEDIA_SERIAL_NUMBER 0xab +#define WRITE_VERIFY_12 0xae +#define VERIFY_12 0xaf +#define SEARCH_HIGH_12 0xb0 +#define SEARCH_EQUAL_12 0xb1 +#define SEARCH_LOW_12 0xb2 +#define SECURITY_PROTOCOL_OUT 0xb5 +#define READ_ELEMENT_STATUS 0xb8 +#define SEND_VOLUME_TAG 0xb6 +#define WRITE_LONG_2 0xea +#define EXTENDED_COPY 0x83 +#define RECEIVE_COPY_RESULTS 0x84 +#define ACCESS_CONTROL_IN 0x86 +#define ACCESS_CONTROL_OUT 0x87 +#define READ_16 0x88 +#define WRITE_16 0x8a +#define READ_ATTRIBUTE 0x8c +#define WRITE_ATTRIBUTE 0x8d +#define VERIFY_16 0x8f +#define SYNCHRONIZE_CACHE_16 0x91 +#define WRITE_SAME_16 0x93 +#define SERVICE_ACTION_IN 0x9e +/* values for service action in */ +#define SAI_READ_CAPACITY_16 0x10 +#define SAI_GET_LBA_STATUS 0x12 +/* values for VARIABLE_LENGTH_CMD service action codes + * see spc4r17 Section D.3.5, table D.7 and D.8 */ +#define VLC_SA_RECEIVE_CREDENTIAL 0x1800 +/* values for maintenance in */ +#define MI_REPORT_IDENTIFYING_INFORMATION 0x05 +#define MI_REPORT_TARGET_PGS 0x0a +#define MI_REPORT_ALIASES 0x0b +#define MI_REPORT_SUPPORTED_OPERATION_CODES 0x0c +#define MI_REPORT_SUPPORTED_TASK_MANAGEMENT_FUNCTIONS 0x0d +#define MI_REPORT_PRIORITY 0x0e +#define MI_REPORT_TIMESTAMP 0x0f +#define MI_MANAGEMENT_PROTOCOL_IN 0x10 +/* value for MI_REPORT_TARGET_PGS ext header */ +#define MI_EXT_HDR_PARAM_FMT 0x20 +/* values for maintenance out */ +#define MO_SET_IDENTIFYING_INFORMATION 0x06 +#define MO_SET_TARGET_PGS 0x0a +#define MO_CHANGE_ALIASES 0x0b +#define MO_SET_PRIORITY 0x0e +#define MO_SET_TIMESTAMP 0x0f +#define MO_MANAGEMENT_PROTOCOL_OUT 0x10 +/* values for variable length command */ +#define XDREAD_32 0x03 +#define XDWRITE_32 0x04 +#define XPWRITE_32 0x06 +#define XDWRITEREAD_32 0x07 +#define READ_32 0x09 +#define VERIFY_32 0x0a +#define WRITE_32 0x0b +#define WRITE_SAME_32 0x0d + +#define SERVICE_ACTION16(cdb) (cdb[1] & 0x1f) +#define SERVICE_ACTION32(cdb) ((cdb[8] << 8) | cdb[9]) + +static const char * +scsi_trace_misc(struct trace_seq *, unsigned char *, int); + +static const char * +scsi_trace_rw6(struct trace_seq *p, unsigned char *cdb, int len) +{ + const char *ret = p->buffer + p->len; + sector_t lba = 0, txlen = 0; + + lba |= ((cdb[1] & 0x1F) << 16); + lba |= (cdb[2] << 8); + lba |= cdb[3]; + txlen = cdb[4]; + + trace_seq_printf(p, "lba=%llu txlen=%llu", + (unsigned long long)lba, (unsigned long long)txlen); + trace_seq_putc(p, 0); + return ret; +} + +static const char * +scsi_trace_rw10(struct trace_seq *p, unsigned char *cdb, int len) +{ + const char *ret = p->buffer + p->len; + sector_t lba = 0, txlen = 0; + + lba |= (cdb[2] << 24); + lba |= (cdb[3] << 16); + lba |= (cdb[4] << 8); + lba |= cdb[5]; + txlen |= (cdb[7] << 8); + txlen |= cdb[8]; + + trace_seq_printf(p, "lba=%llu txlen=%llu protect=%u", + (unsigned long long)lba, (unsigned long long)txlen, + cdb[1] >> 5); + + if (cdb[0] == WRITE_SAME) + trace_seq_printf(p, " unmap=%u", cdb[1] >> 3 & 1); + + trace_seq_putc(p, 0); + return ret; +} + +static const char * +scsi_trace_rw12(struct trace_seq *p, unsigned char *cdb, int len) +{ + const char *ret = p->buffer + p->len; + sector_t lba = 0, txlen = 0; + + lba |= (cdb[2] << 24); + lba |= (cdb[3] << 16); + lba |= (cdb[4] << 8); + lba |= cdb[5]; + txlen |= (cdb[6] << 24); + txlen |= (cdb[7] << 16); + txlen |= (cdb[8] << 8); + txlen |= cdb[9]; + + trace_seq_printf(p, "lba=%llu txlen=%llu protect=%u", + (unsigned long long)lba, (unsigned long long)txlen, + cdb[1] >> 5); + trace_seq_putc(p, 0); + return ret; +} + +static const char * +scsi_trace_rw16(struct trace_seq *p, unsigned char *cdb, int len) +{ + const char *ret = p->buffer + p->len; + sector_t lba = 0, txlen = 0; + + lba |= ((u64)cdb[2] << 56); + lba |= ((u64)cdb[3] << 48); + lba |= ((u64)cdb[4] << 40); + lba |= ((u64)cdb[5] << 32); + lba |= (cdb[6] << 24); + lba |= (cdb[7] << 16); + lba |= (cdb[8] << 8); + lba |= cdb[9]; + txlen |= (cdb[10] << 24); + txlen |= (cdb[11] << 16); + txlen |= (cdb[12] << 8); + txlen |= cdb[13]; + + trace_seq_printf(p, "lba=%llu txlen=%llu protect=%u", + (unsigned long long)lba, (unsigned long long)txlen, + cdb[1] >> 5); + + if (cdb[0] == WRITE_SAME_16) + trace_seq_printf(p, " unmap=%u", cdb[1] >> 3 & 1); + + trace_seq_putc(p, 0); + return ret; +} + +static const char * +scsi_trace_rw32(struct trace_seq *p, unsigned char *cdb, int len) +{ + const char *ret = p->buffer + p->len, *cmd; + sector_t lba = 0, txlen = 0; + u32 ei_lbrt = 0; + + switch (SERVICE_ACTION32(cdb)) { + case READ_32: + cmd = "READ"; + break; + case VERIFY_32: + cmd = "VERIFY"; + break; + case WRITE_32: + cmd = "WRITE"; + break; + case WRITE_SAME_32: + cmd = "WRITE_SAME"; + break; + default: + trace_seq_printf(p, "UNKNOWN"); + goto out; + } + + lba |= ((u64)cdb[12] << 56); + lba |= ((u64)cdb[13] << 48); + lba |= ((u64)cdb[14] << 40); + lba |= ((u64)cdb[15] << 32); + lba |= (cdb[16] << 24); + lba |= (cdb[17] << 16); + lba |= (cdb[18] << 8); + lba |= cdb[19]; + ei_lbrt |= (cdb[20] << 24); + ei_lbrt |= (cdb[21] << 16); + ei_lbrt |= (cdb[22] << 8); + ei_lbrt |= cdb[23]; + txlen |= (cdb[28] << 24); + txlen |= (cdb[29] << 16); + txlen |= (cdb[30] << 8); + txlen |= cdb[31]; + + trace_seq_printf(p, "%s_32 lba=%llu txlen=%llu protect=%u ei_lbrt=%u", + cmd, (unsigned long long)lba, + (unsigned long long)txlen, cdb[10] >> 5, ei_lbrt); + + if (SERVICE_ACTION32(cdb) == WRITE_SAME_32) + trace_seq_printf(p, " unmap=%u", cdb[10] >> 3 & 1); + +out: + trace_seq_putc(p, 0); + return ret; +} + +static const char * +scsi_trace_unmap(struct trace_seq *p, unsigned char *cdb, int len) +{ + const char *ret = p->buffer + p->len; + unsigned int regions = cdb[7] << 8 | cdb[8]; + + trace_seq_printf(p, "regions=%u", (regions - 8) / 16); + trace_seq_putc(p, 0); + return ret; +} + +static const char * +scsi_trace_service_action_in(struct trace_seq *p, unsigned char *cdb, int len) +{ + const char *ret = p->buffer + p->len, *cmd; + sector_t lba = 0; + u32 alloc_len = 0; + + switch (SERVICE_ACTION16(cdb)) { + case SAI_READ_CAPACITY_16: + cmd = "READ_CAPACITY_16"; + break; + case SAI_GET_LBA_STATUS: + cmd = "GET_LBA_STATUS"; + break; + default: + trace_seq_printf(p, "UNKNOWN"); + goto out; + } + + lba |= ((u64)cdb[2] << 56); + lba |= ((u64)cdb[3] << 48); + lba |= ((u64)cdb[4] << 40); + lba |= ((u64)cdb[5] << 32); + lba |= (cdb[6] << 24); + lba |= (cdb[7] << 16); + lba |= (cdb[8] << 8); + lba |= cdb[9]; + alloc_len |= (cdb[10] << 24); + alloc_len |= (cdb[11] << 16); + alloc_len |= (cdb[12] << 8); + alloc_len |= cdb[13]; + + trace_seq_printf(p, "%s lba=%llu alloc_len=%u", cmd, + (unsigned long long)lba, alloc_len); + +out: + trace_seq_putc(p, 0); + return ret; +} + +static const char * +scsi_trace_varlen(struct trace_seq *p, unsigned char *cdb, int len) +{ + switch (SERVICE_ACTION32(cdb)) { + case READ_32: + case VERIFY_32: + case WRITE_32: + case WRITE_SAME_32: + return scsi_trace_rw32(p, cdb, len); + default: + return scsi_trace_misc(p, cdb, len); + } +} + +static const char * +scsi_trace_misc(struct trace_seq *p, unsigned char *cdb, int len) +{ + const char *ret = p->buffer + p->len; + + trace_seq_printf(p, "-"); + trace_seq_putc(p, 0); + return ret; +} + +const char * +scsi_trace_parse_cdb(struct trace_seq *p, unsigned char *cdb, int len) +{ + switch (cdb[0]) { + case READ_6: + case WRITE_6: + return scsi_trace_rw6(p, cdb, len); + case READ_10: + case VERIFY: + case WRITE_10: + case WRITE_SAME: + return scsi_trace_rw10(p, cdb, len); + case READ_12: + case VERIFY_12: + case WRITE_12: + return scsi_trace_rw12(p, cdb, len); + case READ_16: + case VERIFY_16: + case WRITE_16: + case WRITE_SAME_16: + return scsi_trace_rw16(p, cdb, len); + case UNMAP: + return scsi_trace_unmap(p, cdb, len); + case SERVICE_ACTION_IN: + return scsi_trace_service_action_in(p, cdb, len); + case VARIABLE_LENGTH_CMD: + return scsi_trace_varlen(p, cdb, len); + default: + return scsi_trace_misc(p, cdb, len); + } +} + +unsigned long long process_scsi_trace_parse_cdb(struct trace_seq *s, + unsigned long long *args) +{ + scsi_trace_parse_cdb(s, (unsigned char *) args[1], args[2]); + return 0; +} + +int PEVENT_PLUGIN_LOADER(struct pevent *pevent) +{ + pevent_register_print_function(pevent, + process_scsi_trace_parse_cdb, + PEVENT_FUNC_ARG_STRING, + "scsi_trace_parse_cdb", + PEVENT_FUNC_ARG_PTR, + PEVENT_FUNC_ARG_PTR, + PEVENT_FUNC_ARG_INT, + PEVENT_FUNC_ARG_VOID); + return 0; +} diff --git a/tools/lib/traceevent/plugin_xen.c b/tools/lib/traceevent/plugin_xen.c new file mode 100644 index 000000000000..e7794298f3a9 --- /dev/null +++ b/tools/lib/traceevent/plugin_xen.c @@ -0,0 +1,130 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "event-parse.h" + +#define __HYPERVISOR_set_trap_table 0 +#define __HYPERVISOR_mmu_update 1 +#define __HYPERVISOR_set_gdt 2 +#define __HYPERVISOR_stack_switch 3 +#define __HYPERVISOR_set_callbacks 4 +#define __HYPERVISOR_fpu_taskswitch 5 +#define __HYPERVISOR_sched_op_compat 6 +#define __HYPERVISOR_dom0_op 7 +#define __HYPERVISOR_set_debugreg 8 +#define __HYPERVISOR_get_debugreg 9 +#define __HYPERVISOR_update_descriptor 10 +#define __HYPERVISOR_memory_op 12 +#define __HYPERVISOR_multicall 13 +#define __HYPERVISOR_update_va_mapping 14 +#define __HYPERVISOR_set_timer_op 15 +#define __HYPERVISOR_event_channel_op_compat 16 +#define __HYPERVISOR_xen_version 17 +#define __HYPERVISOR_console_io 18 +#define __HYPERVISOR_physdev_op_compat 19 +#define __HYPERVISOR_grant_table_op 20 +#define __HYPERVISOR_vm_assist 21 +#define __HYPERVISOR_update_va_mapping_otherdomain 22 +#define __HYPERVISOR_iret 23 /* x86 only */ +#define __HYPERVISOR_vcpu_op 24 +#define __HYPERVISOR_set_segment_base 25 /* x86/64 only */ +#define __HYPERVISOR_mmuext_op 26 +#define __HYPERVISOR_acm_op 27 +#define __HYPERVISOR_nmi_op 28 +#define __HYPERVISOR_sched_op 29 +#define __HYPERVISOR_callback_op 30 +#define __HYPERVISOR_xenoprof_op 31 +#define __HYPERVISOR_event_channel_op 32 +#define __HYPERVISOR_physdev_op 33 +#define __HYPERVISOR_hvm_op 34 +#define __HYPERVISOR_tmem_op 38 + +/* Architecture-specific hypercall definitions. */ +#define __HYPERVISOR_arch_0 48 +#define __HYPERVISOR_arch_1 49 +#define __HYPERVISOR_arch_2 50 +#define __HYPERVISOR_arch_3 51 +#define __HYPERVISOR_arch_4 52 +#define __HYPERVISOR_arch_5 53 +#define __HYPERVISOR_arch_6 54 +#define __HYPERVISOR_arch_7 55 + +#define N(x) [__HYPERVISOR_##x] = "("#x")" +static const char *xen_hypercall_names[] = { + N(set_trap_table), + N(mmu_update), + N(set_gdt), + N(stack_switch), + N(set_callbacks), + N(fpu_taskswitch), + N(sched_op_compat), + N(dom0_op), + N(set_debugreg), + N(get_debugreg), + N(update_descriptor), + N(memory_op), + N(multicall), + N(update_va_mapping), + N(set_timer_op), + N(event_channel_op_compat), + N(xen_version), + N(console_io), + N(physdev_op_compat), + N(grant_table_op), + N(vm_assist), + N(update_va_mapping_otherdomain), + N(iret), + N(vcpu_op), + N(set_segment_base), + N(mmuext_op), + N(acm_op), + N(nmi_op), + N(sched_op), + N(callback_op), + N(xenoprof_op), + N(event_channel_op), + N(physdev_op), + N(hvm_op), + +/* Architecture-specific hypercall definitions. */ + N(arch_0), + N(arch_1), + N(arch_2), + N(arch_3), + N(arch_4), + N(arch_5), + N(arch_6), + N(arch_7), +}; +#undef N + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + +static const char *xen_hypercall_name(unsigned op) +{ + if (op < ARRAY_SIZE(xen_hypercall_names) && + xen_hypercall_names[op] != NULL) + return xen_hypercall_names[op]; + + return ""; +} + +unsigned long long process_xen_hypercall_name(struct trace_seq *s, + unsigned long long *args) +{ + unsigned int op = args[0]; + + trace_seq_printf(s, "%s", xen_hypercall_name(op)); + return 0; +} + +int PEVENT_PLUGIN_LOADER(struct pevent *pevent) +{ + pevent_register_print_function(pevent, + process_xen_hypercall_name, + PEVENT_FUNC_ARG_STRING, + "xen_hypercall_name", + PEVENT_FUNC_ARG_INT, + PEVENT_FUNC_ARG_VOID); + return 0; +} diff --git a/tools/perf/Documentation/perf-archive.txt b/tools/perf/Documentation/perf-archive.txt index 5032a142853e..ac6ecbb3e669 100644 --- a/tools/perf/Documentation/perf-archive.txt +++ b/tools/perf/Documentation/perf-archive.txt @@ -12,9 +12,9 @@ SYNOPSIS DESCRIPTION ----------- -This command runs runs perf-buildid-list --with-hits, and collects the files -with the buildids found so that analysis of perf.data contents can be possible -on another machine. +This command runs perf-buildid-list --with-hits, and collects the files with the +buildids found so that analysis of perf.data contents can be possible on another +machine. SEE ALSO diff --git a/tools/perf/Documentation/perf-kvm.txt b/tools/perf/Documentation/perf-kvm.txt index 6a06cefe9642..52276a6d2b75 100644 --- a/tools/perf/Documentation/perf-kvm.txt +++ b/tools/perf/Documentation/perf-kvm.txt @@ -10,9 +10,9 @@ SYNOPSIS [verse] 'perf kvm' [--host] [--guest] [--guestmount=<path> [--guestkallsyms=<path> --guestmodules=<path> | --guestvmlinux=<path>]] - {top|record|report|diff|buildid-list} + {top|record|report|diff|buildid-list} [<options>] 'perf kvm' [--host] [--guest] [--guestkallsyms=<path> --guestmodules=<path> - | --guestvmlinux=<path>] {top|record|report|diff|buildid-list|stat} + | --guestvmlinux=<path>] {top|record|report|diff|buildid-list|stat} [<options>] 'perf kvm stat [record|report|live] [<options>] DESCRIPTION @@ -24,10 +24,17 @@ There are a couple of variants of perf kvm: of an arbitrary workload. 'perf kvm record <command>' to record the performance counter profile - of an arbitrary workload and save it into a perf data file. If both - --host and --guest are input, the perf data file name is perf.data.kvm. - If there is no --host but --guest, the file name is perf.data.guest. - If there is no --guest but --host, the file name is perf.data.host. + of an arbitrary workload and save it into a perf data file. We set the + default behavior of perf kvm as --guest, so if neither --host nor --guest + is input, the perf data file name is perf.data.guest. If --host is input, + the perf data file name is perf.data.kvm. If you want to record data into + perf.data.host, please input --host --no-guest. The behaviors are shown as + following: + Default('') -> perf.data.guest + --host -> perf.data.kvm + --guest -> perf.data.guest + --host --guest -> perf.data.kvm + --host --no-guest -> perf.data.host 'perf kvm report' to display the performance counter profile information recorded via perf kvm record. @@ -37,7 +44,9 @@ There are a couple of variants of perf kvm: 'perf kvm buildid-list' to display the buildids found in a perf data file, so that other tools can be used to fetch packages with matching symbol tables - for use by perf report. + for use by perf report. As buildid is read from /sys/kernel/notes in os, then + if you want to list the buildid for guest, please make sure your perf data file + was captured with --guestmount in perf kvm record. 'perf kvm stat <command>' to run a command and gather performance counter statistics. @@ -58,14 +67,14 @@ There are a couple of variants of perf kvm: OPTIONS ------- -i:: ---input=:: +--input=<path>:: Input file name. -o:: ---output:: +--output=<path>:: Output file name. ---host=:: +--host:: Collect host side performance profile. ---guest=:: +--guest:: Collect guest side performance profile. --guestmount=<path>:: Guest os root file system mount directory. Users mounts guest os @@ -84,6 +93,9 @@ OPTIONS kernel module information. Users copy it out from guest os. --guestvmlinux=<path>:: Guest os kernel vmlinux. +-v:: +--verbose:: + Be more verbose (show counter open errors, etc). STAT REPORT OPTIONS ------------------- diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 43b42c4f4a91..c407897f0435 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -57,6 +57,8 @@ OPTIONS -t:: --tid=:: Record events on existing thread ID (comma separated list). + This option also disables inheritance by default. Enable it by adding + --inherit. -u:: --uid=:: @@ -201,11 +203,11 @@ abort events and some memory events in precise mode on modern Intel CPUs. --transaction:: Record transaction flags for transaction related events. ---force-per-cpu:: -Force the use of per-cpu mmaps. By default, when tasks are specified (i.e. -p, --t or -u options) per-thread mmaps are created. This option overrides that and -forces per-cpu mmaps. A side-effect of that is that inheritance is -automatically enabled. Add the -i option also to disable inheritance. +--per-thread:: +Use per-thread mmaps. By default per-cpu mmaps are created. This option +overrides that and uses per-thread mmaps. A side-effect of that is that +inheritance is automatically disabled. --per-thread is ignored with a warning +if combined with -a or -C options. SEE ALSO -------- diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 10a279871251..8eab8a4bdeb8 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -237,6 +237,15 @@ OPTIONS Do not show entries which have an overhead under that percent. (Default: 0). +--header:: + Show header information in the perf.data file. This includes + various information like hostname, OS and perf version, cpu/mem + info, perf command line, event list and so on. Currently only + --stdio output supports this feature. + +--header-only:: + Show only perf.data header (forces --stdio). + SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-annotate[1] diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index e9cbfcddfa3f..05f9a0a6784c 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -115,7 +115,7 @@ OPTIONS -f:: --fields:: Comma separated list of fields to print. Options are: - comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff. + comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, srcline. Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace @@ -203,6 +203,18 @@ OPTIONS --show-kernel-path:: Try to resolve the path of [kernel.kallsyms] +--show-task-events + Display task related events (e.g. FORK, COMM, EXIT). + +--show-mmap-events + Display mmap related events (e.g. MMAP, MMAP2). + +--header + Show perf.data header. + +--header-only + Show only perf.data header. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script-perl[1], diff --git a/tools/perf/Documentation/perf-timechart.txt b/tools/perf/Documentation/perf-timechart.txt index 3ff8bd4f0b4d..271dd4ed5b05 100644 --- a/tools/perf/Documentation/perf-timechart.txt +++ b/tools/perf/Documentation/perf-timechart.txt @@ -8,8 +8,7 @@ perf-timechart - Tool to visualize total system behavior during a workload SYNOPSIS -------- [verse] -'perf timechart' record <command> -'perf timechart' [<options>] +'perf timechart' [<timechart options>] {record} [<record options>] DESCRIPTION ----------- @@ -21,8 +20,8 @@ There are two variants of perf timechart: 'perf timechart' to turn a trace into a Scalable Vector Graphics file, that can be viewed with popular SVG viewers such as 'Inkscape'. -OPTIONS -------- +TIMECHART OPTIONS +----------------- -o:: --output=:: Select the output file (default: output.svg) @@ -35,6 +34,9 @@ OPTIONS -P:: --power-only:: Only output the CPU power section of the diagram +-T:: +--tasks-only:: + Don't output processor state transitions -p:: --process:: Select the processes to display, by name or PID @@ -54,6 +56,22 @@ $ perf timechart Written 10.2 seconds of trace to output.svg. +-n:: +--proc-num:: + Print task info for at least given number of tasks. + +RECORD OPTIONS +-------------- +-P:: +--power-only:: + Record only power-related events +-T:: +--tasks-only:: + Record only tasks-related events +-g:: +--callchain:: + Do call-graph (stack chain/backtrace) recording + SEE ALSO -------- linkperf:perf-record[1] diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 7de01dd79688..cdd8d4946dba 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -50,7 +50,6 @@ Default is to monitor all CPUS. --count-filter=<count>:: Only display functions with more events than this. --g:: --group:: Put the counters into a counter group. @@ -143,12 +142,12 @@ Default is to monitor all CPUS. --asm-raw:: Show raw instruction encoding of assembly instructions. --G:: +-g:: Enables call-graph (stack chain/backtrace) recording. --call-graph:: Setup and enable call-graph (stack chain/backtrace) recording, - implies -G. + implies -g. --max-stack:: Set the stack depth limit when parsing the callchain, anything diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 4835618a5608..eefb9fb0c02f 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -60,8 +60,11 @@ endef # # Needed if no target specified: +# (Except for tags and TAGS targets. The reason is that the +# Makefile does not treat tags/TAGS as targets but as files +# and thus won't rebuilt them once they are in place.) # -all: +all tags TAGS: $(print_msg) $(make) @@ -77,3 +80,5 @@ clean: %: $(print_msg) $(make) + +.PHONY: tags TAGS diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 7fc8f179cae7..9a8cf376f874 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -105,7 +105,7 @@ ifeq ($(config),1) include config/Makefile endif -export prefix bindir sharedir sysconfdir +export prefix bindir sharedir sysconfdir DESTDIR # sparse is architecture-neutral, which means that we need to tell it # explicitly what architecture to check for. Fix this up for yours.. @@ -353,6 +353,7 @@ LIB_OBJS += $(OUTPUT)util/pmu-bison.o LIB_OBJS += $(OUTPUT)util/trace-event-read.o LIB_OBJS += $(OUTPUT)util/trace-event-info.o LIB_OBJS += $(OUTPUT)util/trace-event-scripting.o +LIB_OBJS += $(OUTPUT)util/trace-event.o LIB_OBJS += $(OUTPUT)util/svghelper.o LIB_OBJS += $(OUTPUT)util/sort.o LIB_OBJS += $(OUTPUT)util/hist.o @@ -710,13 +711,20 @@ $(LIB_FILE): $(LIB_OBJS) # libtraceevent.a TE_SOURCES = $(wildcard $(TRACE_EVENT_DIR)*.[ch]) -$(LIBTRACEEVENT): $(TE_SOURCES) - $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) CFLAGS="-g -Wall $(EXTRA_CFLAGS)" libtraceevent.a +LIBTRACEEVENT_FLAGS = $(QUIET_SUBDIR1) O=$(OUTPUT) +LIBTRACEEVENT_FLAGS += CFLAGS="-g -Wall $(EXTRA_CFLAGS)" +LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) + +$(LIBTRACEEVENT): $(TE_SOURCES) $(OUTPUT)PERF-CFLAGS + $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) libtraceevent.a plugins $(LIBTRACEEVENT)-clean: $(call QUIET_CLEAN, libtraceevent) @$(MAKE) -C $(TRACE_EVENT_DIR) O=$(OUTPUT) clean >/dev/null +install-traceevent-plugins: $(LIBTRACEEVENT) + $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) install_plugins + LIBLK_SOURCES = $(wildcard $(LK_PATH)*.[ch]) # if subdir is set, we've been called from above so target has been built @@ -785,7 +793,7 @@ cscope: ### Detect prefix changes TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):\ - $(bindir_SQ):$(perfexecdir_SQ):$(template_dir_SQ):$(prefix_SQ) + $(bindir_SQ):$(perfexecdir_SQ):$(template_dir_SQ):$(prefix_SQ):$(plugindir_SQ) $(OUTPUT)PERF-CFLAGS: .FORCE-PERF-CFLAGS @FLAGS='$(TRACK_CFLAGS)'; \ @@ -840,16 +848,16 @@ ifndef NO_LIBPYTHON $(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python'; \ $(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin' endif - $(call QUIET_INSTALL, bash_completion-script) \ + $(call QUIET_INSTALL, perf_completion-script) \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d'; \ - $(INSTALL) bash_completion '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf' + $(INSTALL) perf-completion.sh '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf' $(call QUIET_INSTALL, tests) \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \ $(INSTALL) tests/attr.py '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \ $(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr' -install: install-bin try-install-man +install: install-bin try-install-man install-traceevent-plugins install-python_ext: $(PYTHON_WORD) util/setup.py --quiet install --root='/$(DESTDIR_SQ)' diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 4087ab19823c..6fd52c8fa682 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -373,7 +373,7 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused) if (argc) { /* - * Special case: if there's an argument left then assume tha + * Special case: if there's an argument left then assume that * it's a symbol filter: */ if (argc > 1) diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 3b67ea2444bd..2a85cc9a2d09 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -1000,8 +1000,7 @@ static int data_init(int argc, const char **argv) data__files_cnt = argc; use_default = false; } - } else if (symbol_conf.default_guest_vmlinux_name || - symbol_conf.default_guest_kallsyms) { + } else if (perf_guest) { defaults[0] = "perf.data.host"; defaults[1] = "perf.data.guest"; } diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index f8bf5f244d77..c6fa3cbd45a9 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1232,7 +1232,7 @@ static int read_events(struct perf_kvm_stat *kvm) .ordered_samples = true, }; struct perf_data_file file = { - .path = input_name, + .path = kvm->file_name, .mode = PERF_DATA_MODE_READ, }; @@ -1690,6 +1690,8 @@ int cmd_kvm(int argc, const char **argv, const char *prefix __maybe_unused) "file", "file saving guest os /proc/kallsyms"), OPT_STRING(0, "guestmodules", &symbol_conf.default_guest_modules, "file", "file saving guest os /proc/modules"), + OPT_INCR('v', "verbose", &verbose, + "be more verbose (show counter open errors, etc)"), OPT_END() }; @@ -1711,12 +1713,7 @@ int cmd_kvm(int argc, const char **argv, const char *prefix __maybe_unused) perf_guest = 1; if (!file_name) { - if (perf_host && !perf_guest) - file_name = strdup("perf.data.host"); - else if (!perf_host && perf_guest) - file_name = strdup("perf.data.guest"); - else - file_name = strdup("perf.data.kvm"); + file_name = get_filename_for_perf_kvm(); if (!file_name) { pr_err("Failed to allocate memory for filename\n"); diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 7c8020a32784..c1c1200d2f0a 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -76,7 +76,7 @@ struct perf_record { long samples; }; -static int do_write_output(struct perf_record *rec, void *buf, size_t size) +static int perf_record__write(struct perf_record *rec, void *buf, size_t size) { struct perf_data_file *file = &rec->file; @@ -97,21 +97,13 @@ static int do_write_output(struct perf_record *rec, void *buf, size_t size) return 0; } -static int write_output(struct perf_record *rec, void *buf, size_t size) -{ - return do_write_output(rec, buf, size); -} - static int process_synthesized_event(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample __maybe_unused, struct machine *machine __maybe_unused) { struct perf_record *rec = container_of(tool, struct perf_record, tool); - if (write_output(rec, event, event->header.size) < 0) - return -1; - - return 0; + return perf_record__write(rec, event, event->header.size); } static int perf_record__mmap_read(struct perf_record *rec, @@ -136,7 +128,7 @@ static int perf_record__mmap_read(struct perf_record *rec, size = md->mask + 1 - (old & md->mask); old += size; - if (write_output(rec, buf, size) < 0) { + if (perf_record__write(rec, buf, size) < 0) { rc = -1; goto out; } @@ -146,7 +138,7 @@ static int perf_record__mmap_read(struct perf_record *rec, size = head - old; old += size; - if (write_output(rec, buf, size) < 0) { + if (perf_record__write(rec, buf, size) < 0) { rc = -1; goto out; } @@ -232,7 +224,7 @@ try_again: "Consider increasing " "/proc/sys/kernel/perf_event_mlock_kb,\n" "or try again with a smaller value of -m/--mmap_pages.\n" - "(current value: %d)\n", opts->mmap_pages); + "(current value: %u)\n", opts->mmap_pages); rc = -errno; } else { pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno)); @@ -335,8 +327,8 @@ static int perf_record__mmap_read_all(struct perf_record *rec) } if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA)) - rc = write_output(rec, &finished_round_event, - sizeof(finished_round_event)); + rc = perf_record__write(rec, &finished_round_event, + sizeof(finished_round_event)); out: return rc; @@ -800,6 +792,7 @@ static struct perf_record record = { .freq = 4000, .target = { .uses_mmap = true, + .default_per_cpu = true, }, }, }; @@ -842,8 +835,9 @@ const struct option record_options[] = { OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), OPT_STRING('o', "output", &record.file.path, "file", "output file name"), - OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit, - "child tasks do not inherit counters"), + OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit, + &record.opts.no_inherit_set, + "child tasks do not inherit counters"), OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"), OPT_CALLBACK('m', "mmap-pages", &record.opts.mmap_pages, "pages", "number of mmap data pages", @@ -888,8 +882,8 @@ const struct option record_options[] = { "sample by weight (on special events only)"), OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction, "sample transaction flags (special events only)"), - OPT_BOOLEAN(0, "force-per-cpu", &record.opts.target.force_per_cpu, - "force the use of per-cpu mmaps"), + OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread, + "use per-thread mmaps"), OPT_END() }; @@ -938,6 +932,9 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) goto out_symbol_exit; } + if (rec->opts.target.tid && !rec->opts.no_inherit_set) + rec->opts.no_inherit = true; + err = target__validate(&rec->opts.target); if (err) { target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 8cf8e66ba594..3a14dbed387c 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -49,6 +49,8 @@ struct perf_report { bool show_threads; bool inverted_callchain; bool mem_mode; + bool header; + bool header_only; int max_stack; struct perf_read_values show_threads_values; const char *pretty_printing_style; @@ -514,9 +516,6 @@ static int __cmd_report(struct perf_report *rep) return ret; } - if (use_browser <= 0) - perf_session__fprintf_info(session, stdout, rep->show_full_info); - if (rep->show_threads) perf_read_values_init(&rep->show_threads_values); @@ -820,6 +819,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) OPT_BOOLEAN(0, "gtk", &report.use_gtk, "Use the GTK2 interface"), OPT_BOOLEAN(0, "stdio", &report.use_stdio, "Use the stdio interface"), + OPT_BOOLEAN(0, "header", &report.header, "Show data header."), + OPT_BOOLEAN(0, "header-only", &report.header_only, + "Show only data header."), OPT_STRING('s', "sort", &sort_order, "key[,key2...]", "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline," " dso_to, dso_from, symbol_to, symbol_from, mispredict," @@ -963,6 +965,10 @@ repeat: goto error; } + /* Force tty output for header output. */ + if (report.header || report.header_only) + use_browser = 0; + if (strcmp(input_name, "-") != 0) setup_browser(true); else { @@ -970,6 +976,16 @@ repeat: perf_hpp__init(); } + if (report.header || report.header_only) { + perf_session__fprintf_info(session, stdout, + report.show_full_info); + if (report.header_only) + return 0; + } else if (use_browser == 0) { + fputs("# To display the perf.data header info, please use --header/--header-only options.\n#\n", + stdout); + } + /* * Only in the TUI browser we are doing integrated annotation, * so don't allocate extra space that won't be used in the stdio diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index baf17989a216..f8ab125aac48 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -43,6 +43,7 @@ enum perf_output_field { PERF_OUTPUT_DSO = 1U << 9, PERF_OUTPUT_ADDR = 1U << 10, PERF_OUTPUT_SYMOFFSET = 1U << 11, + PERF_OUTPUT_SRCLINE = 1U << 12, }; struct output_option { @@ -61,6 +62,7 @@ struct output_option { {.str = "dso", .field = PERF_OUTPUT_DSO}, {.str = "addr", .field = PERF_OUTPUT_ADDR}, {.str = "symoff", .field = PERF_OUTPUT_SYMOFFSET}, + {.str = "srcline", .field = PERF_OUTPUT_SRCLINE}, }; /* default set to maintain compatibility with current format */ @@ -210,6 +212,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, "to DSO.\n"); return -EINVAL; } + if (PRINT_FIELD(SRCLINE) && !PRINT_FIELD(IP)) { + pr_err("Display of source line number requested but sample IP is not\n" + "selected. Hence, no address to lookup the source line number.\n"); + return -EINVAL; + } if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) && perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID", @@ -245,6 +252,9 @@ static void set_print_ip_opts(struct perf_event_attr *attr) if (PRINT_FIELD(SYMOFFSET)) output[type].print_ip_opts |= PRINT_IP_OPT_SYMOFFSET; + + if (PRINT_FIELD(SRCLINE)) + output[type].print_ip_opts |= PRINT_IP_OPT_SRCLINE; } /* @@ -280,6 +290,30 @@ static int perf_session__check_output_opt(struct perf_session *session) set_print_ip_opts(&evsel->attr); } + /* + * set default for tracepoints to print symbols only + * if callchains are present + */ + if (symbol_conf.use_callchain && + !output[PERF_TYPE_TRACEPOINT].user_set) { + struct perf_event_attr *attr; + + j = PERF_TYPE_TRACEPOINT; + evsel = perf_session__find_first_evtype(session, j); + if (evsel == NULL) + goto out; + + attr = &evsel->attr; + + if (attr->sample_type & PERF_SAMPLE_CALLCHAIN) { + output[j].fields |= PERF_OUTPUT_IP; + output[j].fields |= PERF_OUTPUT_SYM; + output[j].fields |= PERF_OUTPUT_DSO; + set_print_ip_opts(attr); + } + } + +out: return 0; } @@ -288,7 +322,6 @@ static void print_sample_start(struct perf_sample *sample, struct perf_evsel *evsel) { struct perf_event_attr *attr = &evsel->attr; - const char *evname = NULL; unsigned long secs; unsigned long usecs; unsigned long long nsecs; @@ -323,11 +356,6 @@ static void print_sample_start(struct perf_sample *sample, usecs = nsecs / NSECS_PER_USEC; printf("%5lu.%06lu: ", secs, usecs); } - - if (PRINT_FIELD(EVNAME)) { - evname = perf_evsel__name(evsel); - printf("%s: ", evname ? evname : "[unknown]"); - } } static bool is_bts_event(struct perf_event_attr *attr) @@ -396,7 +424,8 @@ static void print_sample_bts(union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine, - struct thread *thread) + struct thread *thread, + struct addr_location *al) { struct perf_event_attr *attr = &evsel->attr; @@ -406,7 +435,7 @@ static void print_sample_bts(union perf_event *event, printf(" "); else printf("\n"); - perf_evsel__print_ip(evsel, event, sample, machine, + perf_evsel__print_ip(evsel, sample, machine, al, output[attr->type].print_ip_opts, PERF_MAX_STACK_DEPTH); } @@ -425,7 +454,7 @@ static void print_sample_bts(union perf_event *event, static void process_event(union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine, struct thread *thread, - struct addr_location *al __maybe_unused) + struct addr_location *al) { struct perf_event_attr *attr = &evsel->attr; @@ -434,8 +463,13 @@ static void process_event(union perf_event *event, struct perf_sample *sample, print_sample_start(sample, thread, evsel); + if (PRINT_FIELD(EVNAME)) { + const char *evname = perf_evsel__name(evsel); + printf("%s: ", evname ? evname : "[unknown]"); + } + if (is_bts_event(attr)) { - print_sample_bts(event, sample, evsel, machine, thread); + print_sample_bts(event, sample, evsel, machine, thread, al); return; } @@ -451,7 +485,7 @@ static void process_event(union perf_event *event, struct perf_sample *sample, else printf("\n"); - perf_evsel__print_ip(evsel, event, sample, machine, + perf_evsel__print_ip(evsel, sample, machine, al, output[attr->type].print_ip_opts, PERF_MAX_STACK_DEPTH); } @@ -549,6 +583,8 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, struct perf_script { struct perf_tool tool; struct perf_session *session; + bool show_task_events; + bool show_mmap_events; }; static int process_attr(struct perf_tool *tool, union perf_event *event, @@ -579,6 +615,163 @@ static int process_attr(struct perf_tool *tool, union perf_event *event, return perf_evsel__check_attr(evsel, scr->session); } +static int process_comm_event(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct thread *thread; + struct perf_script *script = container_of(tool, struct perf_script, tool); + struct perf_session *session = script->session; + struct perf_evsel *evsel = perf_evlist__first(session->evlist); + int ret = -1; + + thread = machine__findnew_thread(machine, event->comm.pid, event->comm.tid); + if (thread == NULL) { + pr_debug("problem processing COMM event, skipping it.\n"); + return -1; + } + + if (perf_event__process_comm(tool, event, sample, machine) < 0) + goto out; + + if (!evsel->attr.sample_id_all) { + sample->cpu = 0; + sample->time = 0; + sample->tid = event->comm.tid; + sample->pid = event->comm.pid; + } + print_sample_start(sample, thread, evsel); + perf_event__fprintf(event, stdout); + ret = 0; + +out: + return ret; +} + +static int process_fork_event(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct thread *thread; + struct perf_script *script = container_of(tool, struct perf_script, tool); + struct perf_session *session = script->session; + struct perf_evsel *evsel = perf_evlist__first(session->evlist); + + if (perf_event__process_fork(tool, event, sample, machine) < 0) + return -1; + + thread = machine__findnew_thread(machine, event->fork.pid, event->fork.tid); + if (thread == NULL) { + pr_debug("problem processing FORK event, skipping it.\n"); + return -1; + } + + if (!evsel->attr.sample_id_all) { + sample->cpu = 0; + sample->time = event->fork.time; + sample->tid = event->fork.tid; + sample->pid = event->fork.pid; + } + print_sample_start(sample, thread, evsel); + perf_event__fprintf(event, stdout); + + return 0; +} +static int process_exit_event(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct thread *thread; + struct perf_script *script = container_of(tool, struct perf_script, tool); + struct perf_session *session = script->session; + struct perf_evsel *evsel = perf_evlist__first(session->evlist); + + thread = machine__findnew_thread(machine, event->fork.pid, event->fork.tid); + if (thread == NULL) { + pr_debug("problem processing EXIT event, skipping it.\n"); + return -1; + } + + if (!evsel->attr.sample_id_all) { + sample->cpu = 0; + sample->time = 0; + sample->tid = event->comm.tid; + sample->pid = event->comm.pid; + } + print_sample_start(sample, thread, evsel); + perf_event__fprintf(event, stdout); + + if (perf_event__process_exit(tool, event, sample, machine) < 0) + return -1; + + return 0; +} + +static int process_mmap_event(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct thread *thread; + struct perf_script *script = container_of(tool, struct perf_script, tool); + struct perf_session *session = script->session; + struct perf_evsel *evsel = perf_evlist__first(session->evlist); + + if (perf_event__process_mmap(tool, event, sample, machine) < 0) + return -1; + + thread = machine__findnew_thread(machine, event->mmap.pid, event->mmap.tid); + if (thread == NULL) { + pr_debug("problem processing MMAP event, skipping it.\n"); + return -1; + } + + if (!evsel->attr.sample_id_all) { + sample->cpu = 0; + sample->time = 0; + sample->tid = event->mmap.tid; + sample->pid = event->mmap.pid; + } + print_sample_start(sample, thread, evsel); + perf_event__fprintf(event, stdout); + + return 0; +} + +static int process_mmap2_event(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct thread *thread; + struct perf_script *script = container_of(tool, struct perf_script, tool); + struct perf_session *session = script->session; + struct perf_evsel *evsel = perf_evlist__first(session->evlist); + + if (perf_event__process_mmap2(tool, event, sample, machine) < 0) + return -1; + + thread = machine__findnew_thread(machine, event->mmap2.pid, event->mmap2.tid); + if (thread == NULL) { + pr_debug("problem processing MMAP2 event, skipping it.\n"); + return -1; + } + + if (!evsel->attr.sample_id_all) { + sample->cpu = 0; + sample->time = 0; + sample->tid = event->mmap2.tid; + sample->pid = event->mmap2.pid; + } + print_sample_start(sample, thread, evsel); + perf_event__fprintf(event, stdout); + + return 0; +} + static void sig_handler(int sig __maybe_unused) { session_done = 1; @@ -590,6 +783,17 @@ static int __cmd_script(struct perf_script *script) signal(SIGINT, sig_handler); + /* override event processing functions */ + if (script->show_task_events) { + script->tool.comm = process_comm_event; + script->tool.fork = process_fork_event; + script->tool.exit = process_exit_event; + } + if (script->show_mmap_events) { + script->tool.mmap = process_mmap_event; + script->tool.mmap2 = process_mmap2_event; + } + ret = perf_session__process_events(script->session, &script->tool); if (debug_mode) @@ -1290,6 +1494,8 @@ static int have_cmd(int argc, const char **argv) int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) { bool show_full_info = false; + bool header = false; + bool header_only = false; char *rec_script_path = NULL; char *rep_script_path = NULL; struct perf_session *session; @@ -1328,6 +1534,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) OPT_STRING('i', "input", &input_name, "file", "input file name"), OPT_BOOLEAN('d', "debug-mode", &debug_mode, "do various checks like samples ordering and lost events"), + OPT_BOOLEAN(0, "header", &header, "Show data header."), + OPT_BOOLEAN(0, "header-only", &header_only, "Show only data header."), OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, "file", "vmlinux pathname"), OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, @@ -1352,6 +1560,10 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) "display extended information from perf.data file"), OPT_BOOLEAN('\0', "show-kernel-path", &symbol_conf.show_kernel_path, "Show the path of [kernel.kallsyms]"), + OPT_BOOLEAN('\0', "show-task-events", &script.show_task_events, + "Show the fork/comm/exit events"), + OPT_BOOLEAN('\0', "show-mmap-events", &script.show_mmap_events, + "Show the mmap events"), OPT_END() }; const char * const script_usage[] = { @@ -1540,6 +1752,12 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) if (session == NULL) return -ENOMEM; + if (header || header_only) { + perf_session__fprintf_info(session, stdout, show_full_info); + if (header_only) + return 0; + } + script.session = session; if (cpu_list) { @@ -1547,9 +1765,6 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) return -1; } - if (!script_name && !generate_script_lang) - perf_session__fprintf_info(session, stdout, show_full_info); - if (!no_callchain) symbol_conf.use_callchain = true; else @@ -1588,7 +1803,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) return -1; } - err = scripting_ops->generate_script(session->pevent, + err = scripting_ops->generate_script(session->tevent.pevent, "perf-script"); goto out; } diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index ee0d565f83e3..dab98b50c9fe 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -138,6 +138,7 @@ static const char *post_cmd = NULL; static bool sync_run = false; static unsigned int interval = 0; static unsigned int initial_delay = 0; +static unsigned int unit_width = 4; /* strlen("unit") */ static bool forever = false; static struct timespec ref_time; static struct cpu_map *aggr_map; @@ -461,17 +462,17 @@ static void print_interval(void) if (num_print_interval == 0 && !csv_output) { switch (aggr_mode) { case AGGR_SOCKET: - fprintf(output, "# time socket cpus counts events\n"); + fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit"); break; case AGGR_CORE: - fprintf(output, "# time core cpus counts events\n"); + fprintf(output, "# time core cpus counts %*s events\n", unit_width, "unit"); break; case AGGR_NONE: - fprintf(output, "# time CPU counts events\n"); + fprintf(output, "# time CPU counts %*s events\n", unit_width, "unit"); break; case AGGR_GLOBAL: default: - fprintf(output, "# time counts events\n"); + fprintf(output, "# time counts %*s events\n", unit_width, "unit"); } } @@ -516,6 +517,7 @@ static int __run_perf_stat(int argc, const char **argv) unsigned long long t0, t1; struct perf_evsel *counter; struct timespec ts; + size_t l; int status = 0; const bool forks = (argc > 0); @@ -565,6 +567,10 @@ static int __run_perf_stat(int argc, const char **argv) return -1; } counter->supported = true; + + l = strlen(counter->unit); + if (l > unit_width) + unit_width = l; } if (perf_evlist__apply_filters(evsel_list)) { @@ -704,14 +710,25 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) { double msecs = avg / 1e6; - const char *fmt = csv_output ? "%.6f%s%s" : "%18.6f%s%-25s"; + const char *fmt_v, *fmt_n; char name[25]; + fmt_v = csv_output ? "%.6f%s" : "%18.6f%s"; + fmt_n = csv_output ? "%s" : "%-25s"; + aggr_printout(evsel, cpu, nr); scnprintf(name, sizeof(name), "%s%s", perf_evsel__name(evsel), csv_output ? "" : " (msec)"); - fprintf(output, fmt, msecs, csv_sep, name); + + fprintf(output, fmt_v, msecs, csv_sep); + + if (csv_output) + fprintf(output, "%s%s", evsel->unit, csv_sep); + else + fprintf(output, "%-*s%s", unit_width, evsel->unit, csv_sep); + + fprintf(output, fmt_n, name); if (evsel->cgrp) fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); @@ -908,21 +925,31 @@ static void print_ll_cache_misses(int cpu, static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) { double total, ratio = 0.0, total2; + double sc = evsel->scale; const char *fmt; - if (csv_output) - fmt = "%.0f%s%s"; - else if (big_num) - fmt = "%'18.0f%s%-25s"; - else - fmt = "%18.0f%s%-25s"; + if (csv_output) { + fmt = sc != 1.0 ? "%.2f%s" : "%.0f%s"; + } else { + if (big_num) + fmt = sc != 1.0 ? "%'18.2f%s" : "%'18.0f%s"; + else + fmt = sc != 1.0 ? "%18.2f%s" : "%18.0f%s"; + } aggr_printout(evsel, cpu, nr); if (aggr_mode == AGGR_GLOBAL) cpu = 0; - fprintf(output, fmt, avg, csv_sep, perf_evsel__name(evsel)); + fprintf(output, fmt, avg, csv_sep); + + if (evsel->unit) + fprintf(output, "%-*s%s", + csv_output ? 0 : unit_width, + evsel->unit, csv_sep); + + fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel)); if (evsel->cgrp) fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); @@ -941,7 +968,10 @@ static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) if (total && avg) { ratio = total / avg; - fprintf(output, "\n # %5.2f stalled cycles per insn", ratio); + fprintf(output, "\n"); + if (aggr_mode == AGGR_NONE) + fprintf(output, " "); + fprintf(output, " # %5.2f stalled cycles per insn", ratio); } } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) && @@ -1061,6 +1091,7 @@ static void print_aggr(char *prefix) { struct perf_evsel *counter; int cpu, cpu2, s, s2, id, nr; + double uval; u64 ena, run, val; if (!(aggr_map || aggr_get_id)) @@ -1087,11 +1118,17 @@ static void print_aggr(char *prefix) if (run == 0 || ena == 0) { aggr_printout(counter, id, nr); - fprintf(output, "%*s%s%*s", + fprintf(output, "%*s%s", csv_output ? 0 : 18, counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, - csv_sep, - csv_output ? 0 : -24, + csv_sep); + + fprintf(output, "%-*s%s", + csv_output ? 0 : unit_width, + counter->unit, csv_sep); + + fprintf(output, "%*s", + csv_output ? 0 : -25, perf_evsel__name(counter)); if (counter->cgrp) @@ -1101,11 +1138,12 @@ static void print_aggr(char *prefix) fputc('\n', output); continue; } + uval = val * counter->scale; if (nsec_counter(counter)) - nsec_printout(id, nr, counter, val); + nsec_printout(id, nr, counter, uval); else - abs_printout(id, nr, counter, val); + abs_printout(id, nr, counter, uval); if (!csv_output) { print_noise(counter, 1.0); @@ -1128,16 +1166,21 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) struct perf_stat *ps = counter->priv; double avg = avg_stats(&ps->res_stats[0]); int scaled = counter->counts->scaled; + double uval; if (prefix) fprintf(output, "%s", prefix); if (scaled == -1) { - fprintf(output, "%*s%s%*s", + fprintf(output, "%*s%s", csv_output ? 0 : 18, counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, - csv_sep, - csv_output ? 0 : -24, + csv_sep); + fprintf(output, "%-*s%s", + csv_output ? 0 : unit_width, + counter->unit, csv_sep); + fprintf(output, "%*s", + csv_output ? 0 : -25, perf_evsel__name(counter)); if (counter->cgrp) @@ -1147,10 +1190,12 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) return; } + uval = avg * counter->scale; + if (nsec_counter(counter)) - nsec_printout(-1, 0, counter, avg); + nsec_printout(-1, 0, counter, uval); else - abs_printout(-1, 0, counter, avg); + abs_printout(-1, 0, counter, uval); print_noise(counter, avg); @@ -1177,6 +1222,7 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) static void print_counter(struct perf_evsel *counter, char *prefix) { u64 ena, run, val; + double uval; int cpu; for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { @@ -1188,14 +1234,20 @@ static void print_counter(struct perf_evsel *counter, char *prefix) fprintf(output, "%s", prefix); if (run == 0 || ena == 0) { - fprintf(output, "CPU%*d%s%*s%s%*s", + fprintf(output, "CPU%*d%s%*s%s", csv_output ? 0 : -4, perf_evsel__cpus(counter)->map[cpu], csv_sep, csv_output ? 0 : 18, counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, - csv_sep, - csv_output ? 0 : -24, - perf_evsel__name(counter)); + csv_sep); + + fprintf(output, "%-*s%s", + csv_output ? 0 : unit_width, + counter->unit, csv_sep); + + fprintf(output, "%*s", + csv_output ? 0 : -25, + perf_evsel__name(counter)); if (counter->cgrp) fprintf(output, "%s%s", @@ -1205,10 +1257,12 @@ static void print_counter(struct perf_evsel *counter, char *prefix) continue; } + uval = val * counter->scale; + if (nsec_counter(counter)) - nsec_printout(cpu, 0, counter, val); + nsec_printout(cpu, 0, counter, uval); else - abs_printout(cpu, 0, counter, val); + abs_printout(cpu, 0, counter, uval); if (!csv_output) { print_noise(counter, 1.0); diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 41c9bde2fb67..0bda620a717d 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -41,25 +41,28 @@ #define SUPPORT_OLD_POWER_EVENTS 1 #define PWR_EVENT_EXIT -1 - -static unsigned int numcpus; -static u64 min_freq; /* Lowest CPU frequency seen */ -static u64 max_freq; /* Highest CPU frequency seen */ -static u64 turbo_frequency; - -static u64 first_time, last_time; - -static bool power_only; - - struct per_pid; -struct per_pidcomm; - -struct cpu_sample; struct power_event; struct wake_event; -struct sample_wrapper; +struct timechart { + struct perf_tool tool; + struct per_pid *all_data; + struct power_event *power_events; + struct wake_event *wake_events; + int proc_num; + unsigned int numcpus; + u64 min_freq, /* Lowest CPU frequency seen */ + max_freq, /* Highest CPU frequency seen */ + turbo_frequency, + first_time, last_time; + bool power_only, + tasks_only, + with_backtrace; +}; + +struct per_pidcomm; +struct cpu_sample; /* * Datastructure layout: @@ -124,10 +127,9 @@ struct cpu_sample { u64 end_time; int type; int cpu; + const char *backtrace; }; -static struct per_pid *all_data; - #define CSTATE 1 #define PSTATE 2 @@ -145,12 +147,9 @@ struct wake_event { int waker; int wakee; u64 time; + const char *backtrace; }; -static struct power_event *power_events; -static struct wake_event *wake_events; - -struct process_filter; struct process_filter { char *name; int pid; @@ -160,9 +159,9 @@ struct process_filter { static struct process_filter *process_filter; -static struct per_pid *find_create_pid(int pid) +static struct per_pid *find_create_pid(struct timechart *tchart, int pid) { - struct per_pid *cursor = all_data; + struct per_pid *cursor = tchart->all_data; while (cursor) { if (cursor->pid == pid) @@ -172,16 +171,16 @@ static struct per_pid *find_create_pid(int pid) cursor = zalloc(sizeof(*cursor)); assert(cursor != NULL); cursor->pid = pid; - cursor->next = all_data; - all_data = cursor; + cursor->next = tchart->all_data; + tchart->all_data = cursor; return cursor; } -static void pid_set_comm(int pid, char *comm) +static void pid_set_comm(struct timechart *tchart, int pid, char *comm) { struct per_pid *p; struct per_pidcomm *c; - p = find_create_pid(pid); + p = find_create_pid(tchart, pid); c = p->all; while (c) { if (c->comm && strcmp(c->comm, comm) == 0) { @@ -203,14 +202,14 @@ static void pid_set_comm(int pid, char *comm) p->all = c; } -static void pid_fork(int pid, int ppid, u64 timestamp) +static void pid_fork(struct timechart *tchart, int pid, int ppid, u64 timestamp) { struct per_pid *p, *pp; - p = find_create_pid(pid); - pp = find_create_pid(ppid); + p = find_create_pid(tchart, pid); + pp = find_create_pid(tchart, ppid); p->ppid = ppid; if (pp->current && pp->current->comm && !p->current) - pid_set_comm(pid, pp->current->comm); + pid_set_comm(tchart, pid, pp->current->comm); p->start_time = timestamp; if (p->current) { @@ -219,23 +218,24 @@ static void pid_fork(int pid, int ppid, u64 timestamp) } } -static void pid_exit(int pid, u64 timestamp) +static void pid_exit(struct timechart *tchart, int pid, u64 timestamp) { struct per_pid *p; - p = find_create_pid(pid); + p = find_create_pid(tchart, pid); p->end_time = timestamp; if (p->current) p->current->end_time = timestamp; } -static void -pid_put_sample(int pid, int type, unsigned int cpu, u64 start, u64 end) +static void pid_put_sample(struct timechart *tchart, int pid, int type, + unsigned int cpu, u64 start, u64 end, + const char *backtrace) { struct per_pid *p; struct per_pidcomm *c; struct cpu_sample *sample; - p = find_create_pid(pid); + p = find_create_pid(tchart, pid); c = p->current; if (!c) { c = zalloc(sizeof(*c)); @@ -252,6 +252,7 @@ pid_put_sample(int pid, int type, unsigned int cpu, u64 start, u64 end) sample->type = type; sample->next = c->samples; sample->cpu = cpu; + sample->backtrace = backtrace; c->samples = sample; if (sample->type == TYPE_RUNNING && end > start && start > 0) { @@ -272,84 +273,47 @@ static int cpus_cstate_state[MAX_CPUS]; static u64 cpus_pstate_start_times[MAX_CPUS]; static u64 cpus_pstate_state[MAX_CPUS]; -static int process_comm_event(struct perf_tool *tool __maybe_unused, +static int process_comm_event(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample __maybe_unused, struct machine *machine __maybe_unused) { - pid_set_comm(event->comm.tid, event->comm.comm); + struct timechart *tchart = container_of(tool, struct timechart, tool); + pid_set_comm(tchart, event->comm.tid, event->comm.comm); return 0; } -static int process_fork_event(struct perf_tool *tool __maybe_unused, +static int process_fork_event(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample __maybe_unused, struct machine *machine __maybe_unused) { - pid_fork(event->fork.pid, event->fork.ppid, event->fork.time); + struct timechart *tchart = container_of(tool, struct timechart, tool); + pid_fork(tchart, event->fork.pid, event->fork.ppid, event->fork.time); return 0; } -static int process_exit_event(struct perf_tool *tool __maybe_unused, +static int process_exit_event(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample __maybe_unused, struct machine *machine __maybe_unused) { - pid_exit(event->fork.pid, event->fork.time); + struct timechart *tchart = container_of(tool, struct timechart, tool); + pid_exit(tchart, event->fork.pid, event->fork.time); return 0; } -struct trace_entry { - unsigned short type; - unsigned char flags; - unsigned char preempt_count; - int pid; - int lock_depth; -}; - #ifdef SUPPORT_OLD_POWER_EVENTS static int use_old_power_events; -struct power_entry_old { - struct trace_entry te; - u64 type; - u64 value; - u64 cpu_id; -}; #endif -struct power_processor_entry { - struct trace_entry te; - u32 state; - u32 cpu_id; -}; - -#define TASK_COMM_LEN 16 -struct wakeup_entry { - struct trace_entry te; - char comm[TASK_COMM_LEN]; - int pid; - int prio; - int success; -}; - -struct sched_switch { - struct trace_entry te; - char prev_comm[TASK_COMM_LEN]; - int prev_pid; - int prev_prio; - long prev_state; /* Arjan weeps. */ - char next_comm[TASK_COMM_LEN]; - int next_pid; - int next_prio; -}; - static void c_state_start(int cpu, u64 timestamp, int state) { cpus_cstate_start_times[cpu] = timestamp; cpus_cstate_state[cpu] = state; } -static void c_state_end(int cpu, u64 timestamp) +static void c_state_end(struct timechart *tchart, int cpu, u64 timestamp) { struct power_event *pwr = zalloc(sizeof(*pwr)); @@ -361,12 +325,12 @@ static void c_state_end(int cpu, u64 timestamp) pwr->end_time = timestamp; pwr->cpu = cpu; pwr->type = CSTATE; - pwr->next = power_events; + pwr->next = tchart->power_events; - power_events = pwr; + tchart->power_events = pwr; } -static void p_state_change(int cpu, u64 timestamp, u64 new_freq) +static void p_state_change(struct timechart *tchart, int cpu, u64 timestamp, u64 new_freq) { struct power_event *pwr; @@ -382,73 +346,78 @@ static void p_state_change(int cpu, u64 timestamp, u64 new_freq) pwr->end_time = timestamp; pwr->cpu = cpu; pwr->type = PSTATE; - pwr->next = power_events; + pwr->next = tchart->power_events; if (!pwr->start_time) - pwr->start_time = first_time; + pwr->start_time = tchart->first_time; - power_events = pwr; + tchart->power_events = pwr; cpus_pstate_state[cpu] = new_freq; cpus_pstate_start_times[cpu] = timestamp; - if ((u64)new_freq > max_freq) - max_freq = new_freq; + if ((u64)new_freq > tchart->max_freq) + tchart->max_freq = new_freq; - if (new_freq < min_freq || min_freq == 0) - min_freq = new_freq; + if (new_freq < tchart->min_freq || tchart->min_freq == 0) + tchart->min_freq = new_freq; - if (new_freq == max_freq - 1000) - turbo_frequency = max_freq; + if (new_freq == tchart->max_freq - 1000) + tchart->turbo_frequency = tchart->max_freq; } -static void -sched_wakeup(int cpu, u64 timestamp, int pid, struct trace_entry *te) +static void sched_wakeup(struct timechart *tchart, int cpu, u64 timestamp, + int waker, int wakee, u8 flags, const char *backtrace) { struct per_pid *p; - struct wakeup_entry *wake = (void *)te; struct wake_event *we = zalloc(sizeof(*we)); if (!we) return; we->time = timestamp; - we->waker = pid; + we->waker = waker; + we->backtrace = backtrace; - if ((te->flags & TRACE_FLAG_HARDIRQ) || (te->flags & TRACE_FLAG_SOFTIRQ)) + if ((flags & TRACE_FLAG_HARDIRQ) || (flags & TRACE_FLAG_SOFTIRQ)) we->waker = -1; - we->wakee = wake->pid; - we->next = wake_events; - wake_events = we; - p = find_create_pid(we->wakee); + we->wakee = wakee; + we->next = tchart->wake_events; + tchart->wake_events = we; + p = find_create_pid(tchart, we->wakee); if (p && p->current && p->current->state == TYPE_NONE) { p->current->state_since = timestamp; p->current->state = TYPE_WAITING; } if (p && p->current && p->current->state == TYPE_BLOCKED) { - pid_put_sample(p->pid, p->current->state, cpu, p->current->state_since, timestamp); + pid_put_sample(tchart, p->pid, p->current->state, cpu, + p->current->state_since, timestamp, NULL); p->current->state_since = timestamp; p->current->state = TYPE_WAITING; } } -static void sched_switch(int cpu, u64 timestamp, struct trace_entry *te) +static void sched_switch(struct timechart *tchart, int cpu, u64 timestamp, + int prev_pid, int next_pid, u64 prev_state, + const char *backtrace) { struct per_pid *p = NULL, *prev_p; - struct sched_switch *sw = (void *)te; - - prev_p = find_create_pid(sw->prev_pid); + prev_p = find_create_pid(tchart, prev_pid); - p = find_create_pid(sw->next_pid); + p = find_create_pid(tchart, next_pid); if (prev_p->current && prev_p->current->state != TYPE_NONE) - pid_put_sample(sw->prev_pid, TYPE_RUNNING, cpu, prev_p->current->state_since, timestamp); + pid_put_sample(tchart, prev_pid, TYPE_RUNNING, cpu, + prev_p->current->state_since, timestamp, + backtrace); if (p && p->current) { if (p->current->state != TYPE_NONE) - pid_put_sample(sw->next_pid, p->current->state, cpu, p->current->state_since, timestamp); + pid_put_sample(tchart, next_pid, p->current->state, cpu, + p->current->state_since, timestamp, + backtrace); p->current->state_since = timestamp; p->current->state = TYPE_RUNNING; @@ -457,109 +426,214 @@ static void sched_switch(int cpu, u64 timestamp, struct trace_entry *te) if (prev_p->current) { prev_p->current->state = TYPE_NONE; prev_p->current->state_since = timestamp; - if (sw->prev_state & 2) + if (prev_state & 2) prev_p->current->state = TYPE_BLOCKED; - if (sw->prev_state == 0) + if (prev_state == 0) prev_p->current->state = TYPE_WAITING; } } -typedef int (*tracepoint_handler)(struct perf_evsel *evsel, - struct perf_sample *sample); +static const char *cat_backtrace(union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct addr_location al; + unsigned int i; + char *p = NULL; + size_t p_len; + u8 cpumode = PERF_RECORD_MISC_USER; + struct addr_location tal; + struct ip_callchain *chain = sample->callchain; + FILE *f = open_memstream(&p, &p_len); + + if (!f) { + perror("open_memstream error"); + return NULL; + } + + if (!chain) + goto exit; + + if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { + fprintf(stderr, "problem processing %d event, skipping it.\n", + event->header.type); + goto exit; + } + + for (i = 0; i < chain->nr; i++) { + u64 ip; + + if (callchain_param.order == ORDER_CALLEE) + ip = chain->ips[i]; + else + ip = chain->ips[chain->nr - i - 1]; + + if (ip >= PERF_CONTEXT_MAX) { + switch (ip) { + case PERF_CONTEXT_HV: + cpumode = PERF_RECORD_MISC_HYPERVISOR; + break; + case PERF_CONTEXT_KERNEL: + cpumode = PERF_RECORD_MISC_KERNEL; + break; + case PERF_CONTEXT_USER: + cpumode = PERF_RECORD_MISC_USER; + break; + default: + pr_debug("invalid callchain context: " + "%"PRId64"\n", (s64) ip); + + /* + * It seems the callchain is corrupted. + * Discard all. + */ + free(p); + p = NULL; + goto exit; + } + continue; + } + + tal.filtered = false; + thread__find_addr_location(al.thread, machine, cpumode, + MAP__FUNCTION, ip, &tal); + + if (tal.sym) + fprintf(f, "..... %016" PRIx64 " %s\n", ip, + tal.sym->name); + else + fprintf(f, "..... %016" PRIx64 "\n", ip); + } + +exit: + fclose(f); + + return p; +} + +typedef int (*tracepoint_handler)(struct timechart *tchart, + struct perf_evsel *evsel, + struct perf_sample *sample, + const char *backtrace); -static int process_sample_event(struct perf_tool *tool __maybe_unused, - union perf_event *event __maybe_unused, +static int process_sample_event(struct perf_tool *tool, + union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, - struct machine *machine __maybe_unused) + struct machine *machine) { + struct timechart *tchart = container_of(tool, struct timechart, tool); + if (evsel->attr.sample_type & PERF_SAMPLE_TIME) { - if (!first_time || first_time > sample->time) - first_time = sample->time; - if (last_time < sample->time) - last_time = sample->time; + if (!tchart->first_time || tchart->first_time > sample->time) + tchart->first_time = sample->time; + if (tchart->last_time < sample->time) + tchart->last_time = sample->time; } - if (sample->cpu > numcpus) - numcpus = sample->cpu; + if (sample->cpu > tchart->numcpus) + tchart->numcpus = sample->cpu; if (evsel->handler != NULL) { tracepoint_handler f = evsel->handler; - return f(evsel, sample); + return f(tchart, evsel, sample, cat_backtrace(event, sample, machine)); } return 0; } static int -process_sample_cpu_idle(struct perf_evsel *evsel __maybe_unused, - struct perf_sample *sample) +process_sample_cpu_idle(struct timechart *tchart __maybe_unused, + struct perf_evsel *evsel, + struct perf_sample *sample, + const char *backtrace __maybe_unused) { - struct power_processor_entry *ppe = sample->raw_data; + u32 state = perf_evsel__intval(evsel, sample, "state"); + u32 cpu_id = perf_evsel__intval(evsel, sample, "cpu_id"); - if (ppe->state == (u32) PWR_EVENT_EXIT) - c_state_end(ppe->cpu_id, sample->time); + if (state == (u32)PWR_EVENT_EXIT) + c_state_end(tchart, cpu_id, sample->time); else - c_state_start(ppe->cpu_id, sample->time, ppe->state); + c_state_start(cpu_id, sample->time, state); return 0; } static int -process_sample_cpu_frequency(struct perf_evsel *evsel __maybe_unused, - struct perf_sample *sample) +process_sample_cpu_frequency(struct timechart *tchart, + struct perf_evsel *evsel, + struct perf_sample *sample, + const char *backtrace __maybe_unused) { - struct power_processor_entry *ppe = sample->raw_data; + u32 state = perf_evsel__intval(evsel, sample, "state"); + u32 cpu_id = perf_evsel__intval(evsel, sample, "cpu_id"); - p_state_change(ppe->cpu_id, sample->time, ppe->state); + p_state_change(tchart, cpu_id, sample->time, state); return 0; } static int -process_sample_sched_wakeup(struct perf_evsel *evsel __maybe_unused, - struct perf_sample *sample) +process_sample_sched_wakeup(struct timechart *tchart, + struct perf_evsel *evsel, + struct perf_sample *sample, + const char *backtrace) { - struct trace_entry *te = sample->raw_data; + u8 flags = perf_evsel__intval(evsel, sample, "common_flags"); + int waker = perf_evsel__intval(evsel, sample, "common_pid"); + int wakee = perf_evsel__intval(evsel, sample, "pid"); - sched_wakeup(sample->cpu, sample->time, sample->pid, te); + sched_wakeup(tchart, sample->cpu, sample->time, waker, wakee, flags, backtrace); return 0; } static int -process_sample_sched_switch(struct perf_evsel *evsel __maybe_unused, - struct perf_sample *sample) +process_sample_sched_switch(struct timechart *tchart, + struct perf_evsel *evsel, + struct perf_sample *sample, + const char *backtrace) { - struct trace_entry *te = sample->raw_data; + int prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"); + int next_pid = perf_evsel__intval(evsel, sample, "next_pid"); + u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state"); - sched_switch(sample->cpu, sample->time, te); + sched_switch(tchart, sample->cpu, sample->time, prev_pid, next_pid, + prev_state, backtrace); return 0; } #ifdef SUPPORT_OLD_POWER_EVENTS static int -process_sample_power_start(struct perf_evsel *evsel __maybe_unused, - struct perf_sample *sample) +process_sample_power_start(struct timechart *tchart __maybe_unused, + struct perf_evsel *evsel, + struct perf_sample *sample, + const char *backtrace __maybe_unused) { - struct power_entry_old *peo = sample->raw_data; + u64 cpu_id = perf_evsel__intval(evsel, sample, "cpu_id"); + u64 value = perf_evsel__intval(evsel, sample, "value"); - c_state_start(peo->cpu_id, sample->time, peo->value); + c_state_start(cpu_id, sample->time, value); return 0; } static int -process_sample_power_end(struct perf_evsel *evsel __maybe_unused, - struct perf_sample *sample) +process_sample_power_end(struct timechart *tchart, + struct perf_evsel *evsel __maybe_unused, + struct perf_sample *sample, + const char *backtrace __maybe_unused) { - c_state_end(sample->cpu, sample->time); + c_state_end(tchart, sample->cpu, sample->time); return 0; } static int -process_sample_power_frequency(struct perf_evsel *evsel __maybe_unused, - struct perf_sample *sample) +process_sample_power_frequency(struct timechart *tchart, + struct perf_evsel *evsel, + struct perf_sample *sample, + const char *backtrace __maybe_unused) { - struct power_entry_old *peo = sample->raw_data; + u64 cpu_id = perf_evsel__intval(evsel, sample, "cpu_id"); + u64 value = perf_evsel__intval(evsel, sample, "value"); - p_state_change(peo->cpu_id, sample->time, peo->value); + p_state_change(tchart, cpu_id, sample->time, value); return 0; } #endif /* SUPPORT_OLD_POWER_EVENTS */ @@ -568,12 +642,12 @@ process_sample_power_frequency(struct perf_evsel *evsel __maybe_unused, * After the last sample we need to wrap up the current C/P state * and close out each CPU for these. */ -static void end_sample_processing(void) +static void end_sample_processing(struct timechart *tchart) { u64 cpu; struct power_event *pwr; - for (cpu = 0; cpu <= numcpus; cpu++) { + for (cpu = 0; cpu <= tchart->numcpus; cpu++) { /* C state */ #if 0 pwr = zalloc(sizeof(*pwr)); @@ -582,12 +656,12 @@ static void end_sample_processing(void) pwr->state = cpus_cstate_state[cpu]; pwr->start_time = cpus_cstate_start_times[cpu]; - pwr->end_time = last_time; + pwr->end_time = tchart->last_time; pwr->cpu = cpu; pwr->type = CSTATE; - pwr->next = power_events; + pwr->next = tchart->power_events; - power_events = pwr; + tchart->power_events = pwr; #endif /* P state */ @@ -597,32 +671,32 @@ static void end_sample_processing(void) pwr->state = cpus_pstate_state[cpu]; pwr->start_time = cpus_pstate_start_times[cpu]; - pwr->end_time = last_time; + pwr->end_time = tchart->last_time; pwr->cpu = cpu; pwr->type = PSTATE; - pwr->next = power_events; + pwr->next = tchart->power_events; if (!pwr->start_time) - pwr->start_time = first_time; + pwr->start_time = tchart->first_time; if (!pwr->state) - pwr->state = min_freq; - power_events = pwr; + pwr->state = tchart->min_freq; + tchart->power_events = pwr; } } /* * Sort the pid datastructure */ -static void sort_pids(void) +static void sort_pids(struct timechart *tchart) { struct per_pid *new_list, *p, *cursor, *prev; /* sort by ppid first, then by pid, lowest to highest */ new_list = NULL; - while (all_data) { - p = all_data; - all_data = p->next; + while (tchart->all_data) { + p = tchart->all_data; + tchart->all_data = p->next; p->next = NULL; if (new_list == NULL) { @@ -655,14 +729,14 @@ static void sort_pids(void) prev->next = p; } } - all_data = new_list; + tchart->all_data = new_list; } -static void draw_c_p_states(void) +static void draw_c_p_states(struct timechart *tchart) { struct power_event *pwr; - pwr = power_events; + pwr = tchart->power_events; /* * two pass drawing so that the P state bars are on top of the C state blocks @@ -673,30 +747,30 @@ static void draw_c_p_states(void) pwr = pwr->next; } - pwr = power_events; + pwr = tchart->power_events; while (pwr) { if (pwr->type == PSTATE) { if (!pwr->state) - pwr->state = min_freq; + pwr->state = tchart->min_freq; svg_pstate(pwr->cpu, pwr->start_time, pwr->end_time, pwr->state); } pwr = pwr->next; } } -static void draw_wakeups(void) +static void draw_wakeups(struct timechart *tchart) { struct wake_event *we; struct per_pid *p; struct per_pidcomm *c; - we = wake_events; + we = tchart->wake_events; while (we) { int from = 0, to = 0; char *task_from = NULL, *task_to = NULL; /* locate the column of the waker and wakee */ - p = all_data; + p = tchart->all_data; while (p) { if (p->pid == we->waker || p->pid == we->wakee) { c = p->all; @@ -739,11 +813,12 @@ static void draw_wakeups(void) } if (we->waker == -1) - svg_interrupt(we->time, to); + svg_interrupt(we->time, to, we->backtrace); else if (from && to && abs(from - to) == 1) - svg_wakeline(we->time, from, to); + svg_wakeline(we->time, from, to, we->backtrace); else - svg_partial_wakeline(we->time, from, task_from, to, task_to); + svg_partial_wakeline(we->time, from, task_from, to, + task_to, we->backtrace); we = we->next; free(task_from); @@ -751,12 +826,12 @@ static void draw_wakeups(void) } } -static void draw_cpu_usage(void) +static void draw_cpu_usage(struct timechart *tchart) { struct per_pid *p; struct per_pidcomm *c; struct cpu_sample *sample; - p = all_data; + p = tchart->all_data; while (p) { c = p->all; while (c) { @@ -773,16 +848,16 @@ static void draw_cpu_usage(void) } } -static void draw_process_bars(void) +static void draw_process_bars(struct timechart *tchart) { struct per_pid *p; struct per_pidcomm *c; struct cpu_sample *sample; int Y = 0; - Y = 2 * numcpus + 2; + Y = 2 * tchart->numcpus + 2; - p = all_data; + p = tchart->all_data; while (p) { c = p->all; while (c) { @@ -796,11 +871,20 @@ static void draw_process_bars(void) sample = c->samples; while (sample) { if (sample->type == TYPE_RUNNING) - svg_sample(Y, sample->cpu, sample->start_time, sample->end_time); + svg_running(Y, sample->cpu, + sample->start_time, + sample->end_time, + sample->backtrace); if (sample->type == TYPE_BLOCKED) - svg_box(Y, sample->start_time, sample->end_time, "blocked"); + svg_blocked(Y, sample->cpu, + sample->start_time, + sample->end_time, + sample->backtrace); if (sample->type == TYPE_WAITING) - svg_waiting(Y, sample->start_time, sample->end_time); + svg_waiting(Y, sample->cpu, + sample->start_time, + sample->end_time, + sample->backtrace); sample = sample->next; } @@ -853,21 +937,21 @@ static int passes_filter(struct per_pid *p, struct per_pidcomm *c) return 0; } -static int determine_display_tasks_filtered(void) +static int determine_display_tasks_filtered(struct timechart *tchart) { struct per_pid *p; struct per_pidcomm *c; int count = 0; - p = all_data; + p = tchart->all_data; while (p) { p->display = 0; if (p->start_time == 1) - p->start_time = first_time; + p->start_time = tchart->first_time; /* no exit marker, task kept running to the end */ if (p->end_time == 0) - p->end_time = last_time; + p->end_time = tchart->last_time; c = p->all; @@ -875,7 +959,7 @@ static int determine_display_tasks_filtered(void) c->display = 0; if (c->start_time == 1) - c->start_time = first_time; + c->start_time = tchart->first_time; if (passes_filter(p, c)) { c->display = 1; @@ -884,7 +968,7 @@ static int determine_display_tasks_filtered(void) } if (c->end_time == 0) - c->end_time = last_time; + c->end_time = tchart->last_time; c = c->next; } @@ -893,25 +977,25 @@ static int determine_display_tasks_filtered(void) return count; } -static int determine_display_tasks(u64 threshold) +static int determine_display_tasks(struct timechart *tchart, u64 threshold) { struct per_pid *p; struct per_pidcomm *c; int count = 0; if (process_filter) - return determine_display_tasks_filtered(); + return determine_display_tasks_filtered(tchart); - p = all_data; + p = tchart->all_data; while (p) { p->display = 0; if (p->start_time == 1) - p->start_time = first_time; + p->start_time = tchart->first_time; /* no exit marker, task kept running to the end */ if (p->end_time == 0) - p->end_time = last_time; - if (p->total_time >= threshold && !power_only) + p->end_time = tchart->last_time; + if (p->total_time >= threshold) p->display = 1; c = p->all; @@ -920,15 +1004,15 @@ static int determine_display_tasks(u64 threshold) c->display = 0; if (c->start_time == 1) - c->start_time = first_time; + c->start_time = tchart->first_time; - if (c->total_time >= threshold && !power_only) { + if (c->total_time >= threshold) { c->display = 1; count++; } if (c->end_time == 0) - c->end_time = last_time; + c->end_time = tchart->last_time; c = c->next; } @@ -941,45 +1025,45 @@ static int determine_display_tasks(u64 threshold) #define TIME_THRESH 10000000 -static void write_svg_file(const char *filename) +static void write_svg_file(struct timechart *tchart, const char *filename) { u64 i; int count; + int thresh = TIME_THRESH; - numcpus++; + tchart->numcpus++; + if (tchart->power_only) + tchart->proc_num = 0; - count = determine_display_tasks(TIME_THRESH); + /* We'd like to show at least proc_num tasks; + * be less picky if we have fewer */ + do { + count = determine_display_tasks(tchart, thresh); + thresh /= 10; + } while (!process_filter && thresh && count < tchart->proc_num); - /* We'd like to show at least 15 tasks; be less picky if we have fewer */ - if (count < 15) - count = determine_display_tasks(TIME_THRESH / 10); - - open_svg(filename, numcpus, count, first_time, last_time); + open_svg(filename, tchart->numcpus, count, tchart->first_time, tchart->last_time); svg_time_grid(); svg_legenda(); - for (i = 0; i < numcpus; i++) - svg_cpu_box(i, max_freq, turbo_frequency); + for (i = 0; i < tchart->numcpus; i++) + svg_cpu_box(i, tchart->max_freq, tchart->turbo_frequency); - draw_cpu_usage(); - draw_process_bars(); - draw_c_p_states(); - draw_wakeups(); + draw_cpu_usage(tchart); + if (tchart->proc_num) + draw_process_bars(tchart); + if (!tchart->tasks_only) + draw_c_p_states(tchart); + if (tchart->proc_num) + draw_wakeups(tchart); svg_close(); } -static int __cmd_timechart(const char *output_name) +static int __cmd_timechart(struct timechart *tchart, const char *output_name) { - struct perf_tool perf_timechart = { - .comm = process_comm_event, - .fork = process_fork_event, - .exit = process_exit_event, - .sample = process_sample_event, - .ordered_samples = true, - }; const struct perf_evsel_str_handler power_tracepoints[] = { { "power:cpu_idle", process_sample_cpu_idle }, { "power:cpu_frequency", process_sample_cpu_frequency }, @@ -997,7 +1081,7 @@ static int __cmd_timechart(const char *output_name) }; struct perf_session *session = perf_session__new(&file, false, - &perf_timechart); + &tchart->tool); int ret = -EINVAL; if (session == NULL) @@ -1012,69 +1096,111 @@ static int __cmd_timechart(const char *output_name) goto out_delete; } - ret = perf_session__process_events(session, &perf_timechart); + ret = perf_session__process_events(session, &tchart->tool); if (ret) goto out_delete; - end_sample_processing(); + end_sample_processing(tchart); - sort_pids(); + sort_pids(tchart); - write_svg_file(output_name); + write_svg_file(tchart, output_name); pr_info("Written %2.1f seconds of trace to %s.\n", - (last_time - first_time) / 1000000000.0, output_name); + (tchart->last_time - tchart->first_time) / 1000000000.0, output_name); out_delete: perf_session__delete(session); return ret; } -static int __cmd_record(int argc, const char **argv) +static int timechart__record(struct timechart *tchart, int argc, const char **argv) { -#ifdef SUPPORT_OLD_POWER_EVENTS - const char * const record_old_args[] = { + unsigned int rec_argc, i, j; + const char **rec_argv; + const char **p; + unsigned int record_elems; + + const char * const common_args[] = { "record", "-a", "-R", "-c", "1", + }; + unsigned int common_args_nr = ARRAY_SIZE(common_args); + + const char * const backtrace_args[] = { + "-g", + }; + unsigned int backtrace_args_no = ARRAY_SIZE(backtrace_args); + + const char * const power_args[] = { + "-e", "power:cpu_frequency", + "-e", "power:cpu_idle", + }; + unsigned int power_args_nr = ARRAY_SIZE(power_args); + + const char * const old_power_args[] = { +#ifdef SUPPORT_OLD_POWER_EVENTS "-e", "power:power_start", "-e", "power:power_end", "-e", "power:power_frequency", - "-e", "sched:sched_wakeup", - "-e", "sched:sched_switch", - }; #endif - const char * const record_new_args[] = { - "record", "-a", "-R", "-c", "1", - "-e", "power:cpu_frequency", - "-e", "power:cpu_idle", + }; + unsigned int old_power_args_nr = ARRAY_SIZE(old_power_args); + + const char * const tasks_args[] = { "-e", "sched:sched_wakeup", "-e", "sched:sched_switch", }; - unsigned int rec_argc, i, j; - const char **rec_argv; - const char * const *record_args = record_new_args; - unsigned int record_elems = ARRAY_SIZE(record_new_args); + unsigned int tasks_args_nr = ARRAY_SIZE(tasks_args); #ifdef SUPPORT_OLD_POWER_EVENTS if (!is_valid_tracepoint("power:cpu_idle") && is_valid_tracepoint("power:power_start")) { use_old_power_events = 1; - record_args = record_old_args; - record_elems = ARRAY_SIZE(record_old_args); + power_args_nr = 0; + } else { + old_power_args_nr = 0; } #endif - rec_argc = record_elems + argc - 1; + if (tchart->power_only) + tasks_args_nr = 0; + + if (tchart->tasks_only) { + power_args_nr = 0; + old_power_args_nr = 0; + } + + if (!tchart->with_backtrace) + backtrace_args_no = 0; + + record_elems = common_args_nr + tasks_args_nr + + power_args_nr + old_power_args_nr + backtrace_args_no; + + rec_argc = record_elems + argc; rec_argv = calloc(rec_argc + 1, sizeof(char *)); if (rec_argv == NULL) return -ENOMEM; - for (i = 0; i < record_elems; i++) - rec_argv[i] = strdup(record_args[i]); + p = rec_argv; + for (i = 0; i < common_args_nr; i++) + *p++ = strdup(common_args[i]); + + for (i = 0; i < backtrace_args_no; i++) + *p++ = strdup(backtrace_args[i]); + + for (i = 0; i < tasks_args_nr; i++) + *p++ = strdup(tasks_args[i]); + + for (i = 0; i < power_args_nr; i++) + *p++ = strdup(power_args[i]); - for (j = 1; j < (unsigned int)argc; j++, i++) - rec_argv[i] = argv[j]; + for (i = 0; i < old_power_args_nr; i++) + *p++ = strdup(old_power_args[i]); - return cmd_record(i, rec_argv, NULL); + for (j = 1; j < (unsigned int)argc; j++) + *p++ = argv[j]; + + return cmd_record(rec_argc, rec_argv, NULL); } static int @@ -1089,17 +1215,31 @@ parse_process(const struct option *opt __maybe_unused, const char *arg, int cmd_timechart(int argc, const char **argv, const char *prefix __maybe_unused) { + struct timechart tchart = { + .tool = { + .comm = process_comm_event, + .fork = process_fork_event, + .exit = process_exit_event, + .sample = process_sample_event, + .ordered_samples = true, + }, + .proc_num = 15, + }; const char *output_name = "output.svg"; - const struct option options[] = { + const struct option timechart_options[] = { OPT_STRING('i', "input", &input_name, "file", "input file name"), OPT_STRING('o', "output", &output_name, "file", "output file name"), OPT_INTEGER('w', "width", &svg_page_width, "page width"), - OPT_BOOLEAN('P', "power-only", &power_only, "output power data only"), + OPT_BOOLEAN('P', "power-only", &tchart.power_only, "output power data only"), + OPT_BOOLEAN('T', "tasks-only", &tchart.tasks_only, + "output processes data only"), OPT_CALLBACK('p', "process", NULL, "process", "process selector. Pass a pid or process name.", parse_process), OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", "Look for files with symbols relative to this directory"), + OPT_INTEGER('n', "proc-num", &tchart.proc_num, + "min. number of tasks to print"), OPT_END() }; const char * const timechart_usage[] = { @@ -1107,17 +1247,41 @@ int cmd_timechart(int argc, const char **argv, NULL }; - argc = parse_options(argc, argv, options, timechart_usage, + const struct option record_options[] = { + OPT_BOOLEAN('P', "power-only", &tchart.power_only, "output power data only"), + OPT_BOOLEAN('T', "tasks-only", &tchart.tasks_only, + "output processes data only"), + OPT_BOOLEAN('g', "callchain", &tchart.with_backtrace, "record callchain"), + OPT_END() + }; + const char * const record_usage[] = { + "perf timechart record [<options>]", + NULL + }; + argc = parse_options(argc, argv, timechart_options, timechart_usage, PARSE_OPT_STOP_AT_NON_OPTION); + if (tchart.power_only && tchart.tasks_only) { + pr_err("-P and -T options cannot be used at the same time.\n"); + return -1; + } + symbol__init(); - if (argc && !strncmp(argv[0], "rec", 3)) - return __cmd_record(argc, argv); - else if (argc) - usage_with_options(timechart_usage, options); + if (argc && !strncmp(argv[0], "rec", 3)) { + argc = parse_options(argc, argv, record_options, record_usage, + PARSE_OPT_STOP_AT_NON_OPTION); + + if (tchart.power_only && tchart.tasks_only) { + pr_err("-P and -T options cannot be used at the same time.\n"); + return -1; + } + + return timechart__record(&tchart, argc, argv); + } else if (argc) + usage_with_options(timechart_usage, timechart_options); setup_pager(); - return __cmd_timechart(output_name); + return __cmd_timechart(&tchart, output_name); } diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 71e6402729a8..03d37a76c612 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -634,26 +634,9 @@ repeat: return NULL; } -/* Tag samples to be skipped. */ -static const char *skip_symbols[] = { - "intel_idle", - "default_idle", - "native_safe_halt", - "cpu_idle", - "enter_idle", - "exit_idle", - "mwait_idle", - "mwait_idle_with_hints", - "poll_idle", - "ppc64_runlatch_off", - "pseries_dedicated_idle_sleep", - NULL -}; - static int symbol_filter(struct map *map __maybe_unused, struct symbol *sym) { const char *name = sym->name; - int i; /* * ppc64 uses function descriptors and appends a '.' to the @@ -671,12 +654,8 @@ static int symbol_filter(struct map *map __maybe_unused, struct symbol *sym) strstr(name, "_text_end")) return 1; - for (i = 0; skip_symbols[i]; i++) { - if (!strcmp(skip_symbols[i], name)) { - sym->ignore = true; - break; - } - } + if (symbol__is_idle(sym)) + sym->ignore = true; return 0; } @@ -1084,7 +1063,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) "dump the symbol table used for profiling"), OPT_INTEGER('f', "count-filter", &top.count_filter, "only display functions with more events than this"), - OPT_BOOLEAN('g', "group", &opts->group, + OPT_BOOLEAN(0, "group", &opts->group, "put the counters into a counter group"), OPT_BOOLEAN('i', "no-inherit", &opts->no_inherit, "child tasks do not inherit counters"), @@ -1105,7 +1084,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) " abort, in_tx, transaction"), OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples, "Show a column with the number of samples"), - OPT_CALLBACK_NOOPT('G', NULL, &top.record_opts, + OPT_CALLBACK_NOOPT('g', NULL, &top.record_opts, NULL, "enables call-graph recording", &callchain_opt), OPT_CALLBACK(0, "call-graph", &top.record_opts, diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 8be17fc462ba..56bbca5bc2dc 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -11,6 +11,8 @@ #include "util/intlist.h" #include "util/thread_map.h" #include "util/stat.h" +#include "trace-event.h" +#include "util/parse-events.h" #include <libaudit.h> #include <stdlib.h> @@ -172,6 +174,10 @@ static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void { struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction); + /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */ + if (evsel == NULL) + evsel = perf_evsel__newtp("syscalls", direction); + if (evsel) { if (perf_evsel__init_syscall_tp(evsel, handler)) goto out_delete; @@ -1430,11 +1436,11 @@ static int trace__read_syscall_info(struct trace *trace, int id) sc->fmt = syscall_fmt__find(sc->name); snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name); - sc->tp_format = event_format__new("syscalls", tp_name); + sc->tp_format = trace_event__tp_format("syscalls", tp_name); if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) { snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias); - sc->tp_format = event_format__new("syscalls", tp_name); + sc->tp_format = trace_event__tp_format("syscalls", tp_name); } if (sc->tp_format == NULL) @@ -1764,8 +1770,10 @@ static int trace__process_sample(struct perf_tool *tool, if (!trace->full_time && trace->base_time == 0) trace->base_time = sample->time; - if (handler) + if (handler) { + ++trace->nr_events; handler(trace, evsel, sample); + } return err; } @@ -1800,10 +1808,11 @@ static int trace__record(int argc, const char **argv) "-R", "-m", "1024", "-c", "1", - "-e", "raw_syscalls:sys_enter,raw_syscalls:sys_exit", + "-e", }; - rec_argc = ARRAY_SIZE(record_args) + argc; + /* +1 is for the event string below */ + rec_argc = ARRAY_SIZE(record_args) + 1 + argc; rec_argv = calloc(rec_argc + 1, sizeof(char *)); if (rec_argv == NULL) @@ -1812,6 +1821,17 @@ static int trace__record(int argc, const char **argv) for (i = 0; i < ARRAY_SIZE(record_args); i++) rec_argv[i] = record_args[i]; + /* event string may be different for older kernels - e.g., RHEL6 */ + if (is_valid_tracepoint("raw_syscalls:sys_enter")) + rec_argv[i] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit"; + else if (is_valid_tracepoint("syscalls:sys_enter")) + rec_argv[i] = "syscalls:sys_enter,syscalls:sys_exit"; + else { + pr_err("Neither raw_syscalls nor syscalls events exist.\n"); + return -1; + } + i++; + for (j = 0; j < (unsigned int)argc; j++, i++) rec_argv[i] = argv[j]; @@ -1890,7 +1910,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) if (err < 0) goto out_error_open; - err = perf_evlist__mmap(evlist, UINT_MAX, false); + err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false); if (err < 0) { fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno)); goto out_close_evlist; @@ -2047,6 +2067,10 @@ static int trace__replay(struct trace *trace) evsel = perf_evlist__find_tracepoint_by_name(session->evlist, "raw_syscalls:sys_enter"); + /* older kernels have syscalls tp versus raw_syscalls */ + if (evsel == NULL) + evsel = perf_evlist__find_tracepoint_by_name(session->evlist, + "syscalls:sys_enter"); if (evsel == NULL) { pr_err("Data file does not have raw_syscalls:sys_enter event\n"); goto out; @@ -2060,6 +2084,9 @@ static int trace__replay(struct trace *trace) evsel = perf_evlist__find_tracepoint_by_name(session->evlist, "raw_syscalls:sys_exit"); + if (evsel == NULL) + evsel = perf_evlist__find_tracepoint_by_name(session->evlist, + "syscalls:sys_exit"); if (evsel == NULL) { pr_err("Data file does not have raw_syscalls:sys_exit event\n"); goto out; @@ -2158,7 +2185,6 @@ static int trace__fprintf_one_thread(struct thread *thread, void *priv) size_t printed = data->printed; struct trace *trace = data->trace; struct thread_trace *ttrace = thread->priv; - const char *color; double ratio; if (ttrace == NULL) @@ -2166,17 +2192,9 @@ static int trace__fprintf_one_thread(struct thread *thread, void *priv) ratio = (double)ttrace->nr_events / trace->nr_events * 100.0; - color = PERF_COLOR_NORMAL; - if (ratio > 50.0) - color = PERF_COLOR_RED; - else if (ratio > 25.0) - color = PERF_COLOR_GREEN; - else if (ratio > 5.0) - color = PERF_COLOR_YELLOW; - - printed += color_fprintf(fp, color, " %s (%d), ", thread__comm_str(thread), thread->tid); + printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid); printed += fprintf(fp, "%lu events, ", ttrace->nr_events); - printed += color_fprintf(fp, color, "%.1f%%", ratio); + printed += fprintf(fp, "%.1f%%", ratio); printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms); printed += thread__dump_stats(ttrace, trace, fp); diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index f7d11a811c74..5a1f4df3c3a8 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -36,6 +36,30 @@ ifeq ($(ARCH),arm) LIBUNWIND_LIBS = -lunwind -lunwind-arm endif +ifeq ($(LIBUNWIND_LIBS),) + NO_LIBUNWIND := 1 +else + # + # For linking with debug library, run like: + # + # make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/ + # + ifdef LIBUNWIND_DIR + LIBUNWIND_CFLAGS = -I$(LIBUNWIND_DIR)/include + LIBUNWIND_LDFLAGS = -L$(LIBUNWIND_DIR)/lib + endif + LIBUNWIND_LDFLAGS += $(LIBUNWIND_LIBS) + + # Set per-feature check compilation flags + FEATURE_CHECK_CFLAGS-libunwind = $(LIBUNWIND_CFLAGS) + FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS) + FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS) + FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS) + # and the flags for the test-all case + FEATURE_CHECK_CFLAGS-all += $(LIBUNWIND_CFLAGS) + FEATURE_CHECK_LDFLAGS-all += $(LIBUNWIND_LDFLAGS) +endif + ifeq ($(NO_PERF_REGS),0) CFLAGS += -DHAVE_PERF_REGS_SUPPORT endif @@ -102,7 +126,7 @@ endif feature_check = $(eval $(feature_check_code)) define feature_check_code - feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS)" LDFLAGS="$(LDFLAGS)" LIBUNWIND_LIBS="$(LIBUNWIND_LIBS)" -C config/feature-checks test-$1 >/dev/null 2>/dev/null && echo 1 || echo 0) + feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C config/feature-checks test-$1 >/dev/null 2>/dev/null && echo 1 || echo 0) endef feature_set = $(eval $(feature_set_code)) @@ -141,7 +165,6 @@ CORE_FEATURE_TESTS = \ libslang \ libunwind \ on-exit \ - stackprotector \ stackprotector-all \ timerfd @@ -209,10 +232,6 @@ ifeq ($(feature-stackprotector-all), 1) CFLAGS += -fstack-protector-all endif -ifeq ($(feature-stackprotector), 1) - CFLAGS += -Wstack-protector -endif - ifeq ($(DEBUG),0) ifeq ($(feature-fortify-source), 1) CFLAGS += -D_FORTIFY_SOURCE=2 @@ -310,21 +329,7 @@ ifndef NO_LIBELF endif # NO_DWARF endif # NO_LIBELF -ifeq ($(LIBUNWIND_LIBS),) - NO_LIBUNWIND := 1 -endif - ifndef NO_LIBUNWIND - # - # For linking with debug library, run like: - # - # make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/ - # - ifdef LIBUNWIND_DIR - LIBUNWIND_CFLAGS := -I$(LIBUNWIND_DIR)/include - LIBUNWIND_LDFLAGS := -L$(LIBUNWIND_DIR)/lib - endif - ifneq ($(feature-libunwind), 1) msg := $(warning No libunwind found, disabling post unwind support. Please install libunwind-dev[el] >= 1.1); NO_LIBUNWIND := 1 @@ -339,14 +344,12 @@ ifndef NO_LIBUNWIND # non-ARM has no dwarf_find_debug_frame() function: CFLAGS += -DNO_LIBUNWIND_DEBUG_FRAME endif - endif -endif -ifndef NO_LIBUNWIND - CFLAGS += -DHAVE_LIBUNWIND_SUPPORT - EXTLIBS += $(LIBUNWIND_LIBS) - CFLAGS += $(LIBUNWIND_CFLAGS) - LDFLAGS += $(LIBUNWIND_LDFLAGS) + CFLAGS += -DHAVE_LIBUNWIND_SUPPORT + EXTLIBS += $(LIBUNWIND_LIBS) + CFLAGS += $(LIBUNWIND_CFLAGS) + LDFLAGS += $(LIBUNWIND_LDFLAGS) + endif # ifneq ($(feature-libunwind), 1) endif ifndef NO_LIBAUDIT @@ -533,7 +536,7 @@ endif ifndef NO_LIBNUMA ifeq ($(feature-libnuma), 0) - msg := $(warning No numa.h found, disables 'perf bench numa mem' benchmark, please install numa-libs-devel or libnuma-dev); + msg := $(warning No numa.h found, disables 'perf bench numa mem' benchmark, please install numactl-devel/libnuma-devel/libnuma-dev); NO_LIBNUMA := 1 else CFLAGS += -DHAVE_LIBNUMA_SUPPORT @@ -598,3 +601,11 @@ else perfexec_instdir = $(prefix)/$(perfexecdir) endif perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir)) + +# If we install to $(HOME) we keep the traceevent default: +# $(HOME)/.traceevent/plugins +# Otherwise we install plugins into the global $(libdir). +ifdef DESTDIR +plugindir=$(libdir)/traceevent/plugins +plugindir_SQ= $(subst ','\'',$(prefix)/$(plugindir)) +endif diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index 87e790017c69..bc86462e80a2 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -26,19 +26,18 @@ FILES= \ test-libunwind-debug-frame \ test-on-exit \ test-stackprotector-all \ - test-stackprotector \ test-timerfd CC := $(CC) -MD all: $(FILES) -BUILD = $(CC) $(CFLAGS) $(LDFLAGS) -o $(OUTPUT)$@ [email protected] +BUILD = $(CC) $(CFLAGS) -o $(OUTPUT)$@ [email protected] $(LDFLAGS) ############################### test-all: - $(BUILD) -Werror -fstack-protector -fstack-protector-all -O2 -Werror -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma $(LIBUNWIND_LIBS) -lelf -laudit -I/usr/include/slang -lslang $(shell pkg-config --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl + $(BUILD) -Werror -fstack-protector-all -O2 -Werror -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -laudit -I/usr/include/slang -lslang $(shell pkg-config --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl test-hello: $(BUILD) @@ -46,9 +45,6 @@ test-hello: test-stackprotector-all: $(BUILD) -Werror -fstack-protector-all -test-stackprotector: - $(BUILD) -Werror -fstack-protector -Wstack-protector - test-fortify-source: $(BUILD) -O2 -Werror -D_FORTIFY_SOURCE=2 @@ -74,10 +70,10 @@ test-libnuma: $(BUILD) -lnuma test-libunwind: - $(BUILD) $(LIBUNWIND_LIBS) -lelf + $(BUILD) -lelf test-libunwind-debug-frame: - $(BUILD) $(LIBUNWIND_LIBS) -lelf + $(BUILD) -lelf test-libaudit: $(BUILD) -laudit diff --git a/tools/perf/config/feature-checks/test-all.c b/tools/perf/config/feature-checks/test-all.c index 59e7a705e146..9b8a544155bb 100644 --- a/tools/perf/config/feature-checks/test-all.c +++ b/tools/perf/config/feature-checks/test-all.c @@ -85,6 +85,10 @@ # include "test-timerfd.c" #undef main +#define main main_test_stackprotector_all +# include "test-stackprotector-all.c" +#undef main + int main(int argc, char *argv[]) { main_test_libpython(); @@ -106,6 +110,7 @@ int main(int argc, char *argv[]) main_test_backtrace(); main_test_libnuma(); main_test_timerfd(); + main_test_stackprotector_all(); return 0; } diff --git a/tools/perf/config/feature-checks/test-stackprotector.c b/tools/perf/config/feature-checks/test-stackprotector.c deleted file mode 100644 index c9f398d87868..000000000000 --- a/tools/perf/config/feature-checks/test-stackprotector.c +++ /dev/null @@ -1,6 +0,0 @@ -#include <stdio.h> - -int main(void) -{ - return puts("hi"); -} diff --git a/tools/perf/bash_completion b/tools/perf/perf-completion.sh index 62e157db2e2b..49494882d9bb 100644 --- a/tools/perf/bash_completion +++ b/tools/perf/perf-completion.sh @@ -1,4 +1,4 @@ -# perf completion +# perf bash and zsh completion # Taken from git.git's completion script. __my_reassemble_comp_words_by_ref() @@ -89,37 +89,113 @@ __ltrim_colon_completions() fi } -type perf &>/dev/null && -_perf() +__perfcomp () { - local cur words cword prev cmd + COMPREPLY=( $( compgen -W "$1" -- "$2" ) ) +} - COMPREPLY=() - _get_comp_words_by_ref -n =: cur words cword prev +__perfcomp_colon () +{ + __perfcomp "$1" "$2" + __ltrim_colon_completions $cur +} + +__perf_main () +{ + local cmd cmd=${words[0]} + COMPREPLY=() # List perf subcommands or long options if [ $cword -eq 1 ]; then if [[ $cur == --* ]]; then - COMPREPLY=( $( compgen -W '--help --version \ + __perfcomp '--help --version \ --exec-path --html-path --paginate --no-pager \ - --perf-dir --work-tree --debugfs-dir' -- "$cur" ) ) + --perf-dir --work-tree --debugfs-dir' -- "$cur" else cmds=$($cmd --list-cmds) - COMPREPLY=( $( compgen -W '$cmds' -- "$cur" ) ) + __perfcomp "$cmds" "$cur" fi # List possible events for -e option elif [[ $prev == "-e" && "${words[1]}" == @(record|stat|top) ]]; then evts=$($cmd list --raw-dump) - COMPREPLY=( $( compgen -W '$evts' -- "$cur" ) ) - __ltrim_colon_completions $cur + __perfcomp_colon "$evts" "$cur" # List long option names elif [[ $cur == --* ]]; then subcmd=${words[1]} opts=$($cmd $subcmd --list-opts) - COMPREPLY=( $( compgen -W '$opts' -- "$cur" ) ) + __perfcomp "$opts" "$cur" fi +} + +if [[ -n ${ZSH_VERSION-} ]]; then + autoload -U +X compinit && compinit + + __perfcomp () + { + emulate -L zsh + + local c IFS=$' \t\n' + local -a array + + for c in ${=1}; do + case $c in + --*=*|*.) ;; + *) c="$c " ;; + esac + array[${#array[@]}+1]="$c" + done + + compset -P '*[=:]' + compadd -Q -S '' -a -- array && _ret=0 + } + + __perfcomp_colon () + { + emulate -L zsh + + local cur_="${2-$cur}" + local c IFS=$' \t\n' + local -a array + + if [[ "$cur_" == *:* ]]; then + local colon_word=${cur_%"${cur_##*:}"} + fi + + for c in ${=1}; do + case $c in + --*=*|*.) ;; + *) c="$c " ;; + esac + array[$#array+1]=${c#"$colon_word"} + done + + compset -P '*[=:]' + compadd -Q -S '' -a -- array && _ret=0 + } + + _perf () + { + local _ret=1 cur cword prev + cur=${words[CURRENT]} + prev=${words[CURRENT-1]} + let cword=CURRENT-1 + emulate ksh -c __perf_main + let _ret && _default && _ret=0 + return _ret + } + + compdef _perf perf + return +fi + +type perf &>/dev/null && +_perf() +{ + local cur words cword prev + _get_comp_words_by_ref -n =: cur words cword prev + __perf_main } && complete -o bashdefault -o default -o nospace -F _perf perf 2>/dev/null \ diff --git a/tools/perf/perf.h b/tools/perf/perf.h index b079304bd53d..b23fed527514 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -254,6 +254,7 @@ struct perf_record_opts { bool inherit_stat; bool no_delay; bool no_inherit; + bool no_inherit_set; bool no_samples; bool raw_samples; bool sample_address; diff --git a/tools/perf/tests/attr/test-record-no-inherit b/tools/perf/tests/attr/test-record-no-inherit index 9079a25cd643..44edcb2edcd5 100644 --- a/tools/perf/tests/attr/test-record-no-inherit +++ b/tools/perf/tests/attr/test-record-no-inherit @@ -3,5 +3,5 @@ command = record args = -i kill >/dev/null 2>&1 [event:base-record] -sample_type=259 +sample_type=263 inherit=0 diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index cf6242c92ee2..0fcd81ea31ae 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -900,7 +900,7 @@ fallback: * cache, or is just a kallsyms file, well, lets hope that this * DSO is the same as when 'perf record' ran. */ - filename = dso->long_name; + filename = (char *)dso->long_name; snprintf(symfs_filename, sizeof(symfs_filename), "%s%s", symbol_conf.symfs, filename); free_filename = false; diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index a92770c98cc7..6baabe63182b 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -89,7 +89,7 @@ int build_id__sprintf(const u8 *build_id, int len, char *bf) return raw - build_id; } -char *dso__build_id_filename(struct dso *dso, char *bf, size_t size) +char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size) { char build_id_hex[BUILD_ID_SIZE * 2 + 1]; diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index 929f28a7c14d..845ef865eced 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -10,7 +10,7 @@ extern struct perf_tool build_id__mark_dso_hit_ops; struct dso; int build_id__sprintf(const u8 *build_id, int len, char *bf); -char *dso__build_id_filename(struct dso *dso, char *bf, size_t size); +char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size); int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index 7d09faf85cf1..1fbcd8bdc11b 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -118,3 +118,9 @@ void perf_data_file__close(struct perf_data_file *file) { close(file->fd); } + +ssize_t perf_data_file__write(struct perf_data_file *file, + void *buf, size_t size) +{ + return writen(file->fd, buf, size); +} diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h index 8c2df80152a5..2b15d0c95c7f 100644 --- a/tools/perf/util/data.h +++ b/tools/perf/util/data.h @@ -9,12 +9,12 @@ enum perf_data_mode { }; struct perf_data_file { - const char *path; - int fd; - bool is_pipe; - bool force; - unsigned long size; - enum perf_data_mode mode; + const char *path; + int fd; + bool is_pipe; + bool force; + unsigned long size; + enum perf_data_mode mode; }; static inline bool perf_data_file__is_read(struct perf_data_file *file) @@ -44,5 +44,7 @@ static inline unsigned long perf_data_file__size(struct perf_data_file *file) int perf_data_file__open(struct perf_data_file *file); void perf_data_file__close(struct perf_data_file *file); +ssize_t perf_data_file__write(struct perf_data_file *file, + void *buf, size_t size); #endif /* __PERF_DATA_H */ diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 399e74c34c1a..8640a9121e72 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -16,13 +16,11 @@ int verbose; bool dump_trace = false, quiet = false; -int eprintf(int level, const char *fmt, ...) +static int _eprintf(int level, const char *fmt, va_list args) { - va_list args; int ret = 0; if (verbose >= level) { - va_start(args, fmt); if (use_browser >= 1) ui_helpline__vshow(fmt, args); else @@ -33,6 +31,32 @@ int eprintf(int level, const char *fmt, ...) return ret; } +int eprintf(int level, const char *fmt, ...) +{ + va_list args; + int ret; + + va_start(args, fmt); + ret = _eprintf(level, fmt, args); + va_end(args); + + return ret; +} + +/* + * Overloading libtraceevent standard info print + * function, display with -v in perf. + */ +void pr_stat(const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + _eprintf(1, fmt, args); + va_end(args); + eprintf(1, "\n"); +} + int dump_printf(const char *fmt, ...) { va_list args; diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index efbd98805ad0..443694c36b03 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -17,4 +17,6 @@ void trace_event(union perf_event *event); int ui__error(const char *format, ...) __attribute__((format(printf, 1, 2))); int ui__warning(const char *format, ...) __attribute__((format(printf, 1, 2))); +void pr_stat(const char *fmt, ...); + #endif /* __PERF_DEBUG_H */ diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index af4c687cc49b..436922f1f9d9 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -28,8 +28,8 @@ char dso__symtab_origin(const struct dso *dso) return origin[dso->symtab_type]; } -int dso__binary_type_file(struct dso *dso, enum dso_binary_type type, - char *root_dir, char *file, size_t size) +int dso__binary_type_file(const struct dso *dso, enum dso_binary_type type, + char *root_dir, char *filename, size_t size) { char build_id_hex[BUILD_ID_SIZE * 2 + 1]; int ret = 0; @@ -38,36 +38,36 @@ int dso__binary_type_file(struct dso *dso, enum dso_binary_type type, case DSO_BINARY_TYPE__DEBUGLINK: { char *debuglink; - strncpy(file, dso->long_name, size); - debuglink = file + dso->long_name_len; - while (debuglink != file && *debuglink != '/') + strncpy(filename, dso->long_name, size); + debuglink = filename + dso->long_name_len; + while (debuglink != filename && *debuglink != '/') debuglink--; if (*debuglink == '/') debuglink++; filename__read_debuglink(dso->long_name, debuglink, - size - (debuglink - file)); + size - (debuglink - filename)); } break; case DSO_BINARY_TYPE__BUILD_ID_CACHE: /* skip the locally configured cache if a symfs is given */ if (symbol_conf.symfs[0] || - (dso__build_id_filename(dso, file, size) == NULL)) + (dso__build_id_filename(dso, filename, size) == NULL)) ret = -1; break; case DSO_BINARY_TYPE__FEDORA_DEBUGINFO: - snprintf(file, size, "%s/usr/lib/debug%s.debug", + snprintf(filename, size, "%s/usr/lib/debug%s.debug", symbol_conf.symfs, dso->long_name); break; case DSO_BINARY_TYPE__UBUNTU_DEBUGINFO: - snprintf(file, size, "%s/usr/lib/debug%s", + snprintf(filename, size, "%s/usr/lib/debug%s", symbol_conf.symfs, dso->long_name); break; case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO: { - char *last_slash; + const char *last_slash; size_t len; size_t dir_size; @@ -75,14 +75,14 @@ int dso__binary_type_file(struct dso *dso, enum dso_binary_type type, while (last_slash != dso->long_name && *last_slash != '/') last_slash--; - len = scnprintf(file, size, "%s", symbol_conf.symfs); + len = scnprintf(filename, size, "%s", symbol_conf.symfs); dir_size = last_slash - dso->long_name + 2; if (dir_size > (size - len)) { ret = -1; break; } - len += scnprintf(file + len, dir_size, "%s", dso->long_name); - len += scnprintf(file + len , size - len, ".debug%s", + len += scnprintf(filename + len, dir_size, "%s", dso->long_name); + len += scnprintf(filename + len , size - len, ".debug%s", last_slash); break; } @@ -96,7 +96,7 @@ int dso__binary_type_file(struct dso *dso, enum dso_binary_type type, build_id__sprintf(dso->build_id, sizeof(dso->build_id), build_id_hex); - snprintf(file, size, + snprintf(filename, size, "%s/usr/lib/debug/.build-id/%.2s/%s.debug", symbol_conf.symfs, build_id_hex, build_id_hex + 2); break; @@ -104,23 +104,23 @@ int dso__binary_type_file(struct dso *dso, enum dso_binary_type type, case DSO_BINARY_TYPE__VMLINUX: case DSO_BINARY_TYPE__GUEST_VMLINUX: case DSO_BINARY_TYPE__SYSTEM_PATH_DSO: - snprintf(file, size, "%s%s", + snprintf(filename, size, "%s%s", symbol_conf.symfs, dso->long_name); break; case DSO_BINARY_TYPE__GUEST_KMODULE: - snprintf(file, size, "%s%s%s", symbol_conf.symfs, + snprintf(filename, size, "%s%s%s", symbol_conf.symfs, root_dir, dso->long_name); break; case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE: - snprintf(file, size, "%s%s", symbol_conf.symfs, + snprintf(filename, size, "%s%s", symbol_conf.symfs, dso->long_name); break; case DSO_BINARY_TYPE__KCORE: case DSO_BINARY_TYPE__GUEST_KCORE: - snprintf(file, size, "%s", dso->long_name); + snprintf(filename, size, "%s", dso->long_name); break; default: @@ -200,11 +200,10 @@ dso_cache__free(struct rb_root *root) } } -static struct dso_cache* -dso_cache__find(struct rb_root *root, u64 offset) +static struct dso_cache *dso_cache__find(const struct rb_root *root, u64 offset) { - struct rb_node **p = &root->rb_node; - struct rb_node *parent = NULL; + struct rb_node * const *p = &root->rb_node; + const struct rb_node *parent = NULL; struct dso_cache *cache; while (*p != NULL) { @@ -379,32 +378,63 @@ struct dso *dso__kernel_findnew(struct machine *machine, const char *name, * processing we had no idea this was the kernel dso. */ if (dso != NULL) { - dso__set_short_name(dso, short_name); + dso__set_short_name(dso, short_name, false); dso->kernel = dso_type; } return dso; } -void dso__set_long_name(struct dso *dso, char *name) +void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated) { if (name == NULL) return; - dso->long_name = name; - dso->long_name_len = strlen(name); + + if (dso->long_name_allocated) + free((char *)dso->long_name); + + dso->long_name = name; + dso->long_name_len = strlen(name); + dso->long_name_allocated = name_allocated; } -void dso__set_short_name(struct dso *dso, const char *name) +void dso__set_short_name(struct dso *dso, const char *name, bool name_allocated) { if (name == NULL) return; - dso->short_name = name; - dso->short_name_len = strlen(name); + + if (dso->short_name_allocated) + free((char *)dso->short_name); + + dso->short_name = name; + dso->short_name_len = strlen(name); + dso->short_name_allocated = name_allocated; } static void dso__set_basename(struct dso *dso) { - dso__set_short_name(dso, basename(dso->long_name)); + /* + * basename() may modify path buffer, so we must pass + * a copy. + */ + char *base, *lname = strdup(dso->long_name); + + if (!lname) + return; + + /* + * basename() may return a pointer to internal + * storage which is reused in subsequent calls + * so copy the result. + */ + base = strdup(basename(lname)); + + free(lname); + + if (!base) + return; + + dso__set_short_name(dso, base, true); } int dso__name_len(const struct dso *dso) @@ -439,8 +469,8 @@ struct dso *dso__new(const char *name) if (dso != NULL) { int i; strcpy(dso->name, name); - dso__set_long_name(dso, dso->name); - dso__set_short_name(dso, dso->name); + dso__set_long_name(dso, dso->name, false); + dso__set_short_name(dso, dso->name, false); for (i = 0; i < MAP__NR_TYPES; ++i) dso->symbols[i] = dso->symbol_names[i] = RB_ROOT; dso->cache = RB_ROOT; @@ -451,6 +481,7 @@ struct dso *dso__new(const char *name) dso->sorted_by_name = 0; dso->has_build_id = 0; dso->has_srcline = 1; + dso->a2l_fails = 1; dso->kernel = DSO_TYPE_USER; dso->needs_swap = DSO_SWAP__UNSET; INIT_LIST_HEAD(&dso->node); @@ -464,11 +495,23 @@ void dso__delete(struct dso *dso) int i; for (i = 0; i < MAP__NR_TYPES; ++i) symbols__delete(&dso->symbols[i]); - if (dso->sname_alloc) + + if (dso->short_name_allocated) { free((char *)dso->short_name); - if (dso->lname_alloc) - free(dso->long_name); + dso->short_name = NULL; + dso->short_name_allocated = false; + } + + if (dso->long_name_allocated) { + free((char *)dso->long_name); + dso->long_name = NULL; + dso->long_name_allocated = false; + } + dso_cache__free(&dso->cache); + dso__free_a2l(dso); + free(dso->symsrc_filename); + dso->symsrc_filename = NULL; free(dso); } @@ -543,7 +586,7 @@ void dsos__add(struct list_head *head, struct dso *dso) list_add_tail(&dso->node, head); } -struct dso *dsos__find(struct list_head *head, const char *name, bool cmp_short) +struct dso *dsos__find(const struct list_head *head, const char *name, bool cmp_short) { struct dso *pos; diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 9ac666abbe7e..e1cc50698137 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -77,6 +77,9 @@ struct dso { struct rb_root symbols[MAP__NR_TYPES]; struct rb_root symbol_names[MAP__NR_TYPES]; struct rb_root cache; + void *a2l; + char *symsrc_filename; + unsigned int a2l_fails; enum dso_kernel_type kernel; enum dso_swap_type needs_swap; enum dso_binary_type symtab_type; @@ -86,14 +89,14 @@ struct dso { u8 has_srcline:1; u8 hit:1; u8 annotate_warned:1; - u8 sname_alloc:1; - u8 lname_alloc:1; + u8 short_name_allocated:1; + u8 long_name_allocated:1; u8 sorted_by_name; u8 loaded; u8 rel; u8 build_id[BUILD_ID_SIZE]; const char *short_name; - char *long_name; + const char *long_name; u16 long_name_len; u16 short_name_len; char name[0]; @@ -107,8 +110,8 @@ static inline void dso__set_loaded(struct dso *dso, enum map_type type) struct dso *dso__new(const char *name); void dso__delete(struct dso *dso); -void dso__set_short_name(struct dso *dso, const char *name); -void dso__set_long_name(struct dso *dso, char *name); +void dso__set_short_name(struct dso *dso, const char *name, bool name_allocated); +void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated); int dso__name_len(const struct dso *dso); @@ -125,8 +128,8 @@ void dso__read_running_kernel_build_id(struct dso *dso, int dso__kernel_module_get_build_id(struct dso *dso, const char *root_dir); char dso__symtab_origin(const struct dso *dso); -int dso__binary_type_file(struct dso *dso, enum dso_binary_type type, - char *root_dir, char *file, size_t size); +int dso__binary_type_file(const struct dso *dso, enum dso_binary_type type, + char *root_dir, char *filename, size_t size); int dso__data_fd(struct dso *dso, struct machine *machine); ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine, @@ -140,7 +143,7 @@ struct dso *dso__kernel_findnew(struct machine *machine, const char *name, const char *short_name, int dso_type); void dsos__add(struct list_head *head, struct dso *dso); -struct dso *dsos__find(struct list_head *head, const char *name, +struct dso *dsos__find(const struct list_head *head, const char *name, bool cmp_short); struct dso *__dsos__findnew(struct list_head *head, const char *name); bool __dsos__read_build_ids(struct list_head *head, bool with_hits); @@ -166,4 +169,6 @@ static inline bool dso__is_kcore(struct dso *dso) dso->data_type == DSO_BINARY_TYPE__GUEST_KCORE; } +void dso__free_a2l(struct dso *dso); + #endif /* __PERF_DSO */ diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index bb788c109fe6..c77814bf01e1 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -732,8 +732,7 @@ int perf_event__preprocess_sample(const union perf_event *event, if (thread == NULL) return -1; - if (symbol_conf.comm_list && - !strlist__has_entry(symbol_conf.comm_list, thread__comm_str(thread))) + if (thread__is_filtered(thread)) goto out_filtered; dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index bbc746aa5716..af250556b33f 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -732,11 +732,13 @@ static long parse_pages_arg(const char *str, unsigned long min, return -EINVAL; } - if ((pages == 0) && (min == 0)) { + if (pages == 0 && min == 0) { /* leave number of pages at 0 */ - } else if (pages < (1UL << 31) && !is_power_of_2(pages)) { + } else if (!is_power_of_2(pages)) { /* round pages up to next power of 2 */ - pages = next_pow2(pages); + pages = next_pow2_l(pages); + if (!pages) + return -EINVAL; pr_info("rounding mmap pages size to %lu bytes (%lu pages)\n", pages * page_size, pages); } @@ -754,7 +756,7 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, unsigned long max = UINT_MAX; long pages; - if (max < SIZE_MAX / page_size) + if (max > SIZE_MAX / page_size) max = SIZE_MAX / page_size; pages = parse_pages_arg(str, 1, max); @@ -819,11 +821,7 @@ int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) if (evlist->threads == NULL) return -1; - if (target->force_per_cpu) - evlist->cpus = cpu_map__new(target->cpu_list); - else if (target__has_task(target)) - evlist->cpus = cpu_map__dummy_new(); - else if (!target__has_cpu(target) && !target->uses_mmap) + if (target__uses_dummy_map(target)) evlist->cpus = cpu_map__dummy_new(); else evlist->cpus = cpu_map__new(target->cpu_list); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 46dd4c2a41ce..7b510fd1f08d 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -23,6 +23,7 @@ #include "target.h" #include "perf_regs.h" #include "debug.h" +#include "trace-event.h" static struct { bool sample_id_all; @@ -162,6 +163,8 @@ void perf_evsel__init(struct perf_evsel *evsel, evsel->idx = idx; evsel->attr = *attr; evsel->leader = evsel; + evsel->unit = ""; + evsel->scale = 1.0; INIT_LIST_HEAD(&evsel->node); hists__init(&evsel->hists); evsel->sample_size = __perf_evsel__sample_size(attr->sample_type); @@ -178,47 +181,6 @@ struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx) return evsel; } -struct event_format *event_format__new(const char *sys, const char *name) -{ - int fd, n; - char *filename; - void *bf = NULL, *nbf; - size_t size = 0, alloc_size = 0; - struct event_format *format = NULL; - - if (asprintf(&filename, "%s/%s/%s/format", tracing_events_path, sys, name) < 0) - goto out; - - fd = open(filename, O_RDONLY); - if (fd < 0) - goto out_free_filename; - - do { - if (size == alloc_size) { - alloc_size += BUFSIZ; - nbf = realloc(bf, alloc_size); - if (nbf == NULL) - goto out_free_bf; - bf = nbf; - } - - n = read(fd, bf + size, alloc_size - size); - if (n < 0) - goto out_free_bf; - size += n; - } while (n > 0); - - pevent_parse_format(&format, bf, size, sys); - -out_free_bf: - free(bf); - close(fd); -out_free_filename: - free(filename); -out: - return format; -} - struct perf_evsel *perf_evsel__newtp_idx(const char *sys, const char *name, int idx) { struct perf_evsel *evsel = zalloc(sizeof(*evsel)); @@ -233,7 +195,7 @@ struct perf_evsel *perf_evsel__newtp_idx(const char *sys, const char *name, int if (asprintf(&evsel->name, "%s:%s", sys, name) < 0) goto out_free; - evsel->tp_format = event_format__new(sys, name); + evsel->tp_format = trace_event__tp_format(sys, name); if (evsel->tp_format == NULL) goto out_free; @@ -572,6 +534,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_evsel *leader = evsel->leader; struct perf_event_attr *attr = &evsel->attr; int track = !evsel->idx; /* only the first counter needs these */ + bool per_cpu = opts->target.default_per_cpu && !opts->target.per_thread; attr->sample_id_all = perf_missing_features.sample_id_all ? 0 : 1; attr->inherit = !opts->no_inherit; @@ -645,7 +608,7 @@ void perf_evsel__config(struct perf_evsel *evsel, } } - if (target__has_cpu(&opts->target) || opts->target.force_per_cpu) + if (target__has_cpu(&opts->target)) perf_evsel__set_sample_bit(evsel, CPU); if (opts->period) @@ -653,7 +616,7 @@ void perf_evsel__config(struct perf_evsel *evsel, if (!perf_missing_features.sample_id_all && (opts->sample_time || !opts->no_inherit || - target__has_cpu(&opts->target) || opts->target.force_per_cpu)) + target__has_cpu(&opts->target) || per_cpu)) perf_evsel__set_sample_bit(evsel, TIME); if (opts->raw_samples) { diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 1ea7c92e6e33..8120eeb86ac1 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -68,6 +68,8 @@ struct perf_evsel { u32 ids; struct hists hists; char *name; + double scale; + const char *unit; struct event_format *tp_format; union { void *priv; @@ -138,6 +140,7 @@ extern const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX]; int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size); const char *perf_evsel__name(struct perf_evsel *evsel); + const char *perf_evsel__group_name(struct perf_evsel *evsel); int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 1cd035708931..0bb830f6b49c 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -177,7 +177,7 @@ perf_header__set_cmdline(int argc, const char **argv) continue; \ else -static int write_buildid(char *name, size_t name_len, u8 *build_id, +static int write_buildid(const char *name, size_t name_len, u8 *build_id, pid_t pid, u16 misc, int fd) { int err; @@ -209,7 +209,7 @@ static int __dsos__write_buildid_table(struct list_head *head, dsos__for_each_with_build_id(pos, head) { int err; - char *name; + const char *name; size_t name_len; if (!pos->hit) @@ -387,7 +387,7 @@ static int dso__cache_build_id(struct dso *dso, struct machine *machine, { bool is_kallsyms = dso->kernel && dso->long_name[0] != '/'; bool is_vdso = is_vdso_map(dso->short_name); - char *name = dso->long_name; + const char *name = dso->long_name; char nm[PATH_MAX]; if (dso__is_kcore(dso)) { @@ -1709,7 +1709,7 @@ static int process_nrcpus(struct perf_file_section *section __maybe_unused, struct perf_header *ph, int fd, void *data __maybe_unused) { - size_t ret; + ssize_t ret; u32 nr; ret = readn(fd, &nr, sizeof(nr)); @@ -1753,7 +1753,7 @@ static int process_total_mem(struct perf_file_section *section __maybe_unused, void *data __maybe_unused) { uint64_t mem; - size_t ret; + ssize_t ret; ret = readn(fd, &mem, sizeof(mem)); if (ret != sizeof(mem)) @@ -1822,7 +1822,7 @@ static int process_cmdline(struct perf_file_section *section __maybe_unused, struct perf_header *ph, int fd, void *data __maybe_unused) { - size_t ret; + ssize_t ret; char *str; u32 nr, i; struct strbuf sb; @@ -1858,7 +1858,7 @@ static int process_cpu_topology(struct perf_file_section *section __maybe_unused struct perf_header *ph, int fd, void *data __maybe_unused) { - size_t ret; + ssize_t ret; u32 nr, i; char *str; struct strbuf sb; @@ -1914,7 +1914,7 @@ static int process_numa_topology(struct perf_file_section *section __maybe_unuse struct perf_header *ph, int fd, void *data __maybe_unused) { - size_t ret; + ssize_t ret; u32 nr, node, i; char *str; uint64_t mem_total, mem_free; @@ -1974,7 +1974,7 @@ static int process_pmu_mappings(struct perf_file_section *section __maybe_unused struct perf_header *ph, int fd, void *data __maybe_unused) { - size_t ret; + ssize_t ret; char *name; u32 pmu_num; u32 type; @@ -2534,7 +2534,7 @@ static int check_magic_endian(u64 magic, uint64_t hdr_sz, int perf_file_header__read(struct perf_file_header *header, struct perf_header *ph, int fd) { - int ret; + ssize_t ret; lseek(fd, 0, SEEK_SET); @@ -2628,7 +2628,7 @@ static int perf_file_header__read_pipe(struct perf_pipe_file_header *header, struct perf_header *ph, int fd, bool repipe) { - int ret; + ssize_t ret; ret = readn(fd, header, sizeof(*header)); if (ret <= 0) @@ -2669,7 +2669,7 @@ static int read_attr(int fd, struct perf_header *ph, struct perf_event_attr *attr = &f_attr->attr; size_t sz, left; size_t our_sz = sizeof(f_attr->attr); - int ret; + ssize_t ret; memset(f_attr, 0, sizeof(*f_attr)); @@ -2834,11 +2834,11 @@ int perf_session__read_header(struct perf_session *session) symbol_conf.nr_events = nr_attrs; - perf_header__process_sections(header, fd, &session->pevent, + perf_header__process_sections(header, fd, &session->tevent, perf_file_section__process); if (perf_evlist__prepare_tracepoint_events(session->evlist, - session->pevent)) + session->tevent.pevent)) goto out_delete_evlist; return 0; @@ -3003,7 +3003,7 @@ int perf_event__process_tracing_data(struct perf_tool *tool __maybe_unused, lseek(fd, offset + sizeof(struct tracing_data_event), SEEK_SET); - size_read = trace_report(fd, &session->pevent, + size_read = trace_report(fd, &session->tevent, session->repipe); padding = PERF_ALIGN(size_read, sizeof(u64)) - size_read; @@ -3025,7 +3025,7 @@ int perf_event__process_tracing_data(struct perf_tool *tool __maybe_unused, } perf_evlist__prepare_tracepoint_events(session->evlist, - session->pevent); + session->tevent.pevent); return size_read + padding; } diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 84cdb072ac83..751454bcde69 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -502,15 +502,11 @@ static u64 machine__get_kernel_start_addr(struct machine *machine) char path[PATH_MAX]; struct process_args args; - if (machine__is_host(machine)) { - filename = "/proc/kallsyms"; - } else { - if (machine__is_default_guest(machine)) - filename = (char *)symbol_conf.default_guest_kallsyms; - else { - sprintf(path, "%s/proc/kallsyms", machine->root_dir); - filename = path; - } + if (machine__is_default_guest(machine)) + filename = (char *)symbol_conf.default_guest_kallsyms; + else { + sprintf(path, "%s/proc/kallsyms", machine->root_dir); + filename = path; } if (symbol__restricted_filename(filename, "/proc/kallsyms")) @@ -767,8 +763,7 @@ static int map_groups__set_modules_path_dir(struct map_groups *mg, ret = -1; goto out; } - dso__set_long_name(map->dso, long_name); - map->dso->lname_alloc = 1; + dso__set_long_name(map->dso, long_name, true); dso__kernel_module_get_build_id(map->dso, ""); } } @@ -939,8 +934,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine, if (name == NULL) goto out_problem; - map->dso->short_name = name; - map->dso->sname_alloc = 1; + dso__set_short_name(map->dso, name, true); map->end = map->start + event->mmap.len; } else if (is_kernel_mmap) { const char *symbol_name = (event->mmap.filename + diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index ef5bc913ca7a..9b9bd719aa19 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -11,6 +11,7 @@ #include "strlist.h" #include "vdso.h" #include "build-id.h" +#include "util.h" #include <linux/string.h> const char *map_type__name[MAP__NR_TYPES] = { @@ -252,6 +253,22 @@ size_t map__fprintf_dsoname(struct map *map, FILE *fp) return fprintf(fp, "%s", dsoname); } +int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix, + FILE *fp) +{ + char *srcline; + int ret = 0; + + if (map && map->dso) { + srcline = get_srcline(map->dso, + map__rip_2objdump(map, addr)); + if (srcline != SRCLINE_UNKNOWN) + ret = fprintf(fp, "%s%s", prefix, srcline); + free_srcline(srcline); + } + return ret; +} + /** * map__rip_2objdump - convert symbol start address to objdump address. * @map: memory map diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index e4e259c3ba16..18068c6b71c1 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -103,6 +103,8 @@ struct map *map__clone(struct map *map); int map__overlap(struct map *l, struct map *r); size_t map__fprintf(struct map *map, FILE *fp); size_t map__fprintf_dsoname(struct map *map, FILE *fp); +int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix, + FILE *fp); int map__load(struct map *map, symbol_filter_t filter); struct symbol *map__find_symbol(struct map *map, diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 6de6f89c2a61..969cb8f0d88d 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -269,9 +269,10 @@ const char *event_type(int type) -static int __add_event(struct list_head *list, int *idx, - struct perf_event_attr *attr, - char *name, struct cpu_map *cpus) +static struct perf_evsel * +__add_event(struct list_head *list, int *idx, + struct perf_event_attr *attr, + char *name, struct cpu_map *cpus) { struct perf_evsel *evsel; @@ -279,19 +280,19 @@ static int __add_event(struct list_head *list, int *idx, evsel = perf_evsel__new_idx(attr, (*idx)++); if (!evsel) - return -ENOMEM; + return NULL; evsel->cpus = cpus; if (name) evsel->name = strdup(name); list_add_tail(&evsel->node, list); - return 0; + return evsel; } static int add_event(struct list_head *list, int *idx, struct perf_event_attr *attr, char *name) { - return __add_event(list, idx, attr, name, NULL); + return __add_event(list, idx, attr, name, NULL) ? 0 : -ENOMEM; } static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size) @@ -633,6 +634,9 @@ int parse_events_add_pmu(struct list_head *list, int *idx, { struct perf_event_attr attr; struct perf_pmu *pmu; + struct perf_evsel *evsel; + char *unit; + double scale; pmu = perf_pmu__find(name); if (!pmu) @@ -640,7 +644,7 @@ int parse_events_add_pmu(struct list_head *list, int *idx, memset(&attr, 0, sizeof(attr)); - if (perf_pmu__check_alias(pmu, head_config)) + if (perf_pmu__check_alias(pmu, head_config, &unit, &scale)) return -EINVAL; /* @@ -652,8 +656,14 @@ int parse_events_add_pmu(struct list_head *list, int *idx, if (perf_pmu__config(pmu, &attr, head_config)) return -EINVAL; - return __add_event(list, idx, &attr, pmu_event_name(head_config), - pmu->cpus); + evsel = __add_event(list, idx, &attr, pmu_event_name(head_config), + pmu->cpus); + if (evsel) { + evsel->unit = unit; + evsel->scale = scale; + } + + return evsel ? 0 : -ENOMEM; } int parse_events__modifier_group(struct list_head *list, diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c index 31f404a032a9..d22e3f8017dc 100644 --- a/tools/perf/util/parse-options.c +++ b/tools/perf/util/parse-options.c @@ -78,6 +78,8 @@ static int get_value(struct parse_opt_ctx_t *p, case OPTION_BOOLEAN: *(bool *)opt->value = unset ? false : true; + if (opt->set) + *(bool *)opt->set = true; return 0; case OPTION_INCR: @@ -224,6 +226,24 @@ static int parse_long_opt(struct parse_opt_ctx_t *p, const char *arg, return 0; } if (!rest) { + if (!prefixcmp(options->long_name, "no-")) { + /* + * The long name itself starts with "no-", so + * accept the option without "no-" so that users + * do not have to enter "no-no-" to get the + * negation. + */ + rest = skip_prefix(arg, options->long_name + 3); + if (rest) { + flags |= OPT_UNSET; + goto match; + } + /* Abbreviated case */ + if (!prefixcmp(options->long_name + 3, arg)) { + flags |= OPT_UNSET; + goto is_abbreviated; + } + } /* abbreviated? */ if (!strncmp(options->long_name, arg, arg_end - arg)) { is_abbreviated: @@ -259,6 +279,7 @@ is_abbreviated: if (!rest) continue; } +match: if (*rest) { if (*rest != '=') continue; diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h index b0241e28eaf7..cbf0149cf221 100644 --- a/tools/perf/util/parse-options.h +++ b/tools/perf/util/parse-options.h @@ -82,6 +82,9 @@ typedef int parse_opt_cb(const struct option *, const char *arg, int unset); * OPTION_{BIT,SET_UINT,SET_PTR} store the {mask,integer,pointer} to put in * the value when met. * CALLBACKS can use it like they want. + * + * `set`:: + * whether an option was set by the user */ struct option { enum parse_opt_type type; @@ -94,6 +97,7 @@ struct option { int flags; parse_opt_cb *callback; intptr_t defval; + bool *set; }; #define check_vtype(v, type) ( BUILD_BUG_ON_ZERO(!__builtin_types_compatible_p(typeof(v), type)) + v ) @@ -103,6 +107,10 @@ struct option { #define OPT_GROUP(h) { .type = OPTION_GROUP, .help = (h) } #define OPT_BIT(s, l, v, h, b) { .type = OPTION_BIT, .short_name = (s), .long_name = (l), .value = check_vtype(v, int *), .help = (h), .defval = (b) } #define OPT_BOOLEAN(s, l, v, h) { .type = OPTION_BOOLEAN, .short_name = (s), .long_name = (l), .value = check_vtype(v, bool *), .help = (h) } +#define OPT_BOOLEAN_SET(s, l, v, os, h) \ + { .type = OPTION_BOOLEAN, .short_name = (s), .long_name = (l), \ + .value = check_vtype(v, bool *), .help = (h), \ + .set = check_vtype(os, bool *)} #define OPT_INCR(s, l, v, h) { .type = OPTION_INCR, .short_name = (s), .long_name = (l), .value = check_vtype(v, int *), .help = (h) } #define OPT_SET_UINT(s, l, v, h, i) { .type = OPTION_SET_UINT, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned int *), .help = (h), .defval = (i) } #define OPT_SET_PTR(s, l, v, h, p) { .type = OPTION_SET_PTR, .short_name = (s), .long_name = (l), .value = (v), .help = (h), .defval = (p) } diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index c232d8dd410b..56fc10a5e288 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1,19 +1,23 @@ #include <linux/list.h> #include <sys/types.h> -#include <sys/stat.h> #include <unistd.h> #include <stdio.h> #include <dirent.h> #include "fs.h" +#include <locale.h> #include "util.h" #include "pmu.h" #include "parse-events.h" #include "cpumap.h" +#define UNIT_MAX_LEN 31 /* max length for event unit name */ + struct perf_pmu_alias { char *name; struct list_head terms; struct list_head list; + char unit[UNIT_MAX_LEN+1]; + double scale; }; struct perf_pmu_format { @@ -94,7 +98,80 @@ static int pmu_format(const char *name, struct list_head *format) return 0; } -static int perf_pmu__new_alias(struct list_head *list, char *name, FILE *file) +static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char *name) +{ + struct stat st; + ssize_t sret; + char scale[128]; + int fd, ret = -1; + char path[PATH_MAX]; + char *lc; + + snprintf(path, PATH_MAX, "%s/%s.scale", dir, name); + + fd = open(path, O_RDONLY); + if (fd == -1) + return -1; + + if (fstat(fd, &st) < 0) + goto error; + + sret = read(fd, scale, sizeof(scale)-1); + if (sret < 0) + goto error; + + scale[sret] = '\0'; + /* + * save current locale + */ + lc = setlocale(LC_NUMERIC, NULL); + + /* + * force to C locale to ensure kernel + * scale string is converted correctly. + * kernel uses default C locale. + */ + setlocale(LC_NUMERIC, "C"); + + alias->scale = strtod(scale, NULL); + + /* restore locale */ + setlocale(LC_NUMERIC, lc); + + ret = 0; +error: + close(fd); + return ret; +} + +static int perf_pmu__parse_unit(struct perf_pmu_alias *alias, char *dir, char *name) +{ + char path[PATH_MAX]; + ssize_t sret; + int fd; + + snprintf(path, PATH_MAX, "%s/%s.unit", dir, name); + + fd = open(path, O_RDONLY); + if (fd == -1) + return -1; + + sret = read(fd, alias->unit, UNIT_MAX_LEN); + if (sret < 0) + goto error; + + close(fd); + + alias->unit[sret] = '\0'; + + return 0; +error: + close(fd); + alias->unit[0] = '\0'; + return -1; +} + +static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FILE *file) { struct perf_pmu_alias *alias; char buf[256]; @@ -110,6 +187,9 @@ static int perf_pmu__new_alias(struct list_head *list, char *name, FILE *file) return -ENOMEM; INIT_LIST_HEAD(&alias->terms); + alias->scale = 1.0; + alias->unit[0] = '\0'; + ret = parse_events_terms(&alias->terms, buf); if (ret) { free(alias); @@ -117,7 +197,14 @@ static int perf_pmu__new_alias(struct list_head *list, char *name, FILE *file) } alias->name = strdup(name); + /* + * load unit name and scale if available + */ + perf_pmu__parse_unit(alias, dir, name); + perf_pmu__parse_scale(alias, dir, name); + list_add_tail(&alias->list, list); + return 0; } @@ -129,6 +216,7 @@ static int pmu_aliases_parse(char *dir, struct list_head *head) { struct dirent *evt_ent; DIR *event_dir; + size_t len; int ret = 0; event_dir = opendir(dir); @@ -143,13 +231,24 @@ static int pmu_aliases_parse(char *dir, struct list_head *head) if (!strcmp(name, ".") || !strcmp(name, "..")) continue; + /* + * skip .unit and .scale info files + * parsed in perf_pmu__new_alias() + */ + len = strlen(name); + if (len > 5 && !strcmp(name + len - 5, ".unit")) + continue; + if (len > 6 && !strcmp(name + len - 6, ".scale")) + continue; + snprintf(path, PATH_MAX, "%s/%s", dir, name); ret = -EINVAL; file = fopen(path, "r"); if (!file) break; - ret = perf_pmu__new_alias(head, name, file); + + ret = perf_pmu__new_alias(head, dir, name, file); fclose(file); } @@ -508,16 +607,42 @@ static struct perf_pmu_alias *pmu_find_alias(struct perf_pmu *pmu, return NULL; } + +static int check_unit_scale(struct perf_pmu_alias *alias, + char **unit, double *scale) +{ + /* + * Only one term in event definition can + * define unit and scale, fail if there's + * more than one. + */ + if ((*unit && alias->unit) || + (*scale && alias->scale)) + return -EINVAL; + + if (alias->unit) + *unit = alias->unit; + + if (alias->scale) + *scale = alias->scale; + + return 0; +} + /* * Find alias in the terms list and replace it with the terms * defined for the alias */ -int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms) +int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, + char **unit, double *scale) { struct parse_events_term *term, *h; struct perf_pmu_alias *alias; int ret; + *unit = NULL; + *scale = 0; + list_for_each_entry_safe(term, h, head_terms, list) { alias = pmu_find_alias(pmu, term); if (!alias) @@ -525,6 +650,11 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms) ret = pmu_alias_terms(alias, &term->list); if (ret) return ret; + + ret = check_unit_scale(alias, unit, scale); + if (ret) + return ret; + list_del(&term->list); free(term); } diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 1179b26f244a..9183380e2038 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -28,7 +28,8 @@ int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr, int perf_pmu__config_terms(struct list_head *formats, struct perf_event_attr *attr, struct list_head *head_terms); -int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms); +int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, + char **unit, double *scale); struct list_head *perf_pmu__alias(struct perf_pmu *pmu, struct list_head *head_terms); int perf_pmu_wrap(void); diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 9c6989ca2bea..d7cff57945c2 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -154,7 +154,7 @@ static struct dso *kernel_get_module_dso(const char *module) vmlinux_name = symbol_conf.vmlinux_name; if (vmlinux_name) { - if (dso__load_vmlinux(dso, map, vmlinux_name, NULL) <= 0) + if (dso__load_vmlinux(dso, map, vmlinux_name, false, NULL) <= 0) return NULL; } else { if (dso__load_vmlinux_path(dso, map, NULL) <= 0) { diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index 239036fb2b2c..595bfc73d2ed 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -18,4 +18,5 @@ util/cgroup.c util/rblist.c util/strlist.c util/fs.c +util/trace-event.c ../../lib/rbtree.c diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index f36d24a02445..e748f29c53cf 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1158,7 +1158,7 @@ static int __perf_session__process_pipe_events(struct perf_session *session, void *buf = NULL; int skip = 0; u64 head; - int err; + ssize_t err; void *p; perf_tool__fill_defaults(tool); @@ -1487,28 +1487,23 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session, return NULL; } -void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event, - struct perf_sample *sample, struct machine *machine, +void perf_evsel__print_ip(struct perf_evsel *evsel, struct perf_sample *sample, + struct machine *machine, struct addr_location *al, unsigned int print_opts, unsigned int stack_depth) { - struct addr_location al; struct callchain_cursor_node *node; int print_ip = print_opts & PRINT_IP_OPT_IP; int print_sym = print_opts & PRINT_IP_OPT_SYM; int print_dso = print_opts & PRINT_IP_OPT_DSO; int print_symoffset = print_opts & PRINT_IP_OPT_SYMOFFSET; int print_oneline = print_opts & PRINT_IP_OPT_ONELINE; + int print_srcline = print_opts & PRINT_IP_OPT_SRCLINE; char s = print_oneline ? ' ' : '\t'; - if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { - error("problem processing %d event, skipping it.\n", - event->header.type); - return; - } - if (symbol_conf.use_callchain && sample->callchain) { + struct addr_location node_al; - if (machine__resolve_callchain(machine, evsel, al.thread, + if (machine__resolve_callchain(machine, evsel, al->thread, sample, NULL, NULL, PERF_MAX_STACK_DEPTH) != 0) { if (verbose) @@ -1517,20 +1512,31 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event, } callchain_cursor_commit(&callchain_cursor); + if (print_symoffset) + node_al = *al; + while (stack_depth) { + u64 addr = 0; + node = callchain_cursor_current(&callchain_cursor); if (!node) break; + if (node->sym && node->sym->ignore) + goto next; + if (print_ip) printf("%c%16" PRIx64, s, node->ip); + if (node->map) + addr = node->map->map_ip(node->map, node->ip); + if (print_sym) { printf(" "); if (print_symoffset) { - al.addr = node->ip; - al.map = node->map; - symbol__fprintf_symname_offs(node->sym, &al, stdout); + node_al.addr = addr; + node_al.map = node->map; + symbol__fprintf_symname_offs(node->sym, &node_al, stdout); } else symbol__fprintf_symname(node->sym, stdout); } @@ -1541,32 +1547,42 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event, printf(")"); } + if (print_srcline) + map__fprintf_srcline(node->map, addr, "\n ", + stdout); + if (!print_oneline) printf("\n"); - callchain_cursor_advance(&callchain_cursor); - stack_depth--; +next: + callchain_cursor_advance(&callchain_cursor); } } else { + if (al->sym && al->sym->ignore) + return; + if (print_ip) printf("%16" PRIx64, sample->ip); if (print_sym) { printf(" "); if (print_symoffset) - symbol__fprintf_symname_offs(al.sym, &al, + symbol__fprintf_symname_offs(al->sym, al, stdout); else - symbol__fprintf_symname(al.sym, stdout); + symbol__fprintf_symname(al->sym, stdout); } if (print_dso) { printf(" ("); - map__fprintf_dsoname(al.map, stdout); + map__fprintf_dsoname(al->map, stdout); printf(")"); } + + if (print_srcline) + map__fprintf_srcline(al->map, al->addr, "\n ", stdout); } } diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 50f640958f0f..2a3955ea4fd8 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -1,6 +1,7 @@ #ifndef __PERF_SESSION_H #define __PERF_SESSION_H +#include "trace-event.h" #include "hist.h" #include "event.h" #include "header.h" @@ -32,7 +33,7 @@ struct perf_session { struct perf_header header; struct machines machines; struct perf_evlist *evlist; - struct pevent *pevent; + struct trace_event tevent; struct events_stats stats; bool repipe; struct ordered_samples ordered_samples; @@ -44,6 +45,7 @@ struct perf_session { #define PRINT_IP_OPT_DSO (1<<2) #define PRINT_IP_OPT_SYMOFFSET (1<<3) #define PRINT_IP_OPT_ONELINE (1<<4) +#define PRINT_IP_OPT_SRCLINE (1<<5) struct perf_tool; @@ -105,8 +107,8 @@ size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp); struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session, unsigned int type); -void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event, - struct perf_sample *sample, struct machine *machine, +void perf_evsel__print_ip(struct perf_evsel *evsel, struct perf_sample *sample, + struct machine *machine, struct addr_location *al, unsigned int print_opts, unsigned int stack_depth); int perf_session__cpu_bitmap(struct perf_session *session, diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index d11aefbc4b8d..58b2bd8f38c9 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -146,18 +146,24 @@ static void addr2line_cleanup(struct a2l_data *a2l) } static int addr2line(const char *dso_name, unsigned long addr, - char **file, unsigned int *line) + char **file, unsigned int *line, struct dso *dso) { int ret = 0; - struct a2l_data *a2l; + struct a2l_data *a2l = dso->a2l; + + if (!a2l) { + dso->a2l = addr2line_init(dso_name); + a2l = dso->a2l; + } - a2l = addr2line_init(dso_name); if (a2l == NULL) { pr_warning("addr2line_init failed for %s\n", dso_name); return 0; } a2l->addr = addr; + a2l->found = false; + bfd_map_over_sections(a2l->abfd, find_address_in_section, a2l); if (a2l->found && a2l->filename) { @@ -168,14 +174,26 @@ static int addr2line(const char *dso_name, unsigned long addr, ret = 1; } - addr2line_cleanup(a2l); return ret; } +void dso__free_a2l(struct dso *dso) +{ + struct a2l_data *a2l = dso->a2l; + + if (!a2l) + return; + + addr2line_cleanup(a2l); + + dso->a2l = NULL; +} + #else /* HAVE_LIBBFD_SUPPORT */ static int addr2line(const char *dso_name, unsigned long addr, - char **file, unsigned int *line_nr) + char **file, unsigned int *line_nr, + struct dso *dso __maybe_unused) { FILE *fp; char cmd[PATH_MAX]; @@ -219,42 +237,58 @@ out: pclose(fp); return ret; } + +void dso__free_a2l(struct dso *dso __maybe_unused) +{ +} + #endif /* HAVE_LIBBFD_SUPPORT */ +/* + * Number of addr2line failures (without success) before disabling it for that + * dso. + */ +#define A2L_FAIL_LIMIT 123 + char *get_srcline(struct dso *dso, unsigned long addr) { char *file = NULL; unsigned line = 0; char *srcline; - char *dso_name = dso->long_name; - size_t size; + const char *dso_name; if (!dso->has_srcline) return SRCLINE_UNKNOWN; + if (dso->symsrc_filename) + dso_name = dso->symsrc_filename; + else + dso_name = dso->long_name; + if (dso_name[0] == '[') goto out; if (!strncmp(dso_name, "/tmp/perf-", 10)) goto out; - if (!addr2line(dso_name, addr, &file, &line)) + if (!addr2line(dso_name, addr, &file, &line, dso)) goto out; - /* just calculate actual length */ - size = snprintf(NULL, 0, "%s:%u", file, line) + 1; + if (asprintf(&srcline, "%s:%u", file, line) < 0) { + free(file); + goto out; + } - srcline = malloc(size); - if (srcline) - snprintf(srcline, size, "%s:%u", file, line); - else - srcline = SRCLINE_UNKNOWN; + dso->a2l_fails = 0; free(file); return srcline; out: - dso->has_srcline = 0; + if (dso->a2l_fails && ++dso->a2l_fails > A2L_FAIL_LIMIT) { + dso->has_srcline = 0; + dso__free_a2l(dso); + } return SRCLINE_UNKNOWN; } diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c index 96c866045d60..8b79d3ad1246 100644 --- a/tools/perf/util/svghelper.c +++ b/tools/perf/util/svghelper.c @@ -95,6 +95,7 @@ void open_svg(const char *filename, int cpus, int rows, u64 start, u64 end) total_height = (1 + rows + cpu2slot(cpus)) * SLOT_MULT; fprintf(svgfile, "<?xml version=\"1.0\" standalone=\"no\"?> \n"); + fprintf(svgfile, "<!DOCTYPE svg SYSTEM \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n"); fprintf(svgfile, "<svg width=\"%i\" height=\"%" PRIu64 "\" version=\"1.1\" xmlns=\"http://www.w3.org/2000/svg\">\n", svg_page_width, total_height); fprintf(svgfile, "<defs>\n <style type=\"text/css\">\n <![CDATA[\n"); @@ -128,12 +129,33 @@ void svg_box(int Yslot, u64 start, u64 end, const char *type) time2pixels(start), time2pixels(end)-time2pixels(start), Yslot * SLOT_MULT, SLOT_HEIGHT, type); } -void svg_sample(int Yslot, int cpu, u64 start, u64 end) +static char *time_to_string(u64 duration); +void svg_blocked(int Yslot, int cpu, u64 start, u64 end, const char *backtrace) +{ + if (!svgfile) + return; + + fprintf(svgfile, "<g>\n"); + fprintf(svgfile, "<title>#%d blocked %s</title>\n", cpu, + time_to_string(end - start)); + if (backtrace) + fprintf(svgfile, "<desc>Blocked on:\n%s</desc>\n", backtrace); + svg_box(Yslot, start, end, "blocked"); + fprintf(svgfile, "</g>\n"); +} + +void svg_running(int Yslot, int cpu, u64 start, u64 end, const char *backtrace) { double text_size; if (!svgfile) return; + fprintf(svgfile, "<g>\n"); + + fprintf(svgfile, "<title>#%d running %s</title>\n", + cpu, time_to_string(end - start)); + if (backtrace) + fprintf(svgfile, "<desc>Switched because:\n%s</desc>\n", backtrace); fprintf(svgfile, "<rect x=\"%4.8f\" width=\"%4.8f\" y=\"%4.1f\" height=\"%4.1f\" class=\"sample\"/>\n", time2pixels(start), time2pixels(end)-time2pixels(start), Yslot * SLOT_MULT, SLOT_HEIGHT); @@ -148,6 +170,7 @@ void svg_sample(int Yslot, int cpu, u64 start, u64 end) fprintf(svgfile, "<text x=\"%1.8f\" y=\"%1.8f\" font-size=\"%1.8fpt\">%i</text>\n", time2pixels(start), Yslot * SLOT_MULT + SLOT_HEIGHT - 1, text_size, cpu + 1); + fprintf(svgfile, "</g>\n"); } static char *time_to_string(u64 duration) @@ -168,7 +191,7 @@ static char *time_to_string(u64 duration) return text; } -void svg_waiting(int Yslot, u64 start, u64 end) +void svg_waiting(int Yslot, int cpu, u64 start, u64 end, const char *backtrace) { char *text; const char *style; @@ -192,6 +215,9 @@ void svg_waiting(int Yslot, u64 start, u64 end) font_size = round_text_size(font_size); fprintf(svgfile, "<g transform=\"translate(%4.8f,%4.8f)\">\n", time2pixels(start), Yslot * SLOT_MULT); + fprintf(svgfile, "<title>#%d waiting %s</title>\n", cpu, time_to_string(end - start)); + if (backtrace) + fprintf(svgfile, "<desc>Waiting on:\n%s</desc>\n", backtrace); fprintf(svgfile, "<rect x=\"0\" width=\"%4.8f\" y=\"0\" height=\"%4.1f\" class=\"%s\"/>\n", time2pixels(end)-time2pixels(start), SLOT_HEIGHT, style); if (font_size > MIN_TEXT_SIZE) @@ -242,6 +268,8 @@ void svg_cpu_box(int cpu, u64 __max_freq, u64 __turbo_freq) max_freq = __max_freq; turbo_frequency = __turbo_freq; + fprintf(svgfile, "<g>\n"); + fprintf(svgfile, "<rect x=\"%4.8f\" width=\"%4.8f\" y=\"%4.1f\" height=\"%4.1f\" class=\"cpu\"/>\n", time2pixels(first_time), time2pixels(last_time)-time2pixels(first_time), @@ -253,6 +281,8 @@ void svg_cpu_box(int cpu, u64 __max_freq, u64 __turbo_freq) fprintf(svgfile, "<text transform=\"translate(%4.8f,%4.8f)\" font-size=\"1.25pt\">%s</text>\n", 10+time2pixels(first_time), cpu2y(cpu) + SLOT_MULT + SLOT_HEIGHT - 4, cpu_model()); + + fprintf(svgfile, "</g>\n"); } void svg_process(int cpu, u64 start, u64 end, const char *type, const char *name) @@ -264,6 +294,7 @@ void svg_process(int cpu, u64 start, u64 end, const char *type, const char *name fprintf(svgfile, "<g transform=\"translate(%4.8f,%4.8f)\">\n", time2pixels(start), cpu2y(cpu)); + fprintf(svgfile, "<title>%s %s</title>\n", name, time_to_string(end - start)); fprintf(svgfile, "<rect x=\"0\" width=\"%4.8f\" y=\"0\" height=\"%4.1f\" class=\"%s\"/>\n", time2pixels(end)-time2pixels(start), SLOT_MULT+SLOT_HEIGHT, type); width = time2pixels(end)-time2pixels(start); @@ -288,6 +319,8 @@ void svg_cstate(int cpu, u64 start, u64 end, int type) return; + fprintf(svgfile, "<g>\n"); + if (type > 6) type = 6; sprintf(style, "c%i", type); @@ -306,6 +339,8 @@ void svg_cstate(int cpu, u64 start, u64 end, int type) if (width > MIN_TEXT_SIZE) fprintf(svgfile, "<text x=\"%4.8f\" y=\"%4.8f\" font-size=\"%3.8fpt\">C%i</text>\n", time2pixels(start), cpu2y(cpu)+width, width, type); + + fprintf(svgfile, "</g>\n"); } static char *HzToHuman(unsigned long hz) @@ -339,6 +374,8 @@ void svg_pstate(int cpu, u64 start, u64 end, u64 freq) if (!svgfile) return; + fprintf(svgfile, "<g>\n"); + if (max_freq) height = freq * 1.0 / max_freq * (SLOT_HEIGHT + SLOT_MULT); height = 1 + cpu2y(cpu) + SLOT_MULT + SLOT_HEIGHT - height; @@ -347,10 +384,11 @@ void svg_pstate(int cpu, u64 start, u64 end, u64 freq) fprintf(svgfile, "<text x=\"%4.8f\" y=\"%4.8f\" font-size=\"0.25pt\">%s</text>\n", time2pixels(start), height+0.9, HzToHuman(freq)); + fprintf(svgfile, "</g>\n"); } -void svg_partial_wakeline(u64 start, int row1, char *desc1, int row2, char *desc2) +void svg_partial_wakeline(u64 start, int row1, char *desc1, int row2, char *desc2, const char *backtrace) { double height; @@ -358,6 +396,15 @@ void svg_partial_wakeline(u64 start, int row1, char *desc1, int row2, char *desc return; + fprintf(svgfile, "<g>\n"); + + fprintf(svgfile, "<title>%s wakes up %s</title>\n", + desc1 ? desc1 : "?", + desc2 ? desc2 : "?"); + + if (backtrace) + fprintf(svgfile, "<desc>%s</desc>\n", backtrace); + if (row1 < row2) { if (row1) { fprintf(svgfile, "<line x1=\"%4.8f\" y1=\"%4.2f\" x2=\"%4.8f\" y2=\"%4.2f\" style=\"stroke:rgb(32,255,32);stroke-width:0.009\"/>\n", @@ -395,9 +442,11 @@ void svg_partial_wakeline(u64 start, int row1, char *desc1, int row2, char *desc if (row1) fprintf(svgfile, "<circle cx=\"%4.8f\" cy=\"%4.2f\" r = \"0.01\" style=\"fill:rgb(32,255,32)\"/>\n", time2pixels(start), height); + + fprintf(svgfile, "</g>\n"); } -void svg_wakeline(u64 start, int row1, int row2) +void svg_wakeline(u64 start, int row1, int row2, const char *backtrace) { double height; @@ -405,6 +454,11 @@ void svg_wakeline(u64 start, int row1, int row2) return; + fprintf(svgfile, "<g>\n"); + + if (backtrace) + fprintf(svgfile, "<desc>%s</desc>\n", backtrace); + if (row1 < row2) fprintf(svgfile, "<line x1=\"%4.8f\" y1=\"%4.2f\" x2=\"%4.8f\" y2=\"%4.2f\" style=\"stroke:rgb(32,255,32);stroke-width:0.009\"/>\n", time2pixels(start), row1 * SLOT_MULT + SLOT_HEIGHT, time2pixels(start), row2 * SLOT_MULT); @@ -417,17 +471,28 @@ void svg_wakeline(u64 start, int row1, int row2) height += SLOT_HEIGHT; fprintf(svgfile, "<circle cx=\"%4.8f\" cy=\"%4.2f\" r = \"0.01\" style=\"fill:rgb(32,255,32)\"/>\n", time2pixels(start), height); + + fprintf(svgfile, "</g>\n"); } -void svg_interrupt(u64 start, int row) +void svg_interrupt(u64 start, int row, const char *backtrace) { if (!svgfile) return; + fprintf(svgfile, "<g>\n"); + + fprintf(svgfile, "<title>Wakeup from interrupt</title>\n"); + + if (backtrace) + fprintf(svgfile, "<desc>%s</desc>\n", backtrace); + fprintf(svgfile, "<circle cx=\"%4.8f\" cy=\"%4.2f\" r = \"0.01\" style=\"fill:rgb(255,128,128)\"/>\n", time2pixels(start), row * SLOT_MULT); fprintf(svgfile, "<circle cx=\"%4.8f\" cy=\"%4.2f\" r = \"0.01\" style=\"fill:rgb(255,128,128)\"/>\n", time2pixels(start), row * SLOT_MULT + SLOT_HEIGHT); + + fprintf(svgfile, "</g>\n"); } void svg_text(int Yslot, u64 start, const char *text) @@ -455,6 +520,7 @@ void svg_legenda(void) if (!svgfile) return; + fprintf(svgfile, "<g>\n"); svg_legenda_box(0, "Running", "sample"); svg_legenda_box(100, "Idle","c1"); svg_legenda_box(200, "Deeper Idle", "c3"); @@ -462,6 +528,7 @@ void svg_legenda(void) svg_legenda_box(550, "Sleeping", "process2"); svg_legenda_box(650, "Waiting for cpu", "waiting"); svg_legenda_box(800, "Blocked on IO", "blocked"); + fprintf(svgfile, "</g>\n"); } void svg_time_grid(void) diff --git a/tools/perf/util/svghelper.h b/tools/perf/util/svghelper.h index e0781989cc31..fad79ceeac1a 100644 --- a/tools/perf/util/svghelper.h +++ b/tools/perf/util/svghelper.h @@ -5,8 +5,9 @@ extern void open_svg(const char *filename, int cpus, int rows, u64 start, u64 end); extern void svg_box(int Yslot, u64 start, u64 end, const char *type); -extern void svg_sample(int Yslot, int cpu, u64 start, u64 end); -extern void svg_waiting(int Yslot, u64 start, u64 end); +extern void svg_blocked(int Yslot, int cpu, u64 start, u64 end, const char *backtrace); +extern void svg_running(int Yslot, int cpu, u64 start, u64 end, const char *backtrace); +extern void svg_waiting(int Yslot, int cpu, u64 start, u64 end, const char *backtrace); extern void svg_cpu_box(int cpu, u64 max_frequency, u64 turbo_frequency); @@ -17,9 +18,9 @@ extern void svg_pstate(int cpu, u64 start, u64 end, u64 freq); extern void svg_time_grid(void); extern void svg_legenda(void); -extern void svg_wakeline(u64 start, int row1, int row2); -extern void svg_partial_wakeline(u64 start, int row1, char *desc1, int row2, char *desc2); -extern void svg_interrupt(u64 start, int row); +extern void svg_wakeline(u64 start, int row1, int row2, const char *backtrace); +extern void svg_partial_wakeline(u64 start, int row1, char *desc1, int row2, char *desc2, const char *backtrace); +extern void svg_interrupt(u64 start, int row, const char *backtrace); extern void svg_text(int Yslot, u64 start, const char *text); extern void svg_close(void); diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index 2d2dd0532b5a..ac7070a2f2b6 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -253,6 +253,7 @@ int symsrc__init(struct symsrc *ss, struct dso *dso __maybe_unused, if (!ss->name) goto out_close; + ss->fd = fd; ss->type = type; return 0; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index c0c36965fff0..e377c2e96191 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -573,6 +573,36 @@ static u8 kallsyms2elf_type(char type) return isupper(type) ? STB_GLOBAL : STB_LOCAL; } +bool symbol__is_idle(struct symbol *sym) +{ + const char * const idle_symbols[] = { + "cpu_idle", + "intel_idle", + "default_idle", + "native_safe_halt", + "enter_idle", + "exit_idle", + "mwait_idle", + "mwait_idle_with_hints", + "poll_idle", + "ppc64_runlatch_off", + "pseries_dedicated_idle_sleep", + NULL + }; + + int i; + + if (!sym) + return false; + + for (i = 0; idle_symbols[i]; i++) { + if (!strcmp(idle_symbols[i], sym->name)) + return true; + } + + return false; +} + static int map__process_kallsym_symbol(void *arg, const char *name, char type, u64 start) { @@ -1129,7 +1159,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map, dso->data_type = DSO_BINARY_TYPE__GUEST_KCORE; else dso->data_type = DSO_BINARY_TYPE__KCORE; - dso__set_long_name(dso, strdup(kcore_filename)); + dso__set_long_name(dso, strdup(kcore_filename), true); close(fd); @@ -1306,6 +1336,8 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) if (!syms_ss && symsrc__has_symtab(ss)) { syms_ss = ss; next_slot = true; + if (!dso->symsrc_filename) + dso->symsrc_filename = strdup(name); } if (!runtime_ss && symsrc__possibly_runtime(ss)) { @@ -1376,7 +1408,8 @@ struct map *map_groups__find_by_name(struct map_groups *mg, } int dso__load_vmlinux(struct dso *dso, struct map *map, - const char *vmlinux, symbol_filter_t filter) + const char *vmlinux, bool vmlinux_allocated, + symbol_filter_t filter) { int err = -1; struct symsrc ss; @@ -1405,7 +1438,7 @@ int dso__load_vmlinux(struct dso *dso, struct map *map, dso->data_type = DSO_BINARY_TYPE__GUEST_VMLINUX; else dso->data_type = DSO_BINARY_TYPE__VMLINUX; - dso__set_long_name(dso, (char *)vmlinux); + dso__set_long_name(dso, vmlinux, vmlinux_allocated); dso__set_loaded(dso, map->type); pr_debug("Using %s for symbols\n", symfs_vmlinux); } @@ -1424,21 +1457,16 @@ int dso__load_vmlinux_path(struct dso *dso, struct map *map, filename = dso__build_id_filename(dso, NULL, 0); if (filename != NULL) { - err = dso__load_vmlinux(dso, map, filename, filter); - if (err > 0) { - dso->lname_alloc = 1; + err = dso__load_vmlinux(dso, map, filename, true, filter); + if (err > 0) goto out; - } free(filename); } for (i = 0; i < vmlinux_path__nr_entries; ++i) { - err = dso__load_vmlinux(dso, map, vmlinux_path[i], filter); - if (err > 0) { - dso__set_long_name(dso, strdup(vmlinux_path[i])); - dso->lname_alloc = 1; + err = dso__load_vmlinux(dso, map, vmlinux_path[i], false, filter); + if (err > 0) break; - } } out: return err; @@ -1496,14 +1524,15 @@ static char *dso__find_kallsyms(struct dso *dso, struct map *map) build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); + scnprintf(path, sizeof(path), "%s/[kernel.kcore]/%s", buildid_dir, + sbuild_id); + /* Use /proc/kallsyms if possible */ if (is_host) { DIR *d; int fd; /* If no cached kcore go with /proc/kallsyms */ - scnprintf(path, sizeof(path), "%s/[kernel.kcore]/%s", - buildid_dir, sbuild_id); d = opendir(path); if (!d) goto proc_kallsyms; @@ -1528,6 +1557,10 @@ static char *dso__find_kallsyms(struct dso *dso, struct map *map) goto proc_kallsyms; } + /* Find kallsyms in build-id cache with kcore */ + if (!find_matching_kcore(map, path, sizeof(path))) + return strdup(path); + scnprintf(path, sizeof(path), "%s/[kernel.kallsyms]/%s", buildid_dir, sbuild_id); @@ -1570,15 +1603,8 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map, } if (!symbol_conf.ignore_vmlinux && symbol_conf.vmlinux_name != NULL) { - err = dso__load_vmlinux(dso, map, - symbol_conf.vmlinux_name, filter); - if (err > 0) { - dso__set_long_name(dso, - strdup(symbol_conf.vmlinux_name)); - dso->lname_alloc = 1; - return err; - } - return err; + return dso__load_vmlinux(dso, map, symbol_conf.vmlinux_name, + false, filter); } if (!symbol_conf.ignore_vmlinux && vmlinux_path != NULL) { @@ -1604,7 +1630,7 @@ do_kallsyms: free(kallsyms_allocated_filename); if (err > 0 && !dso__is_kcore(dso)) { - dso__set_long_name(dso, strdup("[kernel.kallsyms]")); + dso__set_long_name(dso, "[kernel.kallsyms]", false); map__fixup_start(map); map__fixup_end(map); } @@ -1634,7 +1660,8 @@ static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map, */ if (symbol_conf.default_guest_vmlinux_name != NULL) { err = dso__load_vmlinux(dso, map, - symbol_conf.default_guest_vmlinux_name, filter); + symbol_conf.default_guest_vmlinux_name, + false, filter); return err; } @@ -1651,7 +1678,7 @@ static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map, pr_debug("Using %s for symbols\n", kallsyms_filename); if (err > 0 && !dso__is_kcore(dso)) { machine__mmap_name(machine, path, sizeof(path)); - dso__set_long_name(dso, strdup(path)); + dso__set_long_name(dso, strdup(path), true); map__fixup_start(map); map__fixup_end(map); } @@ -1719,7 +1746,7 @@ out_fail: return -1; } -static int setup_list(struct strlist **list, const char *list_str, +int setup_list(struct strlist **list, const char *list_str, const char *list_name) { if (list_str == NULL) diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 07de8fea2f48..6de9c2b8a601 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -206,7 +206,8 @@ bool symsrc__possibly_runtime(struct symsrc *ss); int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter); int dso__load_vmlinux(struct dso *dso, struct map *map, - const char *vmlinux, symbol_filter_t filter); + const char *vmlinux, bool vmlinux_allocated, + symbol_filter_t filter); int dso__load_vmlinux_path(struct dso *dso, struct map *map, symbol_filter_t filter); int dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map, @@ -240,6 +241,7 @@ size_t symbol__fprintf(struct symbol *sym, FILE *fp); bool symbol_type__is_a(char symbol_type, enum map_type map_type); bool symbol__restricted_filename(const char *filename, const char *restricted_filename); +bool symbol__is_idle(struct symbol *sym); int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, struct symsrc *runtime_ss, symbol_filter_t filter, @@ -273,4 +275,7 @@ void kcore_extract__delete(struct kcore_extract *kce); int kcore_copy(const char *from_dir, const char *to_dir); int compare_proc_modules(const char *from, const char *to); +int setup_list(struct strlist **list, const char *list_str, + const char *list_name); + #endif /* __PERF_SYMBOL */ diff --git a/tools/perf/util/target.c b/tools/perf/util/target.c index 3c778a07b7cc..e74c5963dc7a 100644 --- a/tools/perf/util/target.c +++ b/tools/perf/util/target.c @@ -55,6 +55,13 @@ enum target_errno target__validate(struct target *target) ret = TARGET_ERRNO__UID_OVERRIDE_SYSTEM; } + /* THREAD and SYSTEM/CPU are mutually exclusive */ + if (target->per_thread && (target->system_wide || target->cpu_list)) { + target->per_thread = false; + if (ret == TARGET_ERRNO__SUCCESS) + ret = TARGET_ERRNO__SYSTEM_OVERRIDE_THREAD; + } + return ret; } @@ -100,6 +107,7 @@ static const char *target__error_str[] = { "UID switch overriding CPU", "PID/TID switch overriding SYSTEM", "UID switch overriding SYSTEM", + "SYSTEM/CPU switch overriding PER-THREAD", "Invalid User: %s", "Problems obtaining information for user %s", }; @@ -131,7 +139,8 @@ int target__strerror(struct target *target, int errnum, msg = target__error_str[idx]; switch (errnum) { - case TARGET_ERRNO__PID_OVERRIDE_CPU ... TARGET_ERRNO__UID_OVERRIDE_SYSTEM: + case TARGET_ERRNO__PID_OVERRIDE_CPU ... + TARGET_ERRNO__SYSTEM_OVERRIDE_THREAD: snprintf(buf, buflen, "%s", msg); break; diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h index 2d0c50690892..7381b1ca4041 100644 --- a/tools/perf/util/target.h +++ b/tools/perf/util/target.h @@ -12,7 +12,8 @@ struct target { uid_t uid; bool system_wide; bool uses_mmap; - bool force_per_cpu; + bool default_per_cpu; + bool per_thread; }; enum target_errno { @@ -33,6 +34,7 @@ enum target_errno { TARGET_ERRNO__UID_OVERRIDE_CPU, TARGET_ERRNO__PID_OVERRIDE_SYSTEM, TARGET_ERRNO__UID_OVERRIDE_SYSTEM, + TARGET_ERRNO__SYSTEM_OVERRIDE_THREAD, /* for target__parse_uid() */ TARGET_ERRNO__INVALID_UID, @@ -61,4 +63,17 @@ static inline bool target__none(struct target *target) return !target__has_task(target) && !target__has_cpu(target); } +static inline bool target__uses_dummy_map(struct target *target) +{ + bool use_dummy = false; + + if (target->default_per_cpu) + use_dummy = target->per_thread ? true : false; + else if (target__has_task(target) || + (!target__has_cpu(target) && !target->uses_mmap)) + use_dummy = true; + + return use_dummy; +} + #endif /* _PERF_TARGET_H */ diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 897c1b2a750a..5b856bf942e1 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -6,6 +6,7 @@ #include <unistd.h> #include <sys/types.h> #include "symbol.h" +#include <strlist.h> struct thread { union { @@ -66,4 +67,15 @@ static inline void thread__set_priv(struct thread *thread, void *p) { thread->priv = p; } + +static inline bool thread__is_filtered(struct thread *thread) +{ + if (symbol_conf.comm_list && + !strlist__has_entry(symbol_conf.comm_list, thread__comm_str(thread))) { + return true; + } + + return false; +} + #endif /* __PERF_THREAD_H */ diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 6681f71f2f95..e0d6d07f6848 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -28,19 +28,6 @@ #include "util.h" #include "trace-event.h" -struct pevent *read_trace_init(int file_bigendian, int host_bigendian) -{ - struct pevent *pevent = pevent_alloc(); - - if (pevent != NULL) { - pevent_set_flag(pevent, PEVENT_NSEC_OUTPUT); - pevent_set_file_bigendian(pevent, file_bigendian); - pevent_set_host_bigendian(pevent, host_bigendian); - } - - return pevent; -} - static int get_common_field(struct scripting_context *context, int *offset, int *size, const char *type) { diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index f2112270c663..e113e180c48f 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -343,7 +343,7 @@ static int read_event_files(struct pevent *pevent) return 0; } -ssize_t trace_report(int fd, struct pevent **ppevent, bool __repipe) +ssize_t trace_report(int fd, struct trace_event *tevent, bool __repipe) { char buf[BUFSIZ]; char test[] = { 23, 8, 68 }; @@ -356,11 +356,9 @@ ssize_t trace_report(int fd, struct pevent **ppevent, bool __repipe) int host_bigendian; int file_long_size; int file_page_size; - struct pevent *pevent; + struct pevent *pevent = NULL; int err; - *ppevent = NULL; - repipe = __repipe; input_fd = fd; @@ -390,12 +388,17 @@ ssize_t trace_report(int fd, struct pevent **ppevent, bool __repipe) file_bigendian = buf[0]; host_bigendian = bigendian(); - pevent = read_trace_init(file_bigendian, host_bigendian); - if (pevent == NULL) { - pr_debug("read_trace_init failed"); + if (trace_event__init(tevent)) { + pr_debug("trace_event__init failed"); goto out; } + pevent = tevent->pevent; + + pevent_set_flag(pevent, PEVENT_NSEC_OUTPUT); + pevent_set_file_bigendian(pevent, file_bigendian); + pevent_set_host_bigendian(pevent, host_bigendian); + if (do_read(buf, 1) < 0) goto out; file_long_size = buf[0]; @@ -432,11 +435,10 @@ ssize_t trace_report(int fd, struct pevent **ppevent, bool __repipe) pevent_print_printk(pevent); } - *ppevent = pevent; pevent = NULL; out: if (pevent) - pevent_free(pevent); + trace_event__cleanup(tevent); return size; } diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c new file mode 100644 index 000000000000..d9f5f6137ab3 --- /dev/null +++ b/tools/perf/util/trace-event.c @@ -0,0 +1,82 @@ + +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <linux/kernel.h> +#include <traceevent/event-parse.h> +#include "trace-event.h" +#include "util.h" + +/* + * global trace_event object used by trace_event__tp_format + * + * TODO There's no cleanup call for this. Add some sort of + * __exit function support and call trace_event__cleanup + * there. + */ +static struct trace_event tevent; + +int trace_event__init(struct trace_event *t) +{ + struct pevent *pevent = pevent_alloc(); + + if (pevent) { + t->plugin_list = traceevent_load_plugins(pevent); + t->pevent = pevent; + } + + return pevent ? 0 : -1; +} + +void trace_event__cleanup(struct trace_event *t) +{ + pevent_free(t->pevent); + traceevent_unload_plugins(t->plugin_list); +} + +static struct event_format* +tp_format(const char *sys, const char *name) +{ + struct pevent *pevent = tevent.pevent; + struct event_format *event = NULL; + char path[PATH_MAX]; + size_t size; + char *data; + + scnprintf(path, PATH_MAX, "%s/%s/%s/format", + tracing_events_path, sys, name); + + if (filename__read_str(path, &data, &size)) + return NULL; + + pevent_parse_format(pevent, &event, data, size, sys); + + free(data); + return event; +} + +struct event_format* +trace_event__tp_format(const char *sys, const char *name) +{ + static bool initialized; + + if (!initialized) { + int be = traceevent_host_bigendian(); + struct pevent *pevent; + + if (trace_event__init(&tevent)) + return NULL; + + pevent = tevent.pevent; + pevent_set_flag(pevent, PEVENT_NSEC_OUTPUT); + pevent_set_file_bigendian(pevent, be); + pevent_set_host_bigendian(pevent, be); + initialized = true; + } + + return tp_format(sys, name); +} diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index 04df63114109..3a01618c5b87 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -3,17 +3,26 @@ #include <traceevent/event-parse.h> #include "parse-events.h" -#include "session.h" struct machine; struct perf_sample; union perf_event; struct perf_tool; struct thread; +struct plugin_list; + +struct trace_event { + struct pevent *pevent; + struct plugin_list *plugin_list; +}; + +int trace_event__init(struct trace_event *t); +void trace_event__cleanup(struct trace_event *t); +struct event_format* +trace_event__tp_format(const char *sys, const char *name); int bigendian(void); -struct pevent *read_trace_init(int file_bigendian, int host_bigendian); void event_format__print(struct event_format *event, int cpu, void *data, int size); @@ -27,7 +36,7 @@ raw_field_value(struct event_format *event, const char *name, void *data); void parse_proc_kallsyms(struct pevent *pevent, char *file, unsigned int size); void parse_ftrace_printk(struct pevent *pevent, char *file, unsigned int size); -ssize_t trace_report(int fd, struct pevent **pevent, bool repipe); +ssize_t trace_report(int fd, struct trace_event *tevent, bool repipe); struct event_format *trace_find_next_event(struct pevent *pevent, struct event_format *event); diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 28a0a89c1f73..4a57609c0b43 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -6,6 +6,9 @@ #endif #include <stdio.h> #include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <linux/kernel.h> /* * XXX We need to find a better place for these things... @@ -151,21 +154,40 @@ unsigned long convert_unit(unsigned long value, char *unit) return value; } -int readn(int fd, void *buf, size_t n) +static ssize_t ion(bool is_read, int fd, void *buf, size_t n) { void *buf_start = buf; + size_t left = n; - while (n) { - int ret = read(fd, buf, n); + while (left) { + ssize_t ret = is_read ? read(fd, buf, left) : + write(fd, buf, left); if (ret <= 0) return ret; - n -= ret; - buf += ret; + left -= ret; + buf += ret; } - return buf - buf_start; + BUG_ON((size_t)(buf - buf_start) != n); + return n; +} + +/* + * Read exactly 'n' bytes or return an error. + */ +ssize_t readn(int fd, void *buf, size_t n) +{ + return ion(true, fd, buf, n); +} + +/* + * Write exactly 'n' bytes or return an error. + */ +ssize_t writen(int fd, void *buf, size_t n) +{ + return ion(false, fd, buf, n); } size_t hex_width(u64 v) @@ -413,3 +435,64 @@ int filename__read_int(const char *filename, int *value) close(fd); return err; } + +int filename__read_str(const char *filename, char **buf, size_t *sizep) +{ + size_t size = 0, alloc_size = 0; + void *bf = NULL, *nbf; + int fd, n, err = 0; + + fd = open(filename, O_RDONLY); + if (fd < 0) + return -errno; + + do { + if (size == alloc_size) { + alloc_size += BUFSIZ; + nbf = realloc(bf, alloc_size); + if (!nbf) { + err = -ENOMEM; + break; + } + + bf = nbf; + } + + n = read(fd, bf + size, alloc_size - size); + if (n < 0) { + if (size) { + pr_warning("read failed %d: %s\n", + errno, strerror(errno)); + err = 0; + } else + err = -errno; + + break; + } + + size += n; + } while (n > 0); + + if (!err) { + *sizep = size; + *buf = bf; + } else + free(bf); + + close(fd); + return err; +} + +const char *get_filename_for_perf_kvm(void) +{ + const char *filename; + + if (perf_host && !perf_guest) + filename = strdup("perf.data.host"); + else if (!perf_host && perf_guest) + filename = strdup("perf.data.guest"); + else + filename = strdup("perf.data.kvm"); + + return filename; +} diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index c8f362daba87..0171213d1d4d 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -73,6 +73,7 @@ #include <sys/ttydefaults.h> #include <lk/debugfs.h> #include <termios.h> +#include <linux/bitops.h> extern const char *graph_line; extern const char *graph_dotted_line; @@ -253,7 +254,8 @@ bool strlazymatch(const char *str, const char *pat); int strtailcmp(const char *s1, const char *s2); char *strxfrchar(char *s, char from, char to); unsigned long convert_unit(unsigned long value, char *unit); -int readn(int fd, void *buf, size_t size); +ssize_t readn(int fd, void *buf, size_t n); +ssize_t writen(int fd, void *buf, size_t n); struct perf_event_attr; @@ -280,6 +282,17 @@ static inline unsigned next_pow2(unsigned x) return 1ULL << (32 - __builtin_clz(x - 1)); } +static inline unsigned long next_pow2_l(unsigned long x) +{ +#if BITS_PER_LONG == 64 + if (x <= (1UL << 31)) + return next_pow2(x); + return (unsigned long)next_pow2(x >> 32) << 32; +#else + return next_pow2(x); +#endif +} + size_t hex_width(u64 v); int hex2u64(const char *ptr, u64 *val); @@ -307,4 +320,7 @@ char *get_srcline(struct dso *dso, unsigned long addr); void free_srcline(char *srcline); int filename__read_int(const char *filename, int *value); +int filename__read_str(const char *filename, char **buf, size_t *sizep); + +const char *get_filename_for_perf_kvm(void); #endif /* GIT_COMPAT_UTIL_H */ diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c index 39159822d58f..0ddb3b8a89ec 100644 --- a/tools/perf/util/vdso.c +++ b/tools/perf/util/vdso.c @@ -103,7 +103,7 @@ struct dso *vdso__dso_findnew(struct list_head *head) dso = dso__new(VDSO__MAP_NAME); if (dso != NULL) { dsos__add(head, dso); - dso__set_long_name(dso, file); + dso__set_long_name(dso, file, false); } } |